From 186087056a3a59d62315252022ac0181af2e268e Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Fri, 22 Jan 2021 21:04:40 +0100 Subject: [PATCH 01/83] implemented ExponentialStatistics in core.py --- runstats/core.py | 98 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/runstats/core.py b/runstats/core.py index ff10cd4..2866388 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -228,6 +228,104 @@ def make_statistics(state): return Statistics.fromstate(state) +class ExponentialStatistics: + """Compute exponential mean and variance in a single pass. + + Statistics objects may also be copied. + + Based on + "Finch, 2009, Incremental Calculation of Weighted Mean and Variance" at + https://nanopdf.com/download/incremental-calculation-of-weighted-mean-and-variance_pdf + """ + + def __init__(self, decay, initial_mean=0.0, iterable=()): + self._check_weight(decay) + + self._mean = float(initial_mean) + self._variance = self._count = 0.0 + self._decay = decay + + for value in iterable: + self.push(value) + + def clear(self, new_mean=0.0, new_decay=None): + self._mean = float(new_mean) + self._variance = self._count = 0.0 + + if new_decay is not None: + self._decay = float(new_decay) + + def change_decay(self, new_decay): + self._check_weight(new_decay) + self._decay = new_decay + + def __eq__(self, that): + return self.get_state() == that.get_state() + + def __ne__(self, that): + return self.get_state() != that.get_state() + + def get_state(self): + return self._mean, self._variance, self._decay, self._count + + def set_state(self, state): + ( + self._mean, + self._variance, + self._decay, + self._count + ) = state + + @classmethod + def fromstate(cls, state): + """Return Statistics object from state.""" + stats = cls() + stats.set_state(state) + return stats + + def __reduce__(self): + return make_exponential_statistics, (self.get_state(),) + + def copy(self, _=None): + """Copy Statistics object.""" + return 
self.fromstate(self.get_state()) + + __copy__ = copy + __deepcopy__ = copy + + def __len__(self): + """Number of values that have been pushed.""" + return int(self._count) + + def push(self, value): + value = float(value) + + alpha = (1.0 - self._decay) + diff = (value - self._mean) + incr = alpha * diff + self._variance += alpha * (self._decay * diff ** 2 - self._variance) + self._mean += incr + + def mean(self): + return self._mean + + def variance(self): + return self._variance + + def stddev(self): + return self.variance() ** 0.5 + + @staticmethod + def _check_weight(decay): + if decay >= 1 | decay <= 0: + raise ValueError("decay must be strictly greater 0 " + "and strictly smaller 1") + + +def make_exponential_statistics(state): + return ExponentialStatistics.fromstate(state) + + class Regression(object): """ Compute simple linear regression in a single pass. From 24d99e6dcecfd342f6cdf78adce11fd2588b535f Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sat, 23 Jan 2021 16:07:59 +0100 Subject: [PATCH 02/83] added add and mul functionality to ExponentialStatistics in core.py. Added Docstrings in respective class --- runstats/core.py | 82 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 68 insertions(+), 14 deletions(-) diff --git a/runstats/core.py b/runstats/core.py index 2866388..f6819ec 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -236,26 +236,48 @@ class ExponentialStatistics: Based on "Finch, 2009, Incremental Calculation of Weighted Mean and Variance" at https://nanopdf.com/download/incremental-calculation-of-weighted-mean-and-variance_pdf + + For an explanation of these statistics refer to e.g.: + https://nestedsoftware.com/2018/04/04/exponential-moving-average-on-streaming-data-4hhl.24876.html """ - def __init__(self, decay, initial_mean=0.0, iterable=()): + def __init__(self, decay, initial_mean=0.0, initial_variance=0.0, iterable=()): + """Initialize ExponentialStatistics object. 
+ + Incrementally tracks mean and variance and exponentially discounts + old values. + + Requires a `decay` rate in exclusive range (0,1) for discounting + previous statistics. + + Optionally allows setting initial mean and variance. Default 0.0. + + Iterates optional parameter `iterable` and pushes each value into the + statistics summary. + """ + decay = float(decay) self._check_weight(decay) self._mean = float(initial_mean) - self._variance = self._count = 0.0 + self._variance = initial_variance self._decay = decay for value in iterable: self.push(value) def clear(self, new_mean=0.0, new_decay=None): + """Clear ExponentialStatistics object.""" self._mean = float(new_mean) - self._variance = self._count = 0.0 + self._variance = 0.0 if new_decay is not None: - self._decay = float(new_decay) + new_decay = float(new_decay) + self._check_weight(new_decay) + self._decay = new_decay def change_decay(self, new_decay): + """Change decay rate of ExponentialStatistics object.""" + new_decay = float(new_decay) self._check_weight(new_decay) self._decay = new_decay @@ -266,20 +288,21 @@ def __ne__(self, that): return self.get_state() != that.get_state() def get_state(self): - return self._mean, self._variance, self._decay, self._count + """Get internal state.""" + return self._mean, self._variance, self._decay def set_state(self, state): + """Set internal state.""" ( self._mean, self._variance, self._decay, - self._count ) = state @classmethod def fromstate(cls, state): - """Return Statistics object from state.""" - stats = cls() + """Return ExponentialStatistics object from state.""" + stats = cls(None) stats.set_state(state) return stats @@ -287,17 +310,14 @@ def __reduce__(self): return make_exponential_statistics, (self.get_state(),) def copy(self, _=None): - """Copy Statistics object.""" + """Copy ExponentialStatistics object.""" return self.fromstate(self.get_state()) __copy__ = copy __deepcopy__ = copy - def __len__(self): - """Number of values that have been 
pushed.""" - return int(self._count) - def push(self, value): + """Add `value` to the ExponentialStatistics summary.""" value = float(value) alpha = (1.0 - self._decay) @@ -307,17 +327,51 @@ def push(self, value): self._mean += incr def mean(self): + """Exponential Mean of values.""" return self._mean def variance(self): + """Exponential Variance of values.""" return self._variance def stddev(self): + """Exponential Standard deviation of values.""" return self.variance() ** 0.5 + def __add__(self, that): + """Add two ExponentialStatistics objects together.""" + sigma = self.copy() + sigma += that + return sigma + + def __iadd__(self, that): + """Add another ExponentialStatistics object to this one.""" + self._mean += that.mean() + self._variance += that.variance() + return self + + def __mul__(self, that): + """Multiply by a scalar in (0,1) to change ExponentialStatistics + weighting.""" + sigma = self.copy() + sigma *= that + return sigma + + __rmul__ = __mul__ + + def __imul__(self, that): + """Multiply by a scalar in (0,1) to change ExponentialStatistics + weighting in-place.""" + that = float(that) + self._mean *= that + self._variance *= that + return self + + @staticmethod def _check_weight(decay): - if decay >= 1 | decay <= 0: + """Check if value range of passed decay is correct""" + if (decay >= 1.0) | (decay <= 0.0): raise ValueError("decay must be strictly greater 0 " "and strictly smaller 1") From 62880c0d8ec3258e823ea3e3bfa973927438278d Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 24 Jan 2021 14:00:37 +0100 Subject: [PATCH 03/83] implemented c version of ExponentialStatistics in fast.pyx. 
Not tested yet --- runstats/core.py | 16 +++-- runstats/fast.pyx | 156 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 165 insertions(+), 7 deletions(-) diff --git a/runstats/core.py b/runstats/core.py index f6819ec..24903b2 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -1,6 +1,6 @@ """Python RunStats -Compute Statistics and Regression in a single pass. +Compute Statistics, ExponentialStatistics and Regression in a single pass. """ @@ -241,7 +241,13 @@ class ExponentialStatistics: https://nestedsoftware.com/2018/04/04/exponential-moving-average-on-streaming-data-4hhl.24876.html """ - def __init__(self, decay, initial_mean=0.0, initial_variance=0.0, iterable=()): + def __init__( + self, + decay, + initial_mean=0.0, + initial_variance=0.0, + iterable=() + ): """Initialize ExponentialStatistics object. Incrementally tracks mean and variance and exponentially discounts @@ -265,10 +271,10 @@ def __init__(self, decay, initial_mean=0.0, initial_variance=0.0, iterable=()): for value in iterable: self.push(value) - def clear(self, new_mean=0.0, new_decay=None): + def clear(self, new_mean=0.0, new_var=0.0, new_decay=None): """Clear ExponentialStatistics object.""" self._mean = float(new_mean) - self._variance = 0.0 + self._variance = float(new_var) if new_decay is not None: new_decay = float(new_decay) @@ -357,8 +363,6 @@ def __mul__(self, that): sigma *= that return sigma - __rmul__ = __mul__ - def __imul__(self, that): """Multiply by a scalar in (0,1) to change ExponentialStatistics weighting in-place.""" diff --git a/runstats/fast.pyx b/runstats/fast.pyx index 57d9037..ace24a2 100644 --- a/runstats/fast.pyx +++ b/runstats/fast.pyx @@ -6,7 +6,7 @@ Compute Statistics and Regression in a single pass. 
from __future__ import division -from .core import make_statistics, make_regression +from .core import make_statistics, make_regression, make_exponential_statistics cdef class Statistics(object): @@ -234,6 +234,160 @@ cdef class Statistics(object): self._phi *= that return self +cdef class ExponentialStatistics(object): + """Compute exponential mean and variance in a single pass. + + Statistics objects may also be copied. + + Based on + "Finch, 2009, Incremental Calculation of Weighted Mean and Variance" at + https://nanopdf.com/download/incremental-calculation-of-weighted-mean-and-variance_pdf + + For an explanation of these statistics refer to e.g.: + https://nestedsoftware.com/2018/04/04/exponential-moving-average-on-streaming-data-4hhl.24876.html + """ + + cdef public double _mean + cdef public double _variance + cdef public double _decay + + def __init__( + self, + double decay, + double initial_mean=0.0, + double initial_variance=0.0, + iterable=() + ): + """Initialize ExponentialStatistics object. + + Incrementally tracks mean and variance and exponentially discounts + old values. + + Requires a `decay` rate in exclusive range (0,1) for discounting + previous statistics. + + Optionally allows setting initial mean and variance. Default 0.0. + + Iterates optional parameter `iterable` and pushes each value into the + statistics summary. 
+ """ + self._check_weight(decay) + + self._mean = initial_mean + self._variance = initial_variance + self._decay = decay + + for value in iterable: + self.push(value) + + cpdef clear(self, double new_mean=0.0, double new_var=0.0, new_decay=None) except -1: + """Clear ExponentialStatistics object.""" + self._mean = new_mean + self._variance = new_var + + if new_decay is not None: + new_decay = float(new_decay) + self._check_weight(new_decay) + self._decay = new_decay + + def change_decay(self, new_decay): + """Change decay rate of ExponentialStatistics object.""" + new_decay = float(new_decay) + self._check_weight(new_decay) + self._decay = new_decay + + def __richcmp__(self, other, op): + if op == 2: + return self.get_state() == other.get_state() + elif op == 3: + return self.get_state() != other.get_state() + else: + return NotImplemented + + def get_state(self): + """Get internal state.""" + return self._mean, self._variance, self._decay + + def set_state(self, state): + """Set internal state.""" + ( + self._mean, + self._variance, + self._decay, + ) = state + + @classmethod + def fromstate(cls, state): + """Return ExponentialStatistics object from state.""" + stats = cls(None) + stats.set_state(state) + return stats + + def __reduce__(self): + return make_exponential_statistics, (self.get_state(),) + + def copy(self, _=None): + """Copy ExponentialStatistics object.""" + return self.fromstate(self.get_state()) + + __copy__ = copy + __deepcopy__ = copy + + cpdef push(self, double value): + """Add `value` to the ExponentialStatistics summary.""" + + cdef double alpha = (1.0 - self._decay) + cdef double diff = (value - self._mean) + cdef double incr = alpha * diff + self._variance += alpha * (self._decay * diff ** 2 - self._variance) + self._mean += incr + + cpdef mean(self): + """Exponential Mean of values.""" + return self._mean + + cpdef variance(self): + """Exponential Variance of values.""" + return self._variance + + cpdef stddev(self): + """Exponential 
Standard deviation of values.""" + return self.variance() ** 0.5 + + def __add__(self, that): + """Add two ExponentialStatistics objects together.""" + sigma = self.copy() + sigma += that + return sigma + + def __iadd__(self, that): + """Add another ExponentialStatistics object to this one.""" + self._mean += that.mean() + self._variance += that.variance() + return self + + def __mul__(self, that): + """Multiply by a scalar in (0,1) to change ExponentialStatistics + weighting.""" + sigma = self.copy() + sigma *= that + return sigma + + def __imul__(self, that): + """Multiply by a scalar in (0,1) to change ExponentialStatistics + weighting in-place.""" + that = float(that) + self._mean *= that + self._variance *= that + return self + + + @staticmethod + def _check_weight(decay): + """Check if value range of passed decay is correct""" + if (decay >= 1.0) | (decay <= 0.0): + raise ValueError("decay must be strictly greater 0 " + "and strictly smaller 1") cdef class Regression(object): """ From 872a2b03c38062d1b9485dcb8aa60079ab2977d7 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 24 Jan 2021 14:08:12 +0100 Subject: [PATCH 04/83] Debugged fast.pyx ExponentialStatistics. 
Removed except clause from cpdef clear() (return value is an object) --- runstats/fast.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runstats/fast.pyx b/runstats/fast.pyx index ace24a2..d2982f0 100644 --- a/runstats/fast.pyx +++ b/runstats/fast.pyx @@ -280,7 +280,7 @@ cdef class ExponentialStatistics(object): for value in iterable: self.push(value) - cpdef clear(self, double new_mean=0.0, double new_var=0.0, new_decay=None) except -1: + cpdef clear(self, double new_mean=0.0, double new_var=0.0, new_decay=None): """Clear ExponentialStatistics object.""" self._mean = new_mean self._variance = new_var From c1ad27c549d4751f39cee001bd371da7c4fa463f Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 24 Jan 2021 18:42:54 +0100 Subject: [PATCH 05/83] added cython ExponentialStatistics to __init__, debugged cython version of fromstate() -> changed Nonetype to double --- runstats/__init__.py | 4 ++-- runstats/core.py | 2 +- runstats/fast.pyx | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/runstats/__init__.py b/runstats/__init__.py index ade5d3b..1e9c9ba 100644 --- a/runstats/__init__.py +++ b/runstats/__init__.py @@ -5,10 +5,10 @@ """ try: - from .fast import Statistics, Regression + from .fast import Statistics, Regression, ExponentialStatistics __compiled__ = True except ImportError: - from .core import Statistics, Regression + from .core import Statistics, Regression, ExponentialStatistics __compiled__ = False __title__ = 'runstats' diff --git a/runstats/core.py b/runstats/core.py index 24903b2..3d50a4e 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -231,7 +231,7 @@ def make_statistics(state): class ExponentialStatistics: """Compute exponential mean and variance in a single pass. - Statistics objects may also be copied. + ExponentialStatistics objects may also be copied. 
Based on "Finch, 2009, Incremental Calculation of Weighted Mean and Variance" at diff --git a/runstats/fast.pyx b/runstats/fast.pyx index d2982f0..af5bd5a 100644 --- a/runstats/fast.pyx +++ b/runstats/fast.pyx @@ -237,7 +237,7 @@ cdef class Statistics(object): cdef class ExponentialStatistics(object): """Compute exponential mean and variance in a single pass. - Statistics objects may also be copied. + ExponentialStatistics objects may also be copied. Based on "Finch, 2009, Incremental Calculation of Weighted Mean and Variance" at @@ -319,7 +319,7 @@ cdef class ExponentialStatistics(object): @classmethod def fromstate(cls, state): """Return ExponentialStatistics object from state.""" - stats = cls(None) + stats = cls(0.0001) stats.set_state(state) return stats From 139953c66431548f8a95a6b35fe60db2cb6f213c Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 31 Jan 2021 16:27:38 +0100 Subject: [PATCH 06/83] implemented tests for exponential statistics --- runstats/core.py | 8 +- runstats/fast.pyx | 6 +- tests/test_runstats.py | 182 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 189 insertions(+), 7 deletions(-) diff --git a/runstats/core.py b/runstats/core.py index 3d50a4e..5db767c 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -271,10 +271,10 @@ def __init__( for value in iterable: self.push(value) - def clear(self, new_mean=0.0, new_var=0.0, new_decay=None): + def clear(self, new_mean=0.0, new_variance=0.0, new_decay=None): """Clear ExponentialStatistics object.""" self._mean = float(new_mean) - self._variance = float(new_var) + self._variance = float(new_variance) if new_decay is not None: new_decay = float(new_decay) @@ -308,7 +308,7 @@ def set_state(self, state): @classmethod def fromstate(cls, state): """Return ExponentialStatistics object from state.""" - stats = cls(None) + stats = cls(0.9) stats.set_state(state) return stats @@ -363,6 +363,8 @@ def __mul__(self, that): sigma *= that return sigma + __rmul__ = __mul__ + def 
__imul__(self, that): """Multiply by a scalar in (0,1) to change ExponentialStatistics weighting in-place.""" diff --git a/runstats/fast.pyx b/runstats/fast.pyx index af5bd5a..ce64f83 100644 --- a/runstats/fast.pyx +++ b/runstats/fast.pyx @@ -280,10 +280,10 @@ cdef class ExponentialStatistics(object): for value in iterable: self.push(value) - cpdef clear(self, double new_mean=0.0, double new_var=0.0, new_decay=None): + cpdef clear(self, double new_mean=0.0, double new_variance=0.0, new_decay=None): """Clear ExponentialStatistics object.""" self._mean = new_mean - self._variance = new_var + self._variance = new_variance if new_decay is not None: new_decay = float(new_decay) @@ -319,7 +319,7 @@ cdef class ExponentialStatistics(object): @classmethod def fromstate(cls, state): """Return ExponentialStatistics object from state.""" - stats = cls(0.0001) + stats = cls(0.9) stats.set_state(state) return stats diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 6b3f031..b3a0f1d 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -3,15 +3,16 @@ """ import copy -import functools import math import pickle import pytest import random from runstats import Statistics as FastStatistics +from runstats import ExponentialStatistics as FastExponentialStatistics from runstats import Regression as FastRegression from runstats.core import Statistics as CoreStatistics +from runstats.core import ExponentialStatistics as CoreExponentialStatistics from runstats.core import Regression as CoreRegression limit = 1e-2 @@ -60,6 +61,7 @@ def test_statistics(Statistics, Regression): alpha = [random.random() for _ in range(count)] alpha_stats = Statistics() + for val in alpha: alpha_stats.push(val) @@ -115,6 +117,89 @@ def test_statistics(Statistics, Regression): assert delta_stats.maximum() == max(alpha + beta) +@pytest.mark.parametrize('ExponentialStatistics', [ + CoreExponentialStatistics, FastExponentialStatistics +]) +def 
test_exponential_statistics(ExponentialStatistics): + random.seed(0) + alpha = [random.random() for _ in range(count)] + big_alpha = [random.random() for _ in range(count * 100)] + + alpha_exp_stats_zero = ExponentialStatistics(0.9999) + alpha_exp_stats_init = ExponentialStatistics( + decay=0.9999, + initial_mean=mean(alpha), + initial_variance=variance(alpha, 0) + ) + + for val in big_alpha: + alpha_exp_stats_zero.push(val) + alpha_exp_stats_init.push(val) + + assert error(mean(big_alpha), alpha_exp_stats_zero.mean()) < limit + assert error(mean(big_alpha), alpha_exp_stats_init.mean()) < limit + assert error(variance(big_alpha, 0), alpha_exp_stats_zero.variance()) < limit + assert error(variance(big_alpha, 0), alpha_exp_stats_init.variance()) < limit + assert error(stddev(big_alpha, 0), alpha_exp_stats_zero.stddev()) < limit + assert error(stddev(big_alpha, 0), alpha_exp_stats_init.stddev()) < limit + + alpha_exp_stats_zero.clear() + alpha_exp_stats_zero.change_decay(0.1) + alpha_exp_stats_init.clear( + new_decay=0.1, + new_mean=mean(alpha), + new_variance=variance(alpha, 0) + ) + + for val in big_alpha: + alpha_exp_stats_zero.push(val) + alpha_exp_stats_init.push(val) + + assert error(alpha_exp_stats_zero.mean(), alpha_exp_stats_init.mean()) < limit + assert error( + alpha_exp_stats_zero.variance(), alpha_exp_stats_init.variance() + ) < limit + assert error( + alpha_exp_stats_zero.stddev(), alpha_exp_stats_init.stddev() + ) < limit + + alpha_exp_stats = ExponentialStatistics(0.1, iterable=alpha) + beta = [random.random() * 2 for _ in range(count)] + beta_exp_stats = ExponentialStatistics(0.1) + + assert alpha_exp_stats != beta_exp_stats + + for val in beta: + alpha_exp_stats.push(val) + beta_exp_stats.push(val) + + assert alpha_exp_stats == beta_exp_stats + + for val in alpha: + alpha_exp_stats.push(val) + beta_exp_stats.push(val) + + assert alpha_exp_stats == beta_exp_stats + + current_mean = alpha_exp_stats.mean() + current_variance = alpha_exp_stats.variance() 
+ alpha_exp_stats.change_decay(0.99999999) + + for val in range(10): + alpha_exp_stats.push(val) + + assert (error(current_mean, alpha_exp_stats.mean())) < limit + assert (error(current_variance, alpha_exp_stats.variance())) < limit + + alpha_exp_stats.change_decay(0.1) + + for val in range(10): + alpha_exp_stats.push(val) + + assert (error(current_mean, alpha_exp_stats.mean())) > limit + assert (error(current_variance, alpha_exp_stats.variance())) > limit + + @pytest.mark.parametrize('Statistics,Regression', [ (CoreStatistics, CoreRegression), (FastStatistics, FastRegression), @@ -126,6 +211,16 @@ def test_add_statistics(Statistics, Regression): assert (stats10 + stats0) == stats10 +@pytest.mark.parametrize('ExponentialStatistics', [ + CoreExponentialStatistics, FastExponentialStatistics +]) +def test_add_exponential_statistics(ExponentialStatistics): + exp_stats0 = ExponentialStatistics(0.9) + exp_stats10 = ExponentialStatistics(0.9, iterable=range(10)) + assert (exp_stats0 + exp_stats10) == exp_stats10 + assert (exp_stats10 + exp_stats0) == exp_stats10 + + def correlation(values): sigma_x = sum(xxx for xxx, yyy in values) / len(values) sigma_y = sum(yyy for xxx, yyy in values) / len(values) @@ -211,6 +306,26 @@ def test_get_set_state_statistics(Statistics, Regression): assert stats == Statistics.fromstate(stats.get_state()) +@pytest.mark.parametrize('ExponentialStatistics', [ + CoreExponentialStatistics, FastExponentialStatistics +]) +def test_get_set_state_exponential_statistics(ExponentialStatistics): + random.seed(0) + vals = [random.random() for _ in range(count)] + exp_stats = ExponentialStatistics(0.9, iterable=vals) + exp_state = exp_stats.get_state() + + new_exp_stats = ExponentialStatistics(0.9) + assert exp_stats != new_exp_stats + new_exp_stats.set_state(exp_state) + assert exp_stats == new_exp_stats + new_exp_stats.change_decay(0.1) + assert exp_stats != new_exp_stats + assert exp_stats.mean() == new_exp_stats.mean() + assert exp_stats.variance() == 
new_exp_stats.variance() + + assert exp_stats == ExponentialStatistics.fromstate(exp_stats.get_state()) + @pytest.mark.parametrize('Statistics,Regression', [ (CoreStatistics, CoreRegression), (FastStatistics, FastRegression), @@ -253,6 +368,17 @@ def test_pickle_statistics(Statistics, Regression): assert stats == unpickled_stats, 'protocol: %s' % num +@pytest.mark.parametrize('ExponentialStatistics', [ + CoreExponentialStatistics, FastExponentialStatistics +]) +def test_pickle_exponential_statistics(ExponentialStatistics): + exp_stats = ExponentialStatistics(0.9, iterable=range(10)) + for num in range(pickle.HIGHEST_PROTOCOL): + pickled_exp_stats = pickle.dumps(exp_stats, protocol=num) + unpickled_exp_stats = pickle.loads(pickled_exp_stats) + assert exp_stats == unpickled_exp_stats, 'protocol: %s' % num + + @pytest.mark.parametrize('Statistics,Regression', [ (CoreStatistics, CoreRegression), (FastStatistics, FastRegression), @@ -277,6 +403,17 @@ def test_copy_statistics(Statistics, Regression): assert stats == deepcopy_stats +@pytest.mark.parametrize('ExponentialStatistics', [ + CoreExponentialStatistics, FastExponentialStatistics +]) +def test_copy_exponential_statistics(ExponentialStatistics): + exp_stats = ExponentialStatistics(0.9, iterable=range(10)) + copy_exp_stats = copy.copy(exp_stats) + assert exp_stats == copy_exp_stats + deepcopy_exp_stats = copy.deepcopy(exp_stats) + assert exp_stats == deepcopy_exp_stats + + @pytest.mark.parametrize('Statistics,Regression', [ (CoreStatistics, CoreRegression), (FastStatistics, FastRegression), @@ -301,6 +438,17 @@ def test_equality_statistics(Statistics, Regression): assert stats1 != stats2 +@pytest.mark.parametrize('ExponentialStatistics', [ + CoreExponentialStatistics, FastExponentialStatistics +]) +def test_equality_exponential_statistics(ExponentialStatistics): + exp_stats1 = ExponentialStatistics(0.9, iterable=range(10)) + exp_stats2 = ExponentialStatistics(0.9, iterable=range(10)) + assert exp_stats1 == 
exp_stats2 + exp_stats2.push(42) + assert exp_stats1 != exp_stats2 + + @pytest.mark.parametrize('Statistics,Regression', [ (CoreStatistics, CoreRegression), (FastStatistics, FastRegression), @@ -363,6 +511,27 @@ def test_multiply(Statistics, Regression): stats5 = math.e * stats1 assert stats1.mean() == stats5.mean() +@pytest.mark.parametrize('ExponentialStatistics', [ + CoreExponentialStatistics, FastExponentialStatistics +]) +def test_expoential_batch(ExponentialStatistics): + random.seed(0) + + alpha = [random.random() for _ in range(count)] + beta = [random.random() * 2 for _ in range(count)] + + alpha_exp_stats = ExponentialStatistics(0.1, iterable=alpha) + beta_exp_stats = ExponentialStatistics(0.1, iterable=beta) + + gamma_exp_stats = alpha_exp_stats * 0.3 + beta_exp_stats * 0.7 + + weighted_mean = alpha_exp_stats.mean() * 0.3 + beta_exp_stats.mean() * 0.7 + assert weighted_mean == gamma_exp_stats.mean() + + weighted_var = alpha_exp_stats.variance() * 0.3 \ + + beta_exp_stats.variance() * 0.7 + assert weighted_var == gamma_exp_stats.variance() + @pytest.mark.parametrize('Statistics,Regression', [ (CoreStatistics, CoreRegression), @@ -373,3 +542,14 @@ def test_raise_if_invalid_multiply(Statistics, Regression): stats2 = Statistics(range(10)) * 2 with pytest.raises(TypeError): stats1 * stats2 + + +@pytest.mark.parametrize('ExponentialStatistics', [ + CoreExponentialStatistics, FastExponentialStatistics +]) +def test_raise_if_invalid_multiply(ExponentialStatistics): + with pytest.raises(ValueError): + ExponentialStatistics(0) + ExponentialStatistics(1) + ExponentialStatistics(-1) + ExponentialStatistics(2) From 5b2ec1e2c724dd93a0dfbb5e1b38e6653de635c8 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 31 Jan 2021 16:32:27 +0100 Subject: [PATCH 07/83] added decay test in test_exponential_batch --- tests/test_runstats.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 
b3a0f1d..d14018a 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -521,7 +521,7 @@ def test_expoential_batch(ExponentialStatistics): beta = [random.random() * 2 for _ in range(count)] alpha_exp_stats = ExponentialStatistics(0.1, iterable=alpha) - beta_exp_stats = ExponentialStatistics(0.1, iterable=beta) + beta_exp_stats = ExponentialStatistics(0.9, iterable=beta) gamma_exp_stats = alpha_exp_stats * 0.3 + beta_exp_stats * 0.7 @@ -531,6 +531,9 @@ def test_expoential_batch(ExponentialStatistics): weighted_var = alpha_exp_stats.variance() * 0.3 \ + beta_exp_stats.variance() * 0.7 assert weighted_var == gamma_exp_stats.variance() + assert alpha_exp_stats._decay == gamma_exp_stats._decay + assert beta_exp_stats._decay != gamma_exp_stats._decay + @pytest.mark.parametrize('Statistics,Regression', [ From 4dbc0ae95dc471297710d985789118d6c463b99d Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Mon, 1 Feb 2021 23:19:47 +0100 Subject: [PATCH 08/83] ExpStats: Implemented get_decay, added to unit tests, added to tests/__main__.py, added to benchmark --- runstats/core.py | 6 +++++- runstats/fast.pyx | 6 +++++- tests/__main__.py | 26 ++++++++++++++++++++++++++ tests/benchmark.py | 36 ++++++++++++++++++++++++++++++++++++ tests/test_runstats.py | 7 +++++-- 5 files changed, 77 insertions(+), 4 deletions(-) diff --git a/runstats/core.py b/runstats/core.py index 5db767c..4c5b716 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -243,7 +243,7 @@ class ExponentialStatistics: def __init__( self, - decay, + decay=0.9, initial_mean=0.0, initial_variance=0.0, iterable=() @@ -281,6 +281,10 @@ def clear(self, new_mean=0.0, new_variance=0.0, new_decay=None): self._check_weight(new_decay) self._decay = new_decay + def get_decay(self): + """Get decay rate of ExponentialStatistics object.""" + return self._decay + def change_decay(self, new_decay): """Change decay rate of ExponentialStatistics object.""" new_decay = float(new_decay) diff --git a/runstats/fast.pyx 
b/runstats/fast.pyx index ce64f83..343e41a 100644 --- a/runstats/fast.pyx +++ b/runstats/fast.pyx @@ -253,7 +253,7 @@ cdef class ExponentialStatistics(object): def __init__( self, - double decay, + double decay=0.9, double initial_mean=0.0, double initial_variance=0.0, iterable=() @@ -290,6 +290,10 @@ cdef class ExponentialStatistics(object): self._check_weight(new_decay) self._decay = new_decay + def get_decay(self): + """Get decay rate of ExponentialStatistics object.""" + return self._decay + def change_decay(self, new_decay): """Change decay rate of ExponentialStatistics object.""" new_decay = float(new_decay) diff --git a/tests/__main__.py b/tests/__main__.py index 541802b..446518d 100644 --- a/tests/__main__.py +++ b/tests/__main__.py @@ -3,8 +3,10 @@ import sys from runstats import Statistics as FastStatistics +from runstats import ExponentialStatistics as FastExponentialStatistics from runstats import Regression as FastRegression from runstats.core import Statistics as CoreStatistics +from runstats.core import ExponentialStatistics as CoreExponentialStatistics from runstats.core import Regression as CoreRegression from .test_runstats import mean, variance, stddev, skewness, kurtosis @@ -48,6 +50,30 @@ def main(): print('Skewness:', core_stats.skewness()) print('Kurtosis:', core_stats.kurtosis()) + fast_exp_stats = FastExponentialStatistics() + + for arg in args: + fast_exp_stats.push(arg) + + print() + print('FastExponentialStatistics') + print('Decay Rate (default):', fast_exp_stats.get_decay()) + print('Exponential Mean:', fast_exp_stats.mean()) + print('Exponential Variance:', fast_exp_stats.variance()) + print('Exponential StdDev:', fast_exp_stats.stddev()) + + core_exp_stats = CoreExponentialStatistics() + + for arg in args: + core_exp_stats.push(arg) + + print() + print('CoreExponentialStatistics') + print('Decay Rate (default):', core_exp_stats.get_decay()) + print('Exponential Mean:', core_exp_stats.mean()) + print('Exponential Variance:', 
core_exp_stats.variance()) + print('Exponential StdDev:', core_exp_stats.stddev()) + fast_regr = FastRegression() for index, arg in enumerate(args, 1): diff --git a/tests/benchmark.py b/tests/benchmark.py index 4583638..a1d3f45 100644 --- a/tests/benchmark.py +++ b/tests/benchmark.py @@ -44,6 +44,38 @@ def main(): speedup_stats = core_stats / fast_stats - 1 + core_exp_stats = timeit.repeat( + setup=""" + from __main__ import values + from runstats.core import ExponentialStatistics + exp_stats = ExponentialStatistics() + """, + stmt=""" + for value in values: + exp_stats.push(value) + exp_stats.mean() + """, + number=1, + repeat=7, + )[2] + + fast_exp_stats = timeit.repeat( + setup=""" + from __main__ import values + from runstats.fast import ExponentialStatistics + exp_stats = ExponentialStatistics() + """, + stmt=""" + for value in values: + exp_stats.push(value) + exp_stats.mean() + """, + number=1, + repeat=7, + )[2] + + speedup_exp_stats = core_exp_stats / fast_exp_stats - 1 + core_regr = timeit.repeat( setup=""" from __main__ import pairs @@ -80,6 +112,10 @@ def main(): print('fast.Statistics:', fast_stats) print(' Stats Speedup: %.2fx faster' % speedup_stats) + print('core.ExponentialStatistics:', core_exp_stats) + print('fast.ExponentialStatistics:', fast_exp_stats) + print(' ExpStats Speedup: %.2fx faster' % speedup_exp_stats) + print('core.Regression:', core_regr) print('fast.Regression:', fast_regr) print(' Regr Speedup: %.2fx faster' % speedup_regr) diff --git a/tests/test_runstats.py b/tests/test_runstats.py index d14018a..08c2d62 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -312,17 +312,20 @@ def test_get_set_state_statistics(Statistics, Regression): def test_get_set_state_exponential_statistics(ExponentialStatistics): random.seed(0) vals = [random.random() for _ in range(count)] - exp_stats = ExponentialStatistics(0.9, iterable=vals) + exp_stats = ExponentialStatistics(iterable=vals) exp_state = exp_stats.get_state() - 
new_exp_stats = ExponentialStatistics(0.9) + new_exp_stats = ExponentialStatistics(0.8) assert exp_stats != new_exp_stats + assert new_exp_stats.get_decay() == 0.8 new_exp_stats.set_state(exp_state) + assert new_exp_stats.get_decay() == 0.9 assert exp_stats == new_exp_stats new_exp_stats.change_decay(0.1) assert exp_stats != new_exp_stats assert exp_stats.mean() == new_exp_stats.mean() assert exp_stats.variance() == new_exp_stats.variance() + assert new_exp_stats.get_decay() == 0.1 assert exp_stats == ExponentialStatistics.fromstate(exp_stats.get_state()) From f9179797291e2de1fc1fbb60747b169aa11a57b9 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Tue, 2 Feb 2021 21:27:02 +0100 Subject: [PATCH 09/83] test commit for docu --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index ad8d97c..d588d88 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,6 @@ RunStats: Computing Statistics and Regression in One Pass ========================================================= - +Test Test Test `RunStats`_ is an Apache2 licensed Python module for online statistics and online regression. Statistics and regression summaries are computed in a single pass. Previous values are not recorded in summaries. From c20efab6163ffa6b942e3f0660717fc7870a29d4 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Tue, 2 Feb 2021 21:32:38 +0100 Subject: [PATCH 10/83] revert last commit: test docs --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index d588d88..ad8d97c 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,6 @@ RunStats: Computing Statistics and Regression in One Pass ========================================================= -Test Test Test + `RunStats`_ is an Apache2 licensed Python module for online statistics and online regression. Statistics and regression summaries are computed in a single pass. Previous values are not recorded in summaries. 
From 66770551b87c48aa8b9adb90df82e5f85817c91e Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Tue, 2 Feb 2021 21:50:50 +0100 Subject: [PATCH 11/83] Docu: Wrote intro, adding exponential stats, added to api.rst --- README.rst | 7 +++++++ docs/api.rst | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/README.rst b/README.rst index ad8d97c..562407e 100644 --- a/README.rst +++ b/README.rst @@ -19,6 +19,13 @@ calculating the variance and other higher moments requires multiple passes over the data. With generators, this is not possible and so computing statistics in a single pass is necessary. +Last but not least, there are situations where a user is not interested in a +complete summary of the entire stream of data but rather wants to observe the +'current' state of the system based on the recent past. In these cases +exponential statistics come in handy. Instead of weighting all values uniformly +in the statistics computation, one can exponentially decay the weight of older +values. Thus the e.g. current mean is predominantly based on more recent values. + The Python `RunStats`_ module was designed for these cases by providing a pair of classes for computing online summary statistics and online linear regression in a single pass. Summary objects work on sequences which may be larger than diff --git a/docs/api.rst b/docs/api.rst index f3ce9ef..09e2c66 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -15,3 +15,10 @@ Regression .. autoclass:: runstats.Regression :members: :special-members: + +ExponentialStatistics +.......... + +.. 
autoclass:: runstats.ExponentialStatistics + :members: + :special-members: From 02652915b68617ac8833a376f89845ef8863cb0c Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Tue, 2 Feb 2021 23:03:28 +0100 Subject: [PATCH 12/83] Docu: Added major part for ExponentialStats --- README.rst | 68 ++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 61 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index 562407e..6e7f958 100644 --- a/README.rst +++ b/README.rst @@ -74,22 +74,25 @@ function: .. code-block:: python - >>> from runstats import Statistics, Regression + >>> from runstats import Statistics, Regression, ExponentialStatistics >>> help(Statistics) >>> help(Regression) + >>> help(ExponentialStatistics) Tutorial -------- -The Python `RunStats`_ module provides two types for computing running -Statistics and Regression. The Regression object leverages Statistics -internally for its calculations. Each can be initialized without arguments: +The Python `RunStats`_ module provides three types for computing running +statistics: Statistics, ExponentialStatistics and Regression.The Regression +object leverages Statistics internally for its calculations. Each can be +initialized without arguments: .. code-block:: python - >>> from runstats import Statistics, Regression + >>> from runstats import Statistics, Regression, ExponentialStatistics >>> stats = Statistics() >>> regr = Regression() + >>> exp_stats = ExponentialStatistics() Statistics objects support four methods for modification. Use `push` to add values to the summary, `clear` to reset the summary, sum to combine Statistics @@ -121,8 +124,8 @@ summaries and multiply to weight summary Statistics by a scalar. Use the Python built-in `len` for the number of pushed values. Unfortunately the Python `min` and `max` built-ins may not be used for the minimum and -maximum as sequences are instead expected. 
There are instead `minimum` and -`maximum` methods which are provided for that purpose: +maximum as sequences are expected instead. Therefore, there are `minimum` and +`maximum` methods provided for that purpose: .. code-block:: python @@ -203,6 +206,57 @@ Both constructors accept an optional iterable that is consumed and pushed into the summary. Note that you may pass a generator as an iterable and the generator will be entirely consumed. +Last but not least, ExponentialStatistics are constructed by providing: a decay +rate that is strictly larger than 0.0 and strictly smaller than 1.0 +(default: 0.9), a initial mean and a initial variance (default: 0.0) as well as +an iterable as with the other two objects. The decay rate is the weight by which +the current statistics are discounted by and (1.0 - decay) is the weight of the +new value on the new statistics. The class has five methods of modification: +`push()`, `clear()`, sum and multiply as the Statistics class has and +`change_decay()` to modify the current decay rate in-place. +The clear method allows to optionally set a new mean, new variance and new +decay. If none are provided mean and variance reset to 0, while the decay is not +changed. If two ExponentialStatistics are being added the decay of the left +hand side is the decay of the new object. +The statistics supported are `mean()`, `variance()` and `stddev()`. +The `len()` method is not supported. + +TODO############################# Compute results, check if everything is in there +.. 
code-block:: python + + >>> exp_stats = ExponentialStatistics(decay=0.5, initial_mean=0.0, initial_variance=0.0, iterable=[5]) + >>> exp_stats.mean() + 4.0 + >>> exp_stats.variance() + 15.33333333333333 + >>> exp_stats.stddev() + 3.915780041490243 + >>> exp_stats.push(10) + >>> exp_stats.mean() + 0.0 + >>> exp_stats.change_decay(0.9) + >>> exp.stats.get_decay() + 0.9 + >>> exp_stats.push(100) + >>> exp_stats.mean() + 0.0 + >>> exp_stats.clear(new_mean=10.0, new_variance=2.0) + >>> new_exp_stats = ExponentialStatistics(decay=0.8, iterable=range(100)) + >>> new_exp_stats.mean() + 0.0 + # Multiply and add are perfect for exponentially weighting two 'batches' + >>> final_exp_stats = 0.5 * exp_stats + 0.5 * new_exp_stats + >>> final_exp_stats.mean() + 0.0 + >>> final_exp_stats.get_decay() + >>> final_exp_stats.clear(new_decay=0.5) + >>> final_exp_stats.get_state() + (0.0, 0.0, 0.5) + >>> exp_stats.set_state(final_exp_stats.get_state()) + >>> exp_stats == final_exp_stats == exp_stats.copy() + True + + All internal calculations are based entirely on the C++ code by John Cook as posted in a couple of articles: From 7ddbc4572e2892fb784ed2290e8fd5cc4b64aae4 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Tue, 2 Feb 2021 23:04:38 +0100 Subject: [PATCH 13/83] Docu: Added major part for ExponentialStats --- README.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 6e7f958..2f2b9b9 100644 --- a/README.rst +++ b/README.rst @@ -224,14 +224,15 @@ The `len()` method is not supported. TODO############################# Compute results, check if everything is in there .. 
code-block:: python - >>> exp_stats = ExponentialStatistics(decay=0.5, initial_mean=0.0, initial_variance=0.0, iterable=[5]) + >>> exp_stats = ExponentialStatistics(decay=0.5, initial_mean=0.0, initial_variance=0.0) + >>> exp_stats.push(10) >>> exp_stats.mean() 4.0 >>> exp_stats.variance() 15.33333333333333 >>> exp_stats.stddev() 3.915780041490243 - >>> exp_stats.push(10) + >>> exp_stats.push(20) >>> exp_stats.mean() 0.0 >>> exp_stats.change_decay(0.9) From 12d966d0a8156e23e4ae75b5a6665c164bd09e19 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Wed, 3 Feb 2021 20:13:47 +0100 Subject: [PATCH 14/83] finished updating docu for ExponentialStats in readme --- README.rst | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/README.rst b/README.rst index 2f2b9b9..e4e9d01 100644 --- a/README.rst +++ b/README.rst @@ -221,34 +221,33 @@ hand side is the decay of the new object. The statistics supported are `mean()`, `variance()` and `stddev()`. The `len()` method is not supported. -TODO############################# Compute results, check if everything is in there .. 
code-block:: python >>> exp_stats = ExponentialStatistics(decay=0.5, initial_mean=0.0, initial_variance=0.0) >>> exp_stats.push(10) >>> exp_stats.mean() - 4.0 - >>> exp_stats.variance() - 15.33333333333333 - >>> exp_stats.stddev() - 3.915780041490243 + 5.0 >>> exp_stats.push(20) >>> exp_stats.mean() - 0.0 - >>> exp_stats.change_decay(0.9) - >>> exp.stats.get_decay() - 0.9 + 12.5 + >>> exp_stats.change_decay(0.1) + >>> exp_stats.get_decay() + 0.99 >>> exp_stats.push(100) >>> exp_stats.mean() - 0.0 + 13.375 >>> exp_stats.clear(new_mean=10.0, new_variance=2.0) - >>> new_exp_stats = ExponentialStatistics(decay=0.8, iterable=range(100)) - >>> new_exp_stats.mean() - 0.0 + >>> new_exp_stats = ExponentialStatistics(decay=0.99, iterable=range(100)) + >>> round(new_exp_stats.mean(), 2) + 98.9 + >>> round(new_exp_stats.variance(), 2) + 0.12 + >>> round(new_exp_stats.stddev(), 2) + 0.35 # Multiply and add are perfect for exponentially weighting two 'batches' >>> final_exp_stats = 0.5 * exp_stats + 0.5 * new_exp_stats - >>> final_exp_stats.mean() - 0.0 + >>> round(final_exp_stats.mean(), 2) + 54.44 >>> final_exp_stats.get_decay() >>> final_exp_stats.clear(new_decay=0.5) >>> final_exp_stats.get_state() @@ -258,8 +257,8 @@ TODO############################# Compute results, check if everything is in the True -All internal calculations are based entirely on the C++ code by John Cook as -posted in a couple of articles: +All internal calculations of the Statistics and Regression Classes are based +entirely on the C++ code by John Cook as posted in a couple of articles: * `Computing Skewness and Kurtosis in One Pass`_ * `Computing Linear Regression in One Pass`_ @@ -267,13 +266,17 @@ posted in a couple of articles: .. _`Computing Skewness and Kurtosis in One Pass`: http://www.johndcook.com/blog/skewness_kurtosis/ .. 
_`Computing Linear Regression in One Pass`: http://www.johndcook.com/blog/running_regression/ +The ExponentialStatistics implementation is based on: + +* Finch, 2009, Incremental Calculation of Weighted Mean and Variance + The pure-Python and Cython-optimized versions of `RunStats`_ are each directly available if preferred. .. code-block:: python - >>> from runstats.core import Statistics, Regression # pure-Python - >>> from runstats.fast import Statistics, Regression # Cython-optimized + >>> from runstats.core import Statistics, Regression, ExponentialStatistics # pure-Python + >>> from runstats.fast import Statistics, Regression, ExponentialStatistics # Cython-optimized When importing from `runstats` the `fast` version is preferred and the `core` version is used as fallback. Micro-benchmarking Statistics and Regression by From 584674b4942a9da0ac0c640b63867f156c8a5bea Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Wed, 3 Feb 2021 20:27:08 +0100 Subject: [PATCH 15/83] Minor corrections to readme wrt changes for exponential statistics --- README.rst | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/README.rst b/README.rst index e4e9d01..ef07e65 100644 --- a/README.rst +++ b/README.rst @@ -24,10 +24,11 @@ complete summary of the entire stream of data but rather wants to observe the 'current' state of the system based on the recent past. In these cases exponential statistics come in handy. Instead of weighting all values uniformly in the statistics computation, one can exponentially decay the weight of older -values. Thus the e.g. current mean is predominantly based on more recent values. +values given a provided decay rate. Thus, one can regulate in how far the e.g. +current mean is based on recent or old values. 
-The Python `RunStats`_ module was designed for these cases by providing a pair -of classes for computing online summary statistics and online linear regression +The Python `RunStats`_ module was designed for these cases by providing classes +for computing online summary statistics and online linear regression in a single pass. Summary objects work on sequences which may be larger than memory or disk space permit. They may also be efficiently combined together to create aggregate summaries. @@ -206,20 +207,20 @@ Both constructors accept an optional iterable that is consumed and pushed into the summary. Note that you may pass a generator as an iterable and the generator will be entirely consumed. -Last but not least, ExponentialStatistics are constructed by providing: a decay -rate that is strictly larger than 0.0 and strictly smaller than 1.0 -(default: 0.9), a initial mean and a initial variance (default: 0.0) as well as -an iterable as with the other two objects. The decay rate is the weight by which -the current statistics are discounted by and (1.0 - decay) is the weight of the -new value on the new statistics. The class has five methods of modification: -`push()`, `clear()`, sum and multiply as the Statistics class has and -`change_decay()` to modify the current decay rate in-place. +Last but not least, there are ExponentialStatistics which are constructed by +providing: a decay rate that is strictly larger than 0.0 and strictly smaller than 1.0 +(default: 0.9), a initial mean and a initial variance (default: 0.0) and finally +an iterable as for the other two classes. The decay rate is the weight by which +the current statistics are discounted by. Consequently, (1.0 - decay) is the weight of the +new value. The class has five methods of modification: +`push`, `clear`, sum and multiply as the Statistics class and additionally +`change_decay` to modify the current decay rate in-place. 
The clear method allows to optionally set a new mean, new variance and new decay. If none are provided mean and variance reset to 0, while the decay is not -changed. If two ExponentialStatistics are being added the decay of the left -hand side is the decay of the new object. -The statistics supported are `mean()`, `variance()` and `stddev()`. -The `len()` method is not supported. +changed. If two ExponentialStatistics are being added the leftmost decay +is the decay of the new object. +The statistics supported are `mean`, `variance` and `stddev`. +The `len` method is not supported. .. code-block:: python @@ -257,7 +258,7 @@ The `len()` method is not supported. True -All internal calculations of the Statistics and Regression Classes are based +All internal calculations of the Statistics and Regression classes are based entirely on the C++ code by John Cook as posted in a couple of articles: * `Computing Skewness and Kurtosis in One Pass`_ From 619732ff104afd67e43d7b1cf0a4179277a2b11d Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Thu, 4 Feb 2021 19:34:36 +0100 Subject: [PATCH 16/83] added docstring to make_exponential_statistics at core.py --- runstats/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/runstats/core.py b/runstats/core.py index 4c5b716..0b98ddf 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -387,6 +387,7 @@ def _check_weight(decay): def make_exponential_statistics(state): + """Make ExponentialStatistics object from state.""" return ExponentialStatistics.fromstate(state) From d5fc3a4bfa5c73ca5e2b1486e47b95591ec0b94d Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 7 Feb 2021 17:41:05 +0100 Subject: [PATCH 17/83] corrected name of batch_exponential_statistics test --- README.rst | 1 + docs/conf.py | 30 ++++++++++++++++++------------ runstats/core.py | 3 ++- tests/test_runstats.py | 28 ++-------------------------- 4 files changed, 23 insertions(+), 39 deletions(-) diff --git a/README.rst b/README.rst index 
dc611cd..f581eea 100644 --- a/README.rst +++ b/README.rst @@ -33,6 +33,7 @@ single pass. Summary objects work on sequences which may be larger than memory or disk space permit. They may also be efficiently combined together to create aggregate summaries. + Features -------- diff --git a/docs/conf.py b/docs/conf.py index 7c79112..1bbb21c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -14,6 +14,7 @@ import os import sys + sys.path.insert(0, os.path.abspath('..')) import runstats @@ -130,15 +131,12 @@ # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -148,8 +146,13 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'RunStats.tex', 'RunStats Documentation', - 'Grant Jenks', 'manual'), + ( + master_doc, + 'RunStats.tex', + 'RunStats Documentation', + 'Grant Jenks', + 'manual', + ), ] @@ -157,10 +160,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
-man_pages = [ - (master_doc, 'runstats', 'RunStats Documentation', - [author], 1) -] +man_pages = [(master_doc, 'runstats', 'RunStats Documentation', [author], 1)] # -- Options for Texinfo output ---------------------------------------------- @@ -169,9 +169,15 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'RunStats', 'RunStats Documentation', - author, 'RunStats', 'Compute statistics and regression in one pass.', - 'Miscellaneous'), + ( + master_doc, + 'RunStats', + 'RunStats Documentation', + author, + 'RunStats', + 'Compute statistics and regression in one pass.', + 'Miscellaneous', + ), ] diff --git a/runstats/core.py b/runstats/core.py index 9ae19ca..c75a9e0 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -248,7 +248,7 @@ class ExponentialStatistics: Based on "Finch, 2009, Incremental Calculation of Weighted Mean and Variance" at - https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf + https://nanopdf.com/download/incremental-calculation-of-weighted-mean-and-variance_pdf For an explanation of these statistics refer to e.g.: https://nestedsoftware.com/2018/04/04/exponential-moving-average-on-streaming-data-4hhl.24876.html @@ -382,6 +382,7 @@ def __imul__(self, that): self._variance *= that return self + def make_exponential_statistics(state): """Make ExponentialStatistics object from state.""" return ExponentialStatistics.fromstate(state) diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 2c9269f..82c16b2 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -587,36 +587,12 @@ def test_multiply(Statistics, Regression): stats5 = math.e * stats1 assert stats1.mean() == stats5.mean() -@pytest.mark.parametrize('ExponentialStatistics', [ - CoreExponentialStatistics, FastExponentialStatistics -]) -def test_expoential_batch(ExponentialStatistics): - random.seed(0) - - alpha = [random.random() for _ in range(count)] - beta = 
[random.random() * 2 for _ in range(count)] - - alpha_exp_stats = ExponentialStatistics(0.1, iterable=alpha) - beta_exp_stats = ExponentialStatistics(0.9, iterable=beta) - - gamma_exp_stats = alpha_exp_stats * 0.3 + beta_exp_stats * 0.7 - - weighted_mean = alpha_exp_stats.mean() * 0.3 + beta_exp_stats.mean() * 0.7 - assert weighted_mean == gamma_exp_stats.mean() - - weighted_var = alpha_exp_stats.variance() * 0.3 \ - + beta_exp_stats.variance() * 0.7 - assert weighted_var == gamma_exp_stats.variance() - assert alpha_exp_stats._decay == gamma_exp_stats._decay - assert beta_exp_stats._decay != gamma_exp_stats._decay - - @pytest.mark.parametrize( 'ExponentialStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_expoential_batch(ExponentialStatistics): +def test_exponential_batch(ExponentialStatistics): random.seed(0) alpha = [random.random() for _ in range(count)] @@ -661,4 +637,4 @@ def test_raise_if_invalid_multiply_exp(ExponentialStatistics): ExponentialStatistics(0) ExponentialStatistics(1) ExponentialStatistics(-1) - ExponentialStatistics(2) + ExponentialStatistics(2) \ No newline at end of file From aabca629be575f45f170d59720ddd1d6f3994850 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 7 Feb 2021 17:41:53 +0100 Subject: [PATCH 18/83] corrected for flake8 --- docs/conf.py | 2 +- tests/test_runstats.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 1bbb21c..007619f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -14,9 +14,9 @@ import os import sys +import runstats sys.path.insert(0, os.path.abspath('..')) -import runstats # -- Project information ----------------------------------------------------- diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 82c16b2..dcb54b5 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -637,4 +637,4 @@ def test_raise_if_invalid_multiply_exp(ExponentialStatistics): ExponentialStatistics(0) 
ExponentialStatistics(1) ExponentialStatistics(-1) - ExponentialStatistics(2) \ No newline at end of file + ExponentialStatistics(2) From e5a23501271db5ff3c81fc34d204599ffb39526d Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Mon, 8 Feb 2021 19:26:15 +0100 Subject: [PATCH 19/83] updated finch link to readme, changed clear method in ExponentialStatistics, implemented ExpoenentialCovariance (push and correlation TBD) --- README.rst | 4 +- runstats/core.py | 151 +++++++++++++++++++++++++++++++++++++---- tests/test_runstats.py | 5 +- 3 files changed, 144 insertions(+), 16 deletions(-) diff --git a/README.rst b/README.rst index f581eea..fc0ca4d 100644 --- a/README.rst +++ b/README.rst @@ -290,7 +290,9 @@ entirely on the C++ code by John Cook as posted in a couple of articles: The ExponentialStatistics implementation is based on: -* Finch, 2009, Incremental Calculation of Weighted Mean and Variance +* `Finch, 2009, Incremental Calculation of Weighted Mean and Variance`_ + +.. _`Finch, 2009, Incremental Calculation of Weighted Mean and Variance`: https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf The pure-Python and Cython-optimized versions of `RunStats`_ are each directly available if preferred. diff --git a/runstats/core.py b/runstats/core.py index c75a9e0..0df0dfd 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -1,6 +1,7 @@ """Python RunStats -Compute Statistics, Exponential Statistics and Regression in a single pass. +Compute Statistics, Exponential Statistics, Regression and Exponential +Covariance in a single pass. """ @@ -244,7 +245,7 @@ def make_statistics(state): class ExponentialStatistics: """Compute exponential mean and variance in a single pass. - ExponentialStatistics objects may also be copied. + ExponentialStatistics objects may also be added and copied. 
Based on "Finch, 2009, Incremental Calculation of Weighted Mean and Variance" at @@ -271,8 +272,10 @@ def __init__(self, decay=0.9, mean=0.0, variance=0.0, iterable=()): """ self.decay = decay - self._mean = float(mean) - self._variance = float(variance) + self._initial_mean = float(mean) + self._initial_variance = float(variance) + self._mean = self._initial_mean + self._variance = self._initial_variance for value in iterable: self.push(value) @@ -289,12 +292,10 @@ def decay(self, value): raise ValueError('decay must be between 0 and 1') self._decay = value - def clear(self, mean=0.0, variance=0.0, decay=None): + def clear(self): """Clear ExponentialStatistics object.""" - self._mean = float(mean) - self._variance = float(variance) - if decay is not None: - self.decay = decay + self._mean = self._initial_mean + self._variance = self._initial_variance def __eq__(self, that): return self.get_state() == that.get_state() @@ -304,12 +305,20 @@ def __ne__(self, that): def get_state(self): """Get internal state.""" - return self._decay, self._mean, self._variance + return ( + self._decay, + self._initial_mean, + self._initial_variance, + self._mean, + self._variance, + ) def set_state(self, state): """Set internal state.""" ( self._decay, + self._initial_mean, + self._initial_variance, self._mean, self._variance, ) = state @@ -389,8 +398,7 @@ def make_exponential_statistics(state): class Regression(object): - """ - Compute simple linear regression in a single pass. + """Compute simple linear regression in a single pass. Computes the slope, intercept, and correlation. Regression objects may also be added together and copied. @@ -524,6 +532,125 @@ def make_regression(state): return Regression.fromstate(state) +class ExponentialCovariance(object): + """Compute exponential covariance and correlation in a single pass. + + ExponentialCovariance objects may also be added and copied. 
+ + """ + + def __init__( + self, + decay=0.9, + mean_x=0.0, + variance_x=0.0, + mean_y=0.0, + variance_y=0.0, + covariance=0.0, + iterable=(), + ): + """Initialize ExponentialCovariance object. + + Incrementally tracks covariance and exponentially discounts old + values. + + Requires a `decay` rate in exclusive range (0, 1) for discounting + previous statistics. + + Optionally allows setting initial covariance. Default 0. + + Iterates optional parameter `iterable` and pushes each pair into the + statistics summary. + + """ + self.decay = decay + self._initial_covariance = float(covariance) + self._covariance = self._initial_covariance + self._xstats = ExponentialStatistics( + decay=decay, mean=mean_x, variance=variance_x, iterable=iterable + ) + self._ystats = ExponentialStatistics( + decay=decay, mean=mean_y, variance=variance_y, iterable=iterable + ) + + for x_val, y_val in iterable: + self.push(x_val, y_val) + + @property + def decay(self): + """Exponential decay rate of old values.""" + return self._decay + + @decay.setter + def decay(self, value): + value = float(value) + self._xstats.decay = value + self._ystats.decay = value + self._decay = value + + def clear(self): + """Clear ExponentialCovariance object.""" + self._xstats.clear() + self._ystats.clear() + self._covariance = self._initial_covariance + + def __eq__(self, that): + return self.get_state() == that.get_state() + + def __ne__(self, that): + return self.get_state() != that.get_state() + + def get_state(self): + """Get internal state.""" + return ( + self._decay, + self._initial_covariance, + self._covariance, + self._xstats.get_state(), + self._ystats.get_state(), + ) + + def set_state(self, state): + """Set internal state.""" + decay, initial_covariance, covariance, xstate, ystate = state + self._decay = decay + self._initial_covariance = initial_covariance + self._covariance = covariance + self._xstats.set_state(xstate) + self._ystats.set_state(ystate) + + @classmethod + def fromstate(cls, 
state): + """Return ExponentialCovariance object from state.""" + stats = cls() + stats.set_state(state) + return stats + + def __reduce__(self): + return make_exponential_covariance, (self.get_state(),) + + def copy(self, _=None): + """Copy ExponentialCovariance object.""" + return self.fromstate(self.get_state()) + + __copy__ = copy + __deepcopy__ = copy + + def push(self, x_val, y_val): + pass + + def covariance(self): + return self._covariance + + def correlation(self): + pass + + +def make_exponential_covariance(state): + """Make Regression object from state.""" + return ExponentialCovariance.fromstate(state) + + if __name__ == 'runstats.core': # pragma: no cover try: from ._core import * # noqa # pylint: disable=wildcard-import diff --git a/tests/test_runstats.py b/tests/test_runstats.py index dcb54b5..14c8a0e 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -163,9 +163,8 @@ def test_exponential_statistics(ExponentialStatistics): alpha_exp_stats_zero.clear() alpha_exp_stats_zero.decay = 0.1 - alpha_exp_stats_init.clear( - decay=0.1, mean=mean(alpha), variance=variance(alpha, 0) - ) + alpha_exp_stats_init.clear() + alpha_exp_stats_init.decay = 0.1 for val in big_alpha: alpha_exp_stats_zero.push(val) From 317721bafcff544b2a2947899f64524b567c93d7 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Mon, 8 Feb 2021 21:43:30 +0100 Subject: [PATCH 20/83] debugged ExpCov --- runstats/core.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/runstats/core.py b/runstats/core.py index 8cbe408..4e4b396 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -565,15 +565,13 @@ def __init__( statistics summary. 
""" - self.decay = decay self._initial_covariance = float(covariance) self._covariance = self._initial_covariance self._xstats = ExponentialStatistics( - decay=decay, mean=mean_x, variance=variance_x, iterable=iterable - ) + decay=decay, mean=mean_x, variance=variance_x) self._ystats = ExponentialStatistics( - decay=decay, mean=mean_y, variance=variance_y, iterable=iterable - ) + decay=decay, mean=mean_y, variance=variance_y) + self.decay = decay for x_val, y_val in iterable: self.push(x_val, y_val) @@ -638,14 +636,25 @@ def copy(self, _=None): __copy__ = copy __deepcopy__ = copy - def push(self, x_val, y_val): - pass + def push(self, x_val, y_val): # TODO: is that the same as with same mean + """Add a pair `(x, y)` to the ExponentialCovariance summary.""" + self._xstats.push(x_val) + alpha = (1.0 - self.decay) + self._covariance = self.decay * self.covariance() + alpha * (x_val - self._xstats.mean()) * (y_val - self._ystats.mean()) + self._ystats.push(y_val) def covariance(self): + """Covariance of values""" return self._covariance + # TODO: is it fine to use that variance, not more correct to used n-1 variance? 
+ # TODO: numerical stability def correlation(self): - pass + """Correlation of values""" + root_x = self._xstats.variance() ** 0.5 + root_y = self._ystats.variance() ** 0.5 + denom = root_x * root_y + return self.covariance() / denom def make_exponential_covariance(state): From 8dc282547626643ec37ecd97fcff7315cf57f07f Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 14 Feb 2021 16:06:53 +0100 Subject: [PATCH 21/83] implemented true_exp tests for ExpontialStatistics and ExponentialCovarianace --- runstats/__init__.py | 6 +-- runstats/core.py | 12 +++--- tests/test_runstats.py | 85 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+), 9 deletions(-) diff --git a/runstats/__init__.py b/runstats/__init__.py index 7982d69..eedb7d4 100644 --- a/runstats/__init__.py +++ b/runstats/__init__.py @@ -7,11 +7,11 @@ """ try: - from ._core import ExponentialStatistics, Regression, Statistics + from ._core import ExponentialCovariance, ExponentialStatistics, Regression, Statistics except ImportError: # pragma: no cover - from .core import ExponentialStatistics, Regression, Statistics + from .core import ExponentialCovariance, ExponentialStatistics, Regression, Statistics -__all__ = ['Statistics', 'Regression', 'ExponentialStatistics'] +__all__ = ['Statistics', 'Regression', 'ExponentialStatistics', 'ExponentialCovariance'] __title__ = 'runstats' __version__ = '1.8.0' __author__ = 'Grant Jenks' diff --git a/runstats/core.py b/runstats/core.py index 4e4b396..8893f41 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -6,6 +6,7 @@ """ from __future__ import division +from math import exp, log class Statistics: @@ -350,6 +351,7 @@ def push(self, value): self._variance += alpha * (self._decay * diff ** 2 - self._variance) self._mean += incr + def mean(self): """Exponential mean of values.""" return self._mean @@ -636,7 +638,7 @@ def copy(self, _=None): __copy__ = copy __deepcopy__ = copy - def push(self, x_val, y_val): # TODO: is that the same 
as with same mean + def push(self, x_val, y_val): """Add a pair `(x, y)` to the ExponentialCovariance summary.""" self._xstats.push(x_val) alpha = (1.0 - self.decay) @@ -647,16 +649,14 @@ def covariance(self): """Covariance of values""" return self._covariance - # TODO: is it fine to use that variance, not more correct to used n-1 variance? - # TODO: numerical stability def correlation(self): """Correlation of values""" - root_x = self._xstats.variance() ** 0.5 - root_y = self._ystats.variance() ** 0.5 - denom = root_x * root_y + denom = self._xstats.stddev() * self._ystats.stddev() return self.covariance() / denom + + def make_exponential_covariance(state): """Make Regression object from state.""" return ExponentialCovariance.fromstate(state) diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 14c8a0e..587ec7e 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -6,13 +6,16 @@ import math import pickle import random +import itertools import pytest from runstats import ExponentialStatistics as FastExponentialStatistics +from runstats import ExponentialCovariance as FastExponentialCovariance from runstats import Regression as FastRegression from runstats import Statistics as FastStatistics from runstats.core import ExponentialStatistics as CoreExponentialStatistics +from runstats.core import ExponentialCovariance as CoreExponentialCovariance from runstats.core import Regression as CoreRegression from runstats.core import Statistics as CoreStatistics @@ -54,6 +57,54 @@ def error(value, test): return abs((test - value) / value) +def exponential_weight(decay, pos): + return (1-decay) * decay ** pos + + +def exp_mean_var(decay, iterable): + indecies = list(range(len(iterable))) + weights = list(map(lambda x: exponential_weight(decay, x), indecies))[::-1] + + mean = 0.0 + for val, weight in zip(iterable, weights): + mean += val * weight + + variance = (0 - mean) ** 2 * (1 - sum(weights)) + for val, weight in zip(iterable, weights): + 
variance += (val - mean) ** 2 * weight + + return mean, variance + + +def exp_cov_cor(decay, iterable): + lst = list(iterable) + indecies = list(range(len(lst))) + weights = list(map(lambda x: exponential_weight(decay, x), indecies))[::-1] + + mean_1 = 0.0 + mean_2 = 0.0 + for vals, weight in zip(lst, weights): + x_1, x_2 = vals + mean_1 += x_1 * weight + mean_2 += x_2 * weight + + variance_1 = (0 - mean_1) ** 2 * (1 - sum(weights)) + variance_2 = (0 - mean_2) ** 2 * (1 - sum(weights)) + for vals, weight in zip(lst, weights): + x_1, x_2 = vals + variance_1 += (x_1 - mean_1) ** 2 * weight + variance_2 += (x_2 - mean_2) ** 2 * weight + + covar = (0 - mean_1) * (0 - mean_2) * (1 - sum(weights)) + for vals, weight in zip(lst, weights): + x_1, x_2 = vals + covar += (x_1 - mean_1) * (x_2 - mean_2) * weight + + correlation = covar / (variance_1 * variance_2) ** 0.5 + + return covar, correlation + + @pytest.mark.parametrize( 'Statistics,Regression', [ @@ -219,6 +270,40 @@ def test_exponential_statistics(ExponentialStatistics): assert (error(current_variance, alpha_exp_stats.variance())) > limit +@pytest.mark.parametrize( + 'ExponentialStatistics, decay', + list(itertools.product( + [CoreExponentialStatistics, FastExponentialStatistics], + [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99] + )), +) +def test_exponential_statistics_decays(ExponentialStatistics, decay): + random.seed(0) + alpha = [random.random() for _ in range(count)] + exp_stats = ExponentialStatistics(decay=decay, iterable=alpha) + true_mean, true_variance = exp_mean_var(decay=decay, iterable=alpha) + + assert (error(true_mean, exp_stats.mean())) < limit + assert (error(true_mean, exp_stats.mean())) < limit + + +@pytest.mark.parametrize( + 'ExponentialCovariance, decay', + list(itertools.product( + [CoreExponentialCovariance, FastExponentialCovariance], + [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99] + )), +) +def test_exponential_covariance_decays(ExponentialCovariance, decay): + random.seed(0) + alpha = [(random.random(), 
random.random()) for _ in range(count)] + exp_stats = ExponentialCovariance(decay=decay, iterable=alpha) + true_cov, true_cor = exp_cov_cor(decay=decay, iterable=alpha) + + assert (error(true_cov, exp_stats.covariance())) < limit + assert (error(true_cor, exp_stats.correlation())) < limit + + @pytest.mark.parametrize( 'Statistics,Regression', [ From 2f9bc4423ed5e67a3ec6aaee2752ee04e122e6b3 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 14 Feb 2021 16:16:53 +0100 Subject: [PATCH 22/83] debugged Readme Test: Changed to --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 01928cf..8f8e7fb 100644 --- a/README.rst +++ b/README.rst @@ -304,7 +304,7 @@ The pure-Python version of `RunStats`_ is directly available if preferred. >>> import runstats.core # Pure-Python >>> runstats.core.Statistics - + When importing from `runstats` the Cython-optimized version `_core` is preferred and the `core` version is used as fallback. Micro-benchmarking From ca3b8b4d10231442901b72fb8115d8b77ccc5294 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 14 Feb 2021 16:53:54 +0100 Subject: [PATCH 23/83] created test_exponential_covariance --- runstats/__init__.py | 21 +++++++++++-- runstats/core.py | 16 +++++----- tests/test_runstats.py | 71 ++++++++++++++++++++++++++++++++++++------ 3 files changed, 88 insertions(+), 20 deletions(-) diff --git a/runstats/__init__.py b/runstats/__init__.py index eedb7d4..92bbdbb 100644 --- a/runstats/__init__.py +++ b/runstats/__init__.py @@ -7,11 +7,26 @@ """ try: - from ._core import ExponentialCovariance, ExponentialStatistics, Regression, Statistics + from ._core import ( + ExponentialCovariance, + ExponentialStatistics, + Regression, + Statistics, + ) except ImportError: # pragma: no cover - from .core import ExponentialCovariance, ExponentialStatistics, Regression, Statistics + from .core import ( + ExponentialCovariance, + ExponentialStatistics, + Regression, + Statistics, + 
) -__all__ = ['Statistics', 'Regression', 'ExponentialStatistics', 'ExponentialCovariance'] +__all__ = [ + 'Statistics', + 'Regression', + 'ExponentialStatistics', + 'ExponentialCovariance', +] __title__ = 'runstats' __version__ = '1.8.0' __author__ = 'Grant Jenks' diff --git a/runstats/core.py b/runstats/core.py index 8893f41..6649711 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -6,7 +6,6 @@ """ from __future__ import division -from math import exp, log class Statistics: @@ -351,7 +350,6 @@ def push(self, value): self._variance += alpha * (self._decay * diff ** 2 - self._variance) self._mean += incr - def mean(self): """Exponential mean of values.""" return self._mean @@ -570,9 +568,11 @@ def __init__( self._initial_covariance = float(covariance) self._covariance = self._initial_covariance self._xstats = ExponentialStatistics( - decay=decay, mean=mean_x, variance=variance_x) + decay=decay, mean=mean_x, variance=variance_x + ) self._ystats = ExponentialStatistics( - decay=decay, mean=mean_y, variance=variance_y) + decay=decay, mean=mean_y, variance=variance_y + ) self.decay = decay for x_val, y_val in iterable: @@ -641,8 +641,10 @@ def copy(self, _=None): def push(self, x_val, y_val): """Add a pair `(x, y)` to the ExponentialCovariance summary.""" self._xstats.push(x_val) - alpha = (1.0 - self.decay) - self._covariance = self.decay * self.covariance() + alpha * (x_val - self._xstats.mean()) * (y_val - self._ystats.mean()) + alpha = 1.0 - self.decay + self._covariance = self.decay * self.covariance() + alpha * ( + x_val - self._xstats.mean() + ) * (y_val - self._ystats.mean()) self._ystats.push(y_val) def covariance(self): @@ -655,8 +657,6 @@ def correlation(self): return self.covariance() / denom - - def make_exponential_covariance(state): """Make Regression object from state.""" return ExponentialCovariance.fromstate(state) diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 587ec7e..b691148 100644 --- a/tests/test_runstats.py +++ 
b/tests/test_runstats.py @@ -57,8 +57,17 @@ def error(value, test): return abs((test - value) / value) +def covariance(values): + values = list(values) + x_vals = [x for x, y in values] + y_vals = [y for x, y in values] + mean_x = mean(x_vals) + mean_y = mean(y_vals) + return sum((x - mean_x) * (y - mean_y) for x, y in values) / len(values) + + def exponential_weight(decay, pos): - return (1-decay) * decay ** pos + return (1 - decay) * decay ** pos def exp_mean_var(decay, iterable): @@ -272,10 +281,12 @@ def test_exponential_statistics(ExponentialStatistics): @pytest.mark.parametrize( 'ExponentialStatistics, decay', - list(itertools.product( - [CoreExponentialStatistics, FastExponentialStatistics], - [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99] - )), + list( + itertools.product( + [CoreExponentialStatistics, FastExponentialStatistics], + [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99], + ) + ), ) def test_exponential_statistics_decays(ExponentialStatistics, decay): random.seed(0) @@ -287,12 +298,54 @@ def test_exponential_statistics_decays(ExponentialStatistics, decay): assert (error(true_mean, exp_stats.mean())) < limit +@pytest.mark.parametrize( + 'ExponentialCovariance', + [CoreExponentialCovariance, FastExponentialCovariance], +) +def test_exponential_covariance(ExponentialCovariance): + random.seed(0) + alpha = [random.random() for _ in range(count)] + beta = [x * -10 for x in alpha] + big_alpha = [random.random() for _ in range(count * 100)] + big_beta = [x * -10 for x in big_alpha] + data = list(zip(big_alpha, big_beta)) + + exp_cov = ExponentialCovariance( + decay=0.9999, + mean_x=mean(alpha), + variance_x=variance(alpha, 0), + mean_y=mean(beta), + variance_y=variance(beta), + covariance=covariance(data), + ) + + for x, y in zip(big_alpha, big_beta): + exp_cov.push(x, y) + + assert error(covariance(data), exp_cov.covariance()) < limit + assert error(-1.0, exp_cov.correlation()) < limit + + exp_cov_2 = exp_cov.copy() + assert exp_cov == exp_cov_2 + assert 
exp_cov.covariance() != covariance(data) + exp_cov.clear() + assert exp_cov != exp_cov_2 + assert exp_cov.covariance() == covariance(data) + exp_cov_2.clear() + exp_cov.decay = 0.1 + exp_cov_2.decay = 0.1 + assert exp_cov.decay == 0.1 + assert exp_cov == exp_cov_2 + + @pytest.mark.parametrize( 'ExponentialCovariance, decay', - list(itertools.product( - [CoreExponentialCovariance, FastExponentialCovariance], - [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99] - )), + list( + itertools.product( + [CoreExponentialCovariance, FastExponentialCovariance], + [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99], + ) + ), ) def test_exponential_covariance_decays(ExponentialCovariance, decay): random.seed(0) From 30ffb77a0f914a90cb7b77a559b65ec2d99d0098 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 14 Feb 2021 17:23:34 +0100 Subject: [PATCH 24/83] implemented add and mul for ExponentialCovariance --- runstats/core.py | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/runstats/core.py b/runstats/core.py index 6649711..cfc9fcc 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -534,7 +534,7 @@ def make_regression(state): return Regression.fromstate(state) -class ExponentialCovariance(object): +class ExponentialCovariance: """Compute exponential covariance and correlation in a single pass. ExponentialCovariance objects may also be added and copied. 
@@ -656,6 +656,38 @@ def correlation(self): denom = self._xstats.stddev() * self._ystats.stddev() return self.covariance() / denom + def __add__(self, that): + """Add two ExponentialCovariance objects together.""" + sigma = self.copy() + sigma += that + return sigma + + def __iadd__(self, that): + """Add another ExponentialCovariance object to this one.""" + self._xstats += that._xstats + self._ystats += that._ystats + self._covariance += that.covariance() + return self + + def __mul__(self, that): + """Multiply by a scalar to change ExponentialCovariance weighting.""" + sigma = self.copy() + sigma *= that + return sigma + + __rmul__ = __mul__ + + def __imul__(self, that): + """Multiply by a scalar to change ExponentialCovariance weighting + in-place. + + """ + that = float(that) + self._xstats *= that + self._ystats *= that + self._covariance *= that + return self + def make_exponential_covariance(state): """Make Regression object from state.""" From 86158358f8413a381c8003156e8826f918bfca5c Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 14 Feb 2021 17:25:09 +0100 Subject: [PATCH 25/83] refactored tests --- tests/test_runstats.py | 76 +++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/tests/test_runstats.py b/tests/test_runstats.py index b691148..b290454 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -279,25 +279,6 @@ def test_exponential_statistics(ExponentialStatistics): assert (error(current_variance, alpha_exp_stats.variance())) > limit -@pytest.mark.parametrize( - 'ExponentialStatistics, decay', - list( - itertools.product( - [CoreExponentialStatistics, FastExponentialStatistics], - [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99], - ) - ), -) -def test_exponential_statistics_decays(ExponentialStatistics, decay): - random.seed(0) - alpha = [random.random() for _ in range(count)] - exp_stats = ExponentialStatistics(decay=decay, iterable=alpha) - true_mean, true_variance = 
exp_mean_var(decay=decay, iterable=alpha) - - assert (error(true_mean, exp_stats.mean())) < limit - assert (error(true_mean, exp_stats.mean())) < limit - - @pytest.mark.parametrize( 'ExponentialCovariance', [CoreExponentialCovariance, FastExponentialCovariance], @@ -338,25 +319,6 @@ def test_exponential_covariance(ExponentialCovariance): assert exp_cov == exp_cov_2 -@pytest.mark.parametrize( - 'ExponentialCovariance, decay', - list( - itertools.product( - [CoreExponentialCovariance, FastExponentialCovariance], - [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99], - ) - ), -) -def test_exponential_covariance_decays(ExponentialCovariance, decay): - random.seed(0) - alpha = [(random.random(), random.random()) for _ in range(count)] - exp_stats = ExponentialCovariance(decay=decay, iterable=alpha) - true_cov, true_cor = exp_cov_cor(decay=decay, iterable=alpha) - - assert (error(true_cov, exp_stats.covariance())) < limit - assert (error(true_cor, exp_stats.correlation())) < limit - - @pytest.mark.parametrize( 'Statistics,Regression', [ @@ -751,6 +713,44 @@ def test_exponential_batch(ExponentialStatistics): assert beta_exp_stats._decay != gamma_exp_stats._decay +@pytest.mark.parametrize( + 'ExponentialStatistics, decay', + list( + itertools.product( + [CoreExponentialStatistics, FastExponentialStatistics], + [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99], + ) + ), +) +def test_exponential_statistics_decays(ExponentialStatistics, decay): + random.seed(0) + alpha = [random.random() for _ in range(count)] + exp_stats = ExponentialStatistics(decay=decay, iterable=alpha) + true_mean, true_variance = exp_mean_var(decay=decay, iterable=alpha) + + assert (error(true_mean, exp_stats.mean())) < limit + assert (error(true_mean, exp_stats.mean())) < limit + + +@pytest.mark.parametrize( + 'ExponentialCovariance, decay', + list( + itertools.product( + [CoreExponentialCovariance, FastExponentialCovariance], + [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99], + ) + ), +) +def 
test_exponential_covariance_decays(ExponentialCovariance, decay): + random.seed(0) + alpha = [(random.random(), random.random()) for _ in range(count)] + exp_stats = ExponentialCovariance(decay=decay, iterable=alpha) + true_cov, true_cor = exp_cov_cor(decay=decay, iterable=alpha) + + assert (error(true_cov, exp_stats.covariance())) < limit + assert (error(true_cor, exp_stats.correlation())) < limit + + @pytest.mark.parametrize( 'Statistics,Regression', [ From 47521f3afc259c2f7ac07d90bd46086104969906 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 14 Feb 2021 18:07:40 +0100 Subject: [PATCH 26/83] finised all tests for ExponentialCovariance --- tests/test_runstats.py | 144 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 129 insertions(+), 15 deletions(-) diff --git a/tests/test_runstats.py b/tests/test_runstats.py index b290454..0b67dc0 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -53,10 +53,6 @@ def kurtosis(values): return (numerator / denominator) - 3 -def error(value, test): - return abs((test - value) / value) - - def covariance(values): values = list(values) x_vals = [x for x, y in values] @@ -66,6 +62,17 @@ def covariance(values): return sum((x - mean_x) * (y - mean_y) for x, y in values) / len(values) +def correlation(values): + sigma_x = sum(xxx for xxx, yyy in values) / len(values) + sigma_y = sum(yyy for xxx, yyy in values) / len(values) + sigma_xy = sum(xxx * yyy for xxx, yyy in values) / len(values) + sigma_x2 = sum(xxx ** 2 for xxx, yyy in values) / len(values) + sigma_y2 = sum(yyy ** 2 for xxx, yyy in values) / len(values) + return (sigma_xy - sigma_x * sigma_y) / ( + ((sigma_x2 - sigma_x ** 2) * (sigma_y2 - sigma_y ** 2)) ** 0.5 + ) + + def exponential_weight(decay, pos): return (1 - decay) * decay ** pos @@ -114,6 +121,10 @@ def exp_cov_cor(decay, iterable): return covar, correlation +def error(value, test): + return abs((test - value) / value) + + @pytest.mark.parametrize( 'Statistics,Regression', [ @@ 
-318,6 +329,9 @@ def test_exponential_covariance(ExponentialCovariance): assert exp_cov.decay == 0.1 assert exp_cov == exp_cov_2 + exp_cov_3 = exp_cov * 0.5 + exp_cov * 0.5 + assert exp_cov_3 == exp_cov + @pytest.mark.parametrize( 'Statistics,Regression', @@ -344,15 +358,15 @@ def test_add_exponential_statistics(ExponentialStatistics): assert (exp_stats10 + exp_stats0) == exp_stats10 -def correlation(values): - sigma_x = sum(xxx for xxx, yyy in values) / len(values) - sigma_y = sum(yyy for xxx, yyy in values) / len(values) - sigma_xy = sum(xxx * yyy for xxx, yyy in values) / len(values) - sigma_x2 = sum(xxx ** 2 for xxx, yyy in values) / len(values) - sigma_y2 = sum(yyy ** 2 for xxx, yyy in values) / len(values) - return (sigma_xy - sigma_x * sigma_y) / ( - ((sigma_x2 - sigma_x ** 2) * (sigma_y2 - sigma_y ** 2)) ** 0.5 - ) +@pytest.mark.parametrize( + 'ExponentialCovariance', + [CoreExponentialCovariance, FastExponentialCovariance], +) +def test_add_exponential_covariance(ExponentialCovariance): + exp_cov0 = ExponentialCovariance(0.9) + exp_cov10 = ExponentialCovariance(0.9, iterable=zip(range(10), range(10))) + assert (exp_cov0 + exp_cov10) == exp_cov10 + assert (exp_cov10 + exp_cov0) == exp_cov10 @pytest.mark.parametrize( @@ -466,6 +480,30 @@ def test_get_set_state_exponential_statistics(ExponentialStatistics): assert exp_stats == ExponentialStatistics.fromstate(exp_stats.get_state()) +@pytest.mark.parametrize( + 'ExponentialCovariance', + [CoreExponentialCovariance, FastExponentialCovariance], +) +def test_get_set_state_exponential_covariance(ExponentialCovariance): + random.seed(0) + vals = [(random.random(), random.random()) for _ in range(count)] + exp_cov = ExponentialCovariance(iterable=vals) + exp_state = exp_cov.get_state() + + new_exp_cov = ExponentialCovariance(0.8) + assert exp_cov != new_exp_cov + assert new_exp_cov.decay == 0.8 + new_exp_cov.set_state(exp_state) + assert new_exp_cov.decay == 0.9 + assert exp_cov == new_exp_cov + new_exp_cov.decay = 
0.1 + assert exp_cov != new_exp_cov + assert exp_cov.covariance() == new_exp_cov.covariance() + assert new_exp_cov.decay == 0.1 + + assert exp_cov == ExponentialCovariance.fromstate(exp_cov.get_state()) + + @pytest.mark.parametrize( 'Statistics,Regression', [ @@ -528,6 +566,18 @@ def test_pickle_exponential_statistics(ExponentialStatistics): assert exp_stats == unpickled_exp_stats, 'protocol: %s' % num +@pytest.mark.parametrize( + 'ExponentialCovariance', + [CoreExponentialCovariance, FastExponentialCovariance], +) +def test_pickle_exponential_statistics(ExponentialCovariance): + exp_cov = ExponentialCovariance(0.9, iterable=zip(range(10), range(10))) + for num in range(pickle.HIGHEST_PROTOCOL): + pickled_exp_cov = pickle.dumps(exp_cov, protocol=num) + unpickled_exp_cov = pickle.loads(pickled_exp_cov) + assert exp_cov == unpickled_exp_cov, 'protocol: %s' % num + + @pytest.mark.parametrize( 'Statistics,Regression', [ @@ -570,6 +620,18 @@ def test_copy_exponential_statistics(ExponentialStatistics): assert exp_stats == deepcopy_exp_stats +@pytest.mark.parametrize( + 'ExponentialCovariance', + [CoreExponentialCovariance, FastExponentialCovariance], +) +def test_copy_exponential_covariance(ExponentialCovariance): + exp_cov = ExponentialCovariance(0.9, iterable=zip(range(10), range(10))) + copy_exp_cov = copy.copy(exp_cov) + assert exp_cov == copy_exp_cov + deepcopy_exp_cov = copy.deepcopy(exp_cov) + assert exp_cov == deepcopy_exp_cov + + @pytest.mark.parametrize( 'Statistics,Regression', [ @@ -612,6 +674,18 @@ def test_equality_exponential_statistics(ExponentialStatistics): assert exp_stats1 != exp_stats2 +@pytest.mark.parametrize( + 'ExponentialCovariance', + [CoreExponentialCovariance, FastExponentialCovariance], +) +def test_equality_exponential_covariance(ExponentialCovariance): + exp_cov1 = ExponentialCovariance(0.9, iterable=enumerate(range(10))) + exp_cov2 = ExponentialCovariance(0.9, iterable=enumerate(range(10))) + assert exp_cov1 == exp_cov2 + 
exp_cov2.push(42, 42) + assert exp_cov1 != exp_cov2 + + @pytest.mark.parametrize( 'Statistics,Regression', [ @@ -691,13 +765,16 @@ def test_multiply(Statistics, Regression): 'ExponentialStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_exponential_batch(ExponentialStatistics): +def test_exponential_statistics_batch(ExponentialStatistics): random.seed(0) alpha = [random.random() for _ in range(count)] beta = [random.random() * 2 for _ in range(count)] alpha_exp_stats = ExponentialStatistics(0.1, iterable=alpha) + + assert (alpha_exp_stats * 0.5 + alpha_exp_stats * 0.5) == alpha_exp_stats + beta_exp_stats = ExponentialStatistics(0.9, iterable=beta) gamma_exp_stats = alpha_exp_stats * 0.3 + beta_exp_stats * 0.7 @@ -713,6 +790,31 @@ def test_exponential_batch(ExponentialStatistics): assert beta_exp_stats._decay != gamma_exp_stats._decay +@pytest.mark.parametrize( + 'ExponentialCovariance', + [CoreExponentialCovariance, FastExponentialCovariance], +) +def test_exponential_covariance_batch(ExponentialCovariance): + random.seed(0) + + alpha = [(random.random(), random.random()) for _ in range(count)] + beta = [(random.random() * 2, random.random() * 2) for _ in range(count)] + + alpha_exp_cov = ExponentialCovariance(0.1, iterable=alpha) + + assert (alpha_exp_cov * 0.5 + alpha_exp_cov * 0.5) == alpha_exp_cov + + beta_exp_cov = ExponentialCovariance(0.9, iterable=beta) + + gamma_exp_cov = alpha_exp_cov * 0.3 + beta_exp_cov * 0.7 + + weighted_cov = alpha_exp_cov.covariance() * 0.3 + beta_exp_cov.covariance() * 0.7 + assert weighted_cov == gamma_exp_cov.covariance() + + assert alpha_exp_cov._decay == gamma_exp_cov._decay + assert beta_exp_cov._decay != gamma_exp_cov._decay + + @pytest.mark.parametrize( 'ExponentialStatistics, decay', list( @@ -769,9 +871,21 @@ def test_raise_if_invalid_multiply(Statistics, Regression): 'ExponentialStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def 
test_raise_if_invalid_multiply_exp(ExponentialStatistics): +def test_raise_if_invalid_decay_exp(ExponentialStatistics): with pytest.raises(ValueError): ExponentialStatistics(0) ExponentialStatistics(1) ExponentialStatistics(-1) ExponentialStatistics(2) + + +@pytest.mark.parametrize( + 'ExponentialCovariance', + [CoreExponentialCovariance, FastExponentialCovariance], +) +def test_raise_if_invalid_decay_exp(ExponentialCovariance): + with pytest.raises(ValueError): + ExponentialCovariance(0) + ExponentialCovariance(1) + ExponentialCovariance(-1) + ExponentialCovariance(2) \ No newline at end of file From 0f444a41268071a9ef15147de4e78b9b2c657c67 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 14 Feb 2021 18:08:22 +0100 Subject: [PATCH 27/83] applied blue --- tests/test_runstats.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 0b67dc0..d59ba55 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -808,7 +808,9 @@ def test_exponential_covariance_batch(ExponentialCovariance): gamma_exp_cov = alpha_exp_cov * 0.3 + beta_exp_cov * 0.7 - weighted_cov = alpha_exp_cov.covariance() * 0.3 + beta_exp_cov.covariance() * 0.7 + weighted_cov = ( + alpha_exp_cov.covariance() * 0.3 + beta_exp_cov.covariance() * 0.7 + ) assert weighted_cov == gamma_exp_cov.covariance() assert alpha_exp_cov._decay == gamma_exp_cov._decay @@ -888,4 +890,4 @@ def test_raise_if_invalid_decay_exp(ExponentialCovariance): ExponentialCovariance(0) ExponentialCovariance(1) ExponentialCovariance(-1) - ExponentialCovariance(2) \ No newline at end of file + ExponentialCovariance(2) From 41e810eaa7b95626b11821989d7d426ee217411c Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 14 Feb 2021 18:12:05 +0100 Subject: [PATCH 28/83] changed name of test_pickle_exponential_statistics(ExponentialCovariance) to test_pickle_exponential_covariance(ExponentialCovariance) --- tests/test_runstats.py | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_runstats.py b/tests/test_runstats.py index d59ba55..44d13bd 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -570,7 +570,7 @@ def test_pickle_exponential_statistics(ExponentialStatistics): 'ExponentialCovariance', [CoreExponentialCovariance, FastExponentialCovariance], ) -def test_pickle_exponential_statistics(ExponentialCovariance): +def test_pickle_exponential_covariance(ExponentialCovariance): exp_cov = ExponentialCovariance(0.9, iterable=zip(range(10), range(10))) for num in range(pickle.HIGHEST_PROTOCOL): pickled_exp_cov = pickle.dumps(exp_cov, protocol=num) @@ -873,7 +873,7 @@ def test_raise_if_invalid_multiply(Statistics, Regression): 'ExponentialStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_raise_if_invalid_decay_exp(ExponentialStatistics): +def test_raise_if_invalid_decay_exp_stats(ExponentialStatistics): with pytest.raises(ValueError): ExponentialStatistics(0) ExponentialStatistics(1) @@ -885,7 +885,7 @@ def test_raise_if_invalid_decay_exp(ExponentialStatistics): 'ExponentialCovariance', [CoreExponentialCovariance, FastExponentialCovariance], ) -def test_raise_if_invalid_decay_exp(ExponentialCovariance): +def test_raise_if_invalid_decay_exp_cov(ExponentialCovariance): with pytest.raises(ValueError): ExponentialCovariance(0) ExponentialCovariance(1) From 6ca1ef187222d50f461c003ff8cddb483491ff74 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Mon, 15 Feb 2021 22:00:09 +0100 Subject: [PATCH 29/83] took care of style CI errors (pylint, isort ...) 
--- docs/conf.py | 1 + runstats/core.py | 2 +- tests/test_runstats.py | 6 +++--- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 4c1eab9..c6c3a4d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,6 +12,7 @@ import os import sys + import runstats sys.path.insert(0, os.path.abspath('..')) diff --git a/runstats/core.py b/runstats/core.py index cfc9fcc..3fb2b50 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -550,7 +550,7 @@ def __init__( variance_y=0.0, covariance=0.0, iterable=(), - ): + ): # pylint: disable=too-many-arguments """Initialize ExponentialCovariance object. Incrementally tracks covariance and exponentially discounts old diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 44d13bd..1aab601 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -3,19 +3,19 @@ """ import copy +import itertools import math import pickle import random -import itertools import pytest -from runstats import ExponentialStatistics as FastExponentialStatistics from runstats import ExponentialCovariance as FastExponentialCovariance +from runstats import ExponentialStatistics as FastExponentialStatistics from runstats import Regression as FastRegression from runstats import Statistics as FastStatistics -from runstats.core import ExponentialStatistics as CoreExponentialStatistics from runstats.core import ExponentialCovariance as CoreExponentialCovariance +from runstats.core import ExponentialStatistics as CoreExponentialStatistics from runstats.core import Regression as CoreRegression from runstats.core import Statistics as CoreStatistics From 97480dfb8c0dd8c5c39d76d65dc7b1d23df564e8 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Tue, 16 Feb 2021 20:12:14 +0100 Subject: [PATCH 30/83] debugging ci pipeline, error on ubuntu with readme --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 8f8e7fb..e7d7911 100644 --- a/README.rst +++ 
b/README.rst @@ -303,7 +303,7 @@ The pure-Python version of `RunStats`_ is directly available if preferred. .. code-block:: python >>> import runstats.core # Pure-Python - >>> runstats.core.Statistics + >>> runstats._core.Statistics When importing from `runstats` the Cython-optimized version `_core` is From 066a08d9176a6cfeb548cfc8e2f462b97e7dbd72 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Tue, 16 Feb 2021 20:27:48 +0100 Subject: [PATCH 31/83] renamend ExponentialStatistics to ExponentialMovingStatistics --- README.rst | 24 ++++++------ docs/api.rst | 6 +-- runstats/__init__.py | 6 +-- runstats/core.py | 30 +++++++-------- tests/__main__.py | 6 ++- tests/benchmark.py | 12 +++--- tests/test_runstats.py | 84 ++++++++++++++++++++++-------------------- 7 files changed, 87 insertions(+), 81 deletions(-) diff --git a/README.rst b/README.rst index e7d7911..39d3276 100644 --- a/README.rst +++ b/README.rst @@ -76,23 +76,23 @@ function: >>> help(runstats) # doctest: +SKIP >>> help(runstats.Statistics) # doctest: +SKIP >>> help(runstats.Regression) # doctest: +SKIP - >>> help(runstats.ExponentialStatistics) # doctest: +SKIP + >>> help(runstats.ExponentialMovingStatistics) # doctest: +SKIP Tutorial -------- The Python `RunStats`_ module provides three types for computing running -statistics: Statistics, ExponentialStatistics and Regression.The Regression +statistics: Statistics, ExponentialMovingStatistics and Regression.The Regression object leverages Statistics internally for its calculations. Each can be initialized without arguments: .. code-block:: python - >>> from runstats import Statistics, Regression, ExponentialStatistics + >>> from runstats import Statistics, Regression, ExponentialMovingStatistics >>> stats = Statistics() >>> regr = Regression() - >>> exp_stats = ExponentialStatistics() + >>> exp_stats = ExponentialMovingStatistics() Statistics objects support four methods for modification. 
Use `push` to add values to the summary, `clear` to reset the summary, sum to combine Statistics @@ -206,13 +206,13 @@ Both constructors accept an optional iterable that is consumed and pushed into the summary. Note that you may pass a generator as an iterable and the generator will be entirely consumed. -The ExponentialStatistics are constructed by providing a decay rate, initial +The ExponentialMovingStatistics are constructed by providing a decay rate, initial mean, and initial variance. The decay rate has default 0.9 and must be between 0 and 1. The initial mean and variance default to zero. .. code-block:: python - >>> exp_stats = ExponentialStatistics() + >>> exp_stats = ExponentialMovingStatistics() >>> exp_stats.decay 0.9 >>> exp_stats.mean() @@ -265,16 +265,16 @@ not changed. >>> exp_stats.variance() 0.0 -Combining `ExponentialStatistics` is done by adding them together. The mean and +Combining `ExponentialMovingStatistics` is done by adding them together. The mean and variance are simply added to create a new object. To weight each -`ExponentialStatistics`, multiply them by a constant factor. If two -`ExponentialStatistics` are added then the leftmost decay is used for the new +`ExponentialMovingStatistics`, multiply them by a constant factor. If two +`ExponentialMovingStatistics` are added then the leftmost decay is used for the new object. The `len` method is not supported. .. code-block:: python - >>> alpha_stats = ExponentialStatistics(iterable=range(10)) - >>> beta_stats = ExponentialStatistics(decay=0.1) + >>> alpha_stats = ExponentialMovingStatistics(iterable=range(10)) + >>> beta_stats = ExponentialMovingStatistics(decay=0.1) >>> for num in range(10): ... beta_stats.push(num) >>> exp_stats = beta_stats * 0.5 + alpha_stats * 0.5 @@ -292,7 +292,7 @@ entirely on the C++ code by John Cook as posted in a couple of articles: .. _`Computing Skewness and Kurtosis in One Pass`: http://www.johndcook.com/blog/skewness_kurtosis/ .. 
_`Computing Linear Regression in One Pass`: http://www.johndcook.com/blog/running_regression/ -The ExponentialStatistics implementation is based on: +The ExponentialMovingStatistics implementation is based on: * `Finch, 2009, Incremental Calculation of Weighted Mean and Variance`_ diff --git a/docs/api.rst b/docs/api.rst index b32d5df..2d99e80 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -17,9 +17,9 @@ Regression :special-members: -ExponentialStatistics -..................... +ExponentialMovingStatistics +........................... -.. autoclass:: runstats.ExponentialStatistics +.. autoclass:: runstats.ExponentialMovingStatistics :members: :special-members: diff --git a/runstats/__init__.py b/runstats/__init__.py index 92bbdbb..8fa4c01 100644 --- a/runstats/__init__.py +++ b/runstats/__init__.py @@ -9,14 +9,14 @@ try: from ._core import ( ExponentialCovariance, - ExponentialStatistics, + ExponentialMovingStatistics, Regression, Statistics, ) except ImportError: # pragma: no cover from .core import ( ExponentialCovariance, - ExponentialStatistics, + ExponentialMovingStatistics, Regression, Statistics, ) @@ -24,7 +24,7 @@ __all__ = [ 'Statistics', 'Regression', - 'ExponentialStatistics', + 'ExponentialMovingStatistics', 'ExponentialCovariance', ] __title__ = 'runstats' diff --git a/runstats/core.py b/runstats/core.py index 3fb2b50..6ab7d61 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -243,10 +243,10 @@ def make_statistics(state): return Statistics.fromstate(state) -class ExponentialStatistics: +class ExponentialMovingStatistics: """Compute exponential mean and variance in a single pass. - ExponentialStatistics objects may also be added and copied. + ExponentialMovingStatistics objects may also be added and copied. 
Based on "Finch, 2009, Incremental Calculation of Weighted Mean and Variance" at @@ -258,7 +258,7 @@ class ExponentialStatistics: """ def __init__(self, decay=0.9, mean=0.0, variance=0.0, iterable=()): - """Initialize ExponentialStatistics object. + """Initialize ExponentialMovingStatistics object. Incrementally tracks mean and variance and exponentially discounts old values. @@ -294,7 +294,7 @@ def decay(self, value): self._decay = value def clear(self): - """Clear ExponentialStatistics object.""" + """Clear ExponentialMovingStatistics object.""" self._mean = self._initial_mean self._variance = self._initial_variance @@ -326,7 +326,7 @@ def set_state(self, state): @classmethod def fromstate(cls, state): - """Return ExponentialStatistics object from state.""" + """Return ExponentialMovingStatistics object from state.""" stats = cls() stats.set_state(state) return stats @@ -335,14 +335,14 @@ def __reduce__(self): return make_exponential_statistics, (self.get_state(),) def copy(self, _=None): - """Copy ExponentialStatistics object.""" + """Copy ExponentialMovingStatistics object.""" return self.fromstate(self.get_state()) __copy__ = copy __deepcopy__ = copy def push(self, value): - """Add `value` to the ExponentialStatistics summary.""" + """Add `value` to the ExponentialMovingStatistics summary.""" value = float(value) alpha = 1.0 - self._decay diff = value - self._mean @@ -363,19 +363,19 @@ def stddev(self): return self.variance() ** 0.5 def __add__(self, that): - """Add two ExponentialStatistics objects together.""" + """Add two ExponentialMovingStatistics objects together.""" sigma = self.copy() sigma += that return sigma def __iadd__(self, that): - """Add another ExponentialStatistics object to this one.""" + """Add another ExponentialMovingStatistics object to this one.""" self._mean += that.mean() self._variance += that.variance() return self def __mul__(self, that): - """Multiply by a scalar to change ExponentialStatistics weighting.""" + """Multiply by a 
scalar to change ExponentialMovingStatistics weighting.""" sigma = self.copy() sigma *= that return sigma @@ -383,7 +383,7 @@ def __mul__(self, that): __rmul__ = __mul__ def __imul__(self, that): - """Multiply by a scalar to change ExponentialStatistics weighting + """Multiply by a scalar to change ExponentialMovingStatistics weighting in-place. """ @@ -394,8 +394,8 @@ def __imul__(self, that): def make_exponential_statistics(state): - """Make ExponentialStatistics object from state.""" - return ExponentialStatistics.fromstate(state) + """Make ExponentialMovingStatistics object from state.""" + return ExponentialMovingStatistics.fromstate(state) class Regression: @@ -567,10 +567,10 @@ def __init__( """ self._initial_covariance = float(covariance) self._covariance = self._initial_covariance - self._xstats = ExponentialStatistics( + self._xstats = ExponentialMovingStatistics( decay=decay, mean=mean_x, variance=variance_x ) - self._ystats = ExponentialStatistics( + self._ystats = ExponentialMovingStatistics( decay=decay, mean=mean_y, variance=variance_y ) self.decay = decay diff --git a/tests/__main__.py b/tests/__main__.py index 142f9b5..8cb5017 100644 --- a/tests/__main__.py +++ b/tests/__main__.py @@ -2,10 +2,12 @@ import sys -from runstats import ExponentialStatistics as FastExponentialStatistics +from runstats import ExponentialMovingStatistics as FastExponentialStatistics from runstats import Regression as FastRegression from runstats import Statistics as FastStatistics -from runstats.core import ExponentialStatistics as CoreExponentialStatistics +from runstats.core import ( + ExponentialMovingStatistics as CoreExponentialStatistics, +) from runstats.core import Regression as CoreRegression from runstats.core import Statistics as CoreStatistics diff --git a/tests/benchmark.py b/tests/benchmark.py index c9d03ea..b4ecab9 100644 --- a/tests/benchmark.py +++ b/tests/benchmark.py @@ -50,8 +50,8 @@ def main(): core_exp_stats = timeit.repeat( setup=''' from __main__ 
import values - from runstats.core import ExponentialStatistics - exp_stats = ExponentialStatistics() + from runstats.core import ExponentialMovingStatistics + exp_stats = ExponentialMovingStatistics() ''', stmt=''' for value in values: @@ -65,8 +65,8 @@ def main(): fast_exp_stats = timeit.repeat( setup=''' from __main__ import values - from runstats.fast import ExponentialStatistics - exp_stats = ExponentialStatistics() + from runstats.fast import ExponentialMovingStatistics + exp_stats = ExponentialMovingStatistics() ''', stmt=''' for value in values: @@ -115,8 +115,8 @@ def main(): print('fast.Statistics:', fast_stats) print(' Stats Speedup: %.2fx faster' % speedup_stats) - print('core.ExponentialStatistics:', core_exp_stats) - print('fast.ExponentialStatistics:', fast_exp_stats) + print('core.ExponentialMovingStatistics:', core_exp_stats) + print('fast.ExponentialMovingStatistics:', fast_exp_stats) print(' ExpStats Speedup: %.2fx faster' % speedup_exp_stats) print('core.Regression:', core_regr) diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 1aab601..792ffe4 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -11,11 +11,13 @@ import pytest from runstats import ExponentialCovariance as FastExponentialCovariance -from runstats import ExponentialStatistics as FastExponentialStatistics +from runstats import ExponentialMovingStatistics as FastExponentialStatistics from runstats import Regression as FastRegression from runstats import Statistics as FastStatistics from runstats.core import ExponentialCovariance as CoreExponentialCovariance -from runstats.core import ExponentialStatistics as CoreExponentialStatistics +from runstats.core import ( + ExponentialMovingStatistics as CoreExponentialStatistics, +) from runstats.core import Regression as CoreRegression from runstats.core import Statistics as CoreStatistics @@ -202,16 +204,16 @@ def test_statistics(Statistics, Regression): @pytest.mark.parametrize( - 'ExponentialStatistics', + 
'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_exponential_statistics(ExponentialStatistics): +def test_exponential_statistics(ExponentialMovingStatistics): random.seed(0) alpha = [random.random() for _ in range(count)] big_alpha = [random.random() for _ in range(count * 100)] - alpha_exp_stats_zero = ExponentialStatistics(0.9999) - alpha_exp_stats_init = ExponentialStatistics( + alpha_exp_stats_zero = ExponentialMovingStatistics(0.9999) + alpha_exp_stats_init = ExponentialMovingStatistics( decay=0.9999, mean=mean(alpha), variance=variance(alpha, 0), @@ -253,9 +255,9 @@ def test_exponential_statistics(ExponentialStatistics): < limit ) - alpha_exp_stats = ExponentialStatistics(0.1, iterable=alpha) + alpha_exp_stats = ExponentialMovingStatistics(0.1, iterable=alpha) beta = [random.random() * 2 for _ in range(count)] - beta_exp_stats = ExponentialStatistics(0.1) + beta_exp_stats = ExponentialMovingStatistics(0.1) assert alpha_exp_stats != beta_exp_stats @@ -348,12 +350,12 @@ def test_add_statistics(Statistics, Regression): @pytest.mark.parametrize( - 'ExponentialStatistics', + 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_add_exponential_statistics(ExponentialStatistics): - exp_stats0 = ExponentialStatistics(0.9) - exp_stats10 = ExponentialStatistics(0.9, iterable=range(10)) +def test_add_exponential_statistics(ExponentialMovingStatistics): + exp_stats0 = ExponentialMovingStatistics(0.9) + exp_stats10 = ExponentialMovingStatistics(0.9, iterable=range(10)) assert (exp_stats0 + exp_stats10) == exp_stats10 assert (exp_stats10 + exp_stats0) == exp_stats10 @@ -456,16 +458,16 @@ def test_get_set_state_statistics(Statistics, Regression): @pytest.mark.parametrize( - 'ExponentialStatistics', + 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_get_set_state_exponential_statistics(ExponentialStatistics): +def 
test_get_set_state_exponential_statistics(ExponentialMovingStatistics): random.seed(0) vals = [random.random() for _ in range(count)] - exp_stats = ExponentialStatistics(iterable=vals) + exp_stats = ExponentialMovingStatistics(iterable=vals) exp_state = exp_stats.get_state() - new_exp_stats = ExponentialStatistics(0.8) + new_exp_stats = ExponentialMovingStatistics(0.8) assert exp_stats != new_exp_stats assert new_exp_stats.decay == 0.8 new_exp_stats.set_state(exp_state) @@ -477,7 +479,9 @@ def test_get_set_state_exponential_statistics(ExponentialStatistics): assert exp_stats.variance() == new_exp_stats.variance() assert new_exp_stats.decay == 0.1 - assert exp_stats == ExponentialStatistics.fromstate(exp_stats.get_state()) + assert exp_stats == ExponentialMovingStatistics.fromstate( + exp_stats.get_state() + ) @pytest.mark.parametrize( @@ -555,11 +559,11 @@ def test_pickle_statistics(Statistics, Regression): @pytest.mark.parametrize( - 'ExponentialStatistics', + 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_pickle_exponential_statistics(ExponentialStatistics): - exp_stats = ExponentialStatistics(0.9, iterable=range(10)) +def test_pickle_exponential_statistics(ExponentialMovingStatistics): + exp_stats = ExponentialMovingStatistics(0.9, iterable=range(10)) for num in range(pickle.HIGHEST_PROTOCOL): pickled_exp_stats = pickle.dumps(exp_stats, protocol=num) unpickled_exp_stats = pickle.loads(pickled_exp_stats) @@ -609,11 +613,11 @@ def test_copy_statistics(Statistics, Regression): @pytest.mark.parametrize( - 'ExponentialStatistics', + 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_copy_exponential_statistics(ExponentialStatistics): - exp_stats = ExponentialStatistics(0.9, iterable=range(10)) +def test_copy_exponential_statistics(ExponentialMovingStatistics): + exp_stats = ExponentialMovingStatistics(0.9, iterable=range(10)) copy_exp_stats = copy.copy(exp_stats) 
assert exp_stats == copy_exp_stats deepcopy_exp_stats = copy.deepcopy(exp_stats) @@ -663,12 +667,12 @@ def test_equality_statistics(Statistics, Regression): @pytest.mark.parametrize( - 'ExponentialStatistics', + 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_equality_exponential_statistics(ExponentialStatistics): - exp_stats1 = ExponentialStatistics(0.9, iterable=range(10)) - exp_stats2 = ExponentialStatistics(0.9, iterable=range(10)) +def test_equality_exponential_statistics(ExponentialMovingStatistics): + exp_stats1 = ExponentialMovingStatistics(0.9, iterable=range(10)) + exp_stats2 = ExponentialMovingStatistics(0.9, iterable=range(10)) assert exp_stats1 == exp_stats2 exp_stats2.push(42) assert exp_stats1 != exp_stats2 @@ -762,20 +766,20 @@ def test_multiply(Statistics, Regression): @pytest.mark.parametrize( - 'ExponentialStatistics', + 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_exponential_statistics_batch(ExponentialStatistics): +def test_exponential_statistics_batch(ExponentialMovingStatistics): random.seed(0) alpha = [random.random() for _ in range(count)] beta = [random.random() * 2 for _ in range(count)] - alpha_exp_stats = ExponentialStatistics(0.1, iterable=alpha) + alpha_exp_stats = ExponentialMovingStatistics(0.1, iterable=alpha) assert (alpha_exp_stats * 0.5 + alpha_exp_stats * 0.5) == alpha_exp_stats - beta_exp_stats = ExponentialStatistics(0.9, iterable=beta) + beta_exp_stats = ExponentialMovingStatistics(0.9, iterable=beta) gamma_exp_stats = alpha_exp_stats * 0.3 + beta_exp_stats * 0.7 @@ -818,7 +822,7 @@ def test_exponential_covariance_batch(ExponentialCovariance): @pytest.mark.parametrize( - 'ExponentialStatistics, decay', + 'ExponentialMovingStatistics, decay', list( itertools.product( [CoreExponentialStatistics, FastExponentialStatistics], @@ -826,10 +830,10 @@ def test_exponential_covariance_batch(ExponentialCovariance): ) ), ) -def 
test_exponential_statistics_decays(ExponentialStatistics, decay): +def test_exponential_statistics_decays(ExponentialMovingStatistics, decay): random.seed(0) alpha = [random.random() for _ in range(count)] - exp_stats = ExponentialStatistics(decay=decay, iterable=alpha) + exp_stats = ExponentialMovingStatistics(decay=decay, iterable=alpha) true_mean, true_variance = exp_mean_var(decay=decay, iterable=alpha) assert (error(true_mean, exp_stats.mean())) < limit @@ -870,15 +874,15 @@ def test_raise_if_invalid_multiply(Statistics, Regression): @pytest.mark.parametrize( - 'ExponentialStatistics', + 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_raise_if_invalid_decay_exp_stats(ExponentialStatistics): +def test_raise_if_invalid_decay_exp_stats(ExponentialMovingStatistics): with pytest.raises(ValueError): - ExponentialStatistics(0) - ExponentialStatistics(1) - ExponentialStatistics(-1) - ExponentialStatistics(2) + ExponentialMovingStatistics(0) + ExponentialMovingStatistics(1) + ExponentialMovingStatistics(-1) + ExponentialMovingStatistics(2) @pytest.mark.parametrize( From 661a97369e65c7ecc60292ab55debfea3e3ae31c Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Tue, 16 Feb 2021 20:30:04 +0100 Subject: [PATCH 32/83] debugging ci pipe --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index e7d7911..01928cf 100644 --- a/README.rst +++ b/README.rst @@ -303,8 +303,8 @@ The pure-Python version of `RunStats`_ is directly available if preferred. .. code-block:: python >>> import runstats.core # Pure-Python - >>> runstats._core.Statistics - + >>> runstats.core.Statistics + When importing from `runstats` the Cython-optimized version `_core` is preferred and the `core` version is used as fallback. 
Micro-benchmarking From eefbc49d564edf57b61ca41181f06749afa648a0 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Tue, 16 Feb 2021 20:35:29 +0100 Subject: [PATCH 33/83] debugging ci pipe --- README.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/README.rst b/README.rst index 01928cf..c667bfe 100644 --- a/README.rst +++ b/README.rst @@ -304,7 +304,6 @@ The pure-Python version of `RunStats`_ is directly available if preferred. >>> import runstats.core # Pure-Python >>> runstats.core.Statistics - When importing from `runstats` the Cython-optimized version `_core` is preferred and the `core` version is used as fallback. Micro-benchmarking From be2b786c541adc91cae2f73669fe71261f56c63b Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Tue, 16 Feb 2021 20:40:00 +0100 Subject: [PATCH 34/83] debugging ci pipe --- README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/README.rst b/README.rst index c667bfe..8f8e7fb 100644 --- a/README.rst +++ b/README.rst @@ -304,6 +304,7 @@ The pure-Python version of `RunStats`_ is directly available if preferred. >>> import runstats.core # Pure-Python >>> runstats.core.Statistics + When importing from `runstats` the Cython-optimized version `_core` is preferred and the `core` version is used as fallback. Micro-benchmarking From e4a0197fe7132cebeb811bef72fe489083dc3053 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Tue, 16 Feb 2021 20:43:44 +0100 Subject: [PATCH 35/83] debugging ci pipe --- README.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 8f8e7fb..a04a560 100644 --- a/README.rst +++ b/README.rst @@ -304,7 +304,7 @@ The pure-Python version of `RunStats`_ is directly available if preferred. >>> import runstats.core # Pure-Python >>> runstats.core.Statistics - + When importing from `runstats` the Cython-optimized version `_core` is preferred and the `core` version is used as fallback. 
Micro-benchmarking @@ -312,8 +312,6 @@ Statistics and Regression by calling `push` repeatedly shows the Cython-optimized extension as 20-40 times faster than the pure-Python extension. -.. _`RunStats`: http://www.grantjenks.com/docs/runstats/ - Reference and Indices --------------------- From c12dbacf57f3cad1ee17926ee3632b74986c0296 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Thu, 18 Feb 2021 21:13:32 +0100 Subject: [PATCH 36/83] debugging pipeline - issue with readme --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index a04a560..10c0192 100644 --- a/README.rst +++ b/README.rst @@ -294,9 +294,7 @@ entirely on the C++ code by John Cook as posted in a couple of articles: The ExponentialStatistics implementation is based on: -* `Finch, 2009, Incremental Calculation of Weighted Mean and Variance`_ - -.. _`Finch, 2009, Incremental Calculation of Weighted Mean and Variance`: https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf +* Finch, 2009, Incremental Calculation of Weighted Mean and Variance The pure-Python version of `RunStats`_ is directly available if preferred. @@ -312,6 +310,8 @@ Statistics and Regression by calling `push` repeatedly shows the Cython-optimized extension as 20-40 times faster than the pure-Python extension. +.. 
_`RunStats`: http://www.grantjenks.com/docs/runstats/ + Reference and Indices --------------------- From 16e6f123b366bfcd9728475e53a27ca01089194c Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Thu, 18 Feb 2021 21:23:29 +0100 Subject: [PATCH 37/83] reverted readme --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 10c0192..a04a560 100644 --- a/README.rst +++ b/README.rst @@ -294,7 +294,9 @@ entirely on the C++ code by John Cook as posted in a couple of articles: The ExponentialStatistics implementation is based on: -* Finch, 2009, Incremental Calculation of Weighted Mean and Variance +* `Finch, 2009, Incremental Calculation of Weighted Mean and Variance`_ + +.. _`Finch, 2009, Incremental Calculation of Weighted Mean and Variance`: https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf The pure-Python version of `RunStats`_ is directly available if preferred. @@ -310,8 +312,6 @@ Statistics and Regression by calling `push` repeatedly shows the Cython-optimized extension as 20-40 times faster than the pure-Python extension. -.. 
_`RunStats`: http://www.grantjenks.com/docs/runstats/ - Reference and Indices --------------------- From b3bc0a81855ea9627a85bfdf91a18724c1cc9cbe Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Fri, 19 Feb 2021 18:29:52 +0100 Subject: [PATCH 38/83] implemented clear() test for exponential classes --- runstats/core.py | 9 +++++--- tests/test_runstats.py | 51 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/runstats/core.py b/runstats/core.py index 6ab7d61..5a3a43e 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -283,7 +283,7 @@ def __init__(self, decay=0.9, mean=0.0, variance=0.0, iterable=()): @property def decay(self): - """Exponential decay rate of old values.""" + """Decay rate for old values.""" return self._decay @decay.setter @@ -375,7 +375,10 @@ def __iadd__(self, that): return self def __mul__(self, that): - """Multiply by a scalar to change ExponentialMovingStatistics weighting.""" + """Multiply by a scalar to change ExponentialMovingStatistics + weighting. 
+ + """ sigma = self.copy() sigma *= that return sigma @@ -580,7 +583,7 @@ def __init__( @property def decay(self): - """Exponential decay rate of old values.""" + """Decay rate for old values.""" return self._decay @decay.setter diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 792ffe4..2efc9eb 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -859,6 +859,57 @@ def test_exponential_covariance_decays(ExponentialCovariance, decay): assert (error(true_cor, exp_stats.correlation())) < limit +@pytest.mark.parametrize( + 'ExponentialMovingStatistics', + [CoreExponentialStatistics, FastExponentialStatistics], +) +def test_exponential_statistics_clear(ExponentialMovingStatistics): + random.seed(0) + alpha = [random.random() for _ in range(count)] + mean = 10 + variance = 100 + exp_stats = ExponentialMovingStatistics(mean=mean, variance=variance) + + for val in alpha: + exp_stats.push(val) + + assert exp_stats.mean() != mean + assert exp_stats.variance() != variance + exp_stats.clear() + assert exp_stats.mean() == mean + assert exp_stats.variance() == variance + + +@pytest.mark.parametrize( + 'ExponentialCovariance', + [CoreExponentialCovariance, FastExponentialCovariance], +) +def test_exponential_covariance_clear(ExponentialCovariance): + random.seed(0) + alpha = [(random.random(), random.random()) for _ in range(count)] + mean_x = 10 + variance_x = 100 + mean_y = 1000 + variance_y = 2 + covariance = 20 + exp_cov = ExponentialCovariance(mean_x=mean_x, variance_x=variance_x, mean_y=mean_y, variance_y=variance_y, covariance=covariance) + + for x, y in alpha: + exp_cov.push(x, y) + + assert exp_cov.covariance() != covariance + assert exp_cov._xstats.mean() != mean_x + assert exp_cov._xstats.variance() != variance_x + assert exp_cov._ystats.mean() != mean_y + assert exp_cov._ystats.variance() != variance_y + exp_cov.clear() + assert exp_cov.covariance() == covariance + assert exp_cov._xstats.mean() == mean_x + assert 
exp_cov._xstats.variance() == variance_x + assert exp_cov._ystats.mean() == mean_y + assert exp_cov._ystats.variance() == variance_y + + @pytest.mark.parametrize( 'Statistics,Regression', [ From b59ae5089c79f493747b4cee532a5716194b4697 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Fri, 19 Feb 2021 19:53:41 +0100 Subject: [PATCH 39/83] implemented time based ExpoStats --- runstats/core.py | 63 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 5 deletions(-) diff --git a/runstats/core.py b/runstats/core.py index 5a3a43e..944a718 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -4,8 +4,8 @@ Covariance in a single pass. """ - from __future__ import division +import time class Statistics: @@ -257,14 +257,14 @@ class ExponentialMovingStatistics: """ - def __init__(self, decay=0.9, mean=0.0, variance=0.0, iterable=()): + def __init__(self, decay=0.9, mean=0.0, variance=0.0, delay=None, iterable=()): # TODO: Docstring """Initialize ExponentialMovingStatistics object. Incrementally tracks mean and variance and exponentially discounts old values. Requires a `decay` rate in exclusive range (0, 1) for discounting - previous statistics. + previous statistics. Default 0.9 Optionally allows setting initial mean and variance. Default 0. 
@@ -277,6 +277,9 @@ def __init__(self, decay=0.9, mean=0.0, variance=0.0, iterable=()): self._initial_variance = float(variance) self._mean = self._initial_mean self._variance = self._initial_variance + self._current_time = None + self._time_diff = None + self.delay = delay for value in iterable: self.push(value) @@ -293,10 +296,26 @@ def decay(self, value): raise ValueError('decay must be between 0 and 1') self._decay = value + @property + def delay(self): + """Delay in sec for time based discounting""" + return self._delay + + @delay.setter + def delay(self, value): + if value: + self._current_time = self._current_time if self._current_time else time.time() + else: + self._current_time = None + + self._delay = value + def clear(self): """Clear ExponentialMovingStatistics object.""" self._mean = self._initial_mean self._variance = self._initial_variance + self._current_time = time.time() if self._current_time else None + self._time_diff = None def __eq__(self, that): return self.get_state() == that.get_state() @@ -312,6 +331,9 @@ def get_state(self): self._initial_variance, self._mean, self._variance, + self._delay, + self._current_time, + self._time_diff ) def set_state(self, state): @@ -322,6 +344,9 @@ def set_state(self, state): self._initial_variance, self._mean, self._variance, + self._delay, + self._current_time, + self._time_diff ) = state @classmethod @@ -341,13 +366,41 @@ def copy(self, _=None): __copy__ = copy __deepcopy__ = copy + def clear_timer(self): + if self._current_time: + self._current_time = time.time() + else: + raise AttributeError("clear_timer on a non-time time based (i.e. delay == None) ExponentialMovingStatistics object is illegal") + + def freeze(self): + """freeze time i.e. save the difference between now and _current_time""" + if self._current_time: + self._time_diff = time.time() - self._current_time + else: + raise AttributeError("freeze on a non-time time based (i.e. 
delay == None) ExponentialMovingStatistics object is illegal") + + def unfreeze(self): + if self._current_time is None: + raise AttributeError("unfreeze on a non-time time based (i.e. delay == None) ExponentialMovingStatistics object is illegal") + + if self._time_diff is None: + raise AttributeError("Object must be freezed first before it can be unfreezed") + + self._current_time = time.time() - self._time_diff + def push(self, value): """Add `value` to the ExponentialMovingStatistics summary.""" + if self.delay: + norm_diff = (time.time() - self._current_time) / self.delay + decay = self.decay ** norm_diff + else: + decay = self.decay + value = float(value) - alpha = 1.0 - self._decay + alpha = 1.0 - decay diff = value - self._mean incr = alpha * diff - self._variance += alpha * (self._decay * diff ** 2 - self._variance) + self._variance += alpha * (decay * diff ** 2 - self._variance) self._mean += incr def mean(self): From 4fd9b26282415f6969928f06edf06fba2eff5859 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Fri, 19 Feb 2021 19:56:45 +0100 Subject: [PATCH 40/83] added docstring to time based methods, blue . --- runstats/core.py | 34 ++++++++++++++++++++++++++-------- tests/test_runstats.py | 8 +++++++- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/runstats/core.py b/runstats/core.py index 944a718..394be84 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -257,7 +257,9 @@ class ExponentialMovingStatistics: """ - def __init__(self, decay=0.9, mean=0.0, variance=0.0, delay=None, iterable=()): # TODO: Docstring + def __init__( + self, decay=0.9, mean=0.0, variance=0.0, delay=None, iterable=() + ): # TODO: Docstring """Initialize ExponentialMovingStatistics object. 
Incrementally tracks mean and variance and exponentially discounts old @@ -304,7 +306,9 @@ def delay(self): @delay.setter def delay(self, value): if value: - self._current_time = self._current_time if self._current_time else time.time() + self._current_time = ( + self._current_time if self._current_time else time.time() + ) else: self._current_time = None @@ -333,7 +337,7 @@ def get_state(self): self._variance, self._delay, self._current_time, - self._time_diff + self._time_diff, ) def set_state(self, state): @@ -346,7 +350,7 @@ def set_state(self, state): self._variance, self._delay, self._current_time, - self._time_diff + self._time_diff, ) = state @classmethod @@ -367,24 +371,38 @@ def copy(self, _=None): __deepcopy__ = copy def clear_timer(self): + """Reset _current_time to now""" if self._current_time: self._current_time = time.time() else: - raise AttributeError("clear_timer on a non-time time based (i.e. delay == None) ExponentialMovingStatistics object is illegal") + raise AttributeError( + 'clear_timer on a non-time time based (i.e. delay == None) ' + 'ExponentialMovingStatistics object is illegal' + ) def freeze(self): """freeze time i.e. save the difference between now and _current_time""" if self._current_time: self._time_diff = time.time() - self._current_time else: - raise AttributeError("freeze on a non-time time based (i.e. delay == None) ExponentialMovingStatistics object is illegal") + raise AttributeError( + 'freeze on a non-time time based (i.e. delay == None) ' + 'ExponentialMovingStatistics object is illegal' + ) def unfreeze(self): + """unfreeze time i.e. set the _current_time to be difference between + now and _time_diff""" if self._current_time is None: - raise AttributeError("unfreeze on a non-time time based (i.e. delay == None) ExponentialMovingStatistics object is illegal") + raise AttributeError( + 'unfreeze on a non-time time based (i.e. 
delay == None) ' + 'ExponentialMovingStatistics object is illegal' + ) if self._time_diff is None: - raise AttributeError("Object must be freezed first before it can be unfreezed") + raise AttributeError( + 'Time must be freezed first before it can be unfreezed' + ) self._current_time = time.time() - self._time_diff diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 2efc9eb..3398ec4 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -892,7 +892,13 @@ def test_exponential_covariance_clear(ExponentialCovariance): mean_y = 1000 variance_y = 2 covariance = 20 - exp_cov = ExponentialCovariance(mean_x=mean_x, variance_x=variance_x, mean_y=mean_y, variance_y=variance_y, covariance=covariance) + exp_cov = ExponentialCovariance( + mean_x=mean_x, + variance_x=variance_x, + mean_y=mean_y, + variance_y=variance_y, + covariance=covariance, + ) for x, y in alpha: exp_cov.push(x, y) From cac8db9bfed0967b6a4470546c5684f3be65997c Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Fri, 19 Feb 2021 20:05:38 +0100 Subject: [PATCH 41/83] implemented is_time_based for exp_stats --- runstats/core.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/runstats/core.py b/runstats/core.py index 394be84..342798e 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -372,7 +372,7 @@ def copy(self, _=None): def clear_timer(self): """Reset _current_time to now""" - if self._current_time: + if self.is_time_based(): self._current_time = time.time() else: raise AttributeError( @@ -382,7 +382,7 @@ def clear_timer(self): def freeze(self): """freeze time i.e. save the difference between now and _current_time""" - if self._current_time: + if self.is_time_based(): self._time_diff = time.time() - self._current_time else: raise AttributeError( @@ -393,7 +393,7 @@ def freeze(self): def unfreeze(self): """unfreeze time i.e. 
set the _current_time to be difference between now and _time_diff""" - if self._current_time is None: + if not self.is_time_based(): raise AttributeError( 'unfreeze on a non-time time based (i.e. delay == None) ' 'ExponentialMovingStatistics object is illegal' @@ -406,9 +406,12 @@ def unfreeze(self): self._current_time = time.time() - self._time_diff + def is_time_based(self): + return True if self._delay else False + def push(self, value): """Add `value` to the ExponentialMovingStatistics summary.""" - if self.delay: + if self.is_time_based(): norm_diff = (time.time() - self._current_time) / self.delay decay = self.decay ** norm_diff else: @@ -433,7 +436,7 @@ def stddev(self): """Exponential standard deviation of values.""" return self.variance() ** 0.5 - def __add__(self, that): + def __add__(self, that): # TODO: fail if not both same "type" + if add -> new time else left time """Add two ExponentialMovingStatistics objects together.""" sigma = self.copy() sigma += that From 24c0432986b6961fbfd1c063998263fee1fa89dc Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sat, 20 Feb 2021 13:53:14 +0100 Subject: [PATCH 42/83] implemented tests for time based exp_stats, not ready yet --- runstats/core.py | 139 +++++++++++++++++++++++------------------ tests/test_runstats.py | 119 +++++++++++++++++++++++++++++++++-- 2 files changed, 192 insertions(+), 66 deletions(-) diff --git a/runstats/core.py b/runstats/core.py index 342798e..867c0c0 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -106,12 +106,12 @@ def push(self, value): self._count += 1 self._eta += delta_n self._phi += ( - term * delta_n2 * (self._count ** 2 - 3 * self._count + 3) - + 6 * delta_n2 * self._rho - - 4 * delta_n * self._tau + term * delta_n2 * (self._count ** 2 - 3 * self._count + 3) + + 6 * delta_n2 * self._rho + - 4 * delta_n * self._tau ) self._tau += ( - term * delta_n * (self._count - 2) - 3 * delta_n * self._rho + term * delta_n * (self._count - 2) - 3 * delta_n * self._rho ) self._rho 
+= term @@ -161,48 +161,49 @@ def __iadd__(self, that): delta4 = delta ** 4 sum_eta = ( - self._count * self._eta + that._count * that._eta - ) / sum_count + self._count * self._eta + that._count * that._eta + ) / sum_count sum_rho = ( - self._rho - + that._rho - + delta2 * self._count * that._count / sum_count + self._rho + + that._rho + + delta2 * self._count * that._count / sum_count ) sum_tau = ( - self._tau - + that._tau - + delta3 - * self._count - * that._count - * (self._count - that._count) - / (sum_count ** 2) - + 3.0 - * delta - * (self._count * that._rho - that._count * self._rho) - / sum_count + self._tau + + that._tau + + delta3 + * self._count + * that._count + * (self._count - that._count) + / (sum_count ** 2) + + 3.0 + * delta + * (self._count * that._rho - that._count * self._rho) + / sum_count ) sum_phi = ( - self._phi - + that._phi - + delta4 - * self._count - * that._count - * (self._count ** 2 - self._count * that._count + that._count ** 2) - / (sum_count ** 3) - + 6.0 - * delta2 - * ( - self._count * self._count * that._rho - + that._count * that._count * self._rho - ) - / (sum_count ** 2) - + 4.0 - * delta - * (self._count * that._tau - that._count * self._tau) - / sum_count + self._phi + + that._phi + + delta4 + * self._count + * that._count + * ( + self._count ** 2 - self._count * that._count + that._count ** 2) + / (sum_count ** 3) + + 6.0 + * delta2 + * ( + self._count * self._count * that._rho + + that._count * that._count * self._rho + ) + / (sum_count ** 2) + + 4.0 + * delta + * (self._count * that._tau - that._count * self._tau) + / sum_count ) if self._count == 0.0: @@ -258,7 +259,7 @@ class ExponentialMovingStatistics: """ def __init__( - self, decay=0.9, mean=0.0, variance=0.0, delay=None, iterable=() + self, decay=0.9, mean=0.0, variance=0.0, delay=None, iterable=() ): # TODO: Docstring """Initialize ExponentialMovingStatistics object. 
@@ -281,11 +282,13 @@ def __init__( self._variance = self._initial_variance self._current_time = None self._time_diff = None - self.delay = delay + self.delay = None for value in iterable: self.push(value) + self.delay = delay + @property def decay(self): """Decay rate for old values.""" @@ -311,6 +314,7 @@ def delay(self, value): ) else: self._current_time = None + self._time_diff = None self._delay = value @@ -318,7 +322,7 @@ def clear(self): """Clear ExponentialMovingStatistics object.""" self._mean = self._initial_mean self._variance = self._initial_variance - self._current_time = time.time() if self._current_time else None + self._current_time = time.time() if self.is_time_based() else None self._time_diff = None def __eq__(self, that): @@ -374,6 +378,7 @@ def clear_timer(self): """Reset _current_time to now""" if self.is_time_based(): self._current_time = time.time() + self._time_diff = None else: raise AttributeError( 'clear_timer on a non-time time based (i.e. delay == None) ' @@ -405,15 +410,20 @@ def unfreeze(self): ) self._current_time = time.time() - self._time_diff + self._time_diff = None def is_time_based(self): - return True if self._delay else False + """Checks if object is time-based or not i.e. 
delay is set or None""" + return True if self.delay else False def push(self, value): """Add `value` to the ExponentialMovingStatistics summary.""" if self.is_time_based(): - norm_diff = (time.time() - self._current_time) / self.delay + diff = self._time_diff if self._time_diff else ( + time.time() - self._current_time) + norm_diff = diff / self.delay decay = self.decay ** norm_diff + self._current_time = time.time() else: decay = self.decay @@ -436,10 +446,19 @@ def stddev(self): """Exponential standard deviation of values.""" return self.variance() ** 0.5 - def __add__(self, that): # TODO: fail if not both same "type" + if add -> new time else left time + def __add__(self, that): """Add two ExponentialMovingStatistics objects together.""" + if self.is_time_based() != that.is_time_based(): + raise AttributeError("Adding two ExponentialMovingStatistics " + "requires both being of same type i.e. " + "time-based") + sigma = self.copy() sigma += that + + if sigma.is_time_based(): + sigma.clear_timer() + return sigma def __iadd__(self, that): @@ -552,10 +571,10 @@ def __len__(self): def push(self, xcoord, ycoord): """Add a pair `(x, y)` to the Regression summary.""" self._sxy += ( - (self._xstats.mean() - xcoord) - * (self._ystats.mean() - ycoord) - * self._count - / (self._count + 1) + (self._xstats.mean() - xcoord) + * (self._ystats.mean() - ycoord) + * self._count + / (self._count + 1) ) self._xstats.push(xcoord) self._ystats.push(ycoord) @@ -593,9 +612,9 @@ def __iadd__(self, that): deltax = that._xstats.mean() - self._xstats.mean() deltay = that._ystats.mean() - self._ystats.mean() sum_sxy = ( - self._sxy - + that._sxy - + self._count * that._count * deltax * deltay / sum_count + self._sxy + + that._sxy + + self._count * that._count * deltax * deltay / sum_count ) self._count = sum_count @@ -619,14 +638,14 @@ class ExponentialCovariance: """ def __init__( - self, - decay=0.9, - mean_x=0.0, - variance_x=0.0, - mean_y=0.0, - variance_y=0.0, - covariance=0.0, - 
iterable=(), + self, + decay=0.9, + mean_x=0.0, + variance_x=0.0, + mean_y=0.0, + variance_y=0.0, + covariance=0.0, + iterable=(), ): # pylint: disable=too-many-arguments """Initialize ExponentialCovariance object. @@ -720,7 +739,7 @@ def push(self, x_val, y_val): self._xstats.push(x_val) alpha = 1.0 - self.decay self._covariance = self.decay * self.covariance() + alpha * ( - x_val - self._xstats.mean() + x_val - self._xstats.mean() ) * (y_val - self._ystats.mean()) self._ystats.push(y_val) diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 3398ec4..68fdba5 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -7,9 +7,11 @@ import math import pickle import random - +import time import pytest +from unittest.mock import patch + from runstats import ExponentialCovariance as FastExponentialCovariance from runstats import ExponentialMovingStatistics as FastExponentialStatistics from runstats import Regression as FastRegression @@ -348,17 +350,28 @@ def test_add_statistics(Statistics, Regression): assert (stats0 + stats10) == stats10 assert (stats10 + stats0) == stats10 - +@patch("runstats.ExponentialMovingStatistics.clear_timer") @pytest.mark.parametrize( 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_add_exponential_statistics(ExponentialMovingStatistics): +def test_add_exponential_statistics(clear_timer_mock, ExponentialMovingStatistics): exp_stats0 = ExponentialMovingStatistics(0.9) exp_stats10 = ExponentialMovingStatistics(0.9, iterable=range(10)) assert (exp_stats0 + exp_stats10) == exp_stats10 assert (exp_stats10 + exp_stats0) == exp_stats10 + exp_stats0.decay = 0.8 + exp_stats0.delay = 60 + exp_stats10.delay = 120 + exp_stats = exp_stats0 + exp_stats10 + assert exp_stats.delay == exp_stats0.delay != exp_stats10.delay + assert exp_stats.decay == exp_stats0.decay != exp_stats10.decay + + exp_stats0 += exp_stats10 + assert exp_stats0.decay == 0.8 + assert exp_stats0.delay == 60 + 
clear_timer_mock.assert_called_once() @pytest.mark.parametrize( 'ExponentialCovariance', @@ -483,6 +496,15 @@ def test_get_set_state_exponential_statistics(ExponentialMovingStatistics): exp_stats.get_state() ) + new_exp_stats.decay = 0.9 + assert exp_stats == new_exp_stats + exp_stats.delay = 60 + assert exp_stats != new_exp_stats + exp_stats.freeze() + + assert exp_stats == ExponentialMovingStatistics.fromstate( + exp_stats.get_state() + ) @pytest.mark.parametrize( 'ExponentialCovariance', @@ -563,7 +585,8 @@ def test_pickle_statistics(Statistics, Regression): [CoreExponentialStatistics, FastExponentialStatistics], ) def test_pickle_exponential_statistics(ExponentialMovingStatistics): - exp_stats = ExponentialMovingStatistics(0.9, iterable=range(10)) + exp_stats = ExponentialMovingStatistics(0.9, iterable=range(10), delay=30) + exp_stats.freeze() for num in range(pickle.HIGHEST_PROTOCOL): pickled_exp_stats = pickle.dumps(exp_stats, protocol=num) unpickled_exp_stats = pickle.loads(pickled_exp_stats) @@ -617,7 +640,8 @@ def test_copy_statistics(Statistics, Regression): [CoreExponentialStatistics, FastExponentialStatistics], ) def test_copy_exponential_statistics(ExponentialMovingStatistics): - exp_stats = ExponentialMovingStatistics(0.9, iterable=range(10)) + exp_stats = ExponentialMovingStatistics(0.9, iterable=range(10), delay=30) + exp_stats.freeze() copy_exp_stats = copy.copy(exp_stats) assert exp_stats == copy_exp_stats deepcopy_exp_stats = copy.deepcopy(exp_stats) @@ -673,9 +697,13 @@ def test_equality_statistics(Statistics, Regression): def test_equality_exponential_statistics(ExponentialMovingStatistics): exp_stats1 = ExponentialMovingStatistics(0.9, iterable=range(10)) exp_stats2 = ExponentialMovingStatistics(0.9, iterable=range(10)) - assert exp_stats1 == exp_stats2 + exp_stats3 = ExponentialMovingStatistics(0.9, iterable=range(10), delay=30) + assert exp_stats1 == exp_stats2 != exp_stats3 exp_stats2.push(42) assert exp_stats1 != exp_stats2 + 
exp_stats3.freeze() + exp_stats3.delay = None + assert exp_stats1 == exp_stats3 @pytest.mark.parametrize( @@ -875,9 +903,30 @@ def test_exponential_statistics_clear(ExponentialMovingStatistics): assert exp_stats.mean() != mean assert exp_stats.variance() != variance + assert exp_stats._current_time is None + assert exp_stats._time_diff is None exp_stats.clear() assert exp_stats.mean() == mean assert exp_stats.variance() == variance + assert exp_stats._current_time is None + assert exp_stats._time_diff is None + + exp_stats.delay = 60 + current_time = exp_stats._current_time + assert exp_stats._current_time is not None + assert exp_stats._time_diff is None + exp_stats.freeze() + assert exp_stats._time_diff is not None + exp_stats.clear() + new_current_time = exp_stats._current_time + assert exp_stats._current_time is not None + assert exp_stats._current_time != current_time + assert exp_stats._time_diff is None + exp_stats.freeze() + exp_stats.clear_timer() + assert exp_stats._current_time is not None + assert exp_stats._current_time != new_current_time + assert exp_stats._time_diff is None @pytest.mark.parametrize( @@ -916,6 +965,49 @@ def test_exponential_covariance_clear(ExponentialCovariance): assert exp_cov._ystats.variance() == variance_y +@pytest.mark.parametrize( + 'ExponentialMovingStatistics', + [CoreExponentialStatistics, FastExponentialStatistics], +) +def test_exponential_statistics_is_time(ExponentialMovingStatistics): + exp_stats = ExponentialMovingStatistics() + assert not exp_stats.is_time_based() + assert exp_stats.delay is None + assert exp_stats._current_time is None + assert exp_stats._time_diff is None + exp_stats.delay = 30 + assert exp_stats.is_time_based() + assert exp_stats.delay is not None + assert exp_stats._current_time is not None + assert exp_stats._time_diff is None + exp_stats = ExponentialMovingStatistics(delay=30) + assert exp_stats.delay is not None + assert exp_stats._current_time is not None + assert exp_stats._time_diff is 
None + exp_stats.freeze() + assert exp_stats is not None + + +@pytest.mark.parametrize( + 'ExponentialMovingStatistics', + [CoreExponentialStatistics, FastExponentialStatistics], +) +def test_exponential_statistics_freeze_unfreeze(ExponentialMovingStatistics): + exp_stats = ExponentialMovingStatistics(delay=30) + current_time = exp_stats._current_time + assert exp_stats._time_diff is None + exp_stats.freeze() + time.sleep(0.01) + assert exp_stats._time_diff is not None + time_diff = exp_stats._time_diff + time.sleep(0.01) + exp_stats.unfreeze() + future = time.time() + assert exp_stats._current_time > current_time + assert exp_stats._current_time < future - time_diff + assert exp_stats._time_diff is None + + @pytest.mark.parametrize( 'Statistics,Regression', [ @@ -952,3 +1044,18 @@ def test_raise_if_invalid_decay_exp_cov(ExponentialCovariance): ExponentialCovariance(1) ExponentialCovariance(-1) ExponentialCovariance(2) + + +@pytest.mark.parametrize( + 'ExponentialMovingStatistics', + [CoreExponentialStatistics, FastExponentialStatistics], +) +def test_raise_if_not_time_exp_stats(ExponentialMovingStatistics): + exp_stats = ExponentialMovingStatistics() + exp_stats_time = ExponentialMovingStatistics(delay=60) + with pytest.raises(AttributeError): + exp_stats.clear_timer() + exp_stats.freeze() + exp_stats.unfreeze() + exp_stats_time.unfreeze() + exp_stats + exp_stats_time From 3e4fd8466c96b675fb1ee9eda5bfd9ae2eeff3e0 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 21 Feb 2021 19:53:24 +0100 Subject: [PATCH 43/83] implemented tests for time based ExpMovingStats --- runstats/core.py | 126 +++++++++++++++++++++-------------------- tests/test_runstats.py | 79 +++++++++++++++++++++++++- 2 files changed, 142 insertions(+), 63 deletions(-) diff --git a/runstats/core.py b/runstats/core.py index 867c0c0..dc92bf3 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -106,12 +106,12 @@ def push(self, value): self._count += 1 self._eta += delta_n self._phi += ( - 
term * delta_n2 * (self._count ** 2 - 3 * self._count + 3) - + 6 * delta_n2 * self._rho - - 4 * delta_n * self._tau + term * delta_n2 * (self._count ** 2 - 3 * self._count + 3) + + 6 * delta_n2 * self._rho + - 4 * delta_n * self._tau ) self._tau += ( - term * delta_n * (self._count - 2) - 3 * delta_n * self._rho + term * delta_n * (self._count - 2) - 3 * delta_n * self._rho ) self._rho += term @@ -161,49 +161,48 @@ def __iadd__(self, that): delta4 = delta ** 4 sum_eta = ( - self._count * self._eta + that._count * that._eta - ) / sum_count + self._count * self._eta + that._count * that._eta + ) / sum_count sum_rho = ( - self._rho - + that._rho - + delta2 * self._count * that._count / sum_count + self._rho + + that._rho + + delta2 * self._count * that._count / sum_count ) sum_tau = ( - self._tau - + that._tau - + delta3 - * self._count - * that._count - * (self._count - that._count) - / (sum_count ** 2) - + 3.0 - * delta - * (self._count * that._rho - that._count * self._rho) - / sum_count + self._tau + + that._tau + + delta3 + * self._count + * that._count + * (self._count - that._count) + / (sum_count ** 2) + + 3.0 + * delta + * (self._count * that._rho - that._count * self._rho) + / sum_count ) sum_phi = ( - self._phi - + that._phi - + delta4 - * self._count - * that._count - * ( - self._count ** 2 - self._count * that._count + that._count ** 2) - / (sum_count ** 3) - + 6.0 - * delta2 - * ( - self._count * self._count * that._rho - + that._count * that._count * self._rho - ) - / (sum_count ** 2) - + 4.0 - * delta - * (self._count * that._tau - that._count * self._tau) - / sum_count + self._phi + + that._phi + + delta4 + * self._count + * that._count + * (self._count ** 2 - self._count * that._count + that._count ** 2) + / (sum_count ** 3) + + 6.0 + * delta2 + * ( + self._count * self._count * that._rho + + that._count * that._count * self._rho + ) + / (sum_count ** 2) + + 4.0 + * delta + * (self._count * that._tau - that._count * self._tau) + / sum_count ) if 
self._count == 0.0: @@ -259,7 +258,7 @@ class ExponentialMovingStatistics: """ def __init__( - self, decay=0.9, mean=0.0, variance=0.0, delay=None, iterable=() + self, decay=0.9, mean=0.0, variance=0.0, delay=None, iterable=() ): # TODO: Docstring """Initialize ExponentialMovingStatistics object. @@ -419,8 +418,11 @@ def is_time_based(self): def push(self, value): """Add `value` to the ExponentialMovingStatistics summary.""" if self.is_time_based(): - diff = self._time_diff if self._time_diff else ( - time.time() - self._current_time) + diff = ( + self._time_diff + if self._time_diff + else (time.time() - self._current_time) + ) norm_diff = diff / self.delay decay = self.decay ** norm_diff self._current_time = time.time() @@ -449,9 +451,11 @@ def stddev(self): def __add__(self, that): """Add two ExponentialMovingStatistics objects together.""" if self.is_time_based() != that.is_time_based(): - raise AttributeError("Adding two ExponentialMovingStatistics " - "requires both being of same type i.e. " - "time-based") + raise AttributeError( + 'Adding two ExponentialMovingStatistics ' + 'requires both being of same type i.e. 
' + 'time-based' + ) sigma = self.copy() sigma += that @@ -571,10 +575,10 @@ def __len__(self): def push(self, xcoord, ycoord): """Add a pair `(x, y)` to the Regression summary.""" self._sxy += ( - (self._xstats.mean() - xcoord) - * (self._ystats.mean() - ycoord) - * self._count - / (self._count + 1) + (self._xstats.mean() - xcoord) + * (self._ystats.mean() - ycoord) + * self._count + / (self._count + 1) ) self._xstats.push(xcoord) self._ystats.push(ycoord) @@ -612,9 +616,9 @@ def __iadd__(self, that): deltax = that._xstats.mean() - self._xstats.mean() deltay = that._ystats.mean() - self._ystats.mean() sum_sxy = ( - self._sxy - + that._sxy - + self._count * that._count * deltax * deltay / sum_count + self._sxy + + that._sxy + + self._count * that._count * deltax * deltay / sum_count ) self._count = sum_count @@ -638,14 +642,14 @@ class ExponentialCovariance: """ def __init__( - self, - decay=0.9, - mean_x=0.0, - variance_x=0.0, - mean_y=0.0, - variance_y=0.0, - covariance=0.0, - iterable=(), + self, + decay=0.9, + mean_x=0.0, + variance_x=0.0, + mean_y=0.0, + variance_y=0.0, + covariance=0.0, + iterable=(), ): # pylint: disable=too-many-arguments """Initialize ExponentialCovariance object. 
@@ -739,7 +743,7 @@ def push(self, x_val, y_val): self._xstats.push(x_val) alpha = 1.0 - self.decay self._covariance = self.decay * self.covariance() + alpha * ( - x_val - self._xstats.mean() + x_val - self._xstats.mean() ) * (y_val - self._ystats.mean()) self._ystats.push(y_val) diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 68fdba5..5f74c15 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -350,12 +350,15 @@ def test_add_statistics(Statistics, Regression): assert (stats0 + stats10) == stats10 assert (stats10 + stats0) == stats10 -@patch("runstats.ExponentialMovingStatistics.clear_timer") + +@patch('runstats.ExponentialMovingStatistics.clear_timer') @pytest.mark.parametrize( 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_add_exponential_statistics(clear_timer_mock, ExponentialMovingStatistics): +def test_add_exponential_statistics( + clear_timer_mock, ExponentialMovingStatistics +): exp_stats0 = ExponentialMovingStatistics(0.9) exp_stats10 = ExponentialMovingStatistics(0.9, iterable=range(10)) assert (exp_stats0 + exp_stats10) == exp_stats10 @@ -373,6 +376,7 @@ def test_add_exponential_statistics(clear_timer_mock, ExponentialMovingStatistic assert exp_stats0.delay == 60 clear_timer_mock.assert_called_once() + @pytest.mark.parametrize( 'ExponentialCovariance', [CoreExponentialCovariance, FastExponentialCovariance], @@ -506,6 +510,7 @@ def test_get_set_state_exponential_statistics(ExponentialMovingStatistics): exp_stats.get_state() ) + @pytest.mark.parametrize( 'ExponentialCovariance', [CoreExponentialCovariance, FastExponentialCovariance], @@ -1008,6 +1013,76 @@ def test_exponential_statistics_freeze_unfreeze(ExponentialMovingStatistics): assert exp_stats._time_diff is None +@pytest.mark.parametrize( + 'ExponentialMovingStatistics', + [CoreExponentialStatistics, FastExponentialStatistics], +) +def test_exponential_statistics_time_based_on_off(ExponentialMovingStatistics): + 
random.seed(0) + alpha = [random.random() for _ in range(count)] + exp_stats = ExponentialMovingStatistics(iterable=alpha) + + assert exp_stats.delay is None + assert exp_stats._current_time is None + exp_stats.delay = 30 + assert exp_stats.delay == 30 + assert exp_stats._current_time is not None + current_time = exp_stats._current_time + time.sleep(0.01) + exp_stats.delay = 60 + assert exp_stats.delay == 60 + assert exp_stats._current_time == current_time + exp_stats.delay = None + assert exp_stats.delay is None + assert exp_stats._current_time is None + + exp_stats_time_init = ExponentialMovingStatistics( + delay=300, iterable=alpha + ) + assert exp_stats_time_init.mean() == exp_stats.mean() + assert exp_stats_time_init.variance() == exp_stats.variance() + assert exp_stats_time_init.delay == 300 + assert exp_stats_time_init._current_time is not None + + exp_stats.push(10) + exp_stats_time_init.push(10) + assert exp_stats.mean() != exp_stats_time_init.mean() + assert exp_stats.variance() != exp_stats_time_init.variance() + + +@pytest.mark.parametrize( + 'ExponentialMovingStatistics', + [CoreExponentialStatistics, FastExponentialStatistics], +) +def test_exponential_statistics_time_based_effective_decay( + ExponentialMovingStatistics, +): + exp_stats = ExponentialMovingStatistics() + exp_stats_time = ExponentialMovingStatistics(delay=0.5) + time.sleep(0.5) + exp_stats_time.push(10) + exp_stats.push(10) + assert error(exp_stats.mean(), exp_stats_time.mean()) < limit + assert error(exp_stats.variance(), exp_stats_time.variance()) < limit + + exp_stats_time.clear_timer() + time.sleep(0.5) + exp_stats_time.freeze() + time.sleep(0.5) + exp_stats_time.push(100) + exp_stats.push(100) + assert error(exp_stats.mean(), exp_stats_time.mean()) < limit + assert error(exp_stats.variance(), exp_stats_time.variance()) < limit + + exp_stats.decay = 0.81 + exp_stats_time.unfreeze() + time.sleep(0.5) + exp_stats_time.push(1000) + exp_stats.push(1000) + assert error(exp_stats.mean(), 
exp_stats_time.mean()) < limit + assert error(exp_stats.variance(), exp_stats_time.variance()) < limit + + @pytest.mark.parametrize( 'Statistics,Regression', [ From 5d352e385b4179ce8adbee08fa317822dd9ea997 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 21 Feb 2021 20:21:35 +0100 Subject: [PATCH 44/83] updated docstring for ExponentialMovingStatistics --- runstats/core.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/runstats/core.py b/runstats/core.py index dc92bf3..291dbb1 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -259,7 +259,7 @@ class ExponentialMovingStatistics: def __init__( self, decay=0.9, mean=0.0, variance=0.0, delay=None, iterable=() - ): # TODO: Docstring + ): """Initialize ExponentialMovingStatistics object. Incrementally tracks mean and variance and exponentially discounts old @@ -273,6 +273,18 @@ def __init__( Iterates optional parameter `iterable` and pushes each value into the statistics summary. + Can discount values based on time passed instead of position if delay is + set from None to a value. Setting delay (in seconds) computes a dynamic + decay rate each time a value is pushed for weighting that value: + dynamic_decay = decay ** (sec_from_last_push / delay). + When the first value x is pushed, sec_from_last_push is the difference + (in sec) between setting the delay from None to a value t (usually at + object initialization) and the time x is being pushed. + When freeze() has been called sec_from_last_push is the difference (in + sec) between the last call to push() and the time freeze() has been + called(). + Note that at object initialization the values in iterable are weighted + as if delay has not been set. 
""" self.decay = decay self._initial_mean = float(mean) @@ -374,7 +386,7 @@ def copy(self, _=None): __deepcopy__ = copy def clear_timer(self): - """Reset _current_time to now""" + """Reset time counter""" if self.is_time_based(): self._current_time = time.time() self._time_diff = None @@ -385,7 +397,7 @@ def clear_timer(self): ) def freeze(self): - """freeze time i.e. save the difference between now and _current_time""" + """Freeze time i.e. save the difference between now and the last push""" if self.is_time_based(): self._time_diff = time.time() - self._current_time else: @@ -395,8 +407,7 @@ def freeze(self): ) def unfreeze(self): - """unfreeze time i.e. set the _current_time to be difference between - now and _time_diff""" + """Unfreeze time i.e. continue counting the time difference""" if not self.is_time_based(): raise AttributeError( 'unfreeze on a non-time time based (i.e. delay == None) ' From ef6f3440834acc65fd372687760725ddb528e136 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Mon, 22 Feb 2021 21:27:15 +0100 Subject: [PATCH 45/83] renamed ExponentialCoveriance to ExponentialMovingCovariance --- runstats/__init__.py | 4 +-- runstats/core.py | 26 +++++++------- tests/__main__.py | 4 +-- tests/test_runstats.py | 80 +++++++++++++++++++++--------------------- 4 files changed, 57 insertions(+), 57 deletions(-) diff --git a/runstats/__init__.py b/runstats/__init__.py index 8fa4c01..97b79c6 100644 --- a/runstats/__init__.py +++ b/runstats/__init__.py @@ -15,7 +15,7 @@ ) except ImportError: # pragma: no cover from .core import ( - ExponentialCovariance, + ExponentialMovingCovariance, ExponentialMovingStatistics, Regression, Statistics, @@ -25,7 +25,7 @@ 'Statistics', 'Regression', 'ExponentialMovingStatistics', - 'ExponentialCovariance', + 'ExponentialMovingCovariance', ] __title__ = 'runstats' __version__ = '1.8.0' diff --git a/runstats/core.py b/runstats/core.py index 291dbb1..92095e5 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -645,10 
+645,10 @@ def make_regression(state): return Regression.fromstate(state) -class ExponentialCovariance: - """Compute exponential covariance and correlation in a single pass. +class ExponentialMovingCovariance: + """Compute exponential moving covariance and correlation in a single pass. - ExponentialCovariance objects may also be added and copied. + ExponentialMovingCovariance objects may also be added and copied. """ @@ -662,7 +662,7 @@ def __init__( covariance=0.0, iterable=(), ): # pylint: disable=too-many-arguments - """Initialize ExponentialCovariance object. + """Initialize ExponentialMovingCovariance object. Incrementally tracks covariance and exponentially discounts old values. @@ -702,7 +702,7 @@ def decay(self, value): self._decay = value def clear(self): - """Clear ExponentialCovariance object.""" + """Clear ExponentialMovingCovariance object.""" self._xstats.clear() self._ystats.clear() self._covariance = self._initial_covariance @@ -734,7 +734,7 @@ def set_state(self, state): @classmethod def fromstate(cls, state): - """Return ExponentialCovariance object from state.""" + """Return ExponentialMovingCovariance object from state.""" stats = cls() stats.set_state(state) return stats @@ -743,14 +743,14 @@ def __reduce__(self): return make_exponential_covariance, (self.get_state(),) def copy(self, _=None): - """Copy ExponentialCovariance object.""" + """Copy ExponentialMovingCovariance object.""" return self.fromstate(self.get_state()) __copy__ = copy __deepcopy__ = copy def push(self, x_val, y_val): - """Add a pair `(x, y)` to the ExponentialCovariance summary.""" + """Add a pair `(x, y)` to the ExponentialMovingCovariance summary.""" self._xstats.push(x_val) alpha = 1.0 - self.decay self._covariance = self.decay * self.covariance() + alpha * ( @@ -768,20 +768,20 @@ def correlation(self): return self.covariance() / denom def __add__(self, that): - """Add two ExponentialCovariance objects together.""" + """Add two ExponentialMovingCovariance objects 
together.""" sigma = self.copy() sigma += that return sigma def __iadd__(self, that): - """Add another ExponentialCovariance object to this one.""" + """Add another ExponentialMovingCovariance object to this one.""" self._xstats += that._xstats self._ystats += that._ystats self._covariance += that.covariance() return self def __mul__(self, that): - """Multiply by a scalar to change ExponentialCovariance weighting.""" + """Multiply by a scalar to change ExponentialMovingCovariance weighting.""" sigma = self.copy() sigma *= that return sigma @@ -789,7 +789,7 @@ def __mul__(self, that): __rmul__ = __mul__ def __imul__(self, that): - """Multiply by a scalar to change ExponentialCovariance weighting + """Multiply by a scalar to change ExponentialMovingCovariance weighting in-place. """ @@ -802,7 +802,7 @@ def __imul__(self, that): def make_exponential_covariance(state): """Make Regression object from state.""" - return ExponentialCovariance.fromstate(state) + return ExponentialMovingCovariance.fromstate(state) if __name__ == 'runstats.core': # pragma: no cover diff --git a/tests/__main__.py b/tests/__main__.py index 8cb5017..76a1c61 100644 --- a/tests/__main__.py +++ b/tests/__main__.py @@ -59,7 +59,7 @@ def main(): fast_exp_stats.push(arg) print() - print('FastExponentialStatistics') + print('FastExponentialMovingStatistics') print('Decay Rate (default):', fast_exp_stats.get_decay()) print('Exponential Mean:', fast_exp_stats.mean()) print('Exponential Variance:', fast_exp_stats.variance()) @@ -71,7 +71,7 @@ def main(): core_exp_stats.push(arg) print() - print('CoreExponentialStatistics') + print('CoreExponentialMovingStatistics') print('Decay Rate (default):', core_exp_stats.get_decay()) print('Exponential Mean:', core_exp_stats.mean()) print('Exponential Variance:', core_exp_stats.variance()) diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 5f74c15..80a837d 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -12,11 +12,11 @@ from 
unittest.mock import patch -from runstats import ExponentialCovariance as FastExponentialCovariance +from runstats import ExponentialMovingCovariance as FastExponentialCovariance from runstats import ExponentialMovingStatistics as FastExponentialStatistics from runstats import Regression as FastRegression from runstats import Statistics as FastStatistics -from runstats.core import ExponentialCovariance as CoreExponentialCovariance +from runstats.core import ExponentialMovingCovariance as CoreExponentialCovariance from runstats.core import ( ExponentialMovingStatistics as CoreExponentialStatistics, ) @@ -295,10 +295,10 @@ def test_exponential_statistics(ExponentialMovingStatistics): @pytest.mark.parametrize( - 'ExponentialCovariance', + 'ExponentialMovingCovariance', [CoreExponentialCovariance, FastExponentialCovariance], ) -def test_exponential_covariance(ExponentialCovariance): +def test_exponential_covariance(ExponentialMovingCovariance): random.seed(0) alpha = [random.random() for _ in range(count)] beta = [x * -10 for x in alpha] @@ -306,7 +306,7 @@ def test_exponential_covariance(ExponentialCovariance): big_beta = [x * -10 for x in big_alpha] data = list(zip(big_alpha, big_beta)) - exp_cov = ExponentialCovariance( + exp_cov = ExponentialMovingCovariance( decay=0.9999, mean_x=mean(alpha), variance_x=variance(alpha, 0), @@ -378,12 +378,12 @@ def test_add_exponential_statistics( @pytest.mark.parametrize( - 'ExponentialCovariance', + 'ExponentialMovingCovariance', [CoreExponentialCovariance, FastExponentialCovariance], ) -def test_add_exponential_covariance(ExponentialCovariance): - exp_cov0 = ExponentialCovariance(0.9) - exp_cov10 = ExponentialCovariance(0.9, iterable=zip(range(10), range(10))) +def test_add_exponential_covariance(ExponentialMovingCovariance): + exp_cov0 = ExponentialMovingCovariance(0.9) + exp_cov10 = ExponentialMovingCovariance(0.9, iterable=zip(range(10), range(10))) assert (exp_cov0 + exp_cov10) == exp_cov10 assert (exp_cov10 + exp_cov0) == 
exp_cov10 @@ -512,16 +512,16 @@ def test_get_set_state_exponential_statistics(ExponentialMovingStatistics): @pytest.mark.parametrize( - 'ExponentialCovariance', + 'ExponentialMovingCovariance', [CoreExponentialCovariance, FastExponentialCovariance], ) -def test_get_set_state_exponential_covariance(ExponentialCovariance): +def test_get_set_state_exponential_covariance(ExponentialMovingCovariance): random.seed(0) vals = [(random.random(), random.random()) for _ in range(count)] - exp_cov = ExponentialCovariance(iterable=vals) + exp_cov = ExponentialMovingCovariance(iterable=vals) exp_state = exp_cov.get_state() - new_exp_cov = ExponentialCovariance(0.8) + new_exp_cov = ExponentialMovingCovariance(0.8) assert exp_cov != new_exp_cov assert new_exp_cov.decay == 0.8 new_exp_cov.set_state(exp_state) @@ -532,7 +532,7 @@ def test_get_set_state_exponential_covariance(ExponentialCovariance): assert exp_cov.covariance() == new_exp_cov.covariance() assert new_exp_cov.decay == 0.1 - assert exp_cov == ExponentialCovariance.fromstate(exp_cov.get_state()) + assert exp_cov == ExponentialMovingCovariance.fromstate(exp_cov.get_state()) @pytest.mark.parametrize( @@ -599,11 +599,11 @@ def test_pickle_exponential_statistics(ExponentialMovingStatistics): @pytest.mark.parametrize( - 'ExponentialCovariance', + 'ExponentialMovingCovariance', [CoreExponentialCovariance, FastExponentialCovariance], ) -def test_pickle_exponential_covariance(ExponentialCovariance): - exp_cov = ExponentialCovariance(0.9, iterable=zip(range(10), range(10))) +def test_pickle_exponential_covariance(ExponentialMovingCovariance): + exp_cov = ExponentialMovingCovariance(0.9, iterable=zip(range(10), range(10))) for num in range(pickle.HIGHEST_PROTOCOL): pickled_exp_cov = pickle.dumps(exp_cov, protocol=num) unpickled_exp_cov = pickle.loads(pickled_exp_cov) @@ -654,11 +654,11 @@ def test_copy_exponential_statistics(ExponentialMovingStatistics): @pytest.mark.parametrize( - 'ExponentialCovariance', + 
'ExponentialMovingCovariance', [CoreExponentialCovariance, FastExponentialCovariance], ) -def test_copy_exponential_covariance(ExponentialCovariance): - exp_cov = ExponentialCovariance(0.9, iterable=zip(range(10), range(10))) +def test_copy_exponential_covariance(ExponentialMovingCovariance): + exp_cov = ExponentialMovingCovariance(0.9, iterable=zip(range(10), range(10))) copy_exp_cov = copy.copy(exp_cov) assert exp_cov == copy_exp_cov deepcopy_exp_cov = copy.deepcopy(exp_cov) @@ -712,12 +712,12 @@ def test_equality_exponential_statistics(ExponentialMovingStatistics): @pytest.mark.parametrize( - 'ExponentialCovariance', + 'ExponentialMovingCovariance', [CoreExponentialCovariance, FastExponentialCovariance], ) -def test_equality_exponential_covariance(ExponentialCovariance): - exp_cov1 = ExponentialCovariance(0.9, iterable=enumerate(range(10))) - exp_cov2 = ExponentialCovariance(0.9, iterable=enumerate(range(10))) +def test_equality_exponential_covariance(ExponentialMovingCovariance): + exp_cov1 = ExponentialMovingCovariance(0.9, iterable=enumerate(range(10))) + exp_cov2 = ExponentialMovingCovariance(0.9, iterable=enumerate(range(10))) assert exp_cov1 == exp_cov2 exp_cov2.push(42, 42) assert exp_cov1 != exp_cov2 @@ -828,20 +828,20 @@ def test_exponential_statistics_batch(ExponentialMovingStatistics): @pytest.mark.parametrize( - 'ExponentialCovariance', + 'ExponentialMovingCovariance', [CoreExponentialCovariance, FastExponentialCovariance], ) -def test_exponential_covariance_batch(ExponentialCovariance): +def test_exponential_covariance_batch(ExponentialMovingCovariance): random.seed(0) alpha = [(random.random(), random.random()) for _ in range(count)] beta = [(random.random() * 2, random.random() * 2) for _ in range(count)] - alpha_exp_cov = ExponentialCovariance(0.1, iterable=alpha) + alpha_exp_cov = ExponentialMovingCovariance(0.1, iterable=alpha) assert (alpha_exp_cov * 0.5 + alpha_exp_cov * 0.5) == alpha_exp_cov - beta_exp_cov = ExponentialCovariance(0.9, 
iterable=beta) + beta_exp_cov = ExponentialMovingCovariance(0.9, iterable=beta) gamma_exp_cov = alpha_exp_cov * 0.3 + beta_exp_cov * 0.7 @@ -874,7 +874,7 @@ def test_exponential_statistics_decays(ExponentialMovingStatistics, decay): @pytest.mark.parametrize( - 'ExponentialCovariance, decay', + 'ExponentialMovingCovariance, decay', list( itertools.product( [CoreExponentialCovariance, FastExponentialCovariance], @@ -882,10 +882,10 @@ def test_exponential_statistics_decays(ExponentialMovingStatistics, decay): ) ), ) -def test_exponential_covariance_decays(ExponentialCovariance, decay): +def test_exponential_covariance_decays(ExponentialMovingCovariance, decay): random.seed(0) alpha = [(random.random(), random.random()) for _ in range(count)] - exp_stats = ExponentialCovariance(decay=decay, iterable=alpha) + exp_stats = ExponentialMovingCovariance(decay=decay, iterable=alpha) true_cov, true_cor = exp_cov_cor(decay=decay, iterable=alpha) assert (error(true_cov, exp_stats.covariance())) < limit @@ -935,10 +935,10 @@ def test_exponential_statistics_clear(ExponentialMovingStatistics): @pytest.mark.parametrize( - 'ExponentialCovariance', + 'ExponentialMovingCovariance', [CoreExponentialCovariance, FastExponentialCovariance], ) -def test_exponential_covariance_clear(ExponentialCovariance): +def test_exponential_covariance_clear(ExponentialMovingCovariance): random.seed(0) alpha = [(random.random(), random.random()) for _ in range(count)] mean_x = 10 @@ -946,7 +946,7 @@ def test_exponential_covariance_clear(ExponentialCovariance): mean_y = 1000 variance_y = 2 covariance = 20 - exp_cov = ExponentialCovariance( + exp_cov = ExponentialMovingCovariance( mean_x=mean_x, variance_x=variance_x, mean_y=mean_y, @@ -1110,15 +1110,15 @@ def test_raise_if_invalid_decay_exp_stats(ExponentialMovingStatistics): @pytest.mark.parametrize( - 'ExponentialCovariance', + 'ExponentialMovingCovariance', [CoreExponentialCovariance, FastExponentialCovariance], ) -def 
test_raise_if_invalid_decay_exp_cov(ExponentialCovariance): +def test_raise_if_invalid_decay_exp_cov(ExponentialMovingCovariance): with pytest.raises(ValueError): - ExponentialCovariance(0) - ExponentialCovariance(1) - ExponentialCovariance(-1) - ExponentialCovariance(2) + ExponentialMovingCovariance(0) + ExponentialMovingCovariance(1) + ExponentialMovingCovariance(-1) + ExponentialMovingCovariance(2) @pytest.mark.parametrize( From 86a48552a504aae991a43de65d37619e84f8176c Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Mon, 22 Feb 2021 21:40:51 +0100 Subject: [PATCH 46/83] adjused tests/_main_ for ExpCov --- tests/__main__.py | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/tests/__main__.py b/tests/__main__.py index 76a1c61..e813ea7 100644 --- a/tests/__main__.py +++ b/tests/__main__.py @@ -11,7 +11,10 @@ from runstats.core import Regression as CoreRegression from runstats.core import Statistics as CoreStatistics -from .test_runstats import kurtosis, mean, skewness, stddev, variance +from runstats import ExponentialMovingCovariance as FastExponentialCoveriance +from runstats.core import ExponentialMovingCovariance as CoreExponentialCoveriance + +from tests.test_runstats import kurtosis, mean, skewness, stddev, variance, exp_mean_var, exp_cov_cor def main(): @@ -25,6 +28,11 @@ def main(): print('Skewness:', skewness(args)) print('Kurtosis:', kurtosis(args)) + exp_mean, exp_var = exp_mean_var(0.9, args) + print("Exponential Moving Mean (decay=0.9):", exp_mean) + print("Exponential Moving Variance (decay=0.9):", exp_var) + print("Exponential Moving StdDev (decay=0.9):", exp_var ** 0.5) + fast_stats = FastStatistics() for arg in args: @@ -60,7 +68,7 @@ def main(): print() print('FastExponentialMovingStatistics') - print('Decay Rate (default):', fast_exp_stats.get_decay()) + print('Decay Rate (default):', fast_exp_stats.decay) print('Exponential Mean:', fast_exp_stats.mean()) print('Exponential Variance:', 
fast_exp_stats.variance()) print('Exponential StdDev:', fast_exp_stats.stddev()) @@ -72,7 +80,7 @@ def main(): print() print('CoreExponentialMovingStatistics') - print('Decay Rate (default):', core_exp_stats.get_decay()) + print('Decay Rate (default):', core_exp_stats.decay) print('Exponential Mean:', core_exp_stats.mean()) print('Exponential Variance:', core_exp_stats.variance()) print('Exponential StdDev:', core_exp_stats.stddev()) @@ -101,6 +109,28 @@ def main(): print('Intercept:', core_regr.intercept()) print('Correlation:', core_regr.correlation()) + fast_exp_cov = FastExponentialCoveriance() + + for index, arg in enumerate(args, 1): + fast_exp_cov.push(index, arg) + + print() + print('FastExponentialCovariance') + print('Decay Rate (default):', fast_exp_cov.decay) + print('Exponential Moving Covariance:', fast_exp_cov.covariance()) + print('Exponential Moving Correlation:', fast_exp_cov.correlation()) + + core_exp_cov = CoreExponentialCoveriance() + + for index, arg in enumerate(args, 1): + core_exp_cov.push(index, arg) + + print() + print('CoreExponentialCovariance') + print('Decay Rate (default):', core_exp_cov.decay) + print('Exponential Moving Covariance:', core_exp_cov.covariance()) + print('Exponential Moving Correlation:', core_exp_cov.correlation()) + if __name__ == '__main__': main() From bcc3aafd1a9a36c84cd364e3c08a890750acf3f2 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Mon, 22 Feb 2021 21:55:15 +0100 Subject: [PATCH 47/83] adjusted clear() in readme for ExpMovStats --- README.rst | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/README.rst b/README.rst index 11ee7d3..1d5efeb 100644 --- a/README.rst +++ b/README.rst @@ -95,8 +95,9 @@ initialized without arguments: >>> exp_stats = ExponentialMovingStatistics() Statistics objects support four methods for modification. 
Use `push` to add -values to the summary, `clear` to reset the summary, sum to combine Statistics -summaries and multiply to weight summary Statistics by a scalar. +values to the summary, `clear` to reset the the object to its initialization +state, sum to combine Statistics summaries and multiply to weight summary +Statistics by a scalar. .. code-block:: python @@ -251,23 +252,10 @@ between 0 and 1. ... ValueError: decay must be between 0 and 1 -The clear method allows to optionally set a new mean, new variance and new -decay. If none are provided mean and variance reset to zero, while the decay is -not changed. - -.. code-block:: python - - >>> exp_stats.clear() - >>> exp_stats.decay - 0.5 - >>> exp_stats.mean() - 0.0 - >>> exp_stats.variance() - 0.0 - Combining `ExponentialMovingStatistics` is done by adding them together. The mean and variance are simply added to create a new object. To weight each -`ExponentialMovingStatistics`, multiply them by a constant factor. If two +`ExponentialMovingStatistics`, multiply them by a constant factor. +Note how this behaviour differs from the two previous classes. `ExponentialMovingStatistics` are added then the leftmost decay is used for the new object. The `len` method is not supported. From ae2561d8b2ee3dafdd5442c3950ba8011b66b82a Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sat, 27 Feb 2021 20:25:42 +0100 Subject: [PATCH 48/83] extended readme for time based ExpMovingStats --- README.rst | 71 +++++++++++++++++++++++++++++++++++++++--- runstats/core.py | 13 ++------ tests/test_runstats.py | 1 - 3 files changed, 70 insertions(+), 15 deletions(-) diff --git a/README.rst b/README.rst index 1d5efeb..5d25ada 100644 --- a/README.rst +++ b/README.rst @@ -237,8 +237,8 @@ multiply. >>> exp_stats.stddev() 3.4049127627507683 -The decay of the exponential statistics can also be changed. The value must be -between 0 and 1. +The decay of the exponential statistics can also be changed during the lifetime +of the object. .. 
code-block:: python @@ -255,8 +255,8 @@ between 0 and 1. Combining `ExponentialMovingStatistics` is done by adding them together. The mean and variance are simply added to create a new object. To weight each `ExponentialMovingStatistics`, multiply them by a constant factor. -Note how this behaviour differs from the two previous classes. -`ExponentialMovingStatistics` are added then the leftmost decay is used for the new +Note how this behaviour differs from the two previous classes. When two +`ExponentialMovingStatistics` are added the decay of the left object is used for the new object. The `len` method is not supported. .. code-block:: python @@ -271,6 +271,69 @@ object. The `len` method is not supported. >>> exp_stats.mean() 6.187836645 +`ExponentialMovingStatistics` can also work in a time-based mode i.e. old statistics +are not simply discounted by the decay rate each time a value is pushed but an +effective decay rate is calculated based on the provided decay rate and the time +difference between the last push and the current push. `ExponentialMovingStatistics` +operate in time based mode when a `delay` value is provided at construction. +The delay is the no. of seconds that need to pass for the effective decay rate +to be equal to the provided decay rate. For example, if a delay of 60 and a +delay of 0.9 is provided, than after 60 seconds pass between calls to push() +the effective decay rate for discounting the old statistics equals 0.9, +when 120 seconds pass than it equals 0.9 ** 2 = 0.81 and so on. +The exact formula for calculating the effective decay rate at a given call to +push is: decay ** ((current_timestamp - timestamp_at_last_push) / delay). The +initial timestamp is the timestamp at object construction. + +.. 
code-block:: python + + >>> alpha_stats = ExponentialMovingStatistics(decay=0.9, delay=1) + >>> time.sleep(1) + >>> alpha_stats.push(100) + >>> round(alpha_stats.mean()) + >>> 10 + >>> alpha_stats.clear() # note that clear() resets the timer as well + >>> time.sleep(2) + >>> alpha_stats.push(100) + >>> round(alpha_stats.mean()) + >>> 19 + +There are a few things to note about an time_based `ExponentialMovingStatistics` object: +- When providing an iterable at construction together with a delay, the iterable +is first processed in non-time based mode i.e. as if there would be no delay +- The delay can also be set after object construction. In this case the initial +timestamp is the time when the delay is set. If a non `None` delay is changed, +this does not effect the timer. Setting delay to `None` deactivates time based +mode. +- When two ExponentialMovingStatistics objects are added the state of the delay +is taken from the left object. If the left object is time-based (non `None` delay) +the timer is reset during an regular __add__ (a + b) for the resulting object +while it is not during an incremental add __iadd__ (a += b). +- Last but not least the timer can be stopped with a call to freeze(). This can be +useful when saving the state of the object (get_state()) for later usage. With a call +to unfreeze() the timer continues where it left of (e.g. after loading). Note that +pushes onto a freezed object use a effective decay rate based on the time +difference between the last call to push and the moment freeze was called(). +- It is not recommended to use time based discounting for use cases that require +high precision on below seconds granularity. + +.. 
code-block:: python + + >>> alpha_stats = ExponentialMovingStatistics(decay=0.9, delay=1) + >>> time.sleep(1) + >>> alpha_stats.freeze() + >>> saved_state = alpha_stats.get_state() + >>> time.sleep(2) + >>> beta_stats = ExponentialMovingStatistics.fromstate(saved_state) + >>> beta_stats.push(10) + >>> round(beta_stats.mean()) + >>> 1 + >>> beta_stats.unfreeze() + >>> time.sleep(1) + >>> beta_stats.push(10) + >>> round(beta_stats.mean()) + >>> 3 + All internal calculations of the Statistics and Regression classes are based entirely on the C++ code by John Cook as posted in a couple of articles: diff --git a/runstats/core.py b/runstats/core.py index 92095e5..d6ce8a0 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -279,9 +279,9 @@ def __init__( dynamic_decay = decay ** (sec_from_last_push / delay). When the first value x is pushed, sec_from_last_push is the difference (in sec) between setting the delay from None to a value t (usually at - object initialization) and the time x is being pushed. - When freeze() has been called sec_from_last_push is the difference (in - sec) between the last call to push() and the time freeze() has been + object construction) and the times when x is being pushed. + When freeze() has been called sec_from_last_push is the difference + between the last call to push() and the time freeze() has been called(). Note that at object initialization the values in iterable are weighted as if delay has not been set. @@ -461,13 +461,6 @@ def stddev(self): def __add__(self, that): """Add two ExponentialMovingStatistics objects together.""" - if self.is_time_based() != that.is_time_based(): - raise AttributeError( - 'Adding two ExponentialMovingStatistics ' - 'requires both being of same type i.e. 
' - 'time-based' - ) - sigma = self.copy() sigma += that diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 80a837d..bce40af 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -1133,4 +1133,3 @@ def test_raise_if_not_time_exp_stats(ExponentialMovingStatistics): exp_stats.freeze() exp_stats.unfreeze() exp_stats_time.unfreeze() - exp_stats + exp_stats_time From bf26224e21401696cd9a567042c5194183c11dbb Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sat, 27 Feb 2021 20:33:35 +0100 Subject: [PATCH 49/83] added exp_cov/cor to __main__.py --- tests/__main__.py | 23 ++++++++++++++++++----- tests/test_runstats.py | 20 +++++++++++++++----- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/tests/__main__.py b/tests/__main__.py index e813ea7..a7553d7 100644 --- a/tests/__main__.py +++ b/tests/__main__.py @@ -12,9 +12,19 @@ from runstats.core import Statistics as CoreStatistics from runstats import ExponentialMovingCovariance as FastExponentialCoveriance -from runstats.core import ExponentialMovingCovariance as CoreExponentialCoveriance +from runstats.core import ( + ExponentialMovingCovariance as CoreExponentialCoveriance, +) -from tests.test_runstats import kurtosis, mean, skewness, stddev, variance, exp_mean_var, exp_cov_cor +from tests.test_runstats import ( + kurtosis, + mean, + skewness, + stddev, + variance, + exp_mean_var, + exp_cov_cor, +) def main(): @@ -29,9 +39,12 @@ def main(): print('Kurtosis:', kurtosis(args)) exp_mean, exp_var = exp_mean_var(0.9, args) - print("Exponential Moving Mean (decay=0.9):", exp_mean) - print("Exponential Moving Variance (decay=0.9):", exp_var) - print("Exponential Moving StdDev (decay=0.9):", exp_var ** 0.5) + exp_cov, exp_cor = exp_cov_cor(0.9, enumerate(args, 1)) + print('Exponential Moving Mean (decay=0.9):', exp_mean) + print('Exponential Moving Variance (decay=0.9):', exp_var) + print('Exponential Moving StdDev (decay=0.9):', exp_var ** 0.5) + print('Exponential Moving 
Covariance (decay=0.9):', exp_cov) + print('Exponential Moving Correlation (decay=0.9):', exp_cor) fast_stats = FastStatistics() diff --git a/tests/test_runstats.py b/tests/test_runstats.py index bce40af..774b06b 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -16,7 +16,9 @@ from runstats import ExponentialMovingStatistics as FastExponentialStatistics from runstats import Regression as FastRegression from runstats import Statistics as FastStatistics -from runstats.core import ExponentialMovingCovariance as CoreExponentialCovariance +from runstats.core import ( + ExponentialMovingCovariance as CoreExponentialCovariance, +) from runstats.core import ( ExponentialMovingStatistics as CoreExponentialStatistics, ) @@ -383,7 +385,9 @@ def test_add_exponential_statistics( ) def test_add_exponential_covariance(ExponentialMovingCovariance): exp_cov0 = ExponentialMovingCovariance(0.9) - exp_cov10 = ExponentialMovingCovariance(0.9, iterable=zip(range(10), range(10))) + exp_cov10 = ExponentialMovingCovariance( + 0.9, iterable=zip(range(10), range(10)) + ) assert (exp_cov0 + exp_cov10) == exp_cov10 assert (exp_cov10 + exp_cov0) == exp_cov10 @@ -532,7 +536,9 @@ def test_get_set_state_exponential_covariance(ExponentialMovingCovariance): assert exp_cov.covariance() == new_exp_cov.covariance() assert new_exp_cov.decay == 0.1 - assert exp_cov == ExponentialMovingCovariance.fromstate(exp_cov.get_state()) + assert exp_cov == ExponentialMovingCovariance.fromstate( + exp_cov.get_state() + ) @pytest.mark.parametrize( @@ -603,7 +609,9 @@ def test_pickle_exponential_statistics(ExponentialMovingStatistics): [CoreExponentialCovariance, FastExponentialCovariance], ) def test_pickle_exponential_covariance(ExponentialMovingCovariance): - exp_cov = ExponentialMovingCovariance(0.9, iterable=zip(range(10), range(10))) + exp_cov = ExponentialMovingCovariance( + 0.9, iterable=zip(range(10), range(10)) + ) for num in range(pickle.HIGHEST_PROTOCOL): pickled_exp_cov = 
pickle.dumps(exp_cov, protocol=num) unpickled_exp_cov = pickle.loads(pickled_exp_cov) @@ -658,7 +666,9 @@ def test_copy_exponential_statistics(ExponentialMovingStatistics): [CoreExponentialCovariance, FastExponentialCovariance], ) def test_copy_exponential_covariance(ExponentialMovingCovariance): - exp_cov = ExponentialMovingCovariance(0.9, iterable=zip(range(10), range(10))) + exp_cov = ExponentialMovingCovariance( + 0.9, iterable=zip(range(10), range(10)) + ) copy_exp_cov = copy.copy(exp_cov) assert exp_cov == copy_exp_cov deepcopy_exp_cov = copy.deepcopy(exp_cov) From c4362217bd40ad60021e34af5c8c86aafa85be7f Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 28 Feb 2021 19:39:55 +0100 Subject: [PATCH 50/83] resolved integration test failures< --- README.rst | 79 ++++++++++++++++++++++-------------------- runstats/__init__.py | 2 +- runstats/core.py | 1 + tests/__main__.py | 14 ++++---- tests/test_runstats.py | 4 +-- 5 files changed, 51 insertions(+), 49 deletions(-) diff --git a/README.rst b/README.rst index 5d25ada..4f34401 100644 --- a/README.rst +++ b/README.rst @@ -83,9 +83,9 @@ Tutorial -------- The Python `RunStats`_ module provides three types for computing running -statistics: Statistics, ExponentialMovingStatistics and Regression.The Regression -object leverages Statistics internally for its calculations. Each can be -initialized without arguments: +statistics: Statistics, ExponentialMovingStatistics and Regression.The +Regression object leverages Statistics internally for its calculations. +Each can be initialized without arguments: .. code-block:: python @@ -207,9 +207,9 @@ Both constructors accept an optional iterable that is consumed and pushed into the summary. Note that you may pass a generator as an iterable and the generator will be entirely consumed. -The ExponentialMovingStatistics are constructed by providing a decay rate, initial -mean, and initial variance. The decay rate has default 0.9 and must be between -0 and 1. 
The initial mean and variance default to zero. +The ExponentialMovingStatistics are constructed by providing a decay rate, +initial mean, and initial variance. The decay rate has default 0.9 and must be +between 0 and 1. The initial mean and variance default to zero. .. code-block:: python @@ -222,9 +222,9 @@ mean, and initial variance. The decay rate has default 0.9 and must be between 0.0 The decay rate is the weight by which the current statistics are discounted -by. Consequently, (1 - decay) is the weight of the new value. Like the `Statistics` class, -there are four methods for modification: `push`, `clear`, sum and -multiply. +by. Consequently, (1 - decay) is the weight of the new value. Like the +`Statistics` class, there are four methods for modification: `push`, `clear`, +sum and multiply. .. code-block:: python @@ -252,12 +252,12 @@ of the object. ... ValueError: decay must be between 0 and 1 -Combining `ExponentialMovingStatistics` is done by adding them together. The mean and -variance are simply added to create a new object. To weight each +Combining `ExponentialMovingStatistics` is done by adding them together. The +mean and variance are simply added to create a new object. To weight each `ExponentialMovingStatistics`, multiply them by a constant factor. Note how this behaviour differs from the two previous classes. When two -`ExponentialMovingStatistics` are added the decay of the left object is used for the new -object. The `len` method is not supported. +`ExponentialMovingStatistics` are added the decay of the left object is used for +the new object. The `len` method is not supported. .. code-block:: python @@ -271,34 +271,36 @@ object. The `len` method is not supported. >>> exp_stats.mean() 6.187836645 -`ExponentialMovingStatistics` can also work in a time-based mode i.e. 
old statistics -are not simply discounted by the decay rate each time a value is pushed but an -effective decay rate is calculated based on the provided decay rate and the time -difference between the last push and the current push. `ExponentialMovingStatistics` -operate in time based mode when a `delay` value is provided at construction. -The delay is the no. of seconds that need to pass for the effective decay rate -to be equal to the provided decay rate. For example, if a delay of 60 and a -delay of 0.9 is provided, than after 60 seconds pass between calls to push() -the effective decay rate for discounting the old statistics equals 0.9, -when 120 seconds pass than it equals 0.9 ** 2 = 0.81 and so on. +`ExponentialMovingStatistics` can also work in a time-based mode i.e. old +statistics are not simply discounted by the decay rate each time a value is +pushed but an effective decay rate is calculated based on the provided decay +rate and the time difference between the last push and the current push. +`ExponentialMovingStatistics` operate in time based mode when a `delay` value is +provided at construction. The delay is the no. of seconds that need to pass for +the effective decay rate to be equal to the provided decay rate. For example, if +a delay of 60 and a delay of 0.9 is provided, than after 60 seconds pass between +calls to push() the effective decay rate for discounting the old statistics +equals 0.9, when 120 seconds pass than it equals 0.9 ** 2 = 0.81 and so on. The exact formula for calculating the effective decay rate at a given call to push is: decay ** ((current_timestamp - timestamp_at_last_push) / delay). The initial timestamp is the timestamp at object construction. .. 
code-block:: python + >>> import time >>> alpha_stats = ExponentialMovingStatistics(decay=0.9, delay=1) >>> time.sleep(1) >>> alpha_stats.push(100) >>> round(alpha_stats.mean()) - >>> 10 + 10 >>> alpha_stats.clear() # note that clear() resets the timer as well >>> time.sleep(2) >>> alpha_stats.push(100) >>> round(alpha_stats.mean()) - >>> 19 + 19 -There are a few things to note about an time_based `ExponentialMovingStatistics` object: +There are a few things to note about an time_based +`ExponentialMovingStatistics` object: - When providing an iterable at construction together with a delay, the iterable is first processed in non-time based mode i.e. as if there would be no delay - The delay can also be set after object construction. In this case the initial @@ -306,16 +308,17 @@ timestamp is the time when the delay is set. If a non `None` delay is changed, this does not effect the timer. Setting delay to `None` deactivates time based mode. - When two ExponentialMovingStatistics objects are added the state of the delay -is taken from the left object. If the left object is time-based (non `None` delay) -the timer is reset during an regular __add__ (a + b) for the resulting object -while it is not during an incremental add __iadd__ (a += b). -- Last but not least the timer can be stopped with a call to freeze(). This can be -useful when saving the state of the object (get_state()) for later usage. With a call -to unfreeze() the timer continues where it left of (e.g. after loading). Note that -pushes onto a freezed object use a effective decay rate based on the time -difference between the last call to push and the moment freeze was called(). -- It is not recommended to use time based discounting for use cases that require -high precision on below seconds granularity. +is taken from the left object. 
If the left object is time-based (non `None` +delay) the timer is reset during an regular __add__ (a + b) for the resulting +object while it is not during an incremental add __iadd__ (a += b). +- Last but not least the timer can be stopped with a call to freeze(). This can +be useful when saving the state of the object (get_state()) for later usage. +With a call to unfreeze() the timer continues where it left of (e.g. after +loading). Note that pushes onto a freezed object use a effective decay rate +based on the time difference between the last call to push and the moment +freeze was called(). +- It is not recommended to use time based discounting for use cases that +require high precision on below seconds granularity. .. code-block:: python @@ -327,12 +330,12 @@ high precision on below seconds granularity. >>> beta_stats = ExponentialMovingStatistics.fromstate(saved_state) >>> beta_stats.push(10) >>> round(beta_stats.mean()) - >>> 1 + 1 >>> beta_stats.unfreeze() >>> time.sleep(1) >>> beta_stats.push(10) >>> round(beta_stats.mean()) - >>> 3 + 3 All internal calculations of the Statistics and Regression classes are based entirely on the C++ code by John Cook as posted in a couple of articles: diff --git a/runstats/__init__.py b/runstats/__init__.py index 97b79c6..72ddb54 100644 --- a/runstats/__init__.py +++ b/runstats/__init__.py @@ -8,7 +8,7 @@ try: from ._core import ( - ExponentialCovariance, + ExponentialMovingCovariance, ExponentialMovingStatistics, Regression, Statistics, diff --git a/runstats/core.py b/runstats/core.py index d6ce8a0..df8d422 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -5,6 +5,7 @@ """ from __future__ import division + import time diff --git a/tests/__main__.py b/tests/__main__.py index a7553d7..551d32b 100644 --- a/tests/__main__.py +++ b/tests/__main__.py @@ -2,28 +2,26 @@ import sys +from runstats import ExponentialMovingCovariance as FastExponentialCoveriance from runstats import ExponentialMovingStatistics as 
FastExponentialStatistics from runstats import Regression as FastRegression from runstats import Statistics as FastStatistics +from runstats.core import ( + ExponentialMovingCovariance as CoreExponentialCoveriance, +) from runstats.core import ( ExponentialMovingStatistics as CoreExponentialStatistics, ) from runstats.core import Regression as CoreRegression from runstats.core import Statistics as CoreStatistics - -from runstats import ExponentialMovingCovariance as FastExponentialCoveriance -from runstats.core import ( - ExponentialMovingCovariance as CoreExponentialCoveriance, -) - from tests.test_runstats import ( + exp_cov_cor, + exp_mean_var, kurtosis, mean, skewness, stddev, variance, - exp_mean_var, - exp_cov_cor, ) diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 774b06b..e128529 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -8,10 +8,10 @@ import pickle import random import time -import pytest - from unittest.mock import patch +import pytest + from runstats import ExponentialMovingCovariance as FastExponentialCovariance from runstats import ExponentialMovingStatistics as FastExponentialStatistics from runstats import Regression as FastRegression From 15eb66c60e2f6d29367e7002072637892cb0a456 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 28 Feb 2021 20:27:23 +0100 Subject: [PATCH 51/83] trying to resolve doc8 --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 4f34401..5c2199f 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ RunStats: Computing Statistics and Regression in One Pass ========================================================= -`RunStats`_ is an Apache2 licensed Python module for online statistics and +`runstats`_ is an Apache2 licensed Python module for online statistics and online regression. Statistics and regression summaries are computed in a single pass. Previous values are not recorded in summaries. 
From febb2bdea22811aa4c0312af765bc5b5f1d28bca Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 28 Feb 2021 20:29:29 +0100 Subject: [PATCH 52/83] undone change in readme for doc8 --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 5c2199f..4f34401 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ RunStats: Computing Statistics and Regression in One Pass ========================================================= -`runstats`_ is an Apache2 licensed Python module for online statistics and +`RunStats`_ is an Apache2 licensed Python module for online statistics and online regression. Statistics and regression summaries are computed in a single pass. Previous values are not recorded in summaries. From 47f097b83845f100855d029924d1113552cfbb52 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 28 Feb 2021 21:07:07 +0100 Subject: [PATCH 53/83] debugged ExponentialMovingStatistics delay setter --- README.rst | 55 ++++++++++++++++++++++++++++++------------ runstats/core.py | 9 ++++--- tests/test_runstats.py | 4 +++ 3 files changed, 50 insertions(+), 18 deletions(-) diff --git a/README.rst b/README.rst index 4f34401..4ec3ac2 100644 --- a/README.rst +++ b/README.rst @@ -82,17 +82,19 @@ function: Tutorial -------- -The Python `RunStats`_ module provides three types for computing running -statistics: Statistics, ExponentialMovingStatistics and Regression.The -Regression object leverages Statistics internally for its calculations. +The Python `RunStats`_ module provides four types for computing running +statistics: Statistics, ExponentialMovingStatistics, +ExponentialMovingCovariance and Regression. +The Regression object leverages Statistics internally for its calculations. Each can be initialized without arguments: .. 
code-block:: python - >>> from runstats import Statistics, Regression, ExponentialMovingStatistics + >>> from runstats import Statistics, Regression, ExponentialMovingStatistics, ExponentialMovingCovariance >>> stats = Statistics() >>> regr = Regression() >>> exp_stats = ExponentialMovingStatistics() + >>> exp_cov = ExponentialMovingCovariance() Statistics objects support four methods for modification. Use `push` to add values to the summary, `clear` to reset the the object to its initialization @@ -208,7 +210,7 @@ the summary. Note that you may pass a generator as an iterable and the generator will be entirely consumed. The ExponentialMovingStatistics are constructed by providing a decay rate, -initial mean, and initial variance. The decay rate has default 0.9 and must be +initial mean, and initial variance. The decay rate defaults to 0.9 and must be between 0 and 1. The initial mean and variance default to zero. .. code-block:: python @@ -257,7 +259,8 @@ mean and variance are simply added to create a new object. To weight each `ExponentialMovingStatistics`, multiply them by a constant factor. Note how this behaviour differs from the two previous classes. When two `ExponentialMovingStatistics` are added the decay of the left object is used for -the new object. The `len` method is not supported. +the new object. The `len` method as well as minimum and maximum are not +supported. .. code-block:: python @@ -271,19 +274,41 @@ the new object. The `len` method is not supported. >>> exp_stats.mean() 6.187836645 +The `ExponentialMovingCovariance` works equivalently to +`ExponentialMovingStatistics`. + +.. code-block:: python + + >>> exp_cov = ExponentialMovingCovariance( + ... decay=0.9, + ... mean_x=0.0, + ... variance_x=0.0, + ... mean_y=0.0, + ... variance_y=0.0, + ... covariance=0.0, + ... iterable=(), + ... ) + >>> for num in range(10): + ... 
exp_cov.push(num, num + 5) + >>> round(exp_cov.covariance(), 2) + 17.67 + >>> round(exp_cov.correlation(), 2) + 0.96 + `ExponentialMovingStatistics` can also work in a time-based mode i.e. old statistics are not simply discounted by the decay rate each time a value is pushed but an effective decay rate is calculated based on the provided decay rate and the time difference between the last push and the current push. -`ExponentialMovingStatistics` operate in time based mode when a `delay` value is -provided at construction. The delay is the no. of seconds that need to pass for -the effective decay rate to be equal to the provided decay rate. For example, if -a delay of 60 and a delay of 0.9 is provided, than after 60 seconds pass between -calls to push() the effective decay rate for discounting the old statistics -equals 0.9, when 120 seconds pass than it equals 0.9 ** 2 = 0.81 and so on. -The exact formula for calculating the effective decay rate at a given call to -push is: decay ** ((current_timestamp - timestamp_at_last_push) / delay). The -initial timestamp is the timestamp at object construction. +`ExponentialMovingStatistics` operate in time based mode when a `delay` value +> 0 is provided at construction. The delay is the no. of seconds that need to +pass for the effective decay rate to be equal to the provided decay rate. +For example, if a delay of 60 and a decay of 0.9 is provided, then after 60 +seconds pass between calls to push() the effective decay rate for discounting +the old statistics equals 0.9, when 120 seconds pass then it equals +0.9 ** 2 = 0.81 and so on. The exact formula for calculating the effective +decay rate at a given call to push is: +decay ** ((current_timestamp - timestamp_at_last_push) / delay). The initial +timestamp is the timestamp at object construction. .. 
code-block:: python diff --git a/runstats/core.py b/runstats/core.py index df8d422..b7a3e55 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -245,6 +245,7 @@ def make_statistics(state): class ExponentialMovingStatistics: + # pylint: disable=too-many-instance-attributes """Compute exponential mean and variance in a single pass. ExponentialMovingStatistics objects may also be added and copied. @@ -260,7 +261,7 @@ class ExponentialMovingStatistics: def __init__( self, decay=0.9, mean=0.0, variance=0.0, delay=None, iterable=() - ): + ): # pylint: disable=too-many-arguments """Initialize ExponentialMovingStatistics object. Incrementally tracks mean and variance and exponentially discounts old @@ -320,7 +321,9 @@ def delay(self): @delay.setter def delay(self, value): - if value: + if value is not None: + if value <= 0: + raise ValueError('delay must be > 0') self._current_time = ( self._current_time if self._current_time else time.time() ) @@ -425,7 +428,7 @@ def unfreeze(self): def is_time_based(self): """Checks if object is time-based or not i.e. 
delay is set or None""" - return True if self.delay else False + return bool(self.delay) def push(self, value): """Add `value` to the ExponentialMovingStatistics summary.""" diff --git a/tests/test_runstats.py b/tests/test_runstats.py index e128529..aee3e70 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -1143,3 +1143,7 @@ def test_raise_if_not_time_exp_stats(ExponentialMovingStatistics): exp_stats.freeze() exp_stats.unfreeze() exp_stats_time.unfreeze() + + with pytest.raises(ValueError): + exp_stats_time.delay = 0 + exp_stats_time.delay = -1 From 8c42617c117ea3bb5a55fd6ff4c796a91160f6b7 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 28 Feb 2021 21:14:02 +0100 Subject: [PATCH 54/83] updated readme: ExponentialMovingStatistics clear method --- README.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 4ec3ac2..1d9531d 100644 --- a/README.rst +++ b/README.rst @@ -259,7 +259,8 @@ mean and variance are simply added to create a new object. To weight each `ExponentialMovingStatistics`, multiply them by a constant factor. Note how this behaviour differs from the two previous classes. When two `ExponentialMovingStatistics` are added the decay of the left object is used for -the new object. The `len` method as well as minimum and maximum are not +the new object. The clear method resets the object to its state at +construction. The `len` method as well as minimum and maximum are not supported. .. 
code-block:: python From f5a7cddf3cc7424813dba40dfb8343e4ffbc79c3 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Mon, 28 Jun 2021 21:10:44 +0200 Subject: [PATCH 55/83] adjusted ExponentialMovingCovariance to new interface --- runstats/core.py | 47 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/runstats/core.py b/runstats/core.py index a3a2e04..679bca4 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -413,7 +413,7 @@ def copy(self, _=None): return self.fromstate(self.get_state()) def __copy__(self, _=None): - """Copy ExponentialStatistics object.""" + """Copy ExponentialMovingStatistics object.""" return self.copy(_) __deepcopy__ = __copy__ @@ -748,7 +748,9 @@ def decay(self): @decay.setter def decay(self, value): - value = float(value) + self._set_decay(value) + + def _set_decay(self, value): self._xstats.decay = value self._ystats.decay = value self._decay = value @@ -798,8 +800,11 @@ def copy(self, _=None): """Copy ExponentialMovingCovariance object.""" return self.fromstate(self.get_state()) - __copy__ = copy - __deepcopy__ = copy + def __copy__(self, _=None): + """Copy ExponentialMovingCovariance object.""" + return self.copy(_) + + __deepcopy__ = __copy__ def push(self, x_val, y_val): """Add a pair `(x, y)` to the ExponentialMovingCovariance summary.""" @@ -819,28 +824,41 @@ def correlation(self): denom = self._xstats.stddev() * self._ystats.stddev() return self.covariance() / denom - def __add__(self, that): + def _add(self, that): """Add two ExponentialMovingCovariance objects together.""" sigma = self.copy() - sigma += that + sigma._iadd(that) return sigma - def __iadd__(self, that): + def __add__(self, that): + """Add two ExponentialMovingCovariance objects together.""" + return self._add(that) + + def _iadd(self, that): """Add another ExponentialMovingCovariance object to this one.""" self._xstats += that._xstats self._ystats += that._ystats self._covariance += 
that.covariance() return self - def __mul__(self, that): + def __iadd__(self, that): + """Add another ExponentialMovingCovariance object to this one.""" + return self._iadd(that) + + def _mul(self, that): """Multiply by a scalar to change ExponentialMovingCovariance weighting.""" sigma = self.copy() - sigma *= that + sigma._imul(that) return sigma - __rmul__ = __mul__ + def __mul__(self, that): + """Multiply by a scalar to change ExponentialMovingCovariance weighting.""" + if isinstance(self, ExponentialMovingCovariance): + return self._mul(that) + # https://stackoverflow.com/q/33218006/232571 + return that._mul(self) # pragma: no cover - def __imul__(self, that): + def _imul(self, that): """Multiply by a scalar to change ExponentialMovingCovariance weighting in-place. @@ -851,6 +869,13 @@ def __imul__(self, that): self._covariance *= that return self + def __imul__(self, that): + """Multiply by a scalar to change ExponentialMovingCovariance weighting + in-place. + + """ + return self._imul(that) + def make_exponential_covariance(state): """Make Regression object from state.""" From a08e7f679b9c44887860c351c119701982c602c9 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Mon, 28 Jun 2021 22:04:23 +0200 Subject: [PATCH 56/83] implemented Cython for ExponentialMovingCovarinace --- runstats/core.pxd | 44 ++++++++++++++++++++++++++++++++++++++++++ tests/test_runstats.py | 1 + 2 files changed, 45 insertions(+) diff --git a/runstats/core.pxd b/runstats/core.pxd index cec8001..7b77782 100644 --- a/runstats/core.pxd +++ b/runstats/core.pxd @@ -148,3 +148,47 @@ cdef class Regression: cpdef Regression make_regression(state) + + +cdef class ExponentialMovingCovariance: + cdef public ExponentialStatistics _xstats, _ystats + cdef public double _decay, _initial_covariance, _covariance + + cpdef _set_decay(self, double value) + + cpdef clear(self) + + cpdef get_state(self) + + cpdef set_state(self, state) + + cpdef __reduce__(self) + + cpdef ExponentialMovingCovariance 
copy(self, _=*) + + @cython.locals( + alpha=double + ) + cpdef push(self, double x_val, double y_val) + + cpdef double covariance(self) + + @cython.locals( + denom=double + ) + cpdef double correlation(self) + + @cython.locals(sigma=ExponentialMovingCovariance) + cpdef ExponentialMovingCovariance _add(self, ExponentialMovingCovariance that) + + cpdef ExponentialMovingCovariance _iadd(self, ExponentialMovingCovariance that) + + @cython.locals( + sigma=ExponentialMovingCovariance, + ) + cpdef ExponentialMovingCovariance _mul(self, double that) + + cpdef ExponentialMovingCovariance _imul(self, double that) + + +cpdef ExponentialStatistics make_exponential_statistics(state) diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 03e0c0d..d62c125 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -349,6 +349,7 @@ def test_exponential_covariance(ExponentialMovingCovariance): exp_cov_3 = exp_cov * 0.5 + exp_cov * 0.5 assert exp_cov_3 == exp_cov + @pytest.mark.parametrize( 'Statistics,Regression', [ From 0adec4231678fe4dd1ea66c8f6dc58a329aa700d Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Mon, 28 Jun 2021 22:16:34 +0200 Subject: [PATCH 57/83] removed invalid test that was wrongfully taken over during merge --- runstats/core.pxd | 2 +- runstats/core.py | 4 ++++ tests/test_runstats.py | 20 +++----------------- 3 files changed, 8 insertions(+), 18 deletions(-) diff --git a/runstats/core.pxd b/runstats/core.pxd index 7b77782..8a69eb6 100644 --- a/runstats/core.pxd +++ b/runstats/core.pxd @@ -65,7 +65,7 @@ cdef class Statistics: cpdef Statistics make_statistics(state) -cdef class ExponentialStatistics: +cdef class ExponentialStatistics: # TODO: adjust to new interface! 
cdef public double _decay, _mean, _variance cpdef _set_decay(self, double value) diff --git a/runstats/core.py b/runstats/core.py index 679bca4..83c6c9b 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -528,6 +528,8 @@ def __mul__(self, that): # https://stackoverflow.com/q/33218006/232571 return that._mul(self) # pragma: no cover + __rmul__ = __mul__ + def _imul(self, that): """Multiply by a scalar to change ExponentialMovingStatistics weighting in-place. @@ -858,6 +860,8 @@ def __mul__(self, that): # https://stackoverflow.com/q/33218006/232571 return that._mul(self) # pragma: no cover + __rmul__ = __mul__ + def _imul(self, that): """Multiply by a scalar to change ExponentialMovingCovariance weighting in-place. diff --git a/tests/test_runstats.py b/tests/test_runstats.py index d62c125..e55263b 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -300,11 +300,11 @@ def test_exponential_statistics(ExponentialMovingStatistics): 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_bad_decay(ExponentialStatistics): +def test_bad_decay(ExponentialMovingStatistics): with pytest.raises(ValueError): - ExponentialStatistics(decay=2.0) + ExponentialMovingStatistics(decay=2.0) with pytest.raises(ValueError): - ExponentialStatistics(decay=-1.0) + ExponentialMovingStatistics(decay=-1.0) @pytest.mark.parametrize( @@ -1119,20 +1119,6 @@ def test_exponential_statistics_time_based_effective_decay( assert error(exp_stats.variance(), exp_stats_time.variance()) < limit -@pytest.mark.parametrize( - 'Statistics,Regression', - [ - (CoreStatistics, CoreRegression), - (FastStatistics, FastRegression), - ], -) -def test_raise_if_invalid_multiply(Statistics, Regression): - stats1 = Statistics(range(10)) - stats2 = Statistics(range(10)) * 2 - with pytest.raises(TypeError): - stats1 * stats2 - - @pytest.mark.parametrize( 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], From 
63765e94d0a3d9825c01748735856ba91151c505 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sat, 3 Jul 2021 10:01:16 +0200 Subject: [PATCH 58/83] debugged test_add_exponential_statistics, mock was not woring with Cython --- runstats/_core.pxd | 206 ++++++++++ runstats/_core.py | 888 +++++++++++++++++++++++++++++++++++++++++ runstats/core.pxd | 38 +- runstats/core.py | 2 +- tests/test_runstats.py | 10 +- 5 files changed, 1124 insertions(+), 20 deletions(-) create mode 100644 runstats/_core.pxd create mode 100644 runstats/_core.py diff --git a/runstats/_core.pxd b/runstats/_core.pxd new file mode 100644 index 0000000..8759230 --- /dev/null +++ b/runstats/_core.pxd @@ -0,0 +1,206 @@ +import cython + + +cdef public double NAN + + +cdef class Statistics: + + cdef public double _count, _eta, _rho, _tau, _phi, _min, _max + + cpdef clear(self) + + cpdef get_state(self) + + cpdef set_state(self, state) + + cpdef __reduce__(self) + + cpdef Statistics copy(self, _=*) + + @cython.locals( + delta=double, + delta_n=double, + delta_n2=double, + term=double, + ) + cpdef push(self, double value) + + cpdef double minimum(self) + + cpdef double maximum(self) + + cpdef double mean(self) + + cpdef double variance(self, double ddof=*) + + cpdef double stddev(self, double ddof=*) + + cpdef double skewness(self) + + cpdef double kurtosis(self) + + @cython.locals(sigma=Statistics) + cpdef Statistics _add(self, Statistics that) + + @cython.locals( + sum_count=double, + delta=double, + delta2=double, + delta3=double, + delta4=double, + sum_eta=double, + sum_rho=double, + sum_tau=double, + sum_phi=double, + ) + cpdef Statistics _iadd(self, Statistics that) + + @cython.locals(sigma=Statistics) + cpdef Statistics _mul(self, double that) + + cpdef Statistics _imul(self, double that) + + +cpdef Statistics make_statistics(state) + + +cdef class ExponentialMovingStatistics: # TODO: adjust to new interface!, check docstrings, check readme + cdef public double _decay, _mean, _variance, 
_initial_mean, _initial_variance, _delay, _time_diff, _current_time + + cpdef _set_decay(self, double value) + + cpdef _set_delay(self, double value) + + cpdef clear(self) + + cpdef get_state(self) + + cpdef set_state(self, state) + + cpdef __reduce__(self) + + cpdef ExponentialMovingStatistics copy(self, _=*) + + cpdef clear_timer(self) + + cpdef freeze(self) + + cpdef unfreeze(self) + + cpdef is_time_based(self) + + @cython.locals( + alpha=double, + diff=double, + incr=double, + norm_diff=double, + decay=double + ) + cpdef push(self, double value) + + cpdef double mean(self) + + cpdef double variance(self) + + cpdef double stddev(self) + + @cython.locals(sigma=ExponentialMovingStatistics) + cpdef ExponentialMovingStatistics _add(self, ExponentialMovingStatistics that) + + cpdef ExponentialMovingStatistics _iadd(self, ExponentialMovingStatistics that) + + @cython.locals( + sigma=ExponentialMovingStatistics, + ) + cpdef ExponentialMovingStatistics _mul(self, double that) + + cpdef ExponentialMovingStatistics _imul(self, double that) + + +cpdef ExponentialMovingStatistics make_exponential_statistics(state) + + +cdef class Regression: + cdef public Statistics _xstats, _ystats + cdef public double _count, _sxy + + cpdef clear(self) + + cpdef get_state(self) + + cpdef set_state(self, state) + + cpdef __reduce__(self) + + cpdef Regression copy(self, _=*) + + cpdef push(self, double xcoord, double ycoord) + + @cython.locals(sxx=double) + cpdef double slope(self, double ddof=*) + + cpdef double intercept(self, double ddof=*) + + @cython.locals(term=double) + cpdef double correlation(self, double ddof=*) + + @cython.locals(sigma=Regression) + cpdef Regression _add(self, Regression that) + + @cython.locals( + sum_count=double, + sum_xstats=Statistics, + sum_ystats=Statistics, + deltax=double, + deltay=double, + sum_sxy=double, + ) + cpdef Regression _iadd(self, Regression that) + + +cpdef Regression make_regression(state) + + +cdef class ExponentialMovingCovariance: + cdef 
public ExponentialMovingStatistics _xstats, _ystats + cdef public double _decay, _initial_covariance, _covariance + + cpdef _set_decay(self, double value) + + cpdef clear(self) + + cpdef get_state(self) + + cpdef set_state(self, state) + + cpdef __reduce__(self) + + cpdef ExponentialMovingCovariance copy(self, _=*) + + @cython.locals( + alpha=double + ) + cpdef push(self, double x_val, double y_val) + + cpdef double covariance(self) + + @cython.locals( + denom=double + ) + cpdef double correlation(self) + + @cython.locals(sigma=ExponentialMovingCovariance) + cpdef ExponentialMovingCovariance _add(self, ExponentialMovingCovariance that) + + cpdef ExponentialMovingCovariance _iadd(self, ExponentialMovingCovariance that) + + @cython.locals( + sigma=ExponentialMovingCovariance, + ) + cpdef ExponentialMovingCovariance _mul(self, double that) + + cpdef ExponentialMovingCovariance _imul(self, double that) + + +cpdef ExponentialMovingCovariance make_exponential_statistics(state) diff --git a/runstats/_core.py b/runstats/_core.py new file mode 100644 index 0000000..fbef6df --- /dev/null +++ b/runstats/_core.py @@ -0,0 +1,888 @@ +"""Python RunStats + +Compute Statistics, Exponential Statistics, Regression and Exponential +Covariance in a single pass. + +""" + +from __future__ import division + +import time + +NAN = float('nan') + + +class Statistics: + """Compute statistics in a single pass. + + Computes the minimum, maximum, mean, variance, standard deviation, + skewness, and kurtosis. + Statistics objects may also be added together and copied. + + Based entirely on the C++ code by John D Cook at + http://www.johndcook.com/skewness_kurtosis.html + """ + + def __init__(self, iterable=()): + """Initialize Statistics object. + + Iterates optional parameter `iterable` and pushes each value into the + statistics summary. 
+ """ + self.clear() + for value in iterable: + self.push(value) + + def clear(self): + """Clear Statistics object.""" + self._count = self._eta = self._rho = self._tau = self._phi = 0.0 + self._min = self._max = NAN + + def __eq__(self, that): + return self.get_state() == that.get_state() + + def __ne__(self, that): + return self.get_state() != that.get_state() + + def get_state(self): + """Get internal state.""" + return ( + self._count, + self._eta, + self._rho, + self._tau, + self._phi, + self._min, + self._max, + ) + + def set_state(self, state): + """Set internal state.""" + ( + self._count, + self._eta, + self._rho, + self._tau, + self._phi, + self._min, + self._max, + ) = state + + @classmethod + def fromstate(cls, state): + """Return Statistics object from state.""" + stats = cls() + stats.set_state(state) + return stats + + def __reduce__(self): + return make_statistics, (self.get_state(),) + + def copy(self, _=None): + """Copy Statistics object.""" + return self.fromstate(self.get_state()) + + def __copy__(self, _=None): + """Copy Statistics object.""" + return self.copy(_) + + __deepcopy__ = __copy__ + + def __len__(self): + """Number of values that have been pushed.""" + return int(self._count) + + def push(self, value): + """Add `value` to the Statistics summary.""" + if self._count == 0.0: + self._min = value + self._max = value + else: + self._min = min(self._min, value) + self._max = max(self._max, value) + + delta = value - self._eta + delta_n = delta / (self._count + 1) + delta_n2 = delta_n * delta_n + term = delta * delta_n * self._count + + self._count += 1 + self._eta += delta_n + self._phi += ( + term * delta_n2 * (self._count ** 2 - 3 * self._count + 3) + + 6 * delta_n2 * self._rho + - 4 * delta_n * self._tau + ) + self._tau += ( + term * delta_n * (self._count - 2) - 3 * delta_n * self._rho + ) + self._rho += term + + def minimum(self): + """Minimum of values.""" + return self._min + + def maximum(self): + """Maximum of values.""" + return 
self._max + + def mean(self): + """Mean of values.""" + return self._eta + + def variance(self, ddof=1.0): + """Variance of values (with `ddof` degrees of freedom).""" + return self._rho / (self._count - ddof) + + def stddev(self, ddof=1.0): + """Standard deviation of values (with `ddof` degrees of freedom).""" + return self.variance(ddof) ** 0.5 + + def skewness(self): + """Skewness of values.""" + return (self._count ** 0.5) * self._tau / (self._rho ** 1.5) + + def kurtosis(self): + """Kurtosis of values.""" + return self._count * self._phi / (self._rho * self._rho) - 3.0 + + def _add(self, that): + """Add two Statistics objects together.""" + sigma = self.copy() + sigma._iadd(that) + return sigma + + def __add__(self, that): + """Add two Statistics objects together.""" + return self._add(that) + + def _iadd(self, that): + """Add another Statistics object to this one.""" + sum_count = self._count + that._count + if sum_count == 0: + return self + + delta = that._eta - self._eta + delta2 = delta ** 2 + delta3 = delta ** 3 + delta4 = delta ** 4 + + sum_eta = ( + self._count * self._eta + that._count * that._eta + ) / sum_count + + sum_rho = ( + self._rho + + that._rho + + delta2 * self._count * that._count / sum_count + ) + + sum_tau = ( + self._tau + + that._tau + + delta3 + * self._count + * that._count + * (self._count - that._count) + / (sum_count ** 2) + + 3.0 + * delta + * (self._count * that._rho - that._count * self._rho) + / sum_count + ) + + sum_phi = ( + self._phi + + that._phi + + delta4 + * self._count + * that._count + * (self._count ** 2 - self._count * that._count + that._count ** 2) + / (sum_count ** 3) + + 6.0 + * delta2 + * ( + self._count * self._count * that._rho + + that._count * that._count * self._rho + ) + / (sum_count ** 2) + + 4.0 + * delta + * (self._count * that._tau - that._count * self._tau) + / sum_count + ) + + if self._count == 0.0: + self._min = that._min + self._max = that._max + elif that._count != 0.0: + self._min = 
min(self._min, that._min) + self._max = max(self._max, that._max) + + self._count = sum_count + self._eta = sum_eta + self._rho = sum_rho + self._tau = sum_tau + self._phi = sum_phi + + return self + + def __iadd__(self, that): + """Add another Statistics object to this one.""" + return self._iadd(that) + + def _mul(self, that): + """Multiply by a scalar to change Statistics weighting.""" + sigma = self.copy() + sigma._imul(that) + return sigma + + def __mul__(self, that): + """Multiply by a scalar to change Statistics weighting.""" + if isinstance(self, Statistics): + return self._mul(that) + # https://stackoverflow.com/q/33218006/232571 + return that._mul(self) # pragma: no cover + + __rmul__ = __mul__ + + def _imul(self, that): + """Multiply by a scalar to change Statistics weighting in-place.""" + self._count *= that + self._rho *= that + self._tau *= that + self._phi *= that + return self + + def __imul__(self, that): + """Multiply by a scalar to change Statistics weighting in-place.""" + return self._imul(that) + + +def make_statistics(state): + """Make Statistics object from state.""" + return Statistics.fromstate(state) + + +class ExponentialMovingStatistics: + # pylint: disable=too-many-instance-attributes + """Compute exponential mean and variance in a single pass. + + ExponentialMovingStatistics objects may also be added and copied. + + Based on + "Finch, 2009, Incremental Calculation of Weighted Mean and Variance" at + https://nanopdf.com/download/incremental-calculation-of-weighted-mean-and-variance_pdf + + For an explanation of these statistics refer to e.g.: + https://nestedsoftware.com/2018/04/04/exponential-moving-average-on-streaming-data-4hhl.24876.html + + """ + + def __init__( + self, decay=0.9, mean=0.0, variance=0.0, delay=None, iterable=() + ): # pylint: disable=too-many-arguments + """Initialize ExponentialMovingStatistics object. + + Incrementally tracks mean and variance and exponentially discounts old + values. 
+ + Requires a `decay` rate in exclusive range (0, 1) for discounting + previous statistics. Default 0.9 + + Optionally allows setting initial mean and variance. Default 0. + + Iterates optional parameter `iterable` and pushes each value into the + statistics summary. + + Can discount values based on time passed instead of position if delay is + set. Setting delay (in seconds) computes a dynamic + decay rate each time a value is pushed for weighting that value: + dynamic_decay = decay ** (sec_from_last_push / delay). + When the first value x is pushed, sec_from_last_push is the difference + (in sec) between setting the delay from None to a value t (usually at + object construction) and the times when x is being pushed. + When freeze() has been called sec_from_last_push is the difference + between the last call to push() and the time freeze() has been + called(). + Note that at object initialization the values in iterable are weighted + as if delay has not been set. + """ + self.decay = decay + self._initial_mean = mean + self._initial_variance = variance + self._mean = self._initial_mean + self._variance = self._initial_variance + + self._current_time = None + self._time_diff = None + self.delay = None + + for value in iterable: + self.push(value) + + self.delay = delay + + @property + def decay(self): + """Exponential decay rate of old values.""" + return self._decay + + @decay.setter + def decay(self, value): + self._set_decay(value) + + def _set_decay(self, value): + if not 0 <= value <= 1: + raise ValueError('decay must be between 0 and 1') + self._decay = value + + @property + def delay(self): + """Delay in sec for time based discounting""" + return self._delay + + @delay.setter + def delay(self, value): + self._set_delay(value) + + def _set_delay(self, value): + if value is not None: + if value <= 0: + raise ValueError('delay must be > 0') + self._current_time = ( + self._current_time if self._current_time else time.time() + ) + else: + self._current_time = 
None + self._time_diff = None + + self._delay = value + + def clear(self): + """Clear ExponentialMovingStatistics object.""" + self._mean = self._initial_mean + self._variance = self._initial_variance + self._current_time = time.time() if self.is_time_based() else None + self._time_diff = None + + def __eq__(self, that): + return self.get_state() == that.get_state() + + def __ne__(self, that): + return self.get_state() != that.get_state() + + def get_state(self): + """Get internal state.""" + return ( + self._decay, + self._initial_mean, + self._initial_variance, + self._mean, + self._variance, + self._delay, + self._current_time, + self._time_diff, + ) + + def set_state(self, state): + """Set internal state.""" + ( + self._decay, + self._initial_mean, + self._initial_variance, + self._mean, + self._variance, + self._delay, + self._current_time, + self._time_diff, + ) = state + + @classmethod + def fromstate(cls, state): + """Return ExponentialMovingStatistics object from state.""" + stats = cls() + stats.set_state(state) + return stats + + def __reduce__(self): + return make_exponential_statistics, (self.get_state(),) + + def copy(self, _=None): + """Copy ExponentialMovingStatistics object.""" + return self.fromstate(self.get_state()) + + def __copy__(self, _=None): + """Copy ExponentialMovingStatistics object.""" + return self.copy(_) + + __deepcopy__ = __copy__ + + def clear_timer(self): + """Reset time counter""" + if self.is_time_based(): + self._current_time = time.time() + self._time_diff = None + else: + raise AttributeError( + 'clear_timer on a non-time time based (i.e. delay == None) ' + 'ExponentialMovingStatistics object is illegal' + ) + + def freeze(self): + """Freeze time i.e. save the difference between now and the last push""" + if self.is_time_based(): + self._time_diff = time.time() - self._current_time + else: + raise AttributeError( + 'freeze on a non-time time based (i.e. 
delay == None) ' + 'ExponentialMovingStatistics object is illegal' + ) + + def unfreeze(self): + """Unfreeze time i.e. continue counting the time difference""" + if not self.is_time_based(): + raise AttributeError( + 'unfreeze on a non-time time based (i.e. delay == None) ' + 'ExponentialMovingStatistics object is illegal' + ) + + if self._time_diff is None: + raise AttributeError( + 'Time must be freezed first before it can be unfreezed' + ) + + self._current_time = time.time() - self._time_diff + self._time_diff = None + + def is_time_based(self): + """Checks if object is time-based or not i.e. delay is set or None""" + return self.delay is not None + + def push(self, value): + """Add `value` to the ExponentialMovingStatistics summary.""" + if self.is_time_based(): + diff = ( + self._time_diff + if self._time_diff + else (time.time() - self._current_time) + ) + norm_diff = diff / self.delay + decay = self.decay ** norm_diff + self._current_time = time.time() + else: + decay = self.decay + + alpha = 1.0 - decay + diff = value - self._mean + incr = alpha * diff + self._variance += alpha * (decay * diff ** 2 - self._variance) + self._mean += incr + + def mean(self): + """Exponential mean of values.""" + return self._mean + + def variance(self): + """Exponential variance of values.""" + return self._variance + + def stddev(self): + """Exponential standard deviation of values.""" + return self.variance() ** 0.5 + + def _add(self, that): + """Add two ExponentialMovingStatistics objects together.""" + sigma = self.copy() + sigma._iadd(that) + print("test1") # TODO + + if sigma.is_time_based(): + print("test2") # TODO + sigma.clear_timer() + + return sigma + + def __add__(self, that): + """Add two ExponentialMovingStatistics objects together.""" + return self._add(that) + + def _iadd(self, that): + """Add another ExponentialMovingStatistics object to this one.""" + self._mean += that.mean() + self._variance += that.variance() + return self + + def __iadd__(self, that): + 
"""Add another ExponentialMovingStatistics object to this one.""" + return self._iadd(that) + + def _mul(self, that): + """Multiply by a scalar to change ExponentialMovingStatistics weighting.""" + sigma = self.copy() + sigma._imul(that) + return sigma + + def __mul__(self, that): + """Multiply by a scalar to change ExponentialMovingStatistics weighting.""" + if isinstance(self, ExponentialMovingStatistics): + return self._mul(that) + # https://stackoverflow.com/q/33218006/232571 + return that._mul(self) # pragma: no cover + + __rmul__ = __mul__ + + def _imul(self, that): + """Multiply by a scalar to change ExponentialMovingStatistics weighting + in-place. + + """ + self._mean *= that + self._variance *= that + return self + + def __imul__(self, that): + """Multiply by a scalar to change ExponentialMovingStatistics weighting + in-place. + + """ + return self._imul(that) + + +def make_exponential_statistics(state): + """Make ExponentialMovingStatistics object from state.""" + return ExponentialMovingStatistics.fromstate(state) + + +class Regression: + """ + Compute simple linear regression in a single pass. + + Computes the slope, intercept, and correlation. + Regression objects may also be added together and copied. + + Based entirely on the C++ code by John D Cook at + http://www.johndcook.com/running_regression.html + """ + + def __init__(self, iterable=()): + """Initialize Regression object. + + Iterates optional parameter `iterable` and pushes each pair into the + regression summary. 
+ """ + self._xstats = Statistics() + self._ystats = Statistics() + self._count = self._sxy = 0.0 + + for xcoord, ycoord in iterable: + self.push(xcoord, ycoord) + + def __eq__(self, that): + return self.get_state() == that.get_state() + + def __ne__(self, that): + return self.get_state() != that.get_state() + + def clear(self): + """Clear Regression object.""" + self._xstats.clear() + self._ystats.clear() + self._count = self._sxy = 0.0 + + def get_state(self): + """Get internal state.""" + return ( + self._count, + self._sxy, + self._xstats.get_state(), + self._ystats.get_state(), + ) + + def set_state(self, state): + """Set internal state.""" + count, sxy, xstats, ystats = state + self._count = count + self._sxy = sxy + self._xstats.set_state(xstats) + self._ystats.set_state(ystats) + + @classmethod + def fromstate(cls, state): + """Return Regression object from state.""" + regr = cls() + regr.set_state(state) + return regr + + def __reduce__(self): + return make_regression, (self.get_state(),) + + def copy(self, _=None): + """Copy Regression object.""" + return self.fromstate(self.get_state()) + + def __copy__(self, _=None): + """Copy Regression object.""" + return self.copy(_) + + __deepcopy__ = __copy__ + + def __len__(self): + """Number of values that have been pushed.""" + return int(self._count) + + def push(self, xcoord, ycoord): + """Add a pair `(x, y)` to the Regression summary.""" + self._sxy += ( + (self._xstats.mean() - xcoord) + * (self._ystats.mean() - ycoord) + * self._count + / (self._count + 1) + ) + self._xstats.push(xcoord) + self._ystats.push(ycoord) + self._count += 1 + + def slope(self, ddof=1.0): + """Slope of values (with `ddof` degrees of freedom).""" + sxx = self._xstats.variance(ddof) * (self._count - ddof) + return self._sxy / sxx + + def intercept(self, ddof=1.0): + """Intercept of values (with `ddof` degrees of freedom).""" + return self._ystats.mean() - self.slope(ddof) * self._xstats.mean() + + def correlation(self, ddof=1.0): + 
"""Correlation of values (with `ddof` degrees of freedom).""" + term = self._xstats.stddev(ddof) * self._ystats.stddev(ddof) + return self._sxy / ((self._count - ddof) * term) + + def _add(self, that): + """Add two Regression objects together.""" + sigma = self.copy() + sigma._iadd(that) + return sigma + + def __add__(self, that): + """Add two Regression objects together.""" + return self._add(that) + + def _iadd(self, that): + """Add another Regression object to this one.""" + sum_count = self._count + that._count + if sum_count == 0: + return self + + sum_xstats = self._xstats._add(that._xstats) + sum_ystats = self._ystats._add(that._ystats) + + deltax = that._xstats.mean() - self._xstats.mean() + deltay = that._ystats.mean() - self._ystats.mean() + sum_sxy = ( + self._sxy + + that._sxy + + self._count * that._count * deltax * deltay / sum_count + ) + + self._count = sum_count + self._xstats = sum_xstats + self._ystats = sum_ystats + self._sxy = sum_sxy + + return self + + def __iadd__(self, that): + """Add another Regression object to this one.""" + return self._iadd(that) + + +def make_regression(state): + """Make Regression object from state.""" + return Regression.fromstate(state) + + +class ExponentialMovingCovariance: + """Compute exponential moving covariance and correlation in a single pass. + + ExponentialMovingCovariance objects may also be added and copied. + + """ + + def __init__( + self, + decay=0.9, + mean_x=0.0, + variance_x=0.0, + mean_y=0.0, + variance_y=0.0, + covariance=0.0, + iterable=(), + ): # pylint: disable=too-many-arguments + """Initialize ExponentialMovingCovariance object. + + Incrementally tracks covariance and exponentially discounts old + values. + + Requires a `decay` rate in exclusive range (0, 1) for discounting + previous statistics. + + Optionally allows setting initial covariance. Default 0. + + Iterates optional parameter `iterable` and pushes each pair into the + statistics summary. 
+ + """ + self._initial_covariance = covariance + self._covariance = self._initial_covariance + self._xstats = ExponentialMovingStatistics( + decay=decay, mean=mean_x, variance=variance_x + ) + self._ystats = ExponentialMovingStatistics( + decay=decay, mean=mean_y, variance=variance_y + ) + self.decay = decay + + for x_val, y_val in iterable: + self.push(x_val, y_val) + + @property + def decay(self): + """Decay rate for old values.""" + return self._decay + + @decay.setter + def decay(self, value): + self._set_decay(value) + + def _set_decay(self, value): + self._xstats.decay = value + self._ystats.decay = value + self._decay = value + + def clear(self): + """Clear ExponentialMovingCovariance object.""" + self._xstats.clear() + self._ystats.clear() + self._covariance = self._initial_covariance + + def __eq__(self, that): + return self.get_state() == that.get_state() + + def __ne__(self, that): + return self.get_state() != that.get_state() + + def get_state(self): + """Get internal state.""" + return ( + self._decay, + self._initial_covariance, + self._covariance, + self._xstats.get_state(), + self._ystats.get_state(), + ) + + def set_state(self, state): + """Set internal state.""" + decay, initial_covariance, covariance, xstate, ystate = state + self._decay = decay + self._initial_covariance = initial_covariance + self._covariance = covariance + self._xstats.set_state(xstate) + self._ystats.set_state(ystate) + + @classmethod + def fromstate(cls, state): + """Return ExponentialMovingCovariance object from state.""" + stats = cls() + stats.set_state(state) + return stats + + def __reduce__(self): + return make_exponential_covariance, (self.get_state(),) + + def copy(self, _=None): + """Copy ExponentialMovingCovariance object.""" + return self.fromstate(self.get_state()) + + def __copy__(self, _=None): + """Copy ExponentialMovingCovariance object.""" + return self.copy(_) + + __deepcopy__ = __copy__ + + def push(self, x_val, y_val): + """Add a pair `(x, y)` to the 
ExponentialMovingCovariance summary.""" + self._xstats.push(x_val) + alpha = 1.0 - self.decay + self._covariance = self.decay * self.covariance() + alpha * ( + x_val - self._xstats.mean() + ) * (y_val - self._ystats.mean()) + self._ystats.push(y_val) + + def covariance(self): + """Covariance of values""" + return self._covariance + + def correlation(self): + """Correlation of values""" + denom = self._xstats.stddev() * self._ystats.stddev() + return self.covariance() / denom + + def _add(self, that): + """Add two ExponentialMovingCovariance objects together.""" + sigma = self.copy() + sigma._iadd(that) + return sigma + + def __add__(self, that): + """Add two ExponentialMovingCovariance objects together.""" + return self._add(that) + + def _iadd(self, that): + """Add another ExponentialMovingCovariance object to this one.""" + self._xstats += that._xstats + self._ystats += that._ystats + self._covariance += that.covariance() + return self + + def __iadd__(self, that): + """Add another ExponentialMovingCovariance object to this one.""" + return self._iadd(that) + + def _mul(self, that): + """Multiply by a scalar to change ExponentialMovingCovariance weighting.""" + sigma = self.copy() + sigma._imul(that) + return sigma + + def __mul__(self, that): + """Multiply by a scalar to change ExponentialMovingCovariance weighting.""" + if isinstance(self, ExponentialMovingCovariance): + return self._mul(that) + # https://stackoverflow.com/q/33218006/232571 + return that._mul(self) # pragma: no cover + + __rmul__ = __mul__ + + def _imul(self, that): + """Multiply by a scalar to change ExponentialMovingCovariance weighting + in-place. + + """ + that = float(that) + self._xstats *= that + self._ystats *= that + self._covariance *= that + return self + + def __imul__(self, that): + """Multiply by a scalar to change ExponentialMovingCovariance weighting + in-place. 
+ + """ + return self._imul(that) + + +def make_exponential_covariance(state): + """Make Regression object from state.""" + return ExponentialMovingCovariance.fromstate(state) diff --git a/runstats/core.pxd b/runstats/core.pxd index 8a69eb6..8759230 100644 --- a/runstats/core.pxd +++ b/runstats/core.pxd @@ -65,12 +65,14 @@ cdef class Statistics: cpdef Statistics make_statistics(state) -cdef class ExponentialStatistics: # TODO: adjust to new interface! - cdef public double _decay, _mean, _variance +cdef class ExponentialMovingStatistics: # TODO: adjust to new interface!, check docstrings, check readme + cdef public double _decay, _mean, _variance, _initial_mean, _initial_variance, _delay, _time_diff, _current_time cpdef _set_decay(self, double value) - cpdef clear(self, double mean=*, double variance=*, decay=*) + cpdef _set_delay(self, double value) + + cpdef clear(self) cpdef get_state(self) @@ -78,12 +80,22 @@ cdef class ExponentialStatistics: # TODO: adjust to new interface! cpdef __reduce__(self) - cpdef ExponentialStatistics copy(self, _=*) + cpdef ExponentialMovingStatistics copy(self, _=*) + + cpdef clear_timer(self) + + cpdef freeze(self) + + cpdef unfreeze(self) + + cpdef is_time_based(self) @cython.locals( alpha=double, diff=double, incr=double, + norm_diff=double, + decay=double ) cpdef push(self, double value) @@ -93,20 +105,20 @@ cdef class ExponentialStatistics: # TODO: adjust to new interface! 
cpdef double stddev(self) - @cython.locals(sigma=ExponentialStatistics) - cpdef ExponentialStatistics _add(self, ExponentialStatistics that) + @cython.locals(sigma=ExponentialMovingStatistics) + cpdef ExponentialMovingStatistics _add(self, ExponentialMovingStatistics that) - cpdef ExponentialStatistics _iadd(self, ExponentialStatistics that) + cpdef ExponentialMovingStatistics _iadd(self, ExponentialMovingStatistics that) @cython.locals( - sigma=ExponentialStatistics, + sigma=ExponentialMovingStatistics, ) - cpdef ExponentialStatistics _mul(self, double that) + cpdef ExponentialMovingStatistics _mul(self, double that) - cpdef ExponentialStatistics _imul(self, double that) + cpdef ExponentialMovingStatistics _imul(self, double that) -cpdef ExponentialStatistics make_exponential_statistics(state) +cpdef ExponentialMovingStatistics make_exponential_statistics(state) cdef class Regression: @@ -151,7 +163,7 @@ cpdef Regression make_regression(state) cdef class ExponentialMovingCovariance: - cdef public ExponentialStatistics _xstats, _ystats + cdef public ExponentialMovingStatistics _xstats, _ystats cdef public double _decay, _initial_covariance, _covariance cpdef _set_decay(self, double value) @@ -191,4 +203,4 @@ cdef class ExponentialMovingCovariance: cpdef ExponentialMovingCovariance _imul(self, double that) -cpdef ExponentialStatistics make_exponential_statistics(state) +cpdef ExponentialMovingCovariance make_exponential_statistics(state) diff --git a/runstats/core.py b/runstats/core.py index 83c6c9b..ddc1134 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -457,7 +457,7 @@ def unfreeze(self): def is_time_based(self): """Checks if object is time-based or not i.e. 
delay is set or None""" - return bool(self.delay) + return self.delay is not None def push(self, value): """Add `value` to the ExponentialMovingStatistics summary.""" diff --git a/tests/test_runstats.py b/tests/test_runstats.py index e55263b..247bc9b 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -8,7 +8,6 @@ import pickle import random import time -from unittest.mock import patch import pytest @@ -365,14 +364,11 @@ def test_add_statistics(Statistics, Regression): stats0 += stats10 -@patch('runstats.ExponentialMovingStatistics.clear_timer') @pytest.mark.parametrize( 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_add_exponential_statistics( - clear_timer_mock, ExponentialMovingStatistics -): +def test_add_exponential_statistics(ExponentialMovingStatistics): exp_stats0 = ExponentialMovingStatistics(0.9) exp_stats10 = ExponentialMovingStatistics(0.9, iterable=range(10)) assert (exp_stats0 + exp_stats10) == exp_stats10 @@ -387,14 +383,16 @@ def test_add_exponential_statistics( exp_stats0.decay = 0.8 exp_stats0.delay = 60 exp_stats10.delay = 120 + exp_stats0._time_diff = -1 # To check if clear_timer was called for add and not for iadd exp_stats = exp_stats0 + exp_stats10 assert exp_stats.delay == exp_stats0.delay != exp_stats10.delay assert exp_stats.decay == exp_stats0.decay != exp_stats10.decay + assert exp_stats._time_diff is None exp_stats0 += exp_stats10 assert exp_stats0.decay == 0.8 assert exp_stats0.delay == 60 - clear_timer_mock.assert_called_once() + assert exp_stats0._time_diff == -1 @pytest.mark.parametrize( From 8a483a2e8b8f05fbc1d6d58ca7681b3422db1516 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sat, 3 Jul 2021 10:04:40 +0200 Subject: [PATCH 59/83] removed _core.pxd and _core.py incorrectly checked in to repo, added these files to gitignore --- .gitignore | 4 + runstats/_core.pxd | 206 ----------- runstats/_core.py | 888 --------------------------------------------- 3 files 
changed, 4 insertions(+), 1094 deletions(-) delete mode 100644 runstats/_core.pxd delete mode 100644 runstats/_core.py diff --git a/.gitignore b/.gitignore index 471a0de..8ff8e70 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,7 @@ # macOS metadata .DS_Store + +# compiled files +runstats/_core.pxd +runstats/_core.py \ No newline at end of file diff --git a/runstats/_core.pxd b/runstats/_core.pxd deleted file mode 100644 index 8759230..0000000 --- a/runstats/_core.pxd +++ /dev/null @@ -1,206 +0,0 @@ -import cython - - -cdef public double NAN - - -cdef class Statistics: - - cdef public double _count, _eta, _rho, _tau, _phi, _min, _max - - cpdef clear(self) - - cpdef get_state(self) - - cpdef set_state(self, state) - - cpdef __reduce__(self) - - cpdef Statistics copy(self, _=*) - - @cython.locals( - delta=double, - delta_n=double, - delta_n2=double, - term=double, - ) - cpdef push(self, double value) - - cpdef double minimum(self) - - cpdef double maximum(self) - - cpdef double mean(self) - - cpdef double variance(self, double ddof=*) - - cpdef double stddev(self, double ddof=*) - - cpdef double skewness(self) - - cpdef double kurtosis(self) - - @cython.locals(sigma=Statistics) - cpdef Statistics _add(self, Statistics that) - - @cython.locals( - sum_count=double, - delta=double, - delta2=double, - delta3=double, - delta4=double, - sum_eta=double, - sum_rho=double, - sum_tau=double, - sum_phi=double, - ) - cpdef Statistics _iadd(self, Statistics that) - - @cython.locals(sigma=Statistics) - cpdef Statistics _mul(self, double that) - - cpdef Statistics _imul(self, double that) - - -cpdef Statistics make_statistics(state) - - -cdef class ExponentialMovingStatistics: # TODO: adjust to new interface!, check docstrings, check readme - cdef public double _decay, _mean, _variance, _initial_mean, _initial_variance, _delay, _time_diff, _current_time - - cpdef _set_decay(self, double value) - - cpdef _set_delay(self, double value) - - cpdef clear(self) - - cpdef 
get_state(self) - - cpdef set_state(self, state) - - cpdef __reduce__(self) - - cpdef ExponentialMovingStatistics copy(self, _=*) - - cpdef clear_timer(self) - - cpdef freeze(self) - - cpdef unfreeze(self) - - cpdef is_time_based(self) - - @cython.locals( - alpha=double, - diff=double, - incr=double, - norm_diff=double, - decay=double - ) - cpdef push(self, double value) - - cpdef double mean(self) - - cpdef double variance(self) - - cpdef double stddev(self) - - @cython.locals(sigma=ExponentialMovingStatistics) - cpdef ExponentialMovingStatistics _add(self, ExponentialMovingStatistics that) - - cpdef ExponentialMovingStatistics _iadd(self, ExponentialMovingStatistics that) - - @cython.locals( - sigma=ExponentialMovingStatistics, - ) - cpdef ExponentialMovingStatistics _mul(self, double that) - - cpdef ExponentialMovingStatistics _imul(self, double that) - - -cpdef ExponentialMovingStatistics make_exponential_statistics(state) - - -cdef class Regression: - cdef public Statistics _xstats, _ystats - cdef public double _count, _sxy - - cpdef clear(self) - - cpdef get_state(self) - - cpdef set_state(self, state) - - cpdef __reduce__(self) - - cpdef Regression copy(self, _=*) - - cpdef push(self, double xcoord, double ycoord) - - @cython.locals(sxx=double) - cpdef double slope(self, double ddof=*) - - cpdef double intercept(self, double ddof=*) - - @cython.locals(term=double) - cpdef double correlation(self, double ddof=*) - - @cython.locals(sigma=Regression) - cpdef Regression _add(self, Regression that) - - @cython.locals( - sum_count=double, - sum_xstats=Statistics, - sum_ystats=Statistics, - deltax=double, - deltay=double, - sum_sxy=double, - ) - cpdef Regression _iadd(self, Regression that) - - -cpdef Regression make_regression(state) - - -cdef class ExponentialMovingCovariance: - cdef public ExponentialMovingStatistics _xstats, _ystats - cdef public double _decay, _initial_covariance, _covariance - - cpdef _set_decay(self, double value) - - cpdef clear(self) - - 
cpdef get_state(self) - - cpdef set_state(self, state) - - cpdef __reduce__(self) - - cpdef ExponentialMovingCovariance copy(self, _=*) - - @cython.locals( - alpha=double - ) - cpdef push(self, double x_val, double y_val) - - cpdef double covariance(self) - - @cython.locals( - denom=double - ) - cpdef double correlation(self) - - @cython.locals(sigma=ExponentialMovingCovariance) - cpdef ExponentialMovingCovariance _add(self, ExponentialMovingCovariance that) - - cpdef ExponentialMovingCovariance _iadd(self, ExponentialMovingCovariance that) - - @cython.locals( - sigma=ExponentialMovingCovariance, - ) - cpdef ExponentialMovingCovariance _mul(self, double that) - - cpdef ExponentialMovingCovariance _imul(self, double that) - - -cpdef ExponentialMovingCovariance make_exponential_statistics(state) diff --git a/runstats/_core.py b/runstats/_core.py deleted file mode 100644 index fbef6df..0000000 --- a/runstats/_core.py +++ /dev/null @@ -1,888 +0,0 @@ -"""Python RunStats - -Compute Statistics, Exponential Statistics, Regression and Exponential -Covariance in a single pass. - -""" - -from __future__ import division - -import time - -NAN = float('nan') - - -class Statistics: - """Compute statistics in a single pass. - - Computes the minimum, maximum, mean, variance, standard deviation, - skewness, and kurtosis. - Statistics objects may also be added together and copied. - - Based entirely on the C++ code by John D Cook at - http://www.johndcook.com/skewness_kurtosis.html - """ - - def __init__(self, iterable=()): - """Initialize Statistics object. - - Iterates optional parameter `iterable` and pushes each value into the - statistics summary. 
- """ - self.clear() - for value in iterable: - self.push(value) - - def clear(self): - """Clear Statistics object.""" - self._count = self._eta = self._rho = self._tau = self._phi = 0.0 - self._min = self._max = NAN - - def __eq__(self, that): - return self.get_state() == that.get_state() - - def __ne__(self, that): - return self.get_state() != that.get_state() - - def get_state(self): - """Get internal state.""" - return ( - self._count, - self._eta, - self._rho, - self._tau, - self._phi, - self._min, - self._max, - ) - - def set_state(self, state): - """Set internal state.""" - ( - self._count, - self._eta, - self._rho, - self._tau, - self._phi, - self._min, - self._max, - ) = state - - @classmethod - def fromstate(cls, state): - """Return Statistics object from state.""" - stats = cls() - stats.set_state(state) - return stats - - def __reduce__(self): - return make_statistics, (self.get_state(),) - - def copy(self, _=None): - """Copy Statistics object.""" - return self.fromstate(self.get_state()) - - def __copy__(self, _=None): - """Copy Statistics object.""" - return self.copy(_) - - __deepcopy__ = __copy__ - - def __len__(self): - """Number of values that have been pushed.""" - return int(self._count) - - def push(self, value): - """Add `value` to the Statistics summary.""" - if self._count == 0.0: - self._min = value - self._max = value - else: - self._min = min(self._min, value) - self._max = max(self._max, value) - - delta = value - self._eta - delta_n = delta / (self._count + 1) - delta_n2 = delta_n * delta_n - term = delta * delta_n * self._count - - self._count += 1 - self._eta += delta_n - self._phi += ( - term * delta_n2 * (self._count ** 2 - 3 * self._count + 3) - + 6 * delta_n2 * self._rho - - 4 * delta_n * self._tau - ) - self._tau += ( - term * delta_n * (self._count - 2) - 3 * delta_n * self._rho - ) - self._rho += term - - def minimum(self): - """Minimum of values.""" - return self._min - - def maximum(self): - """Maximum of values.""" - return 
self._max - - def mean(self): - """Mean of values.""" - return self._eta - - def variance(self, ddof=1.0): - """Variance of values (with `ddof` degrees of freedom).""" - return self._rho / (self._count - ddof) - - def stddev(self, ddof=1.0): - """Standard deviation of values (with `ddof` degrees of freedom).""" - return self.variance(ddof) ** 0.5 - - def skewness(self): - """Skewness of values.""" - return (self._count ** 0.5) * self._tau / (self._rho ** 1.5) - - def kurtosis(self): - """Kurtosis of values.""" - return self._count * self._phi / (self._rho * self._rho) - 3.0 - - def _add(self, that): - """Add two Statistics objects together.""" - sigma = self.copy() - sigma._iadd(that) - return sigma - - def __add__(self, that): - """Add two Statistics objects together.""" - return self._add(that) - - def _iadd(self, that): - """Add another Statistics object to this one.""" - sum_count = self._count + that._count - if sum_count == 0: - return self - - delta = that._eta - self._eta - delta2 = delta ** 2 - delta3 = delta ** 3 - delta4 = delta ** 4 - - sum_eta = ( - self._count * self._eta + that._count * that._eta - ) / sum_count - - sum_rho = ( - self._rho - + that._rho - + delta2 * self._count * that._count / sum_count - ) - - sum_tau = ( - self._tau - + that._tau - + delta3 - * self._count - * that._count - * (self._count - that._count) - / (sum_count ** 2) - + 3.0 - * delta - * (self._count * that._rho - that._count * self._rho) - / sum_count - ) - - sum_phi = ( - self._phi - + that._phi - + delta4 - * self._count - * that._count - * (self._count ** 2 - self._count * that._count + that._count ** 2) - / (sum_count ** 3) - + 6.0 - * delta2 - * ( - self._count * self._count * that._rho - + that._count * that._count * self._rho - ) - / (sum_count ** 2) - + 4.0 - * delta - * (self._count * that._tau - that._count * self._tau) - / sum_count - ) - - if self._count == 0.0: - self._min = that._min - self._max = that._max - elif that._count != 0.0: - self._min = 
min(self._min, that._min) - self._max = max(self._max, that._max) - - self._count = sum_count - self._eta = sum_eta - self._rho = sum_rho - self._tau = sum_tau - self._phi = sum_phi - - return self - - def __iadd__(self, that): - """Add another Statistics object to this one.""" - return self._iadd(that) - - def _mul(self, that): - """Multiply by a scalar to change Statistics weighting.""" - sigma = self.copy() - sigma._imul(that) - return sigma - - def __mul__(self, that): - """Multiply by a scalar to change Statistics weighting.""" - if isinstance(self, Statistics): - return self._mul(that) - # https://stackoverflow.com/q/33218006/232571 - return that._mul(self) # pragma: no cover - - __rmul__ = __mul__ - - def _imul(self, that): - """Multiply by a scalar to change Statistics weighting in-place.""" - self._count *= that - self._rho *= that - self._tau *= that - self._phi *= that - return self - - def __imul__(self, that): - """Multiply by a scalar to change Statistics weighting in-place.""" - return self._imul(that) - - -def make_statistics(state): - """Make Statistics object from state.""" - return Statistics.fromstate(state) - - -class ExponentialMovingStatistics: - # pylint: disable=too-many-instance-attributes - """Compute exponential mean and variance in a single pass. - - ExponentialMovingStatistics objects may also be added and copied. - - Based on - "Finch, 2009, Incremental Calculation of Weighted Mean and Variance" at - https://nanopdf.com/download/incremental-calculation-of-weighted-mean-and-variance_pdf - - For an explanation of these statistics refer to e.g.: - https://nestedsoftware.com/2018/04/04/exponential-moving-average-on-streaming-data-4hhl.24876.html - - """ - - def __init__( - self, decay=0.9, mean=0.0, variance=0.0, delay=None, iterable=() - ): # pylint: disable=too-many-arguments - """Initialize ExponentialMovingStatistics object. - - Incrementally tracks mean and variance and exponentially discounts old - values. 
- - Requires a `decay` rate in exclusive range (0, 1) for discounting - previous statistics. Default 0.9 - - Optionally allows setting initial mean and variance. Default 0. - - Iterates optional parameter `iterable` and pushes each value into the - statistics summary. - - Can discount values based on time passed instead of position if delay is - set. Setting delay (in seconds) computes a dynamic - decay rate each time a value is pushed for weighting that value: - dynamic_decay = decay ** (sec_from_last_push / delay). - When the first value x is pushed, sec_from_last_push is the difference - (in sec) between setting the delay from None to a value t (usually at - object construction) and the times when x is being pushed. - When freeze() has been called sec_from_last_push is the difference - between the last call to push() and the time freeze() has been - called(). - Note that at object initialization the values in iterable are weighted - as if delay has not been set. - """ - self.decay = decay - self._initial_mean = mean - self._initial_variance = variance - self._mean = self._initial_mean - self._variance = self._initial_variance - - self._current_time = None - self._time_diff = None - self.delay = None - - for value in iterable: - self.push(value) - - self.delay = delay - - @property - def decay(self): - """Exponential decay rate of old values.""" - return self._decay - - @decay.setter - def decay(self, value): - self._set_decay(value) - - def _set_decay(self, value): - if not 0 <= value <= 1: - raise ValueError('decay must be between 0 and 1') - self._decay = value - - @property - def delay(self): - """Delay in sec for time based discounting""" - return self._delay - - @delay.setter - def delay(self, value): - self._set_delay(value) - - def _set_delay(self, value): - if value is not None: - if value <= 0: - raise ValueError('delay must be > 0') - self._current_time = ( - self._current_time if self._current_time else time.time() - ) - else: - self._current_time = 
None - self._time_diff = None - - self._delay = value - - def clear(self): - """Clear ExponentialMovingStatistics object.""" - self._mean = self._initial_mean - self._variance = self._initial_variance - self._current_time = time.time() if self.is_time_based() else None - self._time_diff = None - - def __eq__(self, that): - return self.get_state() == that.get_state() - - def __ne__(self, that): - return self.get_state() != that.get_state() - - def get_state(self): - """Get internal state.""" - return ( - self._decay, - self._initial_mean, - self._initial_variance, - self._mean, - self._variance, - self._delay, - self._current_time, - self._time_diff, - ) - - def set_state(self, state): - """Set internal state.""" - ( - self._decay, - self._initial_mean, - self._initial_variance, - self._mean, - self._variance, - self._delay, - self._current_time, - self._time_diff, - ) = state - - @classmethod - def fromstate(cls, state): - """Return ExponentialMovingStatistics object from state.""" - stats = cls() - stats.set_state(state) - return stats - - def __reduce__(self): - return make_exponential_statistics, (self.get_state(),) - - def copy(self, _=None): - """Copy ExponentialMovingStatistics object.""" - return self.fromstate(self.get_state()) - - def __copy__(self, _=None): - """Copy ExponentialMovingStatistics object.""" - return self.copy(_) - - __deepcopy__ = __copy__ - - def clear_timer(self): - """Reset time counter""" - if self.is_time_based(): - self._current_time = time.time() - self._time_diff = None - else: - raise AttributeError( - 'clear_timer on a non-time time based (i.e. delay == None) ' - 'ExponentialMovingStatistics object is illegal' - ) - - def freeze(self): - """Freeze time i.e. save the difference between now and the last push""" - if self.is_time_based(): - self._time_diff = time.time() - self._current_time - else: - raise AttributeError( - 'freeze on a non-time time based (i.e. 
delay == None) ' - 'ExponentialMovingStatistics object is illegal' - ) - - def unfreeze(self): - """Unfreeze time i.e. continue counting the time difference""" - if not self.is_time_based(): - raise AttributeError( - 'unfreeze on a non-time time based (i.e. delay == None) ' - 'ExponentialMovingStatistics object is illegal' - ) - - if self._time_diff is None: - raise AttributeError( - 'Time must be freezed first before it can be unfreezed' - ) - - self._current_time = time.time() - self._time_diff - self._time_diff = None - - def is_time_based(self): - """Checks if object is time-based or not i.e. delay is set or None""" - return self.delay is not None - - def push(self, value): - """Add `value` to the ExponentialMovingStatistics summary.""" - if self.is_time_based(): - diff = ( - self._time_diff - if self._time_diff - else (time.time() - self._current_time) - ) - norm_diff = diff / self.delay - decay = self.decay ** norm_diff - self._current_time = time.time() - else: - decay = self.decay - - alpha = 1.0 - decay - diff = value - self._mean - incr = alpha * diff - self._variance += alpha * (decay * diff ** 2 - self._variance) - self._mean += incr - - def mean(self): - """Exponential mean of values.""" - return self._mean - - def variance(self): - """Exponential variance of values.""" - return self._variance - - def stddev(self): - """Exponential standard deviation of values.""" - return self.variance() ** 0.5 - - def _add(self, that): - """Add two ExponentialMovingStatistics objects together.""" - sigma = self.copy() - sigma._iadd(that) - print("test1") # TODO - - if sigma.is_time_based(): - print("test2") # TODO - sigma.clear_timer() - - return sigma - - def __add__(self, that): - """Add two ExponentialMovingStatistics objects together.""" - return self._add(that) - - def _iadd(self, that): - """Add another ExponentialMovingStatistics object to this one.""" - self._mean += that.mean() - self._variance += that.variance() - return self - - def __iadd__(self, that): - 
"""Add another ExponentialMovingStatistics object to this one.""" - return self._iadd(that) - - def _mul(self, that): - """Multiply by a scalar to change ExponentialMovingStatistics weighting.""" - sigma = self.copy() - sigma._imul(that) - return sigma - - def __mul__(self, that): - """Multiply by a scalar to change ExponentialMovingStatistics weighting.""" - if isinstance(self, ExponentialMovingStatistics): - return self._mul(that) - # https://stackoverflow.com/q/33218006/232571 - return that._mul(self) # pragma: no cover - - __rmul__ = __mul__ - - def _imul(self, that): - """Multiply by a scalar to change ExponentialMovingStatistics weighting - in-place. - - """ - self._mean *= that - self._variance *= that - return self - - def __imul__(self, that): - """Multiply by a scalar to change ExponentialMovingStatistics weighting - in-place. - - """ - return self._imul(that) - - -def make_exponential_statistics(state): - """Make ExponentialMovingStatistics object from state.""" - return ExponentialMovingStatistics.fromstate(state) - - -class Regression: - """ - Compute simple linear regression in a single pass. - - Computes the slope, intercept, and correlation. - Regression objects may also be added together and copied. - - Based entirely on the C++ code by John D Cook at - http://www.johndcook.com/running_regression.html - """ - - def __init__(self, iterable=()): - """Initialize Regression object. - - Iterates optional parameter `iterable` and pushes each pair into the - regression summary. 
- """ - self._xstats = Statistics() - self._ystats = Statistics() - self._count = self._sxy = 0.0 - - for xcoord, ycoord in iterable: - self.push(xcoord, ycoord) - - def __eq__(self, that): - return self.get_state() == that.get_state() - - def __ne__(self, that): - return self.get_state() != that.get_state() - - def clear(self): - """Clear Regression object.""" - self._xstats.clear() - self._ystats.clear() - self._count = self._sxy = 0.0 - - def get_state(self): - """Get internal state.""" - return ( - self._count, - self._sxy, - self._xstats.get_state(), - self._ystats.get_state(), - ) - - def set_state(self, state): - """Set internal state.""" - count, sxy, xstats, ystats = state - self._count = count - self._sxy = sxy - self._xstats.set_state(xstats) - self._ystats.set_state(ystats) - - @classmethod - def fromstate(cls, state): - """Return Regression object from state.""" - regr = cls() - regr.set_state(state) - return regr - - def __reduce__(self): - return make_regression, (self.get_state(),) - - def copy(self, _=None): - """Copy Regression object.""" - return self.fromstate(self.get_state()) - - def __copy__(self, _=None): - """Copy Regression object.""" - return self.copy(_) - - __deepcopy__ = __copy__ - - def __len__(self): - """Number of values that have been pushed.""" - return int(self._count) - - def push(self, xcoord, ycoord): - """Add a pair `(x, y)` to the Regression summary.""" - self._sxy += ( - (self._xstats.mean() - xcoord) - * (self._ystats.mean() - ycoord) - * self._count - / (self._count + 1) - ) - self._xstats.push(xcoord) - self._ystats.push(ycoord) - self._count += 1 - - def slope(self, ddof=1.0): - """Slope of values (with `ddof` degrees of freedom).""" - sxx = self._xstats.variance(ddof) * (self._count - ddof) - return self._sxy / sxx - - def intercept(self, ddof=1.0): - """Intercept of values (with `ddof` degrees of freedom).""" - return self._ystats.mean() - self.slope(ddof) * self._xstats.mean() - - def correlation(self, ddof=1.0): - 
"""Correlation of values (with `ddof` degrees of freedom).""" - term = self._xstats.stddev(ddof) * self._ystats.stddev(ddof) - return self._sxy / ((self._count - ddof) * term) - - def _add(self, that): - """Add two Regression objects together.""" - sigma = self.copy() - sigma._iadd(that) - return sigma - - def __add__(self, that): - """Add two Regression objects together.""" - return self._add(that) - - def _iadd(self, that): - """Add another Regression object to this one.""" - sum_count = self._count + that._count - if sum_count == 0: - return self - - sum_xstats = self._xstats._add(that._xstats) - sum_ystats = self._ystats._add(that._ystats) - - deltax = that._xstats.mean() - self._xstats.mean() - deltay = that._ystats.mean() - self._ystats.mean() - sum_sxy = ( - self._sxy - + that._sxy - + self._count * that._count * deltax * deltay / sum_count - ) - - self._count = sum_count - self._xstats = sum_xstats - self._ystats = sum_ystats - self._sxy = sum_sxy - - return self - - def __iadd__(self, that): - """Add another Regression object to this one.""" - return self._iadd(that) - - -def make_regression(state): - """Make Regression object from state.""" - return Regression.fromstate(state) - - -class ExponentialMovingCovariance: - """Compute exponential moving covariance and correlation in a single pass. - - ExponentialMovingCovariance objects may also be added and copied. - - """ - - def __init__( - self, - decay=0.9, - mean_x=0.0, - variance_x=0.0, - mean_y=0.0, - variance_y=0.0, - covariance=0.0, - iterable=(), - ): # pylint: disable=too-many-arguments - """Initialize ExponentialMovingCovariance object. - - Incrementally tracks covariance and exponentially discounts old - values. - - Requires a `decay` rate in exclusive range (0, 1) for discounting - previous statistics. - - Optionally allows setting initial covariance. Default 0. - - Iterates optional parameter `iterable` and pushes each pair into the - statistics summary. 
- - """ - self._initial_covariance = covariance - self._covariance = self._initial_covariance - self._xstats = ExponentialMovingStatistics( - decay=decay, mean=mean_x, variance=variance_x - ) - self._ystats = ExponentialMovingStatistics( - decay=decay, mean=mean_y, variance=variance_y - ) - self.decay = decay - - for x_val, y_val in iterable: - self.push(x_val, y_val) - - @property - def decay(self): - """Decay rate for old values.""" - return self._decay - - @decay.setter - def decay(self, value): - self._set_decay(value) - - def _set_decay(self, value): - self._xstats.decay = value - self._ystats.decay = value - self._decay = value - - def clear(self): - """Clear ExponentialMovingCovariance object.""" - self._xstats.clear() - self._ystats.clear() - self._covariance = self._initial_covariance - - def __eq__(self, that): - return self.get_state() == that.get_state() - - def __ne__(self, that): - return self.get_state() != that.get_state() - - def get_state(self): - """Get internal state.""" - return ( - self._decay, - self._initial_covariance, - self._covariance, - self._xstats.get_state(), - self._ystats.get_state(), - ) - - def set_state(self, state): - """Set internal state.""" - decay, initial_covariance, covariance, xstate, ystate = state - self._decay = decay - self._initial_covariance = initial_covariance - self._covariance = covariance - self._xstats.set_state(xstate) - self._ystats.set_state(ystate) - - @classmethod - def fromstate(cls, state): - """Return ExponentialMovingCovariance object from state.""" - stats = cls() - stats.set_state(state) - return stats - - def __reduce__(self): - return make_exponential_covariance, (self.get_state(),) - - def copy(self, _=None): - """Copy ExponentialMovingCovariance object.""" - return self.fromstate(self.get_state()) - - def __copy__(self, _=None): - """Copy ExponentialMovingCovariance object.""" - return self.copy(_) - - __deepcopy__ = __copy__ - - def push(self, x_val, y_val): - """Add a pair `(x, y)` to the 
ExponentialMovingCovariance summary.""" - self._xstats.push(x_val) - alpha = 1.0 - self.decay - self._covariance = self.decay * self.covariance() + alpha * ( - x_val - self._xstats.mean() - ) * (y_val - self._ystats.mean()) - self._ystats.push(y_val) - - def covariance(self): - """Covariance of values""" - return self._covariance - - def correlation(self): - """Correlation of values""" - denom = self._xstats.stddev() * self._ystats.stddev() - return self.covariance() / denom - - def _add(self, that): - """Add two ExponentialMovingCovariance objects together.""" - sigma = self.copy() - sigma._iadd(that) - return sigma - - def __add__(self, that): - """Add two ExponentialMovingCovariance objects together.""" - return self._add(that) - - def _iadd(self, that): - """Add another ExponentialMovingCovariance object to this one.""" - self._xstats += that._xstats - self._ystats += that._ystats - self._covariance += that.covariance() - return self - - def __iadd__(self, that): - """Add another ExponentialMovingCovariance object to this one.""" - return self._iadd(that) - - def _mul(self, that): - """Multiply by a scalar to change ExponentialMovingCovariance weighting.""" - sigma = self.copy() - sigma._imul(that) - return sigma - - def __mul__(self, that): - """Multiply by a scalar to change ExponentialMovingCovariance weighting.""" - if isinstance(self, ExponentialMovingCovariance): - return self._mul(that) - # https://stackoverflow.com/q/33218006/232571 - return that._mul(self) # pragma: no cover - - __rmul__ = __mul__ - - def _imul(self, that): - """Multiply by a scalar to change ExponentialMovingCovariance weighting - in-place. - - """ - that = float(that) - self._xstats *= that - self._ystats *= that - self._covariance *= that - return self - - def __imul__(self, that): - """Multiply by a scalar to change ExponentialMovingCovariance weighting - in-place. 
- - """ - return self._imul(that) - - -def make_exponential_covariance(state): - """Make Regression object from state.""" - return ExponentialMovingCovariance.fromstate(state) From a50f0ea197a8698c35dda8837693e16033f05bab Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sat, 3 Jul 2021 10:38:19 +0200 Subject: [PATCH 60/83] debugged pytest raises testes --- runstats/core.py | 2 +- tests/test_runstats.py | 23 +++++++++++++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/runstats/core.py b/runstats/core.py index ddc1134..3c20a22 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -333,7 +333,7 @@ def decay(self, value): self._set_decay(value) def _set_decay(self, value): - if not 0 <= value <= 1: + if not 0 < value < 1: raise ValueError('decay must be between 0 and 1') self._decay = value diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 247bc9b..61282df 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -1091,13 +1091,14 @@ def test_exponential_statistics_time_based_on_off(ExponentialMovingStatistics): def test_exponential_statistics_time_based_effective_decay( ExponentialMovingStatistics, ): + time_limit = 0.02 exp_stats = ExponentialMovingStatistics() exp_stats_time = ExponentialMovingStatistics(delay=0.5) time.sleep(0.5) exp_stats_time.push(10) exp_stats.push(10) - assert error(exp_stats.mean(), exp_stats_time.mean()) < limit - assert error(exp_stats.variance(), exp_stats_time.variance()) < limit + assert error(exp_stats.mean(), exp_stats_time.mean()) < time_limit + assert error(exp_stats.variance(), exp_stats_time.variance()) < time_limit exp_stats_time.clear_timer() time.sleep(0.5) @@ -1105,16 +1106,16 @@ def test_exponential_statistics_time_based_effective_decay( time.sleep(0.5) exp_stats_time.push(100) exp_stats.push(100) - assert error(exp_stats.mean(), exp_stats_time.mean()) < limit - assert error(exp_stats.variance(), exp_stats_time.variance()) < limit + assert error(exp_stats.mean(), 
exp_stats_time.mean()) < time_limit + assert error(exp_stats.variance(), exp_stats_time.variance()) < time_limit exp_stats.decay = 0.81 exp_stats_time.unfreeze() time.sleep(0.5) exp_stats_time.push(1000) exp_stats.push(1000) - assert error(exp_stats.mean(), exp_stats_time.mean()) < limit - assert error(exp_stats.variance(), exp_stats_time.variance()) < limit + assert error(exp_stats.mean(), exp_stats_time.mean()) < time_limit + assert error(exp_stats.variance(), exp_stats_time.variance()) < time_limit @pytest.mark.parametrize( @@ -1124,8 +1125,11 @@ def test_exponential_statistics_time_based_effective_decay( def test_raise_if_invalid_decay_exp_stats(ExponentialMovingStatistics): with pytest.raises(ValueError): ExponentialMovingStatistics(0) + with pytest.raises(ValueError): ExponentialMovingStatistics(1) + with pytest.raises(ValueError): ExponentialMovingStatistics(-1) + with pytest.raises(ValueError): ExponentialMovingStatistics(2) @@ -1136,8 +1140,11 @@ def test_raise_if_invalid_decay_exp_stats(ExponentialMovingStatistics): def test_raise_if_invalid_decay_exp_cov(ExponentialMovingCovariance): with pytest.raises(ValueError): ExponentialMovingCovariance(0) + with pytest.raises(ValueError): ExponentialMovingCovariance(1) + with pytest.raises(ValueError): ExponentialMovingCovariance(-1) + with pytest.raises(ValueError): ExponentialMovingCovariance(2) @@ -1150,10 +1157,14 @@ def test_raise_if_not_time_exp_stats(ExponentialMovingStatistics): exp_stats_time = ExponentialMovingStatistics(delay=60) with pytest.raises(AttributeError): exp_stats.clear_timer() + with pytest.raises(AttributeError): exp_stats.freeze() + with pytest.raises(AttributeError): exp_stats.unfreeze() + with pytest.raises(AttributeError): exp_stats_time.unfreeze() with pytest.raises(ValueError): exp_stats_time.delay = 0 + with pytest.raises(ValueError): exp_stats_time.delay = -1 From 8ecb9bf8497e86eac152ea832fc0a4fbf76944de Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sat, 3 Jul 2021 11:22:03 
+0200 Subject: [PATCH 61/83] added i_add and i_mul test cases --- runstats/core.py | 5 +++-- tests/test_runstats.py | 42 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/runstats/core.py b/runstats/core.py index 3c20a22..70fb505 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -462,14 +462,15 @@ def is_time_based(self): def push(self, value): """Add `value` to the ExponentialMovingStatistics summary.""" if self.is_time_based(): + now = time.time() diff = ( self._time_diff if self._time_diff - else (time.time() - self._current_time) + else (now - self._current_time) ) norm_diff = diff / self.delay decay = self.decay ** norm_diff - self._current_time = time.time() + self._current_time = now else: decay = self.decay diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 61282df..9010042 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -407,6 +407,9 @@ def test_add_exponential_covariance(ExponentialMovingCovariance): assert (exp_cov0 + exp_cov10) == exp_cov10 assert (exp_cov10 + exp_cov0) == exp_cov10 + exp_cov0 += exp_cov10 + assert exp_cov0 == exp_cov10 + @pytest.mark.parametrize( 'Statistics,Regression', @@ -887,6 +890,10 @@ def test_exponential_covariance_batch(ExponentialMovingCovariance): assert alpha_exp_cov._decay == gamma_exp_cov._decay assert beta_exp_cov._decay != gamma_exp_cov._decay + alpha_exp_cov *= 0.3 + beta_exp_cov *= 0.7 + assert (alpha_exp_cov + beta_exp_cov) == gamma_exp_cov + @pytest.mark.parametrize( 'ExponentialMovingStatistics, decay', @@ -1091,31 +1098,50 @@ def test_exponential_statistics_time_based_on_off(ExponentialMovingStatistics): def test_exponential_statistics_time_based_effective_decay( ExponentialMovingStatistics, ): - time_limit = 0.02 + def calc_effective_decay(diff, delay, nominal_decay): + norm_diff = diff / delay + eff_decay = nominal_decay ** norm_diff + return eff_decay + + delay = 0.5 + nominal_decay = 0.9 exp_stats = 
ExponentialMovingStatistics() exp_stats_time = ExponentialMovingStatistics(delay=0.5) + past = exp_stats_time._current_time time.sleep(0.5) exp_stats_time.push(10) + now = exp_stats_time._current_time + effective_decay = calc_effective_decay(now - past, delay, nominal_decay) + exp_stats.decay = effective_decay exp_stats.push(10) - assert error(exp_stats.mean(), exp_stats_time.mean()) < time_limit - assert error(exp_stats.variance(), exp_stats_time.variance()) < time_limit + + assert exp_stats.mean() == exp_stats_time.mean() + assert exp_stats.variance() == exp_stats_time.variance() exp_stats_time.clear_timer() time.sleep(0.5) exp_stats_time.freeze() time.sleep(0.5) + diff = exp_stats_time._time_diff exp_stats_time.push(100) + effective_decay = calc_effective_decay(diff, delay, nominal_decay) + exp_stats.decay = effective_decay exp_stats.push(100) - assert error(exp_stats.mean(), exp_stats_time.mean()) < time_limit - assert error(exp_stats.variance(), exp_stats_time.variance()) < time_limit - exp_stats.decay = 0.81 + assert exp_stats.mean() == exp_stats_time.mean() + assert exp_stats.variance() == exp_stats_time.variance() + exp_stats_time.unfreeze() + past = exp_stats_time._current_time time.sleep(0.5) exp_stats_time.push(1000) + now = exp_stats_time._current_time + effective_decay = calc_effective_decay(now - past, delay, nominal_decay) + exp_stats.decay = effective_decay exp_stats.push(1000) - assert error(exp_stats.mean(), exp_stats_time.mean()) < time_limit - assert error(exp_stats.variance(), exp_stats_time.variance()) < time_limit + + assert exp_stats.mean() == exp_stats_time.mean() + assert exp_stats.variance() == exp_stats_time.variance() @pytest.mark.parametrize( From 44861c8e775724a31c1e2debb68fd28853ce8db9 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sat, 3 Jul 2021 11:44:59 +0200 Subject: [PATCH 62/83] reformatted blue --- tests/test_runstats.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_runstats.py 
b/tests/test_runstats.py index 9010042..1165ae2 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -383,7 +383,9 @@ def test_add_exponential_statistics(ExponentialMovingStatistics): exp_stats0.decay = 0.8 exp_stats0.delay = 60 exp_stats10.delay = 120 - exp_stats0._time_diff = -1 # To check if clear_timer was called for add and not for iadd + exp_stats0._time_diff = ( + -1 + ) # To check if clear_timer was called for add and not for iadd exp_stats = exp_stats0 + exp_stats10 assert exp_stats.delay == exp_stats0.delay != exp_stats10.delay assert exp_stats.decay == exp_stats0.decay != exp_stats10.decay From 1c43f79a10890e25e0a59010d7eab9a86f564084 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sat, 3 Jul 2021 12:01:16 +0200 Subject: [PATCH 63/83] fixed doc8, missing link to runstats docu in readme --- README.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.rst b/README.rst index ba26083..cc30647 100644 --- a/README.rst +++ b/README.rst @@ -386,6 +386,8 @@ Statistics and Regression by calling `push` repeatedly shows the Cython-optimized extension as 20-40 times faster than the pure-Python extension. +.. 
_`RunStats`: http://www.grantjenks.com/docs/runstats/ + Reference and Indices --------------------- From 36fbdf17b0a88aafacefd98b3c3523402062e1e7 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sat, 3 Jul 2021 16:47:32 +0200 Subject: [PATCH 64/83] removed TODO in core.pxd --- runstats/core.pxd | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/runstats/core.pxd b/runstats/core.pxd index 8759230..5f95f70 100644 --- a/runstats/core.pxd +++ b/runstats/core.pxd @@ -65,7 +65,7 @@ cdef class Statistics: cpdef Statistics make_statistics(state) -cdef class ExponentialMovingStatistics: # TODO: adjust to new interface!, check docstrings, check readme +cdef class ExponentialMovingStatistics: cdef public double _decay, _mean, _variance, _initial_mean, _initial_variance, _delay, _time_diff, _current_time cpdef _set_decay(self, double value) @@ -95,7 +95,8 @@ cdef class ExponentialMovingStatistics: # TODO: adjust to new interface!, check diff=double, incr=double, norm_diff=double, - decay=double + decay=double, + now=double ) cpdef push(self, double value) From a42ccd62ac7d78068cef1de3976bce47c842ad8e Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 4 Jul 2021 16:10:35 +0200 Subject: [PATCH 65/83] improved readme wrt time based exponential statistics usage --- README.rst | 47 +++++++++++++++++++++++++++++------------------ runstats/core.py | 4 ++-- 2 files changed, 31 insertions(+), 20 deletions(-) diff --git a/README.rst b/README.rst index cc30647..c4d91ec 100644 --- a/README.rst +++ b/README.rst @@ -25,7 +25,8 @@ the system based on the recent past. In these cases exponential statistics are used. Instead of weighting all values uniformly in the statistics computation, an exponential decay weight is applied to older values. The decay rate is configurable and provides a mechanism for balancing recent values with past -values. +values. The exponential weighting may be on a 'per data point' or 'per time +step' basis. 
The Python `RunStats`_ module was designed for these cases by providing classes for computing online summary statistics and online linear regression in a @@ -71,6 +72,7 @@ function: >>> help(runstats.Statistics) # doctest: +SKIP >>> help(runstats.Regression) # doctest: +SKIP >>> help(runstats.ExponentialMovingStatistics) # doctest: +SKIP + >>> help(runstats.ExponentialMovingCovariance) # doctest: +SKIP Tutorial @@ -79,7 +81,8 @@ Tutorial The Python `RunStats`_ module provides four types for computing running statistics: Statistics, ExponentialMovingStatistics, ExponentialMovingCovariance and Regression. -The Regression object leverages Statistics internally for its calculations. +The Regression object leverages Statistics internally for its calculations +while ExponentialMovingCovariance uses ExponentialMovingStatistics. Each can be initialized without arguments: .. code-block:: python @@ -254,8 +257,7 @@ mean and variance are simply added to create a new object. To weight each Note how this behaviour differs from the two previous classes. When two `ExponentialMovingStatistics` are added the decay of the left object is used for the new object. The clear method resets the object to its state at -construction. The `len` method as well as minimum and maximum are not -supported. +construction. `len`, minimum and maximum are not supported. .. code-block:: python @@ -292,18 +294,18 @@ The `ExponentialMovingCovariance` works equivalently to `ExponentialMovingStatistics` can also work in a time-based mode i.e. old statistics are not simply discounted by the decay rate each time a value is -pushed but an effective decay rate is calculated based on the provided decay -rate and the time difference between the last push and the current push. -`ExponentialMovingStatistics` operate in time based mode when a `delay` value -> 0 is provided at construction. The delay is the no. of seconds that need to -pass for the effective decay rate to be equal to the provided decay rate. 
-For example, if a delay of 60 and a delay of 0.9 is provided, than after 60 +pushed. Instead an effective decay rate is calculated based on the provided +'nominal' decay rate as well as the time difference between the last push and +the current push.`ExponentialMovingStatistics` operate in time based mode when +a `delay > 0` is provided at construction. The delay is the no. of seconds that +need to pass for the effective decay rate to be equal to the provided decay rate. +For example, if a delay of 60 and a decay of 0.9 is provided, then after 60 seconds pass between calls to push() the effective decay rate for discounting the old statistics equals 0.9, when 120 seconds pass than it equals 0.9 ** 2 = 0.81 and so on. The exact formula for calculating the effective decay rate at a given call to push is: -decay ** ((current_timestamp - timestamp_at_last_push) / delay). The initial -timestamp is the timestamp at object construction. +`decay ** ((current_timestamp - timestamp_at_last_push) / delay)`. The initial +timestamp is the timestamp when delay has been set. .. code-block:: python @@ -331,12 +333,13 @@ mode. is taken from the left object. If the left object is time-based (non `None` delay) the timer is reset during an regular __add__ (a + b) for the resulting object while it is not during an incremental add __iadd__ (a += b). -- Last but not least the timer can be stopped with a call to freeze(). This can -be useful when saving the state of the object (get_state()) for later usage. -With a call to unfreeze() the timer continues where it left of (e.g. after -loading). Note that pushes onto a freezed object use a effective decay rate -based on the time difference between the last call to push and the moment -freeze was called(). +- The timer can be stopped with a call to `freeze()`. This can +be useful when saving the state of the object (`get_state()`) for later usage. +With a call to `unfreeze()` the timer continues where it left of (e.g. after +loading). 
+- Pushes onto a freezed object use a effective decay rate based on the time +difference between the last call to push and the moment `freeze()` was called. +- With a call to `clear_timer()` the timer can be reset. - It is not recommended to use time based discounting for use cases that require high precision on below seconds granularity. @@ -357,6 +360,10 @@ require high precision on below seconds granularity. >>> round(beta_stats.mean()) 3 + +Sources +------- + All internal calculations of the Statistics and Regression classes are based entirely on the C++ code by John Cook as posted in a couple of articles: @@ -372,6 +379,10 @@ The ExponentialMovingStatistics implementation is based on: .. _`Finch, 2009, Incremental Calculation of Weighted Mean and Variance`: https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf + +Pure Python and Cython +---------------------- + The pure-Python version of `RunStats`_ is directly available if preferred. .. code-block:: python diff --git a/runstats/core.py b/runstats/core.py index 70fb505..23f2d42 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -295,9 +295,9 @@ def __init__( Iterates optional parameter `iterable` and pushes each value into the statistics summary. - Can discount values based on time passed instead of position if delay is + Can discount values based on time passed instead of position if 'delay' is set. Setting delay (in seconds) computes a dynamic - decay rate each time a value is pushed for weighting that value: + decay rate each time a value is pushed: dynamic_decay = decay ** (sec_from_last_push / delay). 
When the first value x is pushed, sec_from_last_push is the difference (in sec) between setting the delay from None to a value t (usually at From 6c089d7aa8e36e2a7c19f3d38d9823dc28b92904 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 4 Jul 2021 16:27:01 +0200 Subject: [PATCH 66/83] fixed benchmarking: Renamed ExponentialStatistics to ExponentialMovingStatistics, added ExponentialMovingCovariance to tests --- tests/benchmark.py | 44 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/tests/benchmark.py b/tests/benchmark.py index b0bb7d4..d6002ff 100644 --- a/tests/benchmark.py +++ b/tests/benchmark.py @@ -44,7 +44,7 @@ def main(): core_exp_stats = timeit.repeat( setup=''' from __main__ import VALUES -from runstats.core import ExponentialStatistics +from runstats.core import ExponentialMovingStatistics exp_stats = ExponentialStatistics() ''', stmt=''' @@ -59,7 +59,7 @@ def main(): fast_exp_stats = timeit.repeat( setup=''' from __main__ import VALUES -from runstats._core import ExponentialStatistics +from runstats._core import ExponentialMovingStatistics exp_stats = ExponentialStatistics() ''', stmt=''' @@ -105,18 +105,54 @@ def main(): speedup_regr = core_regr / fast_regr - 1 + core_exp_cov = timeit.repeat( + setup=''' + from __main__ import PAIRS + from runstats.core import ExponentialMovingCovariance + exp_cov = ExponentialMovingCovariance() + ''', + stmt=''' + for pos, val in PAIRS: + exp_cov.push(pos, val) + exp_cov.covariance() + ''', + number=1, + repeat=7, + )[2] + + fast_exp_cov = timeit.repeat( + setup=''' + from __main__ import PAIRS + from runstats._core import ExponentialMovingCovariance + exp_cov = ExponentialMovingCovariance() + ''', + stmt=''' + for pos, val in PAIRS: + exp_cov.push(pos, val) + exp_cov.covariance() + ''', + number=1, + repeat=7, + )[2] + + speedup_exp_cov = core_exp_cov / fast_exp_cov - 1 + print('core.Statistics:', core_stats) print('_core.Statistics:', fast_stats) 
print(' Stats Speedup: %.2fx faster' % speedup_stats) - print('core.ExponentialStatistics:', core_exp_stats) - print('_core.ExponentialStatistics:', fast_exp_stats) + print('core.ExponentialMovingStatistics:', core_exp_stats) + print('_core.ExponentialMovingStatistics:', fast_exp_stats) print(' ExpStats Speedup: %.2fx faster' % speedup_exp_stats) print('core.Regression:', core_regr) print('_core.Regression:', fast_regr) print(' Regr Speedup: %.2fx faster' % speedup_regr) + print('core.ExponentialMovingCovariance:', core_exp_cov) + print('_core.ExponentialMovingCovariance:', fast_exp_cov) + print(' ExpCov Speedup: %.2fx faster' % speedup_exp_cov) + if __name__ == '__main__': main() From a3c3cc9a91669b1ad6eb703a9f93ac3829dfc670 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 4 Jul 2021 16:31:47 +0200 Subject: [PATCH 67/83] fixed benchmark: Using ExponentialMovingStatistics at object construction instead of ExponentialStatistics --- tests/benchmark.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/benchmark.py b/tests/benchmark.py index d6002ff..1deb332 100644 --- a/tests/benchmark.py +++ b/tests/benchmark.py @@ -45,7 +45,7 @@ def main(): setup=''' from __main__ import VALUES from runstats.core import ExponentialMovingStatistics -exp_stats = ExponentialStatistics() +exp_stats = ExponentialMovingStatistics() ''', stmt=''' for value in VALUES: @@ -60,7 +60,7 @@ def main(): setup=''' from __main__ import VALUES from runstats._core import ExponentialMovingStatistics -exp_stats = ExponentialStatistics() +exp_stats = ExponentialMovingStatistics() ''', stmt=''' for value in VALUES: From 39398b25a451bcd9145add9337bb2964140bc141 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 4 Jul 2021 16:35:50 +0200 Subject: [PATCH 68/83] fixed benchmark: wrong indentation for ExponentialMovingCovariance --- tests/benchmark.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/benchmark.py 
b/tests/benchmark.py index 1deb332..95ac94f 100644 --- a/tests/benchmark.py +++ b/tests/benchmark.py @@ -107,14 +107,14 @@ def main(): core_exp_cov = timeit.repeat( setup=''' - from __main__ import PAIRS - from runstats.core import ExponentialMovingCovariance - exp_cov = ExponentialMovingCovariance() +from __main__ import PAIRS +from runstats.core import ExponentialMovingCovariance +exp_cov = ExponentialMovingCovariance() ''', stmt=''' - for pos, val in PAIRS: - exp_cov.push(pos, val) - exp_cov.covariance() +for pos, val in PAIRS: + exp_cov.push(pos, val) +exp_cov.covariance() ''', number=1, repeat=7, @@ -122,14 +122,14 @@ def main(): fast_exp_cov = timeit.repeat( setup=''' - from __main__ import PAIRS - from runstats._core import ExponentialMovingCovariance - exp_cov = ExponentialMovingCovariance() +from __main__ import PAIRS +from runstats._core import ExponentialMovingCovariance +exp_cov = ExponentialMovingCovariance() ''', stmt=''' - for pos, val in PAIRS: - exp_cov.push(pos, val) - exp_cov.covariance() +for pos, val in PAIRS: + exp_cov.push(pos, val) +exp_cov.covariance() ''', number=1, repeat=7, From 20d6452b08a0b04e0de9c7e2205403b6dcd55300 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 4 Jul 2021 17:03:35 +0200 Subject: [PATCH 69/83] Made member variables of ExponentialMovingStatistics exclusivley float (from Optional[float]) by using float('nan') for cython compatibility --- runstats/core.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/runstats/core.py b/runstats/core.py index 23f2d42..6f39a89 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -314,13 +314,15 @@ def __init__( self._mean = self._initial_mean self._variance = self._initial_variance - self._current_time = None - self._time_diff = None - self.delay = None + # using float('nan') for cython compatibility + self._current_time = NAN + self._time_diff = NAN + self.delay = NAN for value in iterable: self.push(value) + delay = 
NAN if delay is None else delay self.delay = delay @property @@ -347,15 +349,17 @@ def delay(self, value): self._set_delay(value) def _set_delay(self, value): - if value is not None: - if value <= 0: + if value is not NAN: + if value <= 0.0: raise ValueError('delay must be > 0') self._current_time = ( - self._current_time if self._current_time else time.time() + self._current_time + if self._current_time is not NAN + else time.time() ) else: - self._current_time = None - self._time_diff = None + self._current_time = NAN + self._time_diff = NAN self._delay = value @@ -363,8 +367,8 @@ def clear(self): """Clear ExponentialMovingStatistics object.""" self._mean = self._initial_mean self._variance = self._initial_variance - self._current_time = time.time() if self.is_time_based() else None - self._time_diff = None + self._current_time = time.time() if self.is_time_based() else NAN + self._time_diff = NAN def __eq__(self, that): return self.get_state() == that.get_state() @@ -422,7 +426,7 @@ def clear_timer(self): """Reset time counter""" if self.is_time_based(): self._current_time = time.time() - self._time_diff = None + self._time_diff = NAN else: raise AttributeError( 'clear_timer on a non-time time based (i.e. delay == None) ' @@ -447,17 +451,17 @@ def unfreeze(self): 'ExponentialMovingStatistics object is illegal' ) - if self._time_diff is None: + if self._time_diff is NAN: raise AttributeError( 'Time must be freezed first before it can be unfreezed' ) self._current_time = time.time() - self._time_diff - self._time_diff = None + self._time_diff = NAN def is_time_based(self): """Checks if object is time-based or not i.e. 
delay is set or None""" - return self.delay is not None + return self.delay is not NAN def push(self, value): """Add `value` to the ExponentialMovingStatistics summary.""" @@ -465,7 +469,7 @@ def push(self, value): now = time.time() diff = ( self._time_diff - if self._time_diff + if self._time_diff is not NAN else (now - self._current_time) ) norm_diff = diff / self.delay From 4c4ce0c96a2df6c43b870aa2e8b898e535be253e Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Sun, 4 Jul 2021 17:18:47 +0200 Subject: [PATCH 70/83] debugged core.pxd: renamed ExponentialMovingCovariance make_regression to ExponentialMovingCovariance make_exponential_covariance --- runstats/core.pxd | 2 +- runstats/core.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/runstats/core.pxd b/runstats/core.pxd index 5f95f70..dae3d79 100644 --- a/runstats/core.pxd +++ b/runstats/core.pxd @@ -204,4 +204,4 @@ cdef class ExponentialMovingCovariance: cpdef ExponentialMovingCovariance _imul(self, double that) -cpdef ExponentialMovingCovariance make_exponential_statistics(state) +cpdef ExponentialMovingCovariance make_exponential_covariance(state) diff --git a/runstats/core.py b/runstats/core.py index 6f39a89..4d23e11 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -887,5 +887,5 @@ def __imul__(self, that): def make_exponential_covariance(state): - """Make Regression object from state.""" + """Make ExponentialMovingCovariance object from state.""" return ExponentialMovingCovariance.fromstate(state) From 7bb14fa9f9c9997a6d7bdd996729024c79794558 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Tue, 6 Jul 2021 16:50:50 +0200 Subject: [PATCH 71/83] added non_time based serliasiation test for ExponentialMovingStatistics --- README.rst | 3 +- runstats/_core.h | 35 ++++++++++++++++++ runstats/core.py | 22 +++++++----- tests/test_runstats.py | 82 +++++++++++++++++++++++++----------------- 4 files changed, 101 insertions(+), 41 deletions(-) create mode 100644 runstats/_core.h 
diff --git a/README.rst b/README.rst index c4d91ec..14bf6f7 100644 --- a/README.rst +++ b/README.rst @@ -334,7 +334,8 @@ is taken from the left object. If the left object is time-based (non `None` delay) the timer is reset during an regular __add__ (a + b) for the resulting object while it is not during an incremental add __iadd__ (a += b). - The timer can be stopped with a call to `freeze()`. This can -be useful when saving the state of the object (`get_state()`) for later usage. +be useful when saving the state of the object (`get_state()`) for later usage +or when serializing the object to pickle. With a call to `unfreeze()` the timer continues where it left of (e.g. after loading). - Pushes onto a freezed object use a effective decay rate based on the time diff --git a/runstats/_core.h b/runstats/_core.h new file mode 100644 index 0000000..cb62796 --- /dev/null +++ b/runstats/_core.h @@ -0,0 +1,35 @@ +/* Generated by Cython 0.29.21 */ + +#ifndef __PYX_HAVE__runstats___core +#define __PYX_HAVE__runstats___core + +#include "Python.h" + +#ifndef __PYX_HAVE_API__runstats___core + +#ifndef __PYX_EXTERN_C + #ifdef __cplusplus + #define __PYX_EXTERN_C extern "C" + #else + #define __PYX_EXTERN_C extern + #endif +#endif + +#ifndef DL_IMPORT + #define DL_IMPORT(_T) _T +#endif + +__PYX_EXTERN_C double __pyx_v_8runstats_5_core_NAN; + +#endif /* !__PYX_HAVE_API__runstats___core */ + +/* WARNING: the interface of the module init function changed in CPython 3.5. */ +/* It now returns a PyModuleDef instance instead of a PyModule instance. 
*/ + +#if PY_MAJOR_VERSION < 3 +PyMODINIT_FUNC init_core(void); +#else +PyMODINIT_FUNC PyInit__core(void); +#endif + +#endif /* !__PYX_HAVE__runstats___core */ diff --git a/runstats/core.py b/runstats/core.py index 4d23e11..9ae3f01 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -8,6 +8,7 @@ from __future__ import division import time +from math import isnan NAN = float('nan') @@ -322,7 +323,6 @@ def __init__( for value in iterable: self.push(value) - delay = NAN if delay is None else delay self.delay = delay @property @@ -349,12 +349,13 @@ def delay(self, value): self._set_delay(value) def _set_delay(self, value): - if value is not NAN: + value = NAN if value is None else value + if not isnan(value): if value <= 0.0: raise ValueError('delay must be > 0') self._current_time = ( self._current_time - if self._current_time is not NAN + if not isnan(self._current_time) else time.time() ) else: @@ -378,7 +379,7 @@ def __ne__(self, that): def get_state(self): """Get internal state.""" - return ( + state = [ self._decay, self._initial_mean, self._initial_variance, @@ -387,10 +388,15 @@ def get_state(self): self._delay, self._current_time, self._time_diff, - ) + ] + state = [None if isnan(i) else i for i in state] + return tuple(state) def set_state(self, state): """Set internal state.""" + state = list(state) + state = [NAN if i is None else i for i in state] + ( self._decay, self._initial_mean, @@ -451,7 +457,7 @@ def unfreeze(self): 'ExponentialMovingStatistics object is illegal' ) - if self._time_diff is NAN: + if isnan(self._time_diff): raise AttributeError( 'Time must be freezed first before it can be unfreezed' ) @@ -461,7 +467,7 @@ def unfreeze(self): def is_time_based(self): """Checks if object is time-based or not i.e. 
delay is set or None""" - return self.delay is not NAN + return not isnan(self.delay) def push(self, value): """Add `value` to the ExponentialMovingStatistics summary.""" @@ -469,7 +475,7 @@ def push(self, value): now = time.time() diff = ( self._time_diff - if self._time_diff is not NAN + if not isnan(self._time_diff) else (now - self._current_time) ) norm_diff = diff / self.delay diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 1165ae2..60dada0 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -389,7 +389,7 @@ def test_add_exponential_statistics(ExponentialMovingStatistics): exp_stats = exp_stats0 + exp_stats10 assert exp_stats.delay == exp_stats0.delay != exp_stats10.delay assert exp_stats.decay == exp_stats0.decay != exp_stats10.decay - assert exp_stats._time_diff is None + assert math.isnan(exp_stats._time_diff) exp_stats0 += exp_stats10 assert exp_stats0.decay == 0.8 @@ -620,7 +620,19 @@ def test_pickle_statistics(Statistics, Regression): 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_pickle_exponential_statistics(ExponentialMovingStatistics): +def test_pickle_exponential_statistics_time_based(ExponentialMovingStatistics): + exp_stats = ExponentialMovingStatistics(0.9, iterable=range(10)) + for num in range(pickle.HIGHEST_PROTOCOL): + pickled_exp_stats = pickle.dumps(exp_stats, protocol=num) + unpickled_exp_stats = pickle.loads(pickled_exp_stats) + assert exp_stats == unpickled_exp_stats, 'protocol: %s' % num + + +@pytest.mark.parametrize( + 'ExponentialMovingStatistics', + [CoreExponentialStatistics, FastExponentialStatistics], +) +def test_pickle_exponential_statistics_time_based(ExponentialMovingStatistics): exp_stats = ExponentialMovingStatistics(0.9, iterable=range(10), delay=30) exp_stats.freeze() for num in range(pickle.HIGHEST_PROTOCOL): @@ -628,6 +640,12 @@ def test_pickle_exponential_statistics(ExponentialMovingStatistics): unpickled_exp_stats = 
pickle.loads(pickled_exp_stats) assert exp_stats == unpickled_exp_stats, 'protocol: %s' % num + exp_stats.unfreeze() + for num in range(pickle.HIGHEST_PROTOCOL): + pickled_exp_stats = pickle.dumps(exp_stats, protocol=num) + unpickled_exp_stats = pickle.loads(pickled_exp_stats) + assert exp_stats == unpickled_exp_stats, 'protocol: %s' % num + @pytest.mark.parametrize( 'ExponentialMovingCovariance', @@ -951,30 +969,30 @@ def test_exponential_statistics_clear(ExponentialMovingStatistics): assert exp_stats.mean() != mean assert exp_stats.variance() != variance - assert exp_stats._current_time is None - assert exp_stats._time_diff is None + assert math.isnan(exp_stats._current_time) + assert math.isnan(exp_stats._time_diff) exp_stats.clear() assert exp_stats.mean() == mean assert exp_stats.variance() == variance - assert exp_stats._current_time is None - assert exp_stats._time_diff is None + assert math.isnan(exp_stats._current_time) + assert math.isnan(exp_stats._time_diff) exp_stats.delay = 60 current_time = exp_stats._current_time - assert exp_stats._current_time is not None - assert exp_stats._time_diff is None + assert not math.isnan(exp_stats._current_time) + assert math.isnan(exp_stats._time_diff) exp_stats.freeze() - assert exp_stats._time_diff is not None + assert not math.isnan(exp_stats._time_diff) exp_stats.clear() new_current_time = exp_stats._current_time - assert exp_stats._current_time is not None + assert not math.isnan(exp_stats._current_time) assert exp_stats._current_time != current_time - assert exp_stats._time_diff is None + assert math.isnan(exp_stats._time_diff) exp_stats.freeze() exp_stats.clear_timer() - assert exp_stats._current_time is not None + assert not math.isnan(exp_stats._current_time) assert exp_stats._current_time != new_current_time - assert exp_stats._time_diff is None + assert math.isnan(exp_stats._time_diff) @pytest.mark.parametrize( @@ -1020,20 +1038,20 @@ def test_exponential_covariance_clear(ExponentialMovingCovariance): def 
test_exponential_statistics_is_time(ExponentialMovingStatistics): exp_stats = ExponentialMovingStatistics() assert not exp_stats.is_time_based() - assert exp_stats.delay is None - assert exp_stats._current_time is None - assert exp_stats._time_diff is None + assert math.isnan(exp_stats.delay) + assert math.isnan(exp_stats._current_time) + assert math.isnan(exp_stats._time_diff) exp_stats.delay = 30 assert exp_stats.is_time_based() - assert exp_stats.delay is not None - assert exp_stats._current_time is not None - assert exp_stats._time_diff is None + assert not math.isnan(exp_stats.delay) + assert not math.isnan(exp_stats._current_time) + assert math.isnan(exp_stats._time_diff) exp_stats = ExponentialMovingStatistics(delay=30) - assert exp_stats.delay is not None - assert exp_stats._current_time is not None - assert exp_stats._time_diff is None + assert not math.isnan(exp_stats.delay) + assert not math.isnan(exp_stats._current_time) + assert math.isnan(exp_stats._time_diff) exp_stats.freeze() - assert exp_stats is not None + assert not math.isnan(exp_stats._time_diff) @pytest.mark.parametrize( @@ -1043,17 +1061,17 @@ def test_exponential_statistics_is_time(ExponentialMovingStatistics): def test_exponential_statistics_freeze_unfreeze(ExponentialMovingStatistics): exp_stats = ExponentialMovingStatistics(delay=30) current_time = exp_stats._current_time - assert exp_stats._time_diff is None + assert math.isnan(exp_stats._time_diff) exp_stats.freeze() time.sleep(0.01) - assert exp_stats._time_diff is not None + assert not math.isnan(exp_stats._time_diff) time_diff = exp_stats._time_diff time.sleep(0.01) exp_stats.unfreeze() future = time.time() assert exp_stats._current_time > current_time assert exp_stats._current_time < future - time_diff - assert exp_stats._time_diff is None + assert math.isnan(exp_stats._time_diff) @pytest.mark.parametrize( @@ -1065,19 +1083,19 @@ def test_exponential_statistics_time_based_on_off(ExponentialMovingStatistics): alpha = 
[random.random() for _ in range(count)] exp_stats = ExponentialMovingStatistics(iterable=alpha) - assert exp_stats.delay is None - assert exp_stats._current_time is None + assert math.isnan(exp_stats.delay) + assert math.isnan(exp_stats._current_time) exp_stats.delay = 30 assert exp_stats.delay == 30 - assert exp_stats._current_time is not None + assert not math.isnan(exp_stats._current_time) current_time = exp_stats._current_time time.sleep(0.01) exp_stats.delay = 60 assert exp_stats.delay == 60 assert exp_stats._current_time == current_time exp_stats.delay = None - assert exp_stats.delay is None - assert exp_stats._current_time is None + assert math.isnan(exp_stats.delay) + assert math.isnan(exp_stats._current_time) exp_stats_time_init = ExponentialMovingStatistics( delay=300, iterable=alpha @@ -1085,7 +1103,7 @@ def test_exponential_statistics_time_based_on_off(ExponentialMovingStatistics): assert exp_stats_time_init.mean() == exp_stats.mean() assert exp_stats_time_init.variance() == exp_stats.variance() assert exp_stats_time_init.delay == 300 - assert exp_stats_time_init._current_time is not None + assert not math.isnan(exp_stats_time_init._current_time) exp_stats.push(10) exp_stats_time_init.push(10) From b9ab638f523b438e99b03a495cfed54cc9e457db Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Tue, 6 Jul 2021 16:54:39 +0200 Subject: [PATCH 72/83] resolved flake8: duplicated test_pickle_exponential_statistics_time_based function definition renamed to test_pickle_exponential_statistics --- tests/test_runstats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 60dada0..cfd8eae 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -620,7 +620,7 @@ def test_pickle_statistics(Statistics, Regression): 'ExponentialMovingStatistics', [CoreExponentialStatistics, FastExponentialStatistics], ) -def test_pickle_exponential_statistics_time_based(ExponentialMovingStatistics): +def 
test_pickle_exponential_statistics(ExponentialMovingStatistics): exp_stats = ExponentialMovingStatistics(0.9, iterable=range(10)) for num in range(pickle.HIGHEST_PROTOCOL): pickled_exp_stats = pickle.dumps(exp_stats, protocol=num) From 01ca96ff918ebb331a2af2c420a93814f9977920 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Tue, 6 Jul 2021 17:10:16 +0200 Subject: [PATCH 73/83] resolved flake8: tests/test_runstats.py:988:15: E271 multiple spaces after keyword --- tests/test_runstats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_runstats.py b/tests/test_runstats.py index cfd8eae..4386629 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -985,7 +985,7 @@ def test_exponential_statistics_clear(ExponentialMovingStatistics): assert not math.isnan(exp_stats._time_diff) exp_stats.clear() new_current_time = exp_stats._current_time - assert not math.isnan(exp_stats._current_time) + assert not math.isnan(exp_stats._current_time) assert exp_stats._current_time != current_time assert math.isnan(exp_stats._time_diff) exp_stats.freeze() From 524892083f774e26940dfbd1deab1503e65e9810 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Thu, 8 Jul 2021 17:26:26 +0200 Subject: [PATCH 74/83] ExponentialMovingStatistics: Moved None to NAN conversion to from _set_delay to delay setter for cython compatiability --- runstats/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runstats/core.py b/runstats/core.py index 9ae3f01..2a484e5 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -346,10 +346,10 @@ def delay(self): @delay.setter def delay(self, value): + value = NAN if value is None else value self._set_delay(value) def _set_delay(self, value): - value = NAN if value is None else value if not isnan(value): if value <= 0.0: raise ValueError('delay must be > 0') From f466e6500fe3b54bda52e247f5e419eb8dc987b2 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Thu, 8 Jul 2021 17:44:28 +0200 Subject: 
[PATCH 75/83] adjusted time.sleep in unit test from 0.01 to 0.5 to pass build on windows --- tests/test_runstats.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 4386629..9a1c1c7 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -1063,10 +1063,10 @@ def test_exponential_statistics_freeze_unfreeze(ExponentialMovingStatistics): current_time = exp_stats._current_time assert math.isnan(exp_stats._time_diff) exp_stats.freeze() - time.sleep(0.01) + time.sleep(0.5) assert not math.isnan(exp_stats._time_diff) time_diff = exp_stats._time_diff - time.sleep(0.01) + time.sleep(0.5) exp_stats.unfreeze() future = time.time() assert exp_stats._current_time > current_time @@ -1089,7 +1089,7 @@ def test_exponential_statistics_time_based_on_off(ExponentialMovingStatistics): assert exp_stats.delay == 30 assert not math.isnan(exp_stats._current_time) current_time = exp_stats._current_time - time.sleep(0.01) + time.sleep(0.5) exp_stats.delay = 60 assert exp_stats.delay == 60 assert exp_stats._current_time == current_time From 9861679410ab97f3d2480e94e9706f46e8c417bd Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Mon, 12 Jul 2021 20:19:22 +0200 Subject: [PATCH 76/83] Mocked time.time() for test_exponential_statistics_freeze_unfreeze --- runstats/__init__.py | 2 ++ tests/test_runstats.py | 47 +++++++++++++++++++++++++++--------------- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/runstats/__init__.py b/runstats/__init__.py index 1207779..7699927 100644 --- a/runstats/__init__.py +++ b/runstats/__init__.py @@ -6,6 +6,8 @@ """ +import time + try: from ._core import ( ExponentialMovingCovariance, diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 9a1c1c7..f87c11d 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -8,6 +8,7 @@ import pickle import random import time +from unittest.mock import patch import pytest @@ -130,6 +131,14 @@ def 
error(value, test): return abs((test - value) / value) +def get_time_patch(ExponentialMovingStatistics): + module = ExponentialMovingStatistics.__module__ + core = "_core" if module == "runstats._core" else "core" + patch_path = f"runstats.{core}.time" + time_patch = patch(patch_path) + return time_patch + + @pytest.mark.parametrize( 'Statistics,Regression', [ @@ -962,6 +971,7 @@ def test_exponential_statistics_clear(ExponentialMovingStatistics): alpha = [random.random() for _ in range(count)] mean = 10 variance = 100 + injected_time = 1625756332.6573758 exp_stats = ExponentialMovingStatistics(mean=mean, variance=variance) for val in alpha: @@ -978,20 +988,20 @@ def test_exponential_statistics_clear(ExponentialMovingStatistics): assert math.isnan(exp_stats._time_diff) exp_stats.delay = 60 - current_time = exp_stats._current_time assert not math.isnan(exp_stats._current_time) assert math.isnan(exp_stats._time_diff) exp_stats.freeze() assert not math.isnan(exp_stats._time_diff) + exp_stats._current_time = injected_time exp_stats.clear() - new_current_time = exp_stats._current_time assert not math.isnan(exp_stats._current_time) - assert exp_stats._current_time != current_time + assert exp_stats._current_time != injected_time assert math.isnan(exp_stats._time_diff) + exp_stats._current_time = injected_time exp_stats.freeze() exp_stats.clear_timer() assert not math.isnan(exp_stats._current_time) - assert exp_stats._current_time != new_current_time + assert exp_stats._current_time != injected_time assert math.isnan(exp_stats._time_diff) @@ -1059,19 +1069,22 @@ def test_exponential_statistics_is_time(ExponentialMovingStatistics): [CoreExponentialStatistics, FastExponentialStatistics], ) def test_exponential_statistics_freeze_unfreeze(ExponentialMovingStatistics): - exp_stats = ExponentialMovingStatistics(delay=30) - current_time = exp_stats._current_time - assert math.isnan(exp_stats._time_diff) - exp_stats.freeze() - time.sleep(0.5) - assert not 
math.isnan(exp_stats._time_diff) - time_diff = exp_stats._time_diff - time.sleep(0.5) - exp_stats.unfreeze() - future = time.time() - assert exp_stats._current_time > current_time - assert exp_stats._current_time < future - time_diff - assert math.isnan(exp_stats._time_diff) + time_patch = get_time_patch(ExponentialMovingStatistics) + with time_patch as time_mock: + time_mock.time.return_value = 1000.1 + exp_stats = ExponentialMovingStatistics(delay=30) + + assert exp_stats._current_time == 1000.1 + assert math.isnan(exp_stats._time_diff) + + time_mock.time.return_value = 1010.1 + exp_stats.freeze() + assert exp_stats._time_diff == 10.0 + + time_mock.time.return_value = 1110.0 + exp_stats.unfreeze() + assert exp_stats._current_time == 1100.0 + assert math.isnan(exp_stats._time_diff) @pytest.mark.parametrize( From 99791efdda9178381f4e1c307df11953d07b44e8 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Mon, 12 Jul 2021 20:20:21 +0200 Subject: [PATCH 77/83] applied blue and isort --- tests/test_runstats.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_runstats.py b/tests/test_runstats.py index f87c11d..d6cd4f1 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -133,8 +133,8 @@ def error(value, test): def get_time_patch(ExponentialMovingStatistics): module = ExponentialMovingStatistics.__module__ - core = "_core" if module == "runstats._core" else "core" - patch_path = f"runstats.{core}.time" + core = '_core' if module == 'runstats._core' else 'core' + patch_path = f'runstats.{core}.time' time_patch = patch(patch_path) return time_patch From d30f872c47b211cc7407cea60a8bd031881b12ab Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Mon, 12 Jul 2021 20:21:23 +0200 Subject: [PATCH 78/83] fixed flake8: time imported but unused in runstats.__init__ --- runstats/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/runstats/__init__.py b/runstats/__init__.py index 7699927..1207779 100644 --- 
a/runstats/__init__.py +++ b/runstats/__init__.py @@ -6,8 +6,6 @@ """ -import time - try: from ._core import ( ExponentialMovingCovariance, From aaa8e7dc75892737c5c6363754abc87778e4a4f2 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Tue, 13 Jul 2021 20:10:34 +0200 Subject: [PATCH 79/83] adjusted time based test to use time_mock, introduced is_freezed() function to ExponentialMovingStatistics --- runstats/core.py | 65 ++++++++++++++--------- tests/test_runstats.py | 118 +++++++++++++++++++++-------------------- 2 files changed, 101 insertions(+), 82 deletions(-) diff --git a/runstats/core.py b/runstats/core.py index 2a484e5..0eb2d4f 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -371,6 +371,21 @@ def clear(self): self._current_time = time.time() if self.is_time_based() else NAN self._time_diff = NAN + def clear_timer(self): + """Reset time counter""" + if self.is_time_based(): + self._current_time = time.time() + self._time_diff = NAN + else: + raise AttributeError( + 'clear_timer on a non-time time based (i.e. delay == None) ' + 'ExponentialMovingStatistics object is illegal' + ) + + def is_time_based(self): + """Checks if object is time-based or not i.e. delay is set or None""" + return not isnan(self.delay) + def __eq__(self, that): return self.get_state() == that.get_state() @@ -428,17 +443,6 @@ def __copy__(self, _=None): __deepcopy__ = __copy__ - def clear_timer(self): - """Reset time counter""" - if self.is_time_based(): - self._current_time = time.time() - self._time_diff = NAN - else: - raise AttributeError( - 'clear_timer on a non-time time based (i.e. delay == None) ' - 'ExponentialMovingStatistics object is illegal' - ) - def freeze(self): """Freeze time i.e. 
save the difference between now and the last push""" if self.is_time_based(): @@ -457,7 +461,7 @@ def unfreeze(self): 'ExponentialMovingStatistics object is illegal' ) - if isnan(self._time_diff): + if not self.is_freezed(): raise AttributeError( 'Time must be freezed first before it can be unfreezed' ) @@ -465,22 +469,17 @@ def unfreeze(self): self._current_time = time.time() - self._time_diff self._time_diff = NAN - def is_time_based(self): - """Checks if object is time-based or not i.e. delay is set or None""" - return not isnan(self.delay) + def is_freezed(self): + if not self.is_time_based(): + raise AttributeError('Only time-based objects can be freezed') + + freezed = not isnan(self._time_diff) + return freezed def push(self, value): """Add `value` to the ExponentialMovingStatistics summary.""" if self.is_time_based(): - now = time.time() - diff = ( - self._time_diff - if not isnan(self._time_diff) - else (now - self._current_time) - ) - norm_diff = diff / self.delay - decay = self.decay ** norm_diff - self._current_time = now + decay = self._effective_decay() else: decay = self.decay @@ -490,6 +489,24 @@ def push(self, value): self._variance += alpha * (decay * diff ** 2 - self._variance) self._mean += incr + def _effective_decay(self): + """Calculate effective decay rate for time based ExponentialMovingStatistics""" + if not self.is_time_based(): + raise AttributeError( + 'Forbidden to call _effective_decay on non-time based object' + ) + + now = time.time() + diff = ( + self._time_diff + if self.is_freezed() + else (now - self._current_time) + ) + norm_diff = diff / self.delay + decay = self.decay ** norm_diff + self._current_time = now + return decay + def mean(self): """Exponential mean of values.""" return self._mean diff --git a/tests/test_runstats.py b/tests/test_runstats.py index d6cd4f1..918aa26 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -392,9 +392,9 @@ def test_add_exponential_statistics(ExponentialMovingStatistics): 
exp_stats0.decay = 0.8 exp_stats0.delay = 60 exp_stats10.delay = 120 - exp_stats0._time_diff = ( - -1 - ) # To check if clear_timer was called for add and not for iadd + # To check if clear_timer was called for add and not for iadd + exp_stats0._time_diff = -1 + exp_stats = exp_stats0 + exp_stats10 assert exp_stats.delay == exp_stats0.delay != exp_stats10.delay assert exp_stats.decay == exp_stats0.decay != exp_stats10.decay @@ -548,6 +548,11 @@ def test_get_set_state_exponential_statistics(ExponentialMovingStatistics): exp_stats.get_state() ) + exp_stats.unfreeze() + assert exp_stats == ExponentialMovingStatistics.fromstate( + exp_stats.get_state() + ) + @pytest.mark.parametrize( 'ExponentialMovingCovariance', @@ -971,7 +976,6 @@ def test_exponential_statistics_clear(ExponentialMovingStatistics): alpha = [random.random() for _ in range(count)] mean = 10 variance = 100 - injected_time = 1625756332.6573758 exp_stats = ExponentialMovingStatistics(mean=mean, variance=variance) for val in alpha: @@ -987,22 +991,24 @@ def test_exponential_statistics_clear(ExponentialMovingStatistics): assert math.isnan(exp_stats._current_time) assert math.isnan(exp_stats._time_diff) - exp_stats.delay = 60 - assert not math.isnan(exp_stats._current_time) - assert math.isnan(exp_stats._time_diff) - exp_stats.freeze() - assert not math.isnan(exp_stats._time_diff) - exp_stats._current_time = injected_time - exp_stats.clear() - assert not math.isnan(exp_stats._current_time) - assert exp_stats._current_time != injected_time - assert math.isnan(exp_stats._time_diff) - exp_stats._current_time = injected_time - exp_stats.freeze() - exp_stats.clear_timer() - assert not math.isnan(exp_stats._current_time) - assert exp_stats._current_time != injected_time - assert math.isnan(exp_stats._time_diff) + time_patch = get_time_patch(ExponentialMovingStatistics) + with time_patch as time_mock: + time_mock.time.return_value = 1000.1 + + exp_stats.delay = 60 + assert exp_stats._current_time == 1000.1 + assert 
math.isnan(exp_stats._time_diff) + exp_stats.freeze() + assert not math.isnan(exp_stats._time_diff) + time_mock.time.return_value = 5000.5 + exp_stats.clear() + assert exp_stats._current_time == 5000.5 + assert math.isnan(exp_stats._time_diff) + exp_stats.freeze() + time_mock.time.return_value = 100.358 + exp_stats.clear_timer() + assert exp_stats._current_time == 100.358 + assert math.isnan(exp_stats._time_diff) @pytest.mark.parametrize( @@ -1076,15 +1082,17 @@ def test_exponential_statistics_freeze_unfreeze(ExponentialMovingStatistics): assert exp_stats._current_time == 1000.1 assert math.isnan(exp_stats._time_diff) + assert not exp_stats.is_freezed() time_mock.time.return_value = 1010.1 exp_stats.freeze() assert exp_stats._time_diff == 10.0 + assert exp_stats.is_freezed() time_mock.time.return_value = 1110.0 exp_stats.unfreeze() assert exp_stats._current_time == 1100.0 - assert math.isnan(exp_stats._time_diff) + assert not exp_stats.is_freezed() @pytest.mark.parametrize( @@ -1102,7 +1110,6 @@ def test_exponential_statistics_time_based_on_off(ExponentialMovingStatistics): assert exp_stats.delay == 30 assert not math.isnan(exp_stats._current_time) current_time = exp_stats._current_time - time.sleep(0.5) exp_stats.delay = 60 assert exp_stats.delay == 60 assert exp_stats._current_time == current_time @@ -1136,45 +1143,36 @@ def calc_effective_decay(diff, delay, nominal_decay): eff_decay = nominal_decay ** norm_diff return eff_decay - delay = 0.5 - nominal_decay = 0.9 - exp_stats = ExponentialMovingStatistics() - exp_stats_time = ExponentialMovingStatistics(delay=0.5) - past = exp_stats_time._current_time - time.sleep(0.5) - exp_stats_time.push(10) - now = exp_stats_time._current_time - effective_decay = calc_effective_decay(now - past, delay, nominal_decay) - exp_stats.decay = effective_decay - exp_stats.push(10) - - assert exp_stats.mean() == exp_stats_time.mean() - assert exp_stats.variance() == exp_stats_time.variance() - - exp_stats_time.clear_timer() - 
time.sleep(0.5) - exp_stats_time.freeze() - time.sleep(0.5) - diff = exp_stats_time._time_diff - exp_stats_time.push(100) - effective_decay = calc_effective_decay(diff, delay, nominal_decay) - exp_stats.decay = effective_decay - exp_stats.push(100) + time_patch = get_time_patch(ExponentialMovingStatistics) + with time_patch as time_mock: + delay = 0.5 + nominal_decay = 0.9 + + past = 100.0 + time_mock.time.return_value = past + exp_stats_time = ExponentialMovingStatistics(delay=0.5) + assert exp_stats_time._current_time == past + now = 110.0 + time_mock.time.return_value = now + expected_effective_decay = calc_effective_decay( + now - past, delay, nominal_decay + ) + true_effective_decay = exp_stats_time._effective_decay() - assert exp_stats.mean() == exp_stats_time.mean() - assert exp_stats.variance() == exp_stats_time.variance() + assert expected_effective_decay == true_effective_decay + assert exp_stats_time._current_time == now - exp_stats_time.unfreeze() - past = exp_stats_time._current_time - time.sleep(0.5) - exp_stats_time.push(1000) - now = exp_stats_time._current_time - effective_decay = calc_effective_decay(now - past, delay, nominal_decay) - exp_stats.decay = effective_decay - exp_stats.push(1000) + later = 120.0 + time_mock.time.return_value = later + exp_stats_time.freeze() + diff = later - now + expected_effective_decay = calc_effective_decay( + diff, delay, nominal_decay + ) + true_effective_decay = exp_stats_time._effective_decay() - assert exp_stats.mean() == exp_stats_time.mean() - assert exp_stats.variance() == exp_stats_time.variance() + assert expected_effective_decay == true_effective_decay + assert exp_stats_time._current_time == later @pytest.mark.parametrize( @@ -1220,8 +1218,12 @@ def test_raise_if_not_time_exp_stats(ExponentialMovingStatistics): exp_stats.freeze() with pytest.raises(AttributeError): exp_stats.unfreeze() + with pytest.raises(AttributeError): + exp_stats.is_freezed() with pytest.raises(AttributeError): 
exp_stats_time.unfreeze() + with pytest.raises(AttributeError): + exp_stats_time._effective_decay() with pytest.raises(ValueError): exp_stats_time.delay = 0 From 3ba42fdbb4d7072edf9edd614452f310c8fa032e Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Tue, 13 Jul 2021 20:27:01 +0200 Subject: [PATCH 80/83] fixed flake8, fixed cython --- runstats/core.pxd | 25 ++++++++++++++++++------- tests/test_runstats.py | 36 ++++++++++++++++++++++++++++-------- 2 files changed, 46 insertions(+), 15 deletions(-) diff --git a/runstats/core.pxd b/runstats/core.pxd index dae3d79..7b1522e 100644 --- a/runstats/core.pxd +++ b/runstats/core.pxd @@ -74,6 +74,10 @@ cdef class ExponentialMovingStatistics: cpdef clear(self) + cpdef clear_timer(self) + + cpdef is_time_based(self) + cpdef get_state(self) cpdef set_state(self, state) @@ -82,24 +86,31 @@ cdef class ExponentialMovingStatistics: cpdef ExponentialMovingStatistics copy(self, _=*) - cpdef clear_timer(self) - cpdef freeze(self) cpdef unfreeze(self) - cpdef is_time_based(self) + @cython.locals( + freezed=bool + ) + cpdef is_freezed(self) @cython.locals( + decay=double, alpha=double, diff=double, - incr=double, - norm_diff=double, - decay=double, - now=double + incr=double ) cpdef push(self, double value) + @cython.locals( + now=double, + diff=double, + norm_diff=double, + decay=double + ) + cpdef _effective_decay(self) + cpdef double mean(self) cpdef double variance(self) diff --git a/tests/test_runstats.py b/tests/test_runstats.py index 918aa26..f13f69b 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -7,7 +7,6 @@ import math import pickle import random -import time from unittest.mock import patch import pytest @@ -131,6 +130,12 @@ def error(value, test): return abs((test - value) / value) +def calc_effective_decay(diff, delay, nominal_decay): + norm_diff = diff / delay + eff_decay = nominal_decay ** norm_diff + return eff_decay + + def get_time_patch(ExponentialMovingStatistics): module = 
ExponentialMovingStatistics.__module__ core = '_core' if module == 'runstats._core' else 'core' @@ -303,6 +308,26 @@ def test_exponential_statistics(ExponentialMovingStatistics): assert (error(current_mean, alpha_exp_stats.mean())) > limit assert (error(current_variance, alpha_exp_stats.variance())) > limit + # test time based to calculate correct mean/variance + time_patch = get_time_patch(ExponentialMovingStatistics) + with time_patch as time_mock: + delay = 0.5 + nominal_decay = 0.9 + + past = 100.0 + time_mock.time.return_value = past + gamma_exp_stats = ExponentialMovingStatistics() + exp_stats_time = ExponentialMovingStatistics(delay=0.5) + now = 110.55 + time_mock.time.return_value = now + exp_stats_time.push(10) + effective_decay = calc_effective_decay(now - past, delay, nominal_decay) + gamma_exp_stats.decay = effective_decay + gamma_exp_stats.push(10) + + assert gamma_exp_stats.mean() == exp_stats_time.mean() + assert gamma_exp_stats.variance() == exp_stats_time.variance() + @pytest.mark.parametrize( 'ExponentialMovingStatistics', @@ -1138,11 +1163,6 @@ def test_exponential_statistics_time_based_on_off(ExponentialMovingStatistics): def test_exponential_statistics_time_based_effective_decay( ExponentialMovingStatistics, ): - def calc_effective_decay(diff, delay, nominal_decay): - norm_diff = diff / delay - eff_decay = nominal_decay ** norm_diff - return eff_decay - time_patch = get_time_patch(ExponentialMovingStatistics) with time_patch as time_mock: delay = 0.5 @@ -1214,6 +1234,8 @@ def test_raise_if_not_time_exp_stats(ExponentialMovingStatistics): exp_stats_time = ExponentialMovingStatistics(delay=60) with pytest.raises(AttributeError): exp_stats.clear_timer() + with pytest.raises(AttributeError): + exp_stats._effective_decay() with pytest.raises(AttributeError): exp_stats.freeze() with pytest.raises(AttributeError): @@ -1222,8 +1244,6 @@ def test_raise_if_not_time_exp_stats(ExponentialMovingStatistics): exp_stats.is_freezed() with 
pytest.raises(AttributeError): exp_stats_time.unfreeze() - with pytest.raises(AttributeError): - exp_stats_time._effective_decay() with pytest.raises(ValueError): exp_stats_time.delay = 0 From 46c60e8313e33c6cce913d6e6535f8d8e7052467 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Tue, 13 Jul 2021 20:31:48 +0200 Subject: [PATCH 81/83] blue --- tests/test_runstats.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_runstats.py b/tests/test_runstats.py index f13f69b..63ccc42 100644 --- a/tests/test_runstats.py +++ b/tests/test_runstats.py @@ -321,7 +321,9 @@ def test_exponential_statistics(ExponentialMovingStatistics): now = 110.55 time_mock.time.return_value = now exp_stats_time.push(10) - effective_decay = calc_effective_decay(now - past, delay, nominal_decay) + effective_decay = calc_effective_decay( + now - past, delay, nominal_decay + ) gamma_exp_stats.decay = effective_decay gamma_exp_stats.push(10) From 7889d4488f260d82b3f2d658c2f827fe63a74fa5 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Tue, 13 Jul 2021 20:35:56 +0200 Subject: [PATCH 82/83] fixed pylint: added docstring to is_freezed() --- runstats/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/runstats/core.py b/runstats/core.py index 0eb2d4f..a5cbe6e 100644 --- a/runstats/core.py +++ b/runstats/core.py @@ -470,6 +470,7 @@ def unfreeze(self): self._time_diff = NAN def is_freezed(self): + """Check if object is in a freezed state""" if not self.is_time_based(): raise AttributeError('Only time-based objects can be freezed') From ef82116d23d3c2f441fbd7bdee6329389fea9803 Mon Sep 17 00:00:00 2001 From: Alan Mazankiewicz Date: Tue, 13 Jul 2021 20:45:50 +0200 Subject: [PATCH 83/83] added bint type to cython is_freezed() --- runstats/_core.h | 35 ----------------------------------- runstats/core.pxd | 2 +- 2 files changed, 1 insertion(+), 36 deletions(-) delete mode 100644 runstats/_core.h diff --git a/runstats/_core.h b/runstats/_core.h deleted file mode 
100644 index cb62796..0000000 --- a/runstats/_core.h +++ /dev/null @@ -1,35 +0,0 @@ -/* Generated by Cython 0.29.21 */ - -#ifndef __PYX_HAVE__runstats___core -#define __PYX_HAVE__runstats___core - -#include "Python.h" - -#ifndef __PYX_HAVE_API__runstats___core - -#ifndef __PYX_EXTERN_C - #ifdef __cplusplus - #define __PYX_EXTERN_C extern "C" - #else - #define __PYX_EXTERN_C extern - #endif -#endif - -#ifndef DL_IMPORT - #define DL_IMPORT(_T) _T -#endif - -__PYX_EXTERN_C double __pyx_v_8runstats_5_core_NAN; - -#endif /* !__PYX_HAVE_API__runstats___core */ - -/* WARNING: the interface of the module init function changed in CPython 3.5. */ -/* It now returns a PyModuleDef instance instead of a PyModule instance. */ - -#if PY_MAJOR_VERSION < 3 -PyMODINIT_FUNC init_core(void); -#else -PyMODINIT_FUNC PyInit__core(void); -#endif - -#endif /* !__PYX_HAVE__runstats___core */ diff --git a/runstats/core.pxd b/runstats/core.pxd index 7b1522e..ea101c1 100644 --- a/runstats/core.pxd +++ b/runstats/core.pxd @@ -91,7 +91,7 @@ cdef class ExponentialMovingStatistics: cpdef unfreeze(self) @cython.locals( - freezed=bool + freezed=bint ) cpdef is_freezed(self)