From 0f29529da946cb662b082c9e34cfdcc8b9d88b74 Mon Sep 17 00:00:00 2001
From: jemmajeffree <98864717+jemmajeffree@users.noreply.github.com>
Date: Fri, 18 Jul 2025 16:09:04 +1000
Subject: [PATCH 01/15] update to nanvar to use more stable algorithm if engine
 is flox

---
 flox/__init__.py       |   2 -
 flox/_version.py       |   1 +
 flox/aggregate_flox.py | 116 ++++++++++++++++++++++++++++++++++++++++-
 flox/aggregations.py   |  71 +++++++++++++++++++++----
 4 files changed, 178 insertions(+), 12 deletions(-)
 create mode 100644 flox/_version.py

diff --git a/flox/__init__.py b/flox/__init__.py
index 898c10e24..2ed80afc5 100644
--- a/flox/__init__.py
+++ b/flox/__init__.py
@@ -13,7 +13,6 @@
     ReindexArrayType,
 )  # noqa
 
-
 def _get_version():
     __version__ = "999"
     try:
@@ -22,5 +21,4 @@ def _get_version():
         pass
     return __version__
 
-
 __version__ = _get_version()
diff --git a/flox/_version.py b/flox/_version.py
new file mode 100644
index 000000000..5b33c8a46
--- /dev/null
+++ b/flox/_version.py
@@ -0,0 +1 @@
+__version__ = "0.1.dev657+g619a390.d20250606"
\ No newline at end of file
diff --git a/flox/aggregate_flox.py b/flox/aggregate_flox.py
index e470151a4..ea1aa2c1e 100644
--- a/flox/aggregate_flox.py
+++ b/flox/aggregate_flox.py
@@ -5,6 +5,86 @@
 from . import xrdtypes as dtypes
 from .xrutils import is_scalar, isnull, notnull
 
+MULTIARRAY_HANDLED_FUNCTIONS = {}
+
+class MultiArray:
+    arrays: tuple[np.ndarray, ...]
+
+    def __init__(self, arrays):
+        self.arrays = arrays # something else needed here to be more careful about types (not sure what)
+        # Do we want to co-erce arrays into a tuple and make sure it's immutable? Do we want it to be immutable?
+        assert np.all([arrays[0].shape == a.shape for a in arrays]), 'Expect all arrays to have the same shape'
+        
+    def astype(self,dt,**kwargs):
+        new_arrays = [] # I really don't like doing this as a list
+        for array in self.arrays: # Do we care about trying to avoid for loops here? three separate lines would be faster, but harder to read
+            new_arrays.append(array.astype(dt,**kwargs))
+        return MultiArray(new_arrays)
+    
+    def reshape(self,shape,**kwargs):
+        return MultiArray([array.reshape(shape,**kwargs) for array in self.arrays])
+    
+    def squeeze(self,axis=None):
+        return MultiArray([array.squeeze(axis) for array in self.arrays])    
+    
+    def __array_function__(self, func, types, args, kwargs):
+        if func not in MULTIARRAY_HANDLED_FUNCTIONS:
+            return NotImplemented
+        # Note: this allows subclasses that don't override
+        # __array_function__ to handle MyArray objects
+        #if not all(issubclass(t, MyArray) for t in types): # I can't see this being relevant at all for this code, but maybe it's safer to leave it in?
+            #return NotImplemented
+        return MULTIARRAY_HANDLED_FUNCTIONS[func](*args, **kwargs)    
+
+        
+   # Shape is needed, seems likely that the other two might be
+   # Making some strong assumptions here that all the arrays are the same shape, and I don't really like this
+    @property
+    def dtype(self) -> np.dtype:
+        return self.arrays[0].dtype
+
+    @property
+    def shape(self) -> tuple[int, ...]:
+        return self.arrays[0].shape
+
+    @property
+    def ndim(self) -> int:
+        return self.arrays[0].ndim
+
+def implements(numpy_function):
+    """Register an __array_function__ implementation for MyArray objects."""
+    def decorator(func):
+        MULTIARRAY_HANDLED_FUNCTIONS[numpy_function] = func
+        return func
+    return decorator
+
+@implements(np.expand_dims)
+def expand_dims_MultiArray(multiarray,axis):
+    return MultiArray([np.expand_dims(a,axis) for a in multiarray.arrays]) #This is gonna spit out a list and I'm not sure if I'm okay with that?
+
+@implements(np.concatenate)
+def concatenate_MultiArray(multiarrays,axis):
+    
+    n_arrays = len(multiarrays[0].arrays)
+    for ma in multiarrays[1:]:
+        if not (len(ma.arrays) == n_arrays): # I don't know what trying to concatenate MultiArrays with different numbers of arrays would even mean
+            raise NotImplementedError
+      
+    # There's the potential for problematic different shapes coming in here.  
+    # Probably warrants some defensive programming, but I'm not sure what to check for while still being generic
+    
+    # I don't like using append and lists here, but I can't work out how to do it better
+    new_arrays = []
+    for i in range(multiarrays[0].ndim):
+        new_arrays.append(np.concatenate([ma.arrays[i] for ma in multiarrays],axis))
+    
+    out = MultiArray(new_arrays)
+    return out
+
+@implements(np.transpose)
+def transpose_MultiArray(multiarray,axes):
+    return MultiArray([np.transpose(a,axes) for a in multiarray.arrays]) #This is gonna spit out a list and I'm not sure if I'm okay with that?
+
 
 def _prepare_for_flox(group_idx, array):
     """
@@ -206,7 +286,6 @@ def _nan_grouped_op(group_idx, array, func, fillna, *args, **kwargs):
 nanmedian = partial(partial(_np_grouped_op, q=0.5), op=partial(quantile_, skipna=True))
 # TODO: all, any
 
-
 def sum_of_squares(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None):
     return sum(
         group_idx,
@@ -250,6 +329,41 @@ def nanmean(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None
         out /= nanlen(group_idx, array, size=size, axis=axis, fill_value=0)
     return out
 
+def var_chunk(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None):
+    # Calculate length and sum - important for the adjustment terms to sum squared deviations
+    array_lens = nanlen(
+        group_idx,
+        array,
+        axis=axis,
+        size=size,
+        fill_value=fill_value,
+        dtype=dtype,
+    )
+    
+    array_sums = sum(
+        group_idx,
+        array,
+        axis=axis,
+        size=size,
+        fill_value=fill_value,
+        dtype=dtype,
+    )
+    
+    # Calculate sum squared deviations - the main part of variance sum
+    array_means = array_sums/array_lens # Does this risk being run eagerly because it's not wrapped in anything?
+    
+    sum_squared_deviations = sum(
+        group_idx,
+        (array-array_means[..., group_idx])**2,
+        axis=axis,
+        size=size,
+        fill_value=fill_value,
+        dtype=dtype,
+    ) 
+
+    return MultiArray((sum_squared_deviations,array_sums,array_lens))
+
+
 
 def ffill(group_idx, array, *, axis, **kwargs):
     group_idx, array, perm = _prepare_for_flox(group_idx, array)
diff --git a/flox/aggregations.py b/flox/aggregations.py
index 6182d6bdd..56aec6a5d 100644
--- a/flox/aggregations.py
+++ b/flox/aggregations.py
@@ -342,14 +342,57 @@ def _mean_finalize(sum_, count):
     final_dtype=np.floating,
 )
 
-
+def _var_combine(array, axis, keepdims=True):
+    
+    def clip_last(array):
+        '''Return array except the last element along axis
+        Purely included to tidy up the adj_terms line
+        '''
+        not_last = [slice(None,None) for i in range(array.ndim)]
+        not_last[axis[0]] = slice(None,-1)
+        return array[*not_last]
+    def clip_first(array):
+        '''Return array except the first element along axis
+        Purely included to tidy up the adj_terms line
+        '''        
+        not_first = [slice(None,None) for i in range(array.ndim)]
+        not_first[axis[0]] = slice(1,None)        
+        return array[*not_first]    
+    
+    assert len(axis)==1, "Assuming that the combine function is only in one direction at once"
+    
+    # Does this double our memory footprint or are they just views?
+    # If there's a huge memory impact, probably better to copy paste array.arrays[1]
+    # in and accept the hit to readability
+    sum_deviations = array.arrays[0]
+    sum_X = array.arrays[1]
+    sum_len = array.arrays[2]    
+    
+    # Calculate parts needed for cascading combination
+    cumsum_X = np.cumsum(sum_X,axis=axis[0]) #Don't need to be able to merge the last element
+    cumsum_len = np.cumsum(sum_len,axis=axis[0])
+
+
+    # Adjustment terms to tweak the sum of squared deviations because not every chunk has the same mean
+    adj_terms = ((clip_last(cumsum_len)*clip_first(sum_X)-clip_first(sum_len)*clip_last(cumsum_X))**2/
+                 (clip_last(cumsum_len)*clip_first(sum_len)*(clip_last(cumsum_len)+clip_first(sum_len))))
+    
+
+
+    return aggregate_flox.MultiArray((np.sum(sum_deviations,axis=axis,keepdims=keepdims)+np.sum(adj_terms,axis=axis,keepdims=keepdims), # sum of squared deviations
+                                      np.sum(sum_X,axis=axis,keepdims=keepdims),           # sum of array items
+                                      np.sum(sum_len,axis=axis,keepdims=keepdims),           # sum of array lengths
+                                        ))# I'm not even pretending calling this class from there is a good idea, I think it wants to be somewhere else though
+    
 # TODO: fix this for complex numbers
-def _var_finalize(sumsq, sum_, count, ddof=0):
-    with np.errstate(invalid="ignore", divide="ignore"):
-        result = (sumsq - (sum_**2 / count)) / (count - ddof)
-    result[count <= ddof] = np.nan
-    return result
+#def _var_finalize(sumsq, sum_, count, ddof=0):
+    #with np.errstate(invalid="ignore", divide="ignore"):
+        #result = (sumsq - (sum_**2 / count)) / (count - ddof)
+    #result[count <= ddof] = np.nan
+    #return result
 
+def _var_finalize(multiarray,ddof=0):
+    return multiarray.arrays[0]/(multiarray.arrays[2]-ddof) # Is this how ddof works again???
 
 def _std_finalize(sumsq, sum_, count, ddof=0):
     return np.sqrt(_var_finalize(sumsq, sum_, count, ddof))
@@ -366,14 +409,24 @@ def _std_finalize(sumsq, sum_, count, ddof=0):
     dtypes=(None, None, np.intp),
     final_dtype=np.floating,
 )
+#nanvar = Aggregation(
+    #"nanvar",
+    #chunk=("nansum_of_squares", "nansum", "nanlen"),
+    #combine=("sum", "sum", "sum"),
+    #finalize=_var_finalize,
+    #fill_value=0,
+    #final_fill_value=np.nan,
+    #dtypes=(None, None, np.intp),
+    #final_dtype=np.floating,
+#)
 nanvar = Aggregation(
     "nanvar",
-    chunk=("nansum_of_squares", "nansum", "nanlen"),
-    combine=("sum", "sum", "sum"),
+    chunk=("var_chunk"),
+    combine=(_var_combine,),
     finalize=_var_finalize,
     fill_value=0,
     final_fill_value=np.nan,
-    dtypes=(None, None, np.intp),
+    dtypes=(None, ),
     final_dtype=np.floating,
 )
 std = Aggregation(

From 1fbf5f85a192c2cfd322f7095ed724b6b6112689 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 18 Jul 2025 06:26:35 +0000
Subject: [PATCH 02/15] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 flox/__init__.py       |  2 +
 flox/_version.py       |  2 +-
 flox/aggregate_flox.py | 96 ++++++++++++++++++++++++------------------
 flox/aggregations.py   | 94 ++++++++++++++++++++++-------------------
 4 files changed, 109 insertions(+), 85 deletions(-)

diff --git a/flox/__init__.py b/flox/__init__.py
index 2ed80afc5..898c10e24 100644
--- a/flox/__init__.py
+++ b/flox/__init__.py
@@ -13,6 +13,7 @@
     ReindexArrayType,
 )  # noqa
 
+
 def _get_version():
     __version__ = "999"
     try:
@@ -21,4 +22,5 @@ def _get_version():
         pass
     return __version__
 
+
 __version__ = _get_version()
diff --git a/flox/_version.py b/flox/_version.py
index 5b33c8a46..a76cf84b5 100644
--- a/flox/_version.py
+++ b/flox/_version.py
@@ -1 +1 @@
-__version__ = "0.1.dev657+g619a390.d20250606"
\ No newline at end of file
+__version__ = "0.1.dev657+g619a390.d20250606"
diff --git a/flox/aggregate_flox.py b/flox/aggregate_flox.py
index ea1aa2c1e..d5f0084c2 100644
--- a/flox/aggregate_flox.py
+++ b/flox/aggregate_flox.py
@@ -7,38 +7,40 @@
 
 MULTIARRAY_HANDLED_FUNCTIONS = {}
 
+
 class MultiArray:
     arrays: tuple[np.ndarray, ...]
 
     def __init__(self, arrays):
-        self.arrays = arrays # something else needed here to be more careful about types (not sure what)
+        self.arrays = arrays  # something else needed here to be more careful about types (not sure what)
         # Do we want to co-erce arrays into a tuple and make sure it's immutable? Do we want it to be immutable?
-        assert np.all([arrays[0].shape == a.shape for a in arrays]), 'Expect all arrays to have the same shape'
-        
-    def astype(self,dt,**kwargs):
-        new_arrays = [] # I really don't like doing this as a list
-        for array in self.arrays: # Do we care about trying to avoid for loops here? three separate lines would be faster, but harder to read
-            new_arrays.append(array.astype(dt,**kwargs))
+        assert np.all([arrays[0].shape == a.shape for a in arrays]), (
+            "Expect all arrays to have the same shape"
+        )
+
+    def astype(self, dt, **kwargs):
+        new_arrays = []  # I really don't like doing this as a list
+        for array in self.arrays:  # Do we care about trying to avoid for loops here? three separate lines would be faster, but harder to read
+            new_arrays.append(array.astype(dt, **kwargs))
         return MultiArray(new_arrays)
-    
-    def reshape(self,shape,**kwargs):
-        return MultiArray([array.reshape(shape,**kwargs) for array in self.arrays])
-    
-    def squeeze(self,axis=None):
-        return MultiArray([array.squeeze(axis) for array in self.arrays])    
-    
+
+    def reshape(self, shape, **kwargs):
+        return MultiArray([array.reshape(shape, **kwargs) for array in self.arrays])
+
+    def squeeze(self, axis=None):
+        return MultiArray([array.squeeze(axis) for array in self.arrays])
+
     def __array_function__(self, func, types, args, kwargs):
         if func not in MULTIARRAY_HANDLED_FUNCTIONS:
             return NotImplemented
         # Note: this allows subclasses that don't override
         # __array_function__ to handle MyArray objects
-        #if not all(issubclass(t, MyArray) for t in types): # I can't see this being relevant at all for this code, but maybe it's safer to leave it in?
-            #return NotImplemented
-        return MULTIARRAY_HANDLED_FUNCTIONS[func](*args, **kwargs)    
+        # if not all(issubclass(t, MyArray) for t in types): # I can't see this being relevant at all for this code, but maybe it's safer to leave it in?
+        # return NotImplemented
+        return MULTIARRAY_HANDLED_FUNCTIONS[func](*args, **kwargs)
 
-        
-   # Shape is needed, seems likely that the other two might be
-   # Making some strong assumptions here that all the arrays are the same shape, and I don't really like this
+    # Shape is needed, seems likely that the other two might be
+    # Making some strong assumptions here that all the arrays are the same shape, and I don't really like this
     @property
     def dtype(self) -> np.dtype:
         return self.arrays[0].dtype
@@ -51,39 +53,50 @@ def shape(self) -> tuple[int, ...]:
     def ndim(self) -> int:
         return self.arrays[0].ndim
 
+
 def implements(numpy_function):
     """Register an __array_function__ implementation for MyArray objects."""
+
     def decorator(func):
         MULTIARRAY_HANDLED_FUNCTIONS[numpy_function] = func
         return func
+
     return decorator
 
+
 @implements(np.expand_dims)
-def expand_dims_MultiArray(multiarray,axis):
-    return MultiArray([np.expand_dims(a,axis) for a in multiarray.arrays]) #This is gonna spit out a list and I'm not sure if I'm okay with that?
+def expand_dims_MultiArray(multiarray, axis):
+    return MultiArray(
+        [np.expand_dims(a, axis) for a in multiarray.arrays]
+    )  # This is gonna spit out a list and I'm not sure if I'm okay with that?
+
 
 @implements(np.concatenate)
-def concatenate_MultiArray(multiarrays,axis):
-    
+def concatenate_MultiArray(multiarrays, axis):
     n_arrays = len(multiarrays[0].arrays)
     for ma in multiarrays[1:]:
-        if not (len(ma.arrays) == n_arrays): # I don't know what trying to concatenate MultiArrays with different numbers of arrays would even mean
+        if not (
+            len(ma.arrays) == n_arrays
+        ):  # I don't know what trying to concatenate MultiArrays with different numbers of arrays would even mean
             raise NotImplementedError
-      
-    # There's the potential for problematic different shapes coming in here.  
+
+    # There's the potential for problematic different shapes coming in here.
     # Probably warrants some defensive programming, but I'm not sure what to check for while still being generic
-    
+
     # I don't like using append and lists here, but I can't work out how to do it better
     new_arrays = []
     for i in range(multiarrays[0].ndim):
-        new_arrays.append(np.concatenate([ma.arrays[i] for ma in multiarrays],axis))
-    
+        new_arrays.append(np.concatenate([ma.arrays[i] for ma in multiarrays], axis))
+
     out = MultiArray(new_arrays)
     return out
 
+
 @implements(np.transpose)
-def transpose_MultiArray(multiarray,axes):
-    return MultiArray([np.transpose(a,axes) for a in multiarray.arrays]) #This is gonna spit out a list and I'm not sure if I'm okay with that?
+def transpose_MultiArray(multiarray, axes):
+    return MultiArray(
+        [np.transpose(a, axes) for a in multiarray.arrays]
+    )  # This is gonna spit out a list and I'm not sure if I'm okay with that?
 
 
 def _prepare_for_flox(group_idx, array):
@@ -286,6 +299,7 @@ def _nan_grouped_op(group_idx, array, func, fillna, *args, **kwargs):
 nanmedian = partial(partial(_np_grouped_op, q=0.5), op=partial(quantile_, skipna=True))
 # TODO: all, any
 
+
 def sum_of_squares(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None):
     return sum(
         group_idx,
@@ -329,6 +343,7 @@ def nanmean(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None
         out /= nanlen(group_idx, array, size=size, axis=axis, fill_value=0)
     return out
 
+
 def var_chunk(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None):
     # Calculate length and sum - important for the adjustment terms to sum squared deviations
     array_lens = nanlen(
@@ -339,7 +354,7 @@ def var_chunk(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=No
         fill_value=fill_value,
         dtype=dtype,
     )
-    
+
     array_sums = sum(
         group_idx,
         array,
@@ -348,21 +363,22 @@ def var_chunk(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=No
         fill_value=fill_value,
         dtype=dtype,
     )
-    
+
     # Calculate sum squared deviations - the main part of variance sum
-    array_means = array_sums/array_lens # Does this risk being run eagerly because it's not wrapped in anything?
-    
+    array_means = (
+        array_sums / array_lens
+    )  # Does this risk being run eagerly because it's not wrapped in anything?
+
     sum_squared_deviations = sum(
         group_idx,
-        (array-array_means[..., group_idx])**2,
+        (array - array_means[..., group_idx]) ** 2,
         axis=axis,
         size=size,
         fill_value=fill_value,
         dtype=dtype,
-    ) 
-
-    return MultiArray((sum_squared_deviations,array_sums,array_lens))
+    )
 
+    return MultiArray((sum_squared_deviations, array_sums, array_lens))
 
 
 def ffill(group_idx, array, *, axis, **kwargs):
diff --git a/flox/aggregations.py b/flox/aggregations.py
index 56aec6a5d..f95428d6d 100644
--- a/flox/aggregations.py
+++ b/flox/aggregations.py
@@ -342,57 +342,63 @@ def _mean_finalize(sum_, count):
     final_dtype=np.floating,
 )
 
+
 def _var_combine(array, axis, keepdims=True):
-    
     def clip_last(array):
-        '''Return array except the last element along axis
+        """Return array except the last element along axis
         Purely included to tidy up the adj_terms line
-        '''
-        not_last = [slice(None,None) for i in range(array.ndim)]
-        not_last[axis[0]] = slice(None,-1)
+        """
+        not_last = [slice(None, None) for i in range(array.ndim)]
+        not_last[axis[0]] = slice(None, -1)
         return array[*not_last]
+
     def clip_first(array):
-        '''Return array except the first element along axis
+        """Return array except the first element along axis
         Purely included to tidy up the adj_terms line
-        '''        
-        not_first = [slice(None,None) for i in range(array.ndim)]
-        not_first[axis[0]] = slice(1,None)        
-        return array[*not_first]    
-    
-    assert len(axis)==1, "Assuming that the combine function is only in one direction at once"
-    
+        """
+        not_first = [slice(None, None) for i in range(array.ndim)]
+        not_first[axis[0]] = slice(1, None)
+        return array[*not_first]
+
+    assert len(axis) == 1, "Assuming that the combine function is only in one direction at once"
+
     # Does this double our memory footprint or are they just views?
     # If there's a huge memory impact, probably better to copy paste array.arrays[1]
     # in and accept the hit to readability
     sum_deviations = array.arrays[0]
     sum_X = array.arrays[1]
-    sum_len = array.arrays[2]    
-    
-    # Calculate parts needed for cascading combination
-    cumsum_X = np.cumsum(sum_X,axis=axis[0]) #Don't need to be able to merge the last element
-    cumsum_len = np.cumsum(sum_len,axis=axis[0])
+    sum_len = array.arrays[2]
 
+    # Calculate parts needed for cascading combination
+    cumsum_X = np.cumsum(sum_X, axis=axis[0])  # Don't need to be able to merge the last element
+    cumsum_len = np.cumsum(sum_len, axis=axis[0])
 
     # Adjustment terms to tweak the sum of squared deviations because not every chunk has the same mean
-    adj_terms = ((clip_last(cumsum_len)*clip_first(sum_X)-clip_first(sum_len)*clip_last(cumsum_X))**2/
-                 (clip_last(cumsum_len)*clip_first(sum_len)*(clip_last(cumsum_len)+clip_first(sum_len))))
-    
+    adj_terms = (
+        clip_last(cumsum_len) * clip_first(sum_X) - clip_first(sum_len) * clip_last(cumsum_X)
+    ) ** 2 / (clip_last(cumsum_len) * clip_first(sum_len) * (clip_last(cumsum_len) + clip_first(sum_len)))
+
+    return aggregate_flox.MultiArray(
+        (
+            np.sum(sum_deviations, axis=axis, keepdims=keepdims)
+            + np.sum(adj_terms, axis=axis, keepdims=keepdims),  # sum of squared deviations
+            np.sum(sum_X, axis=axis, keepdims=keepdims),  # sum of array items
+            np.sum(sum_len, axis=axis, keepdims=keepdims),  # sum of array lengths
+        )
+    )  # I'm not even pretending calling this class from there is a good idea, I think it wants to be somewhere else though
 
 
-    return aggregate_flox.MultiArray((np.sum(sum_deviations,axis=axis,keepdims=keepdims)+np.sum(adj_terms,axis=axis,keepdims=keepdims), # sum of squared deviations
-                                      np.sum(sum_X,axis=axis,keepdims=keepdims),           # sum of array items
-                                      np.sum(sum_len,axis=axis,keepdims=keepdims),           # sum of array lengths
-                                        ))# I'm not even pretending calling this class from there is a good idea, I think it wants to be somewhere else though
-    
 # TODO: fix this for complex numbers
-#def _var_finalize(sumsq, sum_, count, ddof=0):
-    #with np.errstate(invalid="ignore", divide="ignore"):
-        #result = (sumsq - (sum_**2 / count)) / (count - ddof)
-    #result[count <= ddof] = np.nan
-    #return result
+# def _var_finalize(sumsq, sum_, count, ddof=0):
+# with np.errstate(invalid="ignore", divide="ignore"):
+# result = (sumsq - (sum_**2 / count)) / (count - ddof)
+# result[count <= ddof] = np.nan
+# return result
+
+
+def _var_finalize(multiarray, ddof=0):
+    return multiarray.arrays[0] / (multiarray.arrays[2] - ddof)  # Is this how ddof works again???
 
-def _var_finalize(multiarray,ddof=0):
-    return multiarray.arrays[0]/(multiarray.arrays[2]-ddof) # Is this how ddof works again???
 
 def _std_finalize(sumsq, sum_, count, ddof=0):
     return np.sqrt(_var_finalize(sumsq, sum_, count, ddof))
@@ -409,16 +415,16 @@ def _std_finalize(sumsq, sum_, count, ddof=0):
     dtypes=(None, None, np.intp),
     final_dtype=np.floating,
 )
-#nanvar = Aggregation(
-    #"nanvar",
-    #chunk=("nansum_of_squares", "nansum", "nanlen"),
-    #combine=("sum", "sum", "sum"),
-    #finalize=_var_finalize,
-    #fill_value=0,
-    #final_fill_value=np.nan,
-    #dtypes=(None, None, np.intp),
-    #final_dtype=np.floating,
-#)
+# nanvar = Aggregation(
+# "nanvar",
+# chunk=("nansum_of_squares", "nansum", "nanlen"),
+# combine=("sum", "sum", "sum"),
+# finalize=_var_finalize,
+# fill_value=0,
+# final_fill_value=np.nan,
+# dtypes=(None, None, np.intp),
+# final_dtype=np.floating,
+# )
 nanvar = Aggregation(
     "nanvar",
     chunk=("var_chunk"),
@@ -426,7 +432,7 @@ def _std_finalize(sumsq, sum_, count, ddof=0):
     finalize=_var_finalize,
     fill_value=0,
     final_fill_value=np.nan,
-    dtypes=(None, ),
+    dtypes=(None,),
     final_dtype=np.floating,
 )
 std = Aggregation(

From 322f5115509c2127316cc051fdf55c1b804b96cc Mon Sep 17 00:00:00 2001
From: Deepak Cherian <deepak@cherian.net>
Date: Fri, 18 Jul 2025 09:47:42 -0600
Subject: [PATCH 03/15] [revert] only nanvar test

---
 tests/test_core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 88169c0f5..7545480bd 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -235,7 +235,7 @@ def gen_array_by(size, func):
 @pytest.mark.parametrize("size", [(1, 12), (12,), (12, 9)])
 @pytest.mark.parametrize("nby", [1, 2, 3])
 @pytest.mark.parametrize("add_nan_by", [True, False])
-@pytest.mark.parametrize("func", ALL_FUNCS)
+@pytest.mark.parametrize("func", ["nanvar"])
 def test_groupby_reduce_all(to_sparse, nby, size, chunks, func, add_nan_by, engine):
     if ("arg" in func and engine in ["flox", "numbagg"]) or (func in BLOCKWISE_FUNCS and chunks != -1):
         pytest.skip()

From adab8e68b2823d8cba82ff279332fc1e4bf5e858 Mon Sep 17 00:00:00 2001
From: Deepak Cherian <deepak@cherian.net>
Date: Fri, 18 Jul 2025 09:49:10 -0600
Subject: [PATCH 04/15] Some mods

---
 flox/aggregate_flox.py | 41 ++++--------------------------------
 flox/aggregations.py   | 48 +++++++++++++++++++++++++++++++++++++++++-
 flox/core.py           |  8 ++++++-
 3 files changed, 58 insertions(+), 39 deletions(-)

diff --git a/flox/aggregate_flox.py b/flox/aggregate_flox.py
index d5f0084c2..41309fcf0 100644
--- a/flox/aggregate_flox.py
+++ b/flox/aggregate_flox.py
@@ -1,4 +1,5 @@
 from functools import partial
+from typing import Self
 
 import numpy as np
 
@@ -53,6 +54,9 @@ def shape(self) -> tuple[int, ...]:
     def ndim(self) -> int:
         return self.arrays[0].ndim
 
+    def __getitem__(self, key) -> Self:
+        return type(self)([array[key] for array in self.arrays])
+
 
 def implements(numpy_function):
     """Register an __array_function__ implementation for MyArray objects."""
@@ -344,43 +348,6 @@ def nanmean(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None
     return out
 
 
-def var_chunk(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None):
-    # Calculate length and sum - important for the adjustment terms to sum squared deviations
-    array_lens = nanlen(
-        group_idx,
-        array,
-        axis=axis,
-        size=size,
-        fill_value=fill_value,
-        dtype=dtype,
-    )
-
-    array_sums = sum(
-        group_idx,
-        array,
-        axis=axis,
-        size=size,
-        fill_value=fill_value,
-        dtype=dtype,
-    )
-
-    # Calculate sum squared deviations - the main part of variance sum
-    array_means = (
-        array_sums / array_lens
-    )  # Does this risk being run eagerly because it's not wrapped in anything?
-
-    sum_squared_deviations = sum(
-        group_idx,
-        (array - array_means[..., group_idx]) ** 2,
-        axis=axis,
-        size=size,
-        fill_value=fill_value,
-        dtype=dtype,
-    )
-
-    return MultiArray((sum_squared_deviations, array_sums, array_lens))
-
-
 def ffill(group_idx, array, *, axis, **kwargs):
     group_idx, array, perm = _prepare_for_flox(group_idx, array)
     shape = array.shape
diff --git a/flox/aggregations.py b/flox/aggregations.py
index f95428d6d..0961d13a7 100644
--- a/flox/aggregations.py
+++ b/flox/aggregations.py
@@ -343,6 +343,51 @@ def _mean_finalize(sum_, count):
 )
 
 
+def var_chunk(group_idx, array, *, engine: str, axis=-1, size=None, fill_value=None, dtype=None):
+    from .aggregate_flox import MultiArray
+
+    # Calculate length and sum - important for the adjustment terms to sum squared deviations
+    array_lens = generic_aggregate(
+        group_idx,
+        array,
+        func="nanlen",
+        engine=engine,
+        axis=axis,
+        size=size,
+        fill_value=fill_value,
+        dtype=dtype,
+    )
+
+    array_sums = generic_aggregate(
+        group_idx,
+        array,
+        func="nansum",
+        engine=engine,
+        axis=axis,
+        size=size,
+        fill_value=fill_value,
+        dtype=dtype,
+    )
+
+    # Calculate sum squared deviations - the main part of variance sum
+    array_means = (
+        array_sums / array_lens
+    )  # Does this risk being run eagerly because it's not wrapped in anything?
+
+    sum_squared_deviations = generic_aggregate(
+        group_idx,
+        (array - array_means[..., group_idx]) ** 2,
+        func="nansum",
+        engine=engine,
+        axis=axis,
+        size=size,
+        fill_value=fill_value,
+        dtype=dtype,
+    )
+
+    return MultiArray((sum_squared_deviations, array_sums, array_lens))
+
+
 def _var_combine(array, axis, keepdims=True):
     def clip_last(array):
         """Return array except the last element along axis
@@ -427,7 +472,8 @@ def _std_finalize(sumsq, sum_, count, ddof=0):
 # )
 nanvar = Aggregation(
     "nanvar",
-    chunk=("var_chunk"),
+    chunk=var_chunk,
+    numpy="nanvar",
     combine=(_var_combine,),
     finalize=_var_finalize,
     fill_value=0,
diff --git a/flox/core.py b/flox/core.py
index d8600b375..f8d724a16 100644
--- a/flox/core.py
+++ b/flox/core.py
@@ -46,6 +46,7 @@
     _initialize_aggregation,
     generic_aggregate,
     quantile_new_dims_func,
+    var_chunk,
 )
 from .cache import memoize
 from .lib import ArrayLayer, dask_array_type, sparse_array_type
@@ -1251,7 +1252,8 @@ def chunk_reduce(
     # optimize that out.
     previous_reduction: T_Func = ""
     for reduction, fv, kw, dt in zip(funcs, fill_values, kwargss, dtypes):
-        if empty:
+        # UGLY! but this is because the `var` breaks our design assumptions
+        if empty and reduction is not var_chunk:
             result = np.full(shape=final_array_shape, fill_value=fv, like=array)
         elif is_nanlen(reduction) and is_nanlen(previous_reduction):
             result = results["intermediates"][-1]
@@ -1260,6 +1262,10 @@ def chunk_reduce(
             kw_func = dict(size=size, dtype=dt, fill_value=fv)
             kw_func.update(kw)
 
+            # UGLY! but this is because the `var` breaks our design assumptions
+            if reduction is var_chunk:
+                kw_func.update(engine=engine)
+
             if callable(reduction):
                 # passing a custom reduction for npg to apply per-group is really slow!
                 # So this `reduction` has to do the groupby-aggregation

From 93cd9b309277a9512f6d63e251e27c5b5ef00dd3 Mon Sep 17 00:00:00 2001
From: jemmajeffree <98864717+jemmajeffree@users.noreply.github.com>
Date: Mon, 21 Jul 2025 12:11:31 +1000
Subject: [PATCH 05/15] Update flox/aggregations.py to neater tuple unpacking

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
---
 flox/aggregations.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/flox/aggregations.py b/flox/aggregations.py
index 0961d13a7..f989571f5 100644
--- a/flox/aggregations.py
+++ b/flox/aggregations.py
@@ -410,9 +410,7 @@ def clip_first(array):
     # Does this double our memory footprint or are they just views?
     # If there's a huge memory impact, probably better to copy paste array.arrays[1]
     # in and accept the hit to readability
-    sum_deviations = array.arrays[0]
-    sum_X = array.arrays[1]
-    sum_len = array.arrays[2]
+   sum_deviations, sum_X, sum_len = array.arrays
 
     # Calculate parts needed for cascading combination
     cumsum_X = np.cumsum(sum_X, axis=axis[0])  # Don't need to be able to merge the last element

From 2be4f744d8eb97f7ba4ac27fd2ec4ba5bcacf0bc Mon Sep 17 00:00:00 2001
From: jemmajeffree <98864717+jemmajeffree@users.noreply.github.com>
Date: Mon, 21 Jul 2025 12:19:10 +1000
Subject: [PATCH 06/15] Change np.all to all in  flox/aggregate_flox.py

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
---
 flox/aggregate_flox.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flox/aggregate_flox.py b/flox/aggregate_flox.py
index 41309fcf0..ce128d283 100644
--- a/flox/aggregate_flox.py
+++ b/flox/aggregate_flox.py
@@ -15,7 +15,7 @@ class MultiArray:
     def __init__(self, arrays):
         self.arrays = arrays  # something else needed here to be more careful about types (not sure what)
         # Do we want to co-erce arrays into a tuple and make sure it's immutable? Do we want it to be immutable?
-        assert np.all([arrays[0].shape == a.shape for a in arrays]), (
+        assert all(arrays[0].shape == a.shape for a in arrays), (
             "Expect all arrays to have the same shape"
         )
 

From edb655dd1f9bd661e9b16b1146cb1fd380cd7380 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 21 Jul 2025 02:19:17 +0000
Subject: [PATCH 07/15] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 flox/aggregate_flox.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/flox/aggregate_flox.py b/flox/aggregate_flox.py
index ce128d283..21cbd1119 100644
--- a/flox/aggregate_flox.py
+++ b/flox/aggregate_flox.py
@@ -15,9 +15,7 @@ class MultiArray:
     def __init__(self, arrays):
         self.arrays = arrays  # something else needed here to be more careful about types (not sure what)
         # Do we want to co-erce arrays into a tuple and make sure it's immutable? Do we want it to be immutable?
-        assert all(arrays[0].shape == a.shape for a in arrays), (
-            "Expect all arrays to have the same shape"
-        )
+        assert all(arrays[0].shape == a.shape for a in arrays), "Expect all arrays to have the same shape"
 
     def astype(self, dt, **kwargs):
         new_arrays = []  # I really don't like doing this as a list

From dd2e4b6a30784fca103d593c4b96c6056d72f6d9 Mon Sep 17 00:00:00 2001
From: jemmajeffree <98864717+jemmajeffree@users.noreply.github.com>
Date: Mon, 21 Jul 2025 12:24:19 +1000
Subject: [PATCH 08/15] delete some resolved comments

---
 flox/aggregate_flox.py | 2 +-
 flox/aggregations.py   | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/flox/aggregate_flox.py b/flox/aggregate_flox.py
index ea1aa2c1e..fa5a9501d 100644
--- a/flox/aggregate_flox.py
+++ b/flox/aggregate_flox.py
@@ -350,7 +350,7 @@ def var_chunk(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=No
     )
     
     # Calculate sum squared deviations - the main part of variance sum
-    array_means = array_sums/array_lens # Does this risk being run eagerly because it's not wrapped in anything?
+    array_means = array_sums/array_lens
     
     sum_squared_deviations = sum(
         group_idx,
diff --git a/flox/aggregations.py b/flox/aggregations.py
index 56aec6a5d..10cb8add9 100644
--- a/flox/aggregations.py
+++ b/flox/aggregations.py
@@ -361,9 +361,6 @@ def clip_first(array):
     
     assert len(axis)==1, "Assuming that the combine function is only in one direction at once"
     
-    # Does this double our memory footprint or are they just views?
-    # If there's a huge memory impact, probably better to copy paste array.arrays[1]
-    # in and accept the hit to readability
     sum_deviations = array.arrays[0]
     sum_X = array.arrays[1]
     sum_len = array.arrays[2]    

From 19688709b3fa2ec76b33ed73744a91f14ee1b72c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 21 Jul 2025 02:39:21 +0000
Subject: [PATCH 09/15] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 flox/aggregate_flox.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/flox/aggregate_flox.py b/flox/aggregate_flox.py
index b818bd753..21cbd1119 100644
--- a/flox/aggregate_flox.py
+++ b/flox/aggregate_flox.py
@@ -345,6 +345,7 @@ def nanmean(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None
         out /= nanlen(group_idx, array, size=size, axis=axis, fill_value=0)
     return out
 
+
 def ffill(group_idx, array, *, axis, **kwargs):
     group_idx, array, perm = _prepare_for_flox(group_idx, array)
     shape = array.shape

From 12bcb0f56296a2dac7835ed269d7dca68d36e081 Mon Sep 17 00:00:00 2001
From: jemmajeffree <98864717+jemmajeffree@users.noreply.github.com>
Date: Mon, 21 Jul 2025 14:40:52 +1000
Subject: [PATCH 10/15] Remove more unnecessary comments

---
 flox/aggregations.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/flox/aggregations.py b/flox/aggregations.py
index a96747579..e9bfb1667 100644
--- a/flox/aggregations.py
+++ b/flox/aggregations.py
@@ -372,7 +372,7 @@ def var_chunk(group_idx, array, *, engine: str, axis=-1, size=None, fill_value=N
     # Calculate sum squared deviations - the main part of variance sum
     array_means = (
         array_sums / array_lens
-    )  # Does this risk being run eagerly because it's not wrapped in anything?
+    )
 
     sum_squared_deviations = generic_aggregate(
         group_idx,
@@ -407,9 +407,6 @@ def clip_first(array):
 
     assert len(axis) == 1, "Assuming that the combine function is only in one direction at once"
 
-    # Does this double our memory footprint or are they just views?
-    # If there's a huge memory impact, probably better to copy paste array.arrays[1]
-    # in and accept the hit to readability
     sum_deviations, sum_X, sum_len = array.arrays
 
     # Calculate parts needed for cascading combination
@@ -440,7 +437,7 @@ def clip_first(array):
 
 
 def _var_finalize(multiarray, ddof=0):
-    return multiarray.arrays[0] / (multiarray.arrays[2] - ddof)  # Is this how ddof works again???
+    return multiarray.arrays[0] / (multiarray.arrays[2] - ddof)
 
 
 def _std_finalize(sumsq, sum_, count, ddof=0):

From b1f7b5d71b934669e44dd78ffa9120b7abaf87b3 Mon Sep 17 00:00:00 2001
From: jemmajeffree <98864717+jemmajeffree@users.noreply.github.com>
Date: Mon, 21 Jul 2025 14:45:29 +1000
Subject: [PATCH 11/15] Remove _version.py

---
 flox/_version.py | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 flox/_version.py

diff --git a/flox/_version.py b/flox/_version.py
deleted file mode 100644
index a76cf84b5..000000000
--- a/flox/_version.py
+++ /dev/null
@@ -1 +0,0 @@
-__version__ = "0.1.dev657+g619a390.d20250606"

From cd9a8b83db27a1d9ae894a4ef37e98341115cb6a Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 21 Jul 2025 04:46:38 +0000
Subject: [PATCH 12/15] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 flox/aggregations.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/flox/aggregations.py b/flox/aggregations.py
index 1f4c59abf..975c3258e 100644
--- a/flox/aggregations.py
+++ b/flox/aggregations.py
@@ -370,9 +370,7 @@ def var_chunk(group_idx, array, *, engine: str, axis=-1, size=None, fill_value=N
     )
 
     # Calculate sum squared deviations - the main part of variance sum
-    array_means = (
-        array_sums / array_lens
-    )
+    array_means = array_sums / array_lens
 
     sum_squared_deviations = generic_aggregate(
         group_idx,

From 27448e451c89aad1f307ae39f825060508822a38 Mon Sep 17 00:00:00 2001
From: jemmajeffree <98864717+jemmajeffree@users.noreply.github.com>
Date: Thu, 31 Jul 2025 17:42:11 +1000
Subject: [PATCH 13/15] Add preliminary test for std/var precision

---
 tests/test_core.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tests/test_core.py b/tests/test_core.py
index 863e552dc..3926502e5 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -2240,3 +2240,25 @@ def test_sparse_nan_fill_value_reductions(chunks, fill_value, shape, func):
         expected = np.expand_dims(npfunc(numpy_array, axis=-1), axis=-1)
         actual, *_ = groupby_reduce(array, by, func=func, axis=-1)
     assert_equal(actual, expected)
+
+
+@pytest.mark.parametrize("func", ("nanvar","var")) # Expect to expand this to other functions once written. Putting var in to begin with bc I know it will fail
+@pytest.mark.parametrize("engine",("flox",)) # Expect to expand this to other engines once written
+@pytest.mark.parametrize("offset",(0,10e2,10e4,10e6,10e8,10e10,10e12)) # Should fail at 10e8 for old algorithm, and survive 10e12 for current
+def test_std_var_precision(func,engine,offset):
+    # Generate a dataset with small variance and big mean
+    # Check that func with engine gives you the same answer as numpy
+    
+    l =1000
+    array = np.linspace(-1,1,l) # has zero mean
+    labels = np.arange(l)%2 # Ideally we'd parametrize this too. 
+    
+    # These two need to be the same function, but with the offset added and not added
+    no_offset, _ = groupby_reduce(array, labels, engine=engine, func=func)
+    with_offset, _ = groupby_reduce(array+offset, labels, engine=engine, func=func)
+    
+    tol = {"rtol": 1e-8, "atol": 1e-10} # Not sure how stringent to be here
+    
+    # Failure threshold in my external tests is dependent on dask chunksize, maybe needs exploring better?
+    
+    assert_equal(no_offset, with_offset, tol)
\ No newline at end of file

From a81b1a3847f54ac8f4dca25aee35c86788c18766 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 31 Jul 2025 07:47:15 +0000
Subject: [PATCH 14/15] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/test_core.py | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 3926502e5..4f1e28560 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -2242,23 +2242,27 @@ def test_sparse_nan_fill_value_reductions(chunks, fill_value, shape, func):
     assert_equal(actual, expected)
 
 
-@pytest.mark.parametrize("func", ("nanvar","var")) # Expect to expand this to other functions once written. Putting var in to begin with bc I know it will fail
-@pytest.mark.parametrize("engine",("flox",)) # Expect to expand this to other engines once written
-@pytest.mark.parametrize("offset",(0,10e2,10e4,10e6,10e8,10e10,10e12)) # Should fail at 10e8 for old algorithm, and survive 10e12 for current
-def test_std_var_precision(func,engine,offset):
+@pytest.mark.parametrize(
+    "func", ("nanvar", "var")
+)  # Expect to expand this to other functions once written. Putting var in to begin with bc I know it will fail
+@pytest.mark.parametrize("engine", ("flox",))  # Expect to expand this to other engines once written
+@pytest.mark.parametrize(
+    "offset", (0, 10e2, 10e4, 10e6, 10e8, 10e10, 10e12)
+)  # Should fail at 10e8 for old algorithm, and survive 10e12 for current
+def test_std_var_precision(func, engine, offset):
     # Generate a dataset with small variance and big mean
     # Check that func with engine gives you the same answer as numpy
-    
-    l =1000
-    array = np.linspace(-1,1,l) # has zero mean
-    labels = np.arange(l)%2 # Ideally we'd parametrize this too. 
-    
+
+    l = 1000
+    array = np.linspace(-1, 1, l)  # has zero mean
+    labels = np.arange(l) % 2  # Ideally we'd parametrize this too.
+
     # These two need to be the same function, but with the offset added and not added
     no_offset, _ = groupby_reduce(array, labels, engine=engine, func=func)
-    with_offset, _ = groupby_reduce(array+offset, labels, engine=engine, func=func)
-    
-    tol = {"rtol": 1e-8, "atol": 1e-10} # Not sure how stringent to be here
-    
+    with_offset, _ = groupby_reduce(array + offset, labels, engine=engine, func=func)
+
+    tol = {"rtol": 1e-8, "atol": 1e-10}  # Not sure how stringent to be here
+
     # Failure threshold in my external tests is dependent on dask chunksize, maybe needs exploring better?
-    
-    assert_equal(no_offset, with_offset, tol)
\ No newline at end of file
+
+    assert_equal(no_offset, with_offset, tol)

From 004fddcc88e169a369e4505e7e9cdcac3f80dd39 Mon Sep 17 00:00:00 2001
From: jemmajeffree <98864717+jemmajeffree@users.noreply.github.com>
Date: Thu, 31 Jul 2025 17:56:31 +1000
Subject: [PATCH 15/15] Correct comment

---
 tests/test_core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 3926502e5..623c6ae28 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -2242,7 +2242,7 @@ def test_sparse_nan_fill_value_reductions(chunks, fill_value, shape, func):
     assert_equal(actual, expected)
 
 
-@pytest.mark.parametrize("func", ("nanvar","var")) # Expect to expand this to other functions once written. Putting var in to begin with bc I know it will fail
+@pytest.mark.parametrize("func", ("nanvar","var")) # Expect to expand this to other functions once written. "nanvar" has updated chunk, combine functions. "var", for the moment, still uses the old algorithm
 @pytest.mark.parametrize("engine",("flox",)) # Expect to expand this to other engines once written
 @pytest.mark.parametrize("offset",(0,10e2,10e4,10e6,10e8,10e10,10e12)) # Should fail at 10e8 for old algorithm, and survive 10e12 for current
 def test_std_var_precision(func,engine,offset):