From ad384debe19044627f6600f205a9aaf0aeae1fad Mon Sep 17 00:00:00 2001 From: MelReyCG Date: Thu, 17 Oct 2024 13:56:48 +0200 Subject: [PATCH 1/6] =?UTF-8?q?=E2=9C=A8=20add=20char=20comparison=20when?= =?UTF-8?q?=20encountering=20int8=20arrays?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/geos/ats/helpers/restart_check.py | 91 ++++++++++++++----- 1 file changed, 70 insertions(+), 21 deletions(-) diff --git a/geos-ats/src/geos/ats/helpers/restart_check.py b/geos-ats/src/geos/ats/helpers/restart_check.py index 7aa282e5..bbe5d51f 100644 --- a/geos-ats/src/geos/ats/helpers/restart_check.py +++ b/geos-ats/src/geos/ats/helpers/restart_check.py @@ -7,6 +7,7 @@ import argparse import logging import time +import string from pathlib import Path try: from geos.ats.helpers.permute_array import permuteArray # type: ignore[import] @@ -375,35 +376,83 @@ def compareIntArrays( self, path, arr, base_arr ): ARR [in]: The hdf5 Dataset to compare. BASE_ARR [in]: The hdf5 Dataset to compare against. """ - # If the shapes are different they can't be compared. + message="" if arr.shape != base_arr.shape: - msg = "Datasets have different shapes and therefore can't be compared: %s, %s.\n" % ( arr.shape, - base_arr.shape ) - self.errorMsg( path, msg, True ) - return + message = "Datasets have different shapes and therefore can't be compared statistically: %s, %s.\n" % ( + arr.shape, base_arr.shape ) + else: + # Calculate the absolute difference. + difference = np.subtract( arr, base_arr ) + np.abs( difference, out=difference ) - # Create a copy of the arrays. + offenders = difference != 0.0 + n_offenders = np.sum( offenders ) - # Calculate the absolute difference. - difference = np.subtract( arr, base_arr ) - np.abs( difference, out=difference ) + if n_offenders != 0: + max_index = np.unravel_index( np.argmax( difference ), difference.shape ) + max_difference = difference[ max_index ] + offenders_mean = np.mean( difference[ offenders ] ) + offenders_std = np.std( difference[ offenders ] ) - offenders = difference != 0.0 - n_offenders = np.sum( offenders ) + message = "Arrays of types %s and %s have %s values of which %d have differing values.\n" % ( + arr.dtype, base_arr.dtype, offenders.size, n_offenders ) + message += "Statistics of the differences greater than 0:\n" + message += "\tmax_index = %s, max = %s, mean = %s, std = %s\n" % ( + max_index, max_difference, offenders_mean, offenders_std ) - if n_offenders != 0: - max_index = np.unravel_index( np.argmax( difference ), difference.shape ) - max_difference = difference[ max_index ] - offenders_mean = np.mean( difference[ offenders ] ) - offenders_std = np.std( difference[ offenders ] ) + # actually, int8 arrays are almost always char arrays, so we sould add a character comparison. + if arr.dtype == np.int8 and base_arr.dtype == np.int8: + message += self.compareCharArrays( arr, base_arr ) - message = "Arrays of types %s and %s have %s values of which %d have differing values.\n" % ( - arr.dtype, base_arr.dtype, offenders.size, n_offenders ) - message += "Statistics of the differences greater than 0:\n" - message += "\tmax_index = %s, max = %s, mean = %s, std = %s\n" % ( max_index, max_difference, - offenders_mean, offenders_std ) + if message != "": self.errorMsg( path, message, True ) + def compareCharArrays( self, arr, base_arr ): + """ + Compare the valid characters of two arrays and return a formatted string showing differences. + + ARR [in]: The hdf5 Dataset to compare. + BASE_ARR [in]: The hdf5 Dataset to compare against. + + Returns a formatted string highlighting the differing characters. + """ + arr_np = np.array( arr ) + base_arr_np = np.array( base_arr ) + + # Replace invalid characters by line breaks (unused character in the xml inputs) + valid_chars = set( string.ascii_letters + string.digits + string.punctuation ) + arr_chars = "".join([chr(x) if ( 0 <= x < 128 and chr(x) in valid_chars ) else "\n" for x in arr_np.flatten()]) + base_arr_chars = "".join([chr(x) if ( 0 <= x < 128 and chr(x) in valid_chars ) else "\n" for x in base_arr_np.flatten()]) + + # Replace sequences of line breaks with a double spaces to show in the error log + arr_chars_spaced = re.sub( r"\n+", " ", arr_chars ) + base_arr_chars_spaced = re.sub( r"\n+", " ", base_arr_chars ) + + # Trim arrays to the length of the shortest one + min_length = min(len(arr_chars_spaced), len(base_arr_chars_spaced)) + arr_chars_trim = arr_chars_spaced[:min_length] + base_arr_chars_trim = base_arr_chars_spaced[:min_length] + + differing_indices = np.where( np.array( list( arr_chars_trim ) ) != np.array( list( base_arr_chars_trim ) ) )[0] + if differing_indices.size != 0: + # check for reordering + arr_set = sorted(set(arr_chars.split("\n"))) + base_arr_set = sorted(set(base_arr_chars.split("\n"))) + message = "Differing valid characters" + reordering_detected = arr_set == base_arr_set + message += (" (substrings reordering detected):\n" if reordering_detected else ":\n") + + def limited_display(n,string): + return string[:n] + f"... ({len(string)-n} omitted chars)" if len(string) > n else string + + maxDisplay = 110 if reordering_detected else 250 + message += " " + limited_display(maxDisplay, arr_chars_spaced) + "\n" + message += " " + limited_display(maxDisplay, base_arr_chars_spaced) + "\n" + message += " " + "".join(["^" if i in differing_indices else " " for i in range(min(maxDisplay,min_length))]) + "\n" + return message + else: + return "" + def compareStringArrays( self, path, arr, base_arr ): """ Compare two string datasets. Exact equality is used as the acceptance criteria. From e763a57127d0a5fd617b89803ef24028168c8f92 Mon Sep 17 00:00:00 2001 From: MelReyCG Date: Thu, 17 Oct 2024 14:12:30 +0200 Subject: [PATCH 2/6] adding detection of wider strings --- .../src/geos/ats/helpers/restart_check.py | 53 +++++++++++-------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/geos-ats/src/geos/ats/helpers/restart_check.py b/geos-ats/src/geos/ats/helpers/restart_check.py index bbe5d51f..068a7f3d 100644 --- a/geos-ats/src/geos/ats/helpers/restart_check.py +++ b/geos-ats/src/geos/ats/helpers/restart_check.py @@ -428,30 +428,37 @@ def compareCharArrays( self, arr, base_arr ): arr_chars_spaced = re.sub( r"\n+", " ", arr_chars ) base_arr_chars_spaced = re.sub( r"\n+", " ", base_arr_chars ) - # Trim arrays to the length of the shortest one - min_length = min(len(arr_chars_spaced), len(base_arr_chars_spaced)) - arr_chars_trim = arr_chars_spaced[:min_length] - base_arr_chars_trim = base_arr_chars_spaced[:min_length] - - differing_indices = np.where( np.array( list( arr_chars_trim ) ) != np.array( list( base_arr_chars_trim ) ) )[0] - if differing_indices.size != 0: - # check for reordering - arr_set = sorted(set(arr_chars.split("\n"))) - base_arr_set = sorted(set(base_arr_chars.split("\n"))) - message = "Differing valid characters" - reordering_detected = arr_set == base_arr_set - message += (" (substrings reordering detected):\n" if reordering_detected else ":\n") - - def limited_display(n,string): - return string[:n] + f"... ({len(string)-n} omitted chars)" if len(string) > n else string - - maxDisplay = 110 if reordering_detected else 250 - message += " " + limited_display(maxDisplay, arr_chars_spaced) + "\n" - message += " " + limited_display(maxDisplay, base_arr_chars_spaced) + "\n" - message += " " + "".join(["^" if i in differing_indices else " " for i in range(min(maxDisplay,min_length))]) + "\n" - return message + message = "" + def limited_display(n,string): + return string[:n] + f"... ({len(string)-n} omitted chars)" if len(string) > n else string + + if len(arr_chars_spaced) != len(base_arr_chars_spaced): + maxDisplay = 250 + message = f"Character arrays have different sizes: {len(arr_chars_spaced)}, {len(base_arr_chars_spaced)}.\n" + message += f" {limited_display(maxDisplay, arr_chars_spaced)}\n" + message += f" {limited_display(maxDisplay, base_arr_chars_spaced)}\n" else: - return "" + # We need to trim arrays to the length of the shortest one for the comparisons + min_length = min(len(arr_chars_spaced), len(base_arr_chars_spaced)) + arr_chars_trim = arr_chars_spaced[:min_length] + base_arr_chars_trim = base_arr_chars_spaced[:min_length] + + differing_indices = np.where( np.array( list( arr_chars_trim ) ) != np.array( list( base_arr_chars_trim ) ) )[0] + if differing_indices.size != 0: + # check for reordering + arr_set = sorted(set(arr_chars.split("\n"))) + base_arr_set = sorted(set(base_arr_chars.split("\n"))) + reordering_detected = arr_set == base_arr_set + + maxDisplay = 110 if reordering_detected else 250 + message = "Differing valid characters" + message += " (substrings reordering detected):\n" if reordering_detected else ":\n" + + message += f" {limited_display(maxDisplay, arr_chars_spaced)}\n" + message += f" {limited_display(maxDisplay, base_arr_chars_spaced)}\n" + message += f" {"".join(["^" if i in differing_indices else " " for i in range(min(maxDisplay,min_length))])}\n" + + return message def compareStringArrays( self, path, arr, base_arr ): """ From 7386a2b1df4a088fd67761b8df4200e70acfd95e Mon Sep 17 00:00:00 2001 From: MelReyCG Date: Thu, 17 Oct 2024 15:31:42 +0200 Subject: [PATCH 3/6] better management of strings separators & whitespaces --- .../src/geos/ats/helpers/restart_check.py | 46 +++++++++++-------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/geos-ats/src/geos/ats/helpers/restart_check.py b/geos-ats/src/geos/ats/helpers/restart_check.py index 068a7f3d..e7c164ab 100644 --- a/geos-ats/src/geos/ats/helpers/restart_check.py +++ b/geos-ats/src/geos/ats/helpers/restart_check.py @@ -419,44 +419,50 @@ def compareCharArrays( self, arr, base_arr ): arr_np = np.array( arr ) base_arr_np = np.array( base_arr ) - # Replace invalid characters by line breaks (unused character in the xml inputs) - valid_chars = set( string.ascii_letters + string.digits + string.punctuation ) - arr_chars = "".join([chr(x) if ( 0 <= x < 128 and chr(x) in valid_chars ) else "\n" for x in arr_np.flatten()]) - base_arr_chars = "".join([chr(x) if ( 0 <= x < 128 and chr(x) in valid_chars ) else "\n" for x in base_arr_np.flatten()]) - - # Replace sequences of line breaks with a double spaces to show in the error log - arr_chars_spaced = re.sub( r"\n+", " ", arr_chars ) - base_arr_chars_spaced = re.sub( r"\n+", " ", base_arr_chars ) + # Replace invalid characters by group-separator characters ('\x1D') + valid_chars = set( string.printable ) + invalid_char = str( '\x1D' ) + arr_chars = "".join( [ chr(x) if ( x >= 0 and chr(x) in valid_chars ) else invalid_char for x in arr_np.flatten() ] ) + base_arr_chars = "".join( [ chr(x) if ( x >= 0 and chr(x) in valid_chars ) else invalid_char for x in base_arr_np.flatten() ] ) + + # replace whitespaces sequences by only one space (preventing indentation / spacing changes detection) + whitespace_pattern = r"[ \t\n\r\v\f]+" + arr_chars = re.sub( whitespace_pattern, " ", arr_chars ) + base_arr_chars = re.sub( whitespace_pattern, " ", base_arr_chars ) + # replace invalid characters sequences by a double space (for clear display) + invalid_char_pattern = r"\x1D+" + arr_chars_display = re.sub( invalid_char_pattern, " ", arr_chars) + base_arr_chars_display = re.sub( invalid_char_pattern, " ", base_arr_chars) message = "" def limited_display(n,string): return string[:n] + f"... ({len(string)-n} omitted chars)" if len(string) > n else string - if len(arr_chars_spaced) != len(base_arr_chars_spaced): + if len(arr_chars) != len(base_arr_chars): maxDisplay = 250 - message = f"Character arrays have different sizes: {len(arr_chars_spaced)}, {len(base_arr_chars_spaced)}.\n" - message += f" {limited_display(maxDisplay, arr_chars_spaced)}\n" - message += f" {limited_display(maxDisplay, base_arr_chars_spaced)}\n" + message = f"Character arrays have different sizes: {len( arr_chars )}, {len( base_arr_chars )}.\n" + message += f" {limited_display( maxDisplay, arr_chars_display )}\n" + message += f" {limited_display( maxDisplay, base_arr_chars_display )}\n" else: # We need to trim arrays to the length of the shortest one for the comparisons - min_length = min(len(arr_chars_spaced), len(base_arr_chars_spaced)) - arr_chars_trim = arr_chars_spaced[:min_length] - base_arr_chars_trim = base_arr_chars_spaced[:min_length] + min_length = min( len( arr_chars_display ), len( base_arr_chars_display ) ) + arr_chars_trim = arr_chars_display[:min_length] + base_arr_chars_trim = base_arr_chars_display[:min_length] differing_indices = np.where( np.array( list( arr_chars_trim ) ) != np.array( list( base_arr_chars_trim ) ) )[0] if differing_indices.size != 0: # check for reordering - arr_set = sorted(set(arr_chars.split("\n"))) - base_arr_set = sorted(set(base_arr_chars.split("\n"))) + arr_set = sorted( set( arr_chars.split( invalid_char ) ) ) + base_arr_set = sorted( set( base_arr_chars.split( invalid_char ) ) ) reordering_detected = arr_set == base_arr_set maxDisplay = 110 if reordering_detected else 250 message = "Differing valid characters" message += " (substrings reordering detected):\n" if reordering_detected else ":\n" - message += f" {limited_display(maxDisplay, arr_chars_spaced)}\n" - message += f" {limited_display(maxDisplay, base_arr_chars_spaced)}\n" - message += f" {"".join(["^" if i in differing_indices else " " for i in range(min(maxDisplay,min_length))])}\n" + message += f"- {limited_display( maxDisplay, arr_chars_display )}\n" + message += f"- {limited_display( maxDisplay, base_arr_chars_display )}\n" + message += " " + "".join( ["^" if i in differing_indices else " " for i in range( min( maxDisplay, min_length ) ) ] ) + "\n" return message From f107f350c641a1798ec5524e80078fb7ebd3fd40 Mon Sep 17 00:00:00 2001 From: MelReyCG Date: Thu, 17 Oct 2024 15:36:40 +0200 Subject: [PATCH 4/6] removed potencially disturbing character --- geos-ats/src/geos/ats/helpers/restart_check.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/geos-ats/src/geos/ats/helpers/restart_check.py b/geos-ats/src/geos/ats/helpers/restart_check.py index e7c164ab..8f2dcf5b 100644 --- a/geos-ats/src/geos/ats/helpers/restart_check.py +++ b/geos-ats/src/geos/ats/helpers/restart_check.py @@ -460,8 +460,8 @@ def limited_display(n,string): message = "Differing valid characters" message += " (substrings reordering detected):\n" if reordering_detected else ":\n" - message += f"- {limited_display( maxDisplay, arr_chars_display )}\n" - message += f"- {limited_display( maxDisplay, base_arr_chars_display )}\n" + message += f" {limited_display( maxDisplay, arr_chars_display )}\n" + message += f" {limited_display( maxDisplay, base_arr_chars_display )}\n" message += " " + "".join( ["^" if i in differing_indices else " " for i in range( min( maxDisplay, min_length ) ) ] ) + "\n" return message From d05adbd4fb5884b8ff84cddeb6f280a2bf705818 Mon Sep 17 00:00:00 2001 From: MelReyCG Date: Thu, 17 Oct 2024 18:14:09 +0200 Subject: [PATCH 5/6] =?UTF-8?q?=F0=9F=8E=A8=20code=20style?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/geos/ats/helpers/restart_check.py | 67 ++++++++++--------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/geos-ats/src/geos/ats/helpers/restart_check.py b/geos-ats/src/geos/ats/helpers/restart_check.py index 8f2dcf5b..aeb37154 100644 --- a/geos-ats/src/geos/ats/helpers/restart_check.py +++ b/geos-ats/src/geos/ats/helpers/restart_check.py @@ -376,7 +376,7 @@ def compareIntArrays( self, path, arr, base_arr ): ARR [in]: The hdf5 Dataset to compare. BASE_ARR [in]: The hdf5 Dataset to compare against. """ - message="" + message = "" if arr.shape != base_arr.shape: message = "Datasets have different shapes and therefore can't be compared statistically: %s, %s.\n" % ( arr.shape, base_arr.shape ) @@ -394,8 +394,8 @@ def compareIntArrays( self, path, arr, base_arr ): offenders_mean = np.mean( difference[ offenders ] ) offenders_std = np.std( difference[ offenders ] ) - message = "Arrays of types %s and %s have %s values of which %d have differing values.\n" % ( - arr.dtype, base_arr.dtype, offenders.size, n_offenders ) + message += "\tmax_index = %s, max = %s, mean = %s, std = %s\n" % ( max_index, max_difference, + offenders_mean, offenders_std ) message += "Statistics of the differences greater than 0:\n" message += "\tmax_index = %s, max = %s, mean = %s, std = %s\n" % ( max_index, max_difference, offenders_mean, offenders_std ) @@ -407,62 +407,65 @@ def compareIntArrays( self, path, arr, base_arr ): if message != "": self.errorMsg( path, message, True ) - def compareCharArrays( self, arr, base_arr ): + def compareCharArrays( self, comp_arr, base_arr ): """ Compare the valid characters of two arrays and return a formatted string showing differences. - ARR [in]: The hdf5 Dataset to compare. + COMP_ARR [in]: The hdf5 Dataset to compare. BASE_ARR [in]: The hdf5 Dataset to compare against. Returns a formatted string highlighting the differing characters. """ - arr_np = np.array( arr ) - base_arr_np = np.array( base_arr ) + comp_ndarr = np.array( comp_arr ).flatten() + base_ndarr = np.array( base_arr ).flatten() # Replace invalid characters by group-separator characters ('\x1D') valid_chars = set( string.printable ) - invalid_char = str( '\x1D' ) - arr_chars = "".join( [ chr(x) if ( x >= 0 and chr(x) in valid_chars ) else invalid_char for x in arr_np.flatten() ] ) - base_arr_chars = "".join( [ chr(x) if ( x >= 0 and chr(x) in valid_chars ) else invalid_char for x in base_arr_np.flatten() ] ) + invalid_char = '\x1D' + comp_str = "".join( [ chr( x ) if ( x >= 0 and chr( x ) in valid_chars ) else invalid_char for x in comp_ndarr ] ) + base_str = "".join( [ chr( x ) if ( x >= 0 and chr( x ) in valid_chars ) else invalid_char for x in base_ndarr ] ) # replace whitespaces sequences by only one space (preventing indentation / spacing changes detection) whitespace_pattern = r"[ \t\n\r\v\f]+" - arr_chars = re.sub( whitespace_pattern, " ", arr_chars ) - base_arr_chars = re.sub( whitespace_pattern, " ", base_arr_chars ) + comp_str = re.sub( whitespace_pattern, " ", comp_str ) + base_str = re.sub( whitespace_pattern, " ", base_str ) # replace invalid characters sequences by a double space (for clear display) invalid_char_pattern = r"\x1D+" - arr_chars_display = re.sub( invalid_char_pattern, " ", arr_chars) - base_arr_chars_display = re.sub( invalid_char_pattern, " ", base_arr_chars) + comp_str_display = re.sub( invalid_char_pattern, " ", comp_str ) + base_str_display = re.sub( invalid_char_pattern, " ", base_str ) message = "" - def limited_display(n,string): - return string[:n] + f"... ({len(string)-n} omitted chars)" if len(string) > n else string - - if len(arr_chars) != len(base_arr_chars): - maxDisplay = 250 - message = f"Character arrays have different sizes: {len( arr_chars )}, {len( base_arr_chars )}.\n" - message += f" {limited_display( maxDisplay, arr_chars_display )}\n" - message += f" {limited_display( maxDisplay, base_arr_chars_display )}\n" + + def limited_display( n, string ): + return string[ :n ] + f"... ({len(string)-n} omitted chars)" if len( string ) > n else string + + if len( comp_str ) != len( base_str ): + max_display = 250 + message = f"Character arrays have different sizes: {len( comp_str )}, {len( base_str )}.\n" + message += f" {limited_display( max_display, comp_str_display )}\n" + message += f" {limited_display( max_display, base_str_display )}\n" else: # We need to trim arrays to the length of the shortest one for the comparisons - min_length = min( len( arr_chars_display ), len( base_arr_chars_display ) ) - arr_chars_trim = arr_chars_display[:min_length] - base_arr_chars_trim = base_arr_chars_display[:min_length] + min_length = min( len( comp_str_display ), len( base_str_display ) ) + comp_str_trim = comp_str_display[ :min_length ] + base_str_trim = base_str_display[ :min_length ] - differing_indices = np.where( np.array( list( arr_chars_trim ) ) != np.array( list( base_arr_chars_trim ) ) )[0] + differing_indices = np.where( + np.array( list( comp_str_trim ) ) != np.array( list( base_str_trim ) ) )[ 0 ] if differing_indices.size != 0: # check for reordering - arr_set = sorted( set( arr_chars.split( invalid_char ) ) ) - base_arr_set = sorted( set( base_arr_chars.split( invalid_char ) ) ) + arr_set = sorted( set( comp_str.split( invalid_char ) ) ) + base_arr_set = sorted( set( base_str.split( invalid_char ) ) ) reordering_detected = arr_set == base_arr_set - maxDisplay = 110 if reordering_detected else 250 + max_display = 110 if reordering_detected else 250 message = "Differing valid characters" message += " (substrings reordering detected):\n" if reordering_detected else ":\n" - message += f" {limited_display( maxDisplay, arr_chars_display )}\n" - message += f" {limited_display( maxDisplay, base_arr_chars_display )}\n" - message += " " + "".join( ["^" if i in differing_indices else " " for i in range( min( maxDisplay, min_length ) ) ] ) + "\n" + message += f" {limited_display( max_display, comp_str_display )}\n" + message += f" {limited_display( max_display, base_str_display )}\n" + message += " " + "".join( + [ "^" if i in differing_indices else " " for i in range( min( max_display, min_length ) ) ] ) + "\n" return message From 52875533186941666fee79d68734cafcc7aedcee Mon Sep 17 00:00:00 2001 From: MelReyCG Date: Mon, 21 Oct 2024 11:32:59 +0200 Subject: [PATCH 6/6] =?UTF-8?q?=F0=9F=8E=A8=20code=20style=20+=20msg=20fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- geos-ats/src/geos/ats/helpers/restart_check.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/geos-ats/src/geos/ats/helpers/restart_check.py b/geos-ats/src/geos/ats/helpers/restart_check.py index aeb37154..f4911073 100644 --- a/geos-ats/src/geos/ats/helpers/restart_check.py +++ b/geos-ats/src/geos/ats/helpers/restart_check.py @@ -394,11 +394,11 @@ def compareIntArrays( self, path, arr, base_arr ): offenders_mean = np.mean( difference[ offenders ] ) offenders_std = np.std( difference[ offenders ] ) + message = "Arrays of types %s and %s have %s values of which %d have differing values.\n" % ( + arr.dtype, base_arr.dtype, offenders.size, n_offenders ) + message += "Statistics of the differences greater than 0:\n" message += "\tmax_index = %s, max = %s, mean = %s, std = %s\n" % ( max_index, max_difference, offenders_mean, offenders_std ) - message += "Statistics of the differences greater than 0:\n" - message += "\tmax_index = %s, max = %s, mean = %s, std = %s\n" % ( - max_index, max_difference, offenders_mean, offenders_std ) # actually, int8 arrays are almost always char arrays, so we sould add a character comparison. if arr.dtype == np.int8 and base_arr.dtype == np.int8: @@ -422,8 +422,10 @@ def compareCharArrays( self, comp_arr, base_arr ): # Replace invalid characters by group-separator characters ('\x1D') valid_chars = set( string.printable ) invalid_char = '\x1D' - comp_str = "".join( [ chr( x ) if ( x >= 0 and chr( x ) in valid_chars ) else invalid_char for x in comp_ndarr ] ) - base_str = "".join( [ chr( x ) if ( x >= 0 and chr( x ) in valid_chars ) else invalid_char for x in base_ndarr ] ) + comp_str = "".join( + [ chr( x ) if ( x >= 0 and chr( x ) in valid_chars ) else invalid_char for x in comp_ndarr ] ) + base_str = "".join( + [ chr( x ) if ( x >= 0 and chr( x ) in valid_chars ) else invalid_char for x in base_ndarr ] ) # replace whitespaces sequences by only one space (preventing indentation / spacing changes detection) whitespace_pattern = r"[ \t\n\r\v\f]+" @@ -450,8 +452,7 @@ def limited_display( n, string ): comp_str_trim = comp_str_display[ :min_length ] base_str_trim = base_str_display[ :min_length ] - differing_indices = np.where( - np.array( list( comp_str_trim ) ) != np.array( list( base_str_trim ) ) )[ 0 ] + differing_indices = np.where( np.array( list( comp_str_trim ) ) != np.array( list( base_str_trim ) ) )[ 0 ] if differing_indices.size != 0: # check for reordering arr_set = sorted( set( comp_str.split( invalid_char ) ) )