From bea38113c6a06fb5b217efbddfec40cd5458b61b Mon Sep 17 00:00:00 2001 From: Alex Cabrera Date: Tue, 2 Aug 2022 10:30:49 -0700 Subject: [PATCH 1/3] Iterate through entry array to exclude OO --- src/main.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/main.py b/src/main.py index 00e8162..35ac591 100644 --- a/src/main.py +++ b/src/main.py @@ -10,7 +10,7 @@ def lambda_handler(event, context): - print(event) + #print(event) file1 = event['file1'] file2 = event['file2'] version = event['version'] @@ -20,10 +20,14 @@ def lambda_handler(event, context): entry1 = json1.get('entry', []) entry2 = json2.get('entry', []) - d = DeepDiff(entry1, entry2, ignore_order=True) - print(d) + print(len(entry1)) + print(len(entry2)) + + n_entry1 = exclude_operation_outcomes_from_entry(entry1) + n_entry2 = exclude_operation_outcomes_from_entry(entry2) + # First we check the obvious difference and then use deepdiff - if len(entry1) != len(entry2) or d: + if len(n_entry1) != len(n_entry2) or diffyng(n_entry1, n_entry2): print("NEW FILE FOUND, COPYING DATA TO LAYER2") lambda_client = boto3.client('lambda') payload = { @@ -39,6 +43,19 @@ def lambda_handler(event, context): else: print("> Same file") +def exclude_operation_outcomes_from_entry(entries): + new_entries = [] + for entry in entries: + if entry['resource']['resourceType'] != 'OperationOutcome': + new_entries.append(entry) + print(len(new_entries)) + return new_entries + +def diffyng(entry1, entry2): + print('using diffyng lib') + d = DeepDiff(entry1, entry2, ignore_order=True) + print(d) + return d def s3_read(file): fileobj = s3_client.get_object( From d9d22c0b0f113367cddf97ecf6a38c0881f2e16a Mon Sep 17 00:00:00 2001 From: Alex Cabrera Date: Tue, 2 Aug 2022 11:34:48 -0700 Subject: [PATCH 2/3] Exclude OperationOutcome from entries before diffying --- src/main.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/main.py b/src/main.py index 35ac591..481e4bb 100644 --- a/src/main.py +++ b/src/main.py @@ -20,14 +20,9 @@ def lambda_handler(event, context): entry1 = json1.get('entry', []) entry2 = json2.get('entry', []) - print(len(entry1)) - print(len(entry2)) - - n_entry1 = exclude_operation_outcomes_from_entry(entry1) - n_entry2 = exclude_operation_outcomes_from_entry(entry2) # First we check the obvious difference and then use deepdiff - if len(n_entry1) != len(n_entry2) or diffyng(n_entry1, n_entry2): + if len(entry1) != len(entry2) or diffyng(entry1, entry2): print("NEW FILE FOUND, COPYING DATA TO LAYER2") lambda_client = boto3.client('lambda') payload = { @@ -43,17 +38,24 @@ def lambda_handler(event, context): else: print("> Same file") +# This can exclude elements from the path +# see: https://zepworks.com/deepdiff/current/ignore_types_or_values.html#exclude-obj-callback +def exclude_fullUrl(obj, path): + return True if "fullUrl" in path else False + def exclude_operation_outcomes_from_entry(entries): - new_entries = [] for entry in entries: - if entry['resource']['resourceType'] != 'OperationOutcome': - new_entries.append(entry) - print(len(new_entries)) - return new_entries + if entry['resource']['resourceType'] == 'OperationOutcome': + entries.remove(entry) + print(len(entries)) + return entries def diffyng(entry1, entry2): + n_entry1 = exclude_operation_outcomes_from_entry(entry1) + n_entry2 = exclude_operation_outcomes_from_entry(entry2) + print('using diffyng lib') - d = DeepDiff(entry1, entry2, ignore_order=True) + d = DeepDiff(n_entry1, n_entry2, ignore_order=True) print(d) return d From ce9a36b707d2bbb4168ee28b6a3867b4f4d5482c Mon Sep 17 00:00:00 2001 From: Alex Cabrera Date: Tue, 2 Aug 2022 11:41:38 -0700 Subject: [PATCH 3/3] logs --- src/main.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main.py b/src/main.py index 481e4bb..475968a 100644 --- a/src/main.py +++ b/src/main.py @@ -20,6 +20,8 @@ def lambda_handler(event, context): entry1 = json1.get('entry', []) entry2 = json2.get('entry', []) + #print(f'initial file1 entries size: {len(entry1)}') + #print(f'initial file2 entries size: {len(entry2)}') # First we check the obvious difference and then use deepdiff if len(entry1) != len(entry2) or diffyng(entry1, entry2): @@ -47,14 +49,15 @@ def exclude_operation_outcomes_from_entry(entries): for entry in entries: if entry['resource']['resourceType'] == 'OperationOutcome': entries.remove(entry) - print(len(entries)) return entries def diffyng(entry1, entry2): n_entry1 = exclude_operation_outcomes_from_entry(entry1) n_entry2 = exclude_operation_outcomes_from_entry(entry2) + #print(f'relevant file1 entries size: {len(entry1)}') + #print(f'relevant file2 entries size: {len(entry2)}') - print('using diffyng lib') + print('Diffyng...') d = DeepDiff(n_entry1, n_entry2, ignore_order=True) print(d) return d