Skip to content

Commit 4a40f8a

Browse files
enable additional json (#54)
* implement additional personal data * enable additional json file * Update documents.csv * Update .test documents.csv * error correction
1 parent eedbca4 commit 4a40f8a

File tree

5 files changed

+29
-10
lines changed

5 files changed

+29
-10
lines changed

.tests/config/pep/documents.csv

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
sample_name,fhir_metadata,compressed_docs
2-
1,data/1111111111.otto_normalverbraucher_jpgs.patient.json,data/1111111111.otto_normalverbraucher_jpgs.tar.lz4
3-
2,data/2222222222.maxime_mustermann_jpgs.patient.json,data/2222222222.maxime_mustermann_jpgs.tar.lz4
4-
3,data/v00076462.maxime_mustermann_tiff.patient.json,data/v00076462.maxime_mustermann_tiff.tar.lz4
1+
sample_name,fhir_metadata,compressed_docs,additional_metadata
2+
1,data/1111111111.otto_normalverbraucher_jpgs.patient.json,data/1111111111.otto_normalverbraucher_jpgs.tar.lz4,
3+
2,data/2222222222.maxime_mustermann_jpgs.patient.json,data/2222222222.maxime_mustermann_jpgs.tar.lz4,
4+
3,data/v00076462.maxime_mustermann_tiff.patient.json,data/v00076462.maxime_mustermann_tiff.tar.lz4,

config/pep/documents.csv

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
sample_name,fhir_metadata,compressed_docs
2-
1,data/fhire_meta_data.json,data/path_to.lz4
1+
sample_name,fhir_metadata,compressed_docs,additional_metadata
2+
1,data/fhire_meta_data.json,data/path_to.lz4,data/path_to_additional_metadata.json

workflow/rules/common.smk

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,18 @@ def get_compressed_docs(wildcards):
3131
def get_fhir_metadata(wildcards):
3232
return pep.sample_table.loc[wildcards.id][["fhir_metadata"]]
3333

34+
def get_additional_metadata(wildcards):
35+
if "additional_metadata" in pep.sample_table.columns:
36+
if type(pep.sample_table.loc[wildcards.id]["additional_metadata"]) == str:
37+
return pep.sample_table.loc[wildcards.id][["additional_metadata"]]
38+
else:
39+
return []
40+
else:
41+
return []
3442

3543
def get_all_ids():
3644
return pep.sample_table["sample_name"].to_list()
3745

38-
3946
def get_image_paths_for_id(wildcards):
4047
with checkpoints.fix_file_ext.get(id=wildcards.id).output[0].open() as f:
4148
paths = pd.read_csv(f, sep="\n", header=None, squeeze=True)

workflow/rules/extract-data.smk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
rule extract_personal_data:
22
input:
33
get_fhir_metadata,
4+
get_additional_metadata,
45
output:
56
temp("results/{id}/tmp/personal-data.json"),
67
log:
78
"logs/{id}/extract_personal_data.log",
89
script:
910
"../scripts/extract-personal-data.py"
1011

11-
1212
rule extract_lz4_docs:
1313
input:
1414
get_compressed_docs,

workflow/scripts/extract-personal-data.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def parse_meta_data(json_path: str) -> defaultdict:
2020
with open(json_path) as json_file:
2121
data = json.load(json_file)
2222

23-
# select the patient reosurce from the bundel data export
23+
# select the patient resource from the bundle data export
2424
for ele in data.get("entry", {}):
2525
# iterate over entries
2626
for key, value in ele.get("resource", {}).items():
@@ -141,6 +141,13 @@ def variate_personal_data(personal_data: dict, first_name_count: int) -> default
141141

142142
return personal_data
143143

144+
def add_additional_personal_data(add_json_path: str, personal_data: dict) -> defaultdict:
145+
# if an additional data file exist, this data will be added to the personal data json file
146+
with open(add_json_path) as json_file:
147+
additional_data = json.load(json_file)
148+
personal_data.update(additional_data)
149+
150+
return personal_data
144151

145152
def save_personal_data(personal_data: dict, out_path: str):
146153
"""Save the final dict with the personal data as JSON.
@@ -169,7 +176,12 @@ def save_personal_data(personal_data: dict, out_path: str):
169176

170177
# personal_data = {key: value.lower().strip() for key, value in personal_data.items()}
171178
var_data = {key: value.lower().strip() for key, value in var_data.items()}
172-
save_personal_data(var_data, snakemake.output[0])
179+
180+
if len(snakemake.input) > 1:
181+
add_data = add_additional_personal_data(snakemake.input[1], var_data)
182+
save_personal_data(add_data, snakemake.output[0])
183+
else:
184+
save_personal_data(var_data, snakemake.output[0])
173185

174186
# def enrich_personal_data(personal_data: dict) -> dict:
175187
# personal_data["names_combined"] = ",".join(

0 commit comments

Comments
 (0)