Skip to content

Commit 3ecc0c1

Browse files
authored
Merge pull request #46 from CBIIT/CRDCDH-2168-001-pgu
CRDCDH 2168
2 parents 7315ad6 + a5e7b51 commit 3ecc0c1

File tree

6 files changed

+79
-55
lines changed

6 files changed

+79
-55
lines changed

README-technical.md

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,6 @@ Usage of the CLI tool:
5757
The following arguments are needed to read important data from the manifest; they are conditionally required when type = “data file”
5858

5959
-f --manifest, path to the manifest file, conditionally required when type = “data file”
60-
-n --name-field
61-
-s --size-field
62-
-m --md5-field
63-
-i --id-field
64-
-o --omit-DCF-prefix
6560

6661
CLI configuration module will validate and combine parameters from CLI and/or config file
6762
If config_file is given, then everything else is potentially optional (if it’s included in config file)

README.md

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -90,12 +90,6 @@ You can put a manifest in the same folder with the data files, or you can put it
9090
- data: local path to the folder that contains the data files to be uploaded
9191
- manifest: local path to the manifest file
9292
- id-field: column name in the manifest file that contains file IDs(Keys). Please refer to the data model to determine which property is the ID/Key property.
93-
- omit-DCF-prefix: for most data commons, this should be set to “false”. One exception is ICDC, which should be set to “true”.
94-
- name-field: column name in the manifest file that contains data file names
95-
- size-field: column name in the manifest file that contains data file sizes
96-
- md5-field: column name in the manifest file that contains data file MD5 checksums
97-
- id-field: column name in the manifest file that contains data file ID
98-
- omit-DCF-prefix: boolean to define if need DCF prefix "dg.4DFC"
9993
- retries: number of retries the CLI tool will perform after a failed upload
10094
- overwrite: if set to “true”, CLI will upload a data file to overwrite the data file with same name that already exists in the Data Hub target storage. If set to “false”, CLI will not upload a data file if a data file with the same name exists in the Data Hub target storage.
10195
- dryrun: if set to “true”, CLI will not upload any data files to the Data Hub target storage. If set to “false”, CLI will upload data files to the Data Hub target storage.

configs/uploader-file-config.example.yml

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,21 +17,6 @@ Config:
1717
# path to the manifest file, conditionally required when type = "data file"
1818
manifest: /path_to_the_manifest_file
1919

20-
# Property name for Key/ID property of file node. For example, "file_id" is the Key/ID property of file node in CDS.
21-
id-field: file_id
22-
23-
# Whether to omit DCF prefix when generating file IDs. For example, false means include DCF prefix when generating file IDs.
24-
omit-DCF-prefix: false
25-
26-
# file name header name in the manifest file. For example, "file_name" is the header name for CDS.
27-
name-field: file_name
28-
29-
# file size header name in the manifest file. For example, "file_size" is the header name for CDS.
30-
size-field: file_size
31-
32-
# file md5 header name in the manifest file. For example, "md5sum" is the header name for CDS.
33-
md5-field: md5sum
34-
3520
# file uploading retries
3621
retries: 3
3722

src/common/graphql_client.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,43 @@ def update_batch(self, batchID, uploaded_files):
145145
self.log.debug(e)
146146
self.log.exception(f'Update batch failed - internal error. Please try again and contact the helpdesk if this error persists.')
147147
return False
148+
149+
# 4) get_data_file_config()
150+
def get_data_file_config(self, submissionID):
    """Retrieve the data file (file node) configuration of a submission.

    Posts the ``retrieveFileNodeConfig`` GraphQL query to ``self.url`` using
    ``self.headers`` and requests the ``id_field``, ``name_field``,
    ``size_field``, ``md5_field`` and ``omit_DCF_prefix`` properties.

    Args:
        submissionID: ID of the submission whose file node configuration
            is requested.

    Returns:
        tuple: ``(True, config)`` when the API answers with a non-empty
        ``retrieveFileNodeConfig`` object, otherwise ``(False, None)``
        (non-200 status, GraphQL errors, empty payload, or any exception).
        Failures are logged, never raised.
    """
    import json  # local import: only needed to escape the string literal

    # Encode the ID as a quoted, escaped string literal instead of pasting it
    # raw between quotes, so a quote or backslash in the ID cannot break (or
    # alter) the query. For ordinary IDs the result is identical to the
    # previous "\"{submissionID}\"" interpolation.
    submission_literal = json.dumps(str(submissionID), ensure_ascii=False)
    body = f"""
    query {{
        retrieveFileNodeConfig (submissionID: {submission_literal}) {{
            id_field,
            name_field,
            size_field,
            md5_field,
            omit_DCF_prefix
        }}
    }}
    """
    try:
        response = requests.post(url=self.url, headers=self.headers, json={"query": body})
        status = response.status_code
        self.log.info(f"get_data_file_config response status code: {status}.")
        if status != 200:
            self.log.error(f'Get data file config failed (code: {status}) - internal error. Please try again and contact the helpdesk if this error persists.')
            return False, None

        results = response.json()
        errors = results.get("errors")
        if errors:
            self.log.error(f'Get data file config failed: {errors[0].get("message")}.')
            return False, None

        # "data" may be null in a GraphQL response; treat that like a
        # missing configuration instead of raising AttributeError.
        data_file_config = (results.get("data") or {}).get("retrieveFileNodeConfig")
        if data_file_config:
            return True, data_file_config

        self.log.error('Get data file config failed!')
        return False, None
    except Exception as e:
        # Boundary handler: callers expect a (bool, config) tuple, not an exception.
        self.log.debug(e)
        self.log.exception('Get data file config failed - internal error. Please try again and contact the helpdesk if this error persists.')
        return False, None
148185

149186

150187

src/upload_config.py

Lines changed: 26 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,6 @@ def __init__(self):
2121
parser.add_argument('--dryrun', default=False, type=bool, help='Only check original file, won\'t copy any files, optional, default is false')
2222
#args for data file type
2323
parser.add_argument('-f', '--manifest', help='path to manifest file, conditional required when type = “data file"')
24-
parser.add_argument('-n', '--name-field', help='header file name in manifest, optional, default value is "file_name"')
25-
parser.add_argument('-s', '--size-field', help='header file size in manifest, optional, default value is "file_size"')
26-
parser.add_argument('-m', '--md5-field', help='header md5 name in manifest, optional, default value is "md5sum"')
27-
parser.add_argument('-i', '--id-field', help='header file ID name in manifest, optional, default value is "file_id"')
28-
parser.add_argument('-o', '--omit-DCF-prefix', help='boolean to define if need DCF prefix "dg.4DFC"')
2924

3025
parser.add_argument('-r', '--retries', default=3, type=int, help='file uploading retries, optional, default value is 3')
3126

@@ -103,7 +98,7 @@ def validate(self):
10398
return False
10499
elif type not in UPLOAD_TYPES:
105100
self.log.critical(f'Configuration error in "type": “{type}” is not valid. Valid “type” value can be one of [“data file”, “metadata”]')
106-
return False
101+
return False
107102
else:
108103
if type == TYPE_FILE: #data file
109104
#check manifest
@@ -117,27 +112,6 @@ def validate(self):
117112
return False
118113

119114
self.data[PRE_MANIFEST] = manifest
120-
#check header names in manifest file
121-
file_name_header= self.data.get(FILE_NAME_FIELD)
122-
if file_name_header is None:
123-
self.data[FILE_NAME_FIELD] = FILE_NAME_DEFAULT
124-
125-
file_size_header = self.data.get(FILE_SIZE_FIELD)
126-
if file_size_header is None:
127-
self.data[FILE_SIZE_FIELD] = FILE_SIZE_DEFAULT
128-
129-
md5_header = self.data.get(FILE_MD5_FIELD)
130-
if md5_header is None:
131-
self.data[FILE_MD5_FIELD] = MD5_DEFAULT
132-
133-
file_id_header= self.data.get(FILE_ID_FIELD)
134-
if file_id_header is None:
135-
self.log.critical(f'file id field is required.')
136-
return False
137-
138-
omit_dcf_prefix = self.data.get(OMIT_DCF_PREFIX)
139-
if omit_dcf_prefix is None:
140-
self.data[OMIT_DCF_PREFIX] = False
141115

142116
filepath = self.data.get(FILE_DIR)
143117
if filepath is None:
@@ -154,4 +128,29 @@ def validate(self):
154128
self.data[FROM_S3] = True
155129

156130
return True
131+
132+
def validate_file_config(self, data_file_config):
    """Validate the retrieved data file config and merge it into ``self.data``.

    Stores the manifest column names for file name, size and MD5 (falling
    back to ``FILE_NAME_DEFAULT``, ``FILE_SIZE_DEFAULT`` and ``MD5_DEFAULT``
    when a value is missing or empty), the required file ID column name,
    and the omit-DCF-prefix flag (``False`` when not provided).

    Args:
        data_file_config: mapping with the data file configuration; it is
            read with ``.get`` using the field constants with "-" replaced
            by "_".

    Returns:
        bool: ``True`` when the configuration is usable; ``False`` when the
        file ID field is missing or empty, in which case ``self.data`` is
        left untouched.
    """
    def lookup(field):
        # The config is keyed with underscores while the field constants
        # apparently use hyphens (e.g. "id-field") - TODO confirm against
        # the constant definitions.
        return data_file_config.get(field.replace("-", "_"))

    # Validate the only required field first so that a failure does not
    # leave a partially updated self.data behind. An empty value is as
    # unusable as a missing one, so reject both.
    file_id_header = lookup(FILE_ID_FIELD)
    if not file_id_header:
        self.log.critical('file id field is required.')
        return False
    self.data[FILE_ID_FIELD] = file_id_header

    # Header names in the manifest file, with defaults for missing values.
    self.data[FILE_NAME_FIELD] = lookup(FILE_NAME_FIELD) or FILE_NAME_DEFAULT
    self.data[FILE_SIZE_FIELD] = lookup(FILE_SIZE_FIELD) or FILE_SIZE_DEFAULT
    self.data[FILE_MD5_FIELD] = lookup(FILE_MD5_FIELD) or MD5_DEFAULT

    # Keep an explicit False/True from the config; only None falls back to False.
    omit_dcf_prefix = lookup(OMIT_DCF_PREFIX)
    self.data[OMIT_DCF_PREFIX] = False if omit_dcf_prefix is None else omit_dcf_prefix

    return True
154+
155+
157156

src/uploader.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,24 @@ def controller():
2828
log.error("Failed to upload files: missing required valid parameter(s)!")
2929
print("Failed to upload files: invalid parameter(s)! Please check log file in tmp folder for details.")
3030
return 1
31-
31+
3232
#step 2: validate file or metadata
3333
configs = config.data
34+
apiInvoker = APIInvoker(configs)
35+
# get data file config
36+
if configs[UPLOAD_TYPE] == TYPE_FILE:
37+
# retrieve data file configuration
38+
result, data_file_config = apiInvoker.get_data_file_config(configs["submission"])
39+
if not result or not data_file_config:
40+
log.error("Failed to upload files: can't get data file config!")
41+
print("Failed to upload files: can't get data file config! Please check log file in tmp folder for details.")
42+
return 1
43+
44+
if not config.validate_file_config(data_file_config):
45+
log.error("Failed to upload files: invalid file config!")
46+
print("Failed to upload files: invalid file config! Please check log file in tmp folder for details.")
47+
return 1
48+
3449
validator = FileValidator(configs)
3550
if not validator.validate():
3651
log.error("Failed to upload files: found invalid file(s)!")
@@ -40,7 +55,6 @@ def controller():
4055

4156
if validator.invalid_count == 0:
4257
#step 3: create a batch
43-
apiInvoker = APIInvoker(configs)
4458
# file_array = [{"fileName": item[FILE_NAME_DEFAULT], "size": item[FILE_SIZE_DEFAULT]} for item in file_list]
4559
file_array = [ item[FILE_NAME_DEFAULT] for item in file_list]
4660
newBatch = None

0 commit comments

Comments
 (0)