Skip to content

Commit 68b2dcd

Browse files
authored
Merge pull request #40 from CBIIT/CRDCDH-2043-0012-pgu
Crdcdh 2043
2 parents 43f3110 + b2a884c commit 68b2dcd

File tree

4 files changed

+47
-10
lines changed

4 files changed

+47
-10
lines changed

src/common/s3util.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,23 @@ def set_s3_client(self, bucket, credentials):
3232
self.bucket = self.s3.Bucket(bucket)
3333
self.credential = None
3434

35-
35+
def file_exists_on_s3(self, key):
    '''
    Check if a file exists in S3, return True only if the file exists.

    Sends a HEAD request for ``key`` to the bucket named by
    ``self.bucket.name`` through ``self.client``.

    :param key: file path (object key) inside the bucket
    :return: boolean - True when the HEAD request succeeds; False when S3
        answers with error code 404 or 412, and also False (after logging)
        for any other client error
    '''
    try:
        self.client.head_object(Bucket=self.bucket.name, Key=key)
    except ClientError as e:
        # Look the error code up defensively: a response without the
        # 'Error'/'Code' entries must not raise KeyError from inside this
        # handler; it is treated like any other unknown client error.
        error_code = e.response.get('Error', {}).get('Code')
        if error_code not in ('404', '412'):
            # Anything other than "not found" / "precondition failed" is
            # unexpected, so record it before reporting "does not exist".
            self.log.error('Unknown S3 client error!')
            self.log.exception(e)
        return False
    return True
3652

3753
def put_file_obj(self, key, data, md5_base64):
3854
return self.bucket.put_object(Key=key,

src/file_uploader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def __init__(self, configs, file_list):
2626
self.prefix = configs[FILE_PREFIX]
2727
self.bucket_name = configs.get(S3_BUCKET)
2828
self.credential = configs.get(TEMP_CREDENTIAL)
29-
self.pre_manifest = configs.get(PRE_MANIFEST)
29+
# self.pre_manifest = configs.get(PRE_MANIFEST)
3030
self.file_info_list = file_list
3131
self.copier = None
3232
self.count = len(file_list)

src/file_validator.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import glob
55
from common.constants import UPLOAD_TYPE, TYPE_FILE, TYPE_MATE_DATA, FILE_NAME_DEFAULT, FILE_SIZE_DEFAULT, MD5_DEFAULT, \
66
FILE_DIR, FILE_MD5_FIELD, PRE_MANIFEST, FILE_NAME_FIELD, FILE_SIZE_FIELD, FILE_PATH, SUCCEEDED, ERRORS, FILE_ID_DEFAULT,\
7-
FILE_ID_FIELD, OMIT_DCF_PREFIX, FROM_S3, TEMP_DOWNLOAD_DIR
7+
FILE_ID_FIELD, OMIT_DCF_PREFIX, FROM_S3, TEMP_DOWNLOAD_DIR, S3_START
88
from common.utils import clean_up_key_value, clean_up_strs, is_valid_uuid
99
from bento.common.utils import get_logger, get_md5
1010
from common.utils import extract_s3_info_from_url
@@ -56,9 +56,6 @@ def validate(self):
5656
self.fileList.append({FILE_NAME_DEFAULT:filename, FILE_PATH: filepath, FILE_SIZE_DEFAULT: size})
5757

5858
elif self.uploadType == TYPE_FILE: #file
59-
if not os.path.isfile(self.pre_manifest):
60-
self.log.critical(f'manifest file is not valid!')
61-
return False
6259
try:
6360
return self.validate_size_md5()
6461
except Exception as e:
@@ -155,6 +152,29 @@ def read_manifest(self):
155152
files_info = []
156153
files_dict = {}
157154
manifest_rows = []
155+
is_s3_manifest = self.pre_manifest.startswith(S3_START)
156+
if is_s3_manifest:
157+
s3_bucket = None
158+
bucket_name, key = extract_s3_info_from_url(self.pre_manifest)
159+
self.download_file_dir = TEMP_DOWNLOAD_DIR
160+
os.makedirs(self.download_file_dir, exist_ok=True)
161+
local_manifest = os.path.join(self.download_file_dir, key.split('/')[-1])
162+
try:
163+
s3_bucket = S3Bucket()
164+
s3_bucket.set_s3_client(bucket_name, None)
165+
if s3_bucket.file_exists_on_s3(key) == False:
166+
self.log.critical(f"Manifest file {self.pre_manifest} does not exist!")
167+
return None
168+
s3_bucket.download_object(key, local_manifest)
169+
self.pre_manifest = self.configs[PRE_MANIFEST] = local_manifest
170+
except Exception as e:
171+
self.log.debug(e)
172+
self.log.exception(f"Downloading manifest failed - internal error. Please try again and contact the helpdesk if this error persists.")
173+
return None
174+
finally:
175+
if s3_bucket:
176+
s3_bucket.close()
177+
158178
try:
159179
with open(self.pre_manifest) as pre_m:
160180
reader = csv.DictReader(pre_m, delimiter='\t')

src/upload_config.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,10 +111,11 @@ def validate(self):
111111
if manifest is None:
112112
self.log.critical(f'Please provide “manifest” in configuration file or command line argument.')
113113
return False
114-
if not os.path.isfile(manifest):
115-
self.log.critical(f'Manifest file “{manifest}” is not readable. Please make sure the path is correct and the file is readable.')
116-
return False
117-
114+
if not manifest.startswith(S3_START):
115+
if not os.path.isfile(manifest):
116+
self.log.critical(f'Manifest file “{manifest}” is not readable. Please make sure the path is correct and the file is readable.')
117+
return False
118+
118119
self.data[PRE_MANIFEST] = manifest
119120
#check header names in manifest file
120121
file_name_header= self.data.get(FILE_NAME_FIELD)

0 commit comments

Comments
 (0)