Skip to content

Commit 7c118b2

Browse files
Client Propagate HG_Finalize error on PDCclose (#263)
* all but 4 close errors are fixed
* Committing clang-format changes
* client side HG_Finalize now passes on serial tests
* Committing clang-format changes
* cleanup
* Committing clang-format changes
* Update pdc_region_transfer.c
* free bulk handles during region transfer close

---------

Co-authored-by: github-actions <github-actions[bot]@users.noreply.github.com>
1 parent 2c20971 commit 7c118b2

File tree

10 files changed

+202
-70
lines changed

10 files changed

+202
-70
lines changed

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ src/install
1111
# ignore vscode files
1212
.vscode
1313

14+
# ignore intellij files
15+
.idea
16+
1417
# ignore macos files
1518
.DS_Store
1619

@@ -24,4 +27,4 @@ docs/build
2427
**/*venv
2528

2629
# ignore .zed
27-
.zed
30+
.zed

src/api/include/pdc_client_connect.h

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ uint32_t PDC_get_client_data_server();
208208
perr_t PDC_Client_read_server_addr_from_file();
209209

210210
/**
211-
* Client request of an obj id by sending object name
211+
* Client request of an obj id by sending object name.
212212
*
213213
* \param obj_name [IN] Name of the object
214214
* \param cont_id[IN] Container ID (obtained from metadata server)
@@ -220,32 +220,37 @@ perr_t PDC_Client_read_server_addr_from_file();
220220
perr_t PDC_Client_send_name_recv_id(const char *obj_name, uint64_t cont_id, pdcid_t obj_create_prop,
221221
pdcid_t *meta_id, uint32_t *data_server_id, uint32_t *metadata_server_id);
222222

223-
perr_t PDC_Client_transfer_request(void *buf, pdcid_t obj_id, uint32_t data_server_id, int obj_ndim,
224-
uint64_t *obj_dims, int remote_ndim, uint64_t *remote_offset,
223+
/**
224+
* The bulk_handle pointer is set to the bulk handle created.
225+
* The caller is responsible for calling HG_Bulk_free
226+
*/
227+
perr_t PDC_Client_transfer_request(hg_bulk_t *bulk_handle, void *buf, pdcid_t obj_id, uint32_t data_server_id,
228+
int obj_ndim, uint64_t *obj_dims, int remote_ndim, uint64_t *remote_offset,
225229
uint64_t *remote_size, size_t unit, pdc_access_t access_type,
226230
pdcid_t *metadata_id);
227231

228232
int PDC_Client_get_var_type_size(pdc_var_type_t dtype);
229233

230-
perr_t PDC_Client_transfer_request_all(int n_objs, pdc_access_t access_type, uint32_t data_server_id,
231-
char *bulk_buf, hg_size_t bulk_size, uint64_t *metadata_id,
234+
perr_t PDC_Client_transfer_request_all(hg_bulk_t *bulk_handle, int n_objs, pdc_access_t access_type,
235+
uint32_t data_server_id, char *bulk_buf, hg_size_t bulk_size,
236+
uint64_t *metadata_id,
232237
#ifdef ENABLE_MPI
233238
MPI_Comm comm);
234239
#else
235240
int comm);
236241
#endif
237242

238-
perr_t PDC_Client_transfer_request_metadata_query(char *buf, uint64_t total_buf_size, int n_objs,
239-
uint32_t metadata_server_id, uint8_t is_write,
243+
perr_t PDC_Client_transfer_request_metadata_query(hg_bulk_t *bulk_handle, char *buf, uint64_t total_buf_size,
244+
int n_objs, uint32_t metadata_server_id, uint8_t is_write,
240245
uint64_t *output_buf_size, uint64_t *query_id);
241246

242-
perr_t PDC_Client_transfer_request_metadata_query2(char *buf, uint64_t total_buf_size, uint64_t query_id,
243-
uint32_t metadata_server_id);
247+
perr_t PDC_Client_transfer_request_metadata_query2(hg_bulk_t *bulk_handle, char *buf, uint64_t total_buf_size,
248+
uint64_t query_id, uint32_t metadata_server_id);
244249

245250
perr_t PDC_Client_transfer_request_status(pdcid_t transfer_request_id, uint32_t data_server_id,
246251
pdc_transfer_status_t *completed);
247252

248-
perr_t PDC_Client_transfer_request_wait_all(int n_objs, pdcid_t *transfer_request_id,
253+
perr_t PDC_Client_transfer_request_wait_all(hg_bulk_t *bulk_handle, int n_objs, pdcid_t *transfer_request_id,
249254
uint32_t data_server_id);
250255

251256
perr_t PDC_Client_transfer_request_wait(pdcid_t transfer_request_id, uint32_t data_server_id,

src/api/pdc.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,8 @@ PDCclose(pdcid_t pdcid)
213213

214214
pdc_id_list_g = (struct pdc_id_list *)(intptr_t)PDC_free(pdc_id_list_g);
215215

216-
// Finalize METADATA
217-
PDC_Client_finalize();
216+
if (PDC_Client_finalize() != SUCCEED)
217+
PGOTO_ERROR(FAIL, "Error with PDC_Client_finalize");
218218

219219
done:
220220
FUNC_LEAVE(ret_value);

src/api/pdc_client_connect.c

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1626,13 +1626,11 @@ PDC_Client_finalize()
16261626
LOG_INFO("T_memcpy: %.2f\n", memcpy_time_g);
16271627
#endif
16281628

1629-
hg_ret = HG_Context_destroy(send_context_g);
1630-
if (hg_ret != HG_SUCCESS)
1629+
if (HG_Context_destroy(send_context_g) != HG_SUCCESS)
16311630
PGOTO_ERROR(FAIL, "Error with HG_Context_destroy");
16321631

1633-
hg_ret = HG_Finalize(send_class_g);
1634-
if (hg_ret != HG_SUCCESS)
1635-
LOG_WARNING("Error with HG_Finalize\n");
1632+
if (HG_Finalize(send_class_g) != HG_SUCCESS)
1633+
PGOTO_ERROR(FAIL, "Error with HG_Finalize");
16361634

16371635
done:
16381636
FUNC_LEAVE(ret_value);
@@ -2974,8 +2972,9 @@ PDC_Client_flush_obj_all()
29742972
}
29752973

29762974
perr_t
2977-
PDC_Client_transfer_request_all(int n_objs, pdc_access_t access_type, uint32_t data_server_id, char *bulk_buf,
2978-
hg_size_t bulk_size, uint64_t *metadata_id,
2975+
PDC_Client_transfer_request_all(hg_bulk_t *bulk_handle, int n_objs, pdc_access_t access_type,
2976+
uint32_t data_server_id, char *bulk_buf, hg_size_t bulk_size,
2977+
uint64_t *metadata_id,
29792978
#ifdef ENABLE_MPI
29802979
MPI_Comm comm)
29812980
#else
@@ -3015,8 +3014,9 @@ PDC_Client_transfer_request_all(int n_objs, pdc_access_t access_type, uint32_t d
30153014
transfer_request_all_register_id_g, &client_send_transfer_request_all_handle);
30163015

30173016
// Create bulk handles
3018-
hg_ret = HG_Bulk_create(hg_class, 1, (void **)&bulk_buf, &bulk_size, HG_BULK_READWRITE,
3017+
hg_ret = HG_Bulk_create(hg_class, 1, (void **)&bulk_buf, &bulk_size, HG_BULK_READWRITE,
30193018
&(in.local_bulk_handle));
3019+
*bulk_handle = in.local_bulk_handle;
30203020
if (hg_ret != HG_SUCCESS)
30213021
PGOTO_ERROR(FAIL, "Could not create local bulk data handle");
30223022

@@ -3076,8 +3076,8 @@ PDC_Client_transfer_request_all(int n_objs, pdc_access_t access_type, uint32_t d
30763076
}
30773077

30783078
perr_t
3079-
PDC_Client_transfer_request_metadata_query2(char *buf, uint64_t total_buf_size, uint64_t query_id,
3080-
uint32_t metadata_server_id)
3079+
PDC_Client_transfer_request_metadata_query2(hg_bulk_t *bulk_handle, char *buf, uint64_t total_buf_size,
3080+
uint64_t query_id, uint32_t metadata_server_id)
30813081
{
30823082
FUNC_ENTER(NULL);
30833083

@@ -3110,6 +3110,7 @@ PDC_Client_transfer_request_metadata_query2(char *buf, uint64_t total_buf_size,
31103110
// For sending metadata
31113111
hg_ret = HG_Bulk_create(hg_class, 1, (void **)&buf, (hg_size_t *)&(in.total_buf_size), HG_BULK_READWRITE,
31123112
&(in.local_bulk_handle));
3113+
*bulk_handle = in.local_bulk_handle;
31133114
if (hg_ret != HG_SUCCESS)
31143115
PGOTO_ERROR(FAIL, "Could not create local bulk data handle");
31153116

@@ -3137,8 +3138,8 @@ PDC_Client_transfer_request_metadata_query2(char *buf, uint64_t total_buf_size,
31373138
}
31383139

31393140
perr_t
3140-
PDC_Client_transfer_request_metadata_query(char *buf, uint64_t total_buf_size, int n_objs,
3141-
uint32_t metadata_server_id, uint8_t is_write,
3141+
PDC_Client_transfer_request_metadata_query(hg_bulk_t *bulk_handle, char *buf, uint64_t total_buf_size,
3142+
int n_objs, uint32_t metadata_server_id, uint8_t is_write,
31423143
uint64_t *output_buf_size, uint64_t *query_id)
31433144
{
31443145
FUNC_ENTER(NULL);
@@ -3174,6 +3175,7 @@ PDC_Client_transfer_request_metadata_query(char *buf, uint64_t total_buf_size, i
31743175
// For sending metadata
31753176
hg_ret = HG_Bulk_create(hg_class, 1, (void **)&buf, (hg_size_t *)&(in.total_buf_size), HG_BULK_READWRITE,
31763177
&(in.local_bulk_handle));
3178+
*bulk_handle = in.local_bulk_handle;
31773179
if (hg_ret != HG_SUCCESS)
31783180
PGOTO_ERROR(FAIL, "Could not create local bulk data handle");
31793181

@@ -3203,7 +3205,8 @@ PDC_Client_transfer_request_metadata_query(char *buf, uint64_t total_buf_size, i
32033205
}
32043206

32053207
perr_t
3206-
PDC_Client_transfer_request_wait_all(int n_objs, pdcid_t *transfer_request_id, uint32_t data_server_id)
3208+
PDC_Client_transfer_request_wait_all(hg_bulk_t *bulk_handle, int n_objs, pdcid_t *transfer_request_id,
3209+
uint32_t data_server_id)
32073210
{
32083211
FUNC_ENTER(NULL);
32093212

@@ -3238,6 +3241,7 @@ PDC_Client_transfer_request_wait_all(int n_objs, pdcid_t *transfer_request_id, u
32383241
// For sending metadata
32393242
hg_ret = HG_Bulk_create(hg_class, 1, (void **)&transfer_request_id, (hg_size_t *)&(in.total_buf_size),
32403243
HG_BULK_READWRITE, &(in.local_bulk_handle));
3244+
*bulk_handle = in.local_bulk_handle;
32413245
if (hg_ret != HG_SUCCESS)
32423246
PGOTO_ERROR(FAIL, "Could not create local bulk data handle");
32433247

@@ -3269,8 +3273,8 @@ PDC_Client_transfer_request_wait_all(int n_objs, pdcid_t *transfer_request_id, u
32693273
}
32703274

32713275
perr_t
3272-
PDC_Client_transfer_request(void *buf, pdcid_t obj_id, uint32_t data_server_id, int obj_ndim,
3273-
uint64_t *obj_dims, int remote_ndim, uint64_t *remote_offset,
3276+
PDC_Client_transfer_request(hg_bulk_t *bulk_handle, void *buf, pdcid_t obj_id, uint32_t data_server_id,
3277+
int obj_ndim, uint64_t *obj_dims, int remote_ndim, uint64_t *remote_offset,
32743278
uint64_t *remote_size, size_t unit, pdc_access_t access_type,
32753279
pdcid_t *metadata_id)
32763280
{
@@ -3328,6 +3332,7 @@ PDC_Client_transfer_request(void *buf, pdcid_t obj_id, uint32_t data_server_id,
33283332
// Create bulk handle
33293333
hg_ret = HG_Bulk_create(hg_class, 1, (void **)&buf, (hg_size_t *)&total_data_size, HG_BULK_READWRITE,
33303334
&(in.local_bulk_handle));
3335+
*bulk_handle = in.local_bulk_handle;
33313336

33323337
if (hg_ret != HG_SUCCESS)
33333338
PGOTO_ERROR(FAIL, "Could not create local bulk data handle");

src/api/pdc_obj/pdc_cont.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,9 @@ PDCcont_create(const char *cont_name, pdcid_t cont_prop_id)
6969
PGOTO_ERROR(0, "PDC pub container memory allocation failed");
7070
p->cont_info_pub->name = strdup(cont_name);
7171

72-
id_info = PDC_find_id(cont_prop_id);
72+
id_info = PDC_find_id(cont_prop_id);
73+
if (id_info == NULL)
74+
PGOTO_ERROR(0, "Failed to find cont prop using pdcid");
7375
cont_prop = (struct _pdc_cont_prop *)(id_info->obj_ptr);
7476

7577
p->cont_pt = (struct _pdc_cont_prop *)PDC_calloc(1, sizeof(struct _pdc_cont_prop));

src/api/pdc_obj/pdc_obj.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,9 @@ PDC_obj_create(pdcid_t cont_id, const char *obj_name, pdcid_t obj_prop_id, _pdc_
169169
meta_id = p->cont->cont_info_pub->meta_id;
170170
}
171171

172-
id_info = PDC_find_id(obj_prop_id);
172+
id_info = PDC_find_id(obj_prop_id);
173+
if (id_info == NULL)
174+
PGOTO_ERROR(0, "Failed to find obj prop using pdcid");
173175
obj_prop = (struct _pdc_obj_prop *)(id_info->obj_ptr);
174176

175177
/* struct _pdc_obj_prop field */

src/api/pdc_obj/pdc_prop.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,10 @@ PDCprop_create(pdc_prop_type_t type, pdcid_t pdcid)
7474
new_id_c = PDC_id_register(PDC_CONT_PROP, p);
7575
p->cont_prop_id = new_id_c;
7676
id_info = PDC_find_id(pdcid);
77-
pdc_class = (struct _pdc_class *)(id_info->obj_ptr);
78-
p->pdc = (struct _pdc_class *)PDC_calloc(1, sizeof(struct _pdc_class));
77+
if (id_info == NULL)
78+
PGOTO_ERROR(0, "Failed to find prop using pdcid");
79+
pdc_class = (struct _pdc_class *)(id_info->obj_ptr);
80+
p->pdc = (struct _pdc_class *)PDC_calloc(1, sizeof(struct _pdc_class));
7981
if (p->pdc == NULL)
8082
PGOTO_ERROR(0, "PDC class allocation failed");
8183
if (pdc_class->name)
@@ -105,8 +107,10 @@ PDCprop_create(pdc_prop_type_t type, pdcid_t pdcid)
105107
new_id_o = PDC_id_register(PDC_OBJ_PROP, q);
106108
q->obj_prop_pub->obj_prop_id = new_id_o;
107109
id_info = PDC_find_id(pdcid);
108-
pdc_class = (struct _pdc_class *)(id_info->obj_ptr);
109-
q->pdc = (struct _pdc_class *)PDC_calloc(1, sizeof(struct _pdc_class));
110+
if (id_info == NULL)
111+
PGOTO_ERROR(0, "Failed to find pdc obj using pdcid");
112+
pdc_class = (struct _pdc_class *)(id_info->obj_ptr);
113+
q->pdc = (struct _pdc_class *)PDC_calloc(1, sizeof(struct _pdc_class));
110114
if (q->pdc == NULL)
111115
PGOTO_ERROR(0, "PDC class allocation failed");
112116
if (pdc_class->name)

0 commit comments

Comments
 (0)