Skip to content

Commit d6b7655

Browse files
author
rhc54
committed
Merge pull request #730 from rhc54/topic/finalize
Fix application finalize race condition in RML
2 parents 6929aca + 4853457 commit d6b7655

File tree

1 file changed

+28
-5
lines changed

1 file changed

+28
-5
lines changed

orte/mca/rml/base/rml_base_frame.c

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
#include "orte/mca/rml/rml.h"
2828
#include "orte/mca/state/state.h"
29+
#include "orte/runtime/orte_wait.h"
2930
#include "orte/util/name_fns.h"
3031

3132
#include "orte/mca/rml/base/base.h"
@@ -74,14 +75,36 @@ static int orte_rml_base_register(mca_base_register_flag_t flags)
7475
return ORTE_SUCCESS;
7576
}
7677

77-
static int orte_rml_base_close(void)
78+
/*
 * cleanup - tear down the RML posted-recvs list and, optionally, signal
 * the caller that the teardown has finished.
 *
 * The (sd, args, cbdata) signature lets this function be registered as
 * an event callback (it is passed to opal_event_set() in
 * orte_rml_base_close) as well as being called directly.
 *
 * sd, args  not referenced in the body.
 * cbdata    either NULL or a pointer to a bool completion flag; when
 *           non-NULL the flag is set to false after the list has been
 *           handed to OPAL_LIST_DESTRUCT.  The direct call site passes
 *           NULL, the event path passes the address of its "active" flag.
 *
 * NOTE(review): this span is a rendered diff.  Lines preceded by an
 * "N-" gutter marker appear to be the removed side (the old in-line
 * remove-first / OBJ_RELEASE loop and OBJ_DESTRUCT from
 * orte_rml_base_close) and are not part of cleanup(); presumably
 * OPAL_LIST_DESTRUCT covers the same release-then-destruct work --
 * confirm against the OPAL list header.
 */
static void cleanup(int sd, short args, void *cbdata)
7879
{
79-
opal_list_item_t *item;
80+
bool *active = (bool*)cbdata;
8081

81-
while (NULL != (item = opal_list_remove_first(&orte_rml_base.posted_recvs))) {
82-
OBJ_RELEASE(item);
82+
OPAL_LIST_DESTRUCT(&orte_rml_base.posted_recvs);
83+
if (NULL != active) {
84+
*active = false;
8385
}
84-
OBJ_DESTRUCT(&orte_rml_base.posted_recvs);
86+
}
87+
88+
static int orte_rml_base_close(void)
89+
{
90+
bool active;
91+
92+
/* because the RML posted recvs list is in a separate
93+
* async thread for apps, we can't just destruct it here.
94+
* Instead, we push it into that event thread and destruct
95+
* it there */
96+
if (ORTE_PROC_IS_APP) {
97+
opal_event_t ev;
98+
active = true;
99+
opal_event_set(orte_event_base, &ev, -1,
100+
OPAL_EV_WRITE, cleanup, &active);
101+
opal_event_set_priority(&ev, ORTE_ERROR_PRI);
102+
opal_event_active(&ev, OPAL_EV_WRITE, 1);
103+
ORTE_WAIT_FOR_COMPLETION(active);
104+
} else {
105+
/* we can call the destruct directly */
106+
cleanup(0, 0, NULL);
107+
}
85108

86109
OPAL_TIMING_REPORT(orte_rml_base.timing, &tm_rml);
87110
OBJ_DESTRUCT(&orte_rml_base.open_channels);

0 commit comments

Comments
 (0)