Skip to content

Commit 5491714

Browse files
committed
Purge the communicator before freeing it on MPICH 4+.
MPICH from version 4.0 onward appears to misbehave when a message is still pending on the communicator at the time it is freed.
1 parent 2351334 commit 5491714

File tree

2 files changed

+23
-0
lines changed

2 files changed

+23
-0
lines changed

src/runtime-libraries/mpi/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,10 @@ if (mpi_version_out MATCHES "[Oo]pen[ -][Mm][Pp][Ii]")
154154
if(NOT DEFINED ENV{TRAVIS})
155155
message( STATUS "Open-MPI back end detected, passing --allow-run-as-root to allow tests to pass when run with sudo or as root." )
156156
endif()
157+
elseif (mpi_version_out MATCHES "HYDRA")
158+
message(STATUS "MPICH detected")
159+
target_compile_definitions(caf_mpi PRIVATE MPI_CLEAR_COMM_BEFORE_FREE)
160+
target_compile_definitions(caf_mpi_static PRIVATE MPI_CLEAR_COMM_BEFORE_FREE)
157161
endif ()
158162

159163
if("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "GNU")

src/runtime-libraries/mpi/mpi_caf.c

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1104,6 +1104,24 @@ finalize_internal(int status_code)
11041104
ierr = MPI_Finalize(); chk_err(ierr);
11051105
}
11061106
#else
1107+
#ifdef MPI_CLEAR_COMM_BEFORE_FREE
1108+
{
1109+
int probe_flag;
1110+
MPI_Status status;
1111+
do {
1112+
ierr = MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, CAF_COMM_WORLD, &probe_flag,
1113+
&status); /* error is not of interest. */
1114+
if (probe_flag) {
1115+
int cnt;
1116+
MPI_Get_count(&status, MPI_BYTE, &cnt);
1117+
void * buf = alloca(cnt);
1118+
ierr = MPI_Recv(buf, cnt, MPI_BYTE, status.MPI_SOURCE, status.MPI_TAG,
1119+
CAF_COMM_WORLD, &status); chk_err(ierr);
1120+
}
1121+
} while (probe_flag);
1122+
}
1123+
#endif
1124+
dprint("freeing caf's communicator.\n");
11071125
ierr = MPI_Comm_free(&CAF_COMM_WORLD); chk_err(ierr);
11081126

11091127
CAF_Win_unlock_all(*stat_tok);
@@ -1112,6 +1130,7 @@ finalize_internal(int status_code)
11121130
/* Only call Finalize if CAF runtime Initialized MPI. */
11131131
if (caf_owns_mpi)
11141132
{
1133+
dprint("Finalizing MPI.\n");
11151134
ierr = MPI_Finalize(); chk_err(ierr);
11161135
}
11171136
#endif

0 commit comments

Comments
 (0)