Skip to content

Commit 1c78dab

Browse files
authored
Fix a thread race issue (#224)
* Fix a thread race issue that may cause a memory error when data larger than the cache's maximum size is transferred
* Add a test that writes more data than the server cache size
* Fix the CI run command
1 parent b8ad03f commit 1c78dab

File tree

3 files changed

+5
-2
lines changed

3 files changed

+5
-2
lines changed

.gitlab-ci.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,7 @@ perlmutter-cache-build:
108108
- module list
109109
- mkdir -p ${PDC_BUILD_PATH}/perlmutter/cache
110110
- cd ${PDC_BUILD_PATH}/perlmutter/cache
111-
- cmake ../../.. -DBUILD_MPI_TESTING=ON -DBUILD_SHARED_LIBS=ON -DPDC_SERVER_CACHE=ON -DBUILD_TESTING=ON -DCMAKE_INSTALL_PREFIX=$PDC_DIR -DPDC_ENABLE_MPI=ON -DMERCURY_DIR=$MERCURY_DIR -DCMAKE_C_COMPILER=cc -DMPI_RUN_CMD="srun -A ${PDC_PROJECT} --qos=debug --constraint=cpu --tasks-per-node=64" -DCMAKE_INSTALL_PREFIX=${PDC_INSTALL_PATH}/perlmutter/cache
111+
- cmake ../../.. -DBUILD_MPI_TESTING=ON -DBUILD_SHARED_LIBS=ON -DPDC_SERVER_CACHE=ON -DBUILD_TESTING=ON -DPDC_SERVER_CACHE_MAX_GB=1 -DCMAKE_INSTALL_PREFIX=$PDC_DIR -DPDC_ENABLE_MPI=ON -DMERCURY_DIR=$MERCURY_DIR -DCMAKE_C_COMPILER=cc -DMPI_RUN_CMD="srun -A ${PDC_PROJECT} --qos=debug --constraint=cpu --tasks-per-node=64" -DCMAKE_INSTALL_PREFIX=${PDC_INSTALL_PATH}/perlmutter/cache
112112
- make -j
113113
- make install
114114
artifacts:

src/server/pdc_server_region/pdc_server_region_cache.c

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -967,8 +967,9 @@ PDC_region_cache_clock_cycle(void *ptr)
967967
}
968968
}
969969
else {
970+
pthread_mutex_lock(&pdc_obj_cache_list_mutex);
970971
obj_cache_iter = obj_cache_iter->next;
971-
/* pthread_mutex_unlock(&pdc_obj_cache_list_mutex); */
972+
pthread_mutex_unlock(&pdc_obj_cache_list_mutex);
972973

973974
/* PDC_get_time_str(cur_time); */
974975
/* fprintf(stderr, "%s ==PDC_SERVER[%d]: stop flush to allow processing new RPCs\n", */

src/tests/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -494,6 +494,7 @@ if(BUILD_MPI_TESTING)
494494
add_test(NAME obj_put_data_mpi WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY} COMMAND mpi_test.sh ./obj_put_data ${MPI_RUN_CMD} 4 6 )
495495
add_test(NAME obj_get_data_mpi WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY} COMMAND mpi_test.sh ./obj_get_data ${MPI_RUN_CMD} 4 6 )
496496
add_test(NAME vpicio_bdcats_mpi WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY} COMMAND run_multiple_mpi_test.sh ${MPI_RUN_CMD} 4 6 ./vpicio ./bdcats)
497+
add_test(NAME vpicio_mpi_small_cache WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY} COMMAND mpi_test.sh ./vpicio ${MPI_RUN_CMD} 1 16)
497498

498499
set_tests_properties(read_obj_shared_int PROPERTIES LABELS "parallel;parallel_obj" )
499500
set_tests_properties(read_obj_shared_float PROPERTIES LABELS "parallel;parallel_obj" )
@@ -522,6 +523,7 @@ if(BUILD_MPI_TESTING)
522523
# set_tests_properties(region_transfer_3D_skewed_mpi PROPERTIES LABELS "parallel;parallel_region_transfer" )
523524
set_tests_properties(region_transfer_write_read_mpi PROPERTIES LABELS "parallel;parallel_region_transfer" )
524525
set_tests_properties(vpicio_bdcats_mpi PROPERTIES LABELS "parallel;parallel_region_transfer" )
526+
set_tests_properties(vpicio_mpi_small_cache PROPERTIES LABELS "parallel;parallel_region_transfer" )
525527
set_tests_properties(region_transfer_all_mpi PROPERTIES LABELS "parallel;parallel_region_transfer_all" )
526528
set_tests_properties(region_transfer_all_2D_mpi PROPERTIES LABELS "parallel;parallel_region_transfer_all" )
527529
set_tests_properties(region_transfer_all_3D_mpi PROPERTIES LABELS "parallel;parallel_region_transfer_all" )

0 commit comments

Comments
 (0)