diff --git a/.github/workflows/tsan.yml b/.github/workflows/tsan.yml new file mode 100644 index 0000000..cf8d40d --- /dev/null +++ b/.github/workflows/tsan.yml @@ -0,0 +1,90 @@ +name: TSAN (ThreadSanitizer) + +# Data race detection using ThreadSanitizer +# This workflow builds memtier_benchmark with TSAN enabled and runs +# tests to detect data races and threading issues. +# +# NOTE: TSAN currently detects known data races in the codebase. +# This workflow is informational and will not fail the build. +# See: https://github.com/google/sanitizers/issues/1716 for TSAN/ASLR issues + +on: [push, pull_request] + +jobs: + test-with-thread-sanitizer: + runs-on: ubuntu-latest + name: Data race detection (TSAN) + continue-on-error: true # Don't fail build on known races + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install build dependencies + run: | + sudo apt-get -qq update + sudo apt-get install -y \ + build-essential \ + autoconf \ + automake \ + pkg-config \ + libevent-dev \ + zlib1g-dev \ + libssl-dev + + - name: Build with Thread Sanitizer + run: | + autoreconf -ivf + ./configure --enable-thread-sanitizer + make -j + + - name: Verify TSAN is enabled + run: | + ldd ./memtier_benchmark | grep tsan + echo "✓ ThreadSanitizer is linked" + + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: '3.10' + architecture: x64 + + - name: Install Python test dependencies + run: pip install -r ./tests/test_requirements.txt + + - name: Install Redis + run: | + curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg + echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/redis.list + sudo apt-get -qq update + sudo apt-get install redis + sudo service redis-server stop + + - name: Increase connection limit + run: | + sudo sysctl -w net.ipv4.tcp_fin_timeout=10 + sudo sysctl -w net.ipv4.tcp_tw_reuse=1 + ulimit -n 40960 + + - name: Generate TLS test certificates + run: ./tests/gen-test-certs.sh + + - name: Test OSS TCP with TSAN + timeout-minutes: 15 + run: | + # Use setarch to disable ASLR (workaround for TSAN on kernel 6.6+) + # Use suppression file to ignore known benign races + export TSAN_OPTIONS="suppressions=$(pwd)/tsan_suppressions.txt" + setarch `uname -m` -R bash -c './tests/run_tests.sh' + + - name: Test OSS TCP TLS with TSAN + timeout-minutes: 15 + run: | + export TSAN_OPTIONS="suppressions=$(pwd)/tsan_suppressions.txt" + setarch `uname -m` -R bash -c 'TLS=1 ./tests/run_tests.sh' + + - name: Test OSS-CLUSTER TCP with TSAN + timeout-minutes: 15 + run: | + export TSAN_OPTIONS="suppressions=$(pwd)/tsan_suppressions.txt" + setarch `uname -m` -R bash -c 'OSS_STANDALONE=0 OSS_CLUSTER=1 ./tests/run_tests.sh' + diff --git a/README.md b/README.md index 7cd83d5..9a477dd 100644 --- a/README.md +++ b/README.md @@ -157,6 +157,28 @@ To verify ASAN is enabled: $ ldd ./memtier_benchmark | grep asan + +**Data race detection with Thread Sanitizer** + + +memtier_benchmark supports building with ThreadSanitizer (TSAN) to detect data races and threading issues. + +To build with Thread Sanitizer enabled: + + $ ./configure --enable-thread-sanitizer + $ make + +To run tests with race detection (requires disabling ASLR on kernel 6.6+): + + $ TSAN_OPTIONS="suppressions=$(pwd)/tsan_suppressions.txt" setarch `uname -m` -R ./tests/run_tests.sh + +To verify TSAN is enabled: + + $ ldd ./memtier_benchmark | grep tsan + +**Note:** TSAN and ASAN are mutually exclusive and cannot be used together. A suppression file (`tsan_suppressions.txt`) is provided to ignore known benign data races that do not affect correctness. + + ## Using Docker Use available images on Docker Hub: diff --git a/config_types.h b/config_types.h index 323d6a7..c5f8b6b 100644 --- a/config_types.h +++ b/config_types.h @@ -27,6 +27,7 @@ #include #include +#include struct config_range { int min; @@ -100,7 +101,7 @@ struct server_addr { struct addrinfo *m_server_addr; struct addrinfo *m_used_addr; int m_resolution; - int m_last_error; + std::atomic m_last_error; // Atomic to prevent data race between resolve() and get_connect_info() }; #define KEY_PLACEHOLDER "__key__" diff --git a/configure.ac b/configure.ac index b75efcd..a43efd6 100755 --- a/configure.ac +++ b/configure.ac @@ -79,6 +79,19 @@ AS_IF([test "x$enable_sanitizers" = "xyes"], [ LDFLAGS="$LDFLAGS -fsanitize=address -fsanitize=leak" ], []) +# Thread Sanitizer (TSAN) is optional and mutually exclusive with ASAN. +AC_ARG_ENABLE([thread-sanitizer], + [AS_HELP_STRING([--enable-thread-sanitizer], + [Enable ThreadSanitizer for data race detection])]) +AS_IF([test "x$enable_thread_sanitizer" = "xyes"], [ + AS_IF([test "x$enable_sanitizers" = "xyes"], [ + AC_MSG_ERROR([--enable-thread-sanitizer and --enable-sanitizers are mutually exclusive]) + ]) + AC_MSG_NOTICE([Enabling ThreadSanitizer]) + CXXFLAGS="$CXXFLAGS -fsanitize=thread -fno-omit-frame-pointer -O1" + LDFLAGS="$LDFLAGS -fsanitize=thread" + ], []) + # clock_gettime requires -lrt on old glibc only. AC_SEARCH_LIBS([clock_gettime], [rt], , AC_MSG_ERROR([rt is required libevent.])) diff --git a/memtier_benchmark.cpp b/memtier_benchmark.cpp index 761aee2..f05eb2d 100755 --- a/memtier_benchmark.cpp +++ b/memtier_benchmark.cpp @@ -56,6 +56,7 @@ #include #include +#include #include "client.h" #include "JSON_handler.h" @@ -1222,7 +1223,7 @@ struct cg_thread { client_group* m_cg; abstract_protocol* m_protocol; pthread_t m_thread; - bool m_finished; + std::atomic m_finished; // Atomic to prevent data race between worker thread write and main thread read cg_thread(unsigned int id, benchmark_config* config, object_generator* obj_gen) : m_thread_id(id), m_config(config), m_obj_gen(obj_gen), m_cg(NULL), m_protocol(NULL), m_finished(false) @@ -1320,6 +1321,9 @@ run_stats run_benchmark(int run_id, benchmark_config* cfg, object_generator* obj unsigned long int cur_bytes_sec = 0; // provide some feedback... + // NOTE: Reading stats from worker threads without synchronization is a benign race. + // These stats are only for progress display and are approximate. Final results are + // collected after pthread_join() when all threads have finished (race-free). unsigned int active_threads = 0; do { active_threads = 0; diff --git a/tsan_suppressions.txt b/tsan_suppressions.txt new file mode 100644 index 0000000..3094d9a --- /dev/null +++ b/tsan_suppressions.txt @@ -0,0 +1,22 @@ +# ThreadSanitizer suppressions for memtier_benchmark +# +# This file contains suppressions for known benign data races that do not +# affect correctness. These races are intentionally left unfixed for performance. + +# Benign race on stats counters during progress display +# Worker threads update stats while main thread reads them for progress updates. +# This is benign because: +# - Progress stats are approximate and for display only +# - Final results are collected after pthread_join (race-free) +# - Adding synchronization would hurt performance +race:run_stats::set_end_time +race:run_stats::get_duration_usec +race:run_stats::get_total_ops +race:run_stats::get_total_bytes +race:run_stats::get_total_latency +race:totals::update_op + +# OpenSSL internal races (false positives in libcrypto) +# These are known benign races within OpenSSL library itself +race:libcrypto.so* +