Skip to content

Commit 68a5221

Browse files
committed
[ICRDMA] Do not scan devices from ctor. Restore ut. EXT-1639 (#28087)
Accessing the ibverbs library from the constructor of LinkManager causes MSan issues for the whole project. To solve this, the RDMA context is now initialised only when it is needed. MSan tests will not be run for RDMA-related code.
1 parent 82500f0 commit 68a5221

File tree

5 files changed

+59
-11
lines changed

5 files changed

+59
-11
lines changed

ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1284,9 +1284,10 @@ Y_UNIT_TEST_SUITE(TPDiskTest) {
12841284
UNIT_ASSERT(ConvertIPartsToString(parts.Get()) == res->Data.ToString().Slice());
12851285
}
12861286
}
1287+
12871288
Y_UNIT_TEST(ChunkWriteDifferentOffsetAndSize) {
1288-
for (ui32 i = 0; i <= 3; ++i) {
1289-
ChunkWriteDifferentOffsetAndSizeImpl(i & 1, i & 2);
1289+
for (ui32 i = 0; i <= 1; ++i) {
1290+
ChunkWriteDifferentOffsetAndSizeImpl(i & 1, false);
12901291
}
12911292
}
12921293

@@ -2129,7 +2130,17 @@ Y_UNIT_TEST_SUITE(ShredPDisk) {
21292130
#endif
21302131
}
21312132

2132-
/*
2133+
2134+
// RDMA uses the ibverbs shared library, which is part of the hardware vendor-provided drivers and can't be linked statically.
2135+
// This library is not MSan-instrumented, so running against it under MSan causes failures. Skip such runs.
2136+
static bool IsMsanEnabled() {
2137+
#if defined(_msan_enabled_)
2138+
return true;
2139+
#else
2140+
return false;
2141+
#endif
2142+
}
2143+
21332144
Y_UNIT_TEST_SUITE(RDMA) {
21342145
void TestChunkReadWithRdmaAllocator(bool plainDataChunks) {
21352146
TActorTestContext testCtx({
@@ -2170,13 +2181,23 @@ Y_UNIT_TEST_SUITE(RDMA) {
21702181
}
21712182

21722183
Y_UNIT_TEST(TestChunkReadWithRdmaAllocatorEncryptedChunks) {
2184+
if (IsMsanEnabled())
2185+
return;
2186+
21732187
TestChunkReadWithRdmaAllocator(false);
21742188
}
2189+
21752190
Y_UNIT_TEST(TestChunkReadWithRdmaAllocatorPlainChunks) {
2191+
if (IsMsanEnabled())
2192+
return;
2193+
21762194
TestChunkReadWithRdmaAllocator(true);
21772195
}
21782196

21792197
Y_UNIT_TEST(TestRcBuf) {
2198+
if (IsMsanEnabled())
2199+
return;
2200+
21802201
ui32 size = 129961;
21812202
ui32 offset = 123;
21822203
ui32 tailRoom = 1111;
@@ -2210,6 +2231,15 @@ Y_UNIT_TEST_SUITE(RDMA) {
22102231
buf1.RawDataPtr(0, totalSize);
22112232
buf2.RawDataPtr(0, totalSize);
22122233
}
2234+
2235+
Y_UNIT_TEST(ChunkWriteDifferentOffsetAndSize) {
2236+
if (IsMsanEnabled())
2237+
return;
2238+
2239+
for (ui32 i = 0; i <= 1; ++i) {
2240+
NTestSuiteTPDiskTest::ChunkWriteDifferentOffsetAndSizeImpl(i & 1, true);
2241+
}
2242+
}
22132243
}
2214-
*/
2244+
22152245
} // namespace NKikimr

ydb/library/actors/interconnect/rdma/link_manager.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "link_manager.h"
22
#include "ctx.h"
33
#include "ctx_impl.h"
4+
#include <mutex>
45

56
#include <util/generic/scope.h>
67
#include <util/generic/string.h>
@@ -60,10 +61,12 @@ static class TRdmaLinkManager {
6061
return;
6162
}
6263
}
63-
private:
64-
TCtxsMap CtxMap;
6564

6665
void ScanDevices() {
66+
std::lock_guard<std::mutex> lock(Mtx);
67+
if (Inited) {
68+
return;
69+
}
6770
int numDevices = 0;
6871
int err;
6972
ibv_device** deviceList = ibv_get_device_list(&numDevices);
@@ -130,10 +133,16 @@ static class TRdmaLinkManager {
130133
}
131134
}
132135
}
136+
Inited = true;
133137
}
134138

139+
private:
140+
TCtxsMap CtxMap;
135141
int ErrNo = 0;
136142
TString Err;
143+
std::mutex Mtx;
144+
bool Inited = false;
145+
137146

138147
} RdmaLinkManager;
139148

@@ -159,4 +168,8 @@ const TCtxsMap& GetAllCtxs() {
159168
return RdmaLinkManager.GetAllCtxs();
160169
}
161170

171+
void Init() {
172+
RdmaLinkManager.ScanDevices();
173+
}
174+
162175
}

ydb/library/actors/interconnect/rdma/link_manager.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,5 +44,5 @@ using TCtxsMap = std::vector<std::pair<ibv_gid, std::shared_ptr<NInterconnect::N
4444
TRdmaCtx* GetCtx(int sockfd);
4545
TRdmaCtx* GetCtx(const in6_addr& );
4646
const TCtxsMap& GetAllCtxs();
47-
47+
void Init();
4848
}

ydb/library/actors/interconnect/rdma/mem_pool.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,10 +277,15 @@ namespace NInterconnect::NRdma {
277277
return counters;
278278
}
279279

280+
static const NInterconnect::NRdma::NLinkMgr::TCtxsMap& GetAllCtxs() {
281+
NInterconnect::NRdma::NLinkMgr::Init();
282+
return NInterconnect::NRdma::NLinkMgr::GetAllCtxs();
283+
}
284+
280285
class TMemPoolBase: public IMemPool {
281286
public:
282287
TMemPoolBase(size_t maxChunk, NMonitoring::TDynamicCounterPtr counter)
283-
: Ctxs(NInterconnect::NRdma::NLinkMgr::GetAllCtxs())
288+
: Ctxs(GetAllCtxs())
284289
, MaxChunk(maxChunk)
285290
, Alignment(NSystemInfo::GetPageSize())
286291
{

ydb/library/actors/interconnect/rdma/ya.make

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,6 @@ RECURSE(
3535
cq_actor
3636
)
3737

38-
#RECURSE_FOR_TESTS(
39-
# ut
40-
#)
38+
RECURSE_FOR_TESTS(
39+
ut
40+
)

0 commit comments

Comments
 (0)