Skip to content

Commit 3359704

Browse files
committed
bugfix:自动选择最优的 gid index
1 parent 32cdf93 commit 3359704

File tree

1 file changed

+34
-19
lines changed

1 file changed

+34
-19
lines changed

mooncake-transfer-engine/src/transport/rdma_transport/rdma_context.cpp

Lines changed: 34 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -392,38 +392,53 @@ static inline int ipv6_addr_v4mapped(const struct in6_addr *a) {
392392
((a->s6_addr32[1] | (a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0UL));
393393
}
394394

395+
396+
/// Reads the sysfs "ndevs" entry for one GID table slot of an RDMA device.
///
/// The path consulted is
/// /sys/class/infiniband/<device_name>/ports/<port>/gid_attrs/ndevs/<gid_index>.
///
/// @param device_name  Device directory name under /sys/class/infiniband.
/// @param port         Port number on that device.
/// @param gid_index    Index of the GID table entry to look up.
/// @return The first line of the entry with trailing whitespace removed, or
///         an empty string when the entry cannot be opened, cannot be read,
///         or contains only whitespace.
static std::string readGidNdev(const std::string &device_name, uint8_t port, int gid_index) {
    const std::string sysfs_path = "/sys/class/infiniband/" + device_name +
                                   "/ports/" + std::to_string(port) +
                                   "/gid_attrs/ndevs/" + std::to_string(gid_index);
    std::ifstream file(sysfs_path);
    if (!file.is_open()) {
        return "";
    }

    std::string ndev;
    if (!std::getline(file, ndev)) {
        // Opened but nothing could be read: report "no network device".
        return "";
    }

    // getBestGidIndex() treats any non-empty result as "a network device is
    // attached", so stray trailing whitespace (e.g. '\r' or blanks) must not
    // turn a blank line into a bogus device name.
    const std::string::size_type last = ndev.find_last_not_of(" \t\r\n");
    if (last == std::string::npos) {
        return "";
    }
    ndev.erase(last + 1);
    return ndev;
}
409+
395410
// Chooses the GID table index to use for `port` on `device_name`.
//
// NOTE(review): this span is a diff view. Lines that follow an `NNN-` marker
// appear to be the removed implementation and lines that follow an `NNN+`
// marker the added one; the description below covers the added one.
//
// Walks indices 0 .. port_attr.gid_tbl_len - 1 and queries each entry with
// ibv_query_gid_ex(); an entry whose query returns non-zero is logged and
// skipped. An entry is a candidate when it is either an IPv4-mapped address
// of type IBV_GID_TYPE_ROCE_V2, or of type IBV_GID_TYPE_IB.
//
// Parameters:
//   device_name - used in the error log and passed to readGidNdev().
//   context     - passed to ibv_query_gid_ex().
//   port_attr   - only gid_tbl_len is read here.
//   port        - port number passed to ibv_query_gid_ex() and readGidNdev().
//
// Returns the index of the first candidate for which readGidNdev() yields a
// non-empty string; otherwise the index of the first candidate seen;
// otherwise 0 when no entry qualifies.
int RdmaContext::getBestGidIndex(const std::string &device_name,
396411
struct ibv_context *context,
397412
ibv_port_attr &port_attr, uint8_t port) {
398-
int best_gid_index = 0;
399-
int best_priority = -1;
413+
// gid_index stays 0 unless a candidate entry is found below.
int gid_index = 0, i;
400414
struct ibv_gid_entry gid_entry;
415+
// Set once the first candidate without a network device has been recorded.
bool fallback_found = false;
401416

402-
for (int i = 0; i < port_attr.gid_tbl_len; i++) {
417+
for (i = 0; i < port_attr.gid_tbl_len; i++) {
403418
if (ibv_query_gid_ex(context, port, i, &gid_entry, 0)) {
404419
PLOG(ERROR) << "Failed to query GID " << i << " on " << device_name
405420
<< "/" << port;
406421
continue; // if gid is invalid ibv_query_gid_ex() will return !0
407422
}
408-
// Priority-based ranking: a higher value wins, -1 means "not eligible".
int priority = -1;
409-
bool is_ipv4_mapped = ipv6_addr_v4mapped((struct in6_addr *)gid_entry.gid.raw);
410-
411-
if (is_ipv4_mapped && gid_entry.gid_type == IBV_GID_TYPE_ROCE_V2) {
412-
priority = 3; // highest priority
413-
} else if (is_ipv4_mapped) {
414-
priority = 2;
415-
} else if (gid_entry.gid_type == IBV_GID_TYPE_IB) {
416-
priority = 1;
417-
} else if (gid_entry.gid_type == IBV_GID_TYPE_ROCE_V2) {
418-
priority = 0;
419-
}
420423

421-
// Strict '>' keeps the lowest index among entries of equal priority.
if (priority > best_priority) {
422-
best_priority = priority;
423-
best_gid_index = i;
424+
// Candidate test: IPv4-mapped RoCE v2 entry, or an IB-type entry.
if ((ipv6_addr_v4mapped((struct in6_addr *)gid_entry.gid.raw) &&
425+
gid_entry.gid_type == IBV_GID_TYPE_ROCE_V2) ||
426+
gid_entry.gid_type == IBV_GID_TYPE_IB) {
427+
// Check if this GID has an associated network device
428+
std::string ndev = readGidNdev(device_name, port, i);
429+
if (!ndev.empty()) {
430+
// Found a GID with network device, this is the best choice
431+
gid_index = i;
432+
break;
433+
}
434+
// No network device, keep the first one as fallback candidate
435+
if (!fallback_found) {
436+
gid_index = i;
437+
fallback_found = true;
438+
}
424439
}
425440
}
426-
return best_gid_index;
441+
return gid_index;
427442
}
428443

429444
int RdmaContext::openRdmaDevice(const std::string &device_name, uint8_t port,

0 commit comments

Comments
 (0)