[Perf] These changes enhance the NUMA functionality of vllm for systems with more than one NUMA node per socket (#25559)

Signed-off-by: Siddappa Karabannavar <siddappa.karabannavar@amd.com>
This commit is contained in:
skaraban3807 2025-11-21 19:43:52 +05:30 committed by GitHub
parent 434f3d3eb8
commit f1805db1a6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -45,31 +45,54 @@ std::string init_cpu_threads_env(const std::string& cpu_ids) {
// Memory node binding // Memory node binding
if (numa_available() != -1) { if (numa_available() != -1) {
int mem_node_id = numa_node_of_cpu(omp_cpu_ids.front()); int mem_node_id = numa_node_of_cpu(omp_cpu_ids.front());
// Verify all CPUs are on the same NUMA node std::set<int> node_ids;
for (size_t i = 1; i < omp_cpu_ids.size(); ++i) { for (const auto& cpu_id : omp_cpu_ids) {
int node_id = numa_node_of_cpu(omp_cpu_ids[i]); int node_id = numa_node_of_cpu(cpu_id);
TORCH_CHECK(node_id == mem_node_id, "CPU ", omp_cpu_ids[i], if (node_id != -1) {
" is on NUMA node ", node_id, ", but CPU ", node_ids.insert(node_id);
omp_cpu_ids.front(), " is on NUMA node ", mem_node_id, }
". All CPUs should be on the same NUMA node for optimal " TORCH_WARN(node_id == mem_node_id, "CPU ", cpu_id, " is on NUMA node ",
"performance. Memory will be bound to NUMA node ", node_id, ", but CPU ", omp_cpu_ids.front(),
mem_node_id, "."); " is on NUMA node ", mem_node_id,
". All CPUs should be on the same NUMA node for optimal "
"performance. Memory will be bound to NUMA node ",
mem_node_id, ".");
} }
bitmask* mask = numa_parse_nodestring(std::to_string(mem_node_id).c_str()); // Concatenate all node_ids into a single comma-separated string
bitmask* src_mask = numa_get_membind(); if (!node_ids.empty()) {
std::string node_ids_str;
for (const int node_id : node_ids) {
if (!node_ids_str.empty()) {
node_ids_str += ",";
}
node_ids_str += std::to_string(node_id);
}
int pid = getpid(); bitmask* mask = numa_parse_nodestring(node_ids_str.c_str());
bitmask* src_mask = numa_get_membind();
// move all existing pages to the specified numa node. int pid = getpid();
*(src_mask->maskp) = *(src_mask->maskp) ^ *(mask->maskp);
int page_num = numa_migrate_pages(pid, src_mask, mask); if (mask && src_mask) {
if (page_num == -1) { // move all existing pages to the specified numa node.
TORCH_WARN("numa_migrate_pages failed. errno: " + std::to_string(errno)); *(src_mask->maskp) = *(src_mask->maskp) ^ *(mask->maskp);
int page_num = numa_migrate_pages(pid, src_mask, mask);
if (page_num == -1) {
TORCH_WARN("numa_migrate_pages failed. errno: " +
std::to_string(errno));
}
// restrict memory allocation node.
numa_set_membind(mask);
numa_set_strict(1);
numa_free_nodemask(mask);
numa_free_nodemask(src_mask);
} else {
TORCH_WARN("numa_parse_nodestring or numa_get_membind failed. errno: " +
std::to_string(errno));
}
} }
// restrict memory allocation node.
numa_set_membind(mask);
numa_set_strict(1);
} }
// OMP threads binding // OMP threads binding