[Perf] These changes enhance the NUMA functionality of vllm for systems with more than one NUMA node per socket (#25559)

Signed-off-by: Siddappa Karabannavar <siddappa.karabannavar@amd.com>
This commit is contained in:
skaraban3807 2025-11-21 19:43:52 +05:30 committed by GitHub
parent 434f3d3eb8
commit f1805db1a6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -45,31 +45,54 @@ std::string init_cpu_threads_env(const std::string& cpu_ids) {
// Memory node binding // Memory node binding
if (numa_available() != -1) { if (numa_available() != -1) {
int mem_node_id = numa_node_of_cpu(omp_cpu_ids.front()); int mem_node_id = numa_node_of_cpu(omp_cpu_ids.front());
// Verify all CPUs are on the same NUMA node std::set<int> node_ids;
for (size_t i = 1; i < omp_cpu_ids.size(); ++i) { for (const auto& cpu_id : omp_cpu_ids) {
int node_id = numa_node_of_cpu(omp_cpu_ids[i]); int node_id = numa_node_of_cpu(cpu_id);
TORCH_CHECK(node_id == mem_node_id, "CPU ", omp_cpu_ids[i], if (node_id != -1) {
" is on NUMA node ", node_id, ", but CPU ", node_ids.insert(node_id);
omp_cpu_ids.front(), " is on NUMA node ", mem_node_id, }
". All CPUs should be on the same NUMA node for optimal " TORCH_WARN(node_id == mem_node_id, "CPU ", cpu_id, " is on NUMA node ",
"performance. Memory will be bound to NUMA node ", node_id, ", but CPU ", omp_cpu_ids.front(),
mem_node_id, "."); " is on NUMA node ", mem_node_id,
". All CPUs should be on the same NUMA node for optimal "
"performance. Memory will be bound to NUMA node ",
mem_node_id, ".");
} }
bitmask* mask = numa_parse_nodestring(std::to_string(mem_node_id).c_str()); // Concatenate all node_ids into a single comma-separated string
bitmask* src_mask = numa_get_membind(); if (!node_ids.empty()) {
std::string node_ids_str;
for (const int node_id : node_ids) {
if (!node_ids_str.empty()) {
node_ids_str += ",";
}
node_ids_str += std::to_string(node_id);
}
int pid = getpid(); bitmask* mask = numa_parse_nodestring(node_ids_str.c_str());
bitmask* src_mask = numa_get_membind();
// move all existing pages to the specified numa node. int pid = getpid();
*(src_mask->maskp) = *(src_mask->maskp) ^ *(mask->maskp);
int page_num = numa_migrate_pages(pid, src_mask, mask); if (mask && src_mask) {
if (page_num == -1) { // move all existing pages to the specified numa node.
TORCH_WARN("numa_migrate_pages failed. errno: " + std::to_string(errno)); *(src_mask->maskp) = *(src_mask->maskp) ^ *(mask->maskp);
int page_num = numa_migrate_pages(pid, src_mask, mask);
if (page_num == -1) {
TORCH_WARN("numa_migrate_pages failed. errno: " +
std::to_string(errno));
}
// restrict memory allocation node.
numa_set_membind(mask);
numa_set_strict(1);
numa_free_nodemask(mask);
numa_free_nodemask(src_mask);
} else {
TORCH_WARN("numa_parse_nodestring or numa_get_membind failed. errno: " +
std::to_string(errno));
}
} }
// restrict memory allocation node.
numa_set_membind(mask);
numa_set_strict(1);
} }
// OMP threads binding // OMP threads binding