From 8b62495076fde4a73c4a397f5307f49c93dd7c6e Mon Sep 17 00:00:00 2001 From: Zhewen Li Date: Wed, 29 Oct 2025 00:00:15 -0700 Subject: [PATCH] [Bugfix] Fix non-contiguous tensor error in `rocm_unquantized_gemm_impl` (#27605) Signed-off-by: zhewenli --- .buildkite/test-amd.yaml | 2 +- vllm/model_executor/layers/utils.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index dceec159a9da..0a7767b8ccc7 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -286,7 +286,7 @@ steps: - label: Engine Test # 25min timeout_in_minutes: 40 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 #grade: Blocking source_file_dependencies: diff --git a/vllm/model_executor/layers/utils.py b/vllm/model_executor/layers/utils.py index da5eea02d120..925f9ac0a16e 100644 --- a/vllm/model_executor/layers/utils.py +++ b/vllm/model_executor/layers/utils.py @@ -119,17 +119,17 @@ def rocm_unquantized_gemm_impl( if use_skinny is not True: return torch.nn.functional.linear(x, weight, bias) - x_view = x.view(-1, x.size(-1)) + x_view = x.reshape(-1, x.size(-1)) n = x_view.shape[0] m = weight.shape[0] cu_count = current_platform.get_cu_count() if m > 8 and 0 < n <= 4: out = ops.wvSplitK(weight, x_view, cu_count, bias) - return out.view(*x.shape[:-1], weight.shape[0]) + return out.reshape(*x.shape[:-1], weight.shape[0]) elif m % 4 == 0 and n == 1 and k <= 8192 and bias is None: out = ops.LLMM1(weight, x_view, 4) - return out.view(*x.shape[:-1], weight.shape[0]) + return out.reshape(*x.shape[:-1], weight.shape[0]) return torch.nn.functional.linear(x, weight, bias)