diff --git a/tests/kernels/quantization/test_awq.py b/tests/kernels/quantization/test_awq.py
index efb62ca3799a9..3bf59dea30972 100644
--- a/tests/kernels/quantization/test_awq.py
+++ b/tests/kernels/quantization/test_awq.py
@@ -41,9 +41,9 @@ def test_awq_gemm_opcheck(monkeypatch: pytest.MonkeyPatch):
     qweight = torch.randint(
         -2000000000, 2000000000, (8192, 256), device="cuda", dtype=torch.int32
     )
-    scales = torch.randint(
+    scales = torch.empty((64, 2048), device="cuda", dtype=torch.float16)
+    qzeros = torch.randint(
         -2000000000, 2000000000, (64, 256), device="cuda", dtype=torch.int32
     )
-    qzeros = torch.empty((64, 2048), device="cuda", dtype=torch.float16)
     split_k_iters = 8
-    opcheck(torch.ops._C.awq_gemm, (input, qweight, qzeros, scales, split_k_iters))
+    opcheck(torch.ops._C.awq_gemm, (input, qweight, scales, qzeros, split_k_iters))
diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py
index 52a58a082683d..2319655008c50 100644
--- a/vllm/_custom_ops.py
+++ b/vllm/_custom_ops.py
@@ -498,15 +498,15 @@ def awq_dequantize(
 def awq_gemm(
     input: torch.Tensor,
     qweight: torch.Tensor,
-    qzeros: torch.Tensor,
     scales: torch.Tensor,
+    qzeros: torch.Tensor,
     split_k_iters: int,
 ) -> torch.Tensor:
     if envs.VLLM_USE_TRITON_AWQ:
         from vllm.model_executor.layers.quantization.awq_triton import awq_gemm_triton

-        return awq_gemm_triton(input, qweight, qzeros, scales, split_k_iters)
-    return torch.ops._C.awq_gemm(input, qweight, qzeros, scales, split_k_iters)
+        return awq_gemm_triton(input, qweight, scales, qzeros, split_k_iters)
+    return torch.ops._C.awq_gemm(input, qweight, scales, qzeros, split_k_iters)


 # gptq
@@ -632,8 +632,8 @@ if hasattr(torch.ops._C, "gptq_marlin_24_gemm"):
     def _awq_gemm_fake(
         input: torch.Tensor,
         qweight: torch.Tensor,
-        qzeros: torch.Tensor,
         scales: torch.Tensor,
+        qzeros: torch.Tensor,
         split_k_iters: torch.SymInt,
     ) -> torch.Tensor:
         num_in_feats = input.size(0)
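
For context, a minimal caller-side sketch of the corrected argument order (`scales` before `qzeros`), mirroring the tensor shapes used in `test_awq_gemm_opcheck`. The `input` batch size and the placeholder tensor contents are illustrative assumptions, and a CUDA device plus the compiled `_C` extension are assumed to be available.

```python
import torch

from vllm import _custom_ops as ops

# Shapes mirror tests/kernels/quantization/test_awq.py: qweight and qzeros hold
# int32-packed 4-bit values (8 per int32), scales are fp16 per-group factors.
# The input batch size of 2 is an illustrative assumption.
input = torch.rand((2, 8192), device="cuda", dtype=torch.float16)
qweight = torch.randint(
    -2000000000, 2000000000, (8192, 256), device="cuda", dtype=torch.int32
)
scales = torch.empty((64, 2048), device="cuda", dtype=torch.float16)
qzeros = torch.randint(
    -2000000000, 2000000000, (64, 256), device="cuda", dtype=torch.int32
)
split_k_iters = 8

# After this change, the Python wrapper passes scales before qzeros, so the
# call below matches the _C.awq_gemm kernel's parameter order.
out = ops.awq_gemm(input, qweight, scales, qzeros, split_k_iters)
print(out.shape)  # (2, 2048): one output column per unpacked weight column
```

The output values are meaningless with random placeholder data; the sketch only demonstrates the argument order that the opcheck test and both `awq_gemm` call sites now share.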