mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-30 00:07:12 +08:00
fix inductor tf32 setting error
Signed-off-by: zhuhaoran <zhuhaoran.zhr@alibaba-inc.com>
This commit is contained in:
parent
c5df2565ab
commit
b4d755ac05
@@ -4,6 +4,7 @@ from itertools import repeat
|
|||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
import torch
|
||||||
import torch._dynamo.config as dynamo_config
|
import torch._dynamo.config as dynamo_config
|
||||||
|
|
||||||
from vllm import SamplingParams
|
from vllm import SamplingParams
|
||||||
@@ -158,6 +159,7 @@ def run_tests(
|
|||||||
m.setenv("VLLM_ATTENTION_BACKEND", "FLEX_ATTENTION")
|
m.setenv("VLLM_ATTENTION_BACKEND", "FLEX_ATTENTION")
|
||||||
# lock matmul precision to full FP32 (IEEE)
|
# lock matmul precision to full FP32 (IEEE)
|
||||||
m.setenv("VLLM_FLOAT32_MATMUL_PRECISION", "ieee")
|
m.setenv("VLLM_FLOAT32_MATMUL_PRECISION", "ieee")
|
||||||
|
torch.backends.cuda.matmul.allow_tf32 = False
|
||||||
# m.setenv("VLLM_BATCH_INVARIANT", "1")
|
# m.setenv("VLLM_BATCH_INVARIANT", "1")
|
||||||
outputs: list[tuple[str, list, list]] = []
|
outputs: list[tuple[str, list, list]] = []
|
||||||
for n, (
|
for n, (
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user