mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 04:44:57 +08:00
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
parent
ed540d6d4c
commit
8f18feb191
@ -355,13 +355,13 @@ def test_toy_llama(
|
||||
)
|
||||
|
||||
compile_config_no_compile = CompilationConfig(
|
||||
level=CompilationMode.NONE,
|
||||
mode=CompilationMode.NONE,
|
||||
cudagraph_mode=CUDAGraphMode.NONE,
|
||||
backend="eager",
|
||||
)
|
||||
|
||||
compile_config_no_split = CompilationConfig(
|
||||
level=CompilationMode.VLLM_COMPILE,
|
||||
mode=CompilationMode.VLLM_COMPILE,
|
||||
use_inductor_graph_partition=use_inductor_graph_partition,
|
||||
cudagraph_mode=CUDAGraphMode.PIECEWISE,
|
||||
backend=backend,
|
||||
|
||||
@ -38,7 +38,7 @@ class CompiledMod(torch.nn.Module):
|
||||
def make_vllm_config() -> VllmConfig:
|
||||
return VllmConfig(
|
||||
compilation_config=CompilationConfig(
|
||||
level=CompilationMode.VLLM_COMPILE,
|
||||
mode=CompilationMode.VLLM_COMPILE,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@ -168,7 +168,7 @@ def test_splitting_ops_dynamic():
|
||||
if is_torch_equal_or_newer("2.9.0.dev"):
|
||||
config = VllmConfig(
|
||||
compilation_config=CompilationConfig(
|
||||
level=CompilationMode.VLLM_COMPILE,
|
||||
mode=CompilationMode.VLLM_COMPILE,
|
||||
use_inductor_graph_partition=True,
|
||||
splitting_ops=["vllm::unified_attention"],
|
||||
)
|
||||
@ -180,7 +180,7 @@ def test_splitting_ops_dynamic():
|
||||
# When attn_fusion pass enabled, splitting_ops now default to attention ops.
|
||||
config = VllmConfig(
|
||||
compilation_config=CompilationConfig(
|
||||
level=CompilationMode.VLLM_COMPILE,
|
||||
mode=CompilationMode.VLLM_COMPILE,
|
||||
pass_config={"enable_attn_fusion": True, "enable_noop": True},
|
||||
custom_ops=["+quant_fp8"],
|
||||
cudagraph_mode=CUDAGraphMode.PIECEWISE,
|
||||
@ -195,7 +195,7 @@ def test_splitting_ops_dynamic():
|
||||
if is_torch_equal_or_newer("2.9.0.dev"):
|
||||
config = VllmConfig(
|
||||
compilation_config=CompilationConfig(
|
||||
level=CompilationMode.VLLM_COMPILE,
|
||||
mode=CompilationMode.VLLM_COMPILE,
|
||||
use_inductor_graph_partition=True,
|
||||
pass_config={"enable_attn_fusion": True, "enable_noop": True},
|
||||
custom_ops=["+quant_fp8"],
|
||||
|
||||
@ -198,7 +198,7 @@ def run_model(compile_config: int | CompilationConfig, model: str, **model_kwarg
|
||||
compilation_config = (
|
||||
compile_config
|
||||
if isinstance(compile_config, CompilationConfig)
|
||||
else CompilationConfig(level=compile_config)
|
||||
else CompilationConfig(mode=compile_config)
|
||||
)
|
||||
|
||||
prompts = [
|
||||
|
||||
@ -151,7 +151,7 @@ def test_attn_quant(
|
||||
cudagraph_mode=mode,
|
||||
splitting_ops=splitting_ops,
|
||||
# Common
|
||||
level=CompilationMode.VLLM_COMPILE,
|
||||
mode=CompilationMode.VLLM_COMPILE,
|
||||
pass_config=PassConfig(enable_attn_fusion=True, enable_noop=True),
|
||||
# Inductor caches custom passes by default as well via uuid
|
||||
inductor_compile_config={"force_disable_caches": True},
|
||||
@ -236,7 +236,7 @@ def test_tp2_attn_quant_allreduce_rmsnorm(
|
||||
custom_ops=custom_ops_list,
|
||||
splitting_ops=splitting_ops,
|
||||
# Common
|
||||
level=CompilationMode.VLLM_COMPILE,
|
||||
mode=CompilationMode.VLLM_COMPILE,
|
||||
pass_config=PassConfig(
|
||||
enable_attn_fusion=True,
|
||||
enable_noop=True,
|
||||
@ -273,7 +273,7 @@ def run_model(compile_config: int | CompilationConfig, model: str, **model_kwarg
|
||||
compilation_config = (
|
||||
compile_config
|
||||
if isinstance(compile_config, CompilationConfig)
|
||||
else CompilationConfig(level=compile_config)
|
||||
else CompilationConfig(mode=compile_config)
|
||||
)
|
||||
|
||||
prompts = [
|
||||
|
||||
@ -36,7 +36,7 @@ class Relu3(ReLUSquaredActivation):
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"env, torch_level, backend, ops_enabled, default_on",
|
||||
"env, compilation_mode, backend, ops_enabled, default_on",
|
||||
[
|
||||
# Default values based on compile level
|
||||
# - All by default (no Inductor compilation)
|
||||
@ -77,7 +77,7 @@ class Relu3(ReLUSquaredActivation):
|
||||
)
|
||||
def test_enabled_ops(
|
||||
env: str | None,
|
||||
torch_level: int,
|
||||
compilation_mode: int,
|
||||
backend: str,
|
||||
ops_enabled: list[int],
|
||||
default_on: bool,
|
||||
@ -85,7 +85,7 @@ def test_enabled_ops(
|
||||
custom_ops = env.split(",") if env else []
|
||||
vllm_config = VllmConfig(
|
||||
compilation_config=CompilationConfig(
|
||||
backend=backend, level=torch_level, custom_ops=custom_ops
|
||||
backend=backend, mode=compilation_mode, custom_ops=custom_ops
|
||||
)
|
||||
)
|
||||
with set_current_vllm_config(vllm_config):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user