mirror of https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-27 20:30:54 +08:00
[Minor] Rename quantization nvfp4 to modelopt_fp4 (#18356)

Signed-off-by: mgoin <mgoin64@gmail.com>

parent 8f55962a7f
commit f4a8a37465
@@ -41,8 +41,8 @@ EXPECTED_STRS_MAP = {
     reason=
     "Prevent unstable test based on golden strings from breaking the build "
     " and test input model being too large and hanging the system.")
-@pytest.mark.skipif(not is_quant_method_supported("nvfp4"),
-                    reason="nvfp4 is not supported on this GPU type.")
+@pytest.mark.skipif(not is_quant_method_supported("modelopt_fp4"),
+                    reason="modelopt_fp4 is not supported on this GPU type.")
 @pytest.mark.parametrize("model_name", MODELS)
 def test_models(example_prompts, model_name) -> None:
     model = LLM(
@@ -50,7 +50,7 @@ def test_models(example_prompts, model_name) -> None:
         max_model_len=MAX_MODEL_LEN,
         trust_remote_code=True,
         enforce_eager=True,
-        quantization="nvfp4",
+        quantization="modelopt_fp4",
     )

     tokenizer = AutoTokenizer.from_pretrained(model_name)
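For downstream users the rename is the only change: a model is loaded exactly as before, just with the new string. A minimal usage sketch (the checkpoint name is a placeholder, not from this commit):

    from vllm import LLM, SamplingParams

    # Placeholder checkpoint; any ModelOpt FP4-quantized model applies here.
    llm = LLM(
        model="some-org/model-nvfp4",
        quantization="modelopt_fp4",  # was "nvfp4" before this commit
        enforce_eager=True,
    )
    params = SamplingParams(max_tokens=32)
    outputs = llm.generate(["Hello, my name is"], params)
    print(outputs[0].outputs[0].text)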
@@ -824,7 +824,7 @@ class ModelConfig:
         optimized_quantization_methods = [
             "fp8", "marlin", "modelopt", "gptq_marlin_24", "gptq_marlin",
             "awq_marlin", "fbgemm_fp8", "compressed-tensors", "experts_int8",
-            "quark", "nvfp4", "bitblas", "gptq_bitblas"
+            "quark", "modelopt_fp4", "bitblas", "gptq_bitblas"
         ]
         if self.quantization is not None:
             self.quantization = cast(QuantizationMethods,
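As far as this hunk shows, the list only feeds a performance-warning path; a sketch of how such a membership check plays out (assumed shape, not a verbatim excerpt from ModelConfig):

    # Assumed consumer of the list above: methods outside it still run,
    # but users get a "not fully optimized" style warning.
    optimized_quantization_methods = [
        "fp8", "marlin", "modelopt", "gptq_marlin_24", "gptq_marlin",
        "awq_marlin", "fbgemm_fp8", "compressed-tensors", "experts_int8",
        "quark", "modelopt_fp4", "bitblas", "gptq_bitblas"
    ]

    def warn_if_unoptimized(quantization: str) -> None:
        if quantization not in optimized_quantization_methods:
            print(f"warning: {quantization} is not fully optimized; "
                  "inference may be slower than expected")

    warn_if_unoptimized("modelopt_fp4")  # quiet: renamed entry is present
    warn_if_unoptimized("nvfp4")         # warns: old key is gone from the list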
@@ -14,7 +14,7 @@ QuantizationMethods = Literal[
     "ptpc_fp8",
     "fbgemm_fp8",
     "modelopt",
-    "nvfp4",
+    "modelopt_fp4",
     "marlin",
     "bitblas",
     "gguf",
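Since QuantizationMethods is a typing.Literal, the rename is also a type-level change; a quick check against the updated alias (assuming it is importable from the quantization package, as the hunks here suggest):

    from typing import get_args

    from vllm.model_executor.layers.quantization import QuantizationMethods

    # After this commit the Literal carries the new key and drops the old one.
    methods = get_args(QuantizationMethods)
    print("modelopt_fp4" in methods)  # True
    print("nvfp4" in methods)         # False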
@@ -120,7 +120,7 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
         "fp8": Fp8Config,
         "fbgemm_fp8": FBGEMMFp8Config,
         "modelopt": ModelOptFp8Config,
-        "nvfp4": ModelOptNvFp4Config,
+        "modelopt_fp4": ModelOptNvFp4Config,
         "marlin": MarlinConfig,
         "bitblas": BitBLASConfig,
         "gguf": GGUFConfig,
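The registry entry means the new string resolves to the same config class as before; a sanity check against get_quantization_config (signature taken from the hunk header; the ValueError for unknown methods is an assumption about the surrounding code):

    from vllm.model_executor.layers.quantization import get_quantization_config

    cls = get_quantization_config("modelopt_fp4")
    print(cls.__name__)  # ModelOptNvFp4Config

    # The old key is no longer registered, so it should now be rejected.
    try:
        get_quantization_config("nvfp4")
    except ValueError as err:
        print(err)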
@@ -192,7 +192,7 @@ class ModelOptNvFp4Config(QuantizationConfig):

     @classmethod
     def get_name(cls) -> QuantizationMethods:
-        return "nvfp4"
+        return "modelopt_fp4"

     @classmethod
     def get_supported_act_dtypes(cls) -> list[torch.dtype]:
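get_name() is what ties the config class back to its registry key, which is why the rename has to touch both sides; the invariant in one line (follows directly from the two hunks above):

    from vllm.model_executor.layers.quantization import get_quantization_config

    # Registry key and get_name() must agree, or config lookup breaks.
    assert get_quantization_config("modelopt_fp4").get_name() == "modelopt_fp4"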