[Minor] Rename quantization nvfp4 to modelopt_fp4 (#18356)

Signed-off-by: mgoin <mgoin64@gmail.com>
Michael Goin, 2025-05-20 12:08:37 -04:00, committed by GitHub
commit f4a8a37465 (parent 8f55962a7f)
4 changed files with 7 additions and 7 deletions

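For context, a minimal usage sketch with the renamed key (the checkpoint id below is a placeholder, not from this commit; any NVFP4 checkpoint exported by NVIDIA TensorRT Model Optimizer would stand in for it):

    from vllm import LLM

    llm = LLM(
        model="nvidia/Llama-3.1-8B-Instruct-FP4",  # hypothetical model id
        quantization="modelopt_fp4",  # was "nvfp4" before this commit
    )
    outputs = llm.generate(["The capital of France is"])
    print(outputs[0].outputs[0].text)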

@@ -41,8 +41,8 @@ EXPECTED_STRS_MAP = {
     reason=
     "Prevent unstable test based on golden strings from breaking the build "
     " and test input model being too large and hanging the system.")
-@pytest.mark.skipif(not is_quant_method_supported("nvfp4"),
-                    reason="nvfp4 is not supported on this GPU type.")
+@pytest.mark.skipif(not is_quant_method_supported("modelopt_fp4"),
+                    reason="modelopt_fp4 is not supported on this GPU type.")
 @pytest.mark.parametrize("model_name", MODELS)
 def test_models(example_prompts, model_name) -> None:
     model = LLM(
@@ -50,7 +50,7 @@ def test_models(example_prompts, model_name) -> None:
         max_model_len=MAX_MODEL_LEN,
         trust_remote_code=True,
         enforce_eager=True,
-        quantization="nvfp4",
+        quantization="modelopt_fp4",
     )
 
     tokenizer = AutoTokenizer.from_pretrained(model_name)

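A side note on the gating pattern above: the skipif can be stored as a reusable marker keyed on the new name. A minimal sketch, assuming the import path of the is_quant_method_supported helper (not shown in this diff):

    import pytest
    from tests.quantization.utils import is_quant_method_supported  # path assumed

    # Skip on GPUs without NVFP4 kernel support, keyed on the renamed method.
    requires_modelopt_fp4 = pytest.mark.skipif(
        not is_quant_method_supported("modelopt_fp4"),
        reason="modelopt_fp4 is not supported on this GPU type.")

    @requires_modelopt_fp4
    def test_smoke() -> None:
        ...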

@@ -824,7 +824,7 @@ class ModelConfig:
         optimized_quantization_methods = [
             "fp8", "marlin", "modelopt", "gptq_marlin_24", "gptq_marlin",
             "awq_marlin", "fbgemm_fp8", "compressed-tensors", "experts_int8",
-            "quark", "nvfp4", "bitblas", "gptq_bitblas"
+            "quark", "modelopt_fp4", "bitblas", "gptq_bitblas"
         ]
         if self.quantization is not None:
             self.quantization = cast(QuantizationMethods,

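For context, optimized_quantization_methods feeds a warning check in ModelConfig. A simplified stand-in for that logic (the real method does more validation; methods outside the list still run, they only emit a performance warning):

    import logging

    logger = logging.getLogger(__name__)

    def warn_if_unoptimized(quantization, optimized_quantization_methods):
        # Methods missing from the list are merely flagged as slow, not rejected.
        if (quantization is not None
                and quantization not in optimized_quantization_methods):
            logger.warning(
                "%s quantization is not fully optimized yet. The speed can be "
                "slower than non-quantized models.", quantization)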

@@ -14,7 +14,7 @@ QuantizationMethods = Literal[
     "ptpc_fp8",
     "fbgemm_fp8",
     "modelopt",
-    "nvfp4",
+    "modelopt_fp4",
     "marlin",
     "bitblas",
     "gguf",
@@ -120,7 +120,7 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
         "fp8": Fp8Config,
         "fbgemm_fp8": FBGEMMFp8Config,
         "modelopt": ModelOptFp8Config,
-        "nvfp4": ModelOptNvFp4Config,
+        "modelopt_fp4": ModelOptNvFp4Config,
         "marlin": MarlinConfig,
         "bitblas": BitBLASConfig,
         "gguf": GGUFConfig,

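For context, a simplified sketch of the lookup that the mapping above drives inside get_quantization_config, assuming it rejects unknown keys with a ValueError; after this rename the stale key "nvfp4" fails fast instead of resolving:

    def lookup(quantization: str, method_to_config: dict) -> type:
        # Simplified stand-in: the dict is the single source of truth for names.
        if quantization not in method_to_config:
            raise ValueError(f"Invalid quantization method: {quantization}")
        return method_to_config[quantization]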

@@ -192,7 +192,7 @@ class ModelOptNvFp4Config(QuantizationConfig):
 
     @classmethod
     def get_name(cls) -> QuantizationMethods:
-        return "nvfp4"
+        return "modelopt_fp4"
 
     @classmethod
     def get_supported_act_dtypes(cls) -> list[torch.dtype]:
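
get_name must stay in sync with the registry key, which is why this file changes together with the mapping above. A quick round-trip check one could run after the rename (import paths assumed from the module layout, not shown in this diff):

    from vllm.model_executor.layers.quantization import get_quantization_config
    from vllm.model_executor.layers.quantization.modelopt import ModelOptNvFp4Config

    # The class's self-reported name should round-trip through the registry.
    assert ModelOptNvFp4Config.get_name() == "modelopt_fp4"
    assert get_quantization_config("modelopt_fp4") is ModelOptNvFp4Config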