[Minor] Rename quantization nvfp4 to modelopt_fp4 (#18356)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin 2025-05-20 12:08:37 -04:00 committed by GitHub
parent 8f55962a7f
commit f4a8a37465
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 7 additions and 7 deletions

View File

@@ -41,8 +41,8 @@ EXPECTED_STRS_MAP = {
reason= reason=
"Prevent unstable test based on golden strings from breaking the build " "Prevent unstable test based on golden strings from breaking the build "
" and test input model being too large and hanging the system.") " and test input model being too large and hanging the system.")
@pytest.mark.skipif(not is_quant_method_supported("nvfp4"), @pytest.mark.skipif(not is_quant_method_supported("modelopt_fp4"),
reason="nvfp4 is not supported on this GPU type.") reason="modelopt_fp4 is not supported on this GPU type.")
@pytest.mark.parametrize("model_name", MODELS) @pytest.mark.parametrize("model_name", MODELS)
def test_models(example_prompts, model_name) -> None: def test_models(example_prompts, model_name) -> None:
model = LLM( model = LLM(
@@ -50,7 +50,7 @@ def test_models(example_prompts, model_name) -> None:
max_model_len=MAX_MODEL_LEN, max_model_len=MAX_MODEL_LEN,
trust_remote_code=True, trust_remote_code=True,
enforce_eager=True, enforce_eager=True,
quantization="nvfp4", quantization="modelopt_fp4",
) )
tokenizer = AutoTokenizer.from_pretrained(model_name) tokenizer = AutoTokenizer.from_pretrained(model_name)

View File

@@ -824,7 +824,7 @@ class ModelConfig:
optimized_quantization_methods = [ optimized_quantization_methods = [
"fp8", "marlin", "modelopt", "gptq_marlin_24", "gptq_marlin", "fp8", "marlin", "modelopt", "gptq_marlin_24", "gptq_marlin",
"awq_marlin", "fbgemm_fp8", "compressed-tensors", "experts_int8", "awq_marlin", "fbgemm_fp8", "compressed-tensors", "experts_int8",
"quark", "nvfp4", "bitblas", "gptq_bitblas" "quark", "modelopt_fp4", "bitblas", "gptq_bitblas"
] ]
if self.quantization is not None: if self.quantization is not None:
self.quantization = cast(QuantizationMethods, self.quantization = cast(QuantizationMethods,

View File

@@ -14,7 +14,7 @@ QuantizationMethods = Literal[
"ptpc_fp8", "ptpc_fp8",
"fbgemm_fp8", "fbgemm_fp8",
"modelopt", "modelopt",
"nvfp4", "modelopt_fp4",
"marlin", "marlin",
"bitblas", "bitblas",
"gguf", "gguf",
@@ -120,7 +120,7 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
"fp8": Fp8Config, "fp8": Fp8Config,
"fbgemm_fp8": FBGEMMFp8Config, "fbgemm_fp8": FBGEMMFp8Config,
"modelopt": ModelOptFp8Config, "modelopt": ModelOptFp8Config,
"nvfp4": ModelOptNvFp4Config, "modelopt_fp4": ModelOptNvFp4Config,
"marlin": MarlinConfig, "marlin": MarlinConfig,
"bitblas": BitBLASConfig, "bitblas": BitBLASConfig,
"gguf": GGUFConfig, "gguf": GGUFConfig,

View File

@@ -192,7 +192,7 @@ class ModelOptNvFp4Config(QuantizationConfig):
@classmethod @classmethod
def get_name(cls) -> QuantizationMethods: def get_name(cls) -> QuantizationMethods:
return "nvfp4" return "modelopt_fp4"
@classmethod @classmethod
def get_supported_act_dtypes(cls) -> list[torch.dtype]: def get_supported_act_dtypes(cls) -> list[torch.dtype]: