mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-07 15:55:43 +08:00
parent
f7344c971c
commit
78d47494df
@ -150,12 +150,18 @@ class ModelArchConfigConvertorBase:
|
|||||||
producer_name = quant_cfg.get("producer", {}).get("name")
|
producer_name = quant_cfg.get("producer", {}).get("name")
|
||||||
if producer_name == "modelopt":
|
if producer_name == "modelopt":
|
||||||
quant_algo = quant_cfg.get("quantization", {}).get("quant_algo")
|
quant_algo = quant_cfg.get("quantization", {}).get("quant_algo")
|
||||||
if quant_algo == "FP8":
|
if quant_algo is not None:
|
||||||
quant_cfg["quant_method"] = "modelopt"
|
quant_algo_upper = str(quant_algo).upper()
|
||||||
elif quant_algo == "NVFP4":
|
if quant_algo_upper in {
|
||||||
quant_cfg["quant_method"] = "modelopt_fp4"
|
"FP8",
|
||||||
elif quant_algo is not None:
|
"FP8_PER_CHANNEL_PER_TOKEN",
|
||||||
raise ValueError(f"Unknown ModelOpt quant algo: {quant_algo}")
|
"FP8_PB_WO",
|
||||||
|
}:
|
||||||
|
quant_cfg["quant_method"] = "modelopt"
|
||||||
|
elif quant_algo_upper == "NVFP4":
|
||||||
|
quant_cfg["quant_method"] = "modelopt_fp4"
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Unknown ModelOpt quant algo: {quant_algo}")
|
||||||
|
|
||||||
if quant_cfg is not None:
|
if quant_cfg is not None:
|
||||||
# Use the community standard 'quant_method'
|
# Use the community standard 'quant_method'
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user