mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-13 22:04:58 +08:00
[Bugfix] Fix handling of Tensorizer arguments for LoadConfig (#20643)
Signed-off-by: Sanger Steel <sangersteel@gmail.com>
This commit is contained in:
parent
efe73d0575
commit
4ac9c33f78
@ -103,25 +103,6 @@ def write_keyfile(keyfile_path: str):
|
|||||||
f.write(encryption_params.key)
|
f.write(encryption_params.key)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(not is_curl_installed(), reason="cURL is not installed")
|
|
||||||
def test_can_deserialize_s3(vllm_runner):
|
|
||||||
model_ref = "EleutherAI/pythia-1.4b"
|
|
||||||
tensorized_path = f"s3://tensorized/{model_ref}/fp16/model.tensors"
|
|
||||||
|
|
||||||
with vllm_runner(model_ref,
|
|
||||||
load_format="tensorizer",
|
|
||||||
model_loader_extra_config=TensorizerConfig(
|
|
||||||
tensorizer_uri=tensorized_path,
|
|
||||||
num_readers=1,
|
|
||||||
s3_endpoint="object.ord1.coreweave.com",
|
|
||||||
)) as loaded_hf_model:
|
|
||||||
deserialized_outputs = loaded_hf_model.generate(
|
|
||||||
prompts, sampling_params)
|
|
||||||
# noqa: E501
|
|
||||||
|
|
||||||
assert deserialized_outputs
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(not is_curl_installed(), reason="cURL is not installed")
|
@pytest.mark.skipif(not is_curl_installed(), reason="cURL is not installed")
|
||||||
def test_deserialized_encrypted_vllm_model_has_same_outputs(
|
def test_deserialized_encrypted_vllm_model_has_same_outputs(
|
||||||
model_ref, vllm_runner, tmp_path, model_path):
|
model_ref, vllm_runner, tmp_path, model_path):
|
||||||
|
|||||||
@ -1003,41 +1003,27 @@ class EngineArgs:
|
|||||||
override_attention_dtype=self.override_attention_dtype,
|
override_attention_dtype=self.override_attention_dtype,
|
||||||
)
|
)
|
||||||
|
|
||||||
def valid_tensorizer_config_provided(self) -> bool:
|
def validate_tensorizer_args(self):
|
||||||
"""
|
from vllm.model_executor.model_loader.tensorizer import (
|
||||||
Checks if a parseable TensorizerConfig was passed to
|
TensorizerConfig)
|
||||||
self.model_loader_extra_config. It first checks if the config passed
|
for key in self.model_loader_extra_config:
|
||||||
is a dict or a TensorizerConfig object directly, and if the latter is
|
if key in TensorizerConfig._fields:
|
||||||
true (by checking that the object has TensorizerConfig's
|
self.model_loader_extra_config["tensorizer_config"][
|
||||||
.to_serializable() method), converts it in to a serializable dict
|
key] = self.model_loader_extra_config[key]
|
||||||
format
|
|
||||||
"""
|
|
||||||
if self.model_loader_extra_config:
|
|
||||||
if hasattr(self.model_loader_extra_config, "to_serializable"):
|
|
||||||
self.model_loader_extra_config = (
|
|
||||||
self.model_loader_extra_config.to_serializable())
|
|
||||||
for allowed_to_pass in ["tensorizer_uri", "tensorizer_dir"]:
|
|
||||||
try:
|
|
||||||
self.model_loader_extra_config[allowed_to_pass]
|
|
||||||
return False
|
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
return True
|
|
||||||
|
|
||||||
def create_load_config(self) -> LoadConfig:
|
def create_load_config(self) -> LoadConfig:
|
||||||
|
|
||||||
if self.quantization == "bitsandbytes":
|
if self.quantization == "bitsandbytes":
|
||||||
self.load_format = "bitsandbytes"
|
self.load_format = "bitsandbytes"
|
||||||
|
|
||||||
if (self.load_format == "tensorizer"
|
if self.load_format == "tensorizer":
|
||||||
and self.valid_tensorizer_config_provided()):
|
if hasattr(self.model_loader_extra_config, "to_serializable"):
|
||||||
logger.info("Inferring Tensorizer args from %s", self.model)
|
self.model_loader_extra_config = (
|
||||||
self.model_loader_extra_config = {"tensorizer_dir": self.model}
|
self.model_loader_extra_config.to_serializable())
|
||||||
else:
|
self.model_loader_extra_config["tensorizer_config"] = {}
|
||||||
logger.info(
|
self.model_loader_extra_config["tensorizer_config"][
|
||||||
"Using Tensorizer args from --model-loader-extra-config. "
|
"tensorizer_dir"] = self.model
|
||||||
"Note that you can now simply pass the S3 directory in the "
|
self.validate_tensorizer_args()
|
||||||
"model tag instead of providing the JSON string.")
|
|
||||||
|
|
||||||
return LoadConfig(
|
return LoadConfig(
|
||||||
load_format=self.load_format,
|
load_format=self.load_format,
|
||||||
|
|||||||
@ -223,9 +223,11 @@ class TensorizerConfig(MutableMapping):
|
|||||||
and re.search(r'%0\dd', self.tensorizer_uri) is not None
|
and re.search(r'%0\dd', self.tensorizer_uri) is not None
|
||||||
|
|
||||||
if self.tensorizer_dir and self.tensorizer_uri:
|
if self.tensorizer_dir and self.tensorizer_uri:
|
||||||
raise ValueError(
|
logger.warning_once(
|
||||||
"Either tensorizer_dir or tensorizer_uri must be provided, "
|
"Provided both tensorizer_dir and tensorizer_uri. "
|
||||||
"not both.")
|
"Inferring tensorizer_dir from tensorizer_uri as the "
|
||||||
|
"latter takes precedence.")
|
||||||
|
self.tensorizer_dir = os.path.dirname(self.tensorizer_uri)
|
||||||
if self.tensorizer_dir and self.lora_dir:
|
if self.tensorizer_dir and self.lora_dir:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Only one of tensorizer_dir or lora_dir may be specified. "
|
"Only one of tensorizer_dir or lora_dir may be specified. "
|
||||||
|
|||||||
@ -43,7 +43,7 @@ class TensorizerLoader(BaseModelLoader):
|
|||||||
else:
|
else:
|
||||||
validate_config(load_config.model_loader_extra_config)
|
validate_config(load_config.model_loader_extra_config)
|
||||||
self.tensorizer_config = TensorizerConfig(
|
self.tensorizer_config = TensorizerConfig(
|
||||||
**load_config.model_loader_extra_config)
|
**load_config.model_loader_extra_config["tensorizer_config"])
|
||||||
|
|
||||||
def _verify_config(self, model_config: ModelConfig,
|
def _verify_config(self, model_config: ModelConfig,
|
||||||
parallel_config: ParallelConfig):
|
parallel_config: ParallelConfig):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user