Mirror of https://git.datalinker.icu/vllm-project/vllm.git, synced 2025-12-27 13:28:42 +08:00
[Misc] add process_weights_after_loading for DummyLoader (#8969)
parent 7da2487591
commit 82f3937e59
@@ -441,6 +441,18 @@ class DummyModelLoader(BaseModelLoader):
             # NOTE(woosuk): For accurate performance evaluation, we assign
             # random values to the weights.
             initialize_dummy_weights(model)
+
+            for _, module in model.named_modules():
+                quant_method = getattr(module, "quant_method", None)
+                if quant_method is not None:
+                    # When quant methods need to process weights after loading
+                    # (for repacking, quantizing, etc), they expect parameters
+                    # to be on the global target device. This scope is for the
+                    # case where cpu offloading is used, where we will move the
+                    # parameters onto device for processing and back off after.
+                    with device_loading_context(
+                            module, torch.device(device_config.device)):
+                        quant_method.process_weights_after_loading(module)
         return model.eval()
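The comments in the added block describe why the call is wrapped in device_loading_context: when CPU offloading is enabled, a module's parameters may live off-device, but quant_method.process_weights_after_loading expects them on the global target device. Below is a minimal, illustrative sketch of a context manager with that move-on/move-off behavior. It is not vLLM's actual device_loading_context; the name _device_loading_context_sketch is hypothetical, and the sketch assumes offloaded parameters should simply be restored to their original devices afterwards.

# Illustrative sketch only -- not vLLM's device_loading_context implementation.
# Temporarily moves a module's (possibly CPU-offloaded) parameters onto the
# target device so post-load weight processing can run there, then moves them
# back to wherever they originally lived.
from contextlib import contextmanager

import torch
import torch.nn as nn


@contextmanager
def _device_loading_context_sketch(module: nn.Module,
                                   target_device: torch.device):
    # Record which parameters are currently offloaded (not on the target device).
    original_devices = {
        name: p.device
        for name, p in module.named_parameters(recurse=False)
        if p.device != target_device
    }
    # Move offloaded parameters onto the target device for processing.
    for name, p in module.named_parameters(recurse=False):
        if name in original_devices:
            p.data = p.data.to(target_device)
    try:
        yield module
    finally:
        # Move them back off the device (e.g. back to CPU) afterwards.
        for name, p in module.named_parameters(recurse=False):
            if name in original_devices:
                p.data = p.data.to(original_devices[name])

Used the same way as in the diff, the quantization method's post-load processing would then run inside this scope, seeing all parameters on the target device regardless of offloading.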