mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-26 20:28:04 +08:00
[ Misc ] Enable Quantizing All Layers of DeepSeekv2 (#6423)
This commit is contained in:
parent
ccd3c04571
commit
73030b7dae
@ -46,6 +46,6 @@ while getopts "m:b:l:f:t:" OPT; do
|
|||||||
done
|
done
|
||||||
|
|
||||||
lm_eval --model vllm \
|
lm_eval --model vllm \
|
||||||
--model_args pretrained=$MODEL,tensor_parallel_size=$TP_SIZE,add_bos_token=true,distributed_executor_backend="ray",trust_remote_code=true \
|
--model_args pretrained=$MODEL,tensor_parallel_size=$TP_SIZE,add_bos_token=true,distributed_executor_backend="ray",trust_remote_code=true,max_model_len=4096 \
|
||||||
--tasks gsm8k --num_fewshot $FEWSHOT --limit $LIMIT \
|
--tasks gsm8k --num_fewshot $FEWSHOT --limit $LIMIT \
|
||||||
--batch_size $BATCH_SIZE
|
--batch_size $BATCH_SIZE
|
||||||
|
|||||||
@ -431,6 +431,11 @@ def convert_pyslice_to_tensor(x: Any) -> torch.Tensor:
|
|||||||
def default_weight_loader(param: torch.Tensor,
|
def default_weight_loader(param: torch.Tensor,
|
||||||
loaded_weight: torch.Tensor) -> None:
|
loaded_weight: torch.Tensor) -> None:
|
||||||
"""Default weight loader."""
|
"""Default weight loader."""
|
||||||
|
# If the weight on disk does not have a shape, give it one
|
||||||
|
# (such as scales for AutoFp8).
|
||||||
|
if len(loaded_weight.shape) == 0:
|
||||||
|
loaded_weight = loaded_weight.reshape(1)
|
||||||
|
|
||||||
assert param.size() == loaded_weight.size()
|
assert param.size() == loaded_weight.size()
|
||||||
param.data.copy_(loaded_weight)
|
param.data.copy_(loaded_weight)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user