mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-11 04:44:59 +08:00
[Bugfix] Fix Llama GGUF initialization (#18717)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent
82e2339b06
commit
a869baca73
@@ -208,7 +208,7 @@ class LlamaAttention(nn.Module):
                          quant_config: Optional[QuantizationConfig]) -> None:
         is_neox_style = True
         is_gguf = quant_config and quant_config.get_name() == "gguf"
-        if is_gguf and self.config.model_type == "llama":
+        if is_gguf and config.model_type == "llama":
             is_neox_style = False
 
         self.rotary_emb = get_rope(
Loading…
x
Reference in New Issue
Block a user