Mirror of https://git.datalinker.icu/vllm-project/vllm.git
[Misc][Attention][Quantization] init property earlier (#13733)
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
parent 1e15aaef56
commit ab1091d5f2
@@ -85,6 +85,11 @@ class Attention(nn.Module):
         self._k_scale_float = 1.0
         self._v_scale_float = 1.0
 
+        self.num_heads = num_heads
+        self.head_size = head_size
+        self.num_kv_heads = num_kv_heads
+        self.sliding_window = sliding_window
+
         quant_method = quant_config.get_quant_method(
             self, prefix=prefix) if quant_config else None
         if quant_method is not None:
@@ -116,10 +121,6 @@ class Attention(nn.Module):
                          alibi_slopes, sliding_window, kv_cache_dtype,
                          blocksparse_params, logits_soft_cap, attn_type,
                          **extra_impl_args)
-        self.num_heads = num_heads
-        self.head_size = head_size
-        self.num_kv_heads = num_kv_heads
-        self.sliding_window = sliding_window
         self.backend = backend_name_to_enum(attn_backend.get_name())
         self.dtype = dtype
 
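Why the move matters: quant_config.get_quant_method(self, prefix=prefix) receives the layer object itself, so a quant method that inspects layer attributes such as num_heads or head_size during construction would previously have run before those attributes were assigned. Below is a minimal sketch of that ordering hazard; MyQuantConfig and MyQuantMethod are hypothetical names for illustration, not part of vLLM, and only the init-order issue mirrors the commit.

# Sketch of the ordering hazard fixed by #13733.
# MyQuantConfig / MyQuantMethod are hypothetical, not vLLM APIs.

class MyQuantMethod:
    def __init__(self, layer):
        # A quant method may read per-layer shape info at construction
        # time. Before this commit, Attention.__init__ built the quant
        # method *before* assigning these attributes, so a lookup like
        # this would raise AttributeError.
        self.heads = layer.num_heads

class MyQuantConfig:
    def get_quant_method(self, layer, prefix):
        return MyQuantMethod(layer)

class Attention:
    def __init__(self, num_heads, quant_config):
        # After the change: properties are initialized first...
        self.num_heads = num_heads
        # ...so get_quant_method(self, ...) can safely read them.
        self.quant_method = (quant_config.get_quant_method(self, prefix="")
                             if quant_config else None)

attn = Attention(num_heads=8, quant_config=MyQuantConfig())
assert attn.quant_method.heads == 8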