From 767c727a81ae9ec570d30d55b7afc783775d5a05 Mon Sep 17 00:00:00 2001
From: Calvinn Ng <39899397+Calvinnncy97@users.noreply.github.com>
Date: Sat, 8 Jun 2024 05:10:21 +0800
Subject: [PATCH] fix DbrxFusedNormAttention missing cache_config (#5340)

Co-authored-by: team
---
 vllm/model_executor/models/dbrx.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/models/dbrx.py b/vllm/model_executor/models/dbrx.py
index 8ff19a2015e0..59af42445f32 100644
--- a/vllm/model_executor/models/dbrx.py
+++ b/vllm/model_executor/models/dbrx.py
@@ -247,11 +247,12 @@ class DbrxFusedNormAttention(nn.Module):
     def __init__(
         self,
         config: DbrxConfig,
+        cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
         self.d_model = config.d_model
-        self.attn = DbrxAttention(config, quant_config)
+        self.attn = DbrxAttention(config, cache_config, quant_config)
         self.norm_1 = nn.LayerNorm(self.d_model)
         self.norm_2 = nn.LayerNorm(self.d_model)
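
For context, the constructor fixed here only matters if its caller actually passes a
cache_config through. Below is a minimal sketch of the expected call-site pattern in
the enclosing block module; the caller itself is outside this hunk, so the class name
DbrxBlock, the attribute names, and the exact import paths are assumptions based on
how other vLLM model files of this vintage are laid out, not part of the patch.

    # Hypothetical call-site sketch; everything except DbrxFusedNormAttention's
    # signature (fixed by this patch) is assumed, not taken from the diff.
    from typing import Optional

    import torch.nn as nn

    from vllm.config import CacheConfig  # assumed import path
    from vllm.model_executor.layers.quantization.base_config import (
        QuantizationConfig)  # assumed import path

    class DbrxBlock(nn.Module):

        def __init__(
            self,
            config,  # DbrxConfig in the real file
            cache_config: Optional[CacheConfig] = None,
            quant_config: Optional[QuantizationConfig] = None,
        ):
            super().__init__()
            # Thread cache_config through to the fused norm/attention module so
            # DbrxAttention receives it instead of silently defaulting to None,
            # which is exactly the bug this patch fixes one level down.
            self.norm_attn_norm = DbrxFusedNormAttention(config, cache_config,
                                                         quant_config)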