Chen Zhang 770ec6024f
[Model] Add support for the multi-modal Llama 3.2 model (#8811)
Co-authored-by: simon-mo <xmo@berkeley.edu>
Co-authored-by: Chang Su <chang.s.su@oracle.com>
Co-authored-by: Simon Mo <simon.mo@hey.com>
Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com>
Co-authored-by: Roger Wang <ywang@roblox.com>
2024-09-25 13:29:32 -07:00


from transformers.models.mllama import configuration_mllama as mllama_hf_config


class MllamaTextConfig(mllama_hf_config.MllamaTextConfig):
    '''
    Use this class to override is_encoder_decoder:
    - transformers regards mllama as is_encoder_decoder=False
    - vllm needs is_encoder_decoder=True to enable cross-attention
    '''

    def __init__(
        self,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.is_encoder_decoder = True


class MllamaConfig(mllama_hf_config.MllamaConfig):

    def __init__(
        self,
        text_config=None,
        **kwargs,
    ):
        # Materialize dict text configs as the overridden MllamaTextConfig
        # so is_encoder_decoder=True is set before the parent constructor runs.
        if isinstance(text_config, dict):
            text_config = MllamaTextConfig(**text_config)

        super().__init__(text_config=text_config, **kwargs)
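

# A minimal usage sketch, assuming a transformers version with mllama
# support (>= 4.45) is installed. The empty text_config dict is a
# placeholder for illustration, not a real model configuration; real
# checkpoints supply the full set of fields.
if __name__ == "__main__":
    cfg = MllamaConfig(text_config={})
    # The dict was materialized as the overridden MllamaTextConfig, so
    # vllm sees the model as encoder-decoder (cross-attention enabled).
    assert isinstance(cfg.text_config, MllamaTextConfig)
    assert cfg.text_config.is_encoder_decoder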