mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-10 03:29:09 +08:00
[Model][LoRA] LoRA support added for LlamaEmbeddingModel (#10071)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
parent
6a585a23d2
commit
2003cc3513
@ -333,7 +333,7 @@ Text Embedding
|
|||||||
* - :code:`MistralModel`
|
* - :code:`MistralModel`
|
||||||
- Mistral-based
|
- Mistral-based
|
||||||
- :code:`intfloat/e5-mistral-7b-instruct`, etc.
|
- :code:`intfloat/e5-mistral-7b-instruct`, etc.
|
||||||
-
|
- ✅︎
|
||||||
- ✅︎
|
- ✅︎
|
||||||
|
|
||||||
.. important::
|
.. important::
|
||||||
|
|||||||
@ -627,7 +627,7 @@ class LlamaForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
|
|||||||
return name, loaded_weight
|
return name, loaded_weight
|
||||||
|
|
||||||
|
|
||||||
class LlamaEmbeddingModel(nn.Module, SupportsPP):
|
class LlamaEmbeddingModel(nn.Module, SupportsLoRA, SupportsPP):
|
||||||
"""
|
"""
|
||||||
A model that uses Llama with additional embedding functionalities.
|
A model that uses Llama with additional embedding functionalities.
|
||||||
|
|
||||||
@ -638,6 +638,19 @@ class LlamaEmbeddingModel(nn.Module, SupportsPP):
|
|||||||
model: An instance of LlamaModel used for forward operations.
|
model: An instance of LlamaModel used for forward operations.
|
||||||
_pooler: An instance of Pooler used for pooling operations.
|
_pooler: An instance of Pooler used for pooling operations.
|
||||||
"""
|
"""
|
||||||
|
packed_modules_mapping = {
|
||||||
|
"qkv_proj": ["q_proj", "k_proj", "v_proj"],
|
||||||
|
"gate_up_proj": ["gate_proj", "up_proj"]
|
||||||
|
}
|
||||||
|
|
||||||
|
# LoRA specific attributes
|
||||||
|
supported_lora_modules = [
|
||||||
|
"qkv_proj", "o_proj", "gate_up_proj", "down_proj", "embed_tokens"
|
||||||
|
]
|
||||||
|
embedding_modules = {
|
||||||
|
"embed_tokens": "input_embeddings",
|
||||||
|
}
|
||||||
|
embedding_padding_modules = []
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@ -679,3 +692,8 @@ class LlamaEmbeddingModel(nn.Module, SupportsPP):
|
|||||||
|
|
||||||
def load_kv_cache_scales(self, quantization_param_path: str) -> None:
|
def load_kv_cache_scales(self, quantization_param_path: str) -> None:
|
||||||
self.model.load_kv_cache_scales(quantization_param_path)
|
self.model.load_kv_cache_scales(quantization_param_path)
|
||||||
|
|
||||||
|
# Expose the wrapped model's config: LRUCacheWorkerLoRAManager instantiation requires the model config.
|
||||||
|
@property
|
||||||
|
def config(self):
|
||||||
|
return self.model.config
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user