mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-23 19:25:01 +08:00
21 lines
508 B
Python
21 lines
508 B
Python
# Adapted from llama.py
|
|
"""Inference-only Phi3 model code, inherited from llama.py."""
|
|
|
|
from vllm.model_executor.models.llama import LlamaForCausalLM
|
|
|
|
|
|
class Phi3ForCausalLM(LlamaForCausalLM):
    """Phi3 causal language model that reuses ``LlamaForCausalLM`` as-is.

    No methods are overridden; the subclass only redefines the two
    class-level mappings declared below.
    """

    # Every packed module name maps to a one-element list holding that same
    # name (presumably because the Phi3 projections are already stored
    # fused -- TODO confirm against the weight loader).
    packed_modules_mapping = {
        "qkv_proj": ["qkv_proj"],
        "gate_up_proj": ["gate_up_proj"],
    }

    # BitsAndBytes-specific attribute: left as an empty dict because there
    # is no stacked parameter mapping.
    bitsandbytes_stacked_params_mapping = {}
|