mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-07 16:27:04 +08:00
Signed-off-by: Nick Hill <nhill@redhat.com> Signed-off-by: Lucas Kabela <lucaskabela@meta.com> Signed-off-by: Max de Bayser <mbayser@br.ibm.com> Signed-off-by: Andrew Sansom <andrew@protopia.ai> Signed-off-by: Boyuan Feng <boyuan@meta.com> Signed-off-by: Boyuan Feng <fby.1994@gmail.com> Signed-off-by: boyuanfeng <boyuan@meta.com> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Signed-off-by: JartX <sagformas@epdcenter.es> Signed-off-by: Chendi Xue <Chendi.Xue@intel.com> Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com> Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> Signed-off-by: Chen Zhang <zhangch99@outlook.com> Signed-off-by: Roger Wang <hey@rogerw.io> Signed-off-by: mgoin <mgoin64@gmail.com> Signed-off-by: wwl2755 <wangwenlong2755@gmail.com> Signed-off-by: Manoel Marques <manoel.marques@ibm.com> Signed-off-by: Manoel Marques <manoelmrqs@gmail.com> Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn> Signed-off-by: pengdrumli <pengdrumli@tencent.com> Signed-off-by: windsonsea <haifeng.yao@daocloud.io> Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai> Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu> Signed-off-by: Huamin Li <3ericli@gmail.com> Signed-off-by: simondanielsson <simon.danielsson99@hotmail.com> Signed-off-by: Rahul Tuli <rtuli@redhat.com> Signed-off-by: Yang <lymailforjob@gmail.com> Signed-off-by: Debolina Roy <debroy@redhat.com> Signed-off-by: David Chen <530634352@qq.com> Signed-off-by: wangzi <3220100013@zju.edu.cn> Signed-off-by: Eldar Kurtic <8884008+eldarkurtic@users.noreply.github.com> Signed-off-by: NickLucche <nlucches@redhat.com> Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com> Signed-off-by: Sara Kokkila Schumacher <saraks@ibm.com> Signed-off-by: Csrayz <jover@cmbchina.com> Signed-off-by: ivyilike <pww123@cmbchina.com> Signed-off-by: Burkhard Ringlein <ngl@zurich.ibm.com> Signed-off-by: Bowen Wang <abmfy@icloud.com> Signed-off-by: qqma <qqma@amazon.com> Signed-off-by: ElizaWszola 
<ewszola@redhat.com> Signed-off-by: Lu Fang <fanglu@fb.com> Signed-off-by: Zhuohan Li <zhuohan123@gmail.com> Signed-off-by: Luka Govedič <lgovedic@redhat.com> Signed-off-by: luka <lgovedic@redhat.com> Signed-off-by: Luka Govedič <ProExpertProg@users.noreply.github.com> Signed-off-by: Or Ozeri <oro@il.ibm.com> Signed-off-by: Johnny Yang <johnnyyang@google.com> Signed-off-by: Alec Solder <alecs@fb.com> Signed-off-by: Alec S <10566873+alecsolder@users.noreply.github.com> Signed-off-by: Russell Bryant <rbryant@redhat.com> Signed-off-by: Matthew Bonanni <mbonanni@redhat.com> Signed-off-by: Alexander Matveev <amatveev@redhat.com> Signed-off-by: yewentao256 <zhyanwentao@126.com> Signed-off-by: liuye.hj <liuye.hj@alibaba-inc.com> Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> Signed-off-by: Lucia Fang <116399278+luccafong@users.noreply.github.com> Signed-off-by: Michael Goin <mgoin64@gmail.com> Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com> Signed-off-by: Ming Yang <minos.future@gmail.com> Signed-off-by: Zhikaiiii <1658973216@qq.com> Signed-off-by: Andreas Hartel <andreas.hartel@aleph-alpha.com> Signed-off-by: Jee Jee Li <pandaleefree@gmail.com> Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com> Signed-off-by: wuxibin <wuxibin@bytedance.com> Signed-off-by: youkaichao <youkaichao@gmail.com> Signed-off-by: Peter Pan <Peter.Pan@daocloud.io> Signed-off-by: Peter Pan <peter.pan@daocloud.io> Signed-off-by: Nicolò Lucchesi<nicolo.lucchesi@gmail.com> Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com> Signed-off-by: Sage Moore <sage@neuralmagic.com> Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com> Signed-off-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com> Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com> Signed-off-by: Bill Nell <bnell@redhat.com> Signed-off-by: Shreeasish Kumar <shreeasish@rivosinc.com> Signed-off-by: Weida Hong <wdhongtw@google.com> Signed-off-by: Ekagra Ranjan <3116519+ekagra-ranjan@users.noreply.github.com> 
Signed-off-by: Hashem Hashemi <hashem.hashemi@amd.com> Signed-off-by: Hashem Hashemi <159079214+amd-hhashemi@users.noreply.github.com> Signed-off-by: Amir Samani <asamani@nvidia.com> Signed-off-by: ElizaWszola <elizaw.9289@gmail.com> Signed-off-by: jiahanc <173873397+jiahanc@users.noreply.github.com> Signed-off-by: ilmarkov <markovilya197@gmail.com> Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com> Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com> Signed-off-by: rouchenzi <ruochenwen@gmail.com> Signed-off-by: rouchenzi <40842833+rouchenzi@users.noreply.github.com> Signed-off-by: Andrew Xia <axia@meta.com> Signed-off-by: Kourosh Hakhamaneshi <kourosh@anyscale.com> Signed-off-by: Corey Lowman <clowman1993@gmail.com> Signed-off-by: jpvillam <jpvillam@amd.com> Signed-off-by: dougbtv <dosmith@redhat.com> Signed-off-by: Chenxi Yang <cxyang@fb.com> Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Signed-off-by: ahao-anyscale <ahao@anyscale.com> Signed-off-by: Yan Lu <luyan@nvidia.com> Signed-off-by: baxingpiaochong <771405853@qq.com> Signed-off-by: Kyle Sayers <kylesayrs@gmail.com> Signed-off-by: Nikhil Gupta <nikhil.gupta2@arm.com> Signed-off-by: Yong Hoon Shin <yhshin@meta.com> Signed-off-by: Benjamin Chislett <benjamin.chislett@centml.ai> Signed-off-by: Benjamin Chislett <bchislett@nvidia.com> Signed-off-by: Ben Browning <bbrownin@redhat.com> Signed-off-by: Chengji Yao <chengjiyao@google.com> Signed-off-by: jiang1.li <jiang1.li@intel.com> Signed-off-by: Jackmin801 <ongjackm@gmail.com> Signed-off-by: Jonas M. Kübler <44084297+jmkuebler@users.noreply.github.com> Signed-off-by: taohui <taohui3@gmail.com> Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io> Signed-off-by: Shu Wang <shuw@nvidia.com> Signed-off-by: Shu Wang. 
<shuw@nvidia.com> Signed-off-by: Tyler Michael Smith <tlrmchlsmth@gmail.com> Signed-off-by: Duncan Moss <djm.moss@gmail.com> Signed-off-by: Shiyan Deng <dsy842974287@meta.com> Signed-off-by: Wei Wei <wwei6@meta.com> Signed-off-by: Saman Keon <samanamp@outlook.com> Signed-off-by: yangxurui <yangxurui@meituan.com> Signed-off-by: nicole-lihui <nicole.li@daocloud.io> Signed-off-by: courage17340 <courage17340@163.com> Signed-off-by: Jacob Kahn <jacobkahn1@gmail.com> Signed-off-by: Fadi Arafeh <fadi.arafeh@arm.com> Signed-off-by: Agata Dobrzyniewicz <adobrzyniewicz@habana.ai> Signed-off-by: zxw <1020938856@qq.com> Signed-off-by: wang.yuqi <noooop@126.com> Signed-off-by: Cyrus Leung <cyrus.tl.leung@gmail.com> Signed-off-by: chenlang <chen.lang5@zte.com.cn> Signed-off-by: Jonas Kuebler <kuebj@amazon.com> Signed-off-by: AlonKejzman <alonkeizman@gmail.com> Signed-off-by: Tao Hui <taohui3@gmail.com> Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com> Signed-off-by: Tomer Asida <57313761+tomeras91@users.noreply.github.com> Signed-off-by: Aleksandr Malyshev <maleksan@amd.com> Signed-off-by: Eugene Khvedchenia <ekhvedchenia@nvidia.com> Signed-off-by: Eugene Khvedchenya <ekhvedchenya@gmail.com> Signed-off-by: yiting.jiang <yiting.jiang@daocloud.io> Signed-off-by: xaguilar <Xavier.AguilarFruto@amd.com> Signed-off-by: Iceber Gu <caiwei95@hotmail.com> Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com> Signed-off-by: Icey <1790571317@qq.com> Signed-off-by: 许文卿 <xwq391974@alibaba-inc.com> Signed-off-by: Chih-Chieh-Yang <7364402+cyang49@users.noreply.github.com> Co-authored-by: Nick Hill <nhill@redhat.com> Co-authored-by: Lucas Kabela <lucasakabela@gmail.com> Co-authored-by: Maximilien de Bayser <mbayser@br.ibm.com> Co-authored-by: Andrew Sansom <andrew@protopia.ai> Co-authored-by: Boyuan Feng <boyuan@meta.com> Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: JartX <sagformas@epdcenter.es> Co-authored-by: Chendi.Xue <chendi.xue@intel.com> Co-authored-by: Chauncey <chaunceyjiang@gmail.com> Co-authored-by: xin.li <xin.li@daocloud.io> Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk> Co-authored-by: Chen Zhang <zhangch99@outlook.com> Co-authored-by: Roger Wang <hey@rogerw.io> Co-authored-by: Michael Goin <mgoin64@gmail.com> Co-authored-by: Wenlong Wang <wangwenlong2755@gmail.com> Co-authored-by: Manoel Marques <manoelmrqs@gmail.com> Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn> Co-authored-by: lirong <56789630+lirong-lirong@users.noreply.github.com> Co-authored-by: Michael Yao <haifeng.yao@daocloud.io> Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu> Co-authored-by: Huamin Li <3ericli@gmail.com> Co-authored-by: Lu Fang <30275821+houseroad@users.noreply.github.com> Co-authored-by: Simon Danielsson <70206058+simondanielsson@users.noreply.github.com> Co-authored-by: Rahul Tuli <rtuli@redhat.com> Co-authored-by: Claude <noreply@anthropic.com> Co-authored-by: Yang Liu <127183760+KKSK-DON@users.noreply.github.com> Co-authored-by: Deboleina <debroy@redhat.com> Co-authored-by: yinz-aizip <yinz@aizip.ai> Co-authored-by: WeiQing Chen <40507679+david6666666@users.noreply.github.com> Co-authored-by: wangzi <3220100013@zju.edu.cn> Co-authored-by: Eldar Kurtić <8884008+eldarkurtic@users.noreply.github.com> Co-authored-by: Nicolò Lucchesi <nlucches@redhat.com> Co-authored-by: Ye (Charlotte) Qi <yeq@meta.com> Co-authored-by: Yizhou <136800916+yiz-liu@users.noreply.github.com> Co-authored-by: Sara-KS <50249410+Sara-KS@users.noreply.github.com> Co-authored-by: Csrayz <jover@cmbchina.com> Co-authored-by: ivyilike <pww123@cmbchina.com> Co-authored-by: Burkhard Ringlein <ngl@zurich.ibm.com> Co-authored-by: Bowen Wang <abmfy@icloud.com> Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com> Co-authored-by: Daisy-Ma-coder 
<daisy.ma.0117@gmail.com> Co-authored-by: qqma <qqma@amazon.com> Co-authored-by: ElizaWszola <ewszola@redhat.com> Co-authored-by: Lucia Fang <116399278+luccafong@users.noreply.github.com> Co-authored-by: Zhuohan Li <zhuohan123@gmail.com> Co-authored-by: Simon Mo <simon.mo@hey.com> Co-authored-by: Or Ozeri <oro@il.ibm.com> Co-authored-by: Johnny Yang <24908445+jcyang43@users.noreply.github.com> Co-authored-by: Chengji Yao <chengjiyao@google.com> Co-authored-by: Alec S <10566873+alecsolder@users.noreply.github.com> Co-authored-by: Alec Solder <alecs@fb.com> Co-authored-by: Russell Bryant <rbryant@redhat.com> Co-authored-by: Matthew Bonanni <mbonanni@redhat.com> Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com> Co-authored-by: Chris Bamford <chrisbam4d@gmail.com> Co-authored-by: Alexander Matveev <59768536+alexm-redhat@users.noreply.github.com> Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Co-authored-by: JJJYmmm <92386084+JJJYmmm@users.noreply.github.com> Co-authored-by: liuye.hj <liuye.hj@alibaba-inc.com> Co-authored-by: Kunshang Ji <kunshang.ji@intel.com> Co-authored-by: Lucia (Lu) Fang <fanglu@meta.com> Co-authored-by: Varun Sundar Rabindranath <varunsundar08@gmail.com> Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com> Co-authored-by: Ming Yang <yming@meta.com> Co-authored-by: Zhikaiiii <55917203+Zhikaiiii@users.noreply.github.com> Co-authored-by: Andreas Hartel <andreas@hartel.me> Co-authored-by: Jee Jee Li <pandaleefree@gmail.com> Co-authored-by: vllmellm <vllm.ellm@embeddedllm.com> Co-authored-by: Joel <wuxibin89@163.com> Co-authored-by: youkaichao <youkaichao@gmail.com> Co-authored-by: Mark McLoughlin <markmc@redhat.com> Co-authored-by: Peter Pan <peter.pan@daocloud.io> Co-authored-by: Nicolò Lucchesi <nicolo.lucchesi@gmail.com> Co-authored-by: Fanli Lin <fanli.lin@intel.com> Co-authored-by: Thomas Parnell <tpa@zurich.ibm.com> Co-authored-by: Lucas Wilkinson 
<LucasWilkinson@users.noreply.github.com> Co-authored-by: Sage Moore <sage@neuralmagic.com> Co-authored-by: yewentao256 <zhyanwentao@126.com> Co-authored-by: bnellnm <49004751+bnellnm@users.noreply.github.com> Co-authored-by: rivos-shreeasish <shreeasish@rivosinc.com> Co-authored-by: Chih-Chieh Yang <chih.chieh.yang@ibm.com> Co-authored-by: Weida Hong <wdhongtw@gmail.com> Co-authored-by: Ekagra Ranjan <3116519+ekagra-ranjan@users.noreply.github.com> Co-authored-by: Hashem Hashemi <159079214+amd-hhashemi@users.noreply.github.com> Co-authored-by: Amir Samani <samani@ualberta.ca> Co-authored-by: Luka Govedič <lgovedic@redhat.com> Co-authored-by: jiahanc <173873397+jiahanc@users.noreply.github.com> Co-authored-by: Ilya Markov <markovilya197@gmail.com> Co-authored-by: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com> Co-authored-by: Jialin Ouyang <Jialin.Ouyang@gmail.com> Co-authored-by: rouchenzi <40842833+rouchenzi@users.noreply.github.com> Co-authored-by: Andrew Xia <axia@meta.com> Co-authored-by: kourosh hakhamaneshi <31483498+kouroshHakha@users.noreply.github.com> Co-authored-by: Corey Lowman <clowman1993@gmail.com> Co-authored-by: Juan Villamizar <100237675+jpvillam-amd@users.noreply.github.com> Co-authored-by: jpvillam <jpvillam@amd.com> Co-authored-by: Doug Smith <dosmith@redhat.com> Co-authored-by: Chenxi Yang <cxyang@cs.utexas.edu> Co-authored-by: Chenxi Yang <cxyang@fb.com> Co-authored-by: ahao-anyscale <ahao@anyscale.com> Co-authored-by: 0xNullPath <luyanfcp@foxmail.com> Co-authored-by: baxingpiaochong <771405853@qq.com> Co-authored-by: Benjamin Chislett <bchislett@nvidia.com> Co-authored-by: Kyle Sayers <kylesayrs@gmail.com> Co-authored-by: Nikhil Gupta <nikhil.gupta2@arm.com> Co-authored-by: Yong Hoon Shin <48474650+sarckk@users.noreply.github.com> Co-authored-by: lhsjohn <huashuoli@tencent.com> Co-authored-by: Ben Browning <bbrownin@redhat.com> Co-authored-by: Li, Jiang <jiang1.li@intel.com> Co-authored-by: Jackmin801 
<56836461+Jackmin801@users.noreply.github.com> Co-authored-by: Jonas M. Kübler <44084297+jmkuebler@users.noreply.github.com> Co-authored-by: Tao Hui <taohui3@gmail.com> Co-authored-by: rongfu.leng <rongfu.leng@daocloud.io> Co-authored-by: Shu Wang <shuw@nvidia.com> Co-authored-by: Tyler Michael Smith <tlrmchlsmth@gmail.com> Co-authored-by: Duncan Moss <djm.moss@gmail.com> Co-authored-by: Shiyan Deng <dsy842974287@meta.com> Co-authored-by: Wei Wei <wwei6@meta.com> Co-authored-by: Saman A. Pour <samanamp@outlook.com> Co-authored-by: XuruiYang <530534756@qq.com> Co-authored-by: yangxurui <yangxurui@meituan.com> Co-authored-by: Nicole LiHui 🥜 <nicolelihui@outlook.com> Co-authored-by: courage17340 <courage17340@users.noreply.github.com> Co-authored-by: Jacob Kahn <jacobkahn1@gmail.com> Co-authored-by: Nicole LiHui 🥜 <nicole.li@daocloud.io> Co-authored-by: Fadi Arafeh <115173828+fadara01@users.noreply.github.com> Co-authored-by: Agata Dobrzyniewicz <160237065+adobrzyn@users.noreply.github.com> Co-authored-by: yyzxw <34639446+yyzxw@users.noreply.github.com> Co-authored-by: wang.yuqi <noooop@126.com> Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com> Co-authored-by: chenlang <chen.lang5@zte.com.cn> Co-authored-by: chenlang <10346245@zte.com.cn> Co-authored-by: AlonKejzman <alonkeizman@gmail.com> Co-authored-by: tomeras91 <57313761+tomeras91@users.noreply.github.com> Co-authored-by: Aleksandr Malyshev <164964928+maleksan85@users.noreply.github.com> Co-authored-by: Aleksandr Malyshev <maleksan@amd.com> Co-authored-by: Doug Lehr <douglehr@amd.com> Co-authored-by: Eugene Khvedchenya <ekhvedchenya@gmail.com> Co-authored-by: yitingdc <59356937+yitingdc@users.noreply.github.com> Co-authored-by: xaguilar-amd <xavier.aguilarfruto@amd.com> Co-authored-by: Iceber Gu <caiwei95@hotmail.com> Co-authored-by: Tao He <linzhu.ht@alibaba-inc.com> Co-authored-by: Icey <1790571317@qq.com> Co-authored-by: Xu Wenqing <121550081+Xu-Wenqing@users.noreply.github.com> Co-authored-by: Chih-Chieh 
Yang <7364402+cyang49@users.noreply.github.com> Co-authored-by: RishiAstra <40644327+RishiAstra@users.noreply.github.com>
495 lines
18 KiB
Python
495 lines
18 KiB
Python
# SPDX-License-Identifier: Apache-2.0
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
|
|
|
import os
|
|
from dataclasses import MISSING, Field, asdict, dataclass, field
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
|
|
from vllm.compilation.backends import VllmBackend
|
|
from vllm.config import ModelConfig, PoolerConfig, VllmConfig, update_config
|
|
from vllm.config.load import LoadConfig
|
|
from vllm.config.utils import get_field
|
|
from vllm.model_executor.layers.pooler import PoolingType
|
|
from vllm.platforms import current_platform
|
|
|
|
|
|
def test_compile_config_repr_succeeds():
    """repr() of a VllmConfig must not crash after backend mutation.

    VllmBackend mutates the config object in place (registering inductor
    passes, per the assertion below), and repr() must survive that.
    """
    # setup: VllmBackend mutates the config object
    config = VllmConfig()
    backend = VllmBackend(config)
    backend.configure_post_pass()

    # test that repr(config) succeeds
    rendered = repr(config)
    assert 'VllmConfig' in rendered
    assert 'inductor_passes' in rendered
|
|
|
|
|
|
@dataclass
|
|
class _TestConfigFields:
|
|
a: int
|
|
b: dict = field(default_factory=dict)
|
|
c: str = "default"
|
|
|
|
|
|
def test_get_field():
    """get_field must reject fields without defaults and faithfully mirror
    default / default_factory for fields that have them."""
    # 'a' has neither a default nor a factory -> not retrievable.
    with pytest.raises(ValueError):
        get_field(_TestConfigFields, "a")

    field_b = get_field(_TestConfigFields, "b")
    assert isinstance(field_b, Field)
    assert field_b.default is MISSING
    assert field_b.default_factory is dict

    field_c = get_field(_TestConfigFields, "c")
    assert isinstance(field_c, Field)
    assert field_c.default == "default"
    assert field_c.default_factory is MISSING
|
|
|
|
|
|
@dataclass
class _TestNestedConfig:
    """Wrapper dataclass holding a nested _TestConfigFields instance."""

    # Fresh inner config per instance; lambda avoids a shared default.
    a: _TestConfigFields = field(
        default_factory=lambda: _TestConfigFields(a=0))
|
|
|
|
|
|
def test_update_config():
    """update_config: flat updates, nested updates (dataclass and dict
    forms), and rejection of unknown fields / invalid nested types."""
    # Simple update
    config1 = _TestConfigFields(a=0)
    new_config1 = update_config(config1, {"a": 42})
    assert new_config1.a == 42

    # Nonexistent field
    with pytest.raises(AssertionError):
        new_config1 = update_config(config1, {"nonexistent": 1})

    # Nested update with dataclass: the inner object is replaced wholesale.
    config2 = _TestNestedConfig()
    new_inner_config = _TestConfigFields(a=1, c="new_value")
    new_config2 = update_config(config2, {"a": new_inner_config})
    assert new_config2.a == new_inner_config

    # Nested update with dict: only the named inner field changes.
    config3 = _TestNestedConfig()
    new_config3 = update_config(config3, {"a": {"c": "new_value"}})
    assert new_config3.a.c == "new_value"

    # Nested update with invalid type (plain string for a dataclass field).
    with pytest.raises(AssertionError):
        new_config3 = update_config(config3, {"a": "new_value"})
|
|
|
|
|
|
# Can remove once --task option is fully deprecated
@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type",
     "expected_task"),
    [
        ("distilbert/distilgpt2", "generate", "none", "generate"),
        ("intfloat/multilingual-e5-small", "pooling", "none", "embed"),
        ("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify", "classify"),
        ("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "none",
         "classify"),
        ("Qwen/Qwen2.5-Math-RM-72B", "pooling", "none", "reward"),
        ("openai/whisper-small", "generate", "none", "transcription"),
    ],
)
def test_auto_task(model_id, expected_runner_type, expected_convert_type,
                   expected_task):
    """task="auto" should infer runner and convert types from the model."""
    # NOTE(review): expected_task is parametrized but never asserted here;
    # presumably kept for readability until --task is removed — confirm.
    config = ModelConfig(model_id, task="auto")

    assert config.runner_type == expected_runner_type
    assert config.convert_type == expected_convert_type
|
|
|
|
|
|
# Can remove once --task option is fully deprecated
@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type",
     "expected_task"),
    [
        ("distilbert/distilgpt2", "pooling", "embed", "embed"),
        ("intfloat/multilingual-e5-small", "pooling", "embed", "embed"),
        ("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify", "classify"),
        ("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "classify",
         "classify"),
        ("Qwen/Qwen2.5-Math-RM-72B", "pooling", "embed", "embed"),
        ("openai/whisper-small", "pooling", "embed", "embed"),
    ],
)
def test_score_task(model_id, expected_runner_type, expected_convert_type,
                    expected_task):
    """task="score" forces the pooling runner regardless of architecture."""
    # NOTE(review): expected_task is parametrized but never asserted here.
    config = ModelConfig(model_id, task="score")

    assert config.runner_type == expected_runner_type
    assert config.convert_type == expected_convert_type
|
|
|
|
|
|
# Can remove once --task option is fully deprecated
@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type",
     "expected_task"),
    [
        ("openai/whisper-small", "generate", "none", "transcription"),
    ],
)
def test_transcription_task(model_id, expected_runner_type,
                            expected_convert_type, expected_task):
    """task="transcription" maps onto the generate runner for Whisper."""
    # NOTE(review): expected_task is parametrized but never asserted here.
    config = ModelConfig(model_id, task="transcription")

    assert config.runner_type == expected_runner_type
    assert config.convert_type == expected_convert_type
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type"),
    [
        ("distilbert/distilgpt2", "generate", "none"),
        ("intfloat/multilingual-e5-small", "pooling", "none"),
        ("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify"),
        ("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "none"),
        ("Qwen/Qwen2.5-Math-RM-72B", "pooling", "none"),
        ("openai/whisper-small", "generate", "none"),
    ],
)
def test_auto_runner(model_id, expected_runner_type, expected_convert_type):
    """runner="auto" should resolve runner/convert types per model."""
    config = ModelConfig(model_id, runner="auto")

    assert config.runner_type == expected_runner_type
    assert config.convert_type == expected_convert_type
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type"),
    [
        ("distilbert/distilgpt2", "pooling", "embed"),
        ("intfloat/multilingual-e5-small", "pooling", "none"),
        ("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify"),
        ("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "none"),
        ("Qwen/Qwen2.5-Math-RM-72B", "pooling", "none"),
        ("openai/whisper-small", "pooling", "embed"),
    ],
)
def test_pooling_runner(model_id, expected_runner_type, expected_convert_type):
    """runner="pooling" forces pooling and picks a convert type per model."""
    config = ModelConfig(model_id, runner="pooling")

    assert config.runner_type == expected_runner_type
    assert config.convert_type == expected_convert_type
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type"),
    [
        ("Qwen/Qwen2.5-1.5B-Instruct", "draft", "none"),
    ],
)
def test_draft_runner(model_id, expected_runner_type, expected_convert_type):
    """runner="draft" (speculative decoding) should be honored as-is."""
    config = ModelConfig(model_id, runner="draft")

    assert config.runner_type == expected_runner_type
    assert config.convert_type == expected_convert_type
|
|
|
|
|
|
# (model_id, expected max_model_len once the sliding window is disabled)
MODEL_IDS_EXPECTED = [
    ("Qwen/Qwen1.5-7B", 32768),
    ("mistralai/Mistral-7B-v0.1", 4096),
    ("mistralai/Mistral-7B-Instruct-v0.2", 32768),
]


@pytest.mark.parametrize("model_id_expected", MODEL_IDS_EXPECTED)
def test_disable_sliding_window(model_id_expected):
    """disable_sliding_window must yield the expected max_model_len."""
    model_id, expected = model_id_expected
    model_config = ModelConfig(model_id, disable_sliding_window=True)
    assert model_config.max_model_len == expected
|
|
|
|
|
|
@pytest.mark.skipif(current_platform.is_rocm(),
                    reason="Xformers backend is not supported on ROCm.")
def test_get_pooling_config():
    """Pooler config should be derived automatically from the
    sentence-transformers model card (mean pooling + normalize)."""
    model_config = ModelConfig("sentence-transformers/all-MiniLM-L12-v2")

    pooler_config = model_config.pooler_config
    assert pooler_config is not None
    assert pooler_config.normalize
    assert pooler_config.pooling_type == PoolingType.MEAN.name
|
|
|
|
|
|
@pytest.mark.skipif(current_platform.is_rocm(),
                    reason="Xformers backend is not supported on ROCm.")
def test_get_pooling_config_from_args():
    """An explicitly supplied PoolerConfig must win over the model card."""
    model_id = "sentence-transformers/all-MiniLM-L12-v2"
    pooler_config = PoolerConfig(pooling_type="CLS", normalize=True)
    model_config = ModelConfig(model_id, pooler_config=pooler_config)

    # Compare field-by-field via asdict to avoid identity-based equality.
    assert asdict(model_config.pooler_config) == asdict(pooler_config)
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("model_id", "default_pooling_type", "pooling_type"),
    [
        ("tomaarsen/Qwen3-Reranker-0.6B-seq-cls", "LAST", "LAST"),  # LLM
        ("intfloat/e5-small", "CLS", "MEAN"),  # BertModel
        ("Qwen/Qwen2.5-Math-RM-72B", "ALL", "ALL"),  # reward
        ("Qwen/Qwen2.5-Math-PRM-7B", "STEP", "STEP")  # step reward
    ])
def test_default_pooling_type(model_id, default_pooling_type, pooling_type):
    """Model-info default pooling type vs. the resolved pooler config type
    (they differ when the model card overrides the architecture default)."""
    model_config = ModelConfig(model_id)
    assert model_config._model_info.default_pooling_type == default_pooling_type
    assert model_config.pooler_config.pooling_type == pooling_type
|
|
|
|
|
|
@pytest.mark.skipif(current_platform.is_rocm(),
                    reason="Xformers backend is not supported on ROCm.")
def test_get_bert_tokenization_sentence_transformer_config():
    """The sentence-transformers encoder config for a BERT model should
    expose its tokenization settings (sequence length, lowercasing)."""
    bge_model_config = ModelConfig("BAAI/bge-base-en-v1.5")

    encoder_config = bge_model_config._get_encoder_config()

    assert encoder_config["max_seq_length"] == 512
    assert encoder_config["do_lower_case"]
|
|
|
|
|
|
def test_rope_customization():
    """hf_overrides for rope_scaling / rope_theta must land in hf_config
    and rescale max_model_len accordingly."""
    TEST_ROPE_SCALING = {"rope_type": "dynamic", "factor": 2.0}
    TEST_ROPE_THETA = 16_000_000.0
    LONGCHAT_ROPE_SCALING = {"rope_type": "linear", "factor": 8.0}

    # Baseline: stock Llama-3 has no rope_scaling and an 8k context.
    llama_model_config = ModelConfig("meta-llama/Meta-Llama-3-8B-Instruct")
    assert getattr(llama_model_config.hf_config, "rope_scaling", None) is None
    assert getattr(llama_model_config.hf_config, "rope_theta", None) == 500_000
    assert llama_model_config.max_model_len == 8192

    # Dynamic scaling with factor 2.0 doubles the usable context length.
    llama_model_config = ModelConfig(
        "meta-llama/Meta-Llama-3-8B-Instruct",
        hf_overrides={
            "rope_scaling": TEST_ROPE_SCALING,
            "rope_theta": TEST_ROPE_THETA,
        },
    )
    assert getattr(llama_model_config.hf_config, "rope_scaling",
                   None) == TEST_ROPE_SCALING
    assert getattr(llama_model_config.hf_config, "rope_theta",
                   None) == TEST_ROPE_THETA
    assert llama_model_config.max_model_len == 16384

    longchat_model_config = ModelConfig("lmsys/longchat-13b-16k")
    # Check if LONGCHAT_ROPE_SCALING entries are in longchat_model_config
    assert all(
        longchat_model_config.hf_config.rope_scaling.get(key) == value
        for key, value in LONGCHAT_ROPE_SCALING.items())
    assert longchat_model_config.max_model_len == 16384

    # Replacing longchat's linear scaling drops the context back to 4096.
    longchat_model_config = ModelConfig(
        "lmsys/longchat-13b-16k",
        hf_overrides={
            "rope_scaling": TEST_ROPE_SCALING,
        },
    )
    assert getattr(longchat_model_config.hf_config, "rope_scaling",
                   None) == TEST_ROPE_SCALING
    assert longchat_model_config.max_model_len == 4096
|
|
|
|
|
|
@pytest.mark.skipif(current_platform.is_rocm(),
                    reason="Encoder Decoder models not supported on ROCm.")
@pytest.mark.parametrize(("model_id", "is_encoder_decoder"), [
    ("facebook/opt-125m", False),
    ("openai/whisper-tiny", True),
    ("meta-llama/Llama-3.2-1B-Instruct", False),
])
def test_is_encoder_decoder(model_id, is_encoder_decoder):
    """is_encoder_decoder must reflect the underlying architecture."""
    config = ModelConfig(model_id)

    assert config.is_encoder_decoder == is_encoder_decoder
|
|
|
|
|
|
@pytest.mark.parametrize(("model_id", "uses_mrope"), [
    ("facebook/opt-125m", False),
    ("Qwen/Qwen2-VL-2B-Instruct", True),
])
def test_uses_mrope(model_id, uses_mrope):
    """uses_mrope must be set only for models using multimodal RoPE."""
    config = ModelConfig(model_id)

    assert config.uses_mrope == uses_mrope
|
|
|
|
|
|
def test_generation_config_loading():
    """generation_config selection ("vllm" vs "auto") and user overrides."""
    model_id = "Qwen/Qwen2.5-1.5B-Instruct"

    # When set generation_config to "vllm", the default generation config
    # will not be loaded.
    model_config = ModelConfig(model_id, generation_config="vllm")
    assert model_config.get_diff_sampling_param() == {}

    # When set generation_config to "auto", the default generation config
    # should be loaded.
    model_config = ModelConfig(model_id, generation_config="auto")

    correct_generation_config = {
        "repetition_penalty": 1.1,
        "temperature": 0.7,
        "top_p": 0.8,
        "top_k": 20,
    }

    assert model_config.get_diff_sampling_param() == correct_generation_config

    # The generation config could be overridden by the user.
    override_generation_config = {"temperature": 0.5, "top_k": 5}

    model_config = ModelConfig(
        model_id,
        generation_config="auto",
        override_generation_config=override_generation_config)

    # Overrides are merged on top of the model defaults.
    override_result = correct_generation_config.copy()
    override_result.update(override_generation_config)

    assert model_config.get_diff_sampling_param() == override_result

    # When generation_config is set to "vllm" and override_generation_config
    # is set, the override_generation_config should be used directly.
    model_config = ModelConfig(
        model_id,
        generation_config="vllm",
        override_generation_config=override_generation_config)

    assert model_config.get_diff_sampling_param() == override_generation_config
|
|
|
|
|
|
@pytest.mark.parametrize("pt_load_map_location", [
    "cuda",
    {
        "": "cuda"
    },
])
def test_load_config_pt_load_map_location(pt_load_map_location):
    """pt_load_map_location (string or dict form) must round-trip through
    VllmConfig unchanged."""
    load_config = LoadConfig(pt_load_map_location=pt_load_map_location)
    config = VllmConfig(load_config=load_config)

    assert config.load_config.pt_load_map_location == pt_load_map_location
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("model_id", "max_model_len", "expected_max_len", "should_raise"), [
        ("BAAI/bge-reranker-base", None, 512, False),
        ("BAAI/bge-reranker-base", 256, 256, False),
        ("BAAI/bge-reranker-base", 513, 512, True),
        ("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", None, 131072, False),
        ("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", 131073, 131072, True),
    ])
def test_get_and_verify_max_len(model_id, max_model_len, expected_max_len,
                                should_raise):
    """Test get_and_verify_max_len with different configurations."""
    model_config = ModelConfig(model_id)

    if should_raise:
        # Asking for more context than the model supports must fail loudly.
        with pytest.raises(ValueError):
            model_config.get_and_verify_max_len(max_model_len)
    else:
        actual_max_len = model_config.get_and_verify_max_len(max_model_len)
        assert actual_max_len == expected_max_len
|
|
|
|
|
|
class MockConfig:
    """Simple mock object for testing maybe_pull_model_tokenizer_for_runai"""

    def __init__(self, model: str, tokenizer: str):
        # Mirror only the attributes the helper under test reads/writes.
        self.model = model
        self.tokenizer = tokenizer
        self.model_weights = None
|
|
|
|
|
|
@pytest.mark.parametrize("s3_url", [
    "s3://example-bucket-1/model/",
    "s3://example-bucket-2/model/",
])
@patch('vllm.transformers_utils.runai_utils.ObjectStorageModel.pull_files')
def test_s3_url_model_tokenizer_paths(mock_pull_files, s3_url):
    """Test that S3 URLs create deterministic local directories for model and
    tokenizer."""
    # Mock pull_files to avoid actually downloading files during tests
    mock_pull_files.return_value = None

    def check_local_dirs(cfg):
        # After the pull, both paths must be existing local directories.
        assert os.path.exists(
            cfg.model), f"Model directory does not exist: {cfg.model}"
        assert os.path.isdir(
            cfg.model), f"Model path is not a directory: {cfg.model}"
        assert os.path.exists(
            cfg.tokenizer
        ), f"Tokenizer directory does not exist: {cfg.tokenizer}"
        assert os.path.isdir(
            cfg.tokenizer
        ), f"Tokenizer path is not a directory: {cfg.tokenizer}"

    # First pull: the S3 URL is swapped for a local directory.
    config1 = MockConfig(model=s3_url, tokenizer=s3_url)
    ModelConfig.maybe_pull_model_tokenizer_for_runai(config1, s3_url, s3_url)
    check_local_dirs(config1)

    assert config1.model != s3_url, (
        "Model path should be converted to local directory")
    assert config1.tokenizer != s3_url, (
        "Tokenizer path should be converted to local directory")

    # Remember the directories produced by the first pull.
    created_model_dir = config1.model
    created_tokenizer_dir = config1.tokenizer

    # Second pull with the same URL must map to the same directories.
    config2 = MockConfig(model=s3_url, tokenizer=s3_url)
    ModelConfig.maybe_pull_model_tokenizer_for_runai(config2, s3_url, s3_url)
    check_local_dirs(config2)

    assert config2.model == created_model_dir, (
        f"Model paths are not deterministic. "
        f"Original: {created_model_dir}, New: {config2.model}")
    assert config2.tokenizer == created_tokenizer_dir, (
        f"Tokenizer paths are not deterministic. "
        f"Original: {created_tokenizer_dir}, New: {config2.tokenizer}")
|
|
|
|
|
|
@patch('vllm.transformers_utils.runai_utils.ObjectStorageModel.pull_files')
def test_s3_url_different_models_create_different_directories(mock_pull_files):
    """Test that different S3 URLs create different local directories."""
    # Mock pull_files to avoid actually downloading files during tests
    mock_pull_files.return_value = None

    s3_url1 = "s3://example-bucket-1/model/"
    s3_url2 = "s3://example-bucket-2/model/"

    # Run the pull for each URL on its own mock config.
    config1 = MockConfig(model=s3_url1, tokenizer=s3_url1)
    ModelConfig.maybe_pull_model_tokenizer_for_runai(config1, s3_url1, s3_url1)

    config2 = MockConfig(model=s3_url2, tokenizer=s3_url2)
    ModelConfig.maybe_pull_model_tokenizer_for_runai(config2, s3_url2, s3_url2)

    # Verify that different URLs produce different directories
    assert config1.model != config2.model, (
        f"Different S3 URLs should create different model directories. "
        f"URL1 model: {config1.model}, URL2 model: {config2.model}")
    assert config1.tokenizer != config2.tokenizer, (
        f"Different S3 URLs should create different tokenizer directories. "
        f"URL1 tokenizer: {config1.tokenizer}, "
        f"URL2 tokenizer: {config2.tokenizer}")

    # Both sets of directories must actually exist on disk.
    for cfg in (config1, config2):
        assert os.path.exists(cfg.model) and os.path.isdir(cfg.model)
        assert os.path.exists(cfg.tokenizer) and os.path.isdir(cfg.tokenizer)
|