mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-08 01:17:04 +08:00
Signed-off-by: Nick Hill <nhill@redhat.com> Signed-off-by: Lucas Kabela <lucaskabela@meta.com> Signed-off-by: Max de Bayser <mbayser@br.ibm.com> Signed-off-by: Andrew Sansom <andrew@protopia.ai> Signed-off-by: Boyuan Feng <boyuan@meta.com> Signed-off-by: Boyuan Feng <fby.1994@gmail.com> Signed-off-by: boyuanfeng <boyuan@meta.com> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Signed-off-by: JartX <sagformas@epdcenter.es> Signed-off-by: Chendi Xue <Chendi.Xue@intel.com> Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com> Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> Signed-off-by: Chen Zhang <zhangch99@outlook.com> Signed-off-by: Roger Wang <hey@rogerw.io> Signed-off-by: mgoin <mgoin64@gmail.com> Signed-off-by: wwl2755 <wangwenlong2755@gmail.com> Signed-off-by: Manoel Marques <manoel.marques@ibm.com> Signed-off-by: Manoel Marques <manoelmrqs@gmail.com> Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn> Signed-off-by: pengdrumli <pengdrumli@tencent.com> Signed-off-by: windsonsea <haifeng.yao@daocloud.io> Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai> Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu> Signed-off-by: Huamin Li <3ericli@gmail.com> Signed-off-by: simondanielsson <simon.danielsson99@hotmail.com> Signed-off-by: Rahul Tuli <rtuli@redhat.com> Signed-off-by: Yang <lymailforjob@gmail.com> Signed-off-by: Debolina Roy <debroy@redhat.com> Signed-off-by: David Chen <530634352@qq.com> Signed-off-by: wangzi <3220100013@zju.edu.cn> Signed-off-by: Eldar Kurtic <8884008+eldarkurtic@users.noreply.github.com> Signed-off-by: NickLucche <nlucches@redhat.com> Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com> Signed-off-by: Sara Kokkila Schumacher <saraks@ibm.com> Signed-off-by: Csrayz <jover@cmbchina.com> Signed-off-by: ivyilike <pww123@cmbchina.com> Signed-off-by: Burkhard Ringlein <ngl@zurich.ibm.com> Signed-off-by: Bowen Wang <abmfy@icloud.com> Signed-off-by: qqma <qqma@amazon.com> Signed-off-by: ElizaWszola 
<ewszola@redhat.com> Signed-off-by: Lu Fang <fanglu@fb.com> Signed-off-by: Zhuohan Li <zhuohan123@gmail.com> Signed-off-by: Luka Govedič <lgovedic@redhat.com> Signed-off-by: luka <lgovedic@redhat.com> Signed-off-by: Luka Govedič <ProExpertProg@users.noreply.github.com> Signed-off-by: Or Ozeri <oro@il.ibm.com> Signed-off-by: Johnny Yang <johnnyyang@google.com> Signed-off-by: Alec Solder <alecs@fb.com> Signed-off-by: Alec S <10566873+alecsolder@users.noreply.github.com> Signed-off-by: Russell Bryant <rbryant@redhat.com> Signed-off-by: Matthew Bonanni <mbonanni@redhat.com> Signed-off-by: Alexander Matveev <amatveev@redhat.com> Signed-off-by: yewentao256 <zhyanwentao@126.com> Signed-off-by: liuye.hj <liuye.hj@alibaba-inc.com> Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> Signed-off-by: Lucia Fang <116399278+luccafong@users.noreply.github.com> Signed-off-by: Michael Goin <mgoin64@gmail.com> Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com> Signed-off-by: Ming Yang <minos.future@gmail.com> Signed-off-by: Zhikaiiii <1658973216@qq.com> Signed-off-by: Andreas Hartel <andreas.hartel@aleph-alpha.com> Signed-off-by: Jee Jee Li <pandaleefree@gmail.com> Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com> Signed-off-by: wuxibin <wuxibin@bytedance.com> Signed-off-by: youkaichao <youkaichao@gmail.com> Signed-off-by: Peter Pan <Peter.Pan@daocloud.io> Signed-off-by: Peter Pan <peter.pan@daocloud.io> Signed-off-by: Nicolò Lucchesi<nicolo.lucchesi@gmail.com> Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com> Signed-off-by: Sage Moore <sage@neuralmagic.com> Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com> Signed-off-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com> Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com> Signed-off-by: Bill Nell <bnell@redhat.com> Signed-off-by: Shreeasish Kumar <shreeasish@rivosinc.com> Signed-off-by: Weida Hong <wdhongtw@google.com> Signed-off-by: Ekagra Ranjan <3116519+ekagra-ranjan@users.noreply.github.com> 
Signed-off-by: Hashem Hashemi <hashem.hashemi@amd.com> Signed-off-by: Hashem Hashemi <159079214+amd-hhashemi@users.noreply.github.com> Signed-off-by: Amir Samani <asamani@nvidia.com> Signed-off-by: ElizaWszola <elizaw.9289@gmail.com> Signed-off-by: jiahanc <173873397+jiahanc@users.noreply.github.com> Signed-off-by: ilmarkov <markovilya197@gmail.com> Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com> Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com> Signed-off-by: rouchenzi <ruochenwen@gmail.com> Signed-off-by: rouchenzi <40842833+rouchenzi@users.noreply.github.com> Signed-off-by: Andrew Xia <axia@meta.com> Signed-off-by: Kourosh Hakhamaneshi <kourosh@anyscale.com> Signed-off-by: Corey Lowman <clowman1993@gmail.com> Signed-off-by: jpvillam <jpvillam@amd.com> Signed-off-by: dougbtv <dosmith@redhat.com> Signed-off-by: Chenxi Yang <cxyang@fb.com> Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Signed-off-by: ahao-anyscale <ahao@anyscale.com> Signed-off-by: Yan Lu <luyan@nvidia.com> Signed-off-by: baxingpiaochong <771405853@qq.com> Signed-off-by: Kyle Sayers <kylesayrs@gmail.com> Signed-off-by: Nikhil Gupta <nikhil.gupta2@arm.com> Signed-off-by: Yong Hoon Shin <yhshin@meta.com> Signed-off-by: Benjamin Chislett <benjamin.chislett@centml.ai> Signed-off-by: Benjamin Chislett <bchislett@nvidia.com> Signed-off-by: Ben Browning <bbrownin@redhat.com> Signed-off-by: Chengji Yao <chengjiyao@google.com> Signed-off-by: jiang1.li <jiang1.li@intel.com> Signed-off-by: Jackmin801 <ongjackm@gmail.com> Signed-off-by: Jonas M. Kübler <44084297+jmkuebler@users.noreply.github.com> Signed-off-by: taohui <taohui3@gmail.com> Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io> Signed-off-by: Shu Wang <shuw@nvidia.com> Signed-off-by: Shu Wang. 
<shuw@nvidia.com> Signed-off-by: Tyler Michael Smith <tlrmchlsmth@gmail.com> Signed-off-by: Duncan Moss <djm.moss@gmail.com> Signed-off-by: Shiyan Deng <dsy842974287@meta.com> Signed-off-by: Wei Wei <wwei6@meta.com> Signed-off-by: Saman Keon <samanamp@outlook.com> Signed-off-by: yangxurui <yangxurui@meituan.com> Signed-off-by: nicole-lihui <nicole.li@daocloud.io> Signed-off-by: courage17340 <courage17340@163.com> Signed-off-by: Jacob Kahn <jacobkahn1@gmail.com> Signed-off-by: Fadi Arafeh <fadi.arafeh@arm.com> Signed-off-by: Agata Dobrzyniewicz <adobrzyniewicz@habana.ai> Signed-off-by: zxw <1020938856@qq.com> Signed-off-by: wang.yuqi <noooop@126.com> Signed-off-by: Cyrus Leung <cyrus.tl.leung@gmail.com> Signed-off-by: chenlang <chen.lang5@zte.com.cn> Signed-off-by: Jonas Kuebler <kuebj@amazon.com> Signed-off-by: AlonKejzman <alonkeizman@gmail.com> Signed-off-by: Tao Hui <taohui3@gmail.com> Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com> Signed-off-by: Tomer Asida <57313761+tomeras91@users.noreply.github.com> Signed-off-by: Aleksandr Malyshev <maleksan@amd.com> Signed-off-by: Eugene Khvedchenia <ekhvedchenia@nvidia.com> Signed-off-by: Eugene Khvedchenya <ekhvedchenya@gmail.com> Signed-off-by: yiting.jiang <yiting.jiang@daocloud.io> Signed-off-by: xaguilar <Xavier.AguilarFruto@amd.com> Signed-off-by: Iceber Gu <caiwei95@hotmail.com> Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com> Signed-off-by: Icey <1790571317@qq.com> Signed-off-by: 许文卿 <xwq391974@alibaba-inc.com> Signed-off-by: Chih-Chieh-Yang <7364402+cyang49@users.noreply.github.com> Co-authored-by: Nick Hill <nhill@redhat.com> Co-authored-by: Lucas Kabela <lucasakabela@gmail.com> Co-authored-by: Maximilien de Bayser <mbayser@br.ibm.com> Co-authored-by: Andrew Sansom <andrew@protopia.ai> Co-authored-by: Boyuan Feng <boyuan@meta.com> Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: JartX <sagformas@epdcenter.es> Co-authored-by: Chendi.Xue <chendi.xue@intel.com> Co-authored-by: Chauncey <chaunceyjiang@gmail.com> Co-authored-by: xin.li <xin.li@daocloud.io> Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk> Co-authored-by: Chen Zhang <zhangch99@outlook.com> Co-authored-by: Roger Wang <hey@rogerw.io> Co-authored-by: Michael Goin <mgoin64@gmail.com> Co-authored-by: Wenlong Wang <wangwenlong2755@gmail.com> Co-authored-by: Manoel Marques <manoelmrqs@gmail.com> Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn> Co-authored-by: lirong <56789630+lirong-lirong@users.noreply.github.com> Co-authored-by: Michael Yao <haifeng.yao@daocloud.io> Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu> Co-authored-by: Huamin Li <3ericli@gmail.com> Co-authored-by: Lu Fang <30275821+houseroad@users.noreply.github.com> Co-authored-by: Simon Danielsson <70206058+simondanielsson@users.noreply.github.com> Co-authored-by: Rahul Tuli <rtuli@redhat.com> Co-authored-by: Claude <noreply@anthropic.com> Co-authored-by: Yang Liu <127183760+KKSK-DON@users.noreply.github.com> Co-authored-by: Deboleina <debroy@redhat.com> Co-authored-by: yinz-aizip <yinz@aizip.ai> Co-authored-by: WeiQing Chen <40507679+david6666666@users.noreply.github.com> Co-authored-by: wangzi <3220100013@zju.edu.cn> Co-authored-by: Eldar Kurtić <8884008+eldarkurtic@users.noreply.github.com> Co-authored-by: Nicolò Lucchesi <nlucches@redhat.com> Co-authored-by: Ye (Charlotte) Qi <yeq@meta.com> Co-authored-by: Yizhou <136800916+yiz-liu@users.noreply.github.com> Co-authored-by: Sara-KS <50249410+Sara-KS@users.noreply.github.com> Co-authored-by: Csrayz <jover@cmbchina.com> Co-authored-by: ivyilike <pww123@cmbchina.com> Co-authored-by: Burkhard Ringlein <ngl@zurich.ibm.com> Co-authored-by: Bowen Wang <abmfy@icloud.com> Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com> Co-authored-by: Daisy-Ma-coder 
<daisy.ma.0117@gmail.com> Co-authored-by: qqma <qqma@amazon.com> Co-authored-by: ElizaWszola <ewszola@redhat.com> Co-authored-by: Lucia Fang <116399278+luccafong@users.noreply.github.com> Co-authored-by: Zhuohan Li <zhuohan123@gmail.com> Co-authored-by: Simon Mo <simon.mo@hey.com> Co-authored-by: Or Ozeri <oro@il.ibm.com> Co-authored-by: Johnny Yang <24908445+jcyang43@users.noreply.github.com> Co-authored-by: Chengji Yao <chengjiyao@google.com> Co-authored-by: Alec S <10566873+alecsolder@users.noreply.github.com> Co-authored-by: Alec Solder <alecs@fb.com> Co-authored-by: Russell Bryant <rbryant@redhat.com> Co-authored-by: Matthew Bonanni <mbonanni@redhat.com> Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com> Co-authored-by: Chris Bamford <chrisbam4d@gmail.com> Co-authored-by: Alexander Matveev <59768536+alexm-redhat@users.noreply.github.com> Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Co-authored-by: JJJYmmm <92386084+JJJYmmm@users.noreply.github.com> Co-authored-by: liuye.hj <liuye.hj@alibaba-inc.com> Co-authored-by: Kunshang Ji <kunshang.ji@intel.com> Co-authored-by: Lucia (Lu) Fang <fanglu@meta.com> Co-authored-by: Varun Sundar Rabindranath <varunsundar08@gmail.com> Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com> Co-authored-by: Ming Yang <yming@meta.com> Co-authored-by: Zhikaiiii <55917203+Zhikaiiii@users.noreply.github.com> Co-authored-by: Andreas Hartel <andreas@hartel.me> Co-authored-by: Jee Jee Li <pandaleefree@gmail.com> Co-authored-by: vllmellm <vllm.ellm@embeddedllm.com> Co-authored-by: Joel <wuxibin89@163.com> Co-authored-by: youkaichao <youkaichao@gmail.com> Co-authored-by: Mark McLoughlin <markmc@redhat.com> Co-authored-by: Peter Pan <peter.pan@daocloud.io> Co-authored-by: Nicolò Lucchesi <nicolo.lucchesi@gmail.com> Co-authored-by: Fanli Lin <fanli.lin@intel.com> Co-authored-by: Thomas Parnell <tpa@zurich.ibm.com> Co-authored-by: Lucas Wilkinson 
<LucasWilkinson@users.noreply.github.com> Co-authored-by: Sage Moore <sage@neuralmagic.com> Co-authored-by: yewentao256 <zhyanwentao@126.com> Co-authored-by: bnellnm <49004751+bnellnm@users.noreply.github.com> Co-authored-by: rivos-shreeasish <shreeasish@rivosinc.com> Co-authored-by: Chih-Chieh Yang <chih.chieh.yang@ibm.com> Co-authored-by: Weida Hong <wdhongtw@gmail.com> Co-authored-by: Ekagra Ranjan <3116519+ekagra-ranjan@users.noreply.github.com> Co-authored-by: Hashem Hashemi <159079214+amd-hhashemi@users.noreply.github.com> Co-authored-by: Amir Samani <samani@ualberta.ca> Co-authored-by: Luka Govedič <lgovedic@redhat.com> Co-authored-by: jiahanc <173873397+jiahanc@users.noreply.github.com> Co-authored-by: Ilya Markov <markovilya197@gmail.com> Co-authored-by: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com> Co-authored-by: Jialin Ouyang <Jialin.Ouyang@gmail.com> Co-authored-by: rouchenzi <40842833+rouchenzi@users.noreply.github.com> Co-authored-by: Andrew Xia <axia@meta.com> Co-authored-by: kourosh hakhamaneshi <31483498+kouroshHakha@users.noreply.github.com> Co-authored-by: Corey Lowman <clowman1993@gmail.com> Co-authored-by: Juan Villamizar <100237675+jpvillam-amd@users.noreply.github.com> Co-authored-by: jpvillam <jpvillam@amd.com> Co-authored-by: Doug Smith <dosmith@redhat.com> Co-authored-by: Chenxi Yang <cxyang@cs.utexas.edu> Co-authored-by: Chenxi Yang <cxyang@fb.com> Co-authored-by: ahao-anyscale <ahao@anyscale.com> Co-authored-by: 0xNullPath <luyanfcp@foxmail.com> Co-authored-by: baxingpiaochong <771405853@qq.com> Co-authored-by: Benjamin Chislett <bchislett@nvidia.com> Co-authored-by: Kyle Sayers <kylesayrs@gmail.com> Co-authored-by: Nikhil Gupta <nikhil.gupta2@arm.com> Co-authored-by: Yong Hoon Shin <48474650+sarckk@users.noreply.github.com> Co-authored-by: lhsjohn <huashuoli@tencent.com> Co-authored-by: Ben Browning <bbrownin@redhat.com> Co-authored-by: Li, Jiang <jiang1.li@intel.com> Co-authored-by: Jackmin801 
<56836461+Jackmin801@users.noreply.github.com> Co-authored-by: Jonas M. Kübler <44084297+jmkuebler@users.noreply.github.com> Co-authored-by: Tao Hui <taohui3@gmail.com> Co-authored-by: rongfu.leng <rongfu.leng@daocloud.io> Co-authored-by: Shu Wang <shuw@nvidia.com> Co-authored-by: Tyler Michael Smith <tlrmchlsmth@gmail.com> Co-authored-by: Duncan Moss <djm.moss@gmail.com> Co-authored-by: Shiyan Deng <dsy842974287@meta.com> Co-authored-by: Wei Wei <wwei6@meta.com> Co-authored-by: Saman A. Pour <samanamp@outlook.com> Co-authored-by: XuruiYang <530534756@qq.com> Co-authored-by: yangxurui <yangxurui@meituan.com> Co-authored-by: Nicole LiHui 🥜 <nicolelihui@outlook.com> Co-authored-by: courage17340 <courage17340@users.noreply.github.com> Co-authored-by: Jacob Kahn <jacobkahn1@gmail.com> Co-authored-by: Nicole LiHui 🥜 <nicole.li@daocloud.io> Co-authored-by: Fadi Arafeh <115173828+fadara01@users.noreply.github.com> Co-authored-by: Agata Dobrzyniewicz <160237065+adobrzyn@users.noreply.github.com> Co-authored-by: yyzxw <34639446+yyzxw@users.noreply.github.com> Co-authored-by: wang.yuqi <noooop@126.com> Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com> Co-authored-by: chenlang <chen.lang5@zte.com.cn> Co-authored-by: chenlang <10346245@zte.com.cn> Co-authored-by: AlonKejzman <alonkeizman@gmail.com> Co-authored-by: tomeras91 <57313761+tomeras91@users.noreply.github.com> Co-authored-by: Aleksandr Malyshev <164964928+maleksan85@users.noreply.github.com> Co-authored-by: Aleksandr Malyshev <maleksan@amd.com> Co-authored-by: Doug Lehr <douglehr@amd.com> Co-authored-by: Eugene Khvedchenya <ekhvedchenya@gmail.com> Co-authored-by: yitingdc <59356937+yitingdc@users.noreply.github.com> Co-authored-by: xaguilar-amd <xavier.aguilarfruto@amd.com> Co-authored-by: Iceber Gu <caiwei95@hotmail.com> Co-authored-by: Tao He <linzhu.ht@alibaba-inc.com> Co-authored-by: Icey <1790571317@qq.com> Co-authored-by: Xu Wenqing <121550081+Xu-Wenqing@users.noreply.github.com> Co-authored-by: Chih-Chieh 
Yang <7364402+cyang49@users.noreply.github.com> Co-authored-by: RishiAstra <40644327+RishiAstra@users.noreply.github.com>
441 lines
13 KiB
Python
441 lines
13 KiB
Python
# SPDX-License-Identifier: Apache-2.0
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
|
"""
|
|
Test cases for long text embedding with automatic chunking mechanism.
|
|
|
|
This test suite validates vLLM's automatic chunking functionality for handling
|
|
text inputs that exceed the model's maximum token length, specifically targeting
|
|
the intfloat/multilingual-e5-small model (max token length: 512).
|
|
"""
|
|
|
|
import math
import random

import openai
import pytest
import pytest_asyncio

from tests.utils import RemoteOpenAIServer
from vllm.entrypoints.openai.protocol import EmbeddingResponse
|
|
|
|
|
|
def _generate_random_text(word_count: int) -> str:
|
|
"""Generate random text with approximately the specified word count."""
|
|
# Common English words with focus on verbs and nouns for realistic text
|
|
common_words = [
|
|
# Essential articles and pronouns (minimal)
|
|
"the",
|
|
"and",
|
|
"you",
|
|
"they",
|
|
"this",
|
|
"that",
|
|
"these",
|
|
"those",
|
|
|
|
# Action verbs
|
|
"create",
|
|
"build",
|
|
"develop",
|
|
"design",
|
|
"implement",
|
|
"execute",
|
|
"analyze",
|
|
"process",
|
|
"generate",
|
|
"calculate",
|
|
"evaluate",
|
|
"optimize",
|
|
"transform",
|
|
"integrate",
|
|
"configure",
|
|
"deploy",
|
|
"monitor",
|
|
"manage",
|
|
"discover",
|
|
"explore",
|
|
"investigate",
|
|
"research",
|
|
"study",
|
|
"examine",
|
|
"improve",
|
|
"enhance",
|
|
"upgrade",
|
|
"modify",
|
|
"update",
|
|
"maintain",
|
|
"solve",
|
|
"resolve",
|
|
"handle",
|
|
"address",
|
|
"tackle",
|
|
"overcome",
|
|
"communicate",
|
|
"collaborate",
|
|
"coordinate",
|
|
"organize",
|
|
"plan",
|
|
"achieve",
|
|
"accomplish",
|
|
"complete",
|
|
"finish",
|
|
"deliver",
|
|
"provide",
|
|
|
|
# Technology and science nouns
|
|
"system",
|
|
"application",
|
|
"software",
|
|
"hardware",
|
|
"network",
|
|
"database",
|
|
"algorithm",
|
|
"model",
|
|
"framework",
|
|
"platform",
|
|
"interface",
|
|
"protocol",
|
|
"architecture",
|
|
"infrastructure",
|
|
"component",
|
|
"module",
|
|
"service",
|
|
"technology",
|
|
"innovation",
|
|
"solution",
|
|
"methodology",
|
|
"approach",
|
|
"artificial",
|
|
"intelligence",
|
|
"machine",
|
|
"learning",
|
|
"neural",
|
|
"network",
|
|
"computer",
|
|
"processor",
|
|
"memory",
|
|
"storage",
|
|
"computation",
|
|
"data",
|
|
"information",
|
|
"knowledge",
|
|
"insight",
|
|
"pattern",
|
|
"trend",
|
|
"analysis",
|
|
"research",
|
|
"development",
|
|
"engineering",
|
|
"science",
|
|
"mathematics",
|
|
"statistics",
|
|
"probability",
|
|
"optimization",
|
|
"performance",
|
|
"efficiency",
|
|
|
|
# General nouns
|
|
"project",
|
|
"team",
|
|
"organization",
|
|
"company",
|
|
"business",
|
|
"industry",
|
|
"market",
|
|
"customer",
|
|
"user",
|
|
"client",
|
|
"product",
|
|
"feature",
|
|
"function",
|
|
"requirement",
|
|
"specification",
|
|
"documentation",
|
|
"report",
|
|
"result",
|
|
"outcome",
|
|
"impact",
|
|
"benefit",
|
|
"advantage",
|
|
"challenge",
|
|
"problem",
|
|
"opportunity",
|
|
"strategy",
|
|
"goal",
|
|
"objective",
|
|
"target",
|
|
"milestone",
|
|
"process",
|
|
"procedure",
|
|
"workflow",
|
|
"pipeline",
|
|
"operation",
|
|
"task",
|
|
"activity",
|
|
"event",
|
|
"session",
|
|
"meeting",
|
|
"discussion",
|
|
"decision"
|
|
]
|
|
|
|
words = []
|
|
for _ in range(word_count):
|
|
words.append(random.choice(common_words))
|
|
|
|
# Add some punctuation for more realistic text
|
|
text = " ".join(words)
|
|
# Add periods every 10-20 words
|
|
words_list = text.split()
|
|
result = []
|
|
for i, word in enumerate(words_list):
|
|
result.append(word)
|
|
if ((i + 1) % random.randint(10, 20) == 0 and i < len(words_list) - 1):
|
|
result[-1] += "."
|
|
|
|
return " ".join(result)
|
|
|
|
|
|
# Embedding model under test; per the module docstring its tokenizer caps
# sequences at 512 tokens, which is what forces chunked processing below.
MODEL_NAME = "intfloat/multilingual-e5-small"
# Dtype handed to the server fixture via --dtype.
DTYPE = "bfloat16"

# Test text: Generate text with approximately 1500 words to exceed 1024 tokens
LONG_TEXT_1500_WORDS = _generate_random_text(1500)

# Test text: Generate text with approximately 2500 words to exceed 2048 tokens
LONG_TEXT_2500_WORDS = _generate_random_text(2500)
|
|
|
|
|
|
@pytest.fixture(scope="module")
def server_with_chunked_processing():
    """Start server with automatic chunking processing enabled."""
    # Mean pooling with normalization, chunked processing switched on, and a
    # generous max_embed_len so long inputs are accepted by the endpoint.
    pooler_config = ('{"pooling_type": "MEAN", "normalize": true, '
                     '"enable_chunked_processing": true, "max_embed_len": 10000}')
    server_args = [
        "--runner",
        "pooling",
        "--dtype",
        DTYPE,
        "--enforce-eager",
        "--max-model-len",
        "512",  # Set smaller max_model_len to trigger chunking mechanism
        '--pooler-config',
        pooler_config,
        "--gpu-memory-utilization",
        "0.8",
    ]

    with RemoteOpenAIServer(MODEL_NAME, server_args) as server:
        yield server
|
|
|
|
|
|
@pytest_asyncio.fixture
async def client_with_chunked_processing(server_with_chunked_processing):
    """Create async client with chunking processing support."""
    # Bind an async OpenAI client to the chunked-processing server fixture.
    async with server_with_chunked_processing.get_async_client() as client:
        yield client
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_long_text_embedding_1500_chars(
        client_with_chunked_processing: openai.AsyncOpenAI, model_name: str):
    """Test embedding processing for ~1500 character long text
    (~1028 tokens, exceeding 512 token limit)."""

    # Sanity-check the fixture text: roughly 1500 words are expected.
    word_count = len(LONG_TEXT_1500_WORDS.split())
    assert word_count >= 1400, (
        f"Test text word count insufficient: {word_count} words")

    # Request an embedding for the long input.
    raw_response = await client_with_chunked_processing.embeddings.create(
        model=model_name,
        input=[LONG_TEXT_1500_WORDS],
        encoding_format="float",
    )

    # Round-trip through the protocol model to validate the response schema.
    response = EmbeddingResponse.model_validate(
        raw_response.model_dump(mode="json"))

    assert response.id is not None
    assert len(response.data) == 1
    # multilingual-e5-small embedding dimension
    assert len(response.data[0].embedding) == 384
    assert response.usage.completion_tokens == 0
    # Chunked processing reports the tokens it actually processed; ~1500
    # words should come to 1024+ tokens, well past the 512-token single
    # chunk limit.
    assert response.usage.prompt_tokens > 800
    assert response.usage.total_tokens == response.usage.prompt_tokens

    # The returned embedding must be a non-degenerate float vector.
    vector = response.data[0].embedding
    assert all(isinstance(x, float)
               for x in vector), "Embedding vector should contain floats"
    assert any(x != 0
               for x in vector), "Embedding vector should not be all zeros"
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_long_text_embedding_2500_chars(
        client_with_chunked_processing: openai.AsyncOpenAI, model_name: str):
    """Test embedding processing for ~2500 character long text
    (~2048 tokens, requiring multiple chunks)."""

    # Sanity-check the fixture text: roughly 2500 words are expected.
    word_count = len(LONG_TEXT_2500_WORDS.split())
    assert word_count >= 2300, (
        f"Test text word count insufficient: {word_count} words")

    # Request an embedding for the long input.
    raw_response = await client_with_chunked_processing.embeddings.create(
        model=model_name,
        input=[LONG_TEXT_2500_WORDS],
        encoding_format="float",
    )

    # Round-trip through the protocol model to validate the response schema.
    response = EmbeddingResponse.model_validate(
        raw_response.model_dump(mode="json"))

    assert response.id is not None
    assert len(response.data) == 1
    # multilingual-e5-small embedding dimension
    assert len(response.data[0].embedding) == 384
    assert response.usage.completion_tokens == 0
    # Chunked processing reports the tokens it actually processed; ~2500
    # words should come to roughly 2048+ tokens and therefore need several
    # 512-token chunks.
    assert response.usage.prompt_tokens > 1500
    assert response.usage.total_tokens == response.usage.prompt_tokens

    # The returned embedding must be a non-degenerate float vector.
    vector = response.data[0].embedding
    assert all(isinstance(x, float)
               for x in vector), "Embedding vector should contain floats"
    assert any(x != 0
               for x in vector), "Embedding vector should not be all zeros"
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_batch_long_text_embedding(
        client_with_chunked_processing: openai.AsyncOpenAI, model_name: str):
    """Test batch long text embedding processing."""

    # Mix two chunking-triggering long texts with one short control text.
    batch_inputs = [
        LONG_TEXT_1500_WORDS,
        LONG_TEXT_2500_WORDS,
        "This is a short text test.",  # Short text for comparison
    ]

    # Send the whole batch in a single embeddings request.
    raw_response = await client_with_chunked_processing.embeddings.create(
        model=model_name,
        input=batch_inputs,
        encoding_format="float",
    )

    # Round-trip through the protocol model to validate the response schema.
    response = EmbeddingResponse.model_validate(
        raw_response.model_dump(mode="json"))

    assert response.id is not None
    assert len(response.data) == 3  # Three input texts

    # Every entry must be a 384-dim, correctly indexed, non-degenerate
    # float vector.
    for idx, item in enumerate(response.data):
        assert len(item.embedding) == 384
        assert item.index == idx
        vector = item.embedding
        assert all(isinstance(x, float) for x in vector)
        assert any(x != 0 for x in vector)

    # Token accounting: embeddings consume no completion tokens, and the
    # combined batch should account for a substantial number of prompt
    # tokens.
    assert response.usage.completion_tokens == 0
    assert response.usage.prompt_tokens > 1000
    assert response.usage.total_tokens == response.usage.prompt_tokens
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_chunked_vs_normal_consistency(
        client_with_chunked_processing: openai.AsyncOpenAI, model_name: str):
    """Test consistency between chunked and
    normal processing (using short text)."""

    # A short input that stays comfortably under the 512-token limit, so
    # the server should handle it without chunking.
    short_text = ("Artificial intelligence technology is changing our world, "
                  "bringing unprecedented opportunities and challenges.")

    # Request an embedding for the short input.
    raw_response = await client_with_chunked_processing.embeddings.create(
        model=model_name,
        input=[short_text],
        encoding_format="float",
    )

    # Round-trip through the protocol model to validate the response schema.
    response = EmbeddingResponse.model_validate(
        raw_response.model_dump(mode="json"))

    assert response.id is not None
    assert len(response.data) == 1
    assert len(response.data[0].embedding) == 384
    assert response.usage.completion_tokens == 0
    # Short text should not require chunked processing
    assert response.usage.prompt_tokens < 512
    assert response.usage.total_tokens == response.usage.prompt_tokens

    # Verify the validity of the embedding vector.
    vector = response.data[0].embedding
    assert all(isinstance(x, float) for x in vector)
    assert any(x != 0 for x in vector)
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_chunked_processing_response_format(
        client_with_chunked_processing: openai.AsyncOpenAI, model_name: str):
    """Test response format and structure during chunked processing.

    Sends a long input (beyond the 512-token limit) so the server must
    chunk it, then checks the response metadata and that the returned
    vector has an approximately unit L2 norm.
    """

    # Test with long text to trigger chunking
    embedding_response = await client_with_chunked_processing.embeddings.create(
        model=model_name,
        input=[LONG_TEXT_1500_WORDS],
        encoding_format="float",
    )

    # Verify response structure
    embeddings = EmbeddingResponse.model_validate(
        embedding_response.model_dump(mode="json"))

    assert embeddings.id is not None
    assert len(embeddings.data) == 1
    assert embeddings.data[0].object == "embedding"
    assert embeddings.data[0].index == 0

    # Verify embedding vector properties. `math` is imported at module
    # level (the original imported it mid-function, against PEP 8).
    embedding_vector = embeddings.data[0].embedding
    vector_norm = math.sqrt(sum(x * x for x in embedding_vector))
    # Check that the vector is normalized
    # (default behavior for most embedding models)
    assert 0.8 < vector_norm < 1.2, (
        f"Vector norm should be reasonable, actual: {vector_norm}")