mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-16 14:27:19 +08:00
compilation is fixed
This commit is contained in:
parent
5fb9dbe6f6
commit
70b4e46e70
@ -5,7 +5,7 @@ requests >= 2.26.0
|
|||||||
tqdm
|
tqdm
|
||||||
blake3
|
blake3
|
||||||
py-cpuinfo
|
py-cpuinfo
|
||||||
transformers >= 4.45.2 # Required for Llama 3.2 and Qwen2-VL.
|
transformers >= 4.48.2 # Required for Bamba model and Transformers backend.
|
||||||
tokenizers >= 0.19.1 # Required for Llama 3.
|
tokenizers >= 0.19.1 # Required for Llama 3.
|
||||||
protobuf # Required by LlamaTokenizer.
|
protobuf # Required by LlamaTokenizer.
|
||||||
fastapi >= 0.107.0, < 0.113.0; python_version < '3.9'
|
fastapi >= 0.107.0, < 0.113.0; python_version < '3.9'
|
||||||
@ -34,6 +34,6 @@ pyyaml
|
|||||||
six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
|
six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
|
||||||
setuptools>=74.1.1; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
|
setuptools>=74.1.1; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
|
||||||
einops # Required for Qwen2-VL.
|
einops # Required for Qwen2-VL.
|
||||||
compressed-tensors == 0.8.1 # required for compressed-tensors
|
compressed-tensors == 0.9.1 # required for compressed-tensors
|
||||||
depyf==0.18.0 # required for profiling and debugging with compilation config
|
depyf==0.18.0 # required for profiling and debugging with compilation config
|
||||||
cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
|
cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
|
||||||
@ -106,9 +106,17 @@ dnspython==2.7.0
|
|||||||
docutils==0.16
|
docutils==0.16
|
||||||
# via awscli
|
# via awscli
|
||||||
einops==0.8.0
|
einops==0.8.0
|
||||||
# via -r requirements-test.in
|
# via
|
||||||
|
# -r requirements-test.in
|
||||||
|
# encodec
|
||||||
|
# vector-quantize-pytorch
|
||||||
|
# vocos
|
||||||
|
einx==0.3.0
|
||||||
|
# via vector-quantize-pytorch
|
||||||
email-validator==2.2.0
|
email-validator==2.2.0
|
||||||
# via pydantic
|
# via pydantic
|
||||||
|
encodec==0.1.1
|
||||||
|
# via vocos
|
||||||
evaluate==0.4.3
|
evaluate==0.4.3
|
||||||
# via lm-eval
|
# via lm-eval
|
||||||
fastparquet==2024.11.0
|
fastparquet==2024.11.0
|
||||||
@ -125,6 +133,8 @@ filelock==3.16.1
|
|||||||
# triton
|
# triton
|
||||||
fonttools==4.54.1
|
fonttools==4.54.1
|
||||||
# via matplotlib
|
# via matplotlib
|
||||||
|
frozendict==2.4.6
|
||||||
|
# via einx
|
||||||
frozenlist==1.5.0
|
frozenlist==1.5.0
|
||||||
# via
|
# via
|
||||||
# aiohttp
|
# aiohttp
|
||||||
@ -159,6 +169,7 @@ huggingface-hub==0.26.2
|
|||||||
# timm
|
# timm
|
||||||
# tokenizers
|
# tokenizers
|
||||||
# transformers
|
# transformers
|
||||||
|
# vocos
|
||||||
idna==3.10
|
idna==3.10
|
||||||
# via
|
# via
|
||||||
# anyio
|
# anyio
|
||||||
@ -261,6 +272,8 @@ numpy==1.26.4
|
|||||||
# cupy-cuda12x
|
# cupy-cuda12x
|
||||||
# datasets
|
# datasets
|
||||||
# decord
|
# decord
|
||||||
|
# einx
|
||||||
|
# encodec
|
||||||
# evaluate
|
# evaluate
|
||||||
# fastparquet
|
# fastparquet
|
||||||
# genai-perf
|
# genai-perf
|
||||||
@ -283,6 +296,7 @@ numpy==1.26.4
|
|||||||
# torchvision
|
# torchvision
|
||||||
# transformers
|
# transformers
|
||||||
# tritonclient
|
# tritonclient
|
||||||
|
# vocos
|
||||||
nvidia-cublas-cu12==12.4.5.8
|
nvidia-cublas-cu12==12.4.5.8
|
||||||
# via
|
# via
|
||||||
# nvidia-cudnn-cu12
|
# nvidia-cudnn-cu12
|
||||||
@ -455,6 +469,7 @@ pyyaml==6.0.2
|
|||||||
# responses
|
# responses
|
||||||
# timm
|
# timm
|
||||||
# transformers
|
# transformers
|
||||||
|
# vocos
|
||||||
ray[adag]==2.40.0
|
ray[adag]==2.40.0
|
||||||
# via -r requirements-test.in
|
# via -r requirements-test.in
|
||||||
redis==5.2.0
|
redis==5.2.0
|
||||||
@ -517,6 +532,7 @@ scipy==1.13.1
|
|||||||
# scikit-learn
|
# scikit-learn
|
||||||
# sentence-transformers
|
# sentence-transformers
|
||||||
# statsmodels
|
# statsmodels
|
||||||
|
# vocos
|
||||||
sentence-transformers==3.2.1
|
sentence-transformers==3.2.1
|
||||||
# via -r requirements-test.in
|
# via -r requirements-test.in
|
||||||
sentencepiece==0.2.0
|
sentencepiece==0.2.0
|
||||||
@ -540,7 +556,9 @@ sqlitedict==2.1.0
|
|||||||
statsmodels==0.14.4
|
statsmodels==0.14.4
|
||||||
# via genai-perf
|
# via genai-perf
|
||||||
sympy==1.13.1
|
sympy==1.13.1
|
||||||
# via torch
|
# via
|
||||||
|
# einx
|
||||||
|
# torch
|
||||||
tabledata==1.3.3
|
tabledata==1.3.3
|
||||||
# via pytablewriter
|
# via pytablewriter
|
||||||
tabulate==0.9.0
|
tabulate==0.9.0
|
||||||
@ -568,12 +586,21 @@ torch==2.5.1
|
|||||||
# -r requirements-test.in
|
# -r requirements-test.in
|
||||||
# accelerate
|
# accelerate
|
||||||
# bitsandbytes
|
# bitsandbytes
|
||||||
|
# encodec
|
||||||
# lm-eval
|
# lm-eval
|
||||||
# peft
|
# peft
|
||||||
# sentence-transformers
|
# sentence-transformers
|
||||||
# tensorizer
|
# tensorizer
|
||||||
# timm
|
# timm
|
||||||
|
# torchaudio
|
||||||
# torchvision
|
# torchvision
|
||||||
|
# vector-quantize-pytorch
|
||||||
|
# vocos
|
||||||
|
torchaudio==2.5.1
|
||||||
|
# via
|
||||||
|
# -r requirements-test.in
|
||||||
|
# encodec
|
||||||
|
# vocos
|
||||||
torchvision==0.20.1
|
torchvision==0.20.1
|
||||||
# via timm
|
# via timm
|
||||||
tqdm==4.66.6
|
tqdm==4.66.6
|
||||||
@ -584,13 +611,15 @@ tqdm==4.66.6
|
|||||||
# lm-eval
|
# lm-eval
|
||||||
# nltk
|
# nltk
|
||||||
# peft
|
# peft
|
||||||
|
# pqdm
|
||||||
# sentence-transformers
|
# sentence-transformers
|
||||||
# tqdm-multiprocess
|
# tqdm-multiprocess
|
||||||
# transformers
|
# transformers
|
||||||
tqdm-multiprocess==0.0.11
|
tqdm-multiprocess==0.0.11
|
||||||
# via lm-eval
|
# via lm-eval
|
||||||
transformers==4.47.0
|
transformers==4.48.2
|
||||||
# via
|
# via
|
||||||
|
# -r requirements-test.in
|
||||||
# genai-perf
|
# genai-perf
|
||||||
# lm-eval
|
# lm-eval
|
||||||
# peft
|
# peft
|
||||||
@ -615,6 +644,7 @@ typing-extensions==4.12.2
|
|||||||
# huggingface-hub
|
# huggingface-hub
|
||||||
# librosa
|
# librosa
|
||||||
# mistral-common
|
# mistral-common
|
||||||
|
# pqdm
|
||||||
# pydantic
|
# pydantic
|
||||||
# pydantic-core
|
# pydantic-core
|
||||||
# torch
|
# torch
|
||||||
@ -626,6 +656,10 @@ urllib3==2.2.3
|
|||||||
# requests
|
# requests
|
||||||
# responses
|
# responses
|
||||||
# tritonclient
|
# tritonclient
|
||||||
|
vector-quantize-pytorch==1.21.2
|
||||||
|
# via -r requirements-test.in
|
||||||
|
vocos==0.1.0
|
||||||
|
# via -r requirements-test.in
|
||||||
word2number==1.1
|
word2number==1.1
|
||||||
# via lm-eval
|
# via lm-eval
|
||||||
xxhash==3.5.0
|
xxhash==3.5.0
|
||||||
|
|||||||
@ -766,11 +766,6 @@ class TPUModelRunner(ModelRunnerBase):
|
|||||||
logger.info(" -- Compilation for decode done in %.2f [secs].",
|
logger.info(" -- Compilation for decode done in %.2f [secs].",
|
||||||
end - start)
|
end - start)
|
||||||
|
|
||||||
def _initialize_kv_cache(self):
|
|
||||||
kv_cache_spec = self.get_kv_cache_spec()
|
|
||||||
|
|
||||||
kv_cache_config = get_kv_cache_config(vllm_config, kv_cache_spec,
|
|
||||||
availble_gpu_memory)
|
|
||||||
def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None:
|
def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None:
|
||||||
"""
|
"""
|
||||||
Initialize KV cache based on `kv_cache_config`.
|
Initialize KV cache based on `kv_cache_config`.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user