compilation is fixed

This commit is contained in:
Alexander Matveev 2025-02-06 20:49:29 +00:00
parent 5fb9dbe6f6
commit 70b4e46e70
3 changed files with 42 additions and 13 deletions

View File

@ -5,7 +5,7 @@ requests >= 2.26.0
tqdm tqdm
blake3 blake3
py-cpuinfo py-cpuinfo
transformers >= 4.45.2 # Required for Llama 3.2 and Qwen2-VL. transformers >= 4.48.2 # Required for Bamba model and Transformers backend.
tokenizers >= 0.19.1 # Required for Llama 3. tokenizers >= 0.19.1 # Required for Llama 3.
protobuf # Required by LlamaTokenizer. protobuf # Required by LlamaTokenizer.
fastapi >= 0.107.0, < 0.113.0; python_version < '3.9' fastapi >= 0.107.0, < 0.113.0; python_version < '3.9'
@ -34,6 +34,6 @@ pyyaml
six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12 six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
setuptools>=74.1.1; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12 setuptools>=74.1.1; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
einops # Required for Qwen2-VL. einops # Required for Qwen2-VL.
compressed-tensors == 0.8.1 # required for compressed-tensors compressed-tensors == 0.9.1 # required for compressed-tensors
depyf==0.18.0 # required for profiling and debugging with compilation config depyf==0.18.0 # required for profiling and debugging with compilation config
cloudpickle # allows pickling lambda functions in model_executor/models/registry.py cloudpickle # allows pickling lambda functions in model_executor/models/registry.py

View File

@ -106,9 +106,17 @@ dnspython==2.7.0
docutils==0.16 docutils==0.16
# via awscli # via awscli
einops==0.8.0 einops==0.8.0
# via -r requirements-test.in # via
# -r requirements-test.in
# encodec
# vector-quantize-pytorch
# vocos
einx==0.3.0
# via vector-quantize-pytorch
email-validator==2.2.0 email-validator==2.2.0
# via pydantic # via pydantic
encodec==0.1.1
# via vocos
evaluate==0.4.3 evaluate==0.4.3
# via lm-eval # via lm-eval
fastparquet==2024.11.0 fastparquet==2024.11.0
@ -125,6 +133,8 @@ filelock==3.16.1
# triton # triton
fonttools==4.54.1 fonttools==4.54.1
# via matplotlib # via matplotlib
frozendict==2.4.6
# via einx
frozenlist==1.5.0 frozenlist==1.5.0
# via # via
# aiohttp # aiohttp
@ -159,6 +169,7 @@ huggingface-hub==0.26.2
# timm # timm
# tokenizers # tokenizers
# transformers # transformers
# vocos
idna==3.10 idna==3.10
# via # via
# anyio # anyio
@ -261,6 +272,8 @@ numpy==1.26.4
# cupy-cuda12x # cupy-cuda12x
# datasets # datasets
# decord # decord
# einx
# encodec
# evaluate # evaluate
# fastparquet # fastparquet
# genai-perf # genai-perf
@ -283,6 +296,7 @@ numpy==1.26.4
# torchvision # torchvision
# transformers # transformers
# tritonclient # tritonclient
# vocos
nvidia-cublas-cu12==12.4.5.8 nvidia-cublas-cu12==12.4.5.8
# via # via
# nvidia-cudnn-cu12 # nvidia-cudnn-cu12
@ -455,6 +469,7 @@ pyyaml==6.0.2
# responses # responses
# timm # timm
# transformers # transformers
# vocos
ray[adag]==2.40.0 ray[adag]==2.40.0
# via -r requirements-test.in # via -r requirements-test.in
redis==5.2.0 redis==5.2.0
@ -517,6 +532,7 @@ scipy==1.13.1
# scikit-learn # scikit-learn
# sentence-transformers # sentence-transformers
# statsmodels # statsmodels
# vocos
sentence-transformers==3.2.1 sentence-transformers==3.2.1
# via -r requirements-test.in # via -r requirements-test.in
sentencepiece==0.2.0 sentencepiece==0.2.0
@ -540,7 +556,9 @@ sqlitedict==2.1.0
statsmodels==0.14.4 statsmodels==0.14.4
# via genai-perf # via genai-perf
sympy==1.13.1 sympy==1.13.1
# via torch # via
# einx
# torch
tabledata==1.3.3 tabledata==1.3.3
# via pytablewriter # via pytablewriter
tabulate==0.9.0 tabulate==0.9.0
@ -568,12 +586,21 @@ torch==2.5.1
# -r requirements-test.in # -r requirements-test.in
# accelerate # accelerate
# bitsandbytes # bitsandbytes
# encodec
# lm-eval # lm-eval
# peft # peft
# sentence-transformers # sentence-transformers
# tensorizer # tensorizer
# timm # timm
# torchaudio
# torchvision # torchvision
# vector-quantize-pytorch
# vocos
torchaudio==2.5.1
# via
# -r requirements-test.in
# encodec
# vocos
torchvision==0.20.1 torchvision==0.20.1
# via timm # via timm
tqdm==4.66.6 tqdm==4.66.6
@ -584,13 +611,15 @@ tqdm==4.66.6
# lm-eval # lm-eval
# nltk # nltk
# peft # peft
# pqdm
# sentence-transformers # sentence-transformers
# tqdm-multiprocess # tqdm-multiprocess
# transformers # transformers
tqdm-multiprocess==0.0.11 tqdm-multiprocess==0.0.11
# via lm-eval # via lm-eval
transformers==4.47.0 transformers==4.48.2
# via # via
# -r requirements-test.in
# genai-perf # genai-perf
# lm-eval # lm-eval
# peft # peft
@ -615,6 +644,7 @@ typing-extensions==4.12.2
# huggingface-hub # huggingface-hub
# librosa # librosa
# mistral-common # mistral-common
# pqdm
# pydantic # pydantic
# pydantic-core # pydantic-core
# torch # torch
@ -626,6 +656,10 @@ urllib3==2.2.3
# requests # requests
# responses # responses
# tritonclient # tritonclient
vector-quantize-pytorch==1.21.2
# via -r requirements-test.in
vocos==0.1.0
# via -r requirements-test.in
word2number==1.1 word2number==1.1
# via lm-eval # via lm-eval
xxhash==3.5.0 xxhash==3.5.0

View File

@ -766,11 +766,6 @@ class TPUModelRunner(ModelRunnerBase):
logger.info(" -- Compilation for decode done in %.2f [secs].", logger.info(" -- Compilation for decode done in %.2f [secs].",
end - start) end - start)
def _initialize_kv_cache(self):
kv_cache_spec = self.get_kv_cache_spec()
kv_cache_config = get_kv_cache_config(vllm_config, kv_cache_spec,
availble_gpu_memory)
def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None: def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None:
""" """
Initialize KV cache based on `kv_cache_config`. Initialize KV cache based on `kv_cache_config`.