[Bugfix] Add proper comparison for package versions (#22314)

Signed-off-by: Syed Muhammad Bin Asif <syedmba7@connect.hku.hk>
This commit is contained in:
Syed Muhammad Bin Asif 2025-08-07 11:31:03 +08:00 committed by GitHub
parent 5e9455ae8f
commit 609b533cb6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 40 additions and 16 deletions

View File

@@ -3,6 +3,8 @@
# Copyright (c) Microsoft Corporation. # Copyright (c) Microsoft Corporation.
# Licensed under the MIT License. # Licensed under the MIT License.
from packaging import version
from vllm.model_executor.layers.quantization.utils.bitblas_utils import ( from vllm.model_executor.layers.quantization.utils.bitblas_utils import (
MINIMUM_BITBLAS_VERSION, MINIMUM_BITBLAS_VERSION,
) )
@@ -10,7 +12,7 @@ from vllm.model_executor.layers.quantization.utils.bitblas_utils import (
try: try:
import bitblas import bitblas
if bitblas.__version__ < MINIMUM_BITBLAS_VERSION: if version.parse(bitblas.__version__) < version.parse(MINIMUM_BITBLAS_VERSION):
raise ImportError( raise ImportError(
"bitblas version is wrong. Please " "bitblas version is wrong. Please "
f"install bitblas>={MINIMUM_BITBLAS_VERSION}" f"install bitblas>={MINIMUM_BITBLAS_VERSION}"

View File

@@ -200,7 +200,8 @@ vision-language model.
lora_config = vllm_config.lora_config lora_config = vllm_config.lora_config
super().__init__(config, cache_config, quant_config, lora_config, prefix) super().__init__(config, cache_config, quant_config, lora_config, prefix)
if __version__ >= "0.6.4": from packaging import version
if version.parse(__version__) >= version.parse("0.6.4"):
MyModel = MyNewModel MyModel = MyNewModel
else: else:
MyModel = MyOldModel MyModel = MyOldModel

View File

@@ -31,6 +31,8 @@ It supports page size >= 1.
import logging import logging
from packaging import version
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.triton_utils import tl, triton from vllm.triton_utils import tl, triton
@@ -40,7 +42,7 @@ logger = logging.getLogger(__name__)
# Only print the following warnings when triton version < 3.2.0. # Only print the following warnings when triton version < 3.2.0.
# The issue won't affect performance or accuracy. # The issue won't affect performance or accuracy.
if triton.__version__ < '3.2.0': if version.parse(triton.__version__) < version.parse('3.2.0'):
logger.warning( logger.warning(
"The following error message 'operation scheduled before its operands' " "The following error message 'operation scheduled before its operands' "
"can be ignored.") "can be ignored.")

View File

@@ -3,6 +3,7 @@
from typing import Any, Optional from typing import Any, Optional
import torch import torch
from packaging import version
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.model_executor.layers.linear import LinearBase, LinearMethodBase from vllm.model_executor.layers.linear import LinearBase, LinearMethodBase
@@ -45,7 +46,8 @@ class BitBLASConfig(QuantizationConfig):
) -> None: ) -> None:
try: try:
import bitblas import bitblas
if bitblas.__version__ < MINIMUM_BITBLAS_VERSION: if version.parse(bitblas.__version__) < version.parse(
MINIMUM_BITBLAS_VERSION):
raise ImportError( raise ImportError(
"bitblas version is wrong. Please " "bitblas version is wrong. Please "
f"install bitblas>={MINIMUM_BITBLAS_VERSION}") f"install bitblas>={MINIMUM_BITBLAS_VERSION}")

View File

@@ -4,6 +4,7 @@
from typing import Any, Callable, Optional, Union from typing import Any, Callable, Optional, Union
import torch import torch
from packaging import version
from vllm.model_executor.layers.fused_moe.layer import (FusedMoE, from vllm.model_executor.layers.fused_moe.layer import (FusedMoE,
FusedMoEMethodBase) FusedMoEMethodBase)
@@ -169,7 +170,8 @@ class BitsAndBytesLinearMethod(LinearMethodBase):
def __init__(self, quant_config: BitsAndBytesConfig): def __init__(self, quant_config: BitsAndBytesConfig):
try: try:
import bitsandbytes import bitsandbytes
if bitsandbytes.__version__ < "0.46.1": if version.parse(
bitsandbytes.__version__) < version.parse("0.46.1"):
raise ImportError("bitsandbytes version is wrong. Please " raise ImportError("bitsandbytes version is wrong. Please "
"install bitsandbytes>=0.46.1.") "install bitsandbytes>=0.46.1.")
except ImportError as err: except ImportError as err:
@@ -412,7 +414,8 @@ class BitsAndBytesMoEMethod(FusedMoEMethodBase):
def __init__(self, quant_config: BitsAndBytesConfig): def __init__(self, quant_config: BitsAndBytesConfig):
try: try:
import bitsandbytes import bitsandbytes
if bitsandbytes.__version__ < "0.46.1": if version.parse(
bitsandbytes.__version__) < version.parse("0.46.1"):
raise ImportError("bitsandbytes version is wrong. Please " raise ImportError("bitsandbytes version is wrong. Please "
"install bitsandbytes>=0.46.1.") "install bitsandbytes>=0.46.1.")
except ImportError as err: except ImportError as err:

View File

@@ -6,6 +6,7 @@ from typing import Any, Optional
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from packaging import version
from vllm.model_executor.layers.linear import LinearBase, LinearMethodBase from vllm.model_executor.layers.linear import LinearBase, LinearMethodBase
from vllm.model_executor.layers.quantization import QuantizationMethods from vllm.model_executor.layers.quantization import QuantizationMethods
@@ -145,7 +146,7 @@ class DeepSpeedFPParameter(nn.Parameter):
quant_config: DeepSpeedFPConfig): quant_config: DeepSpeedFPConfig):
try: try:
import deepspeed import deepspeed
if deepspeed.__version__ < "0.14.2": if version.parse(deepspeed.__version__) < version.parse("0.14.2"):
raise ImportError("deepspeed version is wrong. Please " raise ImportError("deepspeed version is wrong. Please "
"install deepspeed>=0.14.2.") "install deepspeed>=0.14.2.")
from deepspeed.ops.fp_quantizer import FP_Quantize from deepspeed.ops.fp_quantizer import FP_Quantize

View File

@@ -3,6 +3,7 @@
from typing import Any, Optional from typing import Any, Optional
import torch import torch
from packaging import version
from torch.nn.parameter import Parameter from torch.nn.parameter import Parameter
from vllm.logger import init_logger from vllm.logger import init_logger
@@ -63,7 +64,8 @@ class GPTQBitBLASConfig(QuantizationConfig):
try: try:
import bitblas import bitblas
if bitblas.__version__ < MINIMUM_BITBLAS_VERSION: if version.parse(bitblas.__version__) < version.parse(
MINIMUM_BITBLAS_VERSION):
raise ImportError( raise ImportError(
"bitblas version is wrong. Please " "bitblas version is wrong. Please "
f"install bitblas>={MINIMUM_BITBLAS_VERSION}") f"install bitblas>={MINIMUM_BITBLAS_VERSION}")

View File

@@ -4,6 +4,7 @@
from typing import Any, Optional from typing import Any, Optional
import torch import torch
from packaging import version
from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase, from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase,
UnquantizedLinearMethod) UnquantizedLinearMethod)
@@ -135,7 +136,8 @@ class IPEXGPTQLinearMethod(GPTQLinearMethod):
try: try:
import intel_extension_for_pytorch as ipex import intel_extension_for_pytorch as ipex
if ipex.__version__ < MIN_IPEX_VERSION: if version.parse(
ipex.__version__) < version.parse(MIN_IPEX_VERSION):
raise ImportError( raise ImportError(
"intel_extension_for_pytorch version is " "intel_extension_for_pytorch version is "
"wrong. Please install " "wrong. Please install "
@@ -199,7 +201,8 @@ class IPEXAWQLinearMethod(AWQLinearMethod):
try: try:
import intel_extension_for_pytorch as ipex import intel_extension_for_pytorch as ipex
if ipex.__version__ < MIN_IPEX_VERSION: if version.parse(
ipex.__version__) < version.parse(MIN_IPEX_VERSION):
raise ImportError( raise ImportError(
"intel_extension_for_pytorch version is " "intel_extension_for_pytorch version is "
"wrong. Please install " "wrong. Please install "

View File

@@ -4,6 +4,7 @@
from typing import Optional from typing import Optional
import torch import torch
from packaging import version
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.model_executor.layers.quantization.base_config import ( from vllm.model_executor.layers.quantization.base_config import (
@@ -110,7 +111,8 @@ class BitBLASLinearKernel(MPLinearKernel):
try: try:
import bitblas import bitblas
if bitblas.__version__ < MINIMUM_BITBLAS_VERSION: if version.parse(bitblas.__version__) < version.parse(
MINIMUM_BITBLAS_VERSION):
raise ImportError( raise ImportError(
"bitblas version is wrong. Please " "bitblas version is wrong. Please "
f"install bitblas>={MINIMUM_BITBLAS_VERSION}") f"install bitblas>={MINIMUM_BITBLAS_VERSION}")

View File

@@ -3,6 +3,7 @@
from typing import Optional from typing import Optional
import torch import torch
from packaging import version
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.scalar_type import ScalarType, scalar_types from vllm.scalar_type import ScalarType, scalar_types
@@ -75,7 +76,8 @@ def _check_bitblas_supported(
# Finally, check if bitblas is installed # Finally, check if bitblas is installed
try: try:
import bitblas import bitblas
if bitblas.__version__ < MINIMUM_BITBLAS_VERSION: if version.parse(
bitblas.__version__) < version.parse(MINIMUM_BITBLAS_VERSION):
raise ImportError("bitblas version is wrong. Please " raise ImportError("bitblas version is wrong. Please "
f"install bitblas>={MINIMUM_BITBLAS_VERSION}") f"install bitblas>={MINIMUM_BITBLAS_VERSION}")
except ImportError: except ImportError:

View File

@@ -4,6 +4,7 @@
from typing import Callable, Optional, Union from typing import Callable, Optional, Union
import torch import torch
from packaging import version
from vllm import _custom_ops as ops from vllm import _custom_ops as ops
from vllm import envs from vllm import envs
@@ -21,8 +22,8 @@ TORCH_DEVICE_IDENTITY = None
# torch._scaled_mm rowwise feature. # torch._scaled_mm rowwise feature.
# The condition is determined once as the operations # The condition is determined once as the operations
# are time consuming. # are time consuming.
USE_ROWWISE_TORCH_SCALED_MM = (current_platform.is_rocm() USE_ROWWISE_TORCH_SCALED_MM = (current_platform.is_rocm() and version.parse(
and torch.__version__[0:3] >= "2.7" torch.__version__) >= version.parse("2.7")
and current_platform.has_device_capability(94)) and current_platform.has_device_capability(94))

View File

@@ -12,6 +12,7 @@ from typing import Any, Callable, Optional
import numpy as np import numpy as np
import torch import torch
from huggingface_hub import HfApi from huggingface_hub import HfApi
from packaging import version
from torch import nn from torch import nn
from transformers.utils import SAFE_WEIGHTS_INDEX_NAME from transformers.utils import SAFE_WEIGHTS_INDEX_NAME
@@ -193,7 +194,8 @@ class BitsAndBytesModelLoader(BaseModelLoader):
try: try:
import bitsandbytes import bitsandbytes
if bitsandbytes.__version__ < "0.46.1": if version.parse(
bitsandbytes.__version__) < version.parse("0.46.1"):
raise ImportError("bitsandbytes version is wrong. Please " raise ImportError("bitsandbytes version is wrong. Please "
"install bitsandbytes>=0.46.1.") "install bitsandbytes>=0.46.1.")
except ImportError as err: except ImportError as err:

View File

@@ -5,6 +5,7 @@ from typing import Optional
import torch import torch
import torch.nn as nn import torch.nn as nn
from packaging import version
from vllm import envs from vllm import envs
from vllm.logger import init_logger from vllm.logger import init_logger
@@ -32,7 +33,7 @@ class TopKTopPSampler(nn.Module):
if current_platform.is_cuda(): if current_platform.is_cuda():
if is_flashinfer_available: if is_flashinfer_available:
flashinfer_version = flashinfer.__version__ flashinfer_version = flashinfer.__version__
if flashinfer_version < "0.2.3": if version.parse(flashinfer_version) < version.parse("0.2.3"):
logger.warning_once( logger.warning_once(
"FlashInfer version >= 0.2.3 required. " "FlashInfer version >= 0.2.3 required. "
"Falling back to default sampling implementation.") "Falling back to default sampling implementation.")