mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-22 23:05:43 +08:00
[Bugfix] Add proper comparison for package versions (#22314)
Signed-off-by: Syed Muhammad Bin Asif <syedmba7@connect.hku.hk>
This commit is contained in:
parent
5e9455ae8f
commit
609b533cb6
@ -3,6 +3,8 @@
|
|||||||
# Copyright (c) Microsoft Corporation.
|
# Copyright (c) Microsoft Corporation.
|
||||||
# Licensed under the MIT License.
|
# Licensed under the MIT License.
|
||||||
|
|
||||||
|
from packaging import version
|
||||||
|
|
||||||
from vllm.model_executor.layers.quantization.utils.bitblas_utils import (
|
from vllm.model_executor.layers.quantization.utils.bitblas_utils import (
|
||||||
MINIMUM_BITBLAS_VERSION,
|
MINIMUM_BITBLAS_VERSION,
|
||||||
)
|
)
|
||||||
@ -10,7 +12,7 @@ from vllm.model_executor.layers.quantization.utils.bitblas_utils import (
|
|||||||
try:
|
try:
|
||||||
import bitblas
|
import bitblas
|
||||||
|
|
||||||
if bitblas.__version__ < MINIMUM_BITBLAS_VERSION:
|
if version.parse(bitblas.__version__) < version.parse(MINIMUM_BITBLAS_VERSION):
|
||||||
raise ImportError(
|
raise ImportError(
|
||||||
"bitblas version is wrong. Please "
|
"bitblas version is wrong. Please "
|
||||||
f"install bitblas>={MINIMUM_BITBLAS_VERSION}"
|
f"install bitblas>={MINIMUM_BITBLAS_VERSION}"
|
||||||
|
|||||||
@ -200,7 +200,8 @@ vision-language model.
|
|||||||
lora_config = vllm_config.lora_config
|
lora_config = vllm_config.lora_config
|
||||||
super().__init__(config, cache_config, quant_config, lora_config, prefix)
|
super().__init__(config, cache_config, quant_config, lora_config, prefix)
|
||||||
|
|
||||||
if __version__ >= "0.6.4":
|
from packaging import version
|
||||||
|
if version.parse(__version__) >= version.parse("0.6.4"):
|
||||||
MyModel = MyNewModel
|
MyModel = MyNewModel
|
||||||
else:
|
else:
|
||||||
MyModel = MyOldModel
|
MyModel = MyOldModel
|
||||||
|
|||||||
@ -31,6 +31,8 @@ It supports page size >= 1.
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
from packaging import version
|
||||||
|
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
from vllm.triton_utils import tl, triton
|
from vllm.triton_utils import tl, triton
|
||||||
|
|
||||||
@ -40,7 +42,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
# Only print the following warnings when triton version < 3.2.0.
|
# Only print the following warnings when triton version < 3.2.0.
|
||||||
# The issue won't affect performance or accuracy.
|
# The issue won't affect performance or accuracy.
|
||||||
if triton.__version__ < '3.2.0':
|
if version.parse(triton.__version__) < version.parse('3.2.0'):
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"The following error message 'operation scheduled before its operands' "
|
"The following error message 'operation scheduled before its operands' "
|
||||||
"can be ignored.")
|
"can be ignored.")
|
||||||
|
|||||||
@ -3,6 +3,7 @@
|
|||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
from packaging import version
|
||||||
|
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.model_executor.layers.linear import LinearBase, LinearMethodBase
|
from vllm.model_executor.layers.linear import LinearBase, LinearMethodBase
|
||||||
@ -45,7 +46,8 @@ class BitBLASConfig(QuantizationConfig):
|
|||||||
) -> None:
|
) -> None:
|
||||||
try:
|
try:
|
||||||
import bitblas
|
import bitblas
|
||||||
if bitblas.__version__ < MINIMUM_BITBLAS_VERSION:
|
if version.parse(bitblas.__version__) < version.parse(
|
||||||
|
MINIMUM_BITBLAS_VERSION):
|
||||||
raise ImportError(
|
raise ImportError(
|
||||||
"bitblas version is wrong. Please "
|
"bitblas version is wrong. Please "
|
||||||
f"install bitblas>={MINIMUM_BITBLAS_VERSION}")
|
f"install bitblas>={MINIMUM_BITBLAS_VERSION}")
|
||||||
|
|||||||
@ -4,6 +4,7 @@
|
|||||||
from typing import Any, Callable, Optional, Union
|
from typing import Any, Callable, Optional, Union
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
from packaging import version
|
||||||
|
|
||||||
from vllm.model_executor.layers.fused_moe.layer import (FusedMoE,
|
from vllm.model_executor.layers.fused_moe.layer import (FusedMoE,
|
||||||
FusedMoEMethodBase)
|
FusedMoEMethodBase)
|
||||||
@ -169,7 +170,8 @@ class BitsAndBytesLinearMethod(LinearMethodBase):
|
|||||||
def __init__(self, quant_config: BitsAndBytesConfig):
|
def __init__(self, quant_config: BitsAndBytesConfig):
|
||||||
try:
|
try:
|
||||||
import bitsandbytes
|
import bitsandbytes
|
||||||
if bitsandbytes.__version__ < "0.46.1":
|
if version.parse(
|
||||||
|
bitsandbytes.__version__) < version.parse("0.46.1"):
|
||||||
raise ImportError("bitsandbytes version is wrong. Please "
|
raise ImportError("bitsandbytes version is wrong. Please "
|
||||||
"install bitsandbytes>=0.46.1.")
|
"install bitsandbytes>=0.46.1.")
|
||||||
except ImportError as err:
|
except ImportError as err:
|
||||||
@ -412,7 +414,8 @@ class BitsAndBytesMoEMethod(FusedMoEMethodBase):
|
|||||||
def __init__(self, quant_config: BitsAndBytesConfig):
|
def __init__(self, quant_config: BitsAndBytesConfig):
|
||||||
try:
|
try:
|
||||||
import bitsandbytes
|
import bitsandbytes
|
||||||
if bitsandbytes.__version__ < "0.46.1":
|
if version.parse(
|
||||||
|
bitsandbytes.__version__) < version.parse("0.46.1"):
|
||||||
raise ImportError("bitsandbytes version is wrong. Please "
|
raise ImportError("bitsandbytes version is wrong. Please "
|
||||||
"install bitsandbytes>=0.46.1.")
|
"install bitsandbytes>=0.46.1.")
|
||||||
except ImportError as err:
|
except ImportError as err:
|
||||||
|
|||||||
@ -6,6 +6,7 @@ from typing import Any, Optional
|
|||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
from packaging import version
|
||||||
|
|
||||||
from vllm.model_executor.layers.linear import LinearBase, LinearMethodBase
|
from vllm.model_executor.layers.linear import LinearBase, LinearMethodBase
|
||||||
from vllm.model_executor.layers.quantization import QuantizationMethods
|
from vllm.model_executor.layers.quantization import QuantizationMethods
|
||||||
@ -145,7 +146,7 @@ class DeepSpeedFPParameter(nn.Parameter):
|
|||||||
quant_config: DeepSpeedFPConfig):
|
quant_config: DeepSpeedFPConfig):
|
||||||
try:
|
try:
|
||||||
import deepspeed
|
import deepspeed
|
||||||
if deepspeed.__version__ < "0.14.2":
|
if version.parse(deepspeed.__version__) < version.parse("0.14.2"):
|
||||||
raise ImportError("deepspeed version is wrong. Please "
|
raise ImportError("deepspeed version is wrong. Please "
|
||||||
"install deepspeed>=0.14.2.")
|
"install deepspeed>=0.14.2.")
|
||||||
from deepspeed.ops.fp_quantizer import FP_Quantize
|
from deepspeed.ops.fp_quantizer import FP_Quantize
|
||||||
|
|||||||
@ -3,6 +3,7 @@
|
|||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
from packaging import version
|
||||||
from torch.nn.parameter import Parameter
|
from torch.nn.parameter import Parameter
|
||||||
|
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
@ -63,7 +64,8 @@ class GPTQBitBLASConfig(QuantizationConfig):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
import bitblas
|
import bitblas
|
||||||
if bitblas.__version__ < MINIMUM_BITBLAS_VERSION:
|
if version.parse(bitblas.__version__) < version.parse(
|
||||||
|
MINIMUM_BITBLAS_VERSION):
|
||||||
raise ImportError(
|
raise ImportError(
|
||||||
"bitblas version is wrong. Please "
|
"bitblas version is wrong. Please "
|
||||||
f"install bitblas>={MINIMUM_BITBLAS_VERSION}")
|
f"install bitblas>={MINIMUM_BITBLAS_VERSION}")
|
||||||
|
|||||||
@ -4,6 +4,7 @@
|
|||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
from packaging import version
|
||||||
|
|
||||||
from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase,
|
from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase,
|
||||||
UnquantizedLinearMethod)
|
UnquantizedLinearMethod)
|
||||||
@ -135,7 +136,8 @@ class IPEXGPTQLinearMethod(GPTQLinearMethod):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
import intel_extension_for_pytorch as ipex
|
import intel_extension_for_pytorch as ipex
|
||||||
if ipex.__version__ < MIN_IPEX_VERSION:
|
if version.parse(
|
||||||
|
ipex.__version__) < version.parse(MIN_IPEX_VERSION):
|
||||||
raise ImportError(
|
raise ImportError(
|
||||||
"intel_extension_for_pytorch version is "
|
"intel_extension_for_pytorch version is "
|
||||||
"wrong. Please install "
|
"wrong. Please install "
|
||||||
@ -199,7 +201,8 @@ class IPEXAWQLinearMethod(AWQLinearMethod):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
import intel_extension_for_pytorch as ipex
|
import intel_extension_for_pytorch as ipex
|
||||||
if ipex.__version__ < MIN_IPEX_VERSION:
|
if version.parse(
|
||||||
|
ipex.__version__) < version.parse(MIN_IPEX_VERSION):
|
||||||
raise ImportError(
|
raise ImportError(
|
||||||
"intel_extension_for_pytorch version is "
|
"intel_extension_for_pytorch version is "
|
||||||
"wrong. Please install "
|
"wrong. Please install "
|
||||||
|
|||||||
@ -4,6 +4,7 @@
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
from packaging import version
|
||||||
|
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.model_executor.layers.quantization.base_config import (
|
from vllm.model_executor.layers.quantization.base_config import (
|
||||||
@ -110,7 +111,8 @@ class BitBLASLinearKernel(MPLinearKernel):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
import bitblas
|
import bitblas
|
||||||
if bitblas.__version__ < MINIMUM_BITBLAS_VERSION:
|
if version.parse(bitblas.__version__) < version.parse(
|
||||||
|
MINIMUM_BITBLAS_VERSION):
|
||||||
raise ImportError(
|
raise ImportError(
|
||||||
"bitblas version is wrong. Please "
|
"bitblas version is wrong. Please "
|
||||||
f"install bitblas>={MINIMUM_BITBLAS_VERSION}")
|
f"install bitblas>={MINIMUM_BITBLAS_VERSION}")
|
||||||
|
|||||||
@ -3,6 +3,7 @@
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
from packaging import version
|
||||||
|
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
from vllm.scalar_type import ScalarType, scalar_types
|
from vllm.scalar_type import ScalarType, scalar_types
|
||||||
@ -75,7 +76,8 @@ def _check_bitblas_supported(
|
|||||||
# Finally, check if bitblas is installed
|
# Finally, check if bitblas is installed
|
||||||
try:
|
try:
|
||||||
import bitblas
|
import bitblas
|
||||||
if bitblas.__version__ < MINIMUM_BITBLAS_VERSION:
|
if version.parse(
|
||||||
|
bitblas.__version__) < version.parse(MINIMUM_BITBLAS_VERSION):
|
||||||
raise ImportError("bitblas version is wrong. Please "
|
raise ImportError("bitblas version is wrong. Please "
|
||||||
f"install bitblas>={MINIMUM_BITBLAS_VERSION}")
|
f"install bitblas>={MINIMUM_BITBLAS_VERSION}")
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
|||||||
@ -4,6 +4,7 @@
|
|||||||
from typing import Callable, Optional, Union
|
from typing import Callable, Optional, Union
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
from packaging import version
|
||||||
|
|
||||||
from vllm import _custom_ops as ops
|
from vllm import _custom_ops as ops
|
||||||
from vllm import envs
|
from vllm import envs
|
||||||
@ -21,8 +22,8 @@ TORCH_DEVICE_IDENTITY = None
|
|||||||
# torch._scaled_mm rowwise feature.
|
# torch._scaled_mm rowwise feature.
|
||||||
# The condition is determined once as the operations
|
# The condition is determined once as the operations
|
||||||
# are time-consuming.
|
# are time-consuming.
|
||||||
USE_ROWWISE_TORCH_SCALED_MM = (current_platform.is_rocm()
|
USE_ROWWISE_TORCH_SCALED_MM = (current_platform.is_rocm() and version.parse(
|
||||||
and torch.__version__[0:3] >= "2.7"
|
torch.__version__) >= version.parse("2.7")
|
||||||
and current_platform.has_device_capability(94))
|
and current_platform.has_device_capability(94))
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -12,6 +12,7 @@ from typing import Any, Callable, Optional
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
from huggingface_hub import HfApi
|
from huggingface_hub import HfApi
|
||||||
|
from packaging import version
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from transformers.utils import SAFE_WEIGHTS_INDEX_NAME
|
from transformers.utils import SAFE_WEIGHTS_INDEX_NAME
|
||||||
|
|
||||||
@ -193,7 +194,8 @@ class BitsAndBytesModelLoader(BaseModelLoader):
|
|||||||
try:
|
try:
|
||||||
import bitsandbytes
|
import bitsandbytes
|
||||||
|
|
||||||
if bitsandbytes.__version__ < "0.46.1":
|
if version.parse(
|
||||||
|
bitsandbytes.__version__) < version.parse("0.46.1"):
|
||||||
raise ImportError("bitsandbytes version is wrong. Please "
|
raise ImportError("bitsandbytes version is wrong. Please "
|
||||||
"install bitsandbytes>=0.46.1.")
|
"install bitsandbytes>=0.46.1.")
|
||||||
except ImportError as err:
|
except ImportError as err:
|
||||||
|
|||||||
@ -5,6 +5,7 @@ from typing import Optional
|
|||||||
|
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
|
from packaging import version
|
||||||
|
|
||||||
from vllm import envs
|
from vllm import envs
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
@ -32,7 +33,7 @@ class TopKTopPSampler(nn.Module):
|
|||||||
if current_platform.is_cuda():
|
if current_platform.is_cuda():
|
||||||
if is_flashinfer_available:
|
if is_flashinfer_available:
|
||||||
flashinfer_version = flashinfer.__version__
|
flashinfer_version = flashinfer.__version__
|
||||||
if flashinfer_version < "0.2.3":
|
if version.parse(flashinfer_version) < version.parse("0.2.3"):
|
||||||
logger.warning_once(
|
logger.warning_once(
|
||||||
"FlashInfer version >= 0.2.3 required. "
|
"FlashInfer version >= 0.2.3 required. "
|
||||||
"Falling back to default sampling implementation.")
|
"Falling back to default sampling implementation.")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user