mirror of https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 04:44:57 +08:00
[CI/Build] upgrade compressed-tensors to 0.12.2 to address LGPLv3 (#26501)
Signed-off-by: Sangyeon Cho <josang1204@gmail.com>
This commit is contained in:
parent 5c7fe25491
commit a1b2d658ee
@@ -38,7 +38,7 @@ pyyaml
 six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
 setuptools>=77.0.3,<80; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
 einops # Required for Qwen2-VL.
-compressed-tensors == 0.11.0 # required for compressed-tensors
+compressed-tensors == 0.12.2 # required for compressed-tensors
 depyf==0.19.0 # required for profiling and debugging with compilation config
 cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
 watchfiles # required for http server to monitor the updates of TLS files
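As a quick sanity check that a rebuilt environment actually picked up the new pin, the installed release can be read back from package metadata. A minimal sketch using only the standard library; the 0.12.2 value is taken from the hunk above, and nothing else here is part of the change:

# Sketch: verify the installed compressed-tensors release matches the pin above.
from importlib.metadata import PackageNotFoundError, version

try:
    installed = version("compressed-tensors")
except PackageNotFoundError:
    raise SystemExit("compressed-tensors is not installed")

if installed != "0.12.2":
    raise SystemExit(f"expected compressed-tensors 0.12.2, found {installed}")
print(f"compressed-tensors {installed} OK")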
@@ -310,7 +310,7 @@ class CompressedTensorsConfig(QuantizationConfig):
         )
         is_float_type = (
             weight_quant.type == QuantizationType.FLOAT
-            and input_quant.type == QuantizationType.FLOAT.value
+            and input_quant.type == QuantizationType.FLOAT
         )
         is_4_bits = weight_quant.num_bits == 4 and input_quant.num_bits == 4
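The dropped `.value` matters because equality against an enum member and against its string value are only interchangeable when the enum mixes in `str`. A simplified sketch with local stand-in enums; the real `QuantizationType` lives in compressed-tensors and may differ in detail:

from enum import Enum

# Local stand-ins for illustration only; not the compressed-tensors classes.
class PlainType(Enum):
    FLOAT = "float"

class StrType(str, Enum):
    FLOAT = "float"

# If the config field holds an enum member, comparing against .value only
# succeeds for the str-mixin variant:
assert (PlainType.FLOAT == PlainType.FLOAT.value) is False
assert (StrType.FLOAT == StrType.FLOAT.value) is True

# Member-to-member comparison works either way, which is the form the
# updated line uses.
assert PlainType.FLOAT == PlainType.FLOAT
assert StrType.FLOAT == StrType.FLOAT

Comparing member to member also matches the `weight_quant.type` check on the line above it, so both sides of the conjunction now use the same convention.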
@@ -143,7 +143,7 @@ class CompressedTensorsMoEMethod(FusedMoEMethodBase):
             # Prefer to use the MarlinMoE kernel when it is supported.
             if not check_moe_marlin_supports_layer(layer, group_size):
                 if (
-                    weight_quant.strategy in QuantizationStrategy.GROUP
+                    weight_quant.strategy == QuantizationStrategy.GROUP
                     and weight_quant.actorder
                     in (ActivationOrdering.GROUP, ActivationOrdering.DYNAMIC)
                 ):
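The `in` to `==` change is a semantic fix as well: for a string-valued enum, `x in MEMBER` falls back to substring containment on the member's value, whereas `==` is an exact comparison. A simplified sketch with a stand-in enum; member names and values are assumptions for illustration, not the exact compressed-tensors definitions:

from enum import Enum

# Stand-in for QuantizationStrategy; illustration only.
class QuantizationStrategy(str, Enum):
    TENSOR = "tensor"
    GROUP = "group"

# "in" against a single str-enum member is substring containment on "group",
# so it accepts more than the intended exact value:
assert "group" in QuantizationStrategy.GROUP    # exact value matches
assert "rou" in QuantizationStrategy.GROUP      # a mere substring also matches

# "==" only matches the member itself (or an equal string), which is what the
# updated condition checks before looking at the activation ordering:
assert QuantizationStrategy.GROUP == "group"
assert (QuantizationStrategy.TENSOR == QuantizationStrategy.GROUP) is False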