diff --git a/requirements/common.txt b/requirements/common.txt index d5fa1e92bd7eb..a7aa801208969 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -38,7 +38,7 @@ pyyaml six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12 setuptools>=77.0.3,<80; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12 einops # Required for Qwen2-VL. -compressed-tensors == 0.11.0 # required for compressed-tensors +compressed-tensors == 0.12.2 # required for compressed-tensors depyf==0.19.0 # required for profiling and debugging with compilation config cloudpickle # allows pickling lambda functions in model_executor/models/registry.py watchfiles # required for http server to monitor the updates of TLS files diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py index 15a0ff23273dd..6c7d4cd7bd9ab 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py @@ -310,7 +310,7 @@ class CompressedTensorsConfig(QuantizationConfig): ) is_float_type = ( weight_quant.type == QuantizationType.FLOAT - and input_quant.type == QuantizationType.FLOAT.value + and input_quant.type == QuantizationType.FLOAT ) is_4_bits = weight_quant.num_bits == 4 and input_quant.num_bits == 4 diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py index 28383491207e7..efc5bd3639f4b 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py @@ -143,7 +143,7 @@ class CompressedTensorsMoEMethod(FusedMoEMethodBase): # Prefer to use the MarlinMoE kernel when it is supported. if not check_moe_marlin_supports_layer(layer, group_size): if ( - weight_quant.strategy in QuantizationStrategy.GROUP + weight_quant.strategy == QuantizationStrategy.GROUP and weight_quant.actorder in (ActivationOrdering.GROUP, ActivationOrdering.DYNAMIC) ):