mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-22 16:15:01 +08:00
[CI/Build][CPU] Fix CPU CI by lazy importing triton FP8 kernels (#11618)
Signed-off-by: jiang1.li <jiang1.li@intel.com>
This commit is contained in:
parent
970d6d0776
commit
5dbf854553
@ -15,8 +15,6 @@ from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase,
|
|||||||
from vllm.model_executor.layers.quantization.base_config import (
|
from vllm.model_executor.layers.quantization.base_config import (
|
||||||
QuantizationConfig, QuantizeMethodBase)
|
QuantizationConfig, QuantizeMethodBase)
|
||||||
from vllm.model_executor.layers.quantization.kv_cache import BaseKVCacheMethod
|
from vllm.model_executor.layers.quantization.kv_cache import BaseKVCacheMethod
|
||||||
from vllm.model_executor.layers.quantization.utils.fp8_utils import (
|
|
||||||
apply_w8a8_block_fp8_linear)
|
|
||||||
from vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 import (
|
from vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 import (
|
||||||
apply_fp8_marlin_linear, prepare_fp8_layer_for_marlin)
|
apply_fp8_marlin_linear, prepare_fp8_layer_for_marlin)
|
||||||
from vllm.model_executor.layers.quantization.utils.quant_utils import (
|
from vllm.model_executor.layers.quantization.utils.quant_utils import (
|
||||||
@ -337,6 +335,9 @@ class Fp8LinearMethod(LinearMethodBase):
|
|||||||
size_k=layer.input_size_per_partition,
|
size_k=layer.input_size_per_partition,
|
||||||
bias=bias)
|
bias=bias)
|
||||||
|
|
||||||
|
# Note: lazy import to avoid triton import error.
|
||||||
|
from vllm.model_executor.layers.quantization.utils.fp8_utils import (
|
||||||
|
apply_w8a8_block_fp8_linear)
|
||||||
if self.block_quant:
|
if self.block_quant:
|
||||||
assert self.quant_config.weight_block_size is not None
|
assert self.quant_config.weight_block_size is not None
|
||||||
return apply_w8a8_block_fp8_linear(
|
return apply_w8a8_block_fp8_linear(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user