mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 08:04:58 +08:00
[XPU] fix xpu to set cudagraph batch sizes (#23044)
Signed-off-by: calvin chen <wen.chen@dynamia.ai>
This commit is contained in:
parent
6d243efeda
commit
21e39436c8
@@ -232,8 +232,10 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
||||
# The convention is different.
|
||||
# self.cudagraph_batch_sizes sorts in ascending order.
|
||||
# The batch sizes in the config are in descending order.
|
||||
-self.cudagraph_batch_sizes = list(
|
||||
-reversed(self.compilation_config.cudagraph_capture_sizes))
|
||||
+if self.compilation_config.cudagraph_capture_sizes and \
|
||||
+self.compilation_config.cudagraph_mode != CUDAGraphMode.NONE:
|
||||
+self.cudagraph_batch_sizes = list(
|
||||
+reversed(self.compilation_config.cudagraph_capture_sizes))
|
||||
|
||||
# Cache the device properties.
|
||||
self._init_device_properties()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user