mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-27 12:54:28 +08:00
fixes
Signed-off-by: Bill Nell <bnell@redhat.com>
This commit is contained in:
parent
d86e3f0172
commit
caca0b718a
@ -83,6 +83,9 @@ class PPLXAll2AllManager(All2AllManagerBase):
|
|||||||
assert has_pplx, "pplx_kernels not found. Please follow https://github.com/vllm-project/vllm/blob/main/tools/ep_kernels/README.md to install pplx_kernels." # noqa
|
assert has_pplx, "pplx_kernels not found. Please follow https://github.com/vllm-project/vllm/blob/main/tools/ep_kernels/README.md to install pplx_kernels." # noqa
|
||||||
super().__init__(cpu_group)
|
super().__init__(cpu_group)
|
||||||
|
|
||||||
|
# Intranode doesn't work yet.
|
||||||
|
self.internode = True
|
||||||
|
|
||||||
if self.internode:
|
if self.internode:
|
||||||
# inter-node communication needs nvshmem,
|
# inter-node communication needs nvshmem,
|
||||||
# intra-node communication uses p2p mapping directly
|
# intra-node communication uses p2p mapping directly
|
||||||
|
|||||||
@ -269,9 +269,12 @@ class FusedMoEMethodBase(QuantizeMethodBase):
|
|||||||
hidden_dim_scale_bytes=(0 if moe.in_dtype.itemsize != 1 else (
|
hidden_dim_scale_bytes=(0 if moe.in_dtype.itemsize != 1 else (
|
||||||
(moe.hidden_dim + moe.block_size - 1) // moe.block_size *
|
(moe.hidden_dim + moe.block_size - 1) // moe.block_size *
|
||||||
torch.float32.itemsize)),
|
torch.float32.itemsize)),
|
||||||
group_name=all2all_manager.cpu_group.group_name,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if not all2all_manager.internode:
|
||||||
|
all_to_all_args["group_name"] = \
|
||||||
|
all2all_manager.cpu_group.group_name
|
||||||
|
|
||||||
handle = all2all_manager.get_handle(all_to_all_args)
|
handle = all2all_manager.get_handle(all_to_all_args)
|
||||||
|
|
||||||
logger.debug("PplxPrepareAndFinalize")
|
logger.debug("PplxPrepareAndFinalize")
|
||||||
|
|||||||
@ -790,7 +790,7 @@ class Fp8MoEMethod(FusedMoEMethodBase):
|
|||||||
max_num_tokens=MOE_DP_CHUNK_SIZE,
|
max_num_tokens=MOE_DP_CHUNK_SIZE,
|
||||||
world_size=all2all_manager.world_size,
|
world_size=all2all_manager.world_size,
|
||||||
dp_size=all2all_manager.tp_group.world_size,
|
dp_size=all2all_manager.tp_group.world_size,
|
||||||
qtype=torch.float8_e4m3fn,
|
use_fp8_w8a8=True,
|
||||||
block_shape=self.quant_config.weight_block_size,
|
block_shape=self.quant_config.weight_block_size,
|
||||||
per_act_token=False, #?
|
per_act_token=False, #?
|
||||||
)
|
)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user