mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 11:17:07 +08:00
[Bugfix] Fix benchmark_moe.py for blockwise fp8. (#23823)
Signed-off-by: crischeng <420985011@qq.com>
Co-authored-by: cris <grace@guisenbindeMacBook-Pro.local>
This commit is contained in:
parent
d3da2eea54
commit
66548f6603
@@ -419,8 +419,10 @@ class BenchmarkWorker:
|
|||||||
)
|
)
|
||||||
# NOTE(woosuk): The current naming convention uses w2.shape[2], which
|
# NOTE(woosuk): The current naming convention uses w2.shape[2], which
|
||||||
# is the intermediate size after silu_and_mul.
|
# is the intermediate size after silu_and_mul.
|
||||||
|
block_n = block_quant_shape[0] if block_quant_shape else None
|
||||||
|
block_k = block_quant_shape[1] if block_quant_shape else None
|
||||||
op_config = get_moe_configs(
|
op_config = get_moe_configs(
|
||||||
num_experts, shard_intermediate_size // 2, dtype_str
|
num_experts, shard_intermediate_size // 2, dtype_str, block_n, block_k
|
||||||
)
|
)
|
||||||
if op_config is None:
|
if op_config is None:
|
||||||
config = get_default_config(
|
config = get_default_config(
|
||||||
@@ -430,6 +432,7 @@ class BenchmarkWorker:
|
|||||||
hidden_size,
|
hidden_size,
|
||||||
topk,
|
topk,
|
||||||
dtype_str,
|
dtype_str,
|
||||||
|
block_quant_shape,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
config = op_config[min(op_config.keys(), key=lambda x: abs(x - num_tokens))]
|
config = op_config[min(op_config.keys(), key=lambda x: abs(x - num_tokens))]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user