mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-14 19:05:35 +08:00
[Bugfix] Fix block size in block_table with PCP (#29094)
Signed-off-by: Livinfly <luojie3m@gmail.com>
This commit is contained in:
parent
ed8e6843cc
commit
5c8f2adf50
@ -84,7 +84,7 @@ class BlockTable:
|
|||||||
self.pcp_world_size = get_pcp_group().world_size
|
self.pcp_world_size = get_pcp_group().world_size
|
||||||
self.pcp_rank = get_pcp_group().rank_in_group
|
self.pcp_rank = get_pcp_group().rank_in_group
|
||||||
except AssertionError:
|
except AssertionError:
|
||||||
# DCP might not be initialized in testing
|
# PCP might not be initialized in testing
|
||||||
self.pcp_world_size = 1
|
self.pcp_world_size = 1
|
||||||
self.pcp_rank = 0
|
self.pcp_rank = 0
|
||||||
try:
|
try:
|
||||||
@ -268,6 +268,11 @@ class MultiGroupBlockTable:
|
|||||||
# (max_model_len//dcp_world_size) tokens in kvcache,
|
# (max_model_len//dcp_world_size) tokens in kvcache,
|
||||||
# so the block_size used to calculate max_num_blocks_per_req
|
# so the block_size used to calculate max_num_blocks_per_req
|
||||||
# must be multiplied by dcp_world_size.
|
# must be multiplied by dcp_world_size.
|
||||||
|
try:
|
||||||
|
pcp_world_size = get_pcp_group().world_size
|
||||||
|
except AssertionError:
|
||||||
|
# PCP might not be initialized in testing
|
||||||
|
pcp_world_size = 1
|
||||||
try:
|
try:
|
||||||
dcp_world_size = get_dcp_group().world_size
|
dcp_world_size = get_dcp_group().world_size
|
||||||
except AssertionError:
|
except AssertionError:
|
||||||
@ -280,12 +285,14 @@ class MultiGroupBlockTable:
|
|||||||
f"must match block_sizes length ({len(block_sizes)})"
|
f"must match block_sizes length ({len(block_sizes)})"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
total_cp_world_size = dcp_world_size * pcp_world_size
|
||||||
|
|
||||||
self.block_tables = [
|
self.block_tables = [
|
||||||
BlockTable(
|
BlockTable(
|
||||||
block_size,
|
block_size,
|
||||||
max_num_reqs,
|
max_num_reqs,
|
||||||
max(
|
max(
|
||||||
cdiv(max_model_len, block_size * dcp_world_size),
|
cdiv(max_model_len, block_size * total_cp_world_size),
|
||||||
1 + num_speculative_tokens,
|
1 + num_speculative_tokens,
|
||||||
),
|
),
|
||||||
max_num_batched_tokens,
|
max_num_batched_tokens,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user