mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-23 06:43:30 +08:00
Signed-off-by: 苏政渊 <suzhengyuan@moonshot.cn> Co-authored-by: 苏政渊 <suzhengyuan@moonshot.cn>
This commit is contained in:
parent
d059110498
commit
f961d7f6ef
@ -37,7 +37,7 @@ from vllm.attention.backends.utils import (PAD_SLOT_ID, compute_slot_mapping,
|
|||||||
is_block_tables_empty)
|
is_block_tables_empty)
|
||||||
from vllm.attention.layer import Attention
|
from vllm.attention.layer import Attention
|
||||||
from vllm.attention.ops.paged_attn import PagedAttention
|
from vllm.attention.ops.paged_attn import PagedAttention
|
||||||
from vllm.config import VllmConfig, get_current_vllm_config
|
from vllm.config import VllmConfig
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.utils import (async_tensor_h2d, get_kv_cache_torch_dtype,
|
from vllm.utils import (async_tensor_h2d, get_kv_cache_torch_dtype,
|
||||||
make_tensor_with_pad)
|
make_tensor_with_pad)
|
||||||
@ -187,7 +187,7 @@ class FlashInferState(AttentionState):
|
|||||||
# Global hyperparameters shared by all attention layers
|
# Global hyperparameters shared by all attention layers
|
||||||
self.global_hyperparameters: Optional[PerLayerParameters] = None
|
self.global_hyperparameters: Optional[PerLayerParameters] = None
|
||||||
|
|
||||||
self.vllm_config = get_current_vllm_config()
|
self.vllm_config = self.runner.vllm_config
|
||||||
|
|
||||||
def _get_workspace_buffer(self):
|
def _get_workspace_buffer(self):
|
||||||
if self._workspace_buffer is None:
|
if self._workspace_buffer is None:
|
||||||
@ -613,7 +613,7 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
|
|||||||
# Global hyperparameters shared by all attention layers
|
# Global hyperparameters shared by all attention layers
|
||||||
self.global_hyperparameters: Optional[PerLayerParameters] = None
|
self.global_hyperparameters: Optional[PerLayerParameters] = None
|
||||||
|
|
||||||
self.vllm_config = get_current_vllm_config()
|
self.vllm_config = self.runner.vllm_config
|
||||||
|
|
||||||
def prepare(self):
|
def prepare(self):
|
||||||
self.slot_mapping: List[int] = []
|
self.slot_mapping: List[int] = []
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user