more fixes

Signed-off-by: Sage Moore <sage@neuralmagic.com>
This commit is contained in:
Sage Moore 2025-06-18 13:58:40 +00:00
parent 0889f66297
commit ff2dd13145
3 changed files with 2 additions and 10 deletions

View File

@@ -170,7 +170,7 @@ def _support_torch_compile(
# e.g. TPU has the compilation logic in model runner, so we don't
# need to compile the model inside.
if self.do_not_compile or torch.compiler.is_compiling():
logger.info("SKIPPING COMPILATION")
# logger.info("SKIPPING COMPILATION")
return self.forward(*args, **kwargs)
# the first compilation needs to have dynamic shapes marked

View File

@@ -337,14 +337,6 @@ class FlashAttentionMetadataBuilder(
# populated on first build() call.
self.aot_sliding_window: Optional[tuple[int, int]] = None
def build(
self, common_prefix_len: int,
common_attn_metadata: CommonAttentionMetadata
) -> FlashAttentionMetadata:
num_reqs = common_attn_metadata.num_reqs
num_actual_tokens = common_attn_metadata.num_actual_tokens
max_query_len = common_attn_metadata.max_query_len
def build_slice(
self,
req_slice: slice,

View File

@@ -610,7 +610,7 @@ class MLACommonMetadataBuilder(AttentionMetadataBuilder[M]):
num_actual_tokens = common_attn_metadata.num_actual_tokens
max_query_len = common_attn_metadata.max_query_len
assert self._num_decodes + self._num_prefills == num_reqs
# assert self._num_decodes + self._num_prefills == num_reqs
return self.build_slice(
req_slice=slice(0, num_reqs),
token_slice=slice(0, num_actual_tokens),