diff --git a/vllm/model_executor/guided_decoding/outlines_logits_processors.py b/vllm/model_executor/guided_decoding/outlines_logits_processors.py index ab72b55a8943..a05267d921d1 100644 --- a/vllm/model_executor/guided_decoding/outlines_logits_processors.py +++ b/vllm/model_executor/guided_decoding/outlines_logits_processors.py @@ -32,6 +32,8 @@ from outlines_core.fsm.json_schema import build_regex_from_schema from pydantic import BaseModel from transformers import PreTrainedTokenizerBase +from vllm.platforms import current_platform + class BaseLogitsProcessor: @@ -91,7 +93,14 @@ class BaseLogitsProcessor: allowed_tokens = allowed_tokens.masked_select( allowed_tokens < scores.shape[-1]) mask.index_fill_(0, allowed_tokens, 0) - scores.add_(mask) + if current_platform.is_hpu(): + # Workaround for HPU bug where add_() raise RuntimeError: + # synNodeCreateWithId failed for node: strided_insert + # with synStatus 1 [Invalid argument], hopefully it will + # be fixed in the future releases of the HPU runtime. + scores = scores.add(mask) + else: + scores.add_(mask) return scores