mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-17 07:05:01 +08:00
[V1][Structured Output] Clear xgrammar compiler object when engine core shut down to avoid nanobind leaked warning (#16954)
Signed-off-by: shen-shanshan <467638484@qq.com>
This commit is contained in:
parent
21f4f1c9a4
commit
b724afe343
@ -253,6 +253,7 @@ class EngineCore:
|
|||||||
return engine_core_outputs
|
return engine_core_outputs
|
||||||
|
|
||||||
def shutdown(self):
|
def shutdown(self):
|
||||||
|
self.structured_output_manager.clear_backend()
|
||||||
if self.model_executor:
|
if self.model_executor:
|
||||||
self.model_executor.shutdown()
|
self.model_executor.shutdown()
|
||||||
|
|
||||||
|
|||||||
@ -107,3 +107,7 @@ class StructuredOutputManager:
|
|||||||
# np.ndarray, because that is much more efficient for serialization
|
# np.ndarray, because that is much more efficient for serialization
|
||||||
# and deserialization when sending this to the GPU workers.
|
# and deserialization when sending this to the GPU workers.
|
||||||
return bitmask_tensor.numpy()
|
return bitmask_tensor.numpy()
|
||||||
|
|
||||||
|
def clear_backend(self) -> None:
|
||||||
|
if self.backend is not None:
|
||||||
|
self.backend.destroy()
|
||||||
|
|||||||
@ -108,6 +108,9 @@ class GuidanceBackend(StructuredOutputBackend):
|
|||||||
return llguidance_torch.allocate_token_bitmask(
|
return llguidance_torch.allocate_token_bitmask(
|
||||||
max_num_seqs, self.ll_tokenizer.vocab_size)
|
max_num_seqs, self.ll_tokenizer.vocab_size)
|
||||||
|
|
||||||
|
def destroy(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class GuidanceGrammar(StructuredOutputGrammar):
|
class GuidanceGrammar(StructuredOutputGrammar):
|
||||||
|
|||||||
@ -87,3 +87,9 @@ class StructuredOutputBackend(ABC):
|
|||||||
max_num_seqs (int): The maximum number of sequences for which
|
max_num_seqs (int): The maximum number of sequences for which
|
||||||
to allocate the bitmask.
|
to allocate the bitmask.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def destroy(self):
|
||||||
|
"""
|
||||||
|
Backend-specific cleanup.
|
||||||
|
"""
|
||||||
|
|||||||
@ -124,6 +124,9 @@ class XgrammarBackend(StructuredOutputBackend):
|
|||||||
def allocate_token_bitmask(self, max_num_seqs: int):
|
def allocate_token_bitmask(self, max_num_seqs: int):
|
||||||
return xgr.allocate_token_bitmask(max_num_seqs, self.vocab_size)
|
return xgr.allocate_token_bitmask(max_num_seqs, self.vocab_size)
|
||||||
|
|
||||||
|
def destroy(self):
|
||||||
|
del self.compiler
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class XgrammarGrammar(StructuredOutputGrammar):
|
class XgrammarGrammar(StructuredOutputGrammar):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user