mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-23 10:44:25 +08:00
[V1] Chore: cruft removal (#11724)
This commit is contained in:
parent
bf0d97d786
commit
ad0d567e1c
@ -225,8 +225,6 @@ class LLM:
|
|||||||
# Logic to switch between engines is done at runtime instead of import
|
# Logic to switch between engines is done at runtime instead of import
|
||||||
# to avoid import order issues
|
# to avoid import order issues
|
||||||
self.engine_class = self.get_engine_class()
|
self.engine_class = self.get_engine_class()
|
||||||
|
|
||||||
# TODO(rob): enable mp by default (issue with fork vs spawn)
|
|
||||||
self.llm_engine = self.engine_class.from_engine_args(
|
self.llm_engine = self.engine_class.from_engine_args(
|
||||||
engine_args, usage_context=UsageContext.LLM_CLASS)
|
engine_args, usage_context=UsageContext.LLM_CLASS)
|
||||||
|
|
||||||
|
|||||||
@ -94,8 +94,6 @@ class InprocClient(EngineCoreClient):
|
|||||||
|
|
||||||
* pushes EngineCoreRequest directly into the EngineCore
|
* pushes EngineCoreRequest directly into the EngineCore
|
||||||
* pulls EngineCoreOutputs by stepping the EngineCore
|
* pulls EngineCoreOutputs by stepping the EngineCore
|
||||||
|
|
||||||
TODO: support asyncio-mode for debugging.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
|
|||||||
@ -42,8 +42,6 @@ class LLMEngine:
|
|||||||
use_cached_outputs: bool = False,
|
use_cached_outputs: bool = False,
|
||||||
multiprocess_mode: bool = False,
|
multiprocess_mode: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
|
||||||
# TODO: Can we avoid this?
|
|
||||||
self.model_config = vllm_config.model_config
|
self.model_config = vllm_config.model_config
|
||||||
|
|
||||||
# Tokenizer (+ ensure liveness if running in another process).
|
# Tokenizer (+ ensure liveness if running in another process).
|
||||||
@ -179,8 +177,6 @@ class LLMEngine:
|
|||||||
|
|
||||||
return request_outputs
|
return request_outputs
|
||||||
|
|
||||||
# TODO(rob): Can we get rid of these?
|
|
||||||
|
|
||||||
def get_model_config(self):
|
def get_model_config(self):
|
||||||
return self.model_config
|
return self.model_config
|
||||||
|
|
||||||
|
|||||||
@ -49,9 +49,6 @@ class Processor:
|
|||||||
cache_config.enable_prefix_caching
|
cache_config.enable_prefix_caching
|
||||||
self.mm_hasher = MMHasher()
|
self.mm_hasher = MMHasher()
|
||||||
|
|
||||||
# TODO: run in a ThreadPoolExecutor or BackgroundProcess.
|
|
||||||
# This ideally should release the GIL, so we should not block the
|
|
||||||
# asyncio loop while this is running.
|
|
||||||
def process_inputs(
|
def process_inputs(
|
||||||
self,
|
self,
|
||||||
request_id: str,
|
request_id: str,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user