[misc] clean up and unify logging (#10999)

Signed-off-by: youkaichao <youkaichao@gmail.com>

parent 43b05fa314
commit 46004e83a2
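The change replaces per-field logging with a single self-describing config object: VllmConfig gains a string representation that renders every sub-config field in one place, and LLMEngine then logs the whole object through one placeholder instead of threading thirty-odd fields through the logger call. A minimal sketch of the pattern follows; the class and field names here are illustrative stand-ins, not vLLM's real API (the sketch defines __repr__ so the %r placeholder picks it up):

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("sketch")

class ToyConfig:
    """Stand-in for a config object that owns its own rendering."""

    def __init__(self, model: str = "facebook/opt-125m",
                 tensor_parallel_size: int = 1) -> None:
        self.model = model
        self.tensor_parallel_size = tensor_parallel_size

    def __repr__(self) -> str:
        # Every field is formatted in one place, not at each call site.
        return (f"model={self.model!r}, "
                f"tensor_parallel_size={self.tensor_parallel_size}")

config = ToyConfig()
# One placeholder replaces the long per-field argument list:
logger.info("Initializing an LLM engine with config: %r", config)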
vllm/config.py
@@ -2579,45 +2579,40 @@ class VllmConfig:
         self.instance_id = random_uuid()[:5]

     def __str__(self):
-        return ("model=%r, speculative_config=%r, tokenizer=%r, "
-                "skip_tokenizer_init=%s, tokenizer_mode=%s, revision=%s, "
-                "override_neuron_config=%s, tokenizer_revision=%s, "
-                "trust_remote_code=%s, dtype=%s, max_seq_len=%d, "
-                "download_dir=%r, load_format=%s, tensor_parallel_size=%d, "
-                "pipeline_parallel_size=%d, "
-                "disable_custom_all_reduce=%s, quantization=%s, "
-                "enforce_eager=%s, kv_cache_dtype=%s, "
-                "quantization_param_path=%s, device_config=%s, "
-                "decoding_config=%r, observability_config=%r, "
-                "seed=%d, served_model_name=%s, "
-                "num_scheduler_steps=%d, enable_prefix_caching=%s, "
-                "use_async_output_proc=%s, mm_processor_kwargs=%s") % \
-            (self.model_config.model, self.speculative_config,
-             self.model_config.tokenizer,
-             self.model_config.skip_tokenizer_init,
-             self.model_config.tokenizer_mode,
-             self.model_config.revision,
-             self.model_config.override_neuron_config,
-             self.model_config.tokenizer_revision,
-             self.model_config.trust_remote_code,
-             self.model_config.dtype,
-             self.model_config.max_model_len,
-             self.load_config.download_dir,
-             self.load_config.load_format,
-             self.parallel_config.tensor_parallel_size,
-             self.parallel_config.pipeline_parallel_size,
-             self.parallel_config.disable_custom_all_reduce,
-             self.model_config.quantization,
-             self.model_config.enforce_eager,
-             self.cache_config.cache_dtype,
-             self.model_config.quantization_param_path,
-             self.device_config.device, self.decoding_config,
-             self.observability_config, self.model_config.seed,
-             self.model_config.served_model_name,
-             self.scheduler_config.num_scheduler_steps,
-             self.cache_config.enable_prefix_caching,
-             self.model_config.use_async_output_proc,
-             self.model_config.mm_processor_kwargs)
+        return (
+            f"model={self.model_config.model!r},"
+            f" speculative_config={self.speculative_config!r},"
+            f" tokenizer={self.model_config.tokenizer!r}, "
+            f"skip_tokenizer_init={self.model_config.skip_tokenizer_init},"
+            f" tokenizer_mode={self.model_config.tokenizer_mode}, "
+            f"revision={self.model_config.revision}, "
+            f"override_neuron_config={self.model_config.override_neuron_config},"
+            f" tokenizer_revision={self.model_config.tokenizer_revision}, "
+            f"trust_remote_code={self.model_config.trust_remote_code}, "
+            f"dtype={self.model_config.dtype}, "
+            f"max_seq_len={self.model_config.max_model_len},"
+            f" download_dir={self.load_config.download_dir!r}, "
+            f"load_format={self.load_config.load_format}, "
+            f"tensor_parallel_size={self.parallel_config.tensor_parallel_size},"
+            f" pipeline_parallel_size={self.parallel_config.pipeline_parallel_size}, "  # noqa
+            f"disable_custom_all_reduce={self.parallel_config.disable_custom_all_reduce}, "  # noqa
+            f"quantization={self.model_config.quantization}, "
+            f"enforce_eager={self.model_config.enforce_eager}, "
+            f"kv_cache_dtype={self.cache_config.cache_dtype}, "
+            f"quantization_param_path={self.model_config.quantization_param_path},"
+            f" device_config={self.device_config.device}, "
+            f"decoding_config={self.decoding_config!r}, "
+            f"observability_config={self.observability_config!r}, "
+            f"seed={self.model_config.seed}, "
+            f"served_model_name={self.model_config.served_model_name}, "
+            f"num_scheduler_steps={self.scheduler_config.num_scheduler_steps}, "
+            f"multi_step_stream_outputs={self.scheduler_config.multi_step_stream_outputs}, "  # noqa
+            f"enable_prefix_caching={self.cache_config.enable_prefix_caching}, "
+            f"chunked_prefill_enabled={self.scheduler_config.chunked_prefill_enabled}, "  # noqa
+            f"use_async_output_proc={self.model_config.use_async_output_proc}, "
+            f"mm_processor_kwargs={self.model_config.mm_processor_kwargs}, "
+            f"pooler_config={self.model_config.pooler_config!r},"
+            f" compilation_config={self.compilation_config!r}")


 _current_vllm_config: Optional[VllmConfig] = None
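A side note on the conversion itself: the !r suffix in the new f-strings is the direct counterpart of the old %r placeholders; both render repr() of the value, so the quoting of string fields like download_dir is preserved exactly. A quick illustration, using a made-up path value:

path = "weights/model.bin"  # hypothetical value for illustration
old_style = "download_dir=%r, load_format=%s" % (path, "auto")
new_style = f"download_dir={path!r}, load_format={'auto'}"
assert old_style == new_style
print(new_style)  # download_dir='weights/model.bin', load_format=auto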
vllm/engine/llm_engine.py
@@ -247,60 +247,12 @@ class LLMEngine:
         )

         logger.info(
-            "Initializing an LLM engine (v%s) with config: "
-            "model=%r, speculative_config=%r, tokenizer=%r, "
-            "skip_tokenizer_init=%s, tokenizer_mode=%s, revision=%s, "
-            "override_neuron_config=%s, tokenizer_revision=%s, "
-            "trust_remote_code=%s, dtype=%s, max_seq_len=%d, "
-            "download_dir=%r, load_format=%s, tensor_parallel_size=%d, "
-            "pipeline_parallel_size=%d, "
-            "disable_custom_all_reduce=%s, quantization=%s, "
-            "enforce_eager=%s, kv_cache_dtype=%s, "
-            "quantization_param_path=%s, device_config=%s, "
-            "decoding_config=%r, observability_config=%r, "
-            "seed=%d, served_model_name=%s, "
-            "num_scheduler_steps=%d, chunked_prefill_enabled=%s "
-            "multi_step_stream_outputs=%s, enable_prefix_caching=%s, "
-            "use_async_output_proc=%s, use_cached_outputs=%s, "
-            "mm_processor_kwargs=%s, pooler_config=%r,"
-            "compilation_config=%r",
+            "Initializing an LLM engine (v%s) with config: %r,"
+            "use_cached_outputs=%s, ",
             VLLM_VERSION,
-            self.model_config.model,
-            self.speculative_config,
-            self.model_config.tokenizer,
-            self.model_config.skip_tokenizer_init,
-            self.model_config.tokenizer_mode,
-            self.model_config.revision,
-            self.model_config.override_neuron_config,
-            self.model_config.tokenizer_revision,
-            self.model_config.trust_remote_code,
-            self.model_config.dtype,
-            self.model_config.max_model_len,
-            self.load_config.download_dir,
-            self.load_config.load_format,
-            self.parallel_config.tensor_parallel_size,
-            self.parallel_config.pipeline_parallel_size,
-            self.parallel_config.disable_custom_all_reduce,
-            self.model_config.quantization,
-            self.model_config.enforce_eager,
-            self.cache_config.cache_dtype,
-            self.model_config.quantization_param_path,
-            self.device_config.device,
-            self.decoding_config,
-            self.observability_config,
-            self.model_config.seed,
-            self.model_config.served_model_name,
-            self.scheduler_config.num_scheduler_steps,
-            self.scheduler_config.chunked_prefill_enabled,
-            self.scheduler_config.multi_step_stream_outputs,
-            self.cache_config.enable_prefix_caching,
-            self.model_config.use_async_output_proc,
+            vllm_config,
             use_cached_outputs,
-            self.model_config.mm_processor_kwargs,
-            self.model_config.pooler_config,
-            vllm_config.compilation_config,
         )
+        # TODO(woosuk): Print more configs in debug mode.

         self.log_stats = log_stats
         self.use_cached_outputs = use_cached_outputs
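Why keep %-style placeholders here rather than switching the logger call itself to an f-string? The logging module formats arguments lazily: the config's repr is only computed if the record actually passes the level check. A minimal sketch of that behavior (the class name is illustrative):

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger("sketch")

class CostlyConfig:
    def __repr__(self) -> str:
        print("repr() computed")  # side effect to show when formatting runs
        return "CostlyConfig()"

cfg = CostlyConfig()
# Lazy: INFO is below WARNING, so the record is dropped before
# formatting and repr() never runs.
logger.info("Initializing an LLM engine with config: %r", cfg)
# Eager: an f-string computes repr() before the call, even though
# the message is then discarded.
logger.info(f"Initializing an LLM engine with config: {cfg!r}")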