mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 05:55:01 +08:00
[Misc] fix typo and add detailed log (#28178)
Signed-off-by: Andy Xie <andy.xning@gmail.com>
This commit is contained in:
parent
3a7d580343
commit
e5e9067e61
@@ -11,7 +11,7 @@ python save_sharded_state.py \
|
|||||||
--model /path/to/load \
|
--model /path/to/load \
|
||||||
--quantization deepspeedfp \
|
--quantization deepspeedfp \
|
||||||
--tensor-parallel-size 8 \
|
--tensor-parallel-size 8 \
|
||||||
--output /path/to/save/sharded/modele
|
--output /path/to/save/sharded/model
|
||||||
|
|
||||||
python load_sharded_state.py \
|
python load_sharded_state.py \
|
||||||
--model /path/to/saved/sharded/model \
|
--model /path/to/saved/sharded/model \
|
||||||
|
|||||||
@@ -3035,7 +3035,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
|||||||
time_after_load = time.perf_counter()
|
time_after_load = time.perf_counter()
|
||||||
self.model_memory_usage = m.consumed_memory
|
self.model_memory_usage = m.consumed_memory
|
||||||
logger.info_once(
|
logger.info_once(
|
||||||
"Model loading took %.4f GiB and %.6f seconds",
|
"Model loading took %.4f GiB memory and %.6f seconds",
|
||||||
self.model_memory_usage / GiB_bytes,
|
self.model_memory_usage / GiB_bytes,
|
||||||
time_after_load - time_before_load,
|
time_after_load - time_before_load,
|
||||||
scope="local",
|
scope="local",
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user