mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 07:45:29 +08:00
45 lines
1.3 KiB
Python
45 lines
1.3 KiB
Python
# SPDX-License-Identifier: Apache-2.0
"""
Example to deploy DeepSeek R1 or V3 with Ray Serve LLM.
See Ray Serve LLM documentation at:
https://docs.ray.io/en/latest/serve/llm/serving-llms.html

Run `python3 ray_serve_deepseek.py` to deploy the model.
"""
|
|
|
|
from ray import serve
|
|
from ray.serve.llm import LLMConfig, LLMRouter, LLMServer
|
|
|
|
# Serve-side description of the model: where to load it from, how many
# replicas to run, which accelerator to schedule on, and the keyword
# arguments forwarded to the inference engine.
llm_config = LLMConfig(
    model_loading_config={
        "model_id": "deepseek",
        # Point this at the directory the model was downloaded to.
        "model_source": "/path/to/the/model",
    },
    # min == max, so the replica count is pinned at exactly one.
    deployment_config={
        "autoscaling_config": {
            "min_replicas": 1,
            "max_replicas": 1,
        },
    },
    # Set this to the accelerator type available on the node.
    accelerator_type="H100",
    runtime_env={"env_vars": {"VLLM_USE_V1": "1"}},
    # Engine arguments (e.g. vLLM engine kwargs); tune as needed.
    # NOTE(review): tensor_parallel_size x pipeline_parallel_size is 8 x 2;
    # presumably this needs 16 GPUs in total - confirm against the cluster.
    engine_kwargs={
        "tensor_parallel_size": 8,
        "pipeline_parallel_size": 2,
        "gpu_memory_utilization": 0.92,
        "dtype": "auto",
        "max_num_seqs": 40,
        "max_model_len": 16384,
        "enable_chunked_prefill": True,
        "enable_prefix_caching": True,
        "trust_remote_code": True,
    },
)
|
|
|
|
# Build and deploy the application:
#   1. derive the Serve deployment options from the LLM config,
#   2. bind an LLMServer deployment to that config,
#   3. put an LLMRouter in front of the server deployment,
#   4. hand the resulting app to Ray Serve.
serve_options = llm_config.get_serve_options(name_prefix="vLLM:")
deployment = LLMServer.as_deployment(serve_options).bind(llm_config)
llm_app = LLMRouter.as_deployment().bind([deployment])
# NOTE(review): serve.run is called with its default arguments, so it is
# presumably non-blocking here and relies on an already-running Ray cluster
# to keep the app alive after this script exits - confirm.
serve.run(llm_app)
|