From da855b42d2005d9b701758e9d59836131ee8c6fb Mon Sep 17 00:00:00 2001 From: Fang Han Date: Thu, 6 Nov 2025 12:27:16 -0800 Subject: [PATCH] [Doc]: Make extraInit containers fully configurable in helm chart (#27497) Signed-off-by: Fang Han --- docs/deployment/frameworks/helm.md | 49 ++++++- examples/online_serving/chart-helm/README.md | 12 ++ .../chart-helm/templates/_helpers.tpl | 7 +- .../chart-helm/templates/deployment.yaml | 23 ++- .../chart-helm/templates/job.yaml | 18 ++- .../chart-helm/tests/deployment_test.yaml | 135 ++++++++++++++++++ .../chart-helm/tests/job_test.yaml | 61 ++++++++ .../chart-helm/tests/pvc_test.yaml | 32 +++++ .../chart-helm/values.schema.json | 70 ++++++++- .../online_serving/chart-helm/values.yaml | 59 +++++++- 10 files changed, 439 insertions(+), 27 deletions(-) create mode 100644 examples/online_serving/chart-helm/tests/deployment_test.yaml create mode 100644 examples/online_serving/chart-helm/tests/job_test.yaml create mode 100644 examples/online_serving/chart-helm/tests/pvc_test.yaml diff --git a/docs/deployment/frameworks/helm.md b/docs/deployment/frameworks/helm.md index e5d44945ba725..1d9e3632593ad 100644 --- a/docs/deployment/frameworks/helm.md +++ b/docs/deployment/frameworks/helm.md @@ -13,7 +13,7 @@ Before you begin, ensure that you have the following: - A running Kubernetes cluster - NVIDIA Kubernetes Device Plugin (`k8s-device-plugin`): This can be found at [https://github.com/NVIDIA/k8s-device-plugin](https://github.com/NVIDIA/k8s-device-plugin) - Available GPU resources in your cluster -- An S3 with the model which will be deployed +- (Optional) An S3 bucket or other storage with the model weights, if using automatic model download ## Installing the chart @@ -61,10 +61,16 @@ The following table describes configurable parameters of the chart in `values.ya | deploymentStrategy | object | {} | Deployment strategy configuration | | externalConfigs | list | [] | External configuration | | extraContainers | list | [] | Additional containers configuration | -| extraInit | object | {"pvcStorage":"1Gi","s3modelpath":"relative_s3_model_path/opt-125m", "awsEc2MetadataDisabled": true} | Additional configuration for the init container | -| extraInit.pvcStorage | string | "1Gi" | Storage size of the s3 | -| extraInit.s3modelpath | string | "relative_s3_model_path/opt-125m" | Path of the model on the s3 which hosts model weights and config files | -| extraInit.awsEc2MetadataDisabled | boolean | true | Disables the use of the Amazon EC2 instance metadata service | +| extraInit | object | {"modelDownload":{"enabled":true},"initContainers":[],"pvcStorage":"1Gi"} | Additional configuration for init containers | +| extraInit.modelDownload | object | {"enabled":true} | Model download functionality configuration | +| extraInit.modelDownload.enabled | bool | true | Enable automatic model download job and wait container | +| extraInit.modelDownload.image | object | {"repository":"amazon/aws-cli","tag":"2.6.4","pullPolicy":"IfNotPresent"} | Image for model download operations | +| extraInit.modelDownload.waitContainer | object | {} | Wait container configuration (command, args, env) | +| extraInit.modelDownload.downloadJob | object | {} | Download job configuration (command, args, env) | +| extraInit.initContainers | list | [] | Custom init containers (appended after model download if enabled) | +| extraInit.pvcStorage | string | "1Gi" | Storage size for the PVC | +| extraInit.s3modelpath | string | "relative_s3_model_path/opt-125m" | (Optional) Path of the model on S3 | +| extraInit.awsEc2MetadataDisabled | bool | true | (Optional) Disable AWS EC2 metadata service | | extraPorts | list | [] | Additional ports configuration | | gpuModels | list | ["TYPE_GPU_USED"] | Type of gpu used | | image | object | {"command":["vllm","serve","/data/","--served-model-name","opt-125m","--host","0.0.0.0","--port","8000"],"repository":"vllm/vllm-openai","tag":"latest"} | Image configuration | @@ -98,3 +104,36 @@ The following table describes configurable parameters of the chart in `values.ya | serviceName | string | "" | Service name | | servicePort | int | 80 | Service port | | labels.environment | string | test | Environment name | + +## Configuration Examples + +### Using S3 Model Download (Default) + +```yaml +extraInit: + modelDownload: + enabled: true + pvcStorage: "10Gi" + s3modelpath: "models/llama-7b" +``` + +### Using Custom Init Containers Only + +For use cases like llm-d where you need custom sidecars without model download: + +```yaml +extraInit: + modelDownload: + enabled: false + initContainers: + - name: llm-d-routing-proxy + image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0 + imagePullPolicy: IfNotPresent + ports: + - containerPort: 8080 + name: proxy + securityContext: + runAsUser: 1000 + restartPolicy: Always + pvcStorage: "10Gi" +``` diff --git a/examples/online_serving/chart-helm/README.md b/examples/online_serving/chart-helm/README.md index bfe81121d1fd4..4376aac488f05 100644 --- a/examples/online_serving/chart-helm/README.md +++ b/examples/online_serving/chart-helm/README.md @@ -19,3 +19,15 @@ This directory contains a Helm chart for deploying the vllm application. The cha - templates/pvc.yaml: Template for Persistent Volume Claims. - templates/secrets.yaml: Template for Kubernetes Secrets. - templates/service.yaml: Template for creating Services. + +## Running Tests + +This chart includes unit tests using [helm-unittest](https://github.com/helm-unittest/helm-unittest). Install the plugin and run tests: + +```bash +# Install plugin +helm plugin install https://github.com/helm-unittest/helm-unittest + +# Run tests +helm unittest . +``` diff --git a/examples/online_serving/chart-helm/templates/_helpers.tpl b/examples/online_serving/chart-helm/templates/_helpers.tpl index a9690bad3c945..3226c1d79c428 100644 --- a/examples/online_serving/chart-helm/templates/_helpers.tpl +++ b/examples/online_serving/chart-helm/templates/_helpers.tpl @@ -123,9 +123,6 @@ runAsUser: {{- end }} {{- end }} -{{- define "chart.extraInitImage" -}} -"amazon/aws-cli:2.6.4" -{{- end }} {{- define "chart.extraInitEnv" -}} - name: S3_ENDPOINT_URL @@ -148,11 +145,15 @@ runAsUser: secretKeyRef: name: {{ .Release.Name }}-secrets key: s3accesskey +{{- if .Values.extraInit.s3modelpath }} - name: S3_PATH value: "{{ .Values.extraInit.s3modelpath }}" +{{- end }} +{{- if hasKey .Values.extraInit "awsEc2MetadataDisabled" }} - name: AWS_EC2_METADATA_DISABLED value: "{{ .Values.extraInit.awsEc2MetadataDisabled }}" {{- end }} +{{- end }} {{/* Define chart labels diff --git a/examples/online_serving/chart-helm/templates/deployment.yaml b/examples/online_serving/chart-helm/templates/deployment.yaml index 536983b587be2..a0a3c4b9ee523 100644 --- a/examples/online_serving/chart-helm/templates/deployment.yaml +++ b/examples/online_serving/chart-helm/templates/deployment.yaml @@ -72,16 +72,21 @@ spec: {{ toYaml . | nindent 8 }} {{- end }} - {{- if .Values.extraInit }} + {{- if and .Values.extraInit (or .Values.extraInit.modelDownload.enabled .Values.extraInit.initContainers) }} initContainers: + {{- if .Values.extraInit.modelDownload.enabled }} - name: wait-download-model - image: {{ include "chart.extraInitImage" . }} - command: - - /bin/bash + image: {{ .Values.extraInit.modelDownload.image.repository }}:{{ .Values.extraInit.modelDownload.image.tag }} + imagePullPolicy: {{ .Values.extraInit.modelDownload.image.pullPolicy }} + command: {{ .Values.extraInit.modelDownload.waitContainer.command | toJson }} args: - - -eucx - - while aws --endpoint-url $S3_ENDPOINT_URL s3 sync --dryrun s3://$S3_BUCKET_NAME/$S3_PATH /data | grep -q download; do sleep 10; done - env: {{- include "chart.extraInitEnv" . | nindent 10 }} + {{- toYaml .Values.extraInit.modelDownload.waitContainer.args | nindent 10 }} + env: + {{- if .Values.extraInit.modelDownload.waitContainer.env }} + {{- toYaml .Values.extraInit.modelDownload.waitContainer.env | nindent 10 }} + {{- else }} + {{- include "chart.extraInitEnv" . | nindent 10 }} + {{- end }} resources: requests: cpu: 200m @@ -93,6 +98,10 @@ spec: - name: {{ .Release.Name }}-storage mountPath: /data {{- end }} + {{- with .Values.extraInit.initContainers }} + {{- toYaml . | nindent 6 }} + {{- end }} + {{- end }} volumes: - name: {{ .Release.Name }}-storage persistentVolumeClaim: diff --git a/examples/online_serving/chart-helm/templates/job.yaml b/examples/online_serving/chart-helm/templates/job.yaml index f9ea3541e78d2..98d313916ca48 100644 --- a/examples/online_serving/chart-helm/templates/job.yaml +++ b/examples/online_serving/chart-helm/templates/job.yaml @@ -1,4 +1,4 @@ -{{- if .Values.extraInit }} +{{- if and .Values.extraInit .Values.extraInit.modelDownload.enabled }} apiVersion: batch/v1 kind: Job metadata: @@ -12,13 +12,17 @@ spec: spec: containers: - name: job-download-model - image: {{ include "chart.extraInitImage" . }} - command: - - /bin/bash + image: {{ .Values.extraInit.modelDownload.image.repository }}:{{ .Values.extraInit.modelDownload.image.tag }} + imagePullPolicy: {{ .Values.extraInit.modelDownload.image.pullPolicy }} + command: {{ .Values.extraInit.modelDownload.downloadJob.command | toJson }} args: - - -eucx - - aws --endpoint-url $S3_ENDPOINT_URL s3 sync s3://$S3_BUCKET_NAME/$S3_PATH /data - env: {{- include "chart.extraInitEnv" . | nindent 8 }} + {{- toYaml .Values.extraInit.modelDownload.downloadJob.args | nindent 8 }} + env: + {{- if .Values.extraInit.modelDownload.downloadJob.env }} + {{- toYaml .Values.extraInit.modelDownload.downloadJob.env | nindent 8 }} + {{- else }} + {{- include "chart.extraInitEnv" . | nindent 8 }} + {{- end }} volumeMounts: - name: {{ .Release.Name }}-storage mountPath: /data diff --git a/examples/online_serving/chart-helm/tests/deployment_test.yaml b/examples/online_serving/chart-helm/tests/deployment_test.yaml new file mode 100644 index 0000000000000..9b7472cf0fd43 --- /dev/null +++ b/examples/online_serving/chart-helm/tests/deployment_test.yaml @@ -0,0 +1,135 @@ +suite: test deployment +templates: + - deployment.yaml +tests: + - it: should create wait-download-model init container when modelDownload is enabled + set: + extraInit: + modelDownload: + enabled: true + image: + repository: "amazon/aws-cli" + tag: "2.6.4" + pullPolicy: "IfNotPresent" + waitContainer: + command: [ "/bin/bash" ] + args: + - "-eucx" + - "while aws --endpoint-url $S3_ENDPOINT_URL s3 sync --dryrun s3://$S3_BUCKET_NAME/$S3_PATH /data | grep -q download; do sleep 10; done" + downloadJob: + command: [ "/bin/bash" ] + args: + - "-eucx" + - "aws --endpoint-url $S3_ENDPOINT_URL s3 sync s3://$S3_BUCKET_NAME/$S3_PATH /data" + initContainers: [ ] + pvcStorage: "1Gi" + s3modelpath: "relative_s3_model_path/opt-125m" + awsEc2MetadataDisabled: true + asserts: + - hasDocuments: + count: 1 + - isKind: + of: Deployment + - isNotEmpty: + path: spec.template.spec.initContainers + - equal: + path: spec.template.spec.initContainers[0].name + value: wait-download-model + - equal: + path: spec.template.spec.initContainers[0].image + value: amazon/aws-cli:2.6.4 + - equal: + path: spec.template.spec.initContainers[0].imagePullPolicy + value: IfNotPresent + + - it: should only create custom init containers when modelDownload is disabled + set: + extraInit: + modelDownload: + enabled: false + image: + repository: "amazon/aws-cli" + tag: "2.6.4" + pullPolicy: "IfNotPresent" + waitContainer: + command: [ "/bin/bash" ] + args: [ "-c", "echo test" ] + downloadJob: + command: [ "/bin/bash" ] + args: [ "-c", "echo test" ] + initContainers: + - name: llm-d-routing-proxy + image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0 + imagePullPolicy: IfNotPresent + ports: + - containerPort: 8080 + name: proxy + pvcStorage: "10Gi" + asserts: + - hasDocuments: + count: 1 + - isKind: + of: Deployment + - lengthEqual: + path: spec.template.spec.initContainers + count: 1 + - equal: + path: spec.template.spec.initContainers[0].name + value: llm-d-routing-proxy + - equal: + path: spec.template.spec.initContainers[0].image + value: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0 + - equal: + path: spec.template.spec.initContainers[0].ports[0].containerPort + value: 8080 + + - it: should create both wait-download-model and custom init containers when both are enabled + set: + extraInit: + modelDownload: + enabled: true + image: + repository: "amazon/aws-cli" + tag: "2.6.4" + pullPolicy: "IfNotPresent" + waitContainer: + command: [ "/bin/bash" ] + args: + - "-eucx" + - "while aws --endpoint-url $S3_ENDPOINT_URL s3 sync --dryrun s3://$S3_BUCKET_NAME/$S3_PATH /data | grep -q download; do sleep 10; done" + downloadJob: + command: [ "/bin/bash" ] + args: + - "-eucx" + - "aws --endpoint-url $S3_ENDPOINT_URL s3 sync s3://$S3_BUCKET_NAME/$S3_PATH /data" + initContainers: + - name: llm-d-routing-proxy + image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0 + imagePullPolicy: IfNotPresent + ports: + - containerPort: 8080 + name: proxy + pvcStorage: "10Gi" + asserts: + - hasDocuments: + count: 1 + - isKind: + of: Deployment + - lengthEqual: + path: spec.template.spec.initContainers + count: 2 + - equal: + path: spec.template.spec.initContainers[0].name + value: wait-download-model + - equal: + path: spec.template.spec.initContainers[0].image + value: amazon/aws-cli:2.6.4 + - equal: + path: spec.template.spec.initContainers[1].name + value: llm-d-routing-proxy + - equal: + path: spec.template.spec.initContainers[1].image + value: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0 + - equal: + path: spec.template.spec.initContainers[1].ports[0].containerPort + value: 8080 \ No newline at end of file diff --git a/examples/online_serving/chart-helm/tests/job_test.yaml b/examples/online_serving/chart-helm/tests/job_test.yaml new file mode 100644 index 0000000000000..25d40ff265132 --- /dev/null +++ b/examples/online_serving/chart-helm/tests/job_test.yaml @@ -0,0 +1,61 @@ +suite: test job +templates: + - job.yaml +tests: + - it: should create job when modelDownload is enabled + set: + extraInit: + modelDownload: + enabled: true + image: + repository: "amazon/aws-cli" + tag: "2.6.4" + pullPolicy: "IfNotPresent" + waitContainer: + command: [ "/bin/bash" ] + args: [ "-c", "wait" ] + downloadJob: + command: [ "/bin/bash" ] + args: + - "-eucx" + - "aws --endpoint-url $S3_ENDPOINT_URL s3 sync s3://$S3_BUCKET_NAME/$S3_PATH /data" + pvcStorage: "1Gi" + s3modelpath: "relative_s3_model_path/opt-125m" + awsEc2MetadataDisabled: true + asserts: + - hasDocuments: + count: 1 + - isKind: + of: Job + - equal: + path: spec.template.spec.containers[0].name + value: job-download-model + - equal: + path: spec.template.spec.containers[0].image + value: amazon/aws-cli:2.6.4 + - equal: + path: spec.template.spec.restartPolicy + value: OnFailure + + - it: should not create job when modelDownload is disabled + set: + extraInit: + modelDownload: + enabled: false + image: + repository: "amazon/aws-cli" + tag: "2.6.4" + pullPolicy: "IfNotPresent" + waitContainer: + command: [ "/bin/bash" ] + args: [ "-c", "wait" ] + downloadJob: + command: [ "/bin/bash" ] + args: [ "-c", "download" ] + initContainers: + - name: llm-d-routing-proxy + image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0 + pvcStorage: "10Gi" + asserts: + - hasDocuments: + count: 0 diff --git a/examples/online_serving/chart-helm/tests/pvc_test.yaml b/examples/online_serving/chart-helm/tests/pvc_test.yaml new file mode 100644 index 0000000000000..2a8b37da7e8bd --- /dev/null +++ b/examples/online_serving/chart-helm/tests/pvc_test.yaml @@ -0,0 +1,32 @@ +suite: test pvc +templates: + - pvc.yaml +tests: + # Test Case: PVC Created When extraInit Defined + - it: should create pvc when extraInit is defined + set: + extraInit: + modelDownload: + enabled: true + image: + repository: "amazon/aws-cli" + tag: "2.6.4" + pullPolicy: "IfNotPresent" + waitContainer: + command: ["/bin/bash"] + args: ["-c", "wait"] + downloadJob: + command: ["/bin/bash"] + args: ["-c", "download"] + pvcStorage: "10Gi" + asserts: + - hasDocuments: + count: 1 + - isKind: + of: PersistentVolumeClaim + - equal: + path: spec.accessModes[0] + value: ReadWriteOnce + - equal: + path: spec.resources.requests.storage + value: 10Gi \ No newline at end of file diff --git a/examples/online_serving/chart-helm/values.schema.json b/examples/online_serving/chart-helm/values.schema.json index 812d54bde1397..0d0e0098bc194 100644 --- a/examples/online_serving/chart-helm/values.schema.json +++ b/examples/online_serving/chart-helm/values.schema.json @@ -136,6 +136,70 @@ "extraInit": { "type": "object", "properties": { + "modelDownload": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "image": { + "type": "object", + "properties": { + "repository": { + "type": "string" + }, + "tag": { + "type": "string" + }, + "pullPolicy": { + "type": "string" + } + }, + "required": ["repository", "tag", "pullPolicy"] + }, + "waitContainer": { + "type": "object", + "properties": { + "command": { + "type": "array", + "items": {"type": "string"} + }, + "args": { + "type": "array", + "items": {"type": "string"} + }, + "env": { + "type": "array", + "items": {"type": "object"} + } + }, + "required": ["command", "args"] + }, + "downloadJob": { + "type": "object", + "properties": { + "command": { + "type": "array", + "items": {"type": "string"} + }, + "args": { + "type": "array", + "items": {"type": "string"} + }, + "env": { + "type": "array", + "items": {"type": "object"} + } + }, + "required": ["command", "args"] + } + }, + "required": ["enabled", "image", "waitContainer", "downloadJob"] + }, + "initContainers": { + "type": "array", + "items": {"type": "object"} + }, "s3modelpath": { "type": "string" }, @@ -147,9 +211,9 @@ } }, "required": [ - "pvcStorage", - "s3modelpath", - "awsEc2MetadataDisabled" + "modelDownload", + "initContainers", + "pvcStorage" ] }, "extraContainers": { diff --git a/examples/online_serving/chart-helm/values.yaml b/examples/online_serving/chart-helm/values.yaml index 815f02a4bfd52..8c6c9ae8ea239 100644 --- a/examples/online_serving/chart-helm/values.yaml +++ b/examples/online_serving/chart-helm/values.yaml @@ -75,10 +75,65 @@ maxUnavailablePodDisruptionBudget: "" # -- Additional configuration for the init container extraInit: - # -- Path of the model on the s3 which hosts model weights and config files + # -- Model download functionality (optional) + modelDownload: + # -- Enable model download job and wait container + enabled: true + # -- Image configuration for model download operations + image: + # -- Image repository + repository: "amazon/aws-cli" + # -- Image tag + tag: "2.6.4" + # -- Image pull policy + pullPolicy: "IfNotPresent" + # -- Wait container configuration (init container that waits for model to be ready) + waitContainer: + # -- Command to execute + command: ["/bin/bash"] + # -- Arguments for the wait container + args: + - "-eucx" + - "while aws --endpoint-url $S3_ENDPOINT_URL s3 sync --dryrun s3://$S3_BUCKET_NAME/$S3_PATH /data | grep -q download; do sleep 10; done" + # -- Environment variables (optional, overrides S3 defaults entirely if specified) + # env: + # - name: HUGGING_FACE_HUB_TOKEN + # value: "your-token" + # - name: MODEL_ID + # value: "meta-llama/Llama-2-7b" + # -- Download job configuration (job that actually downloads the model) + downloadJob: + # -- Command to execute + command: ["/bin/bash"] + # -- Arguments for the download job + args: + - "-eucx" + - "aws --endpoint-url $S3_ENDPOINT_URL s3 sync s3://$S3_BUCKET_NAME/$S3_PATH /data" + # -- Environment variables (optional, overrides S3 defaults entirely if specified) + # env: + # - name: HUGGING_FACE_HUB_TOKEN + # value: "your-token" + # - name: MODEL_ID + # value: "meta-llama/Llama-2-7b" + + # -- Custom init containers (appended after wait-download-model if modelDownload is enabled) + initContainers: [] + # Example for llm-d sidecar: + # initContainers: + # - name: llm-d-routing-proxy + # image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0 + # imagePullPolicy: IfNotPresent + # ports: + # - containerPort: 8080 + # name: proxy + # securityContext: + # runAsUser: 1000 + + # -- Path of the model on the s3 which hosts model weights and config files s3modelpath: "relative_s3_model_path/opt-125m" - # -- Storage size of the s3 + # -- Storage size for the PVC pvcStorage: "1Gi" + # -- Disable AWS EC2 metadata service awsEc2MetadataDisabled: true # -- Additional containers configuration