mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-04 19:55:44 +08:00
[Doc]: Make extraInit containers fully configurable in helm chart (#27497)
Signed-off-by: Fang Han <fhan0520@gmail.com>
This commit is contained in:
parent
449de9001a
commit
da855b42d2
@ -13,7 +13,7 @@ Before you begin, ensure that you have the following:
|
|||||||
- A running Kubernetes cluster
|
- A running Kubernetes cluster
|
||||||
- NVIDIA Kubernetes Device Plugin (`k8s-device-plugin`): This can be found at [https://github.com/NVIDIA/k8s-device-plugin](https://github.com/NVIDIA/k8s-device-plugin)
|
- NVIDIA Kubernetes Device Plugin (`k8s-device-plugin`): This can be found at [https://github.com/NVIDIA/k8s-device-plugin](https://github.com/NVIDIA/k8s-device-plugin)
|
||||||
- Available GPU resources in your cluster
|
- Available GPU resources in your cluster
|
||||||
- An S3 with the model which will be deployed
|
- (Optional) An S3 bucket or other storage with the model weights, if using automatic model download
|
||||||
|
|
||||||
## Installing the chart
|
## Installing the chart
|
||||||
|
|
||||||
@ -61,10 +61,16 @@ The following table describes configurable parameters of the chart in `values.ya
|
|||||||
| deploymentStrategy | object | {} | Deployment strategy configuration |
|
| deploymentStrategy | object | {} | Deployment strategy configuration |
|
||||||
| externalConfigs | list | [] | External configuration |
|
| externalConfigs | list | [] | External configuration |
|
||||||
| extraContainers | list | [] | Additional containers configuration |
|
| extraContainers | list | [] | Additional containers configuration |
|
||||||
| extraInit | object | {"pvcStorage":"1Gi","s3modelpath":"relative_s3_model_path/opt-125m", "awsEc2MetadataDisabled": true} | Additional configuration for the init container |
|
| extraInit | object | {"modelDownload":{"enabled":true},"initContainers":[],"pvcStorage":"1Gi"} | Additional configuration for init containers |
|
||||||
| extraInit.pvcStorage | string | "1Gi" | Storage size of the s3 |
|
| extraInit.modelDownload | object | {"enabled":true} | Model download functionality configuration |
|
||||||
| extraInit.s3modelpath | string | "relative_s3_model_path/opt-125m" | Path of the model on the s3 which hosts model weights and config files |
|
| extraInit.modelDownload.enabled | bool | true | Enable automatic model download job and wait container |
|
||||||
| extraInit.awsEc2MetadataDisabled | boolean | true | Disables the use of the Amazon EC2 instance metadata service |
|
| extraInit.modelDownload.image | object | {"repository":"amazon/aws-cli","tag":"2.6.4","pullPolicy":"IfNotPresent"} | Image for model download operations |
|
||||||
|
| extraInit.modelDownload.waitContainer | object | {} | Wait container configuration (command, args, env) |
|
||||||
|
| extraInit.modelDownload.downloadJob | object | {} | Download job configuration (command, args, env) |
|
||||||
|
| extraInit.initContainers | list | [] | Custom init containers (appended after model download if enabled) |
|
||||||
|
| extraInit.pvcStorage | string | "1Gi" | Storage size for the PVC |
|
||||||
|
| extraInit.s3modelpath | string | "relative_s3_model_path/opt-125m" | (Optional) Path of the model on S3 |
|
||||||
|
| extraInit.awsEc2MetadataDisabled | bool | true | (Optional) Disable AWS EC2 metadata service |
|
||||||
| extraPorts | list | [] | Additional ports configuration |
|
| extraPorts | list | [] | Additional ports configuration |
|
||||||
| gpuModels | list | ["TYPE_GPU_USED"] | Type of GPU used |
|
| gpuModels | list | ["TYPE_GPU_USED"] | Type of GPU used |
|
||||||
| image | object | {"command":["vllm","serve","/data/","--served-model-name","opt-125m","--host","0.0.0.0","--port","8000"],"repository":"vllm/vllm-openai","tag":"latest"} | Image configuration |
|
| image | object | {"command":["vllm","serve","/data/","--served-model-name","opt-125m","--host","0.0.0.0","--port","8000"],"repository":"vllm/vllm-openai","tag":"latest"} | Image configuration |
|
||||||
@ -98,3 +104,36 @@ The following table describes configurable parameters of the chart in `values.ya
|
|||||||
| serviceName | string | "" | Service name |
|
| serviceName | string | "" | Service name |
|
||||||
| servicePort | int | 80 | Service port |
|
| servicePort | int | 80 | Service port |
|
||||||
| labels.environment | string | test | Environment name |
|
| labels.environment | string | test | Environment name |
|
||||||
|
|
||||||
|
## Configuration Examples
|
||||||
|
|
||||||
|
### Using S3 Model Download (Default)
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
extraInit:
|
||||||
|
modelDownload:
|
||||||
|
enabled: true
|
||||||
|
pvcStorage: "10Gi"
|
||||||
|
s3modelpath: "models/llama-7b"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Using Custom Init Containers Only
|
||||||
|
|
||||||
|
For use cases like llm-d where you need custom sidecars without model download:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
extraInit:
|
||||||
|
modelDownload:
|
||||||
|
enabled: false
|
||||||
|
initContainers:
|
||||||
|
- name: llm-d-routing-proxy
|
||||||
|
image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: proxy
|
||||||
|
securityContext:
|
||||||
|
runAsUser: 1000
|
||||||
|
restartPolicy: Always
|
||||||
|
pvcStorage: "10Gi"
|
||||||
|
```
|
||||||
|
|||||||
@ -19,3 +19,15 @@ This directory contains a Helm chart for deploying the vllm application. The cha
|
|||||||
- templates/pvc.yaml: Template for Persistent Volume Claims.
|
- templates/pvc.yaml: Template for Persistent Volume Claims.
|
||||||
- templates/secrets.yaml: Template for Kubernetes Secrets.
|
- templates/secrets.yaml: Template for Kubernetes Secrets.
|
||||||
- templates/service.yaml: Template for creating Services.
|
- templates/service.yaml: Template for creating Services.
|
||||||
|
|
||||||
|
## Running Tests
|
||||||
|
|
||||||
|
This chart includes unit tests using [helm-unittest](https://github.com/helm-unittest/helm-unittest). Install the plugin and run tests:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install plugin
|
||||||
|
helm plugin install https://github.com/helm-unittest/helm-unittest
|
||||||
|
|
||||||
|
# Run tests
|
||||||
|
helm unittest .
|
||||||
|
```
|
||||||
|
|||||||
@ -123,9 +123,6 @@ runAsUser:
|
|||||||
{{- end }}
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
|
||||||
{{- define "chart.extraInitImage" -}}
|
|
||||||
"amazon/aws-cli:2.6.4"
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
{{- define "chart.extraInitEnv" -}}
|
{{- define "chart.extraInitEnv" -}}
|
||||||
- name: S3_ENDPOINT_URL
|
- name: S3_ENDPOINT_URL
|
||||||
@ -148,11 +145,15 @@ runAsUser:
|
|||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
name: {{ .Release.Name }}-secrets
|
name: {{ .Release.Name }}-secrets
|
||||||
key: s3accesskey
|
key: s3accesskey
|
||||||
|
{{- if .Values.extraInit.s3modelpath }}
|
||||||
- name: S3_PATH
|
- name: S3_PATH
|
||||||
value: "{{ .Values.extraInit.s3modelpath }}"
|
value: "{{ .Values.extraInit.s3modelpath }}"
|
||||||
|
{{- end }}
|
||||||
|
{{- if hasKey .Values.extraInit "awsEc2MetadataDisabled" }}
|
||||||
- name: AWS_EC2_METADATA_DISABLED
|
- name: AWS_EC2_METADATA_DISABLED
|
||||||
value: "{{ .Values.extraInit.awsEc2MetadataDisabled }}"
|
value: "{{ .Values.extraInit.awsEc2MetadataDisabled }}"
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
{{/*
|
{{/*
|
||||||
Define chart labels
|
Define chart labels
|
||||||
|
|||||||
@ -72,16 +72,21 @@ spec:
|
|||||||
{{ toYaml . | nindent 8 }}
|
{{ toYaml . | nindent 8 }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
|
||||||
{{- if .Values.extraInit }}
|
{{- if and .Values.extraInit (or .Values.extraInit.modelDownload.enabled .Values.extraInit.initContainers) }}
|
||||||
initContainers:
|
initContainers:
|
||||||
|
{{- if .Values.extraInit.modelDownload.enabled }}
|
||||||
- name: wait-download-model
|
- name: wait-download-model
|
||||||
image: {{ include "chart.extraInitImage" . }}
|
image: {{ .Values.extraInit.modelDownload.image.repository }}:{{ .Values.extraInit.modelDownload.image.tag }}
|
||||||
command:
|
imagePullPolicy: {{ .Values.extraInit.modelDownload.image.pullPolicy }}
|
||||||
- /bin/bash
|
command: {{ .Values.extraInit.modelDownload.waitContainer.command | toJson }}
|
||||||
args:
|
args:
|
||||||
- -eucx
|
{{- toYaml .Values.extraInit.modelDownload.waitContainer.args | nindent 10 }}
|
||||||
- while aws --endpoint-url $S3_ENDPOINT_URL s3 sync --dryrun s3://$S3_BUCKET_NAME/$S3_PATH /data | grep -q download; do sleep 10; done
|
env:
|
||||||
env: {{- include "chart.extraInitEnv" . | nindent 10 }}
|
{{- if .Values.extraInit.modelDownload.waitContainer.env }}
|
||||||
|
{{- toYaml .Values.extraInit.modelDownload.waitContainer.env | nindent 10 }}
|
||||||
|
{{- else }}
|
||||||
|
{{- include "chart.extraInitEnv" . | nindent 10 }}
|
||||||
|
{{- end }}
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 200m
|
cpu: 200m
|
||||||
@ -93,6 +98,10 @@ spec:
|
|||||||
- name: {{ .Release.Name }}-storage
|
- name: {{ .Release.Name }}-storage
|
||||||
mountPath: /data
|
mountPath: /data
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
{{- with .Values.extraInit.initContainers }}
|
||||||
|
{{- toYaml . | nindent 6 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
volumes:
|
volumes:
|
||||||
- name: {{ .Release.Name }}-storage
|
- name: {{ .Release.Name }}-storage
|
||||||
persistentVolumeClaim:
|
persistentVolumeClaim:
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
{{- if .Values.extraInit }}
|
{{- if and .Values.extraInit .Values.extraInit.modelDownload.enabled }}
|
||||||
apiVersion: batch/v1
|
apiVersion: batch/v1
|
||||||
kind: Job
|
kind: Job
|
||||||
metadata:
|
metadata:
|
||||||
@ -12,13 +12,17 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: job-download-model
|
- name: job-download-model
|
||||||
image: {{ include "chart.extraInitImage" . }}
|
image: {{ .Values.extraInit.modelDownload.image.repository }}:{{ .Values.extraInit.modelDownload.image.tag }}
|
||||||
command:
|
imagePullPolicy: {{ .Values.extraInit.modelDownload.image.pullPolicy }}
|
||||||
- /bin/bash
|
command: {{ .Values.extraInit.modelDownload.downloadJob.command | toJson }}
|
||||||
args:
|
args:
|
||||||
- -eucx
|
{{- toYaml .Values.extraInit.modelDownload.downloadJob.args | nindent 8 }}
|
||||||
- aws --endpoint-url $S3_ENDPOINT_URL s3 sync s3://$S3_BUCKET_NAME/$S3_PATH /data
|
env:
|
||||||
env: {{- include "chart.extraInitEnv" . | nindent 8 }}
|
{{- if .Values.extraInit.modelDownload.downloadJob.env }}
|
||||||
|
{{- toYaml .Values.extraInit.modelDownload.downloadJob.env | nindent 8 }}
|
||||||
|
{{- else }}
|
||||||
|
{{- include "chart.extraInitEnv" . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: {{ .Release.Name }}-storage
|
- name: {{ .Release.Name }}-storage
|
||||||
mountPath: /data
|
mountPath: /data
|
||||||
|
|||||||
135
examples/online_serving/chart-helm/tests/deployment_test.yaml
Normal file
135
examples/online_serving/chart-helm/tests/deployment_test.yaml
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
suite: test deployment
|
||||||
|
templates:
|
||||||
|
- deployment.yaml
|
||||||
|
tests:
|
||||||
|
- it: should create wait-download-model init container when modelDownload is enabled
|
||||||
|
set:
|
||||||
|
extraInit:
|
||||||
|
modelDownload:
|
||||||
|
enabled: true
|
||||||
|
image:
|
||||||
|
repository: "amazon/aws-cli"
|
||||||
|
tag: "2.6.4"
|
||||||
|
pullPolicy: "IfNotPresent"
|
||||||
|
waitContainer:
|
||||||
|
command: [ "/bin/bash" ]
|
||||||
|
args:
|
||||||
|
- "-eucx"
|
||||||
|
- "while aws --endpoint-url $S3_ENDPOINT_URL s3 sync --dryrun s3://$S3_BUCKET_NAME/$S3_PATH /data | grep -q download; do sleep 10; done"
|
||||||
|
downloadJob:
|
||||||
|
command: [ "/bin/bash" ]
|
||||||
|
args:
|
||||||
|
- "-eucx"
|
||||||
|
- "aws --endpoint-url $S3_ENDPOINT_URL s3 sync s3://$S3_BUCKET_NAME/$S3_PATH /data"
|
||||||
|
initContainers: [ ]
|
||||||
|
pvcStorage: "1Gi"
|
||||||
|
s3modelpath: "relative_s3_model_path/opt-125m"
|
||||||
|
awsEc2MetadataDisabled: true
|
||||||
|
asserts:
|
||||||
|
- hasDocuments:
|
||||||
|
count: 1
|
||||||
|
- isKind:
|
||||||
|
of: Deployment
|
||||||
|
- isNotEmpty:
|
||||||
|
path: spec.template.spec.initContainers
|
||||||
|
- equal:
|
||||||
|
path: spec.template.spec.initContainers[0].name
|
||||||
|
value: wait-download-model
|
||||||
|
- equal:
|
||||||
|
path: spec.template.spec.initContainers[0].image
|
||||||
|
value: amazon/aws-cli:2.6.4
|
||||||
|
- equal:
|
||||||
|
path: spec.template.spec.initContainers[0].imagePullPolicy
|
||||||
|
value: IfNotPresent
|
||||||
|
|
||||||
|
- it: should only create custom init containers when modelDownload is disabled
|
||||||
|
set:
|
||||||
|
extraInit:
|
||||||
|
modelDownload:
|
||||||
|
enabled: false
|
||||||
|
image:
|
||||||
|
repository: "amazon/aws-cli"
|
||||||
|
tag: "2.6.4"
|
||||||
|
pullPolicy: "IfNotPresent"
|
||||||
|
waitContainer:
|
||||||
|
command: [ "/bin/bash" ]
|
||||||
|
args: [ "-c", "echo test" ]
|
||||||
|
downloadJob:
|
||||||
|
command: [ "/bin/bash" ]
|
||||||
|
args: [ "-c", "echo test" ]
|
||||||
|
initContainers:
|
||||||
|
- name: llm-d-routing-proxy
|
||||||
|
image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: proxy
|
||||||
|
pvcStorage: "10Gi"
|
||||||
|
asserts:
|
||||||
|
- hasDocuments:
|
||||||
|
count: 1
|
||||||
|
- isKind:
|
||||||
|
of: Deployment
|
||||||
|
- lengthEqual:
|
||||||
|
path: spec.template.spec.initContainers
|
||||||
|
count: 1
|
||||||
|
- equal:
|
||||||
|
path: spec.template.spec.initContainers[0].name
|
||||||
|
value: llm-d-routing-proxy
|
||||||
|
- equal:
|
||||||
|
path: spec.template.spec.initContainers[0].image
|
||||||
|
value: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0
|
||||||
|
- equal:
|
||||||
|
path: spec.template.spec.initContainers[0].ports[0].containerPort
|
||||||
|
value: 8080
|
||||||
|
|
||||||
|
- it: should create both wait-download-model and custom init containers when both are enabled
|
||||||
|
set:
|
||||||
|
extraInit:
|
||||||
|
modelDownload:
|
||||||
|
enabled: true
|
||||||
|
image:
|
||||||
|
repository: "amazon/aws-cli"
|
||||||
|
tag: "2.6.4"
|
||||||
|
pullPolicy: "IfNotPresent"
|
||||||
|
waitContainer:
|
||||||
|
command: [ "/bin/bash" ]
|
||||||
|
args:
|
||||||
|
- "-eucx"
|
||||||
|
- "while aws --endpoint-url $S3_ENDPOINT_URL s3 sync --dryrun s3://$S3_BUCKET_NAME/$S3_PATH /data | grep -q download; do sleep 10; done"
|
||||||
|
downloadJob:
|
||||||
|
command: [ "/bin/bash" ]
|
||||||
|
args:
|
||||||
|
- "-eucx"
|
||||||
|
- "aws --endpoint-url $S3_ENDPOINT_URL s3 sync s3://$S3_BUCKET_NAME/$S3_PATH /data"
|
||||||
|
initContainers:
|
||||||
|
- name: llm-d-routing-proxy
|
||||||
|
image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: proxy
|
||||||
|
pvcStorage: "10Gi"
|
||||||
|
asserts:
|
||||||
|
- hasDocuments:
|
||||||
|
count: 1
|
||||||
|
- isKind:
|
||||||
|
of: Deployment
|
||||||
|
- lengthEqual:
|
||||||
|
path: spec.template.spec.initContainers
|
||||||
|
count: 2
|
||||||
|
- equal:
|
||||||
|
path: spec.template.spec.initContainers[0].name
|
||||||
|
value: wait-download-model
|
||||||
|
- equal:
|
||||||
|
path: spec.template.spec.initContainers[0].image
|
||||||
|
value: amazon/aws-cli:2.6.4
|
||||||
|
- equal:
|
||||||
|
path: spec.template.spec.initContainers[1].name
|
||||||
|
value: llm-d-routing-proxy
|
||||||
|
- equal:
|
||||||
|
path: spec.template.spec.initContainers[1].image
|
||||||
|
value: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0
|
||||||
|
- equal:
|
||||||
|
path: spec.template.spec.initContainers[1].ports[0].containerPort
|
||||||
|
value: 8080
|
||||||
61
examples/online_serving/chart-helm/tests/job_test.yaml
Normal file
61
examples/online_serving/chart-helm/tests/job_test.yaml
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
suite: test job
|
||||||
|
templates:
|
||||||
|
- job.yaml
|
||||||
|
tests:
|
||||||
|
- it: should create job when modelDownload is enabled
|
||||||
|
set:
|
||||||
|
extraInit:
|
||||||
|
modelDownload:
|
||||||
|
enabled: true
|
||||||
|
image:
|
||||||
|
repository: "amazon/aws-cli"
|
||||||
|
tag: "2.6.4"
|
||||||
|
pullPolicy: "IfNotPresent"
|
||||||
|
waitContainer:
|
||||||
|
command: [ "/bin/bash" ]
|
||||||
|
args: [ "-c", "wait" ]
|
||||||
|
downloadJob:
|
||||||
|
command: [ "/bin/bash" ]
|
||||||
|
args:
|
||||||
|
- "-eucx"
|
||||||
|
- "aws --endpoint-url $S3_ENDPOINT_URL s3 sync s3://$S3_BUCKET_NAME/$S3_PATH /data"
|
||||||
|
pvcStorage: "1Gi"
|
||||||
|
s3modelpath: "relative_s3_model_path/opt-125m"
|
||||||
|
awsEc2MetadataDisabled: true
|
||||||
|
asserts:
|
||||||
|
- hasDocuments:
|
||||||
|
count: 1
|
||||||
|
- isKind:
|
||||||
|
of: Job
|
||||||
|
- equal:
|
||||||
|
path: spec.template.spec.containers[0].name
|
||||||
|
value: job-download-model
|
||||||
|
- equal:
|
||||||
|
path: spec.template.spec.containers[0].image
|
||||||
|
value: amazon/aws-cli:2.6.4
|
||||||
|
- equal:
|
||||||
|
path: spec.template.spec.restartPolicy
|
||||||
|
value: OnFailure
|
||||||
|
|
||||||
|
- it: should not create job when modelDownload is disabled
|
||||||
|
set:
|
||||||
|
extraInit:
|
||||||
|
modelDownload:
|
||||||
|
enabled: false
|
||||||
|
image:
|
||||||
|
repository: "amazon/aws-cli"
|
||||||
|
tag: "2.6.4"
|
||||||
|
pullPolicy: "IfNotPresent"
|
||||||
|
waitContainer:
|
||||||
|
command: [ "/bin/bash" ]
|
||||||
|
args: [ "-c", "wait" ]
|
||||||
|
downloadJob:
|
||||||
|
command: [ "/bin/bash" ]
|
||||||
|
args: [ "-c", "download" ]
|
||||||
|
initContainers:
|
||||||
|
- name: llm-d-routing-proxy
|
||||||
|
image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0
|
||||||
|
pvcStorage: "10Gi"
|
||||||
|
asserts:
|
||||||
|
- hasDocuments:
|
||||||
|
count: 0
|
||||||
32
examples/online_serving/chart-helm/tests/pvc_test.yaml
Normal file
32
examples/online_serving/chart-helm/tests/pvc_test.yaml
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
suite: test pvc
|
||||||
|
templates:
|
||||||
|
- pvc.yaml
|
||||||
|
tests:
|
||||||
|
# Test Case: PVC Created When extraInit Defined
|
||||||
|
- it: should create pvc when extraInit is defined
|
||||||
|
set:
|
||||||
|
extraInit:
|
||||||
|
modelDownload:
|
||||||
|
enabled: true
|
||||||
|
image:
|
||||||
|
repository: "amazon/aws-cli"
|
||||||
|
tag: "2.6.4"
|
||||||
|
pullPolicy: "IfNotPresent"
|
||||||
|
waitContainer:
|
||||||
|
command: ["/bin/bash"]
|
||||||
|
args: ["-c", "wait"]
|
||||||
|
downloadJob:
|
||||||
|
command: ["/bin/bash"]
|
||||||
|
args: ["-c", "download"]
|
||||||
|
pvcStorage: "10Gi"
|
||||||
|
asserts:
|
||||||
|
- hasDocuments:
|
||||||
|
count: 1
|
||||||
|
- isKind:
|
||||||
|
of: PersistentVolumeClaim
|
||||||
|
- equal:
|
||||||
|
path: spec.accessModes[0]
|
||||||
|
value: ReadWriteOnce
|
||||||
|
- equal:
|
||||||
|
path: spec.resources.requests.storage
|
||||||
|
value: 10Gi
|
||||||
@ -136,6 +136,70 @@
|
|||||||
"extraInit": {
|
"extraInit": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
"modelDownload": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"enabled": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"image": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"repository": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"tag": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"pullPolicy": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["repository", "tag", "pullPolicy"]
|
||||||
|
},
|
||||||
|
"waitContainer": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"command": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {"type": "string"}
|
||||||
|
},
|
||||||
|
"args": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {"type": "string"}
|
||||||
|
},
|
||||||
|
"env": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {"type": "object"}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["command", "args"]
|
||||||
|
},
|
||||||
|
"downloadJob": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"command": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {"type": "string"}
|
||||||
|
},
|
||||||
|
"args": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {"type": "string"}
|
||||||
|
},
|
||||||
|
"env": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {"type": "object"}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["command", "args"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["enabled", "image", "waitContainer", "downloadJob"]
|
||||||
|
},
|
||||||
|
"initContainers": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {"type": "object"}
|
||||||
|
},
|
||||||
"s3modelpath": {
|
"s3modelpath": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
@ -147,9 +211,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": [
|
"required": [
|
||||||
"pvcStorage",
|
"modelDownload",
|
||||||
"s3modelpath",
|
"initContainers",
|
||||||
"awsEc2MetadataDisabled"
|
"pvcStorage"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"extraContainers": {
|
"extraContainers": {
|
||||||
|
|||||||
@ -75,10 +75,65 @@ maxUnavailablePodDisruptionBudget: ""
|
|||||||
|
|
||||||
# -- Additional configuration for init containers
|
# -- Additional configuration for init containers
|
||||||
extraInit:
|
extraInit:
|
||||||
# -- Path of the model on the s3 which hosts model weights and config files
|
# -- Model download functionality (optional)
|
||||||
|
modelDownload:
|
||||||
|
# -- Enable model download job and wait container
|
||||||
|
enabled: true
|
||||||
|
# -- Image configuration for model download operations
|
||||||
|
image:
|
||||||
|
# -- Image repository
|
||||||
|
repository: "amazon/aws-cli"
|
||||||
|
# -- Image tag
|
||||||
|
tag: "2.6.4"
|
||||||
|
# -- Image pull policy
|
||||||
|
pullPolicy: "IfNotPresent"
|
||||||
|
# -- Wait container configuration (init container that waits for model to be ready)
|
||||||
|
waitContainer:
|
||||||
|
# -- Command to execute
|
||||||
|
command: ["/bin/bash"]
|
||||||
|
# -- Arguments for the wait container
|
||||||
|
args:
|
||||||
|
- "-eucx"
|
||||||
|
- "while aws --endpoint-url $S3_ENDPOINT_URL s3 sync --dryrun s3://$S3_BUCKET_NAME/$S3_PATH /data | grep -q download; do sleep 10; done"
|
||||||
|
# -- Environment variables (optional, overrides S3 defaults entirely if specified)
|
||||||
|
# env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: "your-token"
|
||||||
|
# - name: MODEL_ID
|
||||||
|
# value: "meta-llama/Llama-2-7b"
|
||||||
|
# -- Download job configuration (job that actually downloads the model)
|
||||||
|
downloadJob:
|
||||||
|
# -- Command to execute
|
||||||
|
command: ["/bin/bash"]
|
||||||
|
# -- Arguments for the download job
|
||||||
|
args:
|
||||||
|
- "-eucx"
|
||||||
|
- "aws --endpoint-url $S3_ENDPOINT_URL s3 sync s3://$S3_BUCKET_NAME/$S3_PATH /data"
|
||||||
|
# -- Environment variables (optional, overrides S3 defaults entirely if specified)
|
||||||
|
# env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: "your-token"
|
||||||
|
# - name: MODEL_ID
|
||||||
|
# value: "meta-llama/Llama-2-7b"
|
||||||
|
|
||||||
|
# -- Custom init containers (appended after wait-download-model if modelDownload is enabled)
|
||||||
|
initContainers: []
|
||||||
|
# Example for llm-d sidecar:
|
||||||
|
# initContainers:
|
||||||
|
# - name: llm-d-routing-proxy
|
||||||
|
# image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0
|
||||||
|
# imagePullPolicy: IfNotPresent
|
||||||
|
# ports:
|
||||||
|
# - containerPort: 8080
|
||||||
|
# name: proxy
|
||||||
|
# securityContext:
|
||||||
|
# runAsUser: 1000
|
||||||
|
|
||||||
|
# -- Path of the model in the S3 bucket that hosts the model weights and config files
|
||||||
s3modelpath: "relative_s3_model_path/opt-125m"
|
s3modelpath: "relative_s3_model_path/opt-125m"
|
||||||
# -- Storage size of the s3
|
# -- Storage size for the PVC
|
||||||
pvcStorage: "1Gi"
|
pvcStorage: "1Gi"
|
||||||
|
# -- Disable AWS EC2 metadata service
|
||||||
awsEc2MetadataDisabled: true
|
awsEc2MetadataDisabled: true
|
||||||
|
|
||||||
# -- Additional containers configuration
|
# -- Additional containers configuration
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user