v2 app deployment--templating mainly in manifest now.
@@ -19,10 +19,10 @@ spec:
         seccompProfile:
           type: RuntimeDefault
       nodeSelector:
-        nvidia.com/gpu.product: "{{ .apps.vllm.gpuProduct }}"
+        nvidia.com/gpu.product: "{{ .gpuProduct }}"
       containers:
       - name: vllm
-        image: "{{ .apps.vllm.image }}"
+        image: "{{ .image }}"
         imagePullPolicy: IfNotPresent
         securityContext:
           allowPrivilegeEscalation: false
@@ -31,10 +31,10 @@ spec:
           - ALL
         readOnlyRootFilesystem: false
         args:
-        - --model={{ .apps.vllm.model }}
-        - --max-model-len={{ .apps.vllm.maxModelLen }}
-        - --tensor-parallel-size={{ .apps.vllm.tensorParallelSize }}
-        - --gpu-memory-utilization={{ .apps.vllm.gpuMemoryUtilization }}
+        - --model={{ .model }}
+        - --max-model-len={{ .maxModelLen }}
+        - --tensor-parallel-size={{ .tensorParallelSize }}
+        - --gpu-memory-utilization={{ .gpuMemoryUtilization }}
         {{- if .apps.vllm.enforceEager }}
         - --enforce-eager=True
         {{- end }}
@@ -48,13 +48,13 @@ spec:
           containerPort: 8000
         resources:
           requests:
-            cpu: "{{ .apps.vllm.cpuRequest }}"
-            memory: "{{ .apps.vllm.memoryRequest }}"
-            nvidia.com/gpu: {{ .apps.vllm.gpuCount }}
+            cpu: "{{ .cpuRequest }}"
+            memory: "{{ .memoryRequest }}"
+            nvidia.com/gpu: {{ .gpuCount }}
           limits:
-            cpu: "{{ .apps.vllm.cpuLimit }}"
-            memory: "{{ .apps.vllm.memoryLimit }}"
-            nvidia.com/gpu: {{ .apps.vllm.gpuCount }}
+            cpu: "{{ .cpuLimit }}"
+            memory: "{{ .memoryLimit }}"
+            nvidia.com/gpu: {{ .gpuCount }}
         readinessProbe:
           httpGet:
             path: /v1/models
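Since the template references in the manifest dropped the .apps.vllm. prefix, the v2 layout presumably renders each app's manifest with that app's own values block as the template root, rather than the whole values tree. A minimal sketch of a matching values file, assuming that scoping (only the key names mirror the placeholders in the manifest above; every concrete value is an illustrative placeholder, not taken from this commit):

# Hypothetical values file for the v2 layout; all values are examples.
apps:
  vllm:
    gpuProduct: "NVIDIA-A100-SXM4-80GB"            # example node label value
    image: "vllm/vllm-openai:latest"               # example image
    model: "meta-llama/Meta-Llama-3-8B-Instruct"   # example model id
    maxModelLen: 8192
    tensorParallelSize: 1
    gpuMemoryUtilization: 0.90
    enforceEager: false
    cpuRequest: "4"
    memoryRequest: "16Gi"
    cpuLimit: "8"
    memoryLimit: "32Gi"
    gpuCount: 1

Under the old rendering, every reference needed the full path ({{ .apps.vllm.model }}); with the vllm block as the dot, {{ .model }} and the rest resolve directly.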