Reorganized for new stable/waypoint versioning design.
This commit is contained in:
73
vllm/versions/0/deployment.yaml
Normal file
73
vllm/versions/0/deployment.yaml
Normal file
@@ -0,0 +1,73 @@
|
||||
---
# Deployment for a single-replica vLLM OpenAI-compatible inference server.
#
# Values written as {{ .name }} are template placeholders; they are assumed to
# be substituted by a text-templating step before this manifest is applied
# (TODO confirm which renderer is used). Every templated scalar is quoted so
# that an empty or odd-looking expansion cannot change the YAML structure.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm
spec:
  replicas: 1
  selector:
    matchLabels:
      component: inference
  template:
    metadata:
      labels:
        # Must stay identical to spec.selector.matchLabels above.
        component: inference
    spec:
      # Pod-level hardening: run as an unprivileged uid/gid with the runtime's
      # default seccomp profile.
      # NOTE(review): the image must provide a writable home/cache directory
      # for uid 1000 (e.g. for model downloads) — verify against the image.
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        runAsGroup: 1000
        seccompProfile:
          type: RuntimeDefault
      # Schedule only onto nodes advertising the requested GPU model.
      nodeSelector:
        nvidia.com/gpu.product: "{{ .gpuProduct }}"
      containers:
        - name: vllm
          image: vllm/vllm-openai:v0.5.4
          imagePullPolicy: IfNotPresent
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            readOnlyRootFilesystem: false
          args:
            - "--model={{ .model }}"
            - "--max-model-len={{ .maxModelLen }}"
            - "--tensor-parallel-size=1"
            - "--gpu-memory-utilization={{ .gpuMemoryUtilization }}"
            # --enforce-eager is a boolean switch (argparse store_true); it
            # takes no value, and "--enforce-eager=True" is rejected at startup.
            - "--enforce-eager"
          env:
            # NOTE(review): confirm that the vLLM version in the image actually
            # reads these two variables.
            - name: VLLM_TORCH_DTYPE
              value: "auto"
            - name: VLLM_WORKER_CONCURRENCY
              value: "1"
          ports:
            - name: http
              containerPort: 8000
          resources:
            requests:
              cpu: "{{ .cpuRequest }}"
              memory: "{{ .memoryRequest }}"
              nvidia.com/gpu: "{{ .gpuCount }}"
            limits:
              cpu: "{{ .cpuLimit }}"
              memory: "{{ .memoryLimit }}"
              # Extended resources such as GPUs must have request == limit.
              nvidia.com/gpu: "{{ .gpuCount }}"
          # Gives the server up to 10 minutes (60 x 10s) to finish starting
          # before the liveness probe takes over; without this, a slow model
          # load is killed by the liveness probe and the pod restart-loops.
          startupProbe:
            httpGet:
              path: /health
              port: http
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 60
          # Ready once the OpenAI-compatible model listing endpoint answers.
          readinessProbe:
            httpGet:
              path: /v1/models
              port: http
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
          # Restart the container if the health endpoint stops answering.
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 60
            periodSeconds: 15
            timeoutSeconds: 5
      # Allow scheduling onto nodes tainted with nvidia.com/gpu:NoSchedule.
      tolerations:
        - key: "nvidia.com/gpu"
          operator: "Exists"
          effect: "NoSchedule"
|
||||
Reference in New Issue
Block a user