Standardize config.
This commit is contained in:
@@ -22,7 +22,7 @@ spec:
|
||||
nvidia.com/gpu.product: "{{ .gpuProduct }}"
|
||||
containers:
|
||||
- name: vllm
|
||||
image: "{{ .image }}"
|
||||
image: vllm/vllm-openai:v0.5.4
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
@@ -33,11 +33,9 @@ spec:
|
||||
args:
|
||||
- --model={{ .model }}
|
||||
- --max-model-len={{ .maxModelLen }}
|
||||
- --tensor-parallel-size={{ .tensorParallelSize }}
|
||||
- --tensor-parallel-size=1
|
||||
- --gpu-memory-utilization={{ .gpuMemoryUtilization }}
|
||||
{{- if .apps.vllm.enforceEager }}
|
||||
- --enforce-eager=True
|
||||
{{- end }}
|
||||
env:
|
||||
- name: VLLM_TORCH_DTYPE
|
||||
value: "auto"
|
||||
|
||||
Reference in New Issue
Block a user