Standardize config.
This commit is contained in:
@@ -22,7 +22,7 @@ spec:
|
||||
nvidia.com/gpu.product: "{{ .gpuProduct }}"
|
||||
containers:
|
||||
- name: vllm
|
||||
image: "{{ .image }}"
|
||||
image: vllm/vllm-openai:v0.5.4
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
@@ -33,11 +33,9 @@ spec:
|
||||
args:
|
||||
- --model={{ .model }}
|
||||
- --max-model-len={{ .maxModelLen }}
|
||||
- --tensor-parallel-size={{ .tensorParallelSize }}
|
||||
- --tensor-parallel-size=1
|
||||
- --gpu-memory-utilization={{ .gpuMemoryUtilization }}
|
||||
{{- if .apps.vllm.enforceEager }}
|
||||
- --enforce-eager=True
|
||||
{{- end }}
|
||||
env:
|
||||
- name: VLLM_TORCH_DTYPE
|
||||
value: "auto"
|
||||
|
||||
@@ -2,16 +2,14 @@ name: vllm
|
||||
is: vllm
|
||||
description: vLLM is a fast and easy-to-use library for LLM inference and serving
|
||||
with OpenAI-compatible API
|
||||
version: 0.5.4
|
||||
version: 0.5.4-1
|
||||
icon: https://unpkg.com/@lobehub/icons-static-png@latest/dark/vllm.png
|
||||
requires: []
|
||||
defaultConfig:
|
||||
image: vllm/vllm-openai:v0.5.4
|
||||
namespace: llm
|
||||
model: Qwen/Qwen2.5-7B-Instruct
|
||||
maxModelLen: 8192
|
||||
tensorParallelSize: 1
|
||||
gpuMemoryUtilization: 0.9
|
||||
enforceEager: true
|
||||
gpuProduct: RTX 4090
|
||||
cpuRequest: '4'
|
||||
cpuLimit: '8'
|
||||
@@ -19,5 +17,4 @@ defaultConfig:
|
||||
memoryLimit: 24Gi
|
||||
gpuCount: 1
|
||||
domain: vllm.{{ .cloud.domain }}
|
||||
namespace: llm
|
||||
defaultSecrets: []
|
||||
|
||||
Reference in New Issue
Block a user