Standardize config.

This commit is contained in:
2026-05-23 19:51:33 +00:00
parent e2e3f730a5
commit 6b5325c6f3
87 changed files with 426 additions and 531 deletions

View File

@@ -22,7 +22,7 @@ spec:
nvidia.com/gpu.product: "{{ .gpuProduct }}"
containers:
- name: vllm
image: "{{ .image }}"
image: vllm/vllm-openai:v0.5.4
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false
@@ -33,11 +33,9 @@ spec:
args:
- --model={{ .model }}
- --max-model-len={{ .maxModelLen }}
- --tensor-parallel-size={{ .tensorParallelSize }}
- --tensor-parallel-size=1
- --gpu-memory-utilization={{ .gpuMemoryUtilization }}
{{- if .apps.vllm.enforceEager }}
- --enforce-eager=True
{{- end }}
env:
- name: VLLM_TORCH_DTYPE
value: "auto"

View File

@@ -2,16 +2,14 @@ name: vllm
is: vllm
description: vLLM is a fast and easy-to-use library for LLM inference and serving
with OpenAI-compatible API
version: 0.5.4
version: 0.5.4-1
icon: https://unpkg.com/@lobehub/icons-static-png@latest/dark/vllm.png
requires: []
defaultConfig:
image: vllm/vllm-openai:v0.5.4
namespace: llm
model: Qwen/Qwen2.5-7B-Instruct
maxModelLen: 8192
tensorParallelSize: 1
gpuMemoryUtilization: 0.9
enforceEager: true
gpuProduct: RTX 4090
cpuRequest: '4'
cpuLimit: '8'
@@ -19,5 +17,4 @@ defaultConfig:
memoryLimit: 24Gi
gpuCount: 1
domain: vllm.{{ .cloud.domain }}
namespace: llm
defaultSecrets: []