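Standardize app config.

For the vllm app: hard-code the container image (vllm/vllm-openai:v0.5.4) and --tensor-parallel-size=1 in the deployment template, always pass --enforce-eager=True instead of gating it on a config flag, remove the now-unused image, tensorParallelSize and enforceEager keys from defaultConfig, move namespace to the top of defaultConfig, and bump the manifest version to 0.5.4-1.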

2026-05-23 19:51:33 +00:00
parent e2e3f730a5
commit 6b5325c6f3
87 changed files with 426 additions and 531 deletions
--- a/vllm/deployment.yaml
+++ b/vllm/deployment.yaml
@@ -22,7 +22,7 @@ spec:
        nvidia.com/gpu.product: "{{ .gpuProduct }}"
      containers:
        - name: vllm
-          image: "{{ .image }}"
+          image: vllm/vllm-openai:v0.5.4
          imagePullPolicy: IfNotPresent
          securityContext:
            allowPrivilegeEscalation: false
@@ -33,11 +33,9 @@ spec:
          args:
            - --model={{ .model }}
            - --max-model-len={{ .maxModelLen }}
-            - --tensor-parallel-size={{ .tensorParallelSize }}
+            - --tensor-parallel-size=1
            - --gpu-memory-utilization={{ .gpuMemoryUtilization }}
-            {{- if .apps.vllm.enforceEager }}
            - --enforce-eager=True
-            {{- end }}
          env:
            - name: VLLM_TORCH_DTYPE
              value: "auto"
--- a/vllm/manifest.yaml
+++ b/vllm/manifest.yaml
@@ -2,16 +2,14 @@ name: vllm
 is: vllm
 description: vLLM is a fast and easy-to-use library for LLM inference and serving
  with OpenAI-compatible API
-version: 0.5.4
+version: 0.5.4-1
 icon: https://unpkg.com/@lobehub/icons-static-png@latest/dark/vllm.png
 requires: []
 defaultConfig:
-  image: vllm/vllm-openai:v0.5.4
+  namespace: llm
  model: Qwen/Qwen2.5-7B-Instruct
  maxModelLen: 8192
-  tensorParallelSize: 1
  gpuMemoryUtilization: 0.9
-  enforceEager: true
  gpuProduct: RTX 4090
  cpuRequest: '4'
  cpuLimit: '8'
@@ -19,5 +17,4 @@ defaultConfig:
  memoryLimit: 24Gi
  gpuCount: 1
  domain: vllm.{{ .cloud.domain }}
-  namespace: llm
 defaultSecrets: []