v2 app deployment: templating is mainly in the manifest now.
@@ -1,21 +1,22 @@
 name: vllm
-description: vLLM is a fast and easy-to-use library for LLM inference and serving with OpenAI-compatible API
+description: vLLM is a fast and easy-to-use library for LLM inference and serving
+  with OpenAI-compatible API
 version: 0.5.4
-icon: https://raw.githubusercontent.com/vllm-project/vllm/main/docs/source/assets/logos/vllm-logo-text-light.png
+icon: https://unpkg.com/@lobehub/icons-static-png@latest/dark/vllm.png
 requires: []
 defaultConfig:
   image: vllm/vllm-openai:v0.5.4
   model: Qwen/Qwen2.5-7B-Instruct
   maxModelLen: 8192
   tensorParallelSize: 1
-  gpuMemoryUtilization: 0.90
+  gpuMemoryUtilization: 0.9
   enforceEager: true
-  gpuProduct: "RTX 4090"
-  cpuRequest: "4"
-  cpuLimit: "8"
-  memoryRequest: "16Gi"
-  memoryLimit: "24Gi"
+  gpuProduct: RTX 4090
+  cpuRequest: '4'
+  cpuLimit: '8'
+  memoryRequest: 16Gi
+  memoryLimit: 24Gi
   gpuCount: 1
   domain: vllm.{{ .cloud.domain }}
 namespace: llm
 defaultSecrets: []
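For reference, the defaultConfig values above are presumably what the templated deployment manifest consumes. The sketch below shows how that mapping could look; the .config.* variable names and the overall manifest shape are assumptions for illustration, while the vLLM server flags (--model, --max-model-len, --tensor-parallel-size, --gpu-memory-utilization, --enforce-eager) and the nvidia.com/gpu.product node label are real.

    # Hypothetical sketch of the templated pod spec the config might render into.
    # The .config.* names and the manifest shape are assumed, not taken from this repo.
    spec:
      nodeSelector:
        nvidia.com/gpu.product: {{ .config.gpuProduct }}
      containers:
        - name: vllm
          image: {{ .config.image }}
          args:
            - --model={{ .config.model }}
            - --max-model-len={{ .config.maxModelLen }}
            - --tensor-parallel-size={{ .config.tensorParallelSize }}
            - --gpu-memory-utilization={{ .config.gpuMemoryUtilization }}
            {{- if .config.enforceEager }}
            - --enforce-eager
            {{- end }}
          resources:
            requests:
              cpu: {{ .config.cpuRequest | quote }}
              memory: {{ .config.memoryRequest }}
            limits:
              cpu: {{ .config.cpuLimit | quote }}
              memory: {{ .config.memoryLimit }}
              nvidia.com/gpu: {{ .config.gpuCount }}

The quoting changes in the diff are consistent with this: 16Gi and 24Gi are already strings as plain YAML scalars, so their quotes can be dropped, while the CPU counts stay quoted so they come through templating as strings rather than integers.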
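Similarly, the templated domain (vllm.{{ .cloud.domain }}) is presumably what an Ingress host rule is rendered from. A minimal sketch, again with an assumed .config.domain variable and an assumed Service named vllm; only vLLM's default serving port 8000 and the llm namespace come from known values:

    # Hypothetical Ingress excerpt; service name and variable names are illustrative.
    apiVersion: networking.k8s.io/v1
    kind: Ingress
    metadata:
      name: vllm
      namespace: llm
    spec:
      rules:
        - host: {{ .config.domain }}
          http:
            paths:
              - path: /
                pathType: Prefix
                backend:
                  service:
                    name: vllm
                    port:
                      number: 8000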