v2 app deployment — templating is now done mainly in the manifest.

This commit is contained in:
2025-12-31 06:53:17 +00:00
parent 8818d822cf
commit d1304a2630
84 changed files with 630 additions and 607 deletions

View File

@@ -1,21 +1,22 @@
name: vllm
description: vLLM is a fast and easy-to-use library for LLM inference and serving with OpenAI-compatible API
description: vLLM is a fast and easy-to-use library for LLM inference and serving
with OpenAI-compatible API
version: 0.5.4
icon: https://raw.githubusercontent.com/vllm-project/vllm/main/docs/source/assets/logos/vllm-logo-text-light.png
icon: https://unpkg.com/@lobehub/icons-static-png@latest/dark/vllm.png
requires: []
defaultConfig:
image: vllm/vllm-openai:v0.5.4
model: Qwen/Qwen2.5-7B-Instruct
maxModelLen: 8192
tensorParallelSize: 1
gpuMemoryUtilization: 0.90
gpuMemoryUtilization: 0.9
enforceEager: true
gpuProduct: "RTX 4090"
cpuRequest: "4"
cpuLimit: "8"
memoryRequest: "16Gi"
memoryLimit: "24Gi"
gpuProduct: RTX 4090
cpuRequest: '4'
cpuLimit: '8'
memoryRequest: 16Gi
memoryLimit: 24Gi
gpuCount: 1
domain: vllm.{{ .cloud.domain }}
namespace: llm
defaultSecrets: []
defaultSecrets: []