name: vllm
description: vLLM is a fast and easy-to-use library for LLM inference and serving with an OpenAI-compatible API
version: 0.5.4
icon: https://raw.githubusercontent.com/vllm-project/vllm/main/docs/source/assets/logos/vllm-logo-text-light.png
requires: []
defaultConfig:
  image: vllm/vllm-openai:v0.5.4
  model: Qwen/Qwen2.5-7B-Instruct
  maxModelLen: 8192
  tensorParallelSize: 1
  gpuMemoryUtilization: 0.90
  enforceEager: true
  gpuProduct: "RTX 4090"
  cpuRequest: "4"
  cpuLimit: "8"
  memoryRequest: "16Gi"
  memoryLimit: "24Gi"
  gpuCount: 1
  domain: vllm.{{ .cloud.domain }}
  namespace: llm
requiredSecrets: []
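
# A hedged sketch of how defaultConfig is expected to map onto the container
# invocation. The vllm/vllm-openai image runs vLLM's OpenAI-compatible API
# server, so the camelCase keys above presumably render into the matching
# CLI flags (the exact mapping depends on this manifest's templates, which
# are not shown here):
#
#   docker run --gpus all -p 8000:8000 vllm/vllm-openai:v0.5.4 \
#     --model Qwen/Qwen2.5-7B-Instruct \
#     --max-model-len 8192 \
#     --tensor-parallel-size 1 \
#     --gpu-memory-utilization 0.90 \
#     --enforce-eager
#
# Once deployed, the service should answer standard OpenAI-style requests at
# the configured domain (assuming ingress is set up for the rendered value of
# vllm.{{ .cloud.domain }}):
#
#   curl https://vllm.<your-cloud-domain>/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "Qwen/Qwen2.5-7B-Instruct",
#          "messages": [{"role": "user", "content": "Hello"}]}'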