name: vllm
description: vLLM is a fast and easy-to-use library for LLM inference and serving with an OpenAI-compatible API
version: 0.5.4
icon: https://unpkg.com/@lobehub/icons-static-png@latest/dark/vllm.png
requires: []
defaultConfig:
  image: vllm/vllm-openai:v0.5.4
  model: Qwen/Qwen2.5-7B-Instruct
  maxModelLen: 8192
  tensorParallelSize: 1
  gpuMemoryUtilization: 0.9
  enforceEager: true
  gpuProduct: RTX 4090
  cpuRequest: '4'
  cpuLimit: '8'
  memoryRequest: 16Gi
  memoryLimit: 24Gi
  gpuCount: 1
  domain: vllm.{{ .cloud.domain }}
  namespace: llm
defaultSecrets: []
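
# --- Usage notes (assumptions, not part of the schema) ---
# The keys under defaultConfig presumably map onto the flags of vLLM's
# OpenAI-compatible server inside the vllm/vllm-openai image; a sketch of
# the command the container would effectively run with these values:
#
#   python -m vllm.entrypoints.openai.api_server \
#     --model Qwen/Qwen2.5-7B-Instruct \
#     --max-model-len 8192 \
#     --tensor-parallel-size 1 \
#     --gpu-memory-utilization 0.9 \
#     --enforce-eager
#
# gpuProduct presumably selects nodes by GPU model label (e.g. the
# nvidia.com/gpu.product label set by NVIDIA GPU feature discovery).
#
# Once deployed at the templated domain, the service speaks the OpenAI API;
# a minimal request (replace <cloud-domain> with the rendered value of
# {{ .cloud.domain }}):
#
#   curl https://vllm.<cloud-domain>/v1/chat/completions \
#     -H 'Content-Type: application/json' \
#     -d '{"model": "Qwen/Qwen2.5-7B-Instruct",
#          "messages": [{"role": "user", "content": "Hello"}]}'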