# App manifest: version, dependency list, default configuration values and
# default secrets.
# NOTE(review): the source text had lost its indentation and carried code-viewer
# residue; the nesting under defaultConfig is reconstructed - confirm against
# the original file.
version: 0.5.4-1
requires: []
defaultConfig:
  namespace: llm
  model: Qwen/Qwen2.5-7B-Instruct
  maxModelLen: 8192
  # Presumably a fraction in the 0-1 range - confirm against the consumer.
  gpuMemoryUtilization: 0.9
  gpuProduct: RTX 4090
  # Quoted so the values stay strings rather than being read as integers.
  cpuRequest: '4'
  cpuLimit: '8'
  memoryRequest: 16Gi
  memoryLimit: 24Gi
  gpuCount: 1
  # Template expression; presumably substituted by the deployment tooling
  # before use - verify.
  domain: vllm.{{ .cloud.domain }}
defaultSecrets: []