v2 app deployment: templating now lives mainly in the app manifest.

2025-12-31 06:53:17 +00:00
parent 8818d822cf
commit d1304a2630
84 changed files with 630 additions and 607 deletions
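The pattern behind every hunk below: app templates used to be rendered against the full cluster config, so values had to be addressed as .apps.vllm.<key>; after this change each app's manifest defaultConfig is the template context, so the same values are reached as flat keys like .image. A minimal Go text/template sketch of the two scopings, assuming Go templating (the {{ .x }} / {{- if }} syntax suggests it); the data shapes are inferred from the diff, not taken from the actual renderer:

```go
package main

import (
	"os"
	"text/template"
)

func main() {
	// Old layout: the whole config tree is the template data, so the image
	// value sits at .apps.vllm.image.
	oldCtx := map[string]any{
		"apps": map[string]any{
			"vllm": map[string]any{"image": "vllm/vllm-openai:v0.5.4"},
		},
	}
	// New layout: the app's own defaultConfig (see manifest.yaml below) is
	// the template data, so the same value is simply .image.
	newCtx := map[string]any{"image": "vllm/vllm-openai:v0.5.4"}

	oldTmpl := template.Must(template.New("old").Parse("image: \"{{ .apps.vllm.image }}\"\n"))
	newTmpl := template.Must(template.New("new").Parse("image: \"{{ .image }}\"\n"))

	oldTmpl.Execute(os.Stdout, oldCtx) // image: "vllm/vllm-openai:v0.5.4"
	newTmpl.Execute(os.Stdout, newCtx) // image: "vllm/vllm-openai:v0.5.4"
}
```

Both render the same manifest line; only the path to the value changes.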

View File

@@ -19,10 +19,10 @@ spec:
seccompProfile:
type: RuntimeDefault
nodeSelector:
- nvidia.com/gpu.product: "{{ .apps.vllm.gpuProduct }}"
+ nvidia.com/gpu.product: "{{ .gpuProduct }}"
containers:
- name: vllm
image: "{{ .apps.vllm.image }}"
image: "{{ .image }}"
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false
@@ -31,10 +31,10 @@ spec:
- ALL
readOnlyRootFilesystem: false
args:
- - --model={{ .apps.vllm.model }}
- - --max-model-len={{ .apps.vllm.maxModelLen }}
- - --tensor-parallel-size={{ .apps.vllm.tensorParallelSize }}
- - --gpu-memory-utilization={{ .apps.vllm.gpuMemoryUtilization }}
+ - --model={{ .model }}
+ - --max-model-len={{ .maxModelLen }}
+ - --tensor-parallel-size={{ .tensorParallelSize }}
+ - --gpu-memory-utilization={{ .gpuMemoryUtilization }}
{{- if .apps.vllm.enforceEager }}
- --enforce-eager=True
{{- end }}
@@ -48,13 +48,13 @@ spec:
containerPort: 8000
resources:
requests:
cpu: "{{ .apps.vllm.cpuRequest }}"
memory: "{{ .apps.vllm.memoryRequest }}"
nvidia.com/gpu: {{ .apps.vllm.gpuCount }}
cpu: "{{ .cpuRequest }}"
memory: "{{ .memoryRequest }}"
nvidia.com/gpu: {{ .gpuCount }}
limits:
cpu: "{{ .apps.vllm.cpuLimit }}"
memory: "{{ .apps.vllm.memoryLimit }}"
nvidia.com/gpu: {{ .apps.vllm.gpuCount }}
cpu: "{{ .cpuLimit }}"
memory: "{{ .memoryLimit }}"
nvidia.com/gpu: {{ .gpuCount }}
readinessProbe:
httpGet:
path: /v1/models
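A side effect of the narrower context worth noting, assuming the renderer is Go's text/template: with the flat per-app scope, a key that a template references but the manifest's defaultConfig does not define can be made a hard render error through the standard missingkey=error option instead of silently becoming "<no value>". Nothing in this diff shows the repo enabling that option; the sketch below only illustrates the knob:

```go
package main

import (
	"fmt"
	"os"
	"text/template"
)

func main() {
	// Fail the render if a referenced key is absent from the app context.
	tmpl := template.Must(template.New("resources").
		Option("missingkey=error").
		Parse("cpu: \"{{ .cpuRequest }}\"\nmemory: \"{{ .memoryRequest }}\"\n"))

	ctx := map[string]any{"cpuRequest": "4"} // memoryRequest deliberately omitted

	if err := tmpl.Execute(os.Stdout, ctx); err != nil {
		// e.g. map has no entry for key "memoryRequest"
		fmt.Fprintln(os.Stderr, "render error:", err)
	}
}
```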

View File

@@ -3,13 +3,13 @@ kind: Ingress
metadata:
name: vllm
annotations:
- external-dns.alpha.kubernetes.io/target: {{ .cloud.domain }}
+ external-dns.alpha.kubernetes.io/target: {{ .externalDnsDomain }}
external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
traefik.ingress.kubernetes.io/router.tls: "true"
traefik.ingress.kubernetes.io/router.tls.certresolver: letsencrypt
spec:
rules:
- - host: {{ .apps.vllm.domain }}
+ - host: {{ .domain }}
http:
paths:
- path: /
@@ -21,5 +21,5 @@ spec:
number: 8000
tls:
- hosts:
- - {{ .apps.vllm.domain }}
+ - {{ .domain }}
secretName: vllm-tls
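This ingress mixes two scopes: .domain comes from the app's defaultConfig, while .externalDnsDomain (formerly .cloud.domain) looks like a cluster-level value exposed under a flat name. One way such a merged context could be assembled is sketched below; mergeCtx, the provenance of the keys, and the literal values are assumptions for illustration, not code from this repo:

```go
package main

import (
	"os"
	"text/template"
)

// mergeCtx overlays per-app values on top of a handful of cluster-wide
// values so one flat context serves both kinds of keys.
func mergeCtx(global, app map[string]any) map[string]any {
	ctx := make(map[string]any, len(global)+len(app))
	for k, v := range global {
		ctx[k] = v
	}
	for k, v := range app {
		ctx[k] = v // per-app values win on conflicts
	}
	return ctx
}

func main() {
	global := map[string]any{"externalDnsDomain": "example.com"} // stand-in value
	app := map[string]any{"domain": "vllm.example.com"}          // stand-in value

	tmpl := template.Must(template.New("ingress").Parse(
		"external-dns.alpha.kubernetes.io/target: {{ .externalDnsDomain }}\nhost: {{ .domain }}\n"))
	tmpl.Execute(os.Stdout, mergeCtx(global, app))
	// external-dns.alpha.kubernetes.io/target: example.com
	// host: vllm.example.com
}
```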

View File

@@ -1,6 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
- namespace: {{ .apps.vllm.namespace }}
+ namespace: {{ .namespace }}
labels:
- includeSelectors: true
pairs:

View File

@@ -1,21 +1,22 @@
name: vllm
- description: vLLM is a fast and easy-to-use library for LLM inference and serving with OpenAI-compatible API
+ description: vLLM is a fast and easy-to-use library for LLM inference and serving
+ with OpenAI-compatible API
version: 0.5.4
- icon: https://raw.githubusercontent.com/vllm-project/vllm/main/docs/source/assets/logos/vllm-logo-text-light.png
+ icon: https://unpkg.com/@lobehub/icons-static-png@latest/dark/vllm.png
requires: []
defaultConfig:
image: vllm/vllm-openai:v0.5.4
model: Qwen/Qwen2.5-7B-Instruct
maxModelLen: 8192
tensorParallelSize: 1
- gpuMemoryUtilization: 0.90
+ gpuMemoryUtilization: 0.9
enforceEager: true
gpuProduct: "RTX 4090"
cpuRequest: "4"
cpuLimit: "8"
memoryRequest: "16Gi"
memoryLimit: "24Gi"
gpuProduct: RTX 4090
cpuRequest: '4'
cpuLimit: '8'
memoryRequest: 16Gi
memoryLimit: 24Gi
gpuCount: 1
domain: vllm.{{ .cloud.domain }}
namespace: llm
- defaultSecrets: []
+ defaultSecrets: []
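defaultConfig itself still carries a template (domain: vllm.{{ .cloud.domain }}), which points at a two-pass flow: expand the manifest's defaultConfig against cluster-level config first, then hand the expanded map to the app's own templates as their flat context. A sketch of that idea using gopkg.in/yaml.v3 and text/template; the flow, the cluster values, and everything beyond the quoted manifest keys are assumptions:

```go
package main

import (
	"bytes"
	"fmt"
	"text/template"

	"gopkg.in/yaml.v3"
)

// A trimmed-down manifest with only string-valued defaultConfig entries.
var manifest = []byte(`
defaultConfig:
  model: Qwen/Qwen2.5-7B-Instruct
  domain: vllm.{{ .cloud.domain }}
  namespace: llm
`)

func main() {
	var m struct {
		DefaultConfig map[string]string `yaml:"defaultConfig"`
	}
	if err := yaml.Unmarshal(manifest, &m); err != nil {
		panic(err)
	}

	// Pass 1: expand templates inside defaultConfig against cluster config.
	cluster := map[string]any{"cloud": map[string]any{"domain": "example.com"}} // stand-in

	ctx := map[string]any{}
	for k, v := range m.DefaultConfig {
		var buf bytes.Buffer
		if err := template.Must(template.New(k).Parse(v)).Execute(&buf, cluster); err != nil {
			panic(err)
		}
		ctx[k] = buf.String()
	}

	// Pass 2 would render deployment.yaml, ingress.yaml, etc. with ctx,
	// where the values are now flat keys: .model, .domain, .namespace.
	fmt.Println(ctx["domain"]) // vllm.example.com
}
```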

View File

@@ -1,4 +1,4 @@
apiVersion: v1
kind: Namespace
metadata:
- name: {{ .apps.vllm.namespace }}
+ name: {{ .namespace }}