v2 app deployment: templating is now done mainly in the manifest.
This commit is contained in:
@@ -19,10 +19,10 @@ spec:
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
nodeSelector:
|
||||
nvidia.com/gpu.product: "{{ .apps.vllm.gpuProduct }}"
|
||||
nvidia.com/gpu.product: "{{ .gpuProduct }}"
|
||||
containers:
|
||||
- name: vllm
|
||||
image: "{{ .apps.vllm.image }}"
|
||||
image: "{{ .image }}"
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
@@ -31,10 +31,10 @@ spec:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: false
|
||||
args:
|
||||
- --model={{ .apps.vllm.model }}
|
||||
- --max-model-len={{ .apps.vllm.maxModelLen }}
|
||||
- --tensor-parallel-size={{ .apps.vllm.tensorParallelSize }}
|
||||
- --gpu-memory-utilization={{ .apps.vllm.gpuMemoryUtilization }}
|
||||
- --model={{ .model }}
|
||||
- --max-model-len={{ .maxModelLen }}
|
||||
- --tensor-parallel-size={{ .tensorParallelSize }}
|
||||
- --gpu-memory-utilization={{ .gpuMemoryUtilization }}
|
||||
{{- if .apps.vllm.enforceEager }}
|
||||
- --enforce-eager=True
|
||||
{{- end }}
|
||||
@@ -48,13 +48,13 @@ spec:
|
||||
containerPort: 8000
|
||||
resources:
|
||||
requests:
|
||||
cpu: "{{ .apps.vllm.cpuRequest }}"
|
||||
memory: "{{ .apps.vllm.memoryRequest }}"
|
||||
nvidia.com/gpu: {{ .apps.vllm.gpuCount }}
|
||||
cpu: "{{ .cpuRequest }}"
|
||||
memory: "{{ .memoryRequest }}"
|
||||
nvidia.com/gpu: {{ .gpuCount }}
|
||||
limits:
|
||||
cpu: "{{ .apps.vllm.cpuLimit }}"
|
||||
memory: "{{ .apps.vllm.memoryLimit }}"
|
||||
nvidia.com/gpu: {{ .apps.vllm.gpuCount }}
|
||||
cpu: "{{ .cpuLimit }}"
|
||||
memory: "{{ .memoryLimit }}"
|
||||
nvidia.com/gpu: {{ .gpuCount }}
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /v1/models
|
||||
|
||||
@@ -3,13 +3,13 @@ kind: Ingress
|
||||
metadata:
|
||||
name: vllm
|
||||
annotations:
|
||||
external-dns.alpha.kubernetes.io/target: {{ .cloud.domain }}
|
||||
external-dns.alpha.kubernetes.io/target: {{ .externalDnsDomain }}
|
||||
external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
|
||||
traefik.ingress.kubernetes.io/router.tls: "true"
|
||||
traefik.ingress.kubernetes.io/router.tls.certresolver: letsencrypt
|
||||
spec:
|
||||
rules:
|
||||
- host: {{ .apps.vllm.domain }}
|
||||
- host: {{ .domain }}
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
@@ -21,5 +21,5 @@ spec:
|
||||
number: 8000
|
||||
tls:
|
||||
- hosts:
|
||||
- {{ .apps.vllm.domain }}
|
||||
- {{ .domain }}
|
||||
secretName: vllm-tls
|
||||
@@ -1,6 +1,6 @@
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
namespace: {{ .apps.vllm.namespace }}
|
||||
namespace: {{ .namespace }}
|
||||
labels:
|
||||
- includeSelectors: true
|
||||
pairs:
|
||||
|
||||
@@ -1,21 +1,22 @@
|
||||
name: vllm
|
||||
description: vLLM is a fast and easy-to-use library for LLM inference and serving with OpenAI-compatible API
|
||||
description: vLLM is a fast and easy-to-use library for LLM inference and serving
|
||||
with OpenAI-compatible API
|
||||
version: 0.5.4
|
||||
icon: https://raw.githubusercontent.com/vllm-project/vllm/main/docs/source/assets/logos/vllm-logo-text-light.png
|
||||
icon: https://unpkg.com/@lobehub/icons-static-png@latest/dark/vllm.png
|
||||
requires: []
|
||||
defaultConfig:
|
||||
image: vllm/vllm-openai:v0.5.4
|
||||
model: Qwen/Qwen2.5-7B-Instruct
|
||||
maxModelLen: 8192
|
||||
tensorParallelSize: 1
|
||||
gpuMemoryUtilization: 0.90
|
||||
gpuMemoryUtilization: 0.9
|
||||
enforceEager: true
|
||||
gpuProduct: "RTX 4090"
|
||||
cpuRequest: "4"
|
||||
cpuLimit: "8"
|
||||
memoryRequest: "16Gi"
|
||||
memoryLimit: "24Gi"
|
||||
gpuProduct: RTX 4090
|
||||
cpuRequest: '4'
|
||||
cpuLimit: '8'
|
||||
memoryRequest: 16Gi
|
||||
memoryLimit: 24Gi
|
||||
gpuCount: 1
|
||||
domain: vllm.{{ .cloud.domain }}
|
||||
namespace: llm
|
||||
defaultSecrets: []
|
||||
defaultSecrets: []
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: {{ .apps.vllm.namespace }}
|
||||
name: {{ .namespace }}
|
||||
Reference in New Issue
Block a user