v2 app deployment: templating now lives mainly in the app manifest.

2025-12-31 06:53:17 +00:00
parent 8818d822cf
commit d1304a2630
84 changed files with 630 additions and 607 deletions
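The pattern behind every hunk below: app templates used to be rendered against the full cluster config, so values had to be addressed as .apps.vllm.<key>; after this change each app's manifest defaultConfig is the template context, so the same values are reached as flat keys like .image. A minimal Go text/template sketch of the two scopings, assuming Go templating (the {{ .x }} / {{- if }} syntax suggests it); the data shapes are inferred from the diff, not taken from the actual renderer:

```go
package main

import (
	"os"
	"text/template"
)

func main() {
	// Old layout: the whole config tree is the template data, so the image
	// value sits at .apps.vllm.image.
	oldCtx := map[string]any{
		"apps": map[string]any{
			"vllm": map[string]any{"image": "vllm/vllm-openai:v0.5.4"},
		},
	}
	// New layout: the app's own defaultConfig (see manifest.yaml below) is
	// the template data, so the same value is simply .image.
	newCtx := map[string]any{"image": "vllm/vllm-openai:v0.5.4"}

	oldTmpl := template.Must(template.New("old").Parse("image: \"{{ .apps.vllm.image }}\"\n"))
	newTmpl := template.Must(template.New("new").Parse("image: \"{{ .image }}\"\n"))

	oldTmpl.Execute(os.Stdout, oldCtx) // image: "vllm/vllm-openai:v0.5.4"
	newTmpl.Execute(os.Stdout, newCtx) // image: "vllm/vllm-openai:v0.5.4"
}
```

Both render the same manifest line; only the path to the value changes.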

View File

@@ -19,10 +19,10 @@ spec:
seccompProfile:
type: RuntimeDefault
nodeSelector:
- nvidia.com/gpu.product: "{{ .apps.vllm.gpuProduct }}"
+ nvidia.com/gpu.product: "{{ .gpuProduct }}"
containers:
- name: vllm
image: "{{ .apps.vllm.image }}"
image: "{{ .image }}"
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false
@@ -31,10 +31,10 @@ spec:
- ALL
readOnlyRootFilesystem: false
args:
- - --model={{ .apps.vllm.model }}
- - --max-model-len={{ .apps.vllm.maxModelLen }}
- - --tensor-parallel-size={{ .apps.vllm.tensorParallelSize }}
- - --gpu-memory-utilization={{ .apps.vllm.gpuMemoryUtilization }}
+ - --model={{ .model }}
+ - --max-model-len={{ .maxModelLen }}
+ - --tensor-parallel-size={{ .tensorParallelSize }}
+ - --gpu-memory-utilization={{ .gpuMemoryUtilization }}
{{- if .apps.vllm.enforceEager }}
- --enforce-eager=True
{{- end }}
@@ -48,13 +48,13 @@ spec:
containerPort: 8000
resources:
requests:
cpu: "{{ .apps.vllm.cpuRequest }}"
memory: "{{ .apps.vllm.memoryRequest }}"
nvidia.com/gpu: {{ .apps.vllm.gpuCount }}
cpu: "{{ .cpuRequest }}"
memory: "{{ .memoryRequest }}"
nvidia.com/gpu: {{ .gpuCount }}
limits:
cpu: "{{ .apps.vllm.cpuLimit }}"
memory: "{{ .apps.vllm.memoryLimit }}"
nvidia.com/gpu: {{ .apps.vllm.gpuCount }}
cpu: "{{ .cpuLimit }}"
memory: "{{ .memoryLimit }}"
nvidia.com/gpu: {{ .gpuCount }}
readinessProbe:
httpGet:
path: /v1/models
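A side effect of the narrower context worth noting, assuming the renderer is Go's text/template: with the flat per-app scope, a key that a template references but the manifest's defaultConfig does not define can be made a hard render error through the standard missingkey=error option instead of silently becoming "<no value>". Nothing in this diff shows the repo enabling that option; the sketch below only illustrates the knob:

```go
package main

import (
	"fmt"
	"os"
	"text/template"
)

func main() {
	// Fail the render if a referenced key is absent from the app context.
	tmpl := template.Must(template.New("resources").
		Option("missingkey=error").
		Parse("cpu: \"{{ .cpuRequest }}\"\nmemory: \"{{ .memoryRequest }}\"\n"))

	ctx := map[string]any{"cpuRequest": "4"} // memoryRequest deliberately omitted

	if err := tmpl.Execute(os.Stdout, ctx); err != nil {
		// e.g. map has no entry for key "memoryRequest"
		fmt.Fprintln(os.Stderr, "render error:", err)
	}
}
```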

View File

@@ -3,13 +3,13 @@ kind: Ingress
metadata:
name: vllm
annotations:
- external-dns.alpha.kubernetes.io/target: {{ .cloud.domain }}
+ external-dns.alpha.kubernetes.io/target: {{ .externalDnsDomain }}
external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
traefik.ingress.kubernetes.io/router.tls: "true"
traefik.ingress.kubernetes.io/router.tls.certresolver: letsencrypt
spec:
rules:
- - host: {{ .apps.vllm.domain }}
+ - host: {{ .domain }}
http:
paths:
- path: /
@@ -21,5 +21,5 @@ spec:
number: 8000
tls:
- hosts:
- - {{ .apps.vllm.domain }}
+ - {{ .domain }}
secretName: vllm-tls
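This ingress mixes two scopes: .domain comes from the app's defaultConfig, while .externalDnsDomain (formerly .cloud.domain) looks like a cluster-level value exposed under a flat name. One way such a merged context could be assembled is sketched below; mergeCtx, the provenance of the keys, and the literal values are assumptions for illustration, not code from this repo:

```go
package main

import (
	"os"
	"text/template"
)

// mergeCtx overlays per-app values on top of a handful of cluster-wide
// values so one flat context serves both kinds of keys.
func mergeCtx(global, app map[string]any) map[string]any {
	ctx := make(map[string]any, len(global)+len(app))
	for k, v := range global {
		ctx[k] = v
	}
	for k, v := range app {
		ctx[k] = v // per-app values win on conflicts
	}
	return ctx
}

func main() {
	global := map[string]any{"externalDnsDomain": "example.com"} // stand-in value
	app := map[string]any{"domain": "vllm.example.com"}          // stand-in value

	tmpl := template.Must(template.New("ingress").Parse(
		"external-dns.alpha.kubernetes.io/target: {{ .externalDnsDomain }}\nhost: {{ .domain }}\n"))
	tmpl.Execute(os.Stdout, mergeCtx(global, app))
	// external-dns.alpha.kubernetes.io/target: example.com
	// host: vllm.example.com
}
```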

View File

@@ -1,6 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
- namespace: {{ .apps.vllm.namespace }}
+ namespace: {{ .namespace }}
labels:
- includeSelectors: true
pairs:

View File

@@ -1,21 +1,22 @@
name: vllm
- description: vLLM is a fast and easy-to-use library for LLM inference and serving with OpenAI-compatible API
+ description: vLLM is a fast and easy-to-use library for LLM inference and serving
+ with OpenAI-compatible API
version: 0.5.4
- icon: https://raw.githubusercontent.com/vllm-project/vllm/main/docs/source/assets/logos/vllm-logo-text-light.png
+ icon: https://unpkg.com/@lobehub/icons-static-png@latest/dark/vllm.png
requires: []
defaultConfig:
image: vllm/vllm-openai:v0.5.4
model: Qwen/Qwen2.5-7B-Instruct
maxModelLen: 8192
tensorParallelSize: 1
- gpuMemoryUtilization: 0.90
+ gpuMemoryUtilization: 0.9
enforceEager: true
gpuProduct: "RTX 4090"
cpuRequest: "4"
cpuLimit: "8"
memoryRequest: "16Gi"
memoryLimit: "24Gi"
gpuProduct: RTX 4090
cpuRequest: '4'
cpuLimit: '8'
memoryRequest: 16Gi
memoryLimit: 24Gi
gpuCount: 1
domain: vllm.{{ .cloud.domain }}
namespace: llm
- defaultSecrets: []
+ defaultSecrets: []
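defaultConfig itself still carries a template (domain: vllm.{{ .cloud.domain }}), which points at a two-pass flow: expand the manifest's defaultConfig against cluster-level config first, then hand the expanded map to the app's own templates as their flat context. A sketch of that idea using gopkg.in/yaml.v3 and text/template; the flow, the cluster values, and everything beyond the quoted manifest keys are assumptions:

```go
package main

import (
	"bytes"
	"fmt"
	"text/template"

	"gopkg.in/yaml.v3"
)

// A trimmed-down manifest with only string-valued defaultConfig entries.
var manifest = []byte(`
defaultConfig:
  model: Qwen/Qwen2.5-7B-Instruct
  domain: vllm.{{ .cloud.domain }}
  namespace: llm
`)

func main() {
	var m struct {
		DefaultConfig map[string]string `yaml:"defaultConfig"`
	}
	if err := yaml.Unmarshal(manifest, &m); err != nil {
		panic(err)
	}

	// Pass 1: expand templates inside defaultConfig against cluster config.
	cluster := map[string]any{"cloud": map[string]any{"domain": "example.com"}} // stand-in

	ctx := map[string]any{}
	for k, v := range m.DefaultConfig {
		var buf bytes.Buffer
		if err := template.Must(template.New(k).Parse(v)).Execute(&buf, cluster); err != nil {
			panic(err)
		}
		ctx[k] = buf.String()
	}

	// Pass 2 would render deployment.yaml, ingress.yaml, etc. with ctx,
	// where the values are now flat keys: .model, .domain, .namespace.
	fmt.Println(ctx["domain"]) // vllm.example.com
}
```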

View File

@@ -1,4 +1,4 @@
apiVersion: v1
kind: Namespace
metadata:
- name: {{ .apps.vllm.namespace }}
+ name: {{ .namespace }}