v2 app deployment--templating mainly in manifest now.
@@ -19,10 +19,10 @@ spec:
         seccompProfile:
           type: RuntimeDefault
       nodeSelector:
-        nvidia.com/gpu.product: "{{ .apps.vllm.gpuProduct }}"
+        nvidia.com/gpu.product: "{{ .gpuProduct }}"
       containers:
       - name: vllm
-        image: "{{ .apps.vllm.image }}"
+        image: "{{ .image }}"
         imagePullPolicy: IfNotPresent
         securityContext:
           allowPrivilegeEscalation: false
@@ -31,10 +31,10 @@ spec:
           - ALL
         readOnlyRootFilesystem: false
         args:
-        - --model={{ .apps.vllm.model }}
-        - --max-model-len={{ .apps.vllm.maxModelLen }}
-        - --tensor-parallel-size={{ .apps.vllm.tensorParallelSize }}
-        - --gpu-memory-utilization={{ .apps.vllm.gpuMemoryUtilization }}
+        - --model={{ .model }}
+        - --max-model-len={{ .maxModelLen }}
+        - --tensor-parallel-size={{ .tensorParallelSize }}
+        - --gpu-memory-utilization={{ .gpuMemoryUtilization }}
         {{- if .apps.vllm.enforceEager }}
         - --enforce-eager=True
         {{- end }}
@@ -48,13 +48,13 @@ spec:
           containerPort: 8000
         resources:
           requests:
-            cpu: "{{ .apps.vllm.cpuRequest }}"
-            memory: "{{ .apps.vllm.memoryRequest }}"
-            nvidia.com/gpu: {{ .apps.vllm.gpuCount }}
+            cpu: "{{ .cpuRequest }}"
+            memory: "{{ .memoryRequest }}"
+            nvidia.com/gpu: {{ .gpuCount }}
           limits:
-            cpu: "{{ .apps.vllm.cpuLimit }}"
-            memory: "{{ .apps.vllm.memoryLimit }}"
-            nvidia.com/gpu: {{ .apps.vllm.gpuCount }}
+            cpu: "{{ .cpuLimit }}"
+            memory: "{{ .memoryLimit }}"
+            nvidia.com/gpu: {{ .gpuCount }}
         readinessProbe:
           httpGet:
             path: /v1/models
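Since the template references in the manifest dropped the .apps.vllm. prefix, the v2 layout presumably renders each app's manifest with that app's own values block as the template root, rather than the whole values tree. A minimal sketch of a matching values file, assuming that scoping (only the key names mirror the placeholders in the manifest above; every concrete value is an illustrative placeholder, not taken from this commit):

# Hypothetical values file for the v2 layout; all values are examples.
apps:
  vllm:
    gpuProduct: "NVIDIA-A100-SXM4-80GB"            # example node label value
    image: "vllm/vllm-openai:latest"               # example image
    model: "meta-llama/Meta-Llama-3-8B-Instruct"   # example model id
    maxModelLen: 8192
    tensorParallelSize: 1
    gpuMemoryUtilization: 0.90
    enforceEager: false
    cpuRequest: "4"
    memoryRequest: "16Gi"
    cpuLimit: "8"
    memoryLimit: "32Gi"
    gpuCount: 1

Under the old rendering, every reference needed the full path ({{ .apps.vllm.model }}); with the vllm block as the dot, {{ .model }} and the rest resolve directly.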