Moves setup files into embedded package.

This commit is contained in:
2025-10-11 22:06:39 +00:00
parent 92032202f4
commit 89c6a7aa80
112 changed files with 337 additions and 0 deletions

View File

@@ -0,0 +1,91 @@
# NVIDIA Device Plugin DaemonSet
# Based on official manifest from: https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.17.1/deployments/static/nvidia-device-plugin.yml
# Licensed under the Apache License, Version 2.0
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: nvidia-device-plugin-daemonset
namespace: kube-system
labels:
app.kubernetes.io/name: nvidia-device-plugin
app.kubernetes.io/component: device-plugin
managedBy: kustomize
partOf: wild-cloud
spec:
selector:
matchLabels:
name: nvidia-device-plugin-ds
updateStrategy:
type: RollingUpdate
template:
metadata:
labels:
name: nvidia-device-plugin-ds
app.kubernetes.io/name: nvidia-device-plugin
app.kubernetes.io/component: device-plugin
spec:
runtimeClassName: nvidia
tolerations:
- key: nvidia.com/gpu
operator: Exists
effect: NoSchedule
- key: CriticalAddonsOnly
operator: Exists
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: feature.node.kubernetes.io/pci-0300_10de.present
operator: In
values:
- "true"
# Mark this pod as a critical add-on; when enabled, the critical add-on
# scheduler reserves resources for critical add-on pods so that they can
# be rescheduled after a failure.
# See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
priorityClassName: "system-node-critical"
securityContext:
seccompProfile:
type: RuntimeDefault
containers:
- image: nvcr.io/nvidia/k8s-device-plugin:v0.17.1
name: nvidia-device-plugin-ctr
env:
- name: MPS_ROOT
value: /run/nvidia/mps
- name: NVIDIA_VISIBLE_DEVICES
value: all
- name: NVIDIA_DRIVER_CAPABILITIES
value: compute,utility
- name: FAIL_ON_INIT_ERROR
value: "false"
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop: ["ALL"]
volumeMounts:
- name: device-plugin
mountPath: /var/lib/kubelet/device-plugins
- name: mps-shm
mountPath: /dev/shm
- name: mps-root
mountPath: /mps
- name: cdi-root
mountPath: /var/run/cdi
volumes:
- name: device-plugin
hostPath:
path: /var/lib/kubelet/device-plugins
- name: mps-root
hostPath:
path: /run/nvidia/mps
type: DirectoryOrCreate
- name: mps-shm
hostPath:
path: /run/nvidia/mps/shm
- name: cdi-root
hostPath:
path: /var/run/cdi
type: DirectoryOrCreate

View File

@@ -0,0 +1,12 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: kube-system
resources:
- daemonset.yaml
- runtimeclass.yaml
labels:
- pairs:
app.kubernetes.io/name: nvidia-device-plugin
app.kubernetes.io/component: device-plugin
managedBy: kustomize
partOf: wild-cloud

View File

@@ -0,0 +1,5 @@
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
name: nvidia
handler: nvidia