# NVIDIA Device Plugin DaemonSet
# Based on official manifest from:
# https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.17.1/deployments/static/nvidia-device-plugin.yml
# Licensed under the Apache License, Version 2.0
#
# Deploys the nvidia-device-plugin-ctr container as a DaemonSet in
# kube-system, restricted by node affinity to nodes carrying the
# feature.node.kubernetes.io/pci-0300_10de.present=true label.
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-device-plugin-daemonset
  namespace: kube-system
  labels:
    app.kubernetes.io/name: nvidia-device-plugin
    app.kubernetes.io/component: device-plugin
    managedBy: kustomize
    partOf: wild-cloud
spec:
  # The selector must keep matching the `name` label set on the pod template.
  selector:
    matchLabels:
      name: nvidia-device-plugin-ds
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        name: nvidia-device-plugin-ds
        app.kubernetes.io/name: nvidia-device-plugin
        app.kubernetes.io/component: device-plugin
    spec:
      # Requires a RuntimeClass named `nvidia` to exist in the cluster.
      runtimeClassName: nvidia
      tolerations:
        # Allow scheduling onto nodes tainted nvidia.com/gpu:NoSchedule.
        - key: nvidia.com/gpu
          operator: Exists
          effect: NoSchedule
        - key: CriticalAddonsOnly
          operator: Exists
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  # 10de is NVIDIA's PCI vendor ID; presumably this label is
                  # applied by Node Feature Discovery -- confirm it is
                  # deployed, otherwise no node will match.
                  - key: feature.node.kubernetes.io/pci-0300_10de.present
                    operator: In
                    values:
                      - "true"
      # Mark this pod as a critical add-on; when enabled, the critical add-on
      # scheduler reserves resources for critical add-on pods so that they can
      # be rescheduled after a failure.
      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
      priorityClassName: "system-node-critical"
      securityContext:
        seccompProfile:
          type: RuntimeDefault
      containers:
        - image: nvcr.io/nvidia/k8s-device-plugin:v0.17.1
          name: nvidia-device-plugin-ctr
          env:
            # Same path as the hostPath of the `mps-root` volume below.
            - name: MPS_ROOT
              value: /run/nvidia/mps
            - name: NVIDIA_VISIBLE_DEVICES
              value: all
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: compute,utility
            # Quoted so the value stays a string, as env values must be.
            - name: FAIL_ON_INIT_ERROR
              value: "false"
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
          volumeMounts:
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
            - name: mps-shm
              mountPath: /dev/shm
            - name: mps-root
              mountPath: /mps
            - name: cdi-root
              mountPath: /var/run/cdi
      volumes:
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
        - name: mps-root
          hostPath:
            path: /run/nvidia/mps
            type: DirectoryOrCreate
        # No hostPath `type` is set here, so no existence check is performed
        # on this path before mounting.
        - name: mps-shm
          hostPath:
            path: /run/nvidia/mps/shm
        - name: cdi-root
          hostPath:
            path: /var/run/cdi
            type: DirectoryOrCreate