# File: wild-central-api/internal/setup/cluster-services/nvidia-device-plugin/kustomize.template/daemonset.yaml
# (91 lines, 2.8 KiB, YAML)

# NVIDIA Device Plugin DaemonSet
# Based on official manifest from: https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.17.1/deployments/static/nvidia-device-plugin.yml
# Licensed under the Apache License, Version 2.0
# Runs the NVIDIA device plugin container on GPU nodes (see nodeAffinity
# below), mounting the kubelet device-plugin directory from the host.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-device-plugin-daemonset
  namespace: kube-system
  labels:
    app.kubernetes.io/name: nvidia-device-plugin
    app.kubernetes.io/component: device-plugin
    managedBy: kustomize
    partOf: wild-cloud
spec:
  # The selector must keep matching the `name` label on the pod template.
  selector:
    matchLabels:
      name: nvidia-device-plugin-ds
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        name: nvidia-device-plugin-ds
        app.kubernetes.io/name: nvidia-device-plugin
        app.kubernetes.io/component: device-plugin
    spec:
      # Requires a RuntimeClass named "nvidia" to exist in the cluster.
      runtimeClassName: nvidia
      tolerations:
        # Allow scheduling onto nodes tainted as dedicated GPU nodes.
        - key: nvidia.com/gpu
          operator: Exists
          effect: NoSchedule
        - key: CriticalAddonsOnly
          operator: Exists
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            # nodeSelectorTerms are ORed: a node qualifies if it carries
            # either label. The labels follow Node Feature Discovery's
            # pci-<class>_<vendor>.present scheme; 10de is the NVIDIA PCI
            # vendor ID.
            nodeSelectorTerms:
              # PCI class 0300: VGA-compatible controller.
              - matchExpressions:
                  - key: feature.node.kubernetes.io/pci-0300_10de.present
                    operator: In
                    values:
                      - "true"
              # PCI class 0302: 3D controller (how headless/data-center
              # NVIDIA GPUs typically enumerate).
              - matchExpressions:
                  - key: feature.node.kubernetes.io/pci-0302_10de.present
                    operator: In
                    values:
                      - "true"
      # Mark this pod as a critical add-on; when enabled, the critical add-on
      # scheduler reserves resources for critical add-on pods so that they can
      # be rescheduled after a failure.
      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
      priorityClassName: "system-node-critical"
      # Pod-level security context: use the container runtime's default
      # seccomp profile.
      securityContext:
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: nvidia-device-plugin-ctr
          image: nvcr.io/nvidia/k8s-device-plugin:v0.17.1
          env:
            # Same path as the hostPath of the mps-root volume below.
            - name: MPS_ROOT
              value: /run/nvidia/mps
            - name: NVIDIA_VISIBLE_DEVICES
              value: all
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: compute,utility
            # Quoted so the env value stays a string rather than a boolean.
            - name: FAIL_ON_INIT_ERROR
              value: "false"
          # Container-level hardening: no privilege escalation and no Linux
          # capabilities.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
          volumeMounts:
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
            - name: mps-shm
              mountPath: /dev/shm
            - name: mps-root
              mountPath: /mps
            - name: cdi-root
              mountPath: /var/run/cdi
      volumes:
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
        - name: mps-root
          hostPath:
            path: /run/nvidia/mps
            type: DirectoryOrCreate
        - name: mps-shm
          hostPath:
            path: /run/nvidia/mps/shm
        - name: cdi-root
          hostPath:
            path: /var/run/cdi
            type: DirectoryOrCreate