---
# NVIDIA Device Plugin DaemonSet
# Based on official manifest from:
#   https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.17.1/deployments/static/nvidia-device-plugin.yml
# Licensed under the Apache License, Version 2.0
#
# Runs the nvidia-device-plugin-ctr container as a DaemonSet in kube-system,
# restricted by the node affinity below.

apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-device-plugin-daemonset
  namespace: kube-system
  labels:
    app.kubernetes.io/name: nvidia-device-plugin
    app.kubernetes.io/component: device-plugin
    managedBy: kustomize
    partOf: wild-cloud
spec:
  # The selector must keep matching the `name` label on the pod template.
  selector:
    matchLabels:
      name: nvidia-device-plugin-ds
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        name: nvidia-device-plugin-ds
        app.kubernetes.io/name: nvidia-device-plugin
        app.kubernetes.io/component: device-plugin
    spec:
      # References a RuntimeClass called "nvidia"; it is not defined in this
      # manifest and has to exist in the cluster.
      runtimeClassName: nvidia
      # Tolerate the nvidia.com/gpu NoSchedule taint and any
      # CriticalAddonsOnly taint.
      tolerations:
        - key: nvidia.com/gpu
          operator: Exists
          effect: NoSchedule
        - key: CriticalAddonsOnly
          operator: Exists
      # Schedule only onto nodes that carry this label with the value "true".
      # NOTE(review): presumably a Node Feature Discovery label for an NVIDIA
      # (PCI vendor 10de) display controller - confirm what sets it.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: feature.node.kubernetes.io/pci-0300_10de.present
                    operator: In
                    values:
                      - "true"
      # Mark this pod as a critical add-on; when enabled, the critical add-on
      # scheduler reserves resources for critical add-on pods so that they can
      # be rescheduled after a failure.
      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
      priorityClassName: "system-node-critical"
      # Pod-level security context: applies to every container in the pod.
      securityContext:
        seccompProfile:
          type: RuntimeDefault
      containers:
        - image: nvcr.io/nvidia/k8s-device-plugin:v0.17.1
          name: nvidia-device-plugin-ctr
          env:
            # Same path as the hostPath of the mps-root volume below.
            - name: MPS_ROOT
              value: /run/nvidia/mps
            - name: NVIDIA_VISIBLE_DEVICES
              value: all
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: compute,utility
            # Quoted so the value stays a string, as env values must be.
            # NOTE(review): presumably keeps the plugin from exiting when
            # initialisation fails on a node - confirm against plugin docs.
            - name: FAIL_ON_INIT_ERROR
              value: "false"
          # Container-level hardening: no privilege escalation, no capabilities.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
          # Each mount refers by name to an entry under `volumes` below.
          volumeMounts:
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
            - name: mps-shm
              mountPath: /dev/shm
            - name: mps-root
              mountPath: /mps
            - name: cdi-root
              mountPath: /var/run/cdi
      # All volumes are hostPath mounts; `DirectoryOrCreate` creates the host
      # directory if it does not exist yet.
      volumes:
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
        - name: mps-root
          hostPath:
            path: /run/nvidia/mps
            type: DirectoryOrCreate
        - name: mps-shm
          hostPath:
            path: /run/nvidia/mps/shm
        - name: cdi-root
          hostPath:
            path: /var/run/cdi
            type: DirectoryOrCreate