From 10c926947857ea60d28fe572c99302d5257939f2 Mon Sep 17 00:00:00 2001 From: Hannes Hansen <mail@hannesh.de> Date: Wed, 19 Mar 2025 17:10:47 +0100 Subject: [PATCH 1/3] replicate latest pytorch and tensorflow images with gpu support; fix https://gitlab.cern.ch/mlops/project/-/issues/144 --- values-k8s.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/values-k8s.yaml b/values-k8s.yaml index 48953e9..4837004 100644 --- a/values-k8s.yaml +++ b/values-k8s.yaml @@ -309,6 +309,12 @@ items: gcr.io/knative-releases/knative.dev/serving/cmd/webhook@sha256:48aee2733721ecc77956abc5a2ca072853a669ebc97519beb48f7b3da8455e67 gcr.io/ml-pipeline/workflow-controller:v3.4.17-license-compliance docker.io/kserve/xgbserver:v0.13.1 + docker.io/pytorch/pytorch:2.6.0-cuda12.6-cudnn9-runtime + docker.io/pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime + docker.io/pytorch/pytorch:2.6.0-cuda11.8-cudnn9-runtime + docker.io/tensorflow/tensorflow:2.19.0-gpu + docker.io/tensorflow/tensorflow:2.18.0-gpu + docker.io/tensorflow/tensorflow:2.17.0-gpu registry.cern.ch/kubeflow/charts: type: chart -- GitLab From 72f2a1d777f27916977f1badea5a182a726c79a5 Mon Sep 17 00:00:00 2001 From: Hannes Hansen <mail@hannesh.de> Date: Thu, 20 Mar 2025 08:58:07 +0100 Subject: [PATCH 2/3] keep order and removed unnecessary image --- values-k8s.yaml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/values-k8s.yaml b/values-k8s.yaml index 4837004..429e4db 100644 --- a/values-k8s.yaml +++ b/values-k8s.yaml @@ -297,6 +297,11 @@ items: docker.io/kubeflownotebookswg/tensorboards-web-app:v1.9.2 docker.io/kubeflowkatib/tfevent-metrics-collector:v0.17.0 docker.io/kubeflowkatib/tf-mnist-with-summaries:v0.17.0 + docker.io/pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime + docker.io/pytorch/pytorch:2.6.0-cuda12.6-cudnn9-runtime + docker.io/tensorflow/tensorflow:2.17.0-gpu + docker.io/tensorflow/tensorflow:2.18.0-gpu + docker.io/tensorflow/tensorflow:2.19.0-gpu docker.io/pytorch/torchserve-kfs:0.9.0 docker.io/pytorch/torchserve-kfs:0.12.0-gpu docker.io/kubeflow/training-operator:v1-94dee0e @@ -309,12 +314,6 @@ items: gcr.io/knative-releases/knative.dev/serving/cmd/webhook@sha256:48aee2733721ecc77956abc5a2ca072853a669ebc97519beb48f7b3da8455e67 gcr.io/ml-pipeline/workflow-controller:v3.4.17-license-compliance docker.io/kserve/xgbserver:v0.13.1 - docker.io/pytorch/pytorch:2.6.0-cuda12.6-cudnn9-runtime - docker.io/pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime - docker.io/pytorch/pytorch:2.6.0-cuda11.8-cudnn9-runtime - docker.io/tensorflow/tensorflow:2.19.0-gpu - docker.io/tensorflow/tensorflow:2.18.0-gpu - docker.io/tensorflow/tensorflow:2.17.0-gpu registry.cern.ch/kubeflow/charts: type: chart -- GitLab From 407beec9614d8438e5bed14c298653784cc1b98e Mon Sep 17 00:00:00 2001 From: Hannes Hansen <mail@hannesh.de> Date: Thu, 20 Mar 2025 12:59:01 +0100 Subject: [PATCH 3/3] keep order --- values-k8s.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/values-k8s.yaml b/values-k8s.yaml index 429e4db..72dcf09 100644 --- a/values-k8s.yaml +++ b/values-k8s.yaml @@ -274,6 +274,8 @@ items: docker.io/istio/proxyv2:1.22.1 docker.io/kubeflownotebookswg/pvcviewer-controller:v1.9.2 docker.io/library/python:3.9 + docker.io/pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime + docker.io/pytorch/pytorch:2.6.0-cuda12.6-cudnn9-runtime docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.17.0 gcr.io/knative-releases/knative.dev/serving/cmd/queue@sha256:89e6f90141f1b63405883fbb4de0d3b6d80f8b77e530904c4d29bdcd1dc5a167 docker.io/rayproject/ray:2.23.0-py311-cpu @@ -295,13 +297,11 @@ items: docker.io/kubeflowkatib/suggestion-skopt:v0.17.0 docker.io/kubeflownotebookswg/tensorboard-controller:v1.9.2 docker.io/kubeflownotebookswg/tensorboards-web-app:v1.9.2 - docker.io/kubeflowkatib/tfevent-metrics-collector:v0.17.0 - docker.io/kubeflowkatib/tf-mnist-with-summaries:v0.17.0 - docker.io/pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime - docker.io/pytorch/pytorch:2.6.0-cuda12.6-cudnn9-runtime docker.io/tensorflow/tensorflow:2.17.0-gpu docker.io/tensorflow/tensorflow:2.18.0-gpu docker.io/tensorflow/tensorflow:2.19.0-gpu + docker.io/kubeflowkatib/tfevent-metrics-collector:v0.17.0 + docker.io/kubeflowkatib/tf-mnist-with-summaries:v0.17.0 docker.io/pytorch/torchserve-kfs:0.9.0 docker.io/pytorch/torchserve-kfs:0.12.0-gpu docker.io/kubeflow/training-operator:v1-94dee0e -- GitLab