From 283c192e0a9d6a3f826a8605289bdb173595b4e1 Mon Sep 17 00:00:00 2001
From: 1602077 <jack.charlie.munday@cern.ch>
Date: Wed, 5 Mar 2025 13:19:25 +0000
Subject: [PATCH] build(ci): extend image repl to cover acc repos too

test: updating ci to only scan image difference

refactor: script to use parallel for proper error handling
---
 .gitlab-ci.yml             | 48 ++++++++-----------
 .helmignore                |  1 +
 scripts/validate_images.sh | 96 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 117 insertions(+), 28 deletions(-)
 create mode 100755 scripts/validate_images.sh

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 8ba7c14..8c61270 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,7 +1,6 @@
 variables:
-  VALUES_FILE: "values-k8s.yaml"
-  IMAGE_REPOSITORIES: registry.cern.ch/kubernetes registry.cern.ch/kubeflow
-  CHART_REPOSITORY: registry.cern.ch/kubernetes/charts
+  VALUES_FILES: "values-k8s.yaml values-acc.yaml"
+  VALIDATE_ALL_IMAGES: "false" # quoted: CI variable values are strings, the script compares against the text "true"
 
 stages:
   - test
@@ -13,26 +12,12 @@ test_images:
     - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
     - if: $CI_PIPELINE_SOURCE == 'merge_request_event'
   image: "registry.cern.ch/kubernetes/ops:0.4.0"
+  before_script: apt-get update && apt-get install -y parallel
   script:
     - |
-      check-images-exist() {
-        for ARTIFACT in "${ARTIFACTS[@]}"; do
-          {
-            skopeo inspect "docker://${ARTIFACT}" >/dev/null || {
-              echo "image $ARTIFACT does not exist" 1>&2
-              exit 1
-            }
-            echo "image $ARTIFACT exists"
-          } &
-        done
-        wait
-      }
-    - |
-      for REPO in ${IMAGE_REPOSITORIES[@]}; do
-        echo "checking image replication in $REPO"
-        readarray ARTIFACTS < <(yq ".items.\"${REPO}\".artifacts" $VALUES_FILE)
-        ARTIFACTS=(${ARTIFACTS[*]}) # strip trailing new lines from each element.
-        check-images-exist
+      echo ">> verifying oci artifacts in $VALUES_FILES"
+      for VALUES_FILE in $VALUES_FILES; do # split on whitespace on purpose: one values file per word
+        VALIDATE_ALL_IMAGES="$VALIDATE_ALL_IMAGES" ./scripts/validate_images.sh "$VALUES_FILE"
       done
 
 test_charts:
@@ -63,11 +48,18 @@ test_charts:
         helm repo remove "$REPO_NAME" >/dev/null 2>&1
       }
     - |
-      readarray ARTIFACTS < <(yq ".items.\"${CHART_REPOSITORY}\".artifacts" $VALUES_FILE)
-      for ARTIFACT in "${ARTIFACTS[@]}"; do
-        # strip trailing new line characters & skip if resultant output is an empty string.
-        ARTIFACT=$(echo $ARTIFACT | tr -d '\n')
-        [ -z "${ARTIFACT}" ] && continue
-        IFS=' ' read -r REPO_URL CHART_NAME CHART_VERSION <<<"$ARTIFACT"
-        check-chart-exists "$REPO_URL" "$CHART_NAME" "$CHART_VERSION"
+      echo ">> verifying charts in $VALUES_FILES"
+      for VALUES_FILE in ${VALUES_FILES}; do # unquoted on purpose: VALUES_FILES is a space-separated list, visit one file per pass
+        CHART_REPOSITORIES=$(yq '.items | with_entries(select(.value.type == "chart")) | to_entries | .[].key' "$VALUES_FILE")
+        for CHART_REPOSITORY in ${CHART_REPOSITORIES}; do
+          echo ">> checking chart replication in $CHART_REPOSITORY"
+          readarray ARTIFACTS < <(yq ".items.\"${CHART_REPOSITORY}\".artifacts" "$VALUES_FILE")
+          for ARTIFACT in "${ARTIFACTS[@]/---}"; do
+            # strip trailing new line characters & skip entries that are empty or the literal "null".
+            ARTIFACT=$(echo $ARTIFACT | tr -d '\n')
+            [[ -z "${ARTIFACT}" || "${ARTIFACT}" == "null" ]] && continue
+            IFS=' ' read -r REPO_URL CHART_NAME CHART_VERSION <<<"$ARTIFACT"
+            check-chart-exists "$REPO_URL" "$CHART_NAME" "$CHART_VERSION"
+          done
+        done
       done
diff --git a/.helmignore b/.helmignore
index 3e73e4c..a3e3f52 100644
--- a/.helmignore
+++ b/.helmignore
@@ -1,2 +1,3 @@
 .git
 catalog-info.yaml
+scripts/
diff --git a/scripts/validate_images.sh b/scripts/validate_images.sh
new file mode 100755
index 0000000..bdb1d4a
--- /dev/null
+++ b/scripts/validate_images.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+# validates that newly added images for replication exist upstream.
+#
+# USAGE: ./scripts/validate_images.sh VALUES_FILE
+
+# Additional arguments to pass to skopeo, skopeo defaults to pulling the system
+# architecture which can cause issues if you are on ARM.
+SKOPEO_ADDITIONAL_ARGS="${SKOPEO_ADDITIONAL_ARGS:="--raw"}"
+# SKOPEO_ADDITIONAL_ARGS="--override-arch amd64 --override-os linux"
+
+# Validate every image in the values file rather that just the newly added ones
+# when compared against master.
+VALIDATE_ALL_IMAGES=${VALIDATE_ALL_IMAGES:=false}
+
+# ###########################################################################
+# generates a list of newly added image artifacts for a repository in a given
+# file by comparing its state on the feature branch with that of master.
+#
+# This assumes we are always merging into master, which should be okay.
+#
+# INPUTS
+# $1 - values.yaml
+# $2 - repository
+#
+# GLOBALS:
+# NEW_ARTIFACTS - Contains images to be verified.
+# ###########################################################################
+generate_image_diff() {
+  if [ "$#" -ne 2 ]; then
+    echo "illegal number of parameters, expected 2" >&2; return 1
+  fi
+  local VALUES_FILE=$1 REPOSITORY=$2
+  # yq expression selecting the artifact list of the requested repository.
+  local QUERY=".items.\"${REPOSITORY}\".artifacts"
+
+  if [[ "$VALIDATE_ALL_IMAGES" = true ]]; then # i.e. do not generate diff.
+    NEW_ARTIFACTS=$(yq "$QUERY" "$VALUES_FILE")
+    return
+  fi
+
+  git fetch origin master:master
+  # comm -23 keeps only the lines unique to the branch side; both inputs must be sorted.
+  # Process substitution replaces the fixed /tmp file names, which concurrent runs would clobber.
+  NEW_ARTIFACTS=$(comm -23 \
+    <(yq "$QUERY" "$VALUES_FILE" | sort) <(git show "master:$VALUES_FILE" | yq "$QUERY" | sort))
+}
+
+# ###########################################################################
+# private function that verifies an image exists using skopeo, this is called
+# by `parallel` to simultaneously check a large number of images. `parallel`
+# is used over the &/wait syntax in bash to ensure proper error handling.
+#
+# INPUTS
+# $1 - Artifact whose existence is to be checked.
+# ###########################################################################
+_check_image_exists() {
+  ARTIFACT="$1"
+  skopeo inspect $SKOPEO_ADDITIONAL_ARGS "docker://${ARTIFACT}" >/dev/null || {
+    echo "image $ARTIFACT does not exist" 1>&2
+    exit 1
+  }
+  echo "image $ARTIFACT exists"
+}
+
+# ###########################################################################
+# validate_images_exist_upstream is the main entrypoint of this script.
+#
+# It generates a diff of newly added images (as compared against master) and
+# uses skopeo to verify that these images are valid.
+#
+# INPUTS
+# $1 - VALUES_FILE which contains images to be parsed.
+# ###########################################################################
+validate_images_exist_upstream() {
+  if [ "$#" -ne 1 ]; then
+    echo "illegal number of parameters, expected 1" >&2; return 1
+  fi
+  local VALUES_FILE=$1 STATUS=0
+
+  check_images_exist() {
+    export -f _check_image_exists
+    export SKOPEO_ADDITIONAL_ARGS
+    # one artifact per line; drop blank lines and the literal "null" yq prints for a missing artifacts key.
+    printf '%s\n' "$@" | grep -vE '^(null)?$' | parallel --will-cite --halt soon,fail=1 _check_image_exists
+  }
+
+  IMAGE_REPOSITORIES=$(yq '.items | with_entries(select(.value.type == "oci")) | to_entries | .[].key' "$VALUES_FILE")
+  for REPOSITORY in ${IMAGE_REPOSITORIES}; do
+    echo ">> checking image replication in $REPOSITORY"
+    # record any failure so a later passing repository cannot mask it; NEW_ARTIFACTS is split into words on purpose.
+    generate_image_diff "$VALUES_FILE" "$REPOSITORY" && check_images_exist $NEW_ARTIFACTS || STATUS=1
+  done
+  return "$STATUS"
+}
+
+validate_images_exist_upstream "$@"
-- 
GitLab