diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c48d7927cd3c929c88924bb5cb14e4f88f0364d7..aa7a8c3fd1aa1c42aa615c09284efa781b5961f9 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,37 +1,24 @@ stages: - build -.build: +build: stage: build - image: - name: gitlab-registry.cern.ch/ci-tools/docker-image-builder - entrypoint: [""] + image: + # We recommend using the CERN version of the Kaniko image: gitlab-registry.cern.ch/ci-tools/docker-image-builder + name: gitlab-registry.cern.ch/ci-tools/docker-image-builder + entrypoint: [""] script: - - echo "{\"auths\":{\"$CI_REGISTRY\":{\"username\":\"$CI_REGISTRY_USER\",\"password\":\"$CI_REGISTRY_PASSWORD\"}}}" > /kaniko/.docker/config.json - - /kaniko/executor --context $CI_PROJECT_DIR --dockerfile $CI_PROJECT_DIR/Dockerfile --destination $IMAGE_DESTINATION - - echo "Image pushed successfully to ${IMAGE_DESTINATION}" - - -##################################################### -################### Merge Requests ################## - -build_mr: - extends: .build - variables: - IMAGE_DESTINATION: ${CI_REGISTRY_IMAGE}/openshift-cli:${CI_COMMIT_REF_SLUG} - rules: - - if: $CI_MERGE_REQUEST_ID - when: manual - allow_failure: true - - when: never - -###################################################### -#################### Master branch ################### - -build_master: - extends: .build - variables: - IMAGE_DESTINATION: ${CI_REGISTRY_IMAGE}/openshift-cli:latest - rules: - - if: $CI_COMMIT_BRANCH == 'master' - - when: never + - | + case "$CI_COMMIT_BRANCH" in + v*) export DATE=$(date -u +%Y.%m.%dT%H-%M-%SZ); export TAG="RELEASE"-${DATE}; echo "TAG=$TAG" >> build.env ;; + *) export TAG=$CI_COMMIT_SHORT_SHA ;; + esac + - wget https://github.com/mikefarah/yq/releases/download/v4.2.0/yq_linux_amd64 -O /yq && chmod +x /yq # keep TLS verification on: github.com serves a valid certificate, --no-check-certificate would allow a MITM to substitute the binary + # This is not the common Authentication config, unknown reason why common config fails + - echo "{\"auths\":{\"$CI_REGISTRY\":{\"auth\":\"$(echo -n 
${CI_REGISTRY_USER}:${CI_REGISTRY_PASSWORD} | base64 -w 0)\"}}}" > /kaniko/.docker/config.json + # Image builder + - /kaniko/executor --context $CI_PROJECT_DIR --dockerfile $CI_PROJECT_DIR/Dockerfile --destination ${CI_REGISTRY_IMAGE}/velero-restore:${CI_COMMIT_BRANCH}-${TAG} + - echo "Image pushed successfully to ${CI_REGISTRY_IMAGE}/velero-restore:${CI_COMMIT_BRANCH}-${TAG}" + artifacts: + reports: + dotenv: build.env diff --git a/Dockerfile b/Dockerfile index f352bbda90e0a770b25e9279e35f0b49a9212543..d8e27c1c58605f2956507077e5f50bdc555ac81f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,16 +1,20 @@ -# Base image: https://github.com/Docker-Hub-frolvlad/docker-alpine-glibc -# This image makes glibc work side by side with musl libc -FROM frolvlad/alpine-glibc:latest +FROM gitlab-registry.cern.ch/paas-tools/openshift-client LABEL maintainer="Drupal Admins <drupal-admins@cern.ch>" -ARG BUILD_DEPS='tar gzip' -ARG RUN_DEPS='curl ca-certificates gettext' +ARG restic_version=0.12.0 -RUN apk --no-cache add $BUILD_DEPS $RUN_DEPS && \ - curl -sLo /tmp/oc.tar.gz https://mirror.openshift.com/pub/openshift-v4/x86_64/clients/ocp/stable/openshift-client-linux.tar.gz && \ - tar xzvf /tmp/oc.tar.gz -C /usr/local/bin/ && \ - rm -rf /tmp/oc.tar.gz && \ - apk del $BUILD_DEPS +RUN yum install epel-release -y && \ + # install restic + yum install yum-plugin-copr -y && \ + yum copr enable copart/restic -y && \ + yum install restic-${restic_version} -y && \ + # we need this to interact with manila OpenStack to get CephFS information + yum install centos-release-openstack-train -y && \ + yum install -y /usr/bin/openstack --disableplugin=protectbase && \ + yum clean all + +COPY ./tekton-task-templates /tekton-task-templates +COPY ./velero-restic-restore/restore_pvs.sh /scripts/restore_pvs.sh CMD ["/usr/local/bin/oc"] diff --git a/chart/cluster-tasks/templates/clear-cache.yaml b/chart/cluster-tasks/templates/clear-cache.yaml deleted file mode 100644 index 
73ad5108ed8b125ba4ab0125904d5d1441298930..0000000000000000000000000000000000000000 --- a/chart/cluster-tasks/templates/clear-cache.yaml +++ /dev/null @@ -1,34 +0,0 @@ -apiVersion: tekton.dev/v1beta1 -kind: ClusterTask -metadata: - name: clear-cache -spec: - params: - - name: drupalSite - type: string - description: The name of the drupal site - - name: namespace - type: string - description: The namespace of the drupal site - workspaces: - - name: job - steps: - - name: clear-cache - image: {{ .Values.openshiftCli.image }} - command: - - /bin/sh - - '-c' - args: - - |- - echo "--------------------------- Clear cache ---------------------------" - echo "Drupal site name: $(params.drupalSite)" - export TASK=clear-cache - export DRUPALSITE=$(params.drupalSite) - export NAMESPACE=$(params.namespace) - export SERVING_POD_IMAGE=$(oc get -n $(params.namespace) drupalsite/$(params.drupalSite) -o=jsonpath='{.status.servingPodImage}') - envsubst < $(workspaces.job.path)/{{ .Values.jobTemplateFile }} | oc create -n $(params.namespace) -f - - oc wait --for=condition=complete job/$TASK-$(params.drupalSite) - jobStatus=$(oc get job/$TASK-$(params.drupalSite) -o=jsonpath='{.status.conditions[*].type}' | grep -i -E 'failed|complete' || echo "Failed") - echo "Job status: $jobStatus" - echo "Job logs:" - oc logs job/$TASK-$DRUPALSITE -n $(params.namespace) diff --git a/chart/cluster-tasks/templates/database-backup.yaml b/chart/cluster-tasks/templates/database-backup.yaml deleted file mode 100644 index b6003d40121265862c4832359652f957e33df9e8..0000000000000000000000000000000000000000 --- a/chart/cluster-tasks/templates/database-backup.yaml +++ /dev/null @@ -1,35 +0,0 @@ -apiVersion: tekton.dev/v1beta1 -kind: ClusterTask -metadata: - name: database-backup -spec: - params: - - name: drupalSite - type: string - description: The name of the drupal site - - name: namespace - type: string - description: The namespace of the drupal site - workspaces: - - name: job - steps: - - name: 
database-backup - image: {{ .Values.openshiftCli.image }} - command: - - /bin/sh - - '-c' - args: - - |- - echo "--------------------------- Database backup ---------------------------" - echo "Drupal site name: $(params.drupalSite)" - export TASK=database-backup - export DRUPALSITE=$(params.drupalSite) - export NAMESPACE=$(params.namespace) - export SERVING_POD_IMAGE=$(oc get -n $(params.namespace) drupalsite/$(params.drupalSite) -o=jsonpath='{.status.servingPodImage}') - export ARGS="-f dbBackUp.sql" - envsubst < $(workspaces.job.path)/{{ .Values.jobTemplateFile }} | oc create -n $(params.namespace) -f - - oc wait --for=condition=complete job/$TASK-$(params.drupalSite) - jobStatus=$(oc get job/$TASK-$(params.drupalSite) -o=jsonpath='{.status.conditions[*].type}' | grep -i -E 'failed|complete' || echo "Failed") - echo "Job status: $jobStatus" - echo "Job logs:" - oc logs job/$TASK-$DRUPALSITE -n $(params.namespace) diff --git a/chart/cluster-tasks/templates/database-restore.yaml b/chart/cluster-tasks/templates/database-restore.yaml deleted file mode 100644 index 45c44baa7c39cf5279e2491909eb3471bb8da70f..0000000000000000000000000000000000000000 --- a/chart/cluster-tasks/templates/database-restore.yaml +++ /dev/null @@ -1,35 +0,0 @@ -apiVersion: tekton.dev/v1beta1 -kind: ClusterTask -metadata: - name: database-restore -spec: - params: - - name: drupalSite - type: string - description: The name of the drupal site - - name: namespace - type: string - description: The namespace of the drupal site - workspaces: - - name: job - steps: - - name: database-restore - image: {{ .Values.openshiftCli.image }} - command: - - /bin/sh - - '-c' - args: - - |- - echo "--------------------------- Database restore ---------------------------" - echo "Drupal site name: $(params.drupalSite)" - export TASK=database-restore - export DRUPALSITE=$(params.drupalSite) - export NAMESPACE=$(params.namespace) - export SERVING_POD_IMAGE=$(oc get -n $(params.namespace) 
drupalsite/$(params.drupalSite) -o=jsonpath='{.status.servingPodImage}') - export ARGS="-f dbBackUp.sql" - envsubst < $(workspaces.job.path)/{{ .Values.jobTemplateFile }} | oc create -n $(params.namespace) -f - - oc wait --for=condition=complete job/$TASK-$(params.drupalSite) - jobStatus=$(oc get job/$TASK-$(params.drupalSite) -o=jsonpath='{.status.conditions[*].type}' | grep -i -E 'failed|complete' || echo "Failed") - echo "Job status: $jobStatus" - echo "Job logs:" - oc logs job/$TASK-$DRUPALSITE -n $(params.namespace) diff --git a/chart/cluster-tasks/values.yaml b/chart/cluster-tasks/values.yaml deleted file mode 100644 index aa18145e5429760da681202c342bde6c4afc231a..0000000000000000000000000000000000000000 --- a/chart/cluster-tasks/values.yaml +++ /dev/null @@ -1,4 +0,0 @@ -openshiftCli: - image: gitlab-registry.cern.ch/drupal/paas/drupal-operations/openshift-cli:latest - -jobTemplateFile: job-operations-template.yaml diff --git a/chart/cluster-tasks/Chart.yaml b/chart/drupal-operations/Chart.yaml similarity index 84% rename from chart/cluster-tasks/Chart.yaml rename to chart/drupal-operations/Chart.yaml index f9d8e2d414a485a3eec8b823c4800e13202de058..bfbf95e4ee00542846090a89069129d3ff1b5ff5 100644 --- a/chart/cluster-tasks/Chart.yaml +++ b/chart/drupal-operations/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v2 name: drupal-operations home: https://gitlab.cern.ch/drupal/paas/drupal-operations -description: A Helm chart for Deployment of Cluster Tasks +description: Deploy on-demand actions for Drupal site admins version: 0.1.0 icon: https://raw.githubusercontent.com/cdfoundation/artwork/main/tekton/horizontal/color/tekton-horizontal-color.png keywords: diff --git a/chart/drupal-operations/templates/clear-cache.yaml b/chart/drupal-operations/templates/clear-cache.yaml new file mode 100644 index 0000000000000000000000000000000000000000..05ab110017ec19d68ad29feed723eb3e0fcb4fbe --- /dev/null +++ b/chart/drupal-operations/templates/clear-cache.yaml @@ -0,0 +1,39 @@ 
+apiVersion: tekton.dev/v1beta1 +kind: ClusterTask +metadata: + name: clear-cache + annotations: + app: drupal +spec: + params: + - name: drupalSite + type: string + description: The name of the drupal site + - name: namespace + type: string + description: The namespace of the drupal site + steps: + - name: clear-cache + image: {{ .Values.image }} + imagePullPolicy: Always + command: + - /bin/sh + - '-c' + args: + - |- + echo "--------------------------- Clear cache ---------------------------" + echo "Drupal site name: $(params.drupalSite)" + export DATE=$(date +%F-%H-%M) + export TASK=clear-cache-$DATE + export OPERATION="clear-cache" + export DRUPALSITE=$(params.drupalSite) + export NAMESPACE=$(params.namespace) + export SERVING_POD_IMAGE=$(oc get -n "$NAMESPACE" "drupalsite/$DRUPALSITE" -o=jsonpath='{.status.servingPodImage}') + envsubst < /tekton-task-templates/drupal_operation_job.yaml | oc create -n "$NAMESPACE" -f - + oc wait --for=condition=complete "job/$TASK-$DRUPALSITE" + jobStatus=$(oc get "job/$TASK-$DRUPALSITE" -o=jsonpath='{.status.conditions[*].type}' | grep -i -E 'failed|complete' || echo "Failed") + echo "Job status: $jobStatus" + echo "Job logs:" + oc logs "job/$TASK-$DRUPALSITE" -n "$NAMESPACE" + echo "Cleaning up Jobs" + oc delete -n "$NAMESPACE" "job/$TASK-$DRUPALSITE" diff --git a/chart/drupal-operations/templates/database-restore.yaml b/chart/drupal-operations/templates/database-restore.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3fdf7ebd986836a029a8c99f1e78aba1d5010e25 --- /dev/null +++ b/chart/drupal-operations/templates/database-restore.yaml @@ -0,0 +1,40 @@ +apiVersion: tekton.dev/v1beta1 +kind: ClusterTask +metadata: + name: database-restore + annotations: + app: drupal +spec: + params: + - name: drupalSite + type: string + description: The name of the drupal site + - name: namespace + type: string + description: The namespace of the drupal site + steps: + - name: database-restore + image: 
gitlab-registry.cern.ch/drupal/paas/drupal-operations/openshift-cli:velero-restore + imagePullPolicy: Always + command: + - /bin/sh + - '-c' + args: + - |- + echo "--------------------------- Database restore ---------------------------" + echo "Drupal site name: $(params.drupalSite)" + export DATE=$(date +%F-%H-%M) + export TASK=database-restore-$DATE + export OPERATION="database-restore" + export DRUPALSITE=$(params.drupalSite) + export ARGS="-f 'database_backup.sql'" + export NAMESPACE=$(params.namespace) + export SERVING_POD_IMAGE=$(oc get -n $NAMESPACE drupalsite/$DRUPALSITE -o=jsonpath='{.status.servingPodImage}') + envsubst < /tekton-task-templates/drupal_operation_job.yaml | oc create -n $NAMESPACE -f - + oc wait --for=condition=complete job/$TASK-$DRUPALSITE + jobStatus=$(oc get job/$TASK-$DRUPALSITE -o=jsonpath='{.status.conditions[*].type}' | grep -i -E 'failed|complete' || echo "Failed") + echo "Job status: $jobStatus" + echo "Job logs:" + oc logs job/$TASK-$DRUPALSITE -n $NAMESPACE + echo "Cleaning up Jobs" + oc delete -n "$NAMESPACE" "job/$TASK-$DRUPALSITE" diff --git a/chart/drupal-operations/templates/drupalsite-backup.yaml b/chart/drupal-operations/templates/drupalsite-backup.yaml new file mode 100644 index 0000000000000000000000000000000000000000..27e9b414a0dd611541441453987b9218fb5b43d5 --- /dev/null +++ b/chart/drupal-operations/templates/drupalsite-backup.yaml @@ -0,0 +1,41 @@ +apiVersion: tekton.dev/v1beta1 +kind: ClusterTask +metadata: + name: drupalsite-backup + annotations: + app: drupal +spec: + params: + - name: drupalSite + type: string + description: The name of the drupal site + - name: namespace + type: string + description: The namespace of the drupal site + - name: backup + type: string + description: The name for the backup + steps: + - name: drupalsite-backup + image: {{ .Values.image }} + imagePullPolicy: Always + command: + - /bin/sh + - '-c' + args: + - |- + echo "--------------------------- DrupalSite backup 
---------------------------" + echo "Drupal site name: $(params.drupalSite)" + export DATE=$(date +%F-%H-%M) + export TASK=site-backup-$DATE + export DRUPALSITE=$(params.drupalSite) + export NAMESPACE=$(params.namespace) + export BACKUP_NAME=$(params.backup) + export TIMESTAMP_HASH=$(date | md5sum | awk '{print substr($1,length($1)-4)}') + export RESOURCE_NAME=$NAMESPACE-$BACKUP_NAME-$TIMESTAMP_HASH + export PROJECT_HASH=$(printf '%s' "$NAMESPACE" | md5sum | awk '{print $1}') + export VELERO_NAMESPACE={{ .Values.veleroNamespace }} + envsubst < /tekton-task-templates/backup_resource.yaml | oc create -f - + timeout 120s sh -c -- 'while [ $(oc get backup/$RESOURCE_NAME -n $VELERO_NAMESPACE -o jsonpath='{.status.phase}') != "Completed" ]; do printf "Backup in progress\n"; sleep 2s; done' + backupStatus=$(oc get backup/$RESOURCE_NAME -n $VELERO_NAMESPACE -o=jsonpath='{.status.phase}' | grep -i -E 'failed|completed|partiallyfailed' || echo "Failed") + echo "DrupalSite backup status: $backupStatus" diff --git a/chart/drupal-operations/templates/drupalsite-restore.yaml b/chart/drupal-operations/templates/drupalsite-restore.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e7d3de33f6a9833e122f705c9346d11f3b035fb1 --- /dev/null +++ b/chart/drupal-operations/templates/drupalsite-restore.yaml @@ -0,0 +1,58 @@ +apiVersion: tekton.dev/v1beta1 +kind: ClusterTask +metadata: + name: drupalsite-restore + annotations: + app: drupal +spec: + params: + - name: drupalSite + type: string + description: The name of the drupal site + - name: backupName + type: string + description: The name of the velero Backup resource to be restored + - name: namespace + type: string + description: The namespace of the drupal site + steps: + - name: database-backup + image: {{ .Values.image }} + imagePullPolicy: Always + command: + - /bin/sh + - '-c' + args: + - |- + echo "Drupal site name: $(params.drupalSite)" + echo "--------------------------- Drupalsite files restore 
---------------------------" + export DATE=$(date +%F-%H-%M) + export TASK=files-restore-$DATE + export DRUPALSITE=$(params.drupalSite) + export NAMESPACE=$(params.namespace) + export VELERO_BACKUP_NAME=$(params.backupName) + export VELERO_NAMESPACE={{ .Values.veleroNamespace }} + export OPERATIONS_IMAGE={{ .Values.image }} + export RESTORE_SERVICE_ACCOUNT={{ .Values.restoreServiceAccount }} + envsubst < /tekton-task-templates/restore_pv_job.yaml | oc create -f - + oc wait --for=condition=complete "job/$TASK-$DRUPALSITE" -n "$VELERO_NAMESPACE" + jobStatus=$(oc get job/$TASK-$DRUPALSITE -n $VELERO_NAMESPACE -o=jsonpath='{.status.conditions[*].type}' | grep -i -E 'failed|complete' || echo "Failed") + echo "Files restore Job status: $jobStatus" + echo "Files restore Job logs:" + oc logs "job/$TASK-$DRUPALSITE" -n "$VELERO_NAMESPACE" + echo "Cleaning up Jobs" + oc delete -n "$VELERO_NAMESPACE" "job/$TASK-$DRUPALSITE" + + echo "--------------------------- Drupalsite database restore ---------------------------" + export TASK=db-restore-$DATE + export SERVING_POD_IMAGE=$(oc get -n $NAMESPACE drupalsite/$DRUPALSITE -o=jsonpath='{.status.servingPodImage}') + export OPERATION="database-restore" + export ARGS="-f 'database_backup.sql'" + envsubst < /tekton-task-templates/drupal_operation_job.yaml | oc create -n "$NAMESPACE" -f - + oc wait --for=condition=complete job/$TASK-$DRUPALSITE + jobStatus=$(oc get job/$TASK-$DRUPALSITE -o=jsonpath='{.status.conditions[*].type}' | grep -i -E 'failed|complete' || echo "Failed") + echo "Database restore Job status: $jobStatus" + echo "Database restore Job logs:" + oc logs job/$TASK-$DRUPALSITE -n $NAMESPACE + echo "Cleaning up Jobs" + oc delete -n "$NAMESPACE" "job/$TASK-$DRUPALSITE" diff --git a/chart/drupal-operations/values.yaml b/chart/drupal-operations/values.yaml new file mode 100644 index 0000000000000000000000000000000000000000..990f94c4f80b061f25a247bc7817367c30418758 --- /dev/null +++ b/chart/drupal-operations/values.yaml @@ 
-0,0 +1,3 @@ +image: gitlab-registry.cern.ch/drupal/paas/drupal-operations/openshift-cli:velero-restore +restoreServiceAccount: "" +veleroNamespace: "" diff --git a/examples/clear-cache-taskrun.yaml b/examples/clear-cache-taskrun.yaml index 3adb40c75a6f73142c09e99614edfbd1a1faf060..b8c8e30e9c108b4fdf1eb9ae450d05b2e909fd92 100644 --- a/examples/clear-cache-taskrun.yaml +++ b/examples/clear-cache-taskrun.yaml @@ -2,6 +2,13 @@ apiVersion: tekton.dev/v1beta1 kind: TaskRun metadata: generateName: clear-cache- + ownerReferences: + - apiVersion: tekton.dev/v1beta1 + blockOwnerDeletion: true + controller: true + kind: Pipeline + name: build-test + uid: my-uid spec: taskRef: name: clear-cache @@ -11,8 +18,4 @@ spec: value: test-dimitra-drupalsite - name: namespace value: test-dimitra - workspaces: - - name: job - configmap: - name: job-operations-template-configmap serviceAccountName: tektoncd diff --git a/examples/database-backup-taskrun.yaml b/examples/database-backup-taskrun.yaml deleted file mode 100644 index 266e50edcd90f6588e5db01d9f2563ff68ea9a68..0000000000000000000000000000000000000000 --- a/examples/database-backup-taskrun.yaml +++ /dev/null @@ -1,18 +0,0 @@ -apiVersion: tekton.dev/v1beta1 -kind: TaskRun -metadata: - generateName: database-backup- -spec: - taskRef: - name: database-backup - kind: ClusterTask - params: - - name: drupalSite - value: test-dimitra-drupalsite - - name: namespace - value: test-dimitra - workspaces: - - name: job - configmap: - name: job-operations-template-configmap - serviceAccountName: tektoncd diff --git a/examples/database-restore-taskrun.yaml b/examples/database-restore-taskrun.yaml index ab0b9ae947ba1e9fc90b594daf1453cfd4d91848..08af4baf62ee06429ba6413e0111a5270ba14960 100644 --- a/examples/database-restore-taskrun.yaml +++ b/examples/database-restore-taskrun.yaml @@ -2,6 +2,13 @@ apiVersion: tekton.dev/v1beta1 kind: TaskRun metadata: generateName: database-restore- + ownerReferences: + - apiVersion: tekton.dev/v1beta1 + 
blockOwnerDeletion: true + controller: true + kind: Pipeline + name: build-test + uid: my-uid spec: taskRef: name: database-restore @@ -11,8 +18,4 @@ spec: value: test-dimitra-drupalsite - name: namespace value: test-dimitra - workspaces: - - name: job - configmap: - name: job-operations-template-configmap serviceAccountName: tektoncd diff --git a/examples/drupalsite-backup-taskrun.yaml b/examples/drupalsite-backup-taskrun.yaml new file mode 100644 index 0000000000000000000000000000000000000000..904ee1f9f119d5982feeec4928e826e7e8df221d --- /dev/null +++ b/examples/drupalsite-backup-taskrun.yaml @@ -0,0 +1,23 @@ +apiVersion: tekton.dev/v1beta1 +kind: TaskRun +metadata: + generateName: drupalsite-backup- + ownerReferences: + - apiVersion: tekton.dev/v1beta1 + blockOwnerDeletion: true + controller: true + kind: ClusterTask + name: build-test + uid: my-uid +spec: + taskRef: + name: drupalsite-backup + kind: ClusterTask + params: + - name: drupalSite + value: test-dimitra-drupalsite + - name: namespace + value: test-dimitra + - name: backup + value: tekton-test + serviceAccountName: tektoncd diff --git a/examples/drupalsite-restore-taskrun.yaml b/examples/drupalsite-restore-taskrun.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bafc0266abde53b3313c89568dbd5e28347269c9 --- /dev/null +++ b/examples/drupalsite-restore-taskrun.yaml @@ -0,0 +1,23 @@ +apiVersion: tekton.dev/v1beta1 +kind: TaskRun +metadata: + generateName: drupalsite-restore- + ownerReferences: + - apiVersion: tekton.dev/v1beta1 + blockOwnerDeletion: true + controller: true + kind: ClusterTask + name: build-test + uid: my-uid +spec: + taskRef: + name: drupalsite-restore + kind: ClusterTask + params: + - name: drupalSite + value: drupalsite-sample + - name: backupName + value: ravineet-1-tekton-test-fbfe0 + - name: namespace + value: ravineet-1 + serviceAccountName: tektoncd diff --git a/tekton-task-templates/backup_resource.yaml b/tekton-task-templates/backup_resource.yaml new file 
mode 100644 index 0000000000000000000000000000000000000000..f6d193c13eecc1495e5d6b63a32720ea4318f0fd --- /dev/null +++ b/tekton-task-templates/backup_resource.yaml @@ -0,0 +1,29 @@ +apiVersion: velero.io/v1 +kind: Backup +metadata: + name: $RESOURCE_NAME + namespace: $VELERO_NAMESPACE + labels: + drupal.webservices.cern.ch/projectHash: $PROJECT_HASH + # These labels can have too long values (max value length: 64) + # It is enough to give them as annotations + # However, annotations can't be propagate from velero Schedule -> Backup with velero <v1.6 + # so for consistency they are also provided as labels for the time being. + # + drupal.webservices.cern.ch/project: $NAMESPACE + drupal.webservices.cern.ch/drupalSite: $DRUPALSITE + annotations: + drupal.webservices.cern.ch/project: $NAMESPACE + drupal.webservices.cern.ch/drupalSite: $DRUPALSITE +spec: + hooks: {} + includedNamespaces: + - $NAMESPACE + includedResources: + - pods + labelSelector: + matchLabels: + app: drupal + drupalSite: $DRUPALSITE + ttl: 87600h0m0s # 10 years (prevent cleanup of on-demand backups) +status: {} diff --git a/tekton-task-templates/drupal_operation_job.yaml b/tekton-task-templates/drupal_operation_job.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b9fb1656902f6e050a0cdc58c9fd91b626162295 --- /dev/null +++ b/tekton-task-templates/drupal_operation_job.yaml @@ -0,0 +1,41 @@ +kind: Job +apiVersion: batch/v1 +metadata: + name: $TASK-$DRUPALSITE + namespace: $NAMESPACE + labels: + app: drupal-$TASK +spec: + activeDeadlineSeconds: 86400 + ttlSecondsAfterFinished: 86400 + backoffLimit: 3 + completions: 1 + parallelism: 1 + template: + spec: + containers: + - command: + - sh + - '-c' + - /operations/$OPERATION.sh $ARGS + env: + - name: DRUPAL_SHARED_VOLUME + value: /drupal-data + envFrom: + - secretRef: + name: dbcredentials-$DRUPALSITE + image: $SERVING_POD_IMAGE + imagePullPolicy: Always + name: taskrun + volumeMounts: + - mountPath: /drupal-data + name: 
drupal-directory + dnsPolicy: ClusterFirst + restartPolicy: Never + schedulerName: default-scheduler + securityContext: {} + terminationGracePeriodSeconds: 30 + volumes: + - name: drupal-directory + persistentVolumeClaim: + claimName: pv-claim-$DRUPALSITE diff --git a/tekton-task-templates/restore_pv_job.yaml b/tekton-task-templates/restore_pv_job.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e8d79685ece264279443f98252ae0c3533da2281 --- /dev/null +++ b/tekton-task-templates/restore_pv_job.yaml @@ -0,0 +1,75 @@ +kind: Job +apiVersion: batch/v1 +metadata: + name: $TASK-$DRUPALSITE + namespace: $VELERO_NAMESPACE + labels: + app: velero-restore +spec: + activeDeadlineSeconds: 86400 + ttlSecondsAfterFinished: 86400 + backoffLimit: 3 + template: + metadata: + labels: + job-name: $TASK-$DRUPALSITE + spec: + volumes: + - name: openstack-credentials + secret: + secretName: openstack-auth-secrets + defaultMode: 420 + - name: velero-restic-credentials + secret: + secretName: velero-restic-credentials + defaultMode: 420 + - name: cache + emptyDir: {} + containers: + - name: backups-volume-cephfs + image: $OPERATIONS_IMAGE + command: + - /scripts/restore_pvs.sh + env: + - name: OS_CLOUD + value: openstack + - name: VELERO_NAMESPACE + value: $VELERO_NAMESPACE + - name: VELERO_BACKUP_NAME + value: $VELERO_BACKUP_NAME + - name: DATABASE_BACKUP_FILENAME + value: $DATABASE_BACKUP_FILENAME + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: cephfs-backup-secrets + key: cephfsBackupS3AccessKey + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: cephfs-backup-secrets + key: cephfsBackupS3SecretKey + resources: {} + volumeMounts: + - name: openstack-credentials + readOnly: true + mountPath: /etc/openstack/ + - name: velero-restic-credentials + readOnly: true + mountPath: /tmp + lifecycle: + preStop: + exec: + command: + - /bin/sh + - '-c' + - umount /mnt; sleep 10 + terminationMessagePath: /dev/termination-log + 
terminationMessagePolicy: File + imagePullPolicy: Always + securityContext: + privileged: true + restartPolicy: Never + terminationGracePeriodSeconds: 30 + serviceAccountName: $RESTORE_SERVICE_ACCOUNT + serviceAccount: $RESTORE_SERVICE_ACCOUNT diff --git a/velero-restic-restore/restore_pvs.sh b/velero-restic-restore/restore_pvs.sh new file mode 100755 index 0000000000000000000000000000000000000000..e7c11dd35ca3106eebf7c13e84d5033e7524bc87 --- /dev/null +++ b/velero-restic-restore/restore_pvs.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash + +timestamp() { + date +%Y-%m-%dT%H:%M:%S.%3NZ +} + +validateVar(){ + if [ -z "${1}" ]; then echo "Failed to initialize the variable $2" 1>&2; exit 1; fi +} + +# Will stop the execution of the backup script if it finds any command execution error +# as all the operations are critical. +set -e + + +# Contact the OpenStack manila API to retrieve information about each of the manila shares +# We need this to be able to mount PVs for backup +# See https://clouddocs.web.cern.ch/file_shares/programmatic_access.html +MANILA_URL=$(openstack catalog show manilav2 -f json | jq -r '.endpoints[] | select(.interface == "public") | .url') +validateVar "$MANILA_URL" "MANILA_URL" + +# OpenStack token issues will expire after 24h, so we can create several tokens per day +OPENSTACK_MANILA_SECRET=$(openstack token issue -f json | jq -r '.id') +validateVar "$OPENSTACK_MANILA_SECRET" "OPENSTACK_MANILA_SECRET" + +# Use the velero backup name, to fetch the PodVolumeBackup resource and the ID of the restic snapshot, PV name accordingly +# List the PodVolumeBackup resources by label filtering +POD_VOLUME_BACKUP=`oc get PodVolumeBackup -n "$VELERO_NAMESPACE" -l velero.io/backup-name="$VELERO_BACKUP_NAME" -o name` +validateVar "$POD_VOLUME_BACKUP" "POD_VOLUME_BACKUP" + +# Fetch the PodVolumeBackup resource json +POD_VOLUME_BACKUP_JSON=`oc get "$POD_VOLUME_BACKUP" -n "$VELERO_NAMESPACE" -o json` +validateVar "$POD_VOLUME_BACKUP_JSON" "POD_VOLUME_BACKUP_JSON" + +# 
Fetch the Restic snapshot ID +RESTIC_SNAPSHOT_ID=$(echo "$POD_VOLUME_BACKUP_JSON" | jq -r '.status.snapshotID') +validateVar "$RESTIC_SNAPSHOT_ID" "RESTIC_SNAPSHOT_ID" +echo $RESTIC_SNAPSHOT_ID + +# Fetch the Restic Repo URL +RESTIC_REPO=$(echo "$POD_VOLUME_BACKUP_JSON" | jq -r '.spec.repoIdentifier') +validateVar "$RESTIC_REPO" "RESTIC_REPO" +echo $RESTIC_REPO + +# Fetch the PV name +PV_NAME=pvc-$(echo "$POD_VOLUME_BACKUP_JSON" | jq -r '.spec.tags["pvc-uid"]') +validateVar "$PV_NAME" "PV_NAME" +echo $PV_NAME + +PV_JSON=`oc get pv "$PV_NAME" -o json` +validateVar "$PV_JSON" "PV_JSON" + +NAMESPACE_CSI_DRIVER=$(echo "$PV_JSON" | jq -r '.spec.csi.nodeStageSecretRef.namespace') +validateVar "$NAMESPACE_CSI_DRIVER" "NAMESPACE_CSI_DRIVER" +echo $NAMESPACE_CSI_DRIVER + +# We need this information to access the manila API +MANILA_SHARE_ID=$(echo "$PV_JSON" | jq -r '.spec.csi.volumeAttributes.shareID') +validateVar "$MANILA_SHARE_ID" "MANILA_SHARE_ID" +echo $MANILA_SHARE_ID + +MANILA_SHARE_ACCESS_ID=$(echo "$PV_JSON" | jq -r '.spec.csi.volumeAttributes.shareAccessID') +validateVar "$MANILA_SHARE_ACCESS_ID" "MANILA_SHARE_ACCESS_ID" +echo $MANILA_SHARE_ACCESS_ID + +MANILA_EXPORT_LOCATIONS=$(curl -X GET -H "X-Auth-Token: $OPENSTACK_MANILA_SECRET" -H "X-Openstack-Manila-Api-Version: 2.51" $MANILA_URL/shares/$MANILA_SHARE_ID/export_locations) +validateVar "$MANILA_EXPORT_LOCATIONS" "MANILA_EXPORT_LOCATIONS" +echo $MANILA_EXPORT_LOCATIONS + +# Stores monitors and path of the PV, similar to +# 137.138.121.135:6789,188.184.85.133:6789,188.184.91.157:6789:/volumes/_nogroup/337f5361-bee2-415b-af8e-53eaec1add43 +CEPHFS_PATH_PV=$(echo "$MANILA_EXPORT_LOCATIONS" | jq -r '.export_locations[]?.path') +validateVar "$CEPHFS_PATH_PV" "CEPHFS_PATH_PV" +echo $CEPHFS_PATH_PV + +# Stores the userKey credentials needed to manually mount CephFS PVs +MANILA_ACCESS_RULES=$(curl -X GET -H "X-Auth-Token: $OPENSTACK_MANILA_SECRET" -H "X-Openstack-Manila-Api-Version: 2.51" 
$MANILA_URL/share-access-rules/$MANILA_SHARE_ACCESS_ID) +validateVar "$MANILA_ACCESS_RULES" "MANILA_ACCESS_RULES" +echo $MANILA_ACCESS_RULES + +CEPHFS_USERKEY=$(echo "$MANILA_ACCESS_RULES" | jq -r '.access.access_key') +validateVar "$CEPHFS_USERKEY" "CEPHFS_USERKEY" +echo $CEPHFS_USERKEY + +echo mounting "$PV_NAME" in /mnt JOB_UID: "$JOB_UID" ... +mount -t ceph "$CEPHFS_PATH_PV" -o name="$PV_NAME",noatime,secret="$CEPHFS_USERKEY" /mnt + +# The target directory for restic restore needs to have the same permissions as '/drupal-data' for rsync later +mkdir -p /restore +chmod 777 /restore + +restic -p /tmp/repository-password -r "$RESTIC_REPO" restore "$RESTIC_SNAPSHOT_ID" --target /restore + +rsync -avz /restore/ /mnt/ --delete + +# Unmount pv from /mnt earlier mounted +echo unmounting "$PV_NAME" from /mnt JOB_UID: "$JOB_UID" ... +umount /mnt + +# We remove /root/.cache/ in each iteration to prevent restic backups to run out of memory and fail the cronjobs we run, +# as we detected this malfunction in our infra. +echo "cleaning up /root/.cache/*" +rm -rf /root/.cache/*