From 4dd1194911b8c31a89e1562845c3fed9dc603013 Mon Sep 17 00:00:00 2001 From: Vineet Reddy Rajula <rajula.vineet.reddy@cern.ch> Date: Tue, 27 Jul 2021 04:28:04 +0200 Subject: [PATCH 1/4] Use oc rsync to restore files --- tekton-task-templates/restore_pv_job.yaml | 9 ++-- velero-restic-restore/restore_pvs.sh | 61 ++--------------------- 2 files changed, 8 insertions(+), 62 deletions(-) diff --git a/tekton-task-templates/restore_pv_job.yaml b/tekton-task-templates/restore_pv_job.yaml index e8d7968..e0095a9 100644 --- a/tekton-task-templates/restore_pv_job.yaml +++ b/tekton-task-templates/restore_pv_job.yaml @@ -31,8 +31,10 @@ spec: command: - /scripts/restore_pvs.sh env: - - name: OS_CLOUD - value: openstack + - name: DRUPALSITE + value: $DRUPALSITE + - name: NAMESPACE + value: $NAMESPACE - name: VELERO_NAMESPACE value: $VELERO_NAMESPACE - name: VELERO_BACKUP_NAME @@ -51,9 +53,6 @@ spec: key: cephfsBackupS3SecretKey resources: {} volumeMounts: - - name: openstack-credentials - readOnly: true - mountPath: /etc/openstack/ - name: velero-restic-credentials readOnly: true mountPath: /tmp diff --git a/velero-restic-restore/restore_pvs.sh b/velero-restic-restore/restore_pvs.sh index 867d430..5a0ea85 100755 --- a/velero-restic-restore/restore_pvs.sh +++ b/velero-restic-restore/restore_pvs.sh @@ -13,16 +13,6 @@ validateVar(){ set -e -# Contact the OpenStack manila API to retrieve information about each of the manila shares -# We need this to be able to mount PVs for backup -# See https://clouddocs.web.cern.ch/file_shares/programmatic_access.html -MANILA_URL=$(openstack catalog show manilav2 -f json | jq -r '.endpoints[] | select(.interface == "public") | .url') -validateVar "$MANILA_URL" "MANILA_URL" - -# OpenStack token issues will expire after 24h, so we can create several tokens per day -OPENSTACK_MANILA_SECRET=$(openstack token issue -f json | jq -r '.id') -validateVar "$OPENSTACK_MANILA_SECRET" "OPENSTACK_MANILA_SECRET" - # Use the velero backup name, to fetch the 
PodVolumeBackup resource and the ID of the restic snapshot, PV name accordingly # List the PodVolumeBackup resources by label filtering POD_VOLUME_BACKUP=`oc get PodVolumeBackup -n "$VELERO_NAMESPACE" -l velero.io/backup-name="$VELERO_BACKUP_NAME" -o name` @@ -42,60 +32,17 @@ RESTIC_REPO=$(echo "$POD_VOLUME_BACKUP_JSON" | jq -r '.spec.repoIdentifier') validateVar "$RESTIC_REPO" "RESTIC_REPO" echo "$RESTIC_REPO" -# Fetch the PV name -PV_NAME=pvc-$(echo "$POD_VOLUME_BACKUP_JSON" | jq -r '.spec.tags["pvc-uid"]') -validateVar "$PV_NAME" "PV_NAME" -echo "$PV_NAME" - -PV_JSON=`oc get pv "$PV_NAME" -o json` -validateVar "$PV_JSON" "PV_JSON" - -NAMESPACE_CSI_DRIVER=$(echo "$PV_JSON" | jq -r '.spec.csi.nodeStageSecretRef.namespace') -validateVar "$NAMESPACE_CSI_DRIVER" "NAMESPACE_CSI_DRIVER" -echo "$NAMESPACE_CSI_DRIVER" - -# We need this information to access the manila API -MANILA_SHARE_ID=$(echo "$PV_JSON" | jq -r '.spec.csi.volumeAttributes.shareID') -validateVar "$MANILA_SHARE_ID" "MANILA_SHARE_ID" -echo "$MANILA_SHARE_ID" - -MANILA_SHARE_ACCESS_ID=$(echo "$PV_JSON" | jq -r '.spec.csi.volumeAttributes.shareAccessID') -validateVar "$MANILA_SHARE_ACCESS_ID" "MANILA_SHARE_ACCESS_ID" -echo "$MANILA_SHARE_ACCESS_ID" - -MANILA_EXPORT_LOCATIONS=$(curl -X GET -H "X-Auth-Token: $OPENSTACK_MANILA_SECRET" -H "X-Openstack-Manila-Api-Version: 2.51" "$MANILA_URL/shares/$MANILA_SHARE_ID/export_locations") -validateVar "$MANILA_EXPORT_LOCATIONS" "MANILA_EXPORT_LOCATIONS" -echo "$MANILA_EXPORT_LOCATIONS" - -# Stores monitors and path of the PV, similar to -# 137.138.121.135:6789,188.184.85.133:6789,188.184.91.157:6789:/volumes/_nogroup/337f5361-bee2-415b-af8e-53eaec1add43 -CEPHFS_PATH_PV=$(echo "$MANILA_EXPORT_LOCATIONS" | jq -r '.export_locations[]?.path') -validateVar "$CEPHFS_PATH_PV" "CEPHFS_PATH_PV" -echo "$CEPHFS_PATH_PV" - -# Stores the userKey credentials needed to manually mount CephFS PVs -MANILA_ACCESS_RULES=$(curl -X GET -H "X-Auth-Token: $OPENSTACK_MANILA_SECRET" -H 
"X-Openstack-Manila-Api-Version: 2.51" "$MANILA_URL/share-access-rules/$MANILA_SHARE_ACCESS_ID") -validateVar "$MANILA_ACCESS_RULES" "MANILA_ACCESS_RULES" -echo "$MANILA_ACCESS_RULES" - -CEPHFS_USERKEY=$(echo "$MANILA_ACCESS_RULES" | jq -r '.access.access_key') -validateVar "$CEPHFS_USERKEY" "CEPHFS_USERKEY" -echo "$CEPHFS_USERKEY" - -echo "mounting $PV_NAME in /mnt JOB_UID: $JOB_UID ..." -mount -t ceph "$CEPHFS_PATH_PV" -o name="$PV_NAME",noatime,secret="$CEPHFS_USERKEY" /mnt - # The target directory for restic restore needs to have the same permissions as '/drupal-data' for rsync later mkdir -p /restore chmod 777 /restore restic -p /tmp/repository-password -r "$RESTIC_REPO" restore "$RESTIC_SNAPSHOT_ID" --target /restore -rsync -avz /restore/ /mnt/ --delete +# Fetch the pod name from the drupalSite env var +POD_NAME=`oc get pods -l app=drupal,drupalSite="$DRUPALSITE" -o name -n "$NAMESPACE" | sed "s/pod\///g" | head -n 1` +validateVar "$POD_NAME" "POD_NAME" -# Unmount pv from /mnt earlier mounted -echo "unmounting $PV_NAME from /mnt JOB_UID: $JOB_UID ..." -umount /mnt +oc rsync /restore/ "$POD_NAME":/drupal-data --delete -n "$NAMESPACE" # We remove /root/.cache/ in each iteration to prevent restic backups to run out of memory and fail the cronjobs we run, # as we detected this malfunction in our infra. 
-- GitLab From adc863c6ae0f573a6a29bb8437e51b9742f1699a Mon Sep 17 00:00:00 2001 From: Vineet Reddy Rajula <rajula.vineet.reddy@cern.ch> Date: Tue, 27 Jul 2021 20:48:28 +0200 Subject: [PATCH 2/4] Remove ownerRef and add argocd ignore annotation --- chart/drupal-operations/templates/clear-cache.yaml | 4 +++- chart/drupal-operations/templates/database-restore.yaml | 4 +++- chart/drupal-operations/templates/drupalsite-backup.yaml | 4 +++- chart/drupal-operations/templates/drupalsite-restore.yaml | 4 +++- examples/clear-cache-taskrun.yaml | 7 ------- examples/database-restore-taskrun.yaml | 7 ------- examples/drupalsite-backup-taskrun.yaml | 7 ------- examples/drupalsite-restore-taskrun.yaml | 7 ------- 8 files changed, 12 insertions(+), 32 deletions(-) diff --git a/chart/drupal-operations/templates/clear-cache.yaml b/chart/drupal-operations/templates/clear-cache.yaml index 05ab110..32acc84 100644 --- a/chart/drupal-operations/templates/clear-cache.yaml +++ b/chart/drupal-operations/templates/clear-cache.yaml @@ -2,8 +2,10 @@ apiVersion: tekton.dev/v1beta1 kind: ClusterTask metadata: name: clear-cache - annotations: + labels: app: drupal + annotations: + argocd.argoproj.io/compare-options: IgnoreExtraneous spec: params: - name: drupalSite diff --git a/chart/drupal-operations/templates/database-restore.yaml b/chart/drupal-operations/templates/database-restore.yaml index 3fdf7eb..3e9f4ed 100644 --- a/chart/drupal-operations/templates/database-restore.yaml +++ b/chart/drupal-operations/templates/database-restore.yaml @@ -2,8 +2,10 @@ apiVersion: tekton.dev/v1beta1 kind: ClusterTask metadata: name: database-restore - annotations: + labels: app: drupal + annotations: + argocd.argoproj.io/compare-options: IgnoreExtraneous spec: params: - name: drupalSite diff --git a/chart/drupal-operations/templates/drupalsite-backup.yaml b/chart/drupal-operations/templates/drupalsite-backup.yaml index 27e9b41..964d32a 100644 --- a/chart/drupal-operations/templates/drupalsite-backup.yaml 
+++ b/chart/drupal-operations/templates/drupalsite-backup.yaml @@ -2,8 +2,10 @@ apiVersion: tekton.dev/v1beta1 kind: ClusterTask metadata: name: drupalsite-backup - annotations: + labels: app: drupal + annotations: + argocd.argoproj.io/compare-options: IgnoreExtraneous spec: params: - name: drupalSite diff --git a/chart/drupal-operations/templates/drupalsite-restore.yaml b/chart/drupal-operations/templates/drupalsite-restore.yaml index e7d3de3..8fe740f 100644 --- a/chart/drupal-operations/templates/drupalsite-restore.yaml +++ b/chart/drupal-operations/templates/drupalsite-restore.yaml @@ -2,8 +2,10 @@ apiVersion: tekton.dev/v1beta1 kind: ClusterTask metadata: name: drupalsite-restore - annotations: + labels: app: drupal + annotations: + argocd.argoproj.io/compare-options: IgnoreExtraneous spec: params: - name: drupalSite diff --git a/examples/clear-cache-taskrun.yaml b/examples/clear-cache-taskrun.yaml index b8c8e30..dd0257c 100644 --- a/examples/clear-cache-taskrun.yaml +++ b/examples/clear-cache-taskrun.yaml @@ -2,13 +2,6 @@ apiVersion: tekton.dev/v1beta1 kind: TaskRun metadata: generateName: clear-cache- - ownerReferences: - - apiVersion: tekton.dev/v1beta1 - blockOwnerDeletion: true - controller: true - kind: Pipeline - name: build-test - uid: my-uid spec: taskRef: name: clear-cache diff --git a/examples/database-restore-taskrun.yaml b/examples/database-restore-taskrun.yaml index 08af4ba..80accf0 100644 --- a/examples/database-restore-taskrun.yaml +++ b/examples/database-restore-taskrun.yaml @@ -2,13 +2,6 @@ apiVersion: tekton.dev/v1beta1 kind: TaskRun metadata: generateName: database-restore- - ownerReferences: - - apiVersion: tekton.dev/v1beta1 - blockOwnerDeletion: true - controller: true - kind: Pipeline - name: build-test - uid: my-uid spec: taskRef: name: database-restore diff --git a/examples/drupalsite-backup-taskrun.yaml b/examples/drupalsite-backup-taskrun.yaml index 904ee1f..67745ab 100644 --- a/examples/drupalsite-backup-taskrun.yaml +++ 
b/examples/drupalsite-backup-taskrun.yaml @@ -2,13 +2,6 @@ apiVersion: tekton.dev/v1beta1 kind: TaskRun metadata: generateName: drupalsite-backup- - ownerReferences: - - apiVersion: tekton.dev/v1beta1 - blockOwnerDeletion: true - controller: true - kind: ClusterTask - name: build-test - uid: my-uid spec: taskRef: name: drupalsite-backup diff --git a/examples/drupalsite-restore-taskrun.yaml b/examples/drupalsite-restore-taskrun.yaml index bafc026..a26a6ed 100644 --- a/examples/drupalsite-restore-taskrun.yaml +++ b/examples/drupalsite-restore-taskrun.yaml @@ -2,13 +2,6 @@ apiVersion: tekton.dev/v1beta1 kind: TaskRun metadata: generateName: drupalsite-restore- - ownerReferences: - - apiVersion: tekton.dev/v1beta1 - blockOwnerDeletion: true - controller: true - kind: ClusterTask - name: build-test - uid: my-uid spec: taskRef: name: drupalsite-restore -- GitLab From 4e63caee6056649c4262f60865560d317b117b4c Mon Sep 17 00:00:00 2001 From: Vineet Reddy Rajula <rajula.vineet.reddy@cern.ch> Date: Wed, 28 Jul 2021 10:47:50 +0200 Subject: [PATCH 3/4] Add container name to oc rsync cmd --- velero-restic-restore/restore_pvs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/velero-restic-restore/restore_pvs.sh b/velero-restic-restore/restore_pvs.sh index 5a0ea85..27aaf7b 100755 --- a/velero-restic-restore/restore_pvs.sh +++ b/velero-restic-restore/restore_pvs.sh @@ -42,7 +42,7 @@ restic -p /tmp/repository-password -r "$RESTIC_REPO" restore "$RESTIC_SNAPSHOT_I POD_NAME=`oc get pods -l app=drupal,drupalSite="$DRUPALSITE" -o name -n "$NAMESPACE" | sed "s/pod\///g" | head -n 1` validateVar "$POD_NAME" "POD_NAME" -oc rsync /restore/ "$POD_NAME":/drupal-data --delete -n "$NAMESPACE" +oc rsync /restore/ "$POD_NAME":/drupal-data --delete -n "$NAMESPACE" -c php-fpm # We remove /root/.cache/ in each iteration to prevent restic backups to run out of memory and fail the cronjobs we run, # as we detected this malfunction in our infra. 
-- GitLab From af588e78c2028e2781eadcd2cec67952a02713c9 Mon Sep 17 00:00:00 2001 From: Vineet Reddy Rajula <rajula.vineet.reddy@cern.ch> Date: Wed, 28 Jul 2021 12:08:18 +0200 Subject: [PATCH 4/4] Remove mounting openstack auth creds in restore job --- tekton-task-templates/restore_pv_job.yaml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tekton-task-templates/restore_pv_job.yaml b/tekton-task-templates/restore_pv_job.yaml index e0095a9..42b3731 100644 --- a/tekton-task-templates/restore_pv_job.yaml +++ b/tekton-task-templates/restore_pv_job.yaml @@ -15,10 +15,6 @@ spec: job-name: $TASK-$DRUPALSITE spec: volumes: - - name: openstack-credentials - secret: - secretName: openstack-auth-secrets - defaultMode: 420 - name: velero-restic-credentials secret: secretName: velero-restic-credentials @@ -56,13 +52,6 @@ spec: - name: velero-restic-credentials readOnly: true mountPath: /tmp - lifecycle: - preStop: - exec: - command: - - /bin/sh - - '-c' - - umount /mnt; sleep 10 terminationMessagePath: /dev/termination-log terminationMessagePolicy: File imagePullPolicy: Always -- GitLab