diff --git a/pkg/controller/vsphere/actuator_test.go b/pkg/controller/vsphere/actuator_test.go
index 70746d7c9855ebc937cc1986aade85477dbab42f..613576ce5a86d25418dc8c73626e6a7aa56d9909 100644
--- a/pkg/controller/vsphere/actuator_test.go
+++ b/pkg/controller/vsphere/actuator_test.go
@@ -232,7 +232,7 @@ func TestMachineEvents(t *testing.T) {
 			operation: func(actuator *Actuator, machine *machinev1.Machine) {
 				actuator.Delete(ctx, machine)
 			},
-			event: "test: reconciler failed to Delete machine: destroying vm in progress, reconciling",
+			event: "test: reconciler failed to Delete machine: destroying vm in progress, requeuing",
 		},
 		{
 			name: "Delete machine event succeed",
diff --git a/pkg/controller/vsphere/reconciler.go b/pkg/controller/vsphere/reconciler.go
index 6c1dcae727849a56ab7806e6554e9379b135c887..1a60fa7d194725a2f12867e5f42210b1dc1d3623 100644
--- a/pkg/controller/vsphere/reconciler.go
+++ b/pkg/controller/vsphere/reconciler.go
@@ -48,7 +48,9 @@ const (
 
 // vSphere tasks description IDs, for determinate task types (clone, delete, etc)
 const (
-	cloneVmTaskDescriptionId = "VirtualMachine.clone"
+	cloneVmTaskDescriptionId    = "VirtualMachine.clone"
+	destroyVmTaskDescriptionId  = "VirtualMachine.destroy"
+	powerOffVmTaskDescriptionId = "VirtualMachine.powerOff"
 )
 
 // Reconciler runs the logic to reconciles a machine resource towards its desired state
@@ -126,7 +128,7 @@ func (r *Reconciler) create() error {
 				return fmt.Errorf("Failed to check task status: %w", err)
 			}
 		} else if !taskIsFinished {
-			return fmt.Errorf("task %v has not finished", moTask.Reference().Value)
+			return fmt.Errorf("%v task %v has not finished", moTask.Info.DescriptionId, moTask.Reference().Value)
 		}
 		// If taskIsFinished then next reconcile should result in update.
 		return nil
@@ -157,9 +159,9 @@ func (r *Reconciler) update() error {
 				Namespace: r.machine.Namespace,
 				Reason:    "Task finished with error",
 			})
-			return err
+			return fmt.Errorf("%v task %v finished with error: %w", moTask.Info.DescriptionId, moTask.Reference().Value, err)
 		} else if !taskIsFinished {
-			return fmt.Errorf("task %v has not finished", moTask.Reference().Value)
+			return fmt.Errorf("%v task %v has not finished", moTask.Info.DescriptionId, moTask.Reference().Value)
 		}
 	}
 }
@@ -241,15 +243,14 @@ func (r *Reconciler) delete() error {
 					Reason:    "Task finished with error",
 				})
 				klog.Errorf("Delete task finished with error: %w", err)
-				return err
+				return fmt.Errorf("%v task %v finished with error: %w", moTask.Info.DescriptionId, moTask.Reference().Value, err)
 			} else {
 				klog.Warningf(
 					"TaskRef points to clone task which finished with error: %w. Proceeding with machine deletion", err,
 				)
-				return err
 			}
 		} else if !taskIsFinished {
-			return fmt.Errorf("task %v has not finished", moTask.Reference().Value)
+			return fmt.Errorf("%v task %v has not finished", moTask.Info.DescriptionId, moTask.Reference().Value)
 		}
 	}
 }
@@ -296,12 +297,21 @@ func (r *Reconciler) delete() error {
 		}
 	}
 
-	if _, err := vm.powerOffVM(); err != nil {
-		return err
+	powerState, err := vm.getPowerState()
+	if err != nil {
+		return fmt.Errorf("can not determine %v vm power state: %w", r.machine.GetName(), err)
+	}
+	if powerState != types.VirtualMachinePowerStatePoweredOff {
+		powerOffTaskRef, err := vm.powerOffVM()
+		if err != nil {
+			return fmt.Errorf("%v: failed to power off vm: %w", r.machine.GetName(), err)
+		}
+		if err := setProviderStatus(powerOffTaskRef, conditionSuccess(), r.machineScope, vm); err != nil {
+			return fmt.Errorf("failed to set provider status: %w", err)
+		}
+		return fmt.Errorf("powering off vm is in progress, requeuing")
 	}
 
-	// Will this error if VM not all the way powered off yet? Don't want to
-	// emit an event for a transient condition.
 	task, err := vm.Obj.Destroy(r.Context)
 	if err != nil {
 		metrics.RegisterFailedInstanceDelete(&metrics.MachineLabels{
@@ -317,7 +327,7 @@
 	}
 
 	// TODO: consider returning an error to specify retry time here
-	return fmt.Errorf("destroying vm in progress, reconciling")
+	return fmt.Errorf("destroying vm in progress, requeuing")
 }
 
 // nodeHasVolumesAttached returns true if node status still have volumes attached
diff --git a/pkg/controller/vsphere/reconciler_test.go b/pkg/controller/vsphere/reconciler_test.go
index 39e8493b72618645a135dc081a346f211264bbc3..b16b90248e41661c8ea5a2b19ae2946ed787c33c 100644
--- a/pkg/controller/vsphere/reconciler_test.go
+++ b/pkg/controller/vsphere/reconciler_test.go
@@ -1400,17 +1400,32 @@ func TestDelete(t *testing.T) {
 		t.Errorf("expected error on the first call to delete")
 	}
 
-	moTask, err := reconciler.session.GetTask(reconciler.Context, reconciler.providerStatus.TaskRef)
+	powerOffTask, err := reconciler.session.GetTask(reconciler.Context, reconciler.providerStatus.TaskRef)
 	if err != nil {
 		if !isRetrieveMONotFound(reconciler.providerStatus.TaskRef, err) {
 			t.Fatal(err)
 		}
 	}
-	if moTask.Info.DescriptionId != "VirtualMachine.destroy" {
-		t.Errorf("task description expected: VirtualMachine.destroy, got: %v", moTask.Info.DescriptionId)
+	// first run should schedule power off
+	if powerOffTask.Info.DescriptionId != powerOffVmTaskDescriptionId {
+		t.Errorf("task description expected: %v, got: %v", powerOffVmTaskDescriptionId, powerOffTask.Info.DescriptionId)
 	}
 
-	// expect the second call to not find the vm and succeed
+	if err := reconciler.delete(); err == nil {
+		t.Errorf("expected error on the second call to delete")
+	}
+	destroyTask, err := reconciler.session.GetTask(reconciler.Context, reconciler.providerStatus.TaskRef)
+	if err != nil {
+		if !isRetrieveMONotFound(reconciler.providerStatus.TaskRef, err) {
+			t.Fatal(err)
+		}
+	}
+	// second run should destroy vm
+	if destroyTask.Info.DescriptionId != destroyVmTaskDescriptionId {
+		t.Errorf("task description expected: %v, got: %v", destroyVmTaskDescriptionId, destroyTask.Info.DescriptionId)
+	}
+
+	// expect the third call to not find the vm and succeed
 	if err := reconciler.delete(); err != nil {
 		t.Errorf("unexpected error: %v", err)
 	}