From 21fc46e772c3d156bba577a21ceb58efc7e768c7 Mon Sep 17 00:00:00 2001 From: fossedihelm Date: Tue, 28 May 2024 10:42:49 +0200 Subject: [PATCH] Add `eviction-in-progress` annotation during evacuation Add the `descheduler.alpha.kubernetes.io/eviction-in-progress` annotation to virt-launcher pods during evacuation migration. This annotation indicates pods whose eviction was initiated by an external component. ref: https://github.com/kubevirt/community/pull/258 Signed-off-by: fossedihelm --- pkg/virt-controller/watch/migration.go | 102 ++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 1 deletion(-) diff --git a/pkg/virt-controller/watch/migration.go b/pkg/virt-controller/watch/migration.go index b0a99caac7a3..ccd3b8e59b3c 100644 --- a/pkg/virt-controller/watch/migration.go +++ b/pkg/virt-controller/watch/migration.go @@ -88,6 +88,8 @@ const defaultFinalizedMigrationGarbageCollectionBuffer = 5 // cause the migration to fail when it could have reasonably succeeded. const defaultCatchAllPendingTimeoutSeconds = int64(60 * 15) +const deschedulerEvictionInProgressAnnotation = "descheduler.alpha.kubernetes.io/eviction-in-progress" + var migrationBackoffError = errors.New(MigrationBackoffReason) type MigrationController struct { @@ -434,10 +436,16 @@ func (c *MigrationController) updateStatus(migration *virtv1.VirtualMachineInsta // 3. Begin progressing migration state based on VMI's MigrationState status. 
} else if vmi == nil { migrationCopy.Status.Phase = virtv1.MigrationFailed + if err := c.removeDeschedulerAnnotationFromSourcePod(migrationCopy); err != nil { + return err + } c.recorder.Eventf(migration, k8sv1.EventTypeWarning, FailedMigrationReason, "Migration failed because vmi does not exist.") log.Log.Object(migration).Error("vmi does not exist") } else if vmi.IsFinal() { migrationCopy.Status.Phase = virtv1.MigrationFailed + if err := c.removeDeschedulerAnnotationFromSourcePod(migrationCopy); err != nil { + return err + } c.recorder.Eventf(migration, k8sv1.EventTypeWarning, FailedMigrationReason, "Migration failed vmi shutdown during migration.") log.Log.Object(migration).Error("Unable to migrate vmi because vmi is shutdown.") } else if migration.DeletionTimestamp != nil && !c.isMigrationHandedOff(migration, vmi) { @@ -451,16 +459,28 @@ func (c *MigrationController) updateStatus(migration *virtv1.VirtualMachineInsta migrationCopy.Status.Conditions = append(migrationCopy.Status.Conditions, condition) } migrationCopy.Status.Phase = virtv1.MigrationFailed + if err := c.removeDeschedulerAnnotationFromSourcePod(migrationCopy); err != nil { + return err + } } else if podExists && podIsDown(pod) { migrationCopy.Status.Phase = virtv1.MigrationFailed + if err := c.removeDeschedulerAnnotationFromSourcePod(migrationCopy); err != nil { + return err + } c.recorder.Eventf(migration, k8sv1.EventTypeWarning, FailedMigrationReason, "Migration failed because target pod shutdown during migration") log.Log.Object(migration).Errorf("target pod %s/%s shutdown during migration", pod.Namespace, pod.Name) } else if migration.TargetIsCreated() && !podExists { migrationCopy.Status.Phase = virtv1.MigrationFailed + if err := c.removeDeschedulerAnnotationFromSourcePod(migrationCopy); err != nil { + return err + } c.recorder.Eventf(migration, k8sv1.EventTypeWarning, FailedMigrationReason, "Migration target pod was removed during active migration.") log.Log.Object(migration).Error("target 
pod disappeared during migration") } else if migration.TargetIsHandedOff() && vmi.Status.MigrationState == nil { migrationCopy.Status.Phase = virtv1.MigrationFailed + if err := c.removeDeschedulerAnnotationFromSourcePod(migrationCopy); err != nil { + return err + } c.recorder.Eventf(migration, k8sv1.EventTypeWarning, FailedMigrationReason, "VMI's migration state was cleared during the active migration.") log.Log.Object(migration).Error("vmi migration state cleared during migration") } else if migration.TargetIsHandedOff() && @@ -468,6 +488,9 @@ func (c *MigrationController) updateStatus(migration *virtv1.VirtualMachineInsta vmi.Status.MigrationState.MigrationUID != migration.UID { migrationCopy.Status.Phase = virtv1.MigrationFailed + if err := c.removeDeschedulerAnnotationFromSourcePod(migrationCopy); err != nil { + return err + } c.recorder.Eventf(migration, k8sv1.EventTypeWarning, FailedMigrationReason, "VMI's migration state was taken over by another migration job during active migration.") log.Log.Object(migration).Error("vmi's migration state was taken over by another migration object") } else if vmi.Status.MigrationState != nil && @@ -475,6 +498,9 @@ func (c *MigrationController) updateStatus(migration *virtv1.VirtualMachineInsta vmi.Status.MigrationState.Failed { migrationCopy.Status.Phase = virtv1.MigrationFailed + if err := c.removeDeschedulerAnnotationFromSourcePod(migrationCopy); err != nil { + return err + } c.recorder.Eventf(migration, k8sv1.EventTypeWarning, FailedMigrationReason, "source node reported migration failed") log.Log.Object(migration).Errorf("VMI %s/%s reported migration failed", vmi.Namespace, vmi.Name) @@ -488,6 +514,9 @@ func (c *MigrationController) updateStatus(migration *virtv1.VirtualMachineInsta migrationCopy.Status.Conditions = append(migrationCopy.Status.Conditions, condition) } else if attachmentPodExists && podIsDown(attachmentPod) { migrationCopy.Status.Phase = virtv1.MigrationFailed + if err := 
c.removeDeschedulerAnnotationFromSourcePod(migrationCopy); err != nil { + return err + } c.recorder.Eventf(migration, k8sv1.EventTypeWarning, FailedMigrationReason, "Migration failed because target attachment pod shutdown during migration") log.Log.Object(migration).Errorf("target attachment pod %s/%s shutdown during migration", attachmentPod.Namespace, attachmentPod.Name) } else { @@ -1277,6 +1306,12 @@ func (c *MigrationController) sync(key string, migration *virtv1.VirtualMachineI return nil } + if _, exists := migration.GetAnnotations()[virtv1.EvacuationMigrationAnnotation]; exists { + if err = c.addDeschedulerAnnotationToSourcePod(sourcePod); err != nil { + return err + } + } + // patch VMI annotations and set RuntimeUser in preparation for target pod creation patches := c.setupVMIRuntimeUser(vmi) if !patches.IsEmpty() { @@ -1332,8 +1367,17 @@ func (c *MigrationController) sync(key string, migration *virtv1.VirtualMachineI !vmi.Status.MigrationState.Failed && !vmi.Status.MigrationState.Completed { - return c.handleMarkMigrationFailedOnVMI(migration, vmi) + err = c.handleMarkMigrationFailedOnVMI(migration, vmi) + if err != nil { + return err + } } + + if migration.Status.Phase != virtv1.MigrationFailed { + return nil + } + + return c.removeDeschedulerAnnotationFromSourcePod(migration) case virtv1.MigrationRunning: if migration.DeletionTimestamp != nil && vmi.Status.MigrationState != nil { err = c.markMigrationAbortInVmiStatus(migration, vmi) @@ -1346,6 +1390,62 @@ func (c *MigrationController) sync(key string, migration *virtv1.VirtualMachineI return nil } +func (c *MigrationController) addDeschedulerAnnotationToSourcePod(sourcePod *k8sv1.Pod) error { + if _, exists := sourcePod.GetAnnotations()[deschedulerEvictionInProgressAnnotation]; !exists { + patchSet := patch.New( + patch.WithAdd(fmt.Sprintf("/metadata/annotations/%s", patch.EscapeJSONPointer(deschedulerEvictionInProgressAnnotation)), ""), + ) + patchBytes, err := patchSet.GeneratePayload() + if err != 
nil { + return err + } + + _, err = c.clientset.CoreV1().Pods(sourcePod.Namespace).Patch(context.Background(), sourcePod.Name, types.JSONPatchType, patchBytes, v1.PatchOptions{}) + if err != nil { + log.Log.Object(sourcePod).Errorf("failed to add %s pod annotation: %v", deschedulerEvictionInProgressAnnotation, err) + return err + } + } + return nil +} + +func (c *MigrationController) removeDeschedulerAnnotationFromSourcePod(migration *virtv1.VirtualMachineInstanceMigration) error { + if migration.Status.MigrationState == nil || migration.Status.MigrationState.SourcePod == "" { + return nil + } + + podKey := controller.NamespacedKey(migration.Namespace, migration.Status.MigrationState.SourcePod) + obj, exists, err := c.podIndexer.GetByKey(podKey) + if !exists { + log.Log.Reason(err).Errorf("source pod %s does not exist", migration.Status.MigrationState.SourcePod) + return nil + } + if err != nil { + log.Log.Reason(err).Errorf("Failed to fetch source pod %s for namespace from cache.", migration.Status.MigrationState.SourcePod) + return err + } + + sourcePod := obj.(*k8sv1.Pod) + if _, exists := sourcePod.GetAnnotations()[deschedulerEvictionInProgressAnnotation]; exists { + patchSet := patch.New( + patch.WithTest(fmt.Sprintf("/metadata/annotations/%s", patch.EscapeJSONPointer(deschedulerEvictionInProgressAnnotation)), ""), + patch.WithRemove(fmt.Sprintf("/metadata/annotations/%s", patch.EscapeJSONPointer(deschedulerEvictionInProgressAnnotation))), + ) + patchBytes, err := patchSet.GeneratePayload() + if err != nil { + return err + } + + _, err = c.clientset.CoreV1().Pods(sourcePod.Namespace).Patch(context.Background(), sourcePod.Name, types.JSONPatchType, patchBytes, v1.PatchOptions{}) + if err != nil { + log.Log.Object(sourcePod).Errorf("failed to remove %s pod annotation : %v", deschedulerEvictionInProgressAnnotation, err) + return err + } + } + + return nil +} + func (c *MigrationController) setupVMIRuntimeUser(vmi *virtv1.VirtualMachineInstance) *patch.PatchSet { 
patchSet := patch.New() if !c.clusterConfig.RootEnabled() {