Unverified Commit d8af5ea7 authored by Clayton Coleman's avatar Clayton Coleman
Browse files

Ingress rollouts should specify minReadySeconds

Ingress components may run directly behind load balancers and thus
need to delay deployment rollout long enough for the load balancers
to see the new process. Without minReadySeconds, the upgrade process
will immediately delete the old pod as soon as the new pod is ready,
which means that the LB may briefly see no pods, which then triggers
on some LB the behavior of including all hosts (generally if all
hosts are unhealthy the LB will allow requests to go to any host
as opposed to failing closed).

In the future we may wish to allow minReadySeconds to be customized
by customers since not all load balancers will take less than 30s
to converge. However, 30s is a reasonable start.
parent f98fde81
......@@ -200,6 +200,16 @@ func desiredRouterDeployment(ci *operatorv1.IngressController, ingressController
gracePeriod := int64(60 * 60)
deployment.Spec.Template.Spec.TerminationGracePeriodSeconds = &gracePeriod
// Services behind load balancers should roll out new instances only after we are certain
// the new instance is part of rotation. This is set based on the highest value across all
// platforms, excluding custom load balancers like an F5, but our recommendation for these
// values for those should be indentical to the slowest cloud, AWS (which does not allow
// health checks to be more frequent than 10 seconds).
deployment.Spec.MinReadySeconds =
(2 + /* max healthy checks required to be brought into rotation across all platforms */
1) * /* we could miss one */
10 /* the longest health check interval on any platform */
volumes := deployment.Spec.Template.Spec.Volumes
routerVolumeMounts := deployment.Spec.Template.Spec.Containers[0].VolumeMounts
......@@ -994,6 +1004,7 @@ func hashableDeployment(deployment *appsv1.Deployment, onlyTemplate bool) *appsv
// Copy metadata and spec fields to which any changes should trigger an
// update of the deployment but should not trigger a rolling update.
hashableDeployment.Labels = deployment.Labels
hashableDeployment.Spec.MinReadySeconds = deployment.Spec.MinReadySeconds
hashableDeployment.Spec.Strategy = deployment.Spec.Strategy
var replicas *int32
if deployment.Spec.Replicas != nil && *deployment.Spec.Replicas != int32(1) {
......@@ -1178,6 +1189,7 @@ func deploymentConfigChanged(current, expected *appsv1.Deployment) (bool, *appsv
replicas = *expected.Spec.Replicas
}
updated.Spec.Replicas = &replicas
updated.Spec.MinReadySeconds = expected.Spec.MinReadySeconds
return true, updated
}
......
......@@ -1015,6 +1015,20 @@ func TestDeploymentConfigChanged(t *testing.T) {
},
expect: true,
},
{
description: "if .spec.minReadySeconds changes to non-zero",
mutate: func(deployment *appsv1.Deployment) {
deployment.Spec.MinReadySeconds = 10
},
expect: true,
},
{
description: "if .spec.minReadySeconds changes to none",
mutate: func(deployment *appsv1.Deployment) {
deployment.Spec.MinReadySeconds = 0
},
expect: true,
},
}
for _, tc := range testCases {
......@@ -1027,6 +1041,7 @@ func TestDeploymentConfigChanged(t *testing.T) {
UID: "1",
},
Spec: appsv1.DeploymentSpec{
MinReadySeconds: 30,
Strategy: appsv1.DeploymentStrategy{
Type: appsv1.RollingUpdateDeploymentStrategyType,
RollingUpdate: &appsv1.RollingUpdateDeployment{
......
......@@ -557,7 +557,7 @@ func TestIngressControllerScale(t *testing.T) {
}
// Wait for the deployment scale up to be observed.
if err := waitForAvailableReplicas(t, kclient, ic, 2*time.Minute, newReplicas); err != nil {
if err := waitForAvailableReplicas(t, kclient, ic, 4*time.Minute, newReplicas); err != nil {
t.Fatalf("failed waiting deployment %s to scale to %d: %v", defaultName, newReplicas, err)
}
......@@ -2153,14 +2153,16 @@ func waitForIngressControllerCondition(t *testing.T, cl client.Client, timeout t
}
func assertIngressControllerDeleted(t *testing.T, cl client.Client, ing *operatorv1.IngressController) {
if err := deleteIngressController(cl, ing, 2*time.Minute); err != nil {
t.Helper()
if err := deleteIngressController(t, cl, ing, 2*time.Minute); err != nil {
t.Fatalf("WARNING: cloud resources may have been leaked! failed to delete ingresscontroller %s: %v", ing.Name, err)
} else {
t.Logf("deleted ingresscontroller %s", ing.Name)
}
}
func deleteIngressController(cl client.Client, ic *operatorv1.IngressController, timeout time.Duration) error {
func deleteIngressController(t *testing.T, cl client.Client, ic *operatorv1.IngressController, timeout time.Duration) error {
t.Helper()
name := types.NamespacedName{Namespace: ic.Namespace, Name: ic.Name}
if err := cl.Delete(context.TODO(), ic); err != nil {
return fmt.Errorf("failed to delete ingresscontroller: %v", err)
......@@ -2171,6 +2173,7 @@ func deleteIngressController(cl client.Client, ic *operatorv1.IngressController,
if errors.IsNotFound(err) {
return true, nil
}
t.Logf("failed to delete ingress controller %s/%s: %v", ic.Namespace, ic.Name, err)
return false, nil
}
return false, nil
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment