diff --git a/cmd/machine-api-operator/start.go b/cmd/machine-api-operator/start.go index b225ecc29669be6ec3d7db971be3182b8a7d5f74..c23a3844ac2092c382d3e859a46a5973734fda95 100644 --- a/cmd/machine-api-operator/start.go +++ b/cmd/machine-api-operator/start.go @@ -9,6 +9,7 @@ import ( "strconv" osconfigv1 "github.com/openshift/api/config/v1" + "github.com/openshift/machine-api-operator/pkg/metrics" "github.com/openshift/machine-api-operator/pkg/operator" "github.com/openshift/machine-api-operator/pkg/util" @@ -72,11 +73,13 @@ func runStartCmd(cmd *cobra.Command, args []string) { } stopCh := make(chan struct{}) + le := util.GetLeaderElectionConfig(cb.config, osconfigv1.LeaderElection{}) + leaderelection.RunOrDie(context.TODO(), leaderelection.LeaderElectionConfig{ Lock: CreateResourceLock(cb, componentNamespace, componentName), - LeaseDuration: util.LeaseDuration, - RenewDeadline: util.RenewDeadline, - RetryPeriod: util.RetryPeriod, + RenewDeadline: le.RenewDeadline.Duration, + RetryPeriod: le.RetryPeriod.Duration, + LeaseDuration: le.LeaseDuration.Duration, Callbacks: leaderelection.LeaderCallbacks{ OnStartedLeading: func(ctx context.Context) { ctrlCtx := CreateControllerContext(cb, stopCh, componentNamespace) diff --git a/cmd/machine-healthcheck/main.go b/cmd/machine-healthcheck/main.go index 1f9fb002a7a856d041825e37b76321227d056eff..91ab7161a824ff3278c004bc737422dd8db92bc7 100644 --- a/cmd/machine-healthcheck/main.go +++ b/cmd/machine-healthcheck/main.go @@ -2,16 +2,20 @@ package main import ( "flag" + "fmt" "runtime" "github.com/openshift/machine-api-operator/pkg/controller/machinehealthcheck" "github.com/openshift/machine-api-operator/pkg/metrics" "github.com/openshift/machine-api-operator/pkg/util" + osconfigv1 "github.com/openshift/api/config/v1" machinev1 "github.com/openshift/api/machine/v1beta1" + "github.com/openshift/library-go/pkg/config/leaderelection" "github.com/openshift/machine-api-operator/pkg/controller" sdkVersion 
"github.com/operator-framework/operator-sdk/version" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/client/config" @@ -27,6 +31,12 @@ func printVersion() { } func main() { + // Used to get the default values for leader election from library-go + defaultLeaderElectionValues := leaderelection.LeaderElectionDefaulting( + osconfigv1.LeaderElection{}, + "", "", + ) + watchNamespace := flag.String( "namespace", "", @@ -57,10 +67,11 @@ func main() { "Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.", ) + // Default values are printed for the user to see, but zero is set as the default to distinguish user intent from default value for topology aware leader election leaderElectLeaseDuration := flag.Duration( "leader-elect-lease-duration", - util.LeaseDuration, - "The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled.", + 0, + fmt.Sprintf("The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled. 
Default: (%s)", defaultLeaderElectionValues.LeaseDuration.Duration), ) klog.InitFlags(nil) @@ -73,15 +84,20 @@ func main() { klog.Fatal(err) } + le := util.GetLeaderElectionConfig(cfg, osconfigv1.LeaderElection{ + Disable: !*leaderElect, + LeaseDuration: metav1.Duration{Duration: *leaderElectLeaseDuration}, + }) + opts := manager.Options{ MetricsBindAddress: *metricsAddress, HealthProbeBindAddress: *healthAddr, LeaderElection: *leaderElect, LeaderElectionNamespace: *leaderElectResourceNamespace, LeaderElectionID: "cluster-api-provider-healthcheck-leader", - LeaseDuration: leaderElectLeaseDuration, - RetryPeriod: util.TimeDuration(util.RetryPeriod), - RenewDeadline: util.TimeDuration(util.RenewDeadline), + LeaseDuration: &le.LeaseDuration.Duration, + RetryPeriod: &le.RetryPeriod.Duration, + RenewDeadline: &le.RenewDeadline.Duration, } if *watchNamespace != "" { diff --git a/cmd/machineset/main.go b/cmd/machineset/main.go index cb4d2f8a9fc53e04eb9c548d693fe778d5c97204..8fb771430b8fc73ebf79bf001b8af395aab31287 100644 --- a/cmd/machineset/main.go +++ b/cmd/machineset/main.go @@ -18,15 +18,19 @@ package main import ( "flag" + "fmt" "log" "time" + osconfigv1 "github.com/openshift/api/config/v1" machinev1 "github.com/openshift/api/machine/v1beta1" + "github.com/openshift/library-go/pkg/config/leaderelection" "github.com/openshift/machine-api-operator/pkg/controller" "github.com/openshift/machine-api-operator/pkg/controller/machineset" "github.com/openshift/machine-api-operator/pkg/metrics" "github.com/openshift/machine-api-operator/pkg/util" mapiwebhooks "github.com/openshift/machine-api-operator/pkg/webhooks" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/client/config" @@ -42,6 +46,12 @@ const ( ) func main() { + // Used to get the default values for leader election from library-go + defaultLeaderElectionValues := leaderelection.LeaderElectionDefaulting( + 
osconfigv1.LeaderElection{}, + "", "", + ) + flag.Set("logtostderr", "true") klog.InitFlags(nil) watchNamespace := flag.String("namespace", "", @@ -75,10 +85,11 @@ func main() { "Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.", ) + // Default values are printed for the user to see, but zero is set as the default to distinguish user intent from default value for topology aware leader election leaderElectLeaseDuration := flag.Duration( "leader-elect-lease-duration", - util.LeaseDuration, - "The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled.", + 0, + fmt.Sprintf("The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled. 
Default: (%s)", defaultLeaderElectionValues.LeaseDuration.Duration), ) flag.Parse() @@ -93,6 +104,11 @@ func main() { log.Fatal(err) } + le := util.GetLeaderElectionConfig(cfg, osconfigv1.LeaderElection{ + Disable: !*leaderElect, + LeaseDuration: metav1.Duration{Duration: *leaderElectLeaseDuration}, + }) + // Create a new Cmd to provide shared dependencies and start components syncPeriod := 10 * time.Minute opts := manager.Options{ @@ -103,9 +119,9 @@ func main() { LeaderElection: *leaderElect, LeaderElectionNamespace: *leaderElectResourceNamespace, LeaderElectionID: "cluster-api-provider-machineset-leader", - LeaseDuration: leaderElectLeaseDuration, - RetryPeriod: util.TimeDuration(util.RetryPeriod), - RenewDeadline: util.TimeDuration(util.RenewDeadline), + LeaseDuration: &le.LeaseDuration.Duration, + RetryPeriod: &le.RetryPeriod.Duration, + RenewDeadline: &le.RenewDeadline.Duration, } mgr, err := manager.New(cfg, opts) diff --git a/cmd/nodelink-controller/main.go b/cmd/nodelink-controller/main.go index 9d044423986886bcf3675964ab061d8cc571010c..523834a424d245784d383aa80cb842760b2ff0ed 100644 --- a/cmd/nodelink-controller/main.go +++ b/cmd/nodelink-controller/main.go @@ -2,13 +2,17 @@ package main import ( "flag" + "fmt" "runtime" + osconfigv1 "github.com/openshift/api/config/v1" machinev1 "github.com/openshift/api/machine/v1beta1" + "github.com/openshift/library-go/pkg/config/leaderelection" "github.com/openshift/machine-api-operator/pkg/controller" "github.com/openshift/machine-api-operator/pkg/controller/nodelink" "github.com/openshift/machine-api-operator/pkg/util" sdkVersion "github.com/operator-framework/operator-sdk/version" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/client/config" "sigs.k8s.io/controller-runtime/pkg/manager" @@ -24,6 +28,12 @@ func printVersion() { func main() { printVersion() + // Used to get the default values for leader election from library-go + defaultLeaderElectionValues := 
leaderelection.LeaderElectionDefaulting( + osconfigv1.LeaderElection{}, + "", "", + ) + watchNamespace := flag.String( "namespace", "", @@ -42,10 +52,11 @@ func main() { "Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.", ) + // Default values are printed for the user to see, but zero is set as the default to distinguish user intent from default value for topology aware leader election leaderElectLeaseDuration := flag.Duration( "leader-elect-lease-duration", - util.LeaseDuration, - "The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled.", + 0, + fmt.Sprintf("The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled. 
Default: (%s)", defaultLeaderElectionValues.LeaseDuration.Duration), ) klog.InitFlags(nil) @@ -58,15 +69,20 @@ func main() { klog.Fatal(err) } + le := util.GetLeaderElectionConfig(cfg, osconfigv1.LeaderElection{ + Disable: !*leaderElect, + LeaseDuration: metav1.Duration{Duration: *leaderElectLeaseDuration}, + }) + opts := manager.Options{ // Disable metrics serving MetricsBindAddress: "0", LeaderElection: *leaderElect, LeaderElectionNamespace: *leaderElectResourceNamespace, LeaderElectionID: "cluster-api-provider-nodelink-leader", - LeaseDuration: leaderElectLeaseDuration, - RetryPeriod: util.TimeDuration(util.RetryPeriod), - RenewDeadline: util.TimeDuration(util.RenewDeadline), + LeaseDuration: &le.LeaseDuration.Duration, + RetryPeriod: &le.RetryPeriod.Duration, + RenewDeadline: &le.RenewDeadline.Duration, } if *watchNamespace != "" { opts.Namespace = *watchNamespace diff --git a/cmd/vsphere/main.go b/cmd/vsphere/main.go index 19af41dfc39dc3778fb027cb5aa092a79de02065..e8a69a237bbf9ebda827fe76f33ed43c0e6b28fd 100644 --- a/cmd/vsphere/main.go +++ b/cmd/vsphere/main.go @@ -8,12 +8,14 @@ import ( configv1 "github.com/openshift/api/config/v1" machinev1 "github.com/openshift/api/machine/v1beta1" + "github.com/openshift/library-go/pkg/config/leaderelection" capimachine "github.com/openshift/machine-api-operator/pkg/controller/machine" machine "github.com/openshift/machine-api-operator/pkg/controller/vsphere" machinesetcontroller "github.com/openshift/machine-api-operator/pkg/controller/vsphere/machineset" "github.com/openshift/machine-api-operator/pkg/metrics" "github.com/openshift/machine-api-operator/pkg/util" "github.com/openshift/machine-api-operator/pkg/version" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/klog/v2" "k8s.io/klog/v2/klogr" ctrl "sigs.k8s.io/controller-runtime" @@ -27,6 +29,12 @@ func main() { var printVersion bool flag.BoolVar(&printVersion, "version", false, "print version and exit") + // Used to get the default values for leader election 
from library-go + defaultLeaderElectionValues := leaderelection.LeaderElectionDefaulting( + configv1.LeaderElection{}, + "", "", + ) + klog.InitFlags(nil) watchNamespace := flag.String( "namespace", @@ -46,10 +54,11 @@ func main() { "Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.", ) + // Default values are printed for the user to see, but zero is set as the default to distinguish user intent from default value for topology aware leader election leaderElectLeaseDuration := flag.Duration( "leader-elect-lease-duration", - util.LeaseDuration, - "The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled.", + 0, + fmt.Sprintf("The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled. 
Default: (%s)", defaultLeaderElectionValues.LeaseDuration.Duration), ) metricsAddress := flag.String( @@ -74,6 +83,11 @@ func main() { cfg := config.GetConfigOrDie() syncPeriod := 10 * time.Minute + le := util.GetLeaderElectionConfig(cfg, configv1.LeaderElection{ + Disable: !*leaderElect, + LeaseDuration: metav1.Duration{Duration: *leaderElectLeaseDuration}, + }) + opts := manager.Options{ MetricsBindAddress: *metricsAddress, HealthProbeBindAddress: *healthAddr, @@ -81,9 +95,9 @@ func main() { LeaderElection: *leaderElect, LeaderElectionNamespace: *leaderElectResourceNamespace, LeaderElectionID: "cluster-api-provider-vsphere-leader", - LeaseDuration: leaderElectLeaseDuration, - RetryPeriod: util.TimeDuration(util.RetryPeriod), - RenewDeadline: util.TimeDuration(util.RenewDeadline), + LeaseDuration: &le.LeaseDuration.Duration, + RetryPeriod: &le.RetryPeriod.Duration, + RenewDeadline: &le.RenewDeadline.Duration, } if *watchNamespace != "" { diff --git a/pkg/util/durations.go b/pkg/util/durations.go deleted file mode 100644 index efbb64d2f4278322c85e2d63028f402527f50f7a..0000000000000000000000000000000000000000 --- a/pkg/util/durations.go +++ /dev/null @@ -1,36 +0,0 @@ -/* -Copyright 2021 Red Hat. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package util - -import ( - "time" -) - -// The default durations for the leader election operations. -const ( - // LeaseDuration is the default duration for the leader election lease. 
- LeaseDuration = 137 * time.Second - // RenewDeadline is the default duration for the leader renewal. - RenewDeadline = 107 * time.Second - // RetryPeriod is the default duration for the leader election retrial. - RetryPeriod = 26 * time.Second -) - -// TimeDuration returns a pointer to the time.Duration. -func TimeDuration(i time.Duration) *time.Duration { - return &i -} diff --git a/pkg/util/leaderelection.go b/pkg/util/leaderelection.go new file mode 100644 index 0000000000000000000000000000000000000000..3f69feb35832f7c23871b817df1313455ea7c4c9 --- /dev/null +++ b/pkg/util/leaderelection.go @@ -0,0 +1,56 @@ +/* +Copyright 2021 Red Hat. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package util + +import ( + "context" + + configv1 "github.com/openshift/api/config/v1" + "github.com/openshift/library-go/pkg/config/clusterstatus" + "github.com/openshift/library-go/pkg/config/leaderelection" + "k8s.io/client-go/rest" + "k8s.io/klog/v2" +) + +// GetLeaderElectionConfig returns the leader election settings to use for a component. +// +// The supplied leaderElection is first passed through leaderelection.LeaderElectionDefaulting. +// If the caller set none of LeaseDuration, RenewDeadline or RetryPeriod and leader election is +// not disabled, the cluster infrastructure status is fetched using restcfg; when it reports +// SingleReplicaTopologyMode, the result of leaderelection.LeaderElectionSNOConfig applied to the +// defaulted config is returned instead. If the status cannot be fetched, a warning is logged and +// the defaulted config is returned. +func GetLeaderElectionConfig(restcfg *rest.Config, leaderElection configv1.LeaderElection) configv1.LeaderElection { + + // Any non-zero duration counts as an explicit choice made by the caller. + userExplicitlySetLeaderElectionValues := leaderElection.LeaseDuration.Duration != 0 || + leaderElection.RenewDeadline.Duration != 0 || + leaderElection.RetryPeriod.Duration != 0 + + // Defaults follow the OpenShift high-availability conventions: + // https://github.com/openshift/enhancements/blob/master/CONVENTIONS.md#high-availability + defaultLeaderElection := leaderelection.LeaderElectionDefaulting( + leaderElection, + "", "", + ) + + // If the caller has not supplied any leader election values and leader election is not disabled, + // fetch the cluster infra status to determine whether the single-node (SNO) leader election + // config should be used instead of the defaults. + if !userExplicitlySetLeaderElectionValues && !leaderElection.Disable { + if infra, err := clusterstatus.GetClusterInfraStatus(context.TODO(), restcfg); err == nil && infra != nil { + if infra.ControlPlaneTopology == configv1.SingleReplicaTopologyMode { + return leaderelection.LeaderElectionSNOConfig(defaultLeaderElection) + } + } else { + // Reached when the lookup returned an error or a nil status; only warn and fall + // through to the defaulted values below. + klog.Warningf("unable to get cluster infrastructure status, using HA cluster values for leader election: %v", err) + } + } + + return defaultLeaderElection +}