Skip to content

Commit

Permalink
Support waiting for kube-apiserver to be ready with timout during NPD…
Browse files Browse the repository at this point in the history
… startup
  • Loading branch information
yguo0905 committed Jul 9, 2019
1 parent 30babe9 commit 6936414
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 4 deletions.
9 changes: 9 additions & 0 deletions cmd/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"flag"
"fmt"
"os"
"time"

"net/url"

Expand Down Expand Up @@ -49,6 +50,12 @@ type NodeProblemDetectorOptions struct {
EnableK8sExporter bool
// ApiServerOverride is the custom URI used to connect to Kubernetes ApiServer.
ApiServerOverride string
// APIServerWaitTimeout is the timeout on waiting for kube-apiserver to be
// ready.
APIServerWaitTimeout time.Duration
// APIServerWaitInterval is the interval between the checks on the
// readiness of kube-apiserver.
APIServerWaitInterval time.Duration

// prometheusExporter options
// PrometheusServerPort is the port to bind the Prometheus scrape endpoint. Use 0 to disable.
Expand Down Expand Up @@ -96,6 +103,8 @@ func (npdo *NodeProblemDetectorOptions) AddFlags(fs *pflag.FlagSet) {
fs.BoolVar(&npdo.EnableK8sExporter, "enable-k8s-exporter", true, "Enables reporting to Kubernetes API server.")
fs.StringVar(&npdo.ApiServerOverride, "apiserver-override",
"", "Custom URI used to connect to Kubernetes ApiServer. This is ignored if --enable-k8s-exporter is false.")
fs.DurationVar(&npdo.APIServerWaitTimeout, "apiserver-wait-timeout", time.Duration(5)*time.Minute, "The timeout on waiting for kube-apiserver to be ready. This is ignored if --enable-k8s-exporter is false.")
fs.DurationVar(&npdo.APIServerWaitInterval, "apiserver-wait-interval", time.Duration(5)*time.Second, "The interval between the checks on the readiness of kube-apiserver. This is ignored if --enable-k8s-exporter is false.")
fs.BoolVar(&npdo.PrintVersion, "version", false, "Print version information and quit")
fs.StringVar(&npdo.HostnameOverride, "hostname-override",
"", "Custom node name used to override hostname")
Expand Down
24 changes: 23 additions & 1 deletion pkg/exporters/k8sexporter/k8s_exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/golang/glog"

"k8s.io/apimachinery/pkg/util/clock"
"k8s.io/apimachinery/pkg/util/wait"

"k8s.io/node-problem-detector/cmd/options"
"k8s.io/node-problem-detector/pkg/exporters/k8sexporter/condition"
Expand All @@ -38,13 +39,23 @@ type k8sExporter struct {
conditionManager condition.ConditionManager
}

// NewExporterOrDie creates a exporter for Kubernetes apiserver exporting, panics if error occurs.
// NewExporterOrDie creates a exporter for Kubernetes apiserver exporting,
// panics if error occurs.
//
// Note that this function may be blocked (until a timeout occurs) before
// kube-apiserver becomes ready.
func NewExporterOrDie(npdo *options.NodeProblemDetectorOptions) types.Exporter {
if !npdo.EnableK8sExporter {
return nil
}

c := problemclient.NewClientOrDie(npdo)

glog.Infof("Waiting for kube-apiserver to be ready (timeout %v)...", npdo.APIServerWaitTimeout)
if err := waitForAPIServerReadyWithTimeout(c, npdo); err != nil {
glog.Warningf("kube-apiserver did not become ready: %v", err)
}

ke := k8sExporter{
client: c,
conditionManager: condition.NewConditionManager(c, clock.RealClock{}),
Expand Down Expand Up @@ -91,3 +102,14 @@ func (ke *k8sExporter) startHTTPReporting(npdo *options.NodeProblemDetectorOptio
}
}()
}

func waitForAPIServerReadyWithTimeout(c problemclient.Client, npdo *options.NodeProblemDetectorOptions) error {
return wait.PollImmediate(npdo.APIServerWaitInterval, npdo.APIServerWaitTimeout, func() (done bool, err error) {
// If NPD can get the node object from kube-apiserver, the server is
// ready and the RBAC permission is set correctly.
if _, err := c.GetNode(); err == nil {
return true, nil
}
return false, nil
})
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import (
"reflect"
"sync"

"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
)

// FakeProblemClient is a fake problem client for debug.
Expand Down Expand Up @@ -92,3 +92,7 @@ func (f *FakeProblemClient) GetConditions(types []v1.NodeConditionType) ([]*v1.N
// Eventf does nothing now.
func (f *FakeProblemClient) Eventf(eventType string, source, reason, messageFmt string, args ...interface{}) {
}

func (f *FakeProblemClient) GetNode() (*v1.Node, error) {
return nil, fmt.Errorf("GetNode() not implemented")
}
11 changes: 9 additions & 2 deletions pkg/exporters/k8sexporter/problemclient/problem_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import (
typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/kubernetes/pkg/api/legacyscheme"

"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/clock"
Expand All @@ -47,6 +47,9 @@ type Client interface {
SetConditions(conditions []v1.NodeCondition) error
// Eventf reports the event.
Eventf(eventType string, source, reason, messageFmt string, args ...interface{})
// GetNode returns the Node object of the node on which the
// node-problem-detector runs.
GetNode() (*v1.Node, error)
}

type nodeProblemClient struct {
Expand Down Expand Up @@ -79,7 +82,7 @@ func NewClientOrDie(npdo *options.NodeProblemDetectorOptions) Client {
}

func (c *nodeProblemClient) GetConditions(conditionTypes []v1.NodeConditionType) ([]*v1.NodeCondition, error) {
node, err := c.client.Nodes().Get(c.nodeName, metav1.GetOptions{})
node, err := c.GetNode()
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -116,6 +119,10 @@ func (c *nodeProblemClient) Eventf(eventType, source, reason, messageFmt string,
recorder.Eventf(c.nodeRef, eventType, reason, messageFmt, args...)
}

func (c *nodeProblemClient) GetNode() (*v1.Node, error) {
return c.client.Nodes().Get(c.nodeName, metav1.GetOptions{})
}

// generatePatch generates condition patch
func generatePatch(conditions []v1.NodeCondition) ([]byte, error) {
raw, err := json.Marshal(&conditions)
Expand Down

0 comments on commit 6936414

Please sign in to comment.