Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support waiting for kube-apiserver to be ready with timout during NPD startup #308

Merged
merged 1 commit into from
Jul 9, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions cmd/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"flag"
"fmt"
"os"
"time"

"net/url"

Expand Down Expand Up @@ -49,6 +50,12 @@ type NodeProblemDetectorOptions struct {
EnableK8sExporter bool
// ApiServerOverride is the custom URI used to connect to Kubernetes ApiServer.
ApiServerOverride string
// APIServerWaitTimeout is the timeout on waiting for kube-apiserver to be
// ready.
APIServerWaitTimeout time.Duration
// APIServerWaitInterval is the interval between the checks on the
// readiness of kube-apiserver.
APIServerWaitInterval time.Duration

// prometheusExporter options
// PrometheusServerPort is the port to bind the Prometheus scrape endpoint. Use 0 to disable.
Expand Down Expand Up @@ -96,6 +103,8 @@ func (npdo *NodeProblemDetectorOptions) AddFlags(fs *pflag.FlagSet) {
fs.BoolVar(&npdo.EnableK8sExporter, "enable-k8s-exporter", true, "Enables reporting to Kubernetes API server.")
fs.StringVar(&npdo.ApiServerOverride, "apiserver-override",
"", "Custom URI used to connect to Kubernetes ApiServer. This is ignored if --enable-k8s-exporter is false.")
fs.DurationVar(&npdo.APIServerWaitTimeout, "apiserver-wait-timeout", time.Duration(5)*time.Minute, "The timeout on waiting for kube-apiserver to be ready. This is ignored if --enable-k8s-exporter is false.")
fs.DurationVar(&npdo.APIServerWaitInterval, "apiserver-wait-interval", time.Duration(5)*time.Second, "The interval between the checks on the readiness of kube-apiserver. This is ignored if --enable-k8s-exporter is false.")
fs.BoolVar(&npdo.PrintVersion, "version", false, "Print version information and quit")
fs.StringVar(&npdo.HostnameOverride, "hostname-override",
"", "Custom node name used to override hostname")
Expand Down
24 changes: 23 additions & 1 deletion pkg/exporters/k8sexporter/k8s_exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/golang/glog"

"k8s.io/apimachinery/pkg/util/clock"
"k8s.io/apimachinery/pkg/util/wait"

"k8s.io/node-problem-detector/cmd/options"
"k8s.io/node-problem-detector/pkg/exporters/k8sexporter/condition"
Expand All @@ -38,13 +39,23 @@ type k8sExporter struct {
conditionManager condition.ConditionManager
}

// NewExporterOrDie creates a exporter for Kubernetes apiserver exporting, panics if error occurs.
// NewExporterOrDie creates a exporter for Kubernetes apiserver exporting,
// panics if error occurs.
//
// Note that this function may be blocked (until a timeout occurs) before
// kube-apiserver becomes ready.
func NewExporterOrDie(npdo *options.NodeProblemDetectorOptions) types.Exporter {
if !npdo.EnableK8sExporter {
return nil
}

c := problemclient.NewClientOrDie(npdo)

glog.Infof("Waiting for kube-apiserver to be ready (timeout %v)...", npdo.APIServerWaitTimeout)
if err := waitForAPIServerReadyWithTimeout(c, npdo); err != nil {
glog.Warningf("kube-apiserver did not become ready: timed out on waiting for kube-apiserver to return the node object: %v", err)
}

ke := k8sExporter{
client: c,
conditionManager: condition.NewConditionManager(c, clock.RealClock{}),
Expand Down Expand Up @@ -91,3 +102,14 @@ func (ke *k8sExporter) startHTTPReporting(npdo *options.NodeProblemDetectorOptio
}
}()
}

func waitForAPIServerReadyWithTimeout(c problemclient.Client, npdo *options.NodeProblemDetectorOptions) error {
return wait.PollImmediate(npdo.APIServerWaitInterval, npdo.APIServerWaitTimeout, func() (done bool, err error) {
// If NPD can get the node object from kube-apiserver, the server is
// ready and the RBAC permission is set correctly.
if _, err := c.GetNode(); err == nil {
return true, nil
}
return false, nil
})
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import (
"reflect"
"sync"

"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
)

// FakeProblemClient is a fake problem client for debug.
Expand Down Expand Up @@ -92,3 +92,7 @@ func (f *FakeProblemClient) GetConditions(types []v1.NodeConditionType) ([]*v1.N
// Eventf does nothing now.
func (f *FakeProblemClient) Eventf(eventType string, source, reason, messageFmt string, args ...interface{}) {
}

func (f *FakeProblemClient) GetNode() (*v1.Node, error) {
return nil, fmt.Errorf("GetNode() not implemented")
}
11 changes: 9 additions & 2 deletions pkg/exporters/k8sexporter/problemclient/problem_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import (
typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/kubernetes/pkg/api/legacyscheme"

"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/clock"
Expand All @@ -47,6 +47,9 @@ type Client interface {
SetConditions(conditions []v1.NodeCondition) error
// Eventf reports the event.
Eventf(eventType string, source, reason, messageFmt string, args ...interface{})
// GetNode returns the Node object of the node on which the
// node-problem-detector runs.
GetNode() (*v1.Node, error)
}

type nodeProblemClient struct {
Expand Down Expand Up @@ -79,7 +82,7 @@ func NewClientOrDie(npdo *options.NodeProblemDetectorOptions) Client {
}

func (c *nodeProblemClient) GetConditions(conditionTypes []v1.NodeConditionType) ([]*v1.NodeCondition, error) {
node, err := c.client.Nodes().Get(c.nodeName, metav1.GetOptions{})
node, err := c.GetNode()
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -116,6 +119,10 @@ func (c *nodeProblemClient) Eventf(eventType, source, reason, messageFmt string,
recorder.Eventf(c.nodeRef, eventType, reason, messageFmt, args...)
}

func (c *nodeProblemClient) GetNode() (*v1.Node, error) {
return c.client.Nodes().Get(c.nodeName, metav1.GetOptions{})
}

// generatePatch generates condition patch
func generatePatch(conditions []v1.NodeCondition) ([]byte, error) {
raw, err := json.Marshal(&conditions)
Expand Down