Skip to content
This repository has been archived by the owner on Mar 5, 2024. It is now read-only.

Commit

Permalink
Add deeper healthcheck for agent (#268)
Browse files Browse the repository at this point in the history
Currently the agent health endpoint just checks that it can return some
metadata from AWS. I would also like to be able to check that the agent
is able to communicate successfully with the server. The motivation is
that we are renewing certificates externally and when the client cert
expires the agent will fail to talk to the server. In this case I want
the healthcheck to fail so that kubernetes will restart the agent and
cause it to re-read the (now renewed) certs from disk.

I've implemented this as a URL query param so that doing /health will
continue to function the same but /health?deep=anything will also do a
grpc call to the server and assert the health endpoint there returns
"ok"
  • Loading branch information
mattmb authored and Joseph-Irving committed Aug 9, 2019
1 parent 6057c19 commit 3ea5e8e
Show file tree
Hide file tree
Showing 4 changed files with 139 additions and 5 deletions.
43 changes: 40 additions & 3 deletions pkg/aws/metadata/handler_health.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,17 @@ package metadata
import (
"context"
"fmt"
"github.com/cenkalti/backoff"
"github.com/gorilla/mux"
"github.com/prometheus/client_golang/prometheus"
"github.com/uswitch/kiam/pkg/server"
"github.com/uswitch/kiam/pkg/statsd"
"io/ioutil"
"net/http"
)

// healthHandler serves the agent's health endpoint.
type healthHandler struct {
// client is the server client; Handle passes it to findServerHealth when
// the request carries a non-empty "deep" query parameter.
client server.Client
// endpoint is the base URL of the metadata API; Handle appends
// "/latest/meta-data/instance-id" to it when building its request.
endpoint string
}

Expand All @@ -38,13 +41,23 @@ func (h *healthHandler) Handle(ctx context.Context, w http.ResponseWriter, req *
defer statsd.Client.NewTiming().Send("handler.health")
}

req, err := http.NewRequest("GET", fmt.Sprintf("%s/latest/meta-data/instance-id", h.endpoint), nil)
deep := req.URL.Query().Get("deep")
if deep != "" {
health, err := findServerHealth(ctx, h.client)
if err != nil {
return http.StatusInternalServerError, err
} else if health != "ok" {
return http.StatusInternalServerError, fmt.Errorf("server health: %s", health)
}
}

metaReq, err := http.NewRequest("GET", fmt.Sprintf("%s/latest/meta-data/instance-id", h.endpoint), nil)
if err != nil {
return http.StatusInternalServerError, fmt.Errorf("couldn't create request: %s", err)
}

client := &http.Client{}
resp, err := client.Do(req.WithContext(ctx))
resp, err := client.Do(metaReq.WithContext(ctx))
if err != nil {
return http.StatusInternalServerError, fmt.Errorf("couldn't read metadata response: %s", err)
}
Expand All @@ -59,8 +72,32 @@ func (h *healthHandler) Handle(ctx context.Context, w http.ResponseWriter, req *
return http.StatusOK, nil
}

func newHealthHandler(endpoint string) *healthHandler {
// findServerHealth asks the server for its health status via client.Health,
// retrying with an exponential backoff (initial interval retryInterval) that
// is bound to ctx. It returns the status string from the first successful
// call, or an empty string and the error backoff.Retry gave up with.
func findServerHealth(ctx context.Context, client server.Client) (string, error) {
	var status string

	// backoff.Retry runs the operation synchronously, so the result can be
	// handed back through a captured variable.
	check := func() error {
		result, err := client.Health(ctx)
		if err != nil {
			return err
		}
		status = result
		return nil
	}

	policy := backoff.NewExponentialBackOff()
	policy.InitialInterval = retryInterval

	if err := backoff.Retry(check, backoff.WithContext(policy, ctx)); err != nil {
		return "", err
	}
	return status, nil
}

// newHealthHandler builds a healthHandler that uses client for the deep
// server health check and endpoint as the metadata API base URL.
func newHealthHandler(client server.Client, endpoint string) *healthHandler {
	handler := new(healthHandler)
	handler.client = client
	handler.endpoint = endpoint
	return handler
}
91 changes: 91 additions & 0 deletions pkg/aws/metadata/handler_health_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package metadata

import (
"github.com/fortytw2/leaktest"
"github.com/gorilla/mux"
st "github.com/uswitch/kiam/pkg/testutil/server"
"io/ioutil"
"net/http"
"net/http/httptest"
"testing"
)

// TestHealthReturn checks that a plain /health request answers 200 and that
// the response body is the instance id served by the fake metadata endpoint.
func TestHealthReturn(t *testing.T) {
	defer leaktest.Check(t)()

	// Fake metadata API that always answers with a fixed instance id.
	testServer := httptest.NewServer(http.HandlerFunc(func(res http.ResponseWriter, req *http.Request) {
		res.WriteHeader(http.StatusOK)
		res.Write([]byte("i-12345"))
	}))
	defer testServer.Close()

	r, err := http.NewRequest("GET", "/health", nil)
	if err != nil {
		// Fatal rather than Error: continuing would hand a nil request to the router.
		t.Fatalf("error creating http request: %s", err)
	}

	rr := httptest.NewRecorder()
	handler := newHealthHandler(st.NewStubClient(), testServer.URL)
	router := mux.NewRouter()
	handler.Install(router)
	router.ServeHTTP(rr, r)

	if rr.Code != http.StatusOK {
		t.Error("expected 200 response, was", rr.Code)
	}

	body, err := ioutil.ReadAll(rr.Body)
	if err != nil {
		// Without a body there is nothing meaningful left to compare.
		t.Fatalf("error reading body of metadata response: %s", err)
	}
	if string(body) != "i-12345" {
		t.Errorf("instance-id not returned correctly, got %q", body)
	}
}

// TestDeepHealthBadReturn checks that /health?deep=true answers 500 when the
// stub server client reports a health status other than "ok".
func TestDeepHealthBadReturn(t *testing.T) {
	defer leaktest.Check(t)()

	// Fake metadata API that always answers with a fixed instance id.
	testServer := httptest.NewServer(http.HandlerFunc(func(res http.ResponseWriter, req *http.Request) {
		res.WriteHeader(http.StatusOK)
		res.Write([]byte("i-12345"))
	}))
	defer testServer.Close()

	r, err := http.NewRequest("GET", "/health?deep=true", nil)
	if err != nil {
		// Fatal rather than Error: continuing would hand a nil request to the router.
		t.Fatalf("error creating http request: %s", err)
	}

	rr := httptest.NewRecorder()
	handler := newHealthHandler(st.NewStubClient().WithHealth("bad"), testServer.URL)
	router := mux.NewRouter()
	handler.Install(router)
	router.ServeHTTP(rr, r)

	if rr.Code != http.StatusInternalServerError {
		t.Error("expected 500 response, was", rr.Code)
	}
}

// TestDeepHealthReturn checks that /health?deep=true answers 200 with the
// instance id in the body when the stub server client reports "ok".
func TestDeepHealthReturn(t *testing.T) {
	defer leaktest.Check(t)()

	// Fake metadata API that always answers with a fixed instance id.
	testServer := httptest.NewServer(http.HandlerFunc(func(res http.ResponseWriter, req *http.Request) {
		res.WriteHeader(http.StatusOK)
		res.Write([]byte("i-12345"))
	}))
	defer testServer.Close()

	r, err := http.NewRequest("GET", "/health?deep=true", nil)
	if err != nil {
		// Fatal rather than Error: continuing would hand a nil request to the router.
		t.Fatalf("error creating http request: %s", err)
	}

	rr := httptest.NewRecorder()
	handler := newHealthHandler(st.NewStubClient().WithHealth("ok"), testServer.URL)
	router := mux.NewRouter()
	handler.Install(router)
	router.ServeHTTP(rr, r)

	if rr.Code != http.StatusOK {
		t.Error("expected 200 response, was", rr.Code)
	}

	body, err := ioutil.ReadAll(rr.Body)
	if err != nil {
		// Without a body there is nothing meaningful left to compare.
		t.Fatalf("error reading body of metadata response: %s", err)
	}
	if string(body) != "i-12345" {
		t.Errorf("instance-id not returned correctly, got %q", body)
	}
}
2 changes: 1 addition & 1 deletion pkg/aws/metadata/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ func buildHTTPServer(config *ServerOptions, client server.Client) (*http.Server,
router := mux.NewRouter()
router.Handle("/ping", http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { fmt.Fprint(w, "pong") }))

h := newHealthHandler(config.MetadataEndpoint)
h := newHealthHandler(client, config.MetadataEndpoint)
h.Install(router)

r := newRoleHandler(client, buildClientIP(config))
Expand Down
8 changes: 7 additions & 1 deletion pkg/testutil/server/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ type StubClient struct {
credentialsCallCount int
roles []GetRoleResult
rolesCallCount int
health string
}

// GetRoleResult is a return value from GetRole
Expand Down Expand Up @@ -42,14 +43,19 @@ func (c *StubClient) GetCredentials(ctx context.Context, ip, role string) (*sts.
}

func (c *StubClient) Health(ctx context.Context) (string, error) {
return "ok", nil
return c.health, nil
}

// WithRoles stores the given role results on the stub and returns the same
// stub so that configuration calls can be chained.
func (c *StubClient) WithRoles(roles ...GetRoleResult) *StubClient {
c.roles = roles
return c
}

// WithHealth stores the given health status string on the stub and returns
// the same stub so that configuration calls can be chained.
func (c *StubClient) WithHealth(health string) *StubClient {
c.health = health
return c
}

type GetCredentialsResult struct {
Credentials *sts.Credentials
Error error
Expand Down

0 comments on commit 3ea5e8e

Please sign in to comment.