Skip to content

Commit

Permalink
Configure the HTTP2 transport to handle broken connections (#189)
Browse files Browse the repository at this point in the history
Configure the HTTP2 transport to handle broken/idle connections

Two timeout values were added, ReadIdleTimeout and PingTimeout, that are both interrelated.
The ReadIdleTimeout enables the ping health check every configured duration if no frame has been received on the HTTP/2 connection. The PingTimeout can be used to configure the total amount of time to wait for a ping response before closing the connection. Both of these timeout values assist in cleaning up broken or idle connections forcing the client to re-connect. 
This is the current known workaround for the following Go issue:
golang/go#36026
  • Loading branch information
tabboud authored Jun 21, 2021
1 parent afd2a0d commit dc3fd3e
Show file tree
Hide file tree
Showing 44 changed files with 15,595 additions and 290 deletions.
11 changes: 11 additions & 0 deletions changelog/@unreleased/pr-189.v2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
type: improvement
improvement:
description: |-
Configure the HTTP2 transport to handle broken/idle connections
Two timeout values were added, ReadIdleTimeout and PingTimeout, that are both interrelated.
The ReadIdleTimeout enables the ping health check every configured duration if no frame has been received on the HTTP/2 connection. The PingTimeout can be used to configure the total amount of time to wait for a ping response before closing the connection. Both of these timeout values assist in cleaning up broken or idle connections forcing the client to re-connect.
This is the current known workaround for the following Go issue:
https://github.com/golang/go/issues/36026
links:
- https://github.com/palantir/conjure-go-runtime/pull/189
10 changes: 6 additions & 4 deletions conjure-go-client/httpclient/client_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ import (
"github.com/palantir/pkg/metrics"
"github.com/palantir/pkg/retry"
"github.com/palantir/pkg/tlsconfig"
werror "github.com/palantir/witchcraft-go-error"
"golang.org/x/net/http2"
"golang.org/x/net/proxy"
)

Expand Down Expand Up @@ -66,6 +64,8 @@ type httpClientBuilder struct {
TLSHandshakeTimeout time.Duration
ExpectContinueTimeout time.Duration
ResponseHeaderTimeout time.Duration
HTTP2ReadIdleTimeout time.Duration
HTTP2PingTimeout time.Duration

// http.Dialer modifiers
DialTimeout time.Duration
Expand Down Expand Up @@ -132,6 +132,8 @@ func getDefaultHTTPClientBuilder() *httpClientBuilder {
IdleConnTimeout: 90 * time.Second,
TLSHandshakeTimeout: 10 * time.Second,
ExpectContinueTimeout: 1 * time.Second,
HTTP2ReadIdleTimeout: 30 * time.Second,
HTTP2PingTimeout: 15 * time.Second,
// These are higher than the defaults, but match Java and
// heuristically work better for our relatively large services.
MaxIdleConns: 200,
Expand Down Expand Up @@ -184,8 +186,8 @@ func httpClientAndRoundTripHandlersFromBuilder(b *httpClientBuilder) (*http.Clie
transport.Proxy = b.Proxy
}
if !b.DisableHTTP2 {
if err := http2.ConfigureTransport(transport); err != nil {
return nil, nil, werror.Wrap(err, "failed to configure transport for http2")
if err := configureHTTP2(transport, b.HTTP2ReadIdleTimeout, b.HTTP2PingTimeout); err != nil {
return nil, nil, err
}
}
if !b.DisableTracing {
Expand Down
26 changes: 26 additions & 0 deletions conjure-go-client/httpclient/client_params.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,32 @@ func WithDisableHTTP2() ClientOrHTTPClientParam {
})
}

// WithHTTP2ReadIdleTimeout configures the HTTP/2 ReadIdleTimeout.
// A ReadIdleTimeout > 0 will enable health checks and allows broken/idle
// connections to be pruned more quickly, preventing the client from
// attempting to re-use connections that will no longer work.
// If the HTTP/2 connection has not received any frames after the ReadIdleTimeout period,
// then periodic pings (health checks) will be sent to the server before attempting to close the connection.
// The amount of time to wait for the ping response can be configured by the WithHTTP2PingTimeout param.
// If unset, the client defaults to 30 seconds, if HTTP2 is enabled.
func WithHTTP2ReadIdleTimeout(timeout time.Duration) ClientOrHTTPClientParam {
return clientOrHTTPClientParamFunc(func(b *httpClientBuilder) error {
b.HTTP2ReadIdleTimeout = timeout
return nil
})
}

// WithHTTP2PingTimeout configures the amount of time to wait for a ping response
// before closing an HTTP/2 connection. The PingTimeout is only valid when
// the ReadIdleTimeout is > 0 otherwise pings (health checks) are not enabled.
// If unset, the client defaults to 15 seconds, if HTTP/2 is enabled and the ReadIdleTimeout is > 0.
func WithHTTP2PingTimeout(timeout time.Duration) ClientOrHTTPClientParam {
return clientOrHTTPClientParamFunc(func(b *httpClientBuilder) error {
b.HTTP2PingTimeout = timeout
return nil
})
}

// WithMaxIdleConns sets the number of reusable TCP connections the client
// will maintain. If unset, the client defaults to 32.
func WithMaxIdleConns(conns int) ClientOrHTTPClientParam {
Expand Down
20 changes: 20 additions & 0 deletions conjure-go-client/httpclient/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,14 @@ type ClientConfig struct {
// fully writing the request headers if the request has an "Expect: 100-continue" header.
ExpectContinueTimeout *time.Duration `json:"expect-continue-timeout,omitempty" yaml:"expect-continue-timeout,omitempty"`

// HTTP2ReadIdleTimeout sets the maximum time to wait before sending periodic health checks (pings) for an HTTP/2 connection.
// If unset, the client defaults to 30s for HTTP/2 clients.
HTTP2ReadIdleTimeout *time.Duration `json:"http2-read-idle-timeout" yaml:"http2-read-idle-timeout"`
// HTTP2PingTimeout is the maximum time to wait for a ping response in an HTTP/2 connection,
// when health checking is enabled which is done by setting the HTTP2ReadIdleTimeout > 0.
// If unset, the client defaults to 15s if the HTTP2ReadIdleTimeout is > 0.
HTTP2PingTimeout *time.Duration `json:"http2-ping-timeout" yaml:"http2-ping-timeout"`

// MaxIdleConns sets the number of reusable TCP connections the client will maintain.
// If unset, the client defaults to 32.
MaxIdleConns *int `json:"max-idle-conns,omitempty" yaml:"max-idle-conns,omitempty"`
Expand Down Expand Up @@ -154,6 +162,12 @@ func (c ServicesConfig) ClientConfig(serviceName string) ClientConfig {
if conf.ExpectContinueTimeout == nil && c.Default.ExpectContinueTimeout != nil {
conf.ExpectContinueTimeout = c.Default.ExpectContinueTimeout
}
if conf.HTTP2ReadIdleTimeout == nil && c.Default.HTTP2ReadIdleTimeout != nil {
conf.HTTP2ReadIdleTimeout = c.Default.HTTP2ReadIdleTimeout
}
if conf.HTTP2PingTimeout == nil && c.Default.HTTP2PingTimeout != nil {
conf.HTTP2PingTimeout = c.Default.HTTP2PingTimeout
}
if conf.MaxIdleConns == nil && c.Default.MaxIdleConns != nil {
conf.MaxIdleConns = c.Default.MaxIdleConns
}
Expand Down Expand Up @@ -278,6 +292,12 @@ func configToParams(c ClientConfig) ([]ClientParam, error) {
if c.ExpectContinueTimeout != nil && *c.ExpectContinueTimeout != 0 {
params = append(params, WithExpectContinueTimeout(*c.ExpectContinueTimeout))
}
if c.HTTP2ReadIdleTimeout != nil && *c.HTTP2ReadIdleTimeout >= 0 {
params = append(params, WithHTTP2ReadIdleTimeout(*c.HTTP2ReadIdleTimeout))
}
if c.HTTP2PingTimeout != nil && *c.HTTP2PingTimeout >= 0 {
params = append(params, WithHTTP2PingTimeout(*c.HTTP2PingTimeout))
}

// Connections

Expand Down
8 changes: 6 additions & 2 deletions conjure-go-client/httpclient/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ clients:
my-2nd-service:
api-token: so-secret-2
read-timeout: 2m
http2-read-idle-timeout: 10s
http2-ping-timeout: 5s
`,
ExpectedConfig: ServicesConfig{
Services: map[string]ClientConfig{
Expand All @@ -96,8 +98,10 @@ clients:
ReadTimeout: &[]time.Duration{time.Minute}[0],
},
"my-2nd-service": {
APIToken: &[]string{"so-secret-2"}[0],
ReadTimeout: &[]time.Duration{2 * time.Minute}[0],
APIToken: &[]string{"so-secret-2"}[0],
ReadTimeout: &[]time.Duration{2 * time.Minute}[0],
HTTP2ReadIdleTimeout: &[]time.Duration{10 * time.Second}[0],
HTTP2PingTimeout: &[]time.Duration{5 * time.Second}[0],
},
},
},
Expand Down
34 changes: 34 additions & 0 deletions conjure-go-client/httpclient/configure_http2.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright (c) 2021 Palantir Technologies. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build !go1.16

package httpclient

import (
"net/http"
"time"

werror "github.com/palantir/witchcraft-go-error"
"golang.org/x/net/http2"
)

// configureHTTP2 will attempt to configure net/http HTTP/1 Transport to use HTTP/2.
// It returns an error if t1 has already been HTTP/2-enabled.
func configureHTTP2(t1 *http.Transport, _, _ time.Duration) error {
if err := http2.ConfigureTransport(t1); err != nil {
return werror.Wrap(err, "failed to configure transport for http2")
}
return nil
}
49 changes: 49 additions & 0 deletions conjure-go-client/httpclient/configure_http2_go16.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Copyright (c) 2021 Palantir Technologies. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build go1.16

package httpclient

import (
"net/http"
"time"

werror "github.com/palantir/witchcraft-go-error"
"golang.org/x/net/http2"
)

// configureHTTP2 will attempt to configure net/http HTTP/1 Transport to use HTTP/2.
// The provided readIdleTimeout will set the underlying HTTP/2 transport ReadIdleTimeout.
// It returns an error if t1 has already been HTTP/2-enabled.
func configureHTTP2(t1 *http.Transport, readIdleTimeout, pingTimeout time.Duration) error {
http2Transport, err := http2.ConfigureTransports(t1)
if err != nil {
return werror.Wrap(err, "failed to configure transport for http2")
}
// ReadIdleTimeout is the amount of time to wait before running periodic health checks (pings)
// after not receiving a frame from the HTTP/2 connection.
// Setting this value will enable the health checks and allows broken idle
// connections to be pruned more quickly, preventing the client from
// attempting to re-use connections that will no longer work.
// ref: https://github.com/golang/go/issues/36026
http2Transport.ReadIdleTimeout = readIdleTimeout

// PingTimeout configures the amount of time to wait for a ping response (health check)
// before closing an HTTP/2 connection. The PingTimeout is only valid if
// the above ReadIdleTimeout is > 0.
http2Transport.PingTimeout = pingTimeout

return nil
}
163 changes: 163 additions & 0 deletions conjure-go-client/httpclient/http2_client_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
// Copyright (c) 2021 Palantir Technologies. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build go1.16

package httpclient_test

import (
"context"
"io"
"net"
"net/http"
"net/http/httptest"
"net/url"
"strings"
"sync/atomic"
"testing"
"time"

"github.com/palantir/conjure-go-runtime/v2/conjure-go-client/httpclient"
"github.com/palantir/conjure-go-runtime/v2/conjure-go-server/httpserver"
"github.com/stretchr/testify/require"
)

// TestHTTP2Client_reusesBrokenConnection asserts the behavior of an HTTP/2 client that re-uses
// a broken connection and fails to make requests. The clients ReadIdleTimeout is set to 0, which means
// there will be no HTTP/2 health checks enabled and thus the client will re-use existing (broken) connections.
func TestHTTP2Client_reusesBrokenConnection(t *testing.T) {
// The second request, which re-uses the broken connection, will cause an error
// "use of closed network connection" so we should only expect to have 1 dial succeed.
testProxy(t, 0, 0, 1, true)
}

// TestHTTP2Client_reconnectsOnBrokenConnection asserts the behavior of an HTTP/2 client that has
// the ReadIdleTimeout configured very low which forces the client to re-connect on subsequent requests.
func TestHTTP2Client_reconnectsOnBrokenConnection(t *testing.T) {
testProxy(t, time.Second, time.Second, 2, false)
}

func testProxy(t *testing.T, readIdleTimeout, pingTimeout time.Duration, expectedDials int, expectErr bool) {
ctx := context.Background()

ts := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
httpserver.WriteJSONResponse(w, map[string]string{
"proto": r.Proto,
}, http.StatusOK)
}))
ts.EnableHTTP2 = true
ts.StartTLS()
defer ts.Close()

u, err := url.Parse(ts.URL)
require.NoError(t, err)
proxy := newProxyServer(t, u.Host)
defer func() {
require.NoError(t, proxy.ln.Close())
}()
stopCh := make(chan struct{})
go proxy.serve(t, stopCh, false)

client, err := httpclient.NewClient(
httpclient.WithBaseURLs([]string{"https://" + proxy.ln.Addr().String()}),
httpclient.WithHTTP2ReadIdleTimeout(readIdleTimeout),
httpclient.WithHTTP2PingTimeout(pingTimeout),
httpclient.WithTLSInsecureSkipVerify(),
httpclient.WithDisableRestErrors(),
httpclient.WithMaxRetries(0),
httpclient.WithHTTPTimeout(time.Second),
)
require.NoError(t, err)

expectedRespBody := map[string]string{
"proto": "HTTP/2.0",
}
var actualResp map[string]string

// First request should always succeed
resp, err := client.Get(ctx,
httpclient.WithJSONResponse(&actualResp),
)
require.NoError(t, err)
require.Equal(t, resp.StatusCode, http.StatusOK)
require.Equal(t, expectedRespBody, actualResp)

// close the proxy to simulate a broken TCP connection
close(stopCh)

// restart the proxy with the expected error
stopCh = make(chan struct{})
go proxy.serve(t, stopCh, expectErr)
defer close(stopCh)

<-time.After(time.Second + readIdleTimeout + pingTimeout)

// Second Request
resp, err = client.Get(ctx,
httpclient.WithJSONResponse(&actualResp),
)
if expectErr {
require.Error(t, err)
require.Nil(t, resp)
} else {
require.NoError(t, err)
require.Equal(t, resp.StatusCode, http.StatusOK)
require.Equal(t, expectedRespBody, actualResp)
}
require.Equal(t, expectedDials, proxy.DialCount())
}

type proxyServer struct {
ln net.Listener
proxyURL string
dialCount int32
}

func newProxyServer(t *testing.T, proxyURL string) *proxyServer {
ln, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err)
return &proxyServer{
proxyURL: proxyURL,
ln: ln,
}
}

func (p *proxyServer) serve(t *testing.T, stopCh chan struct{}, expectErr bool) {
conn, err := p.ln.Accept()
if expectErr {
require.Error(t, err)
require.True(t, strings.Contains(err.Error(), "use of closed network connection"))
return
}
require.NoError(t, err)
atomic.AddInt32(&p.dialCount, 1)
go p.handleConnection(t, conn, stopCh)
}

func (p *proxyServer) handleConnection(t *testing.T, in net.Conn, stopCh chan struct{}) {
out, err := net.Dial("tcp", p.proxyURL)
require.NoError(t, err)
go func() {
_, _ = io.Copy(out, in)
}()
go func() {
_, _ = io.Copy(in, out)
}()
<-stopCh
require.NoError(t, out.Close())
}

func (p *proxyServer) DialCount() int {
return int(atomic.LoadInt32(&p.dialCount))
}
Loading

0 comments on commit dc3fd3e

Please sign in to comment.