Skip to content

Commit bc5b695

Browse files
fix(retry): fix retries when using protobuf encoding (#13316)
(cherry picked from commit a457c5d)
1 parent 91a3486 commit bc5b695

File tree

2 files changed

+42
-12
lines changed

2 files changed

+42
-12
lines changed

pkg/querier/queryrange/queryrangebase/retry.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,10 @@ import (
77
"github.com/go-kit/log"
88
"github.com/go-kit/log/level"
99
"github.com/grafana/dskit/backoff"
10-
"github.com/grafana/dskit/httpgrpc"
10+
"github.com/grafana/dskit/grpcutil"
1111
"github.com/prometheus/client_golang/prometheus"
1212
"github.com/prometheus/client_golang/prometheus/promauto"
13+
"google.golang.org/grpc/codes"
1314

1415
"github.com/grafana/loki/v3/pkg/util"
1516
util_log "github.com/grafana/loki/v3/pkg/util/log"
@@ -88,9 +89,8 @@ func (r retry) Do(ctx context.Context, req Request) (Response, error) {
8889
return resp, nil
8990
}
9091

91-
// Retry if we get a HTTP 500 or a non-HTTP error.
92-
httpResp, ok := httpgrpc.HTTPResponseFromError(err)
93-
if !ok || httpResp.Code/100 == 5 {
92+
// Retry if we get a HTTP 500 or an unknown error.
93+
if code := grpcutil.ErrorToStatusCode(err); code == codes.Unknown || code/100 == 5 {
9494
lastErr = err
9595
level.Error(util_log.WithContext(ctx, r.log)).Log(
9696
"msg", "error processing request",

pkg/querier/queryrange/queryrangebase/retry_test.go

+38-8
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,11 @@ import (
88
"testing"
99

1010
"github.com/go-kit/log"
11+
"github.com/gogo/status"
1112
"github.com/grafana/dskit/httpgrpc"
1213
"github.com/stretchr/testify/require"
1314
"go.uber.org/atomic"
15+
"google.golang.org/grpc/codes"
1416

1517
"github.com/grafana/loki/v3/pkg/util/constants"
1618
)
@@ -19,10 +21,11 @@ func TestRetry(t *testing.T) {
1921
var try atomic.Int32
2022

2123
for _, tc := range []struct {
22-
name string
23-
handler Handler
24-
resp Response
25-
err error
24+
name string
25+
handler Handler
26+
resp Response
27+
err error
28+
expectedCalls int
2629
}{
2730
{
2831
name: "retry failures",
@@ -32,21 +35,26 @@ func TestRetry(t *testing.T) {
3235
}
3336
return nil, fmt.Errorf("fail")
3437
}),
35-
resp: &PrometheusResponse{Status: "Hello World"},
38+
resp: &PrometheusResponse{Status: "Hello World"},
39+
expectedCalls: 5,
3640
},
3741
{
3842
name: "don't retry 400s",
3943
handler: HandlerFunc(func(_ context.Context, req Request) (Response, error) {
44+
try.Inc()
4045
return nil, httpgrpc.Errorf(http.StatusBadRequest, "Bad Request")
4146
}),
42-
err: httpgrpc.Errorf(http.StatusBadRequest, "Bad Request"),
47+
err: httpgrpc.Errorf(http.StatusBadRequest, "Bad Request"),
48+
expectedCalls: 1,
4349
},
4450
{
4551
name: "retry 500s",
4652
handler: HandlerFunc(func(_ context.Context, req Request) (Response, error) {
53+
try.Inc()
4754
return nil, httpgrpc.Errorf(http.StatusInternalServerError, "Internal Server Error")
4855
}),
49-
err: httpgrpc.Errorf(http.StatusInternalServerError, "Internal Server Error"),
56+
err: httpgrpc.Errorf(http.StatusInternalServerError, "Internal Server Error"),
57+
expectedCalls: 5,
5058
},
5159
{
5260
name: "last error",
@@ -56,7 +64,28 @@ func TestRetry(t *testing.T) {
5664
}
5765
return nil, httpgrpc.Errorf(http.StatusInternalServerError, "Internal Server Error")
5866
}),
59-
err: httpgrpc.Errorf(http.StatusBadRequest, "Bad Request"),
67+
err: httpgrpc.Errorf(http.StatusBadRequest, "Bad Request"),
68+
expectedCalls: 5,
69+
},
70+
// previous entires in this table use httpgrpc.Errorf for generating the error which also populates the Details field with the marshalled http response.
71+
// Next set of tests validate the retry behavior when using protobuf encoding where the status does not include the details.
72+
{
73+
name: "protobuf enc don't retry 400s",
74+
handler: HandlerFunc(func(_ context.Context, req Request) (Response, error) {
75+
try.Inc()
76+
return nil, status.New(codes.Code(http.StatusBadRequest), "Bad Request").Err()
77+
}),
78+
err: status.New(codes.Code(http.StatusBadRequest), "Bad Request").Err(),
79+
expectedCalls: 1,
80+
},
81+
{
82+
name: "protobuf enc retry 500s",
83+
handler: HandlerFunc(func(_ context.Context, req Request) (Response, error) {
84+
try.Inc()
85+
return nil, status.New(codes.Code(http.StatusInternalServerError), "Internal Server Error").Err()
86+
}),
87+
err: status.New(codes.Code(http.StatusInternalServerError), "Internal Server Error").Err(),
88+
expectedCalls: 5,
6089
},
6190
} {
6291
t.Run(tc.name, func(t *testing.T) {
@@ -68,6 +97,7 @@ func TestRetry(t *testing.T) {
6897
resp, err := h.Do(context.Background(), req)
6998
require.Equal(t, tc.err, err)
7099
require.Equal(t, tc.resp, resp)
100+
require.EqualValues(t, tc.expectedCalls, try.Load())
71101
})
72102
}
73103
}

0 commit comments

Comments
 (0)