diff --git a/CHANGELOG.md b/CHANGELOG.md index 99a7e8d0f15..0ed04b3de2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ * [CHANGE] No longer send the final diff in GRPC streaming. Instead we rely on the streamed intermediate results. [#4062](https://github.com/grafana/tempo/pull/4062) (@joe-elliott) * [CHANGE] Update Go to 1.23.1 [#4146](https://github.com/grafana/tempo/pull/4146) [#4147](https://github.com/grafana/tempo/pull/4147) (@javiermolinar) * [CHANGE] TraceQL: Add range condition for byte predicates [#4198](https://github.com/grafana/tempo/pull/4198) (@ie-pham) +* [CHANGE] Return 422 for TRACE_TOO_LARGE queries [#4160](https://github.com/grafana/tempo/pull/4160) (@zalegrala) * [FEATURE] Discarded span logging `log_discarded_spans` [#3957](https://github.com/grafana/tempo/issues/3957) (@dastrobu) * [FEATURE] TraceQL support for instrumentation scope [#3967](https://github.com/grafana/tempo/pull/3967) (@ie-pham) * [ENHANCEMENT] TraceQL: Attribute iterators collect matched array values [#3867](https://github.com/grafana/tempo/pull/3867) (@electron0zero, @stoewer) diff --git a/integration/e2e/limits_test.go b/integration/e2e/limits_test.go index 0110dd32cef..2540214ccde 100644 --- a/integration/e2e/limits_test.go +++ b/integration/e2e/limits_test.go @@ -26,6 +26,7 @@ import ( "github.com/grafana/tempo/integration/util" "github.com/grafana/tempo/pkg/httpclient" + "github.com/grafana/tempo/pkg/model/trace" "github.com/grafana/tempo/pkg/tempopb" tempoUtil "github.com/grafana/tempo/pkg/util" "github.com/grafana/tempo/pkg/util/test" @@ -233,22 +234,22 @@ func TestQueryLimits(t *testing.T) { querierClient := httpclient.New("http://"+tempo.Endpoint(3200)+"/querier", tempoUtil.FakeTenantID) _, err = client.QueryTrace(tempoUtil.TraceIDToHexString(traceID[:])) - require.ErrorContains(t, err, "trace exceeds max size") - require.ErrorContains(t, err, "failed with response: 500") // confirm frontend returns 500 + require.ErrorContains(t, err, trace.ErrTraceTooLarge.Error()) + require.ErrorContains(t, err, "failed with response: 422") // confirm frontend returns 422 _, err = querierClient.QueryTrace(tempoUtil.TraceIDToHexString(traceID[:])) - require.ErrorContains(t, err, "trace exceeds max size") - require.ErrorContains(t, err, "failed with response: 500") // todo: this should return 400 ideally so the frontend does not retry, but does not currently + require.ErrorContains(t, err, trace.ErrTraceTooLarge.Error()) + require.ErrorContains(t, err, "failed with response: 422") // complete block timeout is 10 seconds time.Sleep(15 * time.Second) _, err = client.QueryTrace(tempoUtil.TraceIDToHexString(traceID[:])) - require.ErrorContains(t, err, "trace exceeds max size") - require.ErrorContains(t, err, "failed with response: 500") // confirm frontend returns 500 + require.ErrorContains(t, err, trace.ErrTraceTooLarge.Error()) + require.ErrorContains(t, err, "failed with response: 422") // confirm frontend returns 422 _, err = querierClient.QueryTrace(tempoUtil.TraceIDToHexString(traceID[:])) - require.ErrorContains(t, err, "trace exceeds max size") - require.ErrorContains(t, err, "failed with response: 400") // confirm querier returns 400 + require.ErrorContains(t, err, trace.ErrTraceTooLarge.Error()) + require.ErrorContains(t, err, "failed with response: 422") // confirm querier returns 422 } func TestLimitsPartialSuccess(t *testing.T) { diff --git a/modules/frontend/combiner/trace_by_id.go b/modules/frontend/combiner/trace_by_id.go index 24393bf8adb..dfdd886f431 100644 --- a/modules/frontend/combiner/trace_by_id.go +++ b/modules/frontend/combiner/trace_by_id.go @@ -2,6 +2,7 @@ package combiner import ( "bytes" + "errors" "fmt" "io" "net/http" @@ -83,6 +84,13 @@ func (c *traceByIDCombiner) AddResponse(r PipelineResponse) error { // Consume the trace _, err = c.c.Consume(resp.Trace) + + if errors.Is(err, trace.ErrTraceTooLarge) { + c.code = http.StatusUnprocessableEntity + c.statusMessage = fmt.Sprint(err) + return nil + } + return err } @@ -156,6 +164,11 @@ func (c *traceByIDCombiner) shouldQuit() bool { return false } + // test special case for 422 + if c.code == http.StatusUnprocessableEntity { + return true + } + // bail on other 400s if c.code/100 == 4 { return true diff --git a/modules/frontend/combiner/trace_by_id_test.go b/modules/frontend/combiner/trace_by_id_test.go index 0f605de8640..ac83e8ead6e 100644 --- a/modules/frontend/combiner/trace_by_id_test.go +++ b/modules/frontend/combiner/trace_by_id_test.go @@ -48,15 +48,15 @@ func TestTraceByIDShouldQuit(t *testing.T) { should = c.ShouldQuit() require.False(t, should) - // trace too large, should not quit but should return an error + // trace too large, should quit and should not return an error c = NewTraceByID(1, api.HeaderAcceptJSON) err = c.AddResponse(toHTTPProtoResponse(t, &tempopb.TraceByIDResponse{ Trace: test.MakeTrace(1, nil), Metrics: &tempopb.TraceByIDMetrics{}, }, 200)) - require.Error(t, err) + require.NoError(t, err) should = c.ShouldQuit() - require.False(t, should) + require.True(t, should) } func TestTraceByIDHonorsContentType(t *testing.T) { diff --git a/modules/frontend/pipeline/sync_handler_adjust_response_code.go b/modules/frontend/pipeline/sync_handler_adjust_response_code.go index a7c9a362af1..2c06d55052a 100644 --- a/modules/frontend/pipeline/sync_handler_adjust_response_code.go +++ b/modules/frontend/pipeline/sync_handler_adjust_response_code.go @@ -44,8 +44,9 @@ func (c statusCodeAdjustWare) RoundTrip(req Request) (*http.Response, error) { // if the frontend issues a bad request then externally we need to represent that as an // internal error // exceptions + // 422 - unprocessable entity // 429 - too many requests - if resp.StatusCode >= 400 && resp.StatusCode < 500 && resp.StatusCode != 429 { + if resp.StatusCode >= 400 && resp.StatusCode < 500 && resp.StatusCode != 429 && resp.StatusCode != 422 { resp.StatusCode = http.StatusInternalServerError resp.Status = http.StatusText(http.StatusInternalServerError) // leave the body alone. it will preserve the original error message diff --git a/modules/querier/http.go b/modules/querier/http.go index 247d512445f..bd10f03c746 100644 --- a/modules/querier/http.go +++ b/modules/querier/http.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "net/http" + "strings" "time" "github.com/golang/protobuf/jsonpb" //nolint:all //deprecated @@ -425,9 +426,10 @@ func handleError(w http.ResponseWriter, err error) { return } - // todo: better understand all errors returned from queriers and categorize more as 4XX - if errors.Is(err, trace.ErrTraceTooLarge) { - http.Error(w, err.Error(), http.StatusBadRequest) + // TODO: better understand all errors returned from queriers and categorize more as 4XX + // NOTE: we receive a GRPC error from the ingesters, and so we need to check the string content of error as well. + if errors.Is(err, trace.ErrTraceTooLarge) || strings.Contains(err.Error(), trace.ErrTraceTooLarge.Error()) { + http.Error(w, err.Error(), http.StatusUnprocessableEntity) return }