From cbd16b287e0b95132cb8be4f8ca2f31d83984ff5 Mon Sep 17 00:00:00 2001 From: Ben Murphy Date: Fri, 7 Jun 2024 16:27:30 +0100 Subject: [PATCH 1/4] Fix HTTP Connection Reuse for S3 range requests If EOF is not read then the connection is not eligble for reuse. Try to read an extra byte to ensure EOF is read. --- tempodb/backend/s3/s3.go | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/tempodb/backend/s3/s3.go b/tempodb/backend/s3/s3.go index cdc5369e8d2..a1b90ca7a45 100644 --- a/tempodb/backend/s3/s3.go +++ b/tempodb/backend/s3/s3.go @@ -561,20 +561,17 @@ func (rw *readerWriter) readRange(ctx context.Context, objName string, offset in } defer reader.Close() - totalBytes := 0 - for { - byteCount, err := reader.Read(buffer[totalBytes:]) - if errors.Is(err, io.EOF) { - return nil - } - if err != nil { - return fmt.Errorf("error in range read from s3 backend: %w", err) - } - if byteCount == 0 { - return nil - } - totalBytes += byteCount + /* bytes read == len(buffer) if and only if err == nil */ + _, err = io.ReadFull(reader, buffer) + + if err == nil { + /* read EOF so connection can be reused */ + var dummy [1]byte + _, _ = reader.Read(dummy[:]) + return nil } + + return fmt.Errorf("error in range read from s3 backend: %w", err) } func fetchCreds(cfg *Config) (*credentials.Credentials, error) { From 5b5e5ee66205b46f985ad0593340d32aba3364c4 Mon Sep 17 00:00:00 2001 From: Ben Murphy Date: Fri, 7 Jun 2024 16:28:51 +0100 Subject: [PATCH 2/4] Explicitly set MaxIdleConnsPerHost/MaxIdleConns for S3 backend The previous value was the MinIO default which is 16 per host and 100 global. --- tempodb/backend/s3/s3.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tempodb/backend/s3/s3.go b/tempodb/backend/s3/s3.go index a1b90ca7a45..51ccb411909 100644 --- a/tempodb/backend/s3/s3.go +++ b/tempodb/backend/s3/s3.go @@ -623,6 +623,10 @@ func createCore(cfg *Config, hedge bool) (*minio.Core, error) { return nil, fmt.Errorf("create minio.DefaultTransport: %w", err) } + /* minio sets MaxIdleConns to 100 but we should also increase per host to 100 */ + customTransport.MaxIdleConnsPerHost = 100 + customTransport.MaxIdleConns = 100 + tlsConfig, err := cfg.GetTLSConfig() if err != nil { return nil, fmt.Errorf("failed to create TLS config: %w", err) From 7d00c3daa58a7c370a9c3b193ef6b02d48ff88ae Mon Sep 17 00:00:00 2001 From: Ben Murphy Date: Fri, 7 Jun 2024 16:54:51 +0100 Subject: [PATCH 3/4] Fix HTTP Connection Reuse for GCS range requests If EOF is not read then the connection is not eligble for reuse. Try to read an extra byte to ensure EOF is read. --- tempodb/backend/gcs/gcs.go | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/tempodb/backend/gcs/gcs.go b/tempodb/backend/gcs/gcs.go index a8e110ff4f2..3a50b38658c 100644 --- a/tempodb/backend/gcs/gcs.go +++ b/tempodb/backend/gcs/gcs.go @@ -477,20 +477,17 @@ func (rw *readerWriter) readRange(ctx context.Context, name string, offset int64 } defer r.Close() - totalBytes := 0 - for { - byteCount, err := r.Read(buffer[totalBytes:]) - if errors.Is(err, io.EOF) { - return nil - } - if err != nil { - return err - } - if byteCount == 0 { - return nil - } - totalBytes += byteCount + /* bytes read == len(buffer) if and only if err == nil */ + _, err = io.ReadFull(r, buffer) + + if err == nil { + /* read EOF so connection can be reused */ + var dummy [1]byte + _, _ = r.Read(dummy[:]) + return nil } + + return err } func createBucket(ctx context.Context, cfg *Config, hedge bool) (*storage.BucketHandle, error) { From e53d9f8dba769e7c032598c78a20ebf92777119c Mon Sep 17 00:00:00 2001 From: Joe Elliott Date: Tue, 18 Jun 2024 14:43:58 -0400 Subject: [PATCH 4/4] changelog Signed-off-by: Joe Elliott --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 234c479abe1..8dbd9c1316d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ * [ENHANCEMENT] Use multiple goroutines to unmarshal responses in parallel in the query frontend. [#3713](https://github.com/grafana/tempo/pull/3713) (@joe-elliott) * [BUGFIX] Fix metrics queries when grouping by attributes that may not exist [#3734](https://github.com/grafana/tempo/pull/3734) (@mdisibio) * [BUGFIX] max_global_traces_per_user: take into account ingestion.tenant_shard_size when converting to local limit [#3618](https://github.com/grafana/tempo/pull/3618) (@kvrhdn) +* [BUGFIX] Fix http connection reuse on GCP and AWS by reading io.EOF through the http body. [#3760](https://github.com/grafana/tempo/pull/3760) (@bmteller) ## v2.5.0