From ea5cd508a21c0c47127445e46da0aa5b2eafd54b Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Tue, 15 Jun 2021 21:15:18 +0200 Subject: [PATCH 01/24] Basic persistent queue implementation based on storage extension interface --- exporter/exporterhelper/README.md | 42 ++- exporter/exporterhelper/common.go | 11 +- exporter/exporterhelper/common_test.go | 16 +- exporter/exporterhelper/consumers_queue.go | 31 ++ exporter/exporterhelper/logs.go | 20 +- exporter/exporterhelper/metrics.go | 20 +- exporter/exporterhelper/persistent_queue.go | 343 ++++++++++++++++++ .../exporterhelper/persistent_queue_test.go | 314 ++++++++++++++++ exporter/exporterhelper/queued_retry.go | 200 +++++++--- exporter/exporterhelper/queued_retry_test.go | 39 +- exporter/exporterhelper/traces.go | 20 +- 11 files changed, 992 insertions(+), 64 deletions(-) create mode 100644 exporter/exporterhelper/consumers_queue.go create mode 100644 exporter/exporterhelper/persistent_queue.go create mode 100644 exporter/exporterhelper/persistent_queue_test.go diff --git a/exporter/exporterhelper/README.md b/exporter/exporterhelper/README.md index 1b1bdc42639..e3a5af18a4f 100644 --- a/exporter/exporterhelper/README.md +++ b/exporter/exporterhelper/README.md @@ -17,12 +17,52 @@ The following configuration options can be modified: - `sending_queue` - `enabled` (default = true) - `num_consumers` (default = 10): Number of consumers that dequeue batches; ignored if `enabled` is `false` - - `queue_size` (default = 5000): Maximum number of batches kept in memory before data; ignored if `enabled` is `false`; + - `queue_size` (default = 5000): Maximum number of batches kept in memory or on disk (for persistent storage) before dropping; ignored if `enabled` is `false` User should calculate this as `num_seconds * requests_per_second` where: - `num_seconds` is the number of seconds to buffer in case of a backend outage - `requests_per_second` is the average number of requests per seconds. + - `persistent_enabled` (default = false): When set, enables persistence via a file extension - `resource_to_telemetry_conversion` - `enabled` (default = false): If `enabled` is `true`, all the resource attributes will be converted to metric labels by default. - `timeout` (default = 5s): Time to wait per individual attempt to send data to a backend. The full list of settings exposed for this helper exporter are documented [here](factory.go). + +### Persistent Queue + +**Status: under development** + +When `persistent_enabled` is set, the queue is being buffered to disk by the +[file storage extension](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/extension/storage/filestorage). +It currently can be enabled only in OpenTelemetry Collector Contrib. + +This has some limitations currently. The items that have been passed to consumer for the actual exporting +are removed from persistent queue. In effect, in case of a sudden shutdown, they might be lost. + +``` + ┌─Consumer #1─┐ + │ ┌───┐ │ + ┌──Deleted──┐ ┌───►│ │ 1 │ ├───► Success + X x x │ │ └───┘ │ + x x x │ │ │ + x x x │ └─────────────┘ + x x x │ + ┌─────────Persistent queue────x─────x───┐ │ ┌─Consumer #2─┐ + │ x x x │ │ │ ┌───┐ │ + │ ┌───┐ ┌───┐ ┌─x─┐ ┌─x─┐ ┌─x─┐ │ │ │ │ 2 │ ├───► Permanent -> X + │ n+1 │ n │ ... 
│ 4 │ │ 3 │ │ 2 │ │ 1 │ ├────┼───►│ └───┘ │ failure + │ └───┘ └───┘ └───┘ └───┘ └───┘ │ │ │ │ + │ │ │ └─────────────┘ + └───────────────────────────────────────┘ │ + ▲ ▲ │ ┌─Consumer #3─┐ + │ │ │ │ ┌───┐ │ Temporary + │ │ └───►│ │ 3 │ ├───► failure + write read │ └───┘ │ + index index │ │ │ + ▲ └─────────────┘ │ + │ ▲ │ + │ └── Retry ───────┤ + │ │ + │ │ + └───────────────────────── Requeuing ◄────────── Retry limit exceeded ─┘ +``` \ No newline at end of file diff --git a/exporter/exporterhelper/common.go b/exporter/exporterhelper/common.go index 6ef4f8f6d34..bb558945b21 100644 --- a/exporter/exporterhelper/common.go +++ b/exporter/exporterhelper/common.go @@ -52,8 +52,13 @@ type request interface { onError(error) request // Returns the count of spans/metric points or log records. count() int + // marshal serializes the current request into a byte stream + marshal() ([]byte, error) } +// requestUnmarshaler defines a function which can take a byte stream and unmarshal it into a relevant request +type requestUnmarshaler func([]byte) (request, error) + // requestSender is an abstraction of a sender for a request independent of the type of the data (traces, metrics, logs). type requestSender interface { send(req request) error @@ -159,7 +164,7 @@ type baseExporter struct { qrSender *queuedRetrySender } -func newBaseExporter(cfg config.Exporter, set component.ExporterCreateSettings, bs *baseSettings) *baseExporter { +func newBaseExporter(cfg config.Exporter, set component.ExporterCreateSettings, bs *baseSettings, signalName string, reqUnnmarshaler requestUnmarshaler) *baseExporter { be := &baseExporter{ Component: componenthelper.New(bs.componentOptions...), } @@ -169,7 +174,7 @@ func newBaseExporter(cfg config.Exporter, set component.ExporterCreateSettings, ExporterID: cfg.ID(), ExporterCreateSettings: set, }) - be.qrSender = newQueuedRetrySender(cfg.ID().String(), bs.QueueSettings, bs.RetrySettings, &timeoutSender{cfg: bs.TimeoutSettings}, set.Logger) + be.qrSender = newQueuedRetrySender(cfg.ID(), signalName, bs.QueueSettings, bs.RetrySettings, reqUnnmarshaler, &timeoutSender{cfg: bs.TimeoutSettings}, set.Logger) be.sender = be.qrSender return be @@ -189,7 +194,7 @@ func (be *baseExporter) Start(ctx context.Context, host component.Host) error { } // If no error then start the queuedRetrySender. - return be.qrSender.start() + return be.qrSender.start(ctx, host) } // Shutdown all senders and exporter and is invoked during service shutdown. 
diff --git a/exporter/exporterhelper/common_test.go b/exporter/exporterhelper/common_test.go index b34dced18cd..d2f2b813c5c 100644 --- a/exporter/exporterhelper/common_test.go +++ b/exporter/exporterhelper/common_test.go @@ -26,6 +26,8 @@ import ( "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/component/componenttest" "go.opentelemetry.io/collector/config" + "go.opentelemetry.io/collector/consumer/consumerhelper" + "go.opentelemetry.io/collector/model/pdata" ) var ( @@ -37,7 +39,7 @@ var ( ) func TestBaseExporter(t *testing.T) { - be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions()) + be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(), "", nopRequestUnmarshaler()) require.NoError(t, be.Start(context.Background(), componenttest.NewNopHost())) require.NoError(t, be.Shutdown(context.Background())) } @@ -51,6 +53,8 @@ func TestBaseExporterWithOptions(t *testing.T) { WithStart(func(ctx context.Context, host component.Host) error { return want }), WithShutdown(func(ctx context.Context) error { return want }), WithTimeout(DefaultTimeoutSettings())), + "", + nopRequestUnmarshaler(), ) require.Equal(t, want, be.Start(context.Background(), componenttest.NewNopHost())) require.Equal(t, want, be.Shutdown(context.Background())) @@ -64,3 +68,13 @@ func checkStatus(t *testing.T, sd *oteltest.Span, err error) { require.Equal(t, codes.Unset, sd.StatusCode(), "SpanData %v", sd) } } + +func nopTracePusher() consumerhelper.ConsumeTracesFunc { + return func(ctx context.Context, ld pdata.Traces) error { + return nil + } +} + +func nopRequestUnmarshaler() requestUnmarshaler { + return newTraceRequestUnmarshalerFunc(nopTracePusher()) +} diff --git a/exporter/exporterhelper/consumers_queue.go b/exporter/exporterhelper/consumers_queue.go new file mode 100644 index 00000000000..e05b809cb64 --- /dev/null +++ b/exporter/exporterhelper/consumers_queue.go @@ -0,0 +1,31 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package exporterhelper + +// consumersQueue is largely based on queue.BoundedQueue and matches the subset used in the collector +// It describes a producer-consumer exchange which can be backed by e.g. the memory-based ring buffer queue +// (queue.BoundedQueue) or via disk-based queue (persistentQueue) +type consumersQueue interface { + // StartConsumers starts a given number of goroutines consuming items from the queue + // and passing them into the consumer callback. + StartConsumers(num int, callback func(item interface{})) + // Produce is used by the producer to submit new item to the queue. Returns false in case of queue overflow. + Produce(item interface{}) bool + // Stop stops all consumers, as well as the length reporter if started, + // and releases the items channel. It blocks until all consumers have stopped. 
+ Stop() + // Size returns the current Size of the queue + Size() int +} diff --git a/exporter/exporterhelper/logs.go b/exporter/exporterhelper/logs.go index 3873e3f7d0d..00b2325483d 100644 --- a/exporter/exporterhelper/logs.go +++ b/exporter/exporterhelper/logs.go @@ -23,9 +23,13 @@ import ( "go.opentelemetry.io/collector/consumer" "go.opentelemetry.io/collector/consumer/consumererror" "go.opentelemetry.io/collector/consumer/consumerhelper" + "go.opentelemetry.io/collector/model/otlp" "go.opentelemetry.io/collector/model/pdata" ) +var logsMarshaler = otlp.NewProtobufLogsMarshaler() +var logsUnmarshaler = otlp.NewProtobufLogsUnmarshaler() + type logsRequest struct { baseRequest ld pdata.Logs @@ -40,6 +44,16 @@ func newLogsRequest(ctx context.Context, ld pdata.Logs, pusher consumerhelper.Co } } +func newLogsRequestUnmarshalerFunc(pusher consumerhelper.ConsumeLogsFunc) requestUnmarshaler { + return func(bytes []byte) (request, error) { + logs, err := logsUnmarshaler.UnmarshalLogs(bytes) + if err != nil { + return nil, err + } + return newLogsRequest(context.Background(), logs, pusher), nil + } +} + func (req *logsRequest) onError(err error) request { var logError consumererror.Logs if consumererror.AsLogs(err, &logError) { @@ -52,6 +66,10 @@ func (req *logsRequest) export(ctx context.Context) error { return req.pusher(ctx, req.ld) } +func (req *logsRequest) marshal() ([]byte, error) { + return logsMarshaler.MarshalLogs(req.ld) +} + func (req *logsRequest) count() int { return req.ld.LogRecordCount() } @@ -81,7 +99,7 @@ func NewLogsExporter( } bs := fromOptions(options...) - be := newBaseExporter(cfg, set, bs) + be := newBaseExporter(cfg, set, bs, "logs", newLogsRequestUnmarshalerFunc(pusher)) be.wrapConsumerSender(func(nextSender requestSender) requestSender { return &logsExporterWithObservability{ obsrep: be.obsrep, diff --git a/exporter/exporterhelper/metrics.go b/exporter/exporterhelper/metrics.go index a48af7ba398..ed142a4afc9 100644 --- a/exporter/exporterhelper/metrics.go +++ b/exporter/exporterhelper/metrics.go @@ -23,9 +23,13 @@ import ( "go.opentelemetry.io/collector/consumer" "go.opentelemetry.io/collector/consumer/consumererror" "go.opentelemetry.io/collector/consumer/consumerhelper" + "go.opentelemetry.io/collector/model/otlp" "go.opentelemetry.io/collector/model/pdata" ) +var metricsMarshaler = otlp.NewProtobufMetricsMarshaler() +var metricsUnmarshaler = otlp.NewProtobufMetricsUnmarshaler() + type metricsRequest struct { baseRequest md pdata.Metrics @@ -40,6 +44,16 @@ func newMetricsRequest(ctx context.Context, md pdata.Metrics, pusher consumerhel } } +func newMetricsRequestUnmarshalerFunc(pusher consumerhelper.ConsumeMetricsFunc) requestUnmarshaler { + return func(bytes []byte) (request, error) { + metrics, err := metricsUnmarshaler.UnmarshalMetrics(bytes) + if err != nil { + return nil, err + } + return newMetricsRequest(context.Background(), metrics, pusher), nil + } +} + func (req *metricsRequest) onError(err error) request { var metricsError consumererror.Metrics if consumererror.AsMetrics(err, &metricsError) { @@ -52,6 +66,10 @@ func (req *metricsRequest) export(ctx context.Context) error { return req.pusher(ctx, req.md) } +func (req *metricsRequest) marshal() ([]byte, error) { + return metricsMarshaler.MarshalMetrics(req.md) +} + func (req *metricsRequest) count() int { return req.md.DataPointCount() } @@ -81,7 +99,7 @@ func NewMetricsExporter( } bs := fromOptions(options...) 
- be := newBaseExporter(cfg, set, bs) + be := newBaseExporter(cfg, set, bs, "metrics", newMetricsRequestUnmarshalerFunc(pusher)) be.wrapConsumerSender(func(nextSender requestSender) requestSender { return &metricsSenderWithObservability{ obsrep: be.obsrep, diff --git a/exporter/exporterhelper/persistent_queue.go b/exporter/exporterhelper/persistent_queue.go new file mode 100644 index 00000000000..e347a29af08 --- /dev/null +++ b/exporter/exporterhelper/persistent_queue.go @@ -0,0 +1,343 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package exporterhelper + +import ( + "bytes" + "context" + "encoding/binary" + "fmt" + "sync" + "time" + + "github.com/jaegertracing/jaeger/pkg/queue" + "go.uber.org/atomic" + "go.uber.org/zap" + + "go.opentelemetry.io/collector/extension/storage" +) + +// persistentQueue holds the queue backed by file storage +type persistentQueue struct { + logger *zap.Logger + stopWG sync.WaitGroup + produceMu sync.Mutex + exitChan chan int + capacity int + numWorkers int + + storage persistentStorage +} + +// persistentStorage provides an interface for request storage operations +type persistentStorage interface { + // put appends the request to the storage + put(req request) error + // get returns the next available request; not the channel is unbuffered + get() <-chan request + // size returns the current size of the storage in number of requets + size() int + // stop gracefully stops the storage + stop() +} + +// persistentContiguousStorage provides a persistent queue implementation backed by file storage extension +// +// Write index describes the position at which next item is going to be stored +// Read index describes which item needs to be read next +// When Write index = Read index, no elements are in the queue +// +// ┌─file extension-backed queue─┐ +// │ │ +// │ ┌───┐ ┌───┐ ┌───┐ │ +// │ n+1 │ n │ ... 
│ 4 │ │ 3 │ │ +// │ └───┘ └───┘ └─x─┘ │ +// │ x │ +// └───────────────────────x─────┘ +// ▲ ▲ x +// │ │ xxx deleted +// │ │ +// write read +// index index +// +type persistentContiguousStorage struct { + logger *zap.Logger + client storage.Client + mu sync.Mutex + queueName string + readIndex uint64 + writeIndex uint64 + readIndexKey string + writeIndexKey string + retryDelay time.Duration + unmarshaler requestUnmarshaler + reqChan chan request + stopped *atomic.Uint32 +} + +const ( + queueNameKey = "queueName" + zapItemKey = "itemKey" + itemKeyTemplate = "it-%d" + readIndexKey = "ri" + writeIndexKey = "wi" + defaultRetryDelay = 500 * time.Millisecond +) + +// newPersistentQueue creates a new queue backed by file storage +func newPersistentQueue(ctx context.Context, name string, capacity int, logger *zap.Logger, client storage.Client, unmarshaler requestUnmarshaler) *persistentQueue { + return &persistentQueue{ + logger: logger, + exitChan: make(chan int), + capacity: capacity, + storage: newPersistentContiguousStorage(ctx, name, logger, client, unmarshaler), + } +} + +// StartConsumers starts the given number of consumers which will be consuming items +func (pq *persistentQueue) StartConsumers(num int, callback func(item interface{})) { + pq.numWorkers = num + var startWG sync.WaitGroup + + factory := func() queue.Consumer { + return queue.ConsumerFunc(callback) + } + + for i := 0; i < pq.numWorkers; i++ { + pq.stopWG.Add(1) + startWG.Add(1) + go func() { + startWG.Done() + defer pq.stopWG.Done() + consumer := factory() + + for { + select { + case req := <-pq.storage.get(): + consumer.Consume(req) + case <-pq.exitChan: + return + } + } + }() + } + startWG.Wait() +} + +// Produce adds an item to the queue and returns true if it was accepted +func (pq *persistentQueue) Produce(item interface{}) bool { + pq.produceMu.Lock() + defer pq.produceMu.Unlock() + + if pq.storage.size() >= pq.capacity { + return false + } + err := pq.storage.put(item.(request)) + return err == nil +} + +// Stop stops accepting items, shuts down the queue and closes the persistent queue +func (pq *persistentQueue) Stop() { + pq.storage.stop() + close(pq.exitChan) + pq.stopWG.Wait() +} + +// Size returns the current depth of the queue +func (pq *persistentQueue) Size() int { + return pq.storage.size() +} + +// newPersistentContiguousStorage creates a new file-storage extension backed queue. 
It needs to be initialized separately +func newPersistentContiguousStorage(ctx context.Context, queueName string, logger *zap.Logger, client storage.Client, unmarshaler requestUnmarshaler) *persistentContiguousStorage { + wcs := &persistentContiguousStorage{ + logger: logger, + client: client, + queueName: queueName, + unmarshaler: unmarshaler, + reqChan: make(chan request), + stopped: atomic.NewUint32(0), + retryDelay: defaultRetryDelay, + } + initPersistentContiguousStorage(ctx, wcs) + go wcs.loop(ctx) + return wcs +} + +func initPersistentContiguousStorage(ctx context.Context, wcs *persistentContiguousStorage) { + wcs.readIndexKey = wcs.buildReadIndexKey() + wcs.writeIndexKey = wcs.buildWriteIndexKey() + + readIndexBytes, err := wcs.client.Get(ctx, wcs.readIndexKey) + if err != nil || readIndexBytes == nil { + wcs.logger.Debug("failed getting read index, starting with a new one", zap.String(queueNameKey, wcs.queueName)) + wcs.readIndex = 0 + } else { + val, conversionErr := bytesToUint64(readIndexBytes) + if conversionErr != nil { + wcs.logger.Warn("read index corrupted, starting with a new one", zap.String(queueNameKey, wcs.queueName)) + wcs.readIndex = 0 + } else { + wcs.readIndex = val + } + } + + writeIndexBytes, err := wcs.client.Get(ctx, wcs.writeIndexKey) + if err != nil || writeIndexBytes == nil { + wcs.logger.Debug("failed getting write index, starting with a new one", zap.String(queueNameKey, wcs.queueName)) + wcs.writeIndex = 0 + } else { + val, conversionErr := bytesToUint64(writeIndexBytes) + if conversionErr != nil { + wcs.logger.Warn("write index corrupted, starting with a new one", zap.String(queueNameKey, wcs.queueName)) + wcs.writeIndex = 0 + } else { + wcs.writeIndex = val + } + } +} + +// Put marshals the request and puts it into the persistent queue +func (pcs *persistentContiguousStorage) put(req request) error { + buf, err := req.marshal() + if err != nil { + return err + } + + pcs.mu.Lock() + defer pcs.mu.Unlock() + + itemKey := pcs.buildItemKey(pcs.writeIndex) + err = pcs.client.Set(context.Background(), itemKey, buf) + if err != nil { + return err + } + + pcs.writeIndex++ + writeIndexBytes, err := uint64ToBytes(pcs.writeIndex) + if err != nil { + pcs.logger.Warn("failed converting write index uint64 to bytes", zap.Error(err)) + } + + return pcs.client.Set(context.Background(), pcs.writeIndexKey, writeIndexBytes) +} + +// getNextItem pulls the next available item from the persistent storage; if none is found, returns (nil, false) +func (pcs *persistentContiguousStorage) getNextItem(ctx context.Context) (request, bool) { + pcs.mu.Lock() + defer pcs.mu.Unlock() + + if pcs.readIndex < pcs.writeIndex { + itemKey := pcs.buildItemKey(pcs.readIndex) + // Increase here, so despite errors it would still progress + pcs.readIndex++ + + buf, err := pcs.client.Get(ctx, itemKey) + if err != nil { + pcs.logger.Error("error when getting item from persistent storage", + zap.String(queueNameKey, pcs.queueName), zap.String(zapItemKey, itemKey), zap.Error(err)) + return nil, false + } + req, unmarshalErr := pcs.unmarshaler(buf) + pcs.updateItemRead(ctx, itemKey) + if unmarshalErr != nil { + pcs.logger.Error("error when unmarshalling item from persistent storage", + zap.String(queueNameKey, pcs.queueName), zap.String(zapItemKey, itemKey), zap.Error(unmarshalErr)) + return nil, false + } + + return req, true + } + + return nil, false +} + +func (pcs *persistentContiguousStorage) loop(ctx context.Context) { + for { + if pcs.stopped.Load() != 0 { + return + } + + req, found := 
pcs.getNextItem(ctx) + if found { + pcs.reqChan <- req + } else { + time.Sleep(pcs.retryDelay) + } + } +} + +// get returns the next request from the queue as available via the channel +func (pcs *persistentContiguousStorage) get() <-chan request { + return pcs.reqChan +} + +func (pcs *persistentContiguousStorage) size() int { + pcs.mu.Lock() + defer pcs.mu.Unlock() + return int(pcs.writeIndex - pcs.readIndex) +} + +func (pcs *persistentContiguousStorage) stop() { + pcs.logger.Debug("stopping persistentContiguousStorage", zap.String(queueNameKey, pcs.queueName)) + pcs.stopped.Store(1) +} + +func (pcs *persistentContiguousStorage) updateItemRead(ctx context.Context, itemKey string) { + err := pcs.client.Delete(ctx, itemKey) + if err != nil { + pcs.logger.Debug("failed deleting item", zap.String(zapItemKey, itemKey)) + } + + readIndexBytes, err := uint64ToBytes(pcs.readIndex) + if err != nil { + pcs.logger.Warn("failed converting read index uint64 to bytes", zap.Error(err)) + } else { + err = pcs.client.Set(ctx, pcs.readIndexKey, readIndexBytes) + if err != nil { + pcs.logger.Warn("failed storing read index", zap.Error(err)) + } + } +} + +func (pcs *persistentContiguousStorage) buildItemKey(index uint64) string { + return fmt.Sprintf(itemKeyTemplate, index) +} + +func (pcs *persistentContiguousStorage) buildReadIndexKey() string { + return readIndexKey +} + +func (pcs *persistentContiguousStorage) buildWriteIndexKey() string { + return writeIndexKey +} + +func uint64ToBytes(val uint64) ([]byte, error) { + var buf bytes.Buffer + err := binary.Write(&buf, binary.LittleEndian, val) + if err != nil { + return nil, err + } + return buf.Bytes(), err +} + +func bytesToUint64(b []byte) (uint64, error) { + var val uint64 + err := binary.Read(bytes.NewReader(b), binary.LittleEndian, &val) + if err != nil { + return val, err + } + return val, nil +} diff --git a/exporter/exporterhelper/persistent_queue_test.go b/exporter/exporterhelper/persistent_queue_test.go new file mode 100644 index 00000000000..316e4371aca --- /dev/null +++ b/exporter/exporterhelper/persistent_queue_test.go @@ -0,0 +1,314 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package exporterhelper + +import ( + "context" + "errors" + "fmt" + "io/ioutil" + "os" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.uber.org/zap" + + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/config" + "go.opentelemetry.io/collector/consumer/pdata" + "go.opentelemetry.io/collector/extension/storage" +) + +func createStorageExtension(_ string) storage.Extension { + // After having storage moved to core, we could leverage storagetest.NewTestExtension(nil, path) + return newMockStorageExtension() +} + +func createTestQueue(extension storage.Extension, capacity int) *persistentQueue { + logger, _ := zap.NewDevelopment() + + client, err := extension.GetClient(context.Background(), component.KindReceiver, config.ComponentID{}, "") + if err != nil { + panic(err) + } + + wq := newPersistentQueue(context.Background(), "foo", capacity, logger, client, newTraceRequestUnmarshalerFunc(nopTracePusher())) + wq.storage.(*persistentContiguousStorage).mu.Lock() + wq.storage.(*persistentContiguousStorage).retryDelay = 10 * time.Millisecond + wq.storage.(*persistentContiguousStorage).mu.Unlock() + return wq +} + +func createTestPersistentStorage(extension storage.Extension) persistentStorage { + logger, _ := zap.NewDevelopment() + + client, err := extension.GetClient(context.Background(), component.KindReceiver, config.ComponentID{}, "") + if err != nil { + panic(err) + } + + return newPersistentContiguousStorage(context.Background(), "foo", logger, client, newTraceRequestUnmarshalerFunc(nopTracePusher())) +} + +func createTemporaryDirectory() string { + directory, err := ioutil.TempDir("", "persistent-test") + if err != nil { + panic(err) + } + return directory +} + +func TestPersistentQueue_RepeatPutCloseReadClose(t *testing.T) { + path := createTemporaryDirectory() + defer os.RemoveAll(path) + + traces := newTraces(5, 10) + req := newTracesRequest(context.Background(), traces, nopTracePusher()) + + for i := 0; i < 10; i++ { + ext := createStorageExtension(path) + ps := createTestPersistentStorage(ext) + require.Equal(t, 0, ps.size()) + err := ps.put(req) + require.NoError(t, err) + err = ext.Shutdown(context.Background()) + require.NoError(t, err) + + // TODO: when replacing mock with real storage, this could actually be uncommented + // ext = createStorageExtension(path) + // ps = createTestPersistentStorage(ext) + + require.Equal(t, 1, ps.size()) + readReq := <-ps.get() + require.Equal(t, 0, ps.size()) + require.Equal(t, req.(*tracesRequest).td, readReq.(*tracesRequest).td) + err = ext.Shutdown(context.Background()) + require.NoError(t, err) + } + + // No more items + ext := createStorageExtension(path) + wq := createTestQueue(ext, 5000) + require.Equal(t, 0, wq.Size()) + ext.Shutdown(context.Background()) +} + +func TestPersistentQueue_Capacity(t *testing.T) { + path := createTemporaryDirectory() + defer os.RemoveAll(path) + + ext := createStorageExtension(path) + defer ext.Shutdown(context.Background()) + + wq := createTestQueue(ext, 5) + require.Equal(t, 0, wq.Size()) + + traces := newTraces(1, 10) + req := newTracesRequest(context.Background(), traces, nopTracePusher()) + + for i := 0; i < 10; i++ { + result := wq.Produce(req) + if i < 5 { + require.True(t, result) + } else { + require.False(t, result) + } + } + require.Equal(t, 5, wq.Size()) +} + +func TestPersistentQueue_ConsumersProducers(t *testing.T) { + cases := []struct { + numMessagesProduced int + numConsumers int + }{ + { + numMessagesProduced: 1, + numConsumers: 1, 
+ }, + { + numMessagesProduced: 100, + numConsumers: 1, + }, + { + numMessagesProduced: 100, + numConsumers: 3, + }, + { + numMessagesProduced: 1, + numConsumers: 100, + }, + { + numMessagesProduced: 100, + numConsumers: 100, + }, + } + + for _, c := range cases { + t.Run(fmt.Sprintf("#messages: %d #consumers: %d", c.numMessagesProduced, c.numConsumers), func(t *testing.T) { + path := createTemporaryDirectory() + + traces := newTraces(1, 10) + req := newTracesRequest(context.Background(), traces, nopTracePusher()) + + ext := createStorageExtension(path) + tq := createTestQueue(ext, 5000) + + defer os.RemoveAll(path) + defer tq.Stop() + defer ext.Shutdown(context.Background()) + + numMessagesConsumed := int32(0) + tq.StartConsumers(c.numConsumers, func(item interface{}) { + if item != nil { + atomic.AddInt32(&numMessagesConsumed, 1) + } + }) + + for i := 0; i < c.numMessagesProduced; i++ { + tq.Produce(req) + } + + require.Eventually(t, func() bool { + return c.numMessagesProduced == int(atomic.LoadInt32(&numMessagesConsumed)) + }, 500*time.Millisecond, 10*time.Millisecond) + }) + } +} + +func BenchmarkPersistentQueue_1Trace10Spans(b *testing.B) { + cases := []struct { + numTraces int + numSpansPerTrace int + }{ + { + numTraces: 1, + numSpansPerTrace: 1, + }, + { + numTraces: 1, + numSpansPerTrace: 10, + }, + { + numTraces: 10, + numSpansPerTrace: 10, + }, + } + + for _, c := range cases { + b.Run(fmt.Sprintf("#traces: %d #spansPerTrace: %d", c.numTraces, c.numSpansPerTrace), func(bb *testing.B) { + path := createTemporaryDirectory() + defer os.RemoveAll(path) + ext := createStorageExtension(path) + ps := createTestPersistentStorage(ext) + + traces := newTraces(c.numTraces, c.numSpansPerTrace) + req := newTracesRequest(context.Background(), traces, nopTracePusher()) + + bb.ResetTimer() + + for i := 0; i < bb.N; i++ { + err := ps.put(req) + require.NoError(bb, err) + } + + for i := 0; i < bb.N; i++ { + req := ps.get() + require.NotNil(bb, req) + } + ext.Shutdown(context.Background()) + }) + } +} + +func newTraces(numTraces int, numSpans int) pdata.Traces { + traces := pdata.NewTraces() + batch := traces.ResourceSpans().AppendEmpty() + batch.Resource().Attributes().InsertString("resource-attr", "some-resource") + batch.Resource().Attributes().InsertInt("num-traces", int64(numTraces)) + batch.Resource().Attributes().InsertInt("num-spans", int64(numSpans)) + + for i := 0; i < numTraces; i++ { + traceID := pdata.NewTraceID([16]byte{1, 2, 3, byte(i)}) + ils := batch.InstrumentationLibrarySpans().AppendEmpty() + for j := 0; j < numSpans; j++ { + span := ils.Spans().AppendEmpty() + span.SetTraceID(traceID) + span.SetSpanID(pdata.NewSpanID([8]byte{1, 2, 3, byte(j)})) + span.SetName("should-not-be-changed") + span.Attributes().InsertInt("int-attribute", int64(j)) + span.Attributes().InsertString("str-attribute-1", "foobar") + span.Attributes().InsertString("str-attribute-2", "fdslafjasdk12312312jkl") + span.Attributes().InsertString("str-attribute-3", "AbcDefGeKKjkfdsafasdfsdasdf") + span.Attributes().InsertString("str-attribute-4", "xxxxxx") + span.Attributes().InsertString("str-attribute-5", "abcdef") + } + } + + return traces +} + +type mockStorageExtension struct{} + +func (m mockStorageExtension) Start(_ context.Context, _ component.Host) error { + return nil +} + +func (m mockStorageExtension) Shutdown(_ context.Context) error { + return nil +} + +func (m mockStorageExtension) GetClient(ctx context.Context, kind component.Kind, id config.ComponentID, s string) (storage.Client, error) { + return 
newMockStorageClient(), nil +} + +func newMockStorageExtension() storage.Extension { + return &mockStorageExtension{} +} + +func newMockStorageClient() storage.Client { + return &mockStorageClient{ + st: map[string][]byte{}, + } +} + +type mockStorageClient struct { + st map[string][]byte +} + +func (m mockStorageClient) Get(_ context.Context, s string) ([]byte, error) { + val, found := m.st[s] + if !found { + return []byte{}, errors.New("key not found") + } + + return val, nil +} + +func (m mockStorageClient) Set(_ context.Context, s string, bytes []byte) error { + m.st[s] = bytes + return nil +} + +func (m mockStorageClient) Delete(_ context.Context, s string) error { + delete(m.st, s) + return nil +} + +func (m mockStorageClient) Close(_ context.Context) error { + return nil +} diff --git a/exporter/exporterhelper/queued_retry.go b/exporter/exporterhelper/queued_retry.go index aed8ddd2406..a37d9002a17 100644 --- a/exporter/exporterhelper/queued_retry.go +++ b/exporter/exporterhelper/queued_retry.go @@ -29,8 +29,11 @@ import ( "go.uber.org/zap" "go.uber.org/zap/zapcore" + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/config" "go.opentelemetry.io/collector/consumer/consumererror" "go.opentelemetry.io/collector/exporter/exporterhelper/internal" + "go.opentelemetry.io/collector/extension/storage" "go.opentelemetry.io/collector/internal/obsreportconfig/obsmetrics" ) @@ -43,7 +46,9 @@ var ( metric.WithLabelKeys(obsmetrics.ExporterKey), metric.WithUnit(metricdata.UnitDimensionless)) - errSendingQueueIsFull = errors.New("sending_queue is full") + errNoStorageClient = errors.New("no storage client extension found") + errMultipleStorageClients = errors.New("multiple storage extensions found") + errSendingQueueIsFull = errors.New("sending_queue is full") ) func init() { @@ -58,6 +63,8 @@ type QueueSettings struct { NumConsumers int `mapstructure:"num_consumers"` // QueueSize is the maximum number of batches allowed in queue at a given time. QueueSize int `mapstructure:"queue_size"` + // PersistentEnabled describes whether persistency via storage file extension should be enabled + PersistentEnabled bool `mapstructure:"persistent_enabled"` } // DefaultQueueSettings returns the default settings for QueueSettings. @@ -69,7 +76,8 @@ func DefaultQueueSettings() QueueSettings { // This is a pretty decent value for production. // User should calculate this from the perspective of how many seconds to buffer in case of a backend outage, // multiply that by the number of requests per seconds. 
- QueueSize: 5000, + QueueSize: 5000, + PersistentEnabled: false, } } @@ -99,13 +107,16 @@ func DefaultRetrySettings() RetrySettings { } type queuedRetrySender struct { - fullName string - cfg QueueSettings - consumerSender requestSender - queue *internal.BoundedQueue - retryStopCh chan struct{} - traceAttributes []attribute.KeyValue - logger *zap.Logger + id config.ComponentID + signalName string + cfg QueueSettings + consumerSender requestSender + queue consumersQueue + retryStopCh chan struct{} + traceAttributes []attribute.KeyValue + logger *zap.Logger + requeuingEnabled bool + requestUnmarshaler requestUnmarshaler } func createSampledLogger(logger *zap.Logger) *zap.Logger { @@ -127,29 +138,128 @@ func createSampledLogger(logger *zap.Logger) *zap.Logger { return logger.WithOptions(opts) } -func newQueuedRetrySender(fullName string, qCfg QueueSettings, rCfg RetrySettings, nextSender requestSender, logger *zap.Logger) *queuedRetrySender { +func newQueuedRetrySender(id config.ComponentID, signalName string, qCfg QueueSettings, rCfg RetrySettings, reqUnmarshaler requestUnmarshaler, nextSender requestSender, logger *zap.Logger) *queuedRetrySender { retryStopCh := make(chan struct{}) sampledLogger := createSampledLogger(logger) - traceAttr := attribute.String(obsmetrics.ExporterKey, fullName) - return &queuedRetrySender{ - fullName: fullName, - cfg: qCfg, - consumerSender: &retrySender{ - traceAttribute: traceAttr, - cfg: rCfg, - nextSender: nextSender, - stopCh: retryStopCh, - logger: sampledLogger, - }, - queue: internal.NewBoundedQueue(qCfg.QueueSize, func(item interface{}) {}), - retryStopCh: retryStopCh, - traceAttributes: []attribute.KeyValue{traceAttr}, - logger: sampledLogger, + traceAttr := attribute.String(obsmetrics.ExporterKey, id.String()) + + qrs := &queuedRetrySender{ + id: id, + signalName: signalName, + cfg: qCfg, + retryStopCh: retryStopCh, + traceAttributes: []attribute.KeyValue{traceAttr}, + logger: sampledLogger, + requestUnmarshaler: reqUnmarshaler, } + + qrs.consumerSender = &retrySender{ + traceAttribute: traceAttr, + cfg: rCfg, + nextSender: nextSender, + stopCh: retryStopCh, + logger: sampledLogger, + // Following three functions are provided in such way because they actually depend on queuedRetrySender + onSuccess: qrs.onSuccess, + onTemporaryFailure: qrs.onTemporaryFailure, + onPermanentFailure: qrs.onPermanentFailure, + } + + if !qCfg.PersistentEnabled { + qrs.queue = internal.NewBoundedQueue(qrs.cfg.QueueSize, func(item interface{}) {}) + } + // The Persistent Queue is initialized separately as it needs extra information about the component + + return qrs +} + +func (qrs *queuedRetrySender) onSuccess(req request, err error) error { + return nil +} + +func (qrs *queuedRetrySender) onPermanentFailure(req request, err error) error { + return err +} + +func (qrs *queuedRetrySender) onTemporaryFailure(req request, err error) error { + if qrs.requeuingEnabled && qrs.queue != nil { + if qrs.queue.Produce(req) { + qrs.logger.Error( + "Exporting failed. Putting back to the end of the queue.", + zap.Error(err), + ) + } else { + qrs.logger.Error( + "Exporting failed. Queue did not accept requeuing request. Dropping data.", + zap.Error(err), + zap.Int("dropped_items", req.count()), + ) + } + return err + } + + qrs.logger.Error( + "Exporting failed. No more retries left. 
Dropping data.", + zap.Error(err), + zap.Int("dropped_items", req.count()), + ) + return err +} + +func getStorageClient(ctx context.Context, host component.Host, id config.ComponentID, signalName string) (*storage.Client, error) { + var storageExtension storage.Extension + for _, ext := range host.GetExtensions() { + if se, ok := ext.(storage.Extension); ok { + if storageExtension != nil { + return nil, errMultipleStorageClients + } + storageExtension = se + } + } + + if storageExtension == nil { + return nil, errNoStorageClient + } + + client, err := storageExtension.GetClient(ctx, component.KindExporter, id, signalName) + if err != nil { + return nil, err + } + + return &client, err +} + +// initializePersistentQueue uses extra information for initialization available from component.Host +func (qrs *queuedRetrySender) initializePersistentQueue(ctx context.Context, host component.Host) error { + if qrs.cfg.PersistentEnabled { + storageClient, err := getStorageClient(ctx, host, qrs.id, qrs.signalName) + if err != nil { + return err + } + + qrs.queue = newPersistentQueue(ctx, qrs.fullName(), qrs.cfg.QueueSize, qrs.logger, *storageClient, qrs.requestUnmarshaler) + + // TODO: this can be further exposed as a config param rather than relying on a type of queue + qrs.requeuingEnabled = true + } + + return nil +} + +func (qrs *queuedRetrySender) fullName() string { + if qrs.signalName == "" { + return qrs.id.String() + } + return fmt.Sprintf("%s-%s", qrs.id.String(), qrs.signalName) } // start is invoked during service startup. -func (qrs *queuedRetrySender) start() error { +func (qrs *queuedRetrySender) start(ctx context.Context, host component.Host) error { + err := qrs.initializePersistentQueue(ctx, host) + if err != nil { + return err + } + qrs.queue.StartConsumers(qrs.cfg.NumConsumers, func(item interface{}) { req := item.(request) _ = qrs.consumerSender.send(req) @@ -159,7 +269,7 @@ func (qrs *queuedRetrySender) start() error { if qrs.cfg.Enabled { err := queueSizeGauge.UpsertEntry(func() int64 { return int64(qrs.queue.Size()) - }, metricdata.NewLabelValue(qrs.fullName)) + }, metricdata.NewLabelValue(qrs.fullName())) if err != nil { return fmt.Errorf("failed to create retry queue size metric: %v", err) } @@ -205,15 +315,17 @@ func (qrs *queuedRetrySender) shutdown() { if qrs.cfg.Enabled { _ = queueSizeGauge.UpsertEntry(func() int64 { return int64(0) - }, metricdata.NewLabelValue(qrs.fullName)) + }, metricdata.NewLabelValue(qrs.fullName())) } - // First stop the retry goroutines, so that unblocks the queue workers. + // First Stop the retry goroutines, so that unblocks the queue numWorkers. close(qrs.retryStopCh) // Stop the queued sender, this will drain the queue and will call the retry (which is stopped) that will only // try once every request. - qrs.queue.Stop() + if qrs.queue != nil { + qrs.queue.Stop() + } } // TODO: Clean this by forcing all exporters to return an internal error type that always include the information about retries. 
@@ -238,12 +350,17 @@ func NewThrottleRetry(err error, delay time.Duration) error { } } +type onRequestHandlingFinishedFunc func(request, error) error + type retrySender struct { - traceAttribute attribute.KeyValue - cfg RetrySettings - nextSender requestSender - stopCh chan struct{} - logger *zap.Logger + traceAttribute attribute.KeyValue + cfg RetrySettings + nextSender requestSender + stopCh chan struct{} + logger *zap.Logger + onSuccess onRequestHandlingFinishedFunc + onTemporaryFailure onRequestHandlingFinishedFunc + onPermanentFailure onRequestHandlingFinishedFunc } // send implements the requestSender interface @@ -280,7 +397,7 @@ func (rs *retrySender) send(req request) error { err := rs.nextSender.send(req) if err == nil { - return nil + return rs.onSuccess(req, nil) } // Immediately drop data on permanent errors. @@ -290,7 +407,7 @@ func (rs *retrySender) send(req request) error { zap.Error(err), zap.Int("dropped_items", req.count()), ) - return err + return rs.onPermanentFailure(req, err) } // Give the request a chance to extract signal data to retry if only some data @@ -301,12 +418,7 @@ func (rs *retrySender) send(req request) error { if backoffDelay == backoff.Stop { // throw away the batch err = fmt.Errorf("max elapsed time expired %w", err) - rs.logger.Error( - "Exporting failed. No more retries left. Dropping data.", - zap.Error(err), - zap.Int("dropped_items", req.count()), - ) - return err + return rs.onTemporaryFailure(req, err) } throttleErr := throttleRetry{} @@ -329,7 +441,7 @@ func (rs *retrySender) send(req request) error { ) retryNum++ - // back-off, but get interrupted when shutting down or request is cancelled or timed out. + // back-off, but Get interrupted when shutting down or request is cancelled or timed out. select { case <-req.context().Done(): return fmt.Errorf("request is cancelled or timed out %w", err) diff --git a/exporter/exporterhelper/queued_retry_test.go b/exporter/exporterhelper/queued_retry_test.go index 357b80b40d5..98973d4b9e4 100644 --- a/exporter/exporterhelper/queued_retry_test.go +++ b/exporter/exporterhelper/queued_retry_test.go @@ -32,13 +32,21 @@ import ( "go.opentelemetry.io/collector/component/componenttest" "go.opentelemetry.io/collector/consumer/consumererror" "go.opentelemetry.io/collector/internal/testdata" + "go.opentelemetry.io/collector/model/pdata" "go.opentelemetry.io/collector/obsreport/obsreporttest" ) +func mockRequestUnmarshaler(mr *mockRequest) requestUnmarshaler { + return func(bytes []byte) (request, error) { + return mr, nil + } +} + func TestQueuedRetry_DropOnPermanentError(t *testing.T) { qCfg := DefaultQueueSettings() rCfg := DefaultRetrySettings() - be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg))) + mockR := newMockRequest(context.Background(), 2, consumererror.Permanent(errors.New("bad data"))) + be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg)), "", mockRequestUnmarshaler(mockR)) ocs := newObservabilityConsumerSender(be.qrSender.consumerSender) be.qrSender.consumerSender = ocs require.NoError(t, be.Start(context.Background(), componenttest.NewNopHost())) @@ -46,7 +54,6 @@ func TestQueuedRetry_DropOnPermanentError(t *testing.T) { assert.NoError(t, be.Shutdown(context.Background())) }) - mockR := newMockRequest(context.Background(), 2, consumererror.Permanent(errors.New("bad data"))) ocs.run(func() { // This is asynchronous so it should just 
enqueue, no errors expected. require.NoError(t, be.sender.send(mockR)) @@ -62,7 +69,7 @@ func TestQueuedRetry_DropOnNoRetry(t *testing.T) { qCfg := DefaultQueueSettings() rCfg := DefaultRetrySettings() rCfg.Enabled = false - be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg))) + be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg)), "", nopRequestUnmarshaler()) ocs := newObservabilityConsumerSender(be.qrSender.consumerSender) be.qrSender.consumerSender = ocs require.NoError(t, be.Start(context.Background(), componenttest.NewNopHost())) @@ -87,7 +94,7 @@ func TestQueuedRetry_OnError(t *testing.T) { qCfg.NumConsumers = 1 rCfg := DefaultRetrySettings() rCfg.InitialInterval = 0 - be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg))) + be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg)), "", nopRequestUnmarshaler()) ocs := newObservabilityConsumerSender(be.qrSender.consumerSender) be.qrSender.consumerSender = ocs require.NoError(t, be.Start(context.Background(), componenttest.NewNopHost())) @@ -113,7 +120,7 @@ func TestQueuedRetry_StopWhileWaiting(t *testing.T) { qCfg := DefaultQueueSettings() qCfg.NumConsumers = 1 rCfg := DefaultRetrySettings() - be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg))) + be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg)), "", nopRequestUnmarshaler()) ocs := newObservabilityConsumerSender(be.qrSender.consumerSender) be.qrSender.consumerSender = ocs require.NoError(t, be.Start(context.Background(), componenttest.NewNopHost())) @@ -146,7 +153,7 @@ func TestQueuedRetry_DoNotPreserveCancellation(t *testing.T) { qCfg := DefaultQueueSettings() qCfg.NumConsumers = 1 rCfg := DefaultRetrySettings() - be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg))) + be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg)), "", nopRequestUnmarshaler()) ocs := newObservabilityConsumerSender(be.qrSender.consumerSender) be.qrSender.consumerSender = ocs require.NoError(t, be.Start(context.Background(), componenttest.NewNopHost())) @@ -175,7 +182,7 @@ func TestQueuedRetry_MaxElapsedTime(t *testing.T) { rCfg := DefaultRetrySettings() rCfg.InitialInterval = time.Millisecond rCfg.MaxElapsedTime = 100 * time.Millisecond - be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg))) + be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg)), "", nopRequestUnmarshaler()) ocs := newObservabilityConsumerSender(be.qrSender.consumerSender) be.qrSender.consumerSender = ocs require.NoError(t, be.Start(context.Background(), componenttest.NewNopHost())) @@ -221,7 +228,7 @@ func TestQueuedRetry_ThrottleError(t *testing.T) { qCfg.NumConsumers = 1 rCfg := DefaultRetrySettings() rCfg.InitialInterval = 10 * time.Millisecond - be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), 
fromOptions(WithRetry(rCfg), WithQueue(qCfg))) + be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg)), "", nopRequestUnmarshaler()) ocs := newObservabilityConsumerSender(be.qrSender.consumerSender) be.qrSender.consumerSender = ocs require.NoError(t, be.Start(context.Background(), componenttest.NewNopHost())) @@ -253,7 +260,7 @@ func TestQueuedRetry_RetryOnError(t *testing.T) { qCfg.QueueSize = 1 rCfg := DefaultRetrySettings() rCfg.InitialInterval = 0 - be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg))) + be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg)), "", nopRequestUnmarshaler()) ocs := newObservabilityConsumerSender(be.qrSender.consumerSender) be.qrSender.consumerSender = ocs require.NoError(t, be.Start(context.Background(), componenttest.NewNopHost())) @@ -279,7 +286,7 @@ func TestQueuedRetry_DropOnFull(t *testing.T) { qCfg := DefaultQueueSettings() qCfg.QueueSize = 0 rCfg := DefaultRetrySettings() - be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg))) + be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg)), "", nopRequestUnmarshaler()) ocs := newObservabilityConsumerSender(be.qrSender.consumerSender) be.qrSender.consumerSender = ocs require.NoError(t, be.Start(context.Background(), componenttest.NewNopHost())) @@ -297,7 +304,7 @@ func TestQueuedRetryHappyPath(t *testing.T) { qCfg := DefaultQueueSettings() rCfg := DefaultRetrySettings() - be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg))) + be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg)), "", nopRequestUnmarshaler()) ocs := newObservabilityConsumerSender(be.qrSender.consumerSender) be.qrSender.consumerSender = ocs require.NoError(t, be.Start(context.Background(), componenttest.NewNopHost())) @@ -331,7 +338,7 @@ func TestQueuedRetry_QueueMetricsReported(t *testing.T) { qCfg := DefaultQueueSettings() qCfg.NumConsumers = 0 // to make every request go straight to the queue rCfg := DefaultRetrySettings() - be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg))) + be := newBaseExporter(&defaultExporterCfg, componenttest.NewNopExporterCreateSettings(), fromOptions(WithRetry(rCfg), WithQueue(qCfg)), "", nopRequestUnmarshaler()) require.NoError(t, be.Start(context.Background(), componenttest.NewNopHost())) for i := 0; i < 7; i++ { @@ -371,6 +378,10 @@ func (mer *mockErrorRequest) onError(error) request { return mer } +func (mer *mockErrorRequest) marshal() ([]byte, error) { + return nil, nil +} + func (mer *mockErrorRequest) count() int { return 7 } @@ -402,6 +413,10 @@ func (m *mockRequest) export(ctx context.Context) error { return ctx.Err() } +func (m *mockRequest) marshal() ([]byte, error) { + return pdata.NewTraces().ToOtlpProtoBytes() +} + func (m *mockRequest) onError(error) request { return &mockRequest{ baseRequest: m.baseRequest, diff --git a/exporter/exporterhelper/traces.go b/exporter/exporterhelper/traces.go index 3f63e3744b4..b35bb1a5ea9 100644 --- 
a/exporter/exporterhelper/traces.go +++ b/exporter/exporterhelper/traces.go @@ -23,9 +23,13 @@ import ( "go.opentelemetry.io/collector/consumer" "go.opentelemetry.io/collector/consumer/consumererror" "go.opentelemetry.io/collector/consumer/consumerhelper" + "go.opentelemetry.io/collector/model/otlp" "go.opentelemetry.io/collector/model/pdata" ) +var tracesMarshaler = otlp.NewProtobufTracesMarshaler() +var tracesUnmarshaler = otlp.NewProtobufTracesUnmarshaler() + type tracesRequest struct { baseRequest td pdata.Traces @@ -40,6 +44,20 @@ func newTracesRequest(ctx context.Context, td pdata.Traces, pusher consumerhelpe } } +func newTraceRequestUnmarshalerFunc(pusher consumerhelper.ConsumeTracesFunc) requestUnmarshaler { + return func(bytes []byte) (request, error) { + traces, err := tracesUnmarshaler.UnmarshalTraces(bytes) + if err != nil { + return nil, err + } + return newTracesRequest(context.Background(), traces, pusher), nil + } +} + +func (req *tracesRequest) marshal() ([]byte, error) { + return tracesMarshaler.MarshalTraces(req.td) +} + func (req *tracesRequest) onError(err error) request { var traceError consumererror.Traces if consumererror.AsTraces(err, &traceError) { @@ -82,7 +100,7 @@ func NewTracesExporter( } bs := fromOptions(options...) - be := newBaseExporter(cfg, set, bs) + be := newBaseExporter(cfg, set, bs, "traces", newTraceRequestUnmarshalerFunc(pusher)) be.wrapConsumerSender(func(nextSender requestSender) requestSender { return &tracesExporterWithObservability{ obsrep: be.obsrep, From 5b0bd729b2212d0c9cf072ac035524984091b97a Mon Sep 17 00:00:00 2001 From: Przemek Maciolek <58699843+pmm-sumo@users.noreply.github.com> Date: Wed, 16 Jun 2021 22:42:02 +0200 Subject: [PATCH 02/24] Code cleanup and clarifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Patryk Małek --- exporter/exporterhelper/common.go | 2 +- exporter/exporterhelper/consumers_queue.go | 3 ++- exporter/exporterhelper/persistent_queue.go | 14 +++++++------- exporter/exporterhelper/persistent_queue_test.go | 3 --- exporter/exporterhelper/queued_retry.go | 2 +- 5 files changed, 11 insertions(+), 13 deletions(-) diff --git a/exporter/exporterhelper/common.go b/exporter/exporterhelper/common.go index bb558945b21..7aaff568961 100644 --- a/exporter/exporterhelper/common.go +++ b/exporter/exporterhelper/common.go @@ -56,7 +56,7 @@ type request interface { marshal() ([]byte, error) } -// requestUnmarshaler defines a function which can take a byte stream and unmarshal it into a relevant request +// requestUnmarshaler defines a function which takes a byte slice and unmarshal it into a relevant request type requestUnmarshaler func([]byte) (request, error) // requestSender is an abstraction of a sender for a request independent of the type of the data (traces, metrics, logs). diff --git a/exporter/exporterhelper/consumers_queue.go b/exporter/exporterhelper/consumers_queue.go index e05b809cb64..ddb139a70ac 100644 --- a/exporter/exporterhelper/consumers_queue.go +++ b/exporter/exporterhelper/consumers_queue.go @@ -21,7 +21,8 @@ type consumersQueue interface { // StartConsumers starts a given number of goroutines consuming items from the queue // and passing them into the consumer callback. StartConsumers(num int, callback func(item interface{})) - // Produce is used by the producer to submit new item to the queue. Returns false in case of queue overflow. + // Produce is used by the producer to submit new item to the queue. 
Returns false if the item wasn't added + // to the queue due to queue overflow. Produce(item interface{}) bool // Stop stops all consumers, as well as the length reporter if started, // and releases the items channel. It blocks until all consumers have stopped. diff --git a/exporter/exporterhelper/persistent_queue.go b/exporter/exporterhelper/persistent_queue.go index e347a29af08..77dfd39aeb6 100644 --- a/exporter/exporterhelper/persistent_queue.go +++ b/exporter/exporterhelper/persistent_queue.go @@ -34,7 +34,7 @@ type persistentQueue struct { logger *zap.Logger stopWG sync.WaitGroup produceMu sync.Mutex - exitChan chan int + exitChan chan struct{} capacity int numWorkers int @@ -93,14 +93,14 @@ const ( itemKeyTemplate = "it-%d" readIndexKey = "ri" writeIndexKey = "wi" - defaultRetryDelay = 500 * time.Millisecond + defaultRetryDelay = 100 * time.Millisecond ) // newPersistentQueue creates a new queue backed by file storage func newPersistentQueue(ctx context.Context, name string, capacity int, logger *zap.Logger, client storage.Client, unmarshaler requestUnmarshaler) *persistentQueue { return &persistentQueue{ logger: logger, - exitChan: make(chan int), + exitChan: make(chan struct{}), capacity: capacity, storage: newPersistentContiguousStorage(ctx, name, logger, client, unmarshaler), } @@ -172,7 +172,7 @@ func newPersistentContiguousStorage(ctx context.Context, queueName string, logge retryDelay: defaultRetryDelay, } initPersistentContiguousStorage(ctx, wcs) - go wcs.loop(ctx) + go wcs.loop() return wcs } @@ -264,13 +264,13 @@ func (pcs *persistentContiguousStorage) getNextItem(ctx context.Context) (reques return nil, false } -func (pcs *persistentContiguousStorage) loop(ctx context.Context) { +func (pcs *persistentContiguousStorage) loop() { for { if pcs.stopped.Load() != 0 { return } - req, found := pcs.getNextItem(ctx) + req, found := pcs.getNextItem(context.Background()) if found { pcs.reqChan <- req } else { @@ -279,7 +279,7 @@ func (pcs *persistentContiguousStorage) loop(ctx context.Context) { } } -// get returns the next request from the queue as available via the channel +// get returns the request channel that all the requests will be send on func (pcs *persistentContiguousStorage) get() <-chan request { return pcs.reqChan } diff --git a/exporter/exporterhelper/persistent_queue_test.go b/exporter/exporterhelper/persistent_queue_test.go index 316e4371aca..121e7caebe9 100644 --- a/exporter/exporterhelper/persistent_queue_test.go +++ b/exporter/exporterhelper/persistent_queue_test.go @@ -47,9 +47,6 @@ func createTestQueue(extension storage.Extension, capacity int) *persistentQueue } wq := newPersistentQueue(context.Background(), "foo", capacity, logger, client, newTraceRequestUnmarshalerFunc(nopTracePusher())) - wq.storage.(*persistentContiguousStorage).mu.Lock() - wq.storage.(*persistentContiguousStorage).retryDelay = 10 * time.Millisecond - wq.storage.(*persistentContiguousStorage).mu.Unlock() return wq } diff --git a/exporter/exporterhelper/queued_retry.go b/exporter/exporterhelper/queued_retry.go index a37d9002a17..37270b48894 100644 --- a/exporter/exporterhelper/queued_retry.go +++ b/exporter/exporterhelper/queued_retry.go @@ -441,7 +441,7 @@ func (rs *retrySender) send(req request) error { ) retryNum++ - // back-off, but Get interrupted when shutting down or request is cancelled or timed out. + // back-off, but get interrupted when shutting down or request is cancelled or timed out. 
select { case <-req.context().Done(): return fmt.Errorf("request is cancelled or timed out %w", err) From 190434b782a9bb348f789da939eba532c8e471ba Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Wed, 16 Jun 2021 23:27:25 +0200 Subject: [PATCH 03/24] Replace atomic with channel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Patryk Małek --- exporter/exporterhelper/persistent_queue.go | 34 ++++++++++++------- .../exporterhelper/persistent_queue_test.go | 20 +++++++++++ 2 files changed, 41 insertions(+), 13 deletions(-) diff --git a/exporter/exporterhelper/persistent_queue.go b/exporter/exporterhelper/persistent_queue.go index 77dfd39aeb6..86d51f633d9 100644 --- a/exporter/exporterhelper/persistent_queue.go +++ b/exporter/exporterhelper/persistent_queue.go @@ -23,7 +23,6 @@ import ( "time" "github.com/jaegertracing/jaeger/pkg/queue" - "go.uber.org/atomic" "go.uber.org/zap" "go.opentelemetry.io/collector/extension/storage" @@ -84,7 +83,8 @@ type persistentContiguousStorage struct { retryDelay time.Duration unmarshaler requestUnmarshaler reqChan chan request - stopped *atomic.Uint32 + stopChan chan struct{} + stopOnce sync.Once } const ( @@ -168,7 +168,7 @@ func newPersistentContiguousStorage(ctx context.Context, queueName string, logge queueName: queueName, unmarshaler: unmarshaler, reqChan: make(chan request), - stopped: atomic.NewUint32(0), + stopChan: make(chan struct{}), retryDelay: defaultRetryDelay, } initPersistentContiguousStorage(ctx, wcs) @@ -266,15 +266,21 @@ func (pcs *persistentContiguousStorage) getNextItem(ctx context.Context) (reques func (pcs *persistentContiguousStorage) loop() { for { - if pcs.stopped.Load() != 0 { - return - } - - req, found := pcs.getNextItem(context.Background()) - if found { - pcs.reqChan <- req - } else { - time.Sleep(pcs.retryDelay) + for { + req, found := pcs.getNextItem(context.Background()) + if found { + select { + case <-pcs.stopChan: + return + case pcs.reqChan <- req: + } + } else { + select { + case <-pcs.stopChan: + return + case <-time.After(pcs.retryDelay): + } + } } } } @@ -292,7 +298,9 @@ func (pcs *persistentContiguousStorage) size() int { func (pcs *persistentContiguousStorage) stop() { pcs.logger.Debug("stopping persistentContiguousStorage", zap.String(queueNameKey, pcs.queueName)) - pcs.stopped.Store(1) + pcs.stopOnce.Do(func() { + close(pcs.stopChan) + }) } func (pcs *persistentContiguousStorage) updateItemRead(ctx context.Context, itemKey string) { diff --git a/exporter/exporterhelper/persistent_queue_test.go b/exporter/exporterhelper/persistent_queue_test.go index 121e7caebe9..fb3ac7fb61d 100644 --- a/exporter/exporterhelper/persistent_queue_test.go +++ b/exporter/exporterhelper/persistent_queue_test.go @@ -128,6 +128,26 @@ func TestPersistentQueue_Capacity(t *testing.T) { require.Equal(t, 5, wq.Size()) } +func TestPersistentQueue_Close(t *testing.T) { + path := createTemporaryDirectory() + defer os.RemoveAll(path) + + ext := createStorageExtension(path) + defer ext.Shutdown(context.Background()) + + wq := createTestQueue(ext, 1001) + traces := newTraces(1, 10) + req := newTracesRequest(context.Background(), traces, nopTracePusher()) + + wq.StartConsumers(100, func(item interface{}) {}) + + for i := 0; i < 1000; i++ { + wq.Produce(req) + } + // This will close the queue very quickly, consumers should not be able to consume anything and finish gracefully + wq.Stop() +} + func TestPersistentQueue_ConsumersProducers(t *testing.T) { cases := []struct { numMessagesProduced 
int From 05aa64839db4cc1da2788c7706bf4e1fccbf1231 Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Tue, 22 Jun 2021 20:00:30 +0200 Subject: [PATCH 04/24] Handle queued_retry persistence for item processed by consumers --- exporter/exporterhelper/README.md | 63 +-- exporter/exporterhelper/common.go | 30 +- exporter/exporterhelper/logs.go | 2 +- exporter/exporterhelper/metrics.go | 2 +- exporter/exporterhelper/persistent_queue.go | 271 +---------- .../exporterhelper/persistent_queue_test.go | 175 +------ exporter/exporterhelper/persistent_storage.go | 442 ++++++++++++++++++ .../exporterhelper/persistent_storage_test.go | 337 +++++++++++++ exporter/exporterhelper/queued_retry.go | 75 +-- exporter/exporterhelper/traces.go | 2 +- 10 files changed, 910 insertions(+), 489 deletions(-) create mode 100644 exporter/exporterhelper/persistent_storage.go create mode 100644 exporter/exporterhelper/persistent_storage_test.go diff --git a/exporter/exporterhelper/README.md b/exporter/exporterhelper/README.md index e3a5af18a4f..6ec4d676b3a 100644 --- a/exporter/exporterhelper/README.md +++ b/exporter/exporterhelper/README.md @@ -21,7 +21,7 @@ The following configuration options can be modified: User should calculate this as `num_seconds * requests_per_second` where: - `num_seconds` is the number of seconds to buffer in case of a backend outage - `requests_per_second` is the average number of requests per seconds. - - `persistent_enabled` (default = false): When set, enables persistence via a file extension + - `persistent_storage_enabled` (default = false): When set, enables persistence via a file storage extension - `resource_to_telemetry_conversion` - `enabled` (default = false): If `enabled` is `true`, all the resource attributes will be converted to metric labels by default. - `timeout` (default = 5s): Time to wait per individual attempt to send data to a backend. @@ -32,37 +32,42 @@ The full list of settings exposed for this helper exporter are documented [here] **Status: under development** -When `persistent_enabled` is set, the queue is being buffered to disk by the +When `persistent_storage_enabled` is set to true, the queue is being buffered to disk by the [file storage extension](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/extension/storage/filestorage). It currently can be enabled only in OpenTelemetry Collector Contrib. -This has some limitations currently. The items that have been passed to consumer for the actual exporting -are removed from persistent queue. In effect, in case of a sudden shutdown, they might be lost. ``` - ┌─Consumer #1─┐ - │ ┌───┐ │ - ┌──Deleted──┐ ┌───►│ │ 1 │ ├───► Success - X x x │ │ └───┘ │ - x x x │ │ │ - x x x │ └─────────────┘ - x x x │ - ┌─────────Persistent queue────x─────x───┐ │ ┌─Consumer #2─┐ - │ x x x │ │ │ ┌───┐ │ - │ ┌───┐ ┌───┐ ┌─x─┐ ┌─x─┐ ┌─x─┐ │ │ │ │ 2 │ ├───► Permanent -> X - │ n+1 │ n │ ... 
│ 4 │ │ 3 │ │ 2 │ │ 1 │ ├────┼───►│ └───┘ │ failure - │ └───┘ └───┘ └───┘ └───┘ └───┘ │ │ │ │ - │ │ │ └─────────────┘ - └───────────────────────────────────────┘ │ - ▲ ▲ │ ┌─Consumer #3─┐ - │ │ │ │ ┌───┐ │ Temporary - │ │ └───►│ │ 3 │ ├───► failure - write read │ └───┘ │ - index index │ │ │ - ▲ └─────────────┘ │ - │ ▲ │ - │ └── Retry ───────┤ - │ │ - │ │ - └───────────────────────── Requeuing ◄────────── Retry limit exceeded ─┘ + ┌─Consumer #1─┐ + │ ┌───┐ │ + ──────Deleted────── ┌───►│ │ 1 │ ├───► Success + Waiting in channel x x x │ │ └───┘ │ + for consumer ───┐ x x x │ │ │ + │ x x x │ └─────────────┘ + ▼ x x x │ +┌─────────────────────────────────────────x─────x───┐ │ ┌─Consumer #2─┐ +│ x x x │ │ │ ┌───┐ │ +│ ┌───┐ ┌───┐ ┌───┐ ┌─x─┐ ┌───┐ ┌─x─┐ ┌─x─┐ │ │ │ │ 2 │ ├───► Permanent -> X +│ n+1 │ n │ ... │ 6 │ │ 5 │ │ 4 │ │ 3 │ │ 2 │ │ 1 │ ├────┼───►│ └───┘ │ failure +│ └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ │ │ │ │ +│ │ │ └─────────────┘ +└───────────────────────────────────────────────────┘ │ + ▲ ▲ ▲ ▲ │ ┌─Consumer #3─┐ + │ │ │ │ │ │ ┌───┐ │ + │ │ │ │ │ │ │ 3 │ ├───► (in progress) + write read └─────┬─────┘ ├───►│ └───┘ │ + index index │ │ │ │ + ▲ │ │ └─────────────┘ + │ │ │ + │ currently │ ┌─Consumer #4─┐ + │ processed │ │ ┌───┐ │ Temporary + │ └───►│ │ 4 │ ├───► failure + │ │ └───┘ │ │ + │ │ │ │ + │ └─────────────┘ │ + │ ▲ │ + │ └── Retry ───────┤ + │ │ + │ │ + └────────────────────────────────────── Requeuing ◄────── Retry limit exceeded ───┘ ``` \ No newline at end of file diff --git a/exporter/exporterhelper/common.go b/exporter/exporterhelper/common.go index 7aaff568961..509a16c9295 100644 --- a/exporter/exporterhelper/common.go +++ b/exporter/exporterhelper/common.go @@ -54,9 +54,12 @@ type request interface { count() int // marshal serializes the current request into a byte stream marshal() ([]byte, error) + // onProcessingFinished provides capability to do the cleanup (e.g. remove item from persistent queue) + onProcessingFinished() + setOnProcessingFinished(callback func()) } -// requestUnmarshaler defines a function which takes a byte slice and unmarshal it into a relevant request +// requestUnmarshaler defines a function which takes a byte slice and unmarshals it into a relevant request type requestUnmarshaler func([]byte) (request, error) // requestSender is an abstraction of a sender for a request independent of the type of the data (traces, metrics, logs). @@ -66,7 +69,8 @@ type requestSender interface { // baseRequest is a base implementation for the request. type baseRequest struct { - ctx context.Context + ctx context.Context + processingFinishedCallback func() } func (req *baseRequest) context() context.Context { @@ -77,6 +81,16 @@ func (req *baseRequest) setContext(ctx context.Context) { req.ctx = ctx } +func (req *baseRequest) setOnProcessingFinished(callback func()) { + req.processingFinishedCallback = callback +} + +func (req *baseRequest) onProcessingFinished() { + if req.processingFinishedCallback != nil { + req.processingFinishedCallback() + } +} + // baseSettings represents all the options that users can configure. 
type baseSettings struct { componentOptions []componenthelper.Option @@ -164,7 +178,15 @@ type baseExporter struct { qrSender *queuedRetrySender } -func newBaseExporter(cfg config.Exporter, set component.ExporterCreateSettings, bs *baseSettings, signalName string, reqUnnmarshaler requestUnmarshaler) *baseExporter { +type signalType string + +const ( + signalLogs = signalType("logs") + signalMetrics = signalType("metrics") + signalTraces = signalType("traces") +) + +func newBaseExporter(cfg config.Exporter, set component.ExporterCreateSettings, bs *baseSettings, signal signalType, reqUnnmarshaler requestUnmarshaler) *baseExporter { be := &baseExporter{ Component: componenthelper.New(bs.componentOptions...), } @@ -174,7 +196,7 @@ func newBaseExporter(cfg config.Exporter, set component.ExporterCreateSettings, ExporterID: cfg.ID(), ExporterCreateSettings: set, }) - be.qrSender = newQueuedRetrySender(cfg.ID(), signalName, bs.QueueSettings, bs.RetrySettings, reqUnnmarshaler, &timeoutSender{cfg: bs.TimeoutSettings}, set.Logger) + be.qrSender = newQueuedRetrySender(cfg.ID(), signal, bs.QueueSettings, bs.RetrySettings, reqUnnmarshaler, &timeoutSender{cfg: bs.TimeoutSettings}, set.Logger) be.sender = be.qrSender return be diff --git a/exporter/exporterhelper/logs.go b/exporter/exporterhelper/logs.go index 00b2325483d..8ba5d19ed2b 100644 --- a/exporter/exporterhelper/logs.go +++ b/exporter/exporterhelper/logs.go @@ -99,7 +99,7 @@ func NewLogsExporter( } bs := fromOptions(options...) - be := newBaseExporter(cfg, set, bs, "logs", newLogsRequestUnmarshalerFunc(pusher)) + be := newBaseExporter(cfg, set, bs, signalLogs, newLogsRequestUnmarshalerFunc(pusher)) be.wrapConsumerSender(func(nextSender requestSender) requestSender { return &logsExporterWithObservability{ obsrep: be.obsrep, diff --git a/exporter/exporterhelper/metrics.go b/exporter/exporterhelper/metrics.go index ed142a4afc9..513a9ed0766 100644 --- a/exporter/exporterhelper/metrics.go +++ b/exporter/exporterhelper/metrics.go @@ -99,7 +99,7 @@ func NewMetricsExporter( } bs := fromOptions(options...) 
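	// NOTE (illustrative sketch, not part of this change): from an exporter author's
	// perspective the persistent queue is opted into through the existing queue options;
	// only the PersistentStorageEnabled flag is new here, and it additionally requires a
	// storage extension to be configured in the collector. Assuming the package's existing
	// WithQueue/WithRetry options and placeholder cfg/set/pusher arguments:
	//
	//	qCfg := DefaultQueueSettings()
	//	qCfg.PersistentStorageEnabled = true
	//	exp, err := NewMetricsExporter(cfg, set, pusher,
	//		WithQueue(qCfg),
	//		WithRetry(DefaultRetrySettings()),
	//	)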
- be := newBaseExporter(cfg, set, bs, "metrics", newMetricsRequestUnmarshalerFunc(pusher)) + be := newBaseExporter(cfg, set, bs, signalMetrics, newMetricsRequestUnmarshalerFunc(pusher)) be.wrapConsumerSender(func(nextSender requestSender) requestSender { return &metricsSenderWithObservability{ obsrep: be.obsrep, diff --git a/exporter/exporterhelper/persistent_queue.go b/exporter/exporterhelper/persistent_queue.go index 86d51f633d9..09f863af334 100644 --- a/exporter/exporterhelper/persistent_queue.go +++ b/exporter/exporterhelper/persistent_queue.go @@ -15,12 +15,8 @@ package exporterhelper import ( - "bytes" "context" - "encoding/binary" - "fmt" "sync" - "time" "github.com/jaegertracing/jaeger/pkg/queue" "go.uber.org/zap" @@ -32,75 +28,20 @@ import ( type persistentQueue struct { logger *zap.Logger stopWG sync.WaitGroup - produceMu sync.Mutex - exitChan chan struct{} + stopOnce sync.Once + stopChan chan struct{} capacity int numWorkers int - storage persistentStorage + produceMu sync.Mutex + storage persistentStorage } -// persistentStorage provides an interface for request storage operations -type persistentStorage interface { - // put appends the request to the storage - put(req request) error - // get returns the next available request; not the channel is unbuffered - get() <-chan request - // size returns the current size of the storage in number of requets - size() int - // stop gracefully stops the storage - stop() -} - -// persistentContiguousStorage provides a persistent queue implementation backed by file storage extension -// -// Write index describes the position at which next item is going to be stored -// Read index describes which item needs to be read next -// When Write index = Read index, no elements are in the queue -// -// ┌─file extension-backed queue─┐ -// │ │ -// │ ┌───┐ ┌───┐ ┌───┐ │ -// │ n+1 │ n │ ... 
│ 4 │ │ 3 │ │ -// │ └───┘ └───┘ └─x─┘ │ -// │ x │ -// └───────────────────────x─────┘ -// ▲ ▲ x -// │ │ xxx deleted -// │ │ -// write read -// index index -// -type persistentContiguousStorage struct { - logger *zap.Logger - client storage.Client - mu sync.Mutex - queueName string - readIndex uint64 - writeIndex uint64 - readIndexKey string - writeIndexKey string - retryDelay time.Duration - unmarshaler requestUnmarshaler - reqChan chan request - stopChan chan struct{} - stopOnce sync.Once -} - -const ( - queueNameKey = "queueName" - zapItemKey = "itemKey" - itemKeyTemplate = "it-%d" - readIndexKey = "ri" - writeIndexKey = "wi" - defaultRetryDelay = 100 * time.Millisecond -) - // newPersistentQueue creates a new queue backed by file storage func newPersistentQueue(ctx context.Context, name string, capacity int, logger *zap.Logger, client storage.Client, unmarshaler requestUnmarshaler) *persistentQueue { return &persistentQueue{ logger: logger, - exitChan: make(chan struct{}), + stopChan: make(chan struct{}), capacity: capacity, storage: newPersistentContiguousStorage(ctx, name, logger, client, unmarshaler), } @@ -127,7 +68,7 @@ func (pq *persistentQueue) StartConsumers(num int, callback func(item interface{ select { case req := <-pq.storage.get(): consumer.Consume(req) - case <-pq.exitChan: + case <-pq.stopChan: return } } @@ -150,202 +91,14 @@ func (pq *persistentQueue) Produce(item interface{}) bool { // Stop stops accepting items, shuts down the queue and closes the persistent queue func (pq *persistentQueue) Stop() { - pq.storage.stop() - close(pq.exitChan) - pq.stopWG.Wait() + pq.stopOnce.Do(func() { + pq.storage.stop() + close(pq.stopChan) + pq.stopWG.Wait() + }) } -// Size returns the current depth of the queue +// Size returns the current depth of the queue, excluding the item already in the storage channel (if any) func (pq *persistentQueue) Size() int { return pq.storage.size() } - -// newPersistentContiguousStorage creates a new file-storage extension backed queue. 
It needs to be initialized separately -func newPersistentContiguousStorage(ctx context.Context, queueName string, logger *zap.Logger, client storage.Client, unmarshaler requestUnmarshaler) *persistentContiguousStorage { - wcs := &persistentContiguousStorage{ - logger: logger, - client: client, - queueName: queueName, - unmarshaler: unmarshaler, - reqChan: make(chan request), - stopChan: make(chan struct{}), - retryDelay: defaultRetryDelay, - } - initPersistentContiguousStorage(ctx, wcs) - go wcs.loop() - return wcs -} - -func initPersistentContiguousStorage(ctx context.Context, wcs *persistentContiguousStorage) { - wcs.readIndexKey = wcs.buildReadIndexKey() - wcs.writeIndexKey = wcs.buildWriteIndexKey() - - readIndexBytes, err := wcs.client.Get(ctx, wcs.readIndexKey) - if err != nil || readIndexBytes == nil { - wcs.logger.Debug("failed getting read index, starting with a new one", zap.String(queueNameKey, wcs.queueName)) - wcs.readIndex = 0 - } else { - val, conversionErr := bytesToUint64(readIndexBytes) - if conversionErr != nil { - wcs.logger.Warn("read index corrupted, starting with a new one", zap.String(queueNameKey, wcs.queueName)) - wcs.readIndex = 0 - } else { - wcs.readIndex = val - } - } - - writeIndexBytes, err := wcs.client.Get(ctx, wcs.writeIndexKey) - if err != nil || writeIndexBytes == nil { - wcs.logger.Debug("failed getting write index, starting with a new one", zap.String(queueNameKey, wcs.queueName)) - wcs.writeIndex = 0 - } else { - val, conversionErr := bytesToUint64(writeIndexBytes) - if conversionErr != nil { - wcs.logger.Warn("write index corrupted, starting with a new one", zap.String(queueNameKey, wcs.queueName)) - wcs.writeIndex = 0 - } else { - wcs.writeIndex = val - } - } -} - -// Put marshals the request and puts it into the persistent queue -func (pcs *persistentContiguousStorage) put(req request) error { - buf, err := req.marshal() - if err != nil { - return err - } - - pcs.mu.Lock() - defer pcs.mu.Unlock() - - itemKey := pcs.buildItemKey(pcs.writeIndex) - err = pcs.client.Set(context.Background(), itemKey, buf) - if err != nil { - return err - } - - pcs.writeIndex++ - writeIndexBytes, err := uint64ToBytes(pcs.writeIndex) - if err != nil { - pcs.logger.Warn("failed converting write index uint64 to bytes", zap.Error(err)) - } - - return pcs.client.Set(context.Background(), pcs.writeIndexKey, writeIndexBytes) -} - -// getNextItem pulls the next available item from the persistent storage; if none is found, returns (nil, false) -func (pcs *persistentContiguousStorage) getNextItem(ctx context.Context) (request, bool) { - pcs.mu.Lock() - defer pcs.mu.Unlock() - - if pcs.readIndex < pcs.writeIndex { - itemKey := pcs.buildItemKey(pcs.readIndex) - // Increase here, so despite errors it would still progress - pcs.readIndex++ - - buf, err := pcs.client.Get(ctx, itemKey) - if err != nil { - pcs.logger.Error("error when getting item from persistent storage", - zap.String(queueNameKey, pcs.queueName), zap.String(zapItemKey, itemKey), zap.Error(err)) - return nil, false - } - req, unmarshalErr := pcs.unmarshaler(buf) - pcs.updateItemRead(ctx, itemKey) - if unmarshalErr != nil { - pcs.logger.Error("error when unmarshalling item from persistent storage", - zap.String(queueNameKey, pcs.queueName), zap.String(zapItemKey, itemKey), zap.Error(unmarshalErr)) - return nil, false - } - - return req, true - } - - return nil, false -} - -func (pcs *persistentContiguousStorage) loop() { - for { - for { - req, found := pcs.getNextItem(context.Background()) - if found { - select { - 
case <-pcs.stopChan: - return - case pcs.reqChan <- req: - } - } else { - select { - case <-pcs.stopChan: - return - case <-time.After(pcs.retryDelay): - } - } - } - } -} - -// get returns the request channel that all the requests will be send on -func (pcs *persistentContiguousStorage) get() <-chan request { - return pcs.reqChan -} - -func (pcs *persistentContiguousStorage) size() int { - pcs.mu.Lock() - defer pcs.mu.Unlock() - return int(pcs.writeIndex - pcs.readIndex) -} - -func (pcs *persistentContiguousStorage) stop() { - pcs.logger.Debug("stopping persistentContiguousStorage", zap.String(queueNameKey, pcs.queueName)) - pcs.stopOnce.Do(func() { - close(pcs.stopChan) - }) -} - -func (pcs *persistentContiguousStorage) updateItemRead(ctx context.Context, itemKey string) { - err := pcs.client.Delete(ctx, itemKey) - if err != nil { - pcs.logger.Debug("failed deleting item", zap.String(zapItemKey, itemKey)) - } - - readIndexBytes, err := uint64ToBytes(pcs.readIndex) - if err != nil { - pcs.logger.Warn("failed converting read index uint64 to bytes", zap.Error(err)) - } else { - err = pcs.client.Set(ctx, pcs.readIndexKey, readIndexBytes) - if err != nil { - pcs.logger.Warn("failed storing read index", zap.Error(err)) - } - } -} - -func (pcs *persistentContiguousStorage) buildItemKey(index uint64) string { - return fmt.Sprintf(itemKeyTemplate, index) -} - -func (pcs *persistentContiguousStorage) buildReadIndexKey() string { - return readIndexKey -} - -func (pcs *persistentContiguousStorage) buildWriteIndexKey() string { - return writeIndexKey -} - -func uint64ToBytes(val uint64) ([]byte, error) { - var buf bytes.Buffer - err := binary.Write(&buf, binary.LittleEndian, val) - if err != nil { - return nil, err - } - return buf.Bytes(), err -} - -func bytesToUint64(b []byte) (uint64, error) { - var val uint64 - err := binary.Read(bytes.NewReader(b), binary.LittleEndian, &val) - if err != nil { - return val, err - } - return val, nil -} diff --git a/exporter/exporterhelper/persistent_queue_test.go b/exporter/exporterhelper/persistent_queue_test.go index fb3ac7fb61d..47cc14d6571 100644 --- a/exporter/exporterhelper/persistent_queue_test.go +++ b/exporter/exporterhelper/persistent_queue_test.go @@ -16,9 +16,7 @@ package exporterhelper import ( "context" - "errors" "fmt" - "io/ioutil" "os" "sync/atomic" "testing" @@ -33,11 +31,6 @@ import ( "go.opentelemetry.io/collector/extension/storage" ) -func createStorageExtension(_ string) storage.Extension { - // After having storage moved to core, we could leverage storagetest.NewTestExtension(nil, path) - return newMockStorageExtension() -} - func createTestQueue(extension storage.Extension, capacity int) *persistentQueue { logger, _ := zap.NewDevelopment() @@ -50,60 +43,6 @@ func createTestQueue(extension storage.Extension, capacity int) *persistentQueue return wq } -func createTestPersistentStorage(extension storage.Extension) persistentStorage { - logger, _ := zap.NewDevelopment() - - client, err := extension.GetClient(context.Background(), component.KindReceiver, config.ComponentID{}, "") - if err != nil { - panic(err) - } - - return newPersistentContiguousStorage(context.Background(), "foo", logger, client, newTraceRequestUnmarshalerFunc(nopTracePusher())) -} - -func createTemporaryDirectory() string { - directory, err := ioutil.TempDir("", "persistent-test") - if err != nil { - panic(err) - } - return directory -} - -func TestPersistentQueue_RepeatPutCloseReadClose(t *testing.T) { - path := createTemporaryDirectory() - defer os.RemoveAll(path) - - 
traces := newTraces(5, 10) - req := newTracesRequest(context.Background(), traces, nopTracePusher()) - - for i := 0; i < 10; i++ { - ext := createStorageExtension(path) - ps := createTestPersistentStorage(ext) - require.Equal(t, 0, ps.size()) - err := ps.put(req) - require.NoError(t, err) - err = ext.Shutdown(context.Background()) - require.NoError(t, err) - - // TODO: when replacing mock with real storage, this could actually be uncommented - // ext = createStorageExtension(path) - // ps = createTestPersistentStorage(ext) - - require.Equal(t, 1, ps.size()) - readReq := <-ps.get() - require.Equal(t, 0, ps.size()) - require.Equal(t, req.(*tracesRequest).td, readReq.(*tracesRequest).td) - err = ext.Shutdown(context.Background()) - require.NoError(t, err) - } - - // No more items - ext := createStorageExtension(path) - wq := createTestQueue(ext, 5000) - require.Equal(t, 0, wq.Size()) - ext.Shutdown(context.Background()) -} - func TestPersistentQueue_Capacity(t *testing.T) { path := createTemporaryDirectory() defer os.RemoveAll(path) @@ -121,7 +60,11 @@ func TestPersistentQueue_Capacity(t *testing.T) { result := wq.Produce(req) if i < 5 { require.True(t, result) - } else { + } + // Depending if loop already picked the first element into the channel, + // the 6th item can be either accepted or not, so let's skip it and make + // sure the items above 6th are not accepted. + if i > 5 { require.False(t, result) } } @@ -144,8 +87,16 @@ func TestPersistentQueue_Close(t *testing.T) { for i := 0; i < 1000; i++ { wq.Produce(req) } - // This will close the queue very quickly, consumers should not be able to consume anything and finish gracefully - wq.Stop() + // This will close the queue very quickly, consumers might not be able to consume anything and should finish gracefully + require.Eventually(t, func() bool { + wq.Stop() + return true + }, 500*time.Millisecond, 10*time.Millisecond) + // The additional stop should not panic + require.Eventually(t, func() bool { + wq.Stop() + return true + }, 500*time.Millisecond, 10*time.Millisecond) } func TestPersistentQueue_ConsumersProducers(t *testing.T) { @@ -207,51 +158,6 @@ func TestPersistentQueue_ConsumersProducers(t *testing.T) { } } -func BenchmarkPersistentQueue_1Trace10Spans(b *testing.B) { - cases := []struct { - numTraces int - numSpansPerTrace int - }{ - { - numTraces: 1, - numSpansPerTrace: 1, - }, - { - numTraces: 1, - numSpansPerTrace: 10, - }, - { - numTraces: 10, - numSpansPerTrace: 10, - }, - } - - for _, c := range cases { - b.Run(fmt.Sprintf("#traces: %d #spansPerTrace: %d", c.numTraces, c.numSpansPerTrace), func(bb *testing.B) { - path := createTemporaryDirectory() - defer os.RemoveAll(path) - ext := createStorageExtension(path) - ps := createTestPersistentStorage(ext) - - traces := newTraces(c.numTraces, c.numSpansPerTrace) - req := newTracesRequest(context.Background(), traces, nopTracePusher()) - - bb.ResetTimer() - - for i := 0; i < bb.N; i++ { - err := ps.put(req) - require.NoError(bb, err) - } - - for i := 0; i < bb.N; i++ { - req := ps.get() - require.NotNil(bb, req) - } - ext.Shutdown(context.Background()) - }) - } -} - func newTraces(numTraces int, numSpans int) pdata.Traces { traces := pdata.NewTraces() batch := traces.ResourceSpans().AppendEmpty() @@ -278,54 +184,3 @@ func newTraces(numTraces int, numSpans int) pdata.Traces { return traces } - -type mockStorageExtension struct{} - -func (m mockStorageExtension) Start(_ context.Context, _ component.Host) error { - return nil -} - -func (m mockStorageExtension) Shutdown(_ 
context.Context) error { - return nil -} - -func (m mockStorageExtension) GetClient(ctx context.Context, kind component.Kind, id config.ComponentID, s string) (storage.Client, error) { - return newMockStorageClient(), nil -} - -func newMockStorageExtension() storage.Extension { - return &mockStorageExtension{} -} - -func newMockStorageClient() storage.Client { - return &mockStorageClient{ - st: map[string][]byte{}, - } -} - -type mockStorageClient struct { - st map[string][]byte -} - -func (m mockStorageClient) Get(_ context.Context, s string) ([]byte, error) { - val, found := m.st[s] - if !found { - return []byte{}, errors.New("key not found") - } - - return val, nil -} - -func (m mockStorageClient) Set(_ context.Context, s string, bytes []byte) error { - m.st[s] = bytes - return nil -} - -func (m mockStorageClient) Delete(_ context.Context, s string) error { - delete(m.st, s) - return nil -} - -func (m mockStorageClient) Close(_ context.Context) error { - return nil -} diff --git a/exporter/exporterhelper/persistent_storage.go b/exporter/exporterhelper/persistent_storage.go new file mode 100644 index 00000000000..72dc0d30148 --- /dev/null +++ b/exporter/exporterhelper/persistent_storage.go @@ -0,0 +1,442 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package exporterhelper + +import ( + "bytes" + "context" + "encoding/binary" + "errors" + "fmt" + "sync" + "time" + + "go.opencensus.io/metric/metricdata" + "go.uber.org/zap" + + "go.opentelemetry.io/collector/extension/storage" +) + +// persistentStorage provides an interface for request storage operations +type persistentStorage interface { + // put appends the request to the storage + put(req request) error + // get returns the next available request; note that the channel is unbuffered + get() <-chan request + // size returns the current size of the storage with items waiting for processing + size() int + // stop gracefully stops the storage + stop() +} + +// persistentContiguousStorage provides a persistent queue implementation backed by file storage extension +// +// Write index describes the position at which next item is going to be stored. +// Read index describes which item needs to be read next. +// When Write index = Read index, no elements are in the queue. +// +// The items currently processed by consumers are not deleted until the processing is finished. +// Their list is stored under a separate key. +// +// +// ┌───────file extension-backed queue───────┐ +// │ │ +// │ ┌───┐ ┌───┐ ┌───┐ ┌───┐ ┌───┐ │ +// │ n+1 │ n │ ... 
│ 4 │ │ 3 │ │ 2 │ │ 1 │ │ +// │ └───┘ └───┘ └─x─┘ └─|─┘ └─x─┘ │ +// │ x | x │ +// └───────────────────────x─────|─────x─────┘ +// ▲ ▲ x | x +// │ │ x | xxxx deleted +// │ │ x | +// write read x └── currently processed item +// index index x +// xxxx deleted +// +type persistentContiguousStorage struct { + logger *zap.Logger + queueName string + client storage.Client + retryDelay time.Duration + unmarshaler requestUnmarshaler + + reqChan chan request + stopOnce sync.Once + stopChan chan struct{} + + mu sync.Mutex + readIndex itemIndex + writeIndex itemIndex + currentlyProcessedItems []itemIndex +} + +type itemIndex uint64 + +const ( + zapKey = "key" + zapQueueNameKey = "queueName" + zapErrorCount = "errorCount" + zapNumberOfItems = "numberOfItems" + + defaultRetryDelay = 100 * time.Millisecond + + readIndexKey = "ri" + writeIndexKey = "wi" + currentlyProcessedItemsKey = "pi" +) + +var ( + errStoringItemToQueue = errors.New("item could not be stored to persistent queue") + errUpdatingIndex = errors.New("index could not be updated") +) + +// newPersistentContiguousStorage creates a new file-storage extension backed queue. It needs to be initialized separately +func newPersistentContiguousStorage(ctx context.Context, queueName string, logger *zap.Logger, client storage.Client, unmarshaler requestUnmarshaler) *persistentContiguousStorage { + wcs := &persistentContiguousStorage{ + logger: logger, + client: client, + queueName: queueName, + unmarshaler: unmarshaler, + reqChan: make(chan request), + stopChan: make(chan struct{}), + retryDelay: defaultRetryDelay, + } + + initPersistentContiguousStorage(ctx, wcs) + + err := currentlyProcessedBatchesGauge.UpsertEntry(func() int64 { + return int64(wcs.numberOfCurrentlyProcessedItems()) + }, metricdata.NewLabelValue(wcs.queueName)) + if err != nil { + logger.Error("failed to create number of currently processed items metric", zap.Error(err)) + } + + go wcs.loop() + return wcs +} + +func initPersistentContiguousStorage(ctx context.Context, wcs *persistentContiguousStorage) { + readIndexIf := wcs._clientGet(ctx, readIndexKey, bytesToItemIndex) + if readIndexIf == nil { + wcs.logger.Debug("failed getting read index, starting with a new one", + zap.String(zapQueueNameKey, wcs.queueName)) + wcs.readIndex = 0 + } else { + wcs.readIndex = readIndexIf.(itemIndex) + } + + writeIndexIf := wcs._clientGet(ctx, writeIndexKey, bytesToItemIndex) + if writeIndexIf == nil { + wcs.logger.Debug("failed getting write index, starting with a new one", + zap.String(zapQueueNameKey, wcs.queueName)) + wcs.writeIndex = 0 + } else { + wcs.writeIndex = writeIndexIf.(itemIndex) + } +} + +// loop is the main loop that handles fetching items from the persistent buffer +func (pcs *persistentContiguousStorage) loop() { + // We want to run it here so it's not blocking + reqs := pcs.retrieveUnprocessedItems(context.Background()) + if len(reqs) > 0 { + errCount := 0 + for _, req := range reqs { + if pcs.put(req) != nil { + errCount++ + } + } + pcs.logger.Info("moved items for processing back to queue", + zap.String(zapQueueNameKey, pcs.queueName), + zap.Int(zapNumberOfItems, len(reqs)), zap.Int(zapErrorCount, errCount)) + } + + for { + for { + req, found := pcs.getNextItem(context.Background()) + if found { + select { + case <-pcs.stopChan: + return + case pcs.reqChan <- req: + } + } else { + select { + case <-pcs.stopChan: + return + case <-time.After(pcs.retryDelay): + } + } + } + } +} + +// get returns the request channel that all the requests will be send on +func (pcs 
*persistentContiguousStorage) get() <-chan request { + return pcs.reqChan +} + +// size returns the number of currently available items, which were not picked by consumers yet +func (pcs *persistentContiguousStorage) size() int { + pcs.mu.Lock() + defer pcs.mu.Unlock() + return int(pcs.writeIndex - pcs.readIndex) +} + +// numberOfCurrentlyProcessedItems returns the count of batches for which processing started but hasn't finish yet +func (pcs *persistentContiguousStorage) numberOfCurrentlyProcessedItems() int { + pcs.mu.Lock() + defer pcs.mu.Unlock() + return len(pcs.currentlyProcessedItems) +} + +func (pcs *persistentContiguousStorage) stop() { + pcs.logger.Debug("stopping persistentContiguousStorage", zap.String(zapQueueNameKey, pcs.queueName)) + pcs.stopOnce.Do(func() { + _ = currentlyProcessedBatchesGauge.UpsertEntry(func() int64 { + return int64(0) + }, metricdata.NewLabelValue(pcs.queueName)) + close(pcs.stopChan) + }) +} + +// put marshals the request and puts it into the persistent queue +func (pcs *persistentContiguousStorage) put(req request) error { + pcs.mu.Lock() + defer pcs.mu.Unlock() + + ctx := context.Background() + + itemKey := pcs.itemKey(pcs.writeIndex) + if !pcs._clientSet(ctx, itemKey, req, requestToBytes) { + return errStoringItemToQueue + } + + pcs.writeIndex++ + if !pcs._clientSet(ctx, writeIndexKey, pcs.writeIndex, itemIndexToBytes) { + return errUpdatingIndex + } + + return nil +} + +// getNextItem pulls the next available item from the persistent storage; if none is found, returns (nil, false) +func (pcs *persistentContiguousStorage) getNextItem(ctx context.Context) (request, bool) { + pcs.mu.Lock() + defer pcs.mu.Unlock() + + if pcs.readIndex < pcs.writeIndex { + index := pcs.readIndex + // Increase here, so even if errors happen below, it always iterates + pcs.readIndex++ + + pcs.updateReadIndex(ctx) + pcs.itemProcessingStart(ctx, index) + + req := pcs._clientGet(ctx, pcs.itemKey(index), pcs.bytesToRequest).(request) + if req == nil { + return nil, false + } + + req.setOnProcessingFinished(func() { + pcs.mu.Lock() + defer pcs.mu.Unlock() + pcs.itemProcessingFinish(ctx, index) + }) + return req, true + } + + return nil, false +} + +// retrieveUnprocessedItems gets the items for which processing was not finished, cleans the storage +// and moves the items back to the queue +func (pcs *persistentContiguousStorage) retrieveUnprocessedItems(ctx context.Context) []request { + var reqs []request + pcs.mu.Lock() + defer pcs.mu.Unlock() + + pcs.logger.Debug("checking if there are items left by consumers") + processedItemsIf := pcs._clientGet(ctx, currentlyProcessedItemsKey, bytesToItemIndexArray) + if processedItemsIf == nil { + return reqs + } + processedItems, ok := processedItemsIf.([]itemIndex) + if !ok { + pcs.logger.Warn("failed fetching list of unprocessed items", + zap.String(zapQueueNameKey, pcs.queueName)) + return reqs + } + + if len(processedItems) > 0 { + pcs.logger.Info("fetching items left for processing by consumers", + zap.String(zapQueueNameKey, pcs.queueName), zap.Int(zapNumberOfItems, len(processedItems))) + } else { + pcs.logger.Debug("no items left for processing by consumers") + } + + for _, it := range processedItems { + req := pcs._clientGet(ctx, pcs.itemKey(it), pcs.bytesToRequest).(request) + pcs._clientDelete(ctx, pcs.itemKey(it)) + reqs = append(reqs, req) + } + + return reqs +} + +// itemProcessingStart appends the item to the list of currently processed items +func (pcs *persistentContiguousStorage) itemProcessingStart(ctx 
context.Context, index itemIndex) { + pcs.currentlyProcessedItems = append(pcs.currentlyProcessedItems, index) + pcs._clientSet(ctx, currentlyProcessedItemsKey, pcs.currentlyProcessedItems, itemIndexArrayToBytes) +} + +// itemProcessingFinish removes the item from the list of currently processed items and deletes it from the persistent queue +func (pcs *persistentContiguousStorage) itemProcessingFinish(ctx context.Context, index itemIndex) { + var updatedProcessedItems []itemIndex + for _, it := range pcs.currentlyProcessedItems { + if it != index { + updatedProcessedItems = append(updatedProcessedItems, it) + } + } + pcs.currentlyProcessedItems = updatedProcessedItems + + pcs._clientSet(ctx, currentlyProcessedItemsKey, pcs.currentlyProcessedItems, itemIndexArrayToBytes) + pcs._clientDelete(ctx, pcs.itemKey(index)) +} + +func (pcs *persistentContiguousStorage) updateReadIndex(ctx context.Context) { + pcs._clientSet(ctx, readIndexKey, pcs.readIndex, itemIndexToBytes) +} + +func (pcs *persistentContiguousStorage) itemKey(index itemIndex) string { + return fmt.Sprintf("%d", index) +} + +func (pcs *persistentContiguousStorage) _clientSet(ctx context.Context, key string, value interface{}, marshal func(interface{}) ([]byte, error)) bool { + valueBytes, err := marshal(value) + if err != nil { + pcs.logger.Warn("failed marshaling item", + zap.String(zapQueueNameKey, pcs.queueName), zap.String(zapKey, key), zap.Error(err)) + return false + } + + return pcs._clientSetBuf(ctx, key, valueBytes) +} + +func (pcs *persistentContiguousStorage) _clientGet(ctx context.Context, key string, unmarshal func([]byte) (interface{}, error)) interface{} { + valueBytes := pcs._clientGetBuf(ctx, key) + if valueBytes == nil { + return nil + } + + item, err := unmarshal(valueBytes) + if err != nil { + pcs.logger.Warn("failed unmarshaling item", + zap.String(zapQueueNameKey, pcs.queueName), zap.String(zapKey, key), zap.Error(err)) + return nil + } + + return item +} + +func (pcs *persistentContiguousStorage) _clientDelete(ctx context.Context, key string) { + err := pcs.client.Delete(ctx, key) + if err != nil { + pcs.logger.Warn("failed deleting item", + zap.String(zapQueueNameKey, pcs.queueName), zap.String(zapKey, key)) + } +} + +func (pcs *persistentContiguousStorage) _clientGetBuf(ctx context.Context, key string) []byte { + buf, err := pcs.client.Get(ctx, key) + if err != nil { + pcs.logger.Debug("error when getting item from persistent storage", + zap.String(zapQueueNameKey, pcs.queueName), zap.String(zapKey, key), zap.Error(err)) + return nil + } + return buf +} + +func (pcs *persistentContiguousStorage) _clientSetBuf(ctx context.Context, key string, buf []byte) bool { + err := pcs.client.Set(ctx, key, buf) + if err != nil { + pcs.logger.Debug("error when storing item to persistent storage", + zap.String(zapQueueNameKey, pcs.queueName), zap.String(zapKey, key), zap.Error(err)) + return false + } + return true +} + +func itemIndexToBytes(val interface{}) ([]byte, error) { + var buf bytes.Buffer + err := binary.Write(&buf, binary.LittleEndian, val) + if err != nil { + return nil, err + } + return buf.Bytes(), err +} + +func bytesToItemIndex(b []byte) (interface{}, error) { + var val itemIndex + err := binary.Read(bytes.NewReader(b), binary.LittleEndian, &val) + if err != nil { + return val, err + } + return val, nil +} + +func itemIndexArrayToBytes(arr interface{}) ([]byte, error) { + var buf bytes.Buffer + count := 0 + + if arr != nil { + arrItemIndex, ok := arr.([]itemIndex) + if ok { + count = len(arrItemIndex) + 
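	// NOTE (worked example, not part of this change): itemIndexArrayToBytes writes a
	// little-endian uint32 element count followed by one little-endian uint64 per index,
	// so []itemIndex{0, 1, 2} serializes to 4 + 3*8 = 28 bytes. bytesToItemIndexArray
	// reverses this by reading the count first and then decoding exactly that many
	// uint64 values.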
} + } + + err := binary.Write(&buf, binary.LittleEndian, uint32(count)) + if err != nil { + return nil, err + } + + err = binary.Write(&buf, binary.LittleEndian, arr) + if err != nil { + return nil, err + } + return buf.Bytes(), err +} + +func bytesToItemIndexArray(b []byte) (interface{}, error) { + var size uint32 + reader := bytes.NewReader(b) + err := binary.Read(reader, binary.LittleEndian, &size) + if err != nil { + return nil, err + } + + val := make([]itemIndex, size) + err = binary.Read(reader, binary.LittleEndian, &val) + return val, err +} + +func requestToBytes(req interface{}) ([]byte, error) { + return req.(request).marshal() +} + +func (pcs *persistentContiguousStorage) bytesToRequest(b []byte) (interface{}, error) { + return pcs.unmarshaler(b) +} diff --git a/exporter/exporterhelper/persistent_storage_test.go b/exporter/exporterhelper/persistent_storage_test.go new file mode 100644 index 00000000000..12301d4b6e2 --- /dev/null +++ b/exporter/exporterhelper/persistent_storage_test.go @@ -0,0 +1,337 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package exporterhelper + +import ( + "context" + "errors" + "fmt" + "io/ioutil" + "os" + "reflect" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.opencensus.io/tag" + "go.uber.org/zap" + + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/config" + "go.opentelemetry.io/collector/extension/storage" +) + +func createStorageExtension(_ string) storage.Extension { + // After having storage moved to core, we could leverage storagetest.NewTestExtension(nil, path) + return newMockStorageExtension() +} + +func createTestClient(extension storage.Extension) storage.Client { + client, err := extension.GetClient(context.Background(), component.KindReceiver, config.ComponentID{}, "") + if err != nil { + panic(err) + } + return client +} + +func createTestPersistentStorage(client storage.Client) *persistentContiguousStorage { + logger, _ := zap.NewDevelopment() + return newPersistentContiguousStorage(context.Background(), "foo", logger, client, newTraceRequestUnmarshalerFunc(nopTracePusher())) +} + +func createTemporaryDirectory() string { + directory, err := ioutil.TempDir("", "persistent-test") + if err != nil { + panic(err) + } + return directory +} + +func TestPersistentStorage_CurrentlyProcessedItems(t *testing.T) { + path := createTemporaryDirectory() + defer os.RemoveAll(path) + + traces := newTraces(5, 10) + req := newTracesRequest(context.Background(), traces, nopTracePusher()) + + ext := createStorageExtension(path) + client := createTestClient(ext) + ps := createTestPersistentStorage(client) + + for i := 0; i < 5; i++ { + err := ps.put(req) + require.NoError(t, err) + } + + requireItemIndexesEqual(t, ps, []itemIndex{0}) + + readReq := getItemFromChannel(t, ps) + require.Equal(t, req.(*tracesRequest).td, readReq.(*tracesRequest).td) + + requireItemIndexesEqual(t, ps, []itemIndex{0, 1}) + + secondReadReq := getItemFromChannel(t, ps) + 
requireItemIndexesEqual(t, ps, []itemIndex{0, 1, 2}) + + secondReadReq.onProcessingFinished() + requireItemIndexesEqual(t, ps, []itemIndex{0, 2}) + + // Reload the storage and check how many items are there, which, after the current one is fetched should go to 3 + newPs := createTestPersistentStorage(client) + require.Eventually(t, func() bool { + return newPs.size() == 3 + }, 500*time.Millisecond, 10*time.Millisecond) + + requireItemIndexesEqual(t, newPs, []itemIndex{3}) + + for i := 0; i < 4; i++ { + req := getItemFromChannel(t, newPs) + req.onProcessingFinished() + } + + requireItemIndexesEqual(t, newPs, nil) + require.Eventually(t, func() bool { + return newPs.size() == 0 + }, 500*time.Millisecond, 10*time.Millisecond) + + // The writeIndex should be now set accordingly + require.Equal(t, 7, int(newPs.writeIndex)) + // There should be no items left in the storage + for i := 0; i < int(newPs.writeIndex); i++ { + require.Nil(t, newPs._clientGetBuf(context.Background(), newPs.itemKey(itemIndex(i)))) + } +} + +func TestPersistentStorage_MetricsReported(t *testing.T) { + path := createTemporaryDirectory() + defer os.RemoveAll(path) + + traces := newTraces(5, 10) + req := newTracesRequest(context.Background(), traces, nopTracePusher()) + + ext := createStorageExtension(path) + client := createTestClient(ext) + ps := createTestPersistentStorage(client) + + for i := 0; i < 5; i++ { + err := ps.put(req) + require.NoError(t, err) + } + + _ = getItemFromChannel(t, ps) + requireItemIndexesEqual(t, ps, []itemIndex{0, 1}) + checkValueForProducer(t, []tag.Tag{{Key: exporterTag, Value: "foo"}}, int64(2), "exporter/processed_batches_size") + + ps.stop() + checkValueForProducer(t, []tag.Tag{{Key: exporterTag, Value: "foo"}}, int64(0), "exporter/processed_batches_size") +} + +func TestPersistentStorage_RepeatPutCloseReadClose(t *testing.T) { + path := createTemporaryDirectory() + defer os.RemoveAll(path) + + traces := newTraces(5, 10) + req := newTracesRequest(context.Background(), traces, nopTracePusher()) + + for i := 0; i < 10; i++ { + ext := createStorageExtension(path) + client := createTestClient(ext) + ps := createTestPersistentStorage(client) + require.Equal(t, 0, ps.size()) + + // Put two elements + err := ps.put(req) + require.NoError(t, err) + err = ps.put(req) + require.NoError(t, err) + + err = ext.Shutdown(context.Background()) + require.NoError(t, err) + + // TODO: when replacing mock with real storage, this could actually be uncommented + // ext = createStorageExtension(path) + // ps = createTestPersistentStorage(ext) + + // The first element should be already picked by loop + require.Eventually(t, func() bool { + return ps.size() == 1 + }, 500*time.Millisecond, 10*time.Millisecond) + + // Lets read both of the elements we put + readReq := getItemFromChannel(t, ps) + require.Equal(t, req.(*tracesRequest).td, readReq.(*tracesRequest).td) + + readReq = getItemFromChannel(t, ps) + require.Equal(t, req.(*tracesRequest).td, readReq.(*tracesRequest).td) + require.Equal(t, 0, ps.size()) + + err = ext.Shutdown(context.Background()) + require.NoError(t, err) + } + + // No more items + ext := createStorageExtension(path) + wq := createTestQueue(ext, 5000) + require.Equal(t, 0, wq.Size()) + ext.Shutdown(context.Background()) +} + +func BenchmarkPersistentStorage_TraceSpans(b *testing.B) { + cases := []struct { + numTraces int + numSpansPerTrace int + }{ + { + numTraces: 1, + numSpansPerTrace: 1, + }, + { + numTraces: 1, + numSpansPerTrace: 10, + }, + { + numTraces: 10, + numSpansPerTrace: 10, + }, + 
} + + for _, c := range cases { + b.Run(fmt.Sprintf("#traces: %d #spansPerTrace: %d", c.numTraces, c.numSpansPerTrace), func(bb *testing.B) { + path := createTemporaryDirectory() + defer os.RemoveAll(path) + ext := createStorageExtension(path) + client := createTestClient(ext) + ps := createTestPersistentStorage(client) + + traces := newTraces(c.numTraces, c.numSpansPerTrace) + req := newTracesRequest(context.Background(), traces, nopTracePusher()) + + bb.ResetTimer() + + for i := 0; i < bb.N; i++ { + err := ps.put(req) + require.NoError(bb, err) + } + + for i := 0; i < bb.N; i++ { + req := ps.get() + require.NotNil(bb, req) + } + ext.Shutdown(context.Background()) + }) + } +} + +func TestPersistentStorage_ItemIndexMarshaling(t *testing.T) { + cases := []struct { + arr1 []itemIndex + arr2 []itemIndex + }{ + { + arr1: []itemIndex{0, 1, 2}, + arr2: []itemIndex{0, 1, 2}, + }, + { + arr1: []itemIndex{}, + arr2: []itemIndex{}, + }, + { + arr1: nil, + arr2: []itemIndex{}, + }, + } + + for _, c := range cases { + count := 0 + if c.arr1 != nil { + count = len(c.arr1) + } + t.Run(fmt.Sprintf("#elements:%d", count), func(tt *testing.T) { + barr, err := itemIndexArrayToBytes(c.arr1) + require.NoError(t, err) + arr2, err := bytesToItemIndexArray(barr) + require.NoError(t, err) + require.Equal(t, c.arr2, arr2) + }) + } +} + +func getItemFromChannel(t *testing.T, pcs *persistentContiguousStorage) request { + var readReq request + require.Eventually(t, func() bool { + readReq = <-pcs.get() + return true + }, 500*time.Millisecond, 10*time.Millisecond) + return readReq +} + +func requireItemIndexesEqual(t *testing.T, pcs *persistentContiguousStorage, compare []itemIndex) { + require.Eventually(t, func() bool { + pcs.mu.Lock() + defer pcs.mu.Unlock() + return reflect.DeepEqual(pcs.currentlyProcessedItems, compare) + }, 500*time.Millisecond, 10*time.Millisecond) +} + +type mockStorageExtension struct{} + +func (m mockStorageExtension) Start(_ context.Context, _ component.Host) error { + return nil +} + +func (m mockStorageExtension) Shutdown(_ context.Context) error { + return nil +} + +func (m mockStorageExtension) GetClient(ctx context.Context, kind component.Kind, id config.ComponentID, s string) (storage.Client, error) { + return newMockStorageClient(), nil +} + +func newMockStorageExtension() storage.Extension { + return &mockStorageExtension{} +} + +func newMockStorageClient() storage.Client { + return &mockStorageClient{ + st: map[string][]byte{}, + } +} + +type mockStorageClient struct { + st map[string][]byte +} + +func (m mockStorageClient) Get(_ context.Context, s string) ([]byte, error) { + val, found := m.st[s] + if !found { + return []byte{}, errors.New("key not found") + } + + return val, nil +} + +func (m mockStorageClient) Set(_ context.Context, s string, bytes []byte) error { + m.st[s] = bytes + return nil +} + +func (m mockStorageClient) Delete(_ context.Context, s string) error { + delete(m.st, s) + return nil +} + +func (m mockStorageClient) Close(_ context.Context) error { + return nil +} diff --git a/exporter/exporterhelper/queued_retry.go b/exporter/exporterhelper/queued_retry.go index 37270b48894..fa4e7f05218 100644 --- a/exporter/exporterhelper/queued_retry.go +++ b/exporter/exporterhelper/queued_retry.go @@ -46,6 +46,12 @@ var ( metric.WithLabelKeys(obsmetrics.ExporterKey), metric.WithUnit(metricdata.UnitDimensionless)) + currentlyProcessedBatchesGauge, _ = r.AddInt64DerivedGauge( + obsmetrics.ExporterKey+"/processed_batches_size", + metric.WithDescription("Number of currently 
processed batches"), + metric.WithLabelKeys(obsmetrics.ExporterKey), + metric.WithUnit(metricdata.UnitDimensionless)) + errNoStorageClient = errors.New("no storage client extension found") errMultipleStorageClients = errors.New("multiple storage extensions found") errSendingQueueIsFull = errors.New("sending_queue is full") @@ -63,8 +69,8 @@ type QueueSettings struct { NumConsumers int `mapstructure:"num_consumers"` // QueueSize is the maximum number of batches allowed in queue at a given time. QueueSize int `mapstructure:"queue_size"` - // PersistentEnabled describes whether persistency via storage file extension should be enabled - PersistentEnabled bool `mapstructure:"persistent_enabled"` + // PersistentStorageEnabled describes whether persistence via a file storage extension is enabled + PersistentStorageEnabled bool `mapstructure:"persistent_storage_enabled"` } // DefaultQueueSettings returns the default settings for QueueSettings. @@ -76,8 +82,8 @@ func DefaultQueueSettings() QueueSettings { // This is a pretty decent value for production. // User should calculate this from the perspective of how many seconds to buffer in case of a backend outage, // multiply that by the number of requests per seconds. - QueueSize: 5000, - PersistentEnabled: false, + QueueSize: 5000, + PersistentStorageEnabled: false, } } @@ -108,7 +114,7 @@ func DefaultRetrySettings() RetrySettings { type queuedRetrySender struct { id config.ComponentID - signalName string + signal signalType cfg QueueSettings consumerSender requestSender queue consumersQueue @@ -138,14 +144,14 @@ func createSampledLogger(logger *zap.Logger) *zap.Logger { return logger.WithOptions(opts) } -func newQueuedRetrySender(id config.ComponentID, signalName string, qCfg QueueSettings, rCfg RetrySettings, reqUnmarshaler requestUnmarshaler, nextSender requestSender, logger *zap.Logger) *queuedRetrySender { +func newQueuedRetrySender(id config.ComponentID, signal signalType, qCfg QueueSettings, rCfg RetrySettings, reqUnmarshaler requestUnmarshaler, nextSender requestSender, logger *zap.Logger) *queuedRetrySender { retryStopCh := make(chan struct{}) sampledLogger := createSampledLogger(logger) traceAttr := attribute.String(obsmetrics.ExporterKey, id.String()) qrs := &queuedRetrySender{ id: id, - signalName: signalName, + signal: signal, cfg: qCfg, retryStopCh: retryStopCh, traceAttributes: []attribute.KeyValue{traceAttr}, @@ -165,7 +171,7 @@ func newQueuedRetrySender(id config.ComponentID, signalName string, qCfg QueueSe onPermanentFailure: qrs.onPermanentFailure, } - if !qCfg.PersistentEnabled { + if !qCfg.PersistentStorageEnabled { qrs.queue = internal.NewBoundedQueue(qrs.cfg.QueueSize, func(item interface{}) {}) } // The Persistent Queue is initialized separately as it needs extra information about the component @@ -173,40 +179,40 @@ func newQueuedRetrySender(id config.ComponentID, signalName string, qCfg QueueSe return qrs } -func (qrs *queuedRetrySender) onSuccess(req request, err error) error { +func (qrs *queuedRetrySender) onSuccess(_ request, _ error) error { return nil } -func (qrs *queuedRetrySender) onPermanentFailure(req request, err error) error { +func (qrs *queuedRetrySender) onPermanentFailure(_ request, err error) error { return err } func (qrs *queuedRetrySender) onTemporaryFailure(req request, err error) error { - if qrs.requeuingEnabled && qrs.queue != nil { - if qrs.queue.Produce(req) { - qrs.logger.Error( - "Exporting failed. 
Putting back to the end of the queue.", - zap.Error(err), - ) - } else { - qrs.logger.Error( - "Exporting failed. Queue did not accept requeuing request. Dropping data.", - zap.Error(err), - zap.Int("dropped_items", req.count()), - ) - } + if !qrs.requeuingEnabled || qrs.queue == nil { + qrs.logger.Error( + "Exporting failed. No more retries left. Dropping data.", + zap.Error(err), + zap.Int("dropped_items", req.count()), + ) return err } - qrs.logger.Error( - "Exporting failed. No more retries left. Dropping data.", - zap.Error(err), - zap.Int("dropped_items", req.count()), - ) + if qrs.queue.Produce(req) { + qrs.logger.Error( + "Exporting failed. Putting back to the end of the queue.", + zap.Error(err), + ) + } else { + qrs.logger.Error( + "Exporting failed. Queue did not accept requeuing request. Dropping data.", + zap.Error(err), + zap.Int("dropped_items", req.count()), + ) + } return err } -func getStorageClient(ctx context.Context, host component.Host, id config.ComponentID, signalName string) (*storage.Client, error) { +func getStorageClient(ctx context.Context, host component.Host, id config.ComponentID, signal signalType) (*storage.Client, error) { var storageExtension storage.Extension for _, ext := range host.GetExtensions() { if se, ok := ext.(storage.Extension); ok { @@ -221,7 +227,7 @@ func getStorageClient(ctx context.Context, host component.Host, id config.Compon return nil, errNoStorageClient } - client, err := storageExtension.GetClient(ctx, component.KindExporter, id, signalName) + client, err := storageExtension.GetClient(ctx, component.KindExporter, id, string(signal)) if err != nil { return nil, err } @@ -231,8 +237,8 @@ func getStorageClient(ctx context.Context, host component.Host, id config.Compon // initializePersistentQueue uses extra information for initialization available from component.Host func (qrs *queuedRetrySender) initializePersistentQueue(ctx context.Context, host component.Host) error { - if qrs.cfg.PersistentEnabled { - storageClient, err := getStorageClient(ctx, host, qrs.id, qrs.signalName) + if qrs.cfg.PersistentStorageEnabled { + storageClient, err := getStorageClient(ctx, host, qrs.id, qrs.signal) if err != nil { return err } @@ -247,10 +253,10 @@ func (qrs *queuedRetrySender) initializePersistentQueue(ctx context.Context, hos } func (qrs *queuedRetrySender) fullName() string { - if qrs.signalName == "" { + if qrs.signal == "" { return qrs.id.String() } - return fmt.Sprintf("%s-%s", qrs.id.String(), qrs.signalName) + return fmt.Sprintf("%s-%s", qrs.id.String(), qrs.signal) } // start is invoked during service startup. @@ -263,6 +269,7 @@ func (qrs *queuedRetrySender) start(ctx context.Context, host component.Host) er qrs.queue.StartConsumers(qrs.cfg.NumConsumers, func(item interface{}) { req := item.(request) _ = qrs.consumerSender.send(req) + req.onProcessingFinished() }) // Start reporting queue length metric diff --git a/exporter/exporterhelper/traces.go b/exporter/exporterhelper/traces.go index b35bb1a5ea9..9507769218f 100644 --- a/exporter/exporterhelper/traces.go +++ b/exporter/exporterhelper/traces.go @@ -100,7 +100,7 @@ func NewTracesExporter( } bs := fromOptions(options...) 
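	// NOTE (illustration only, not part of this change): the StartConsumers callback updated
	// in queued_retry.go above is what keeps an item on disk until a consumer has finished
	// processing it. consumerSender.send(req) runs first (including retries and possible
	// requeuing), and only then does req.onProcessingFinished() let the persistent storage
	// delete the item. If the collector terminates mid-export, the item index remains under
	// the currently-processed key and retrieveUnprocessedItems puts the request back on the
	// queue at the next start.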
- be := newBaseExporter(cfg, set, bs, "traces", newTraceRequestUnmarshalerFunc(pusher)) + be := newBaseExporter(cfg, set, bs, signalTraces, newTraceRequestUnmarshalerFunc(pusher)) be.wrapConsumerSender(func(nextSender requestSender) requestSender { return &tracesExporterWithObservability{ obsrep: be.obsrep, From 12891e55be78fa26e0bf211399808297565e4284 Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Wed, 23 Jun 2021 19:36:09 +0200 Subject: [PATCH 05/24] Fix test condition for capacity --- .../exporterhelper/persistent_queue_test.go | 44 +++++++++++-------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/exporter/exporterhelper/persistent_queue_test.go b/exporter/exporterhelper/persistent_queue_test.go index 47cc14d6571..524c1aef4ab 100644 --- a/exporter/exporterhelper/persistent_queue_test.go +++ b/exporter/exporterhelper/persistent_queue_test.go @@ -47,28 +47,34 @@ func TestPersistentQueue_Capacity(t *testing.T) { path := createTemporaryDirectory() defer os.RemoveAll(path) - ext := createStorageExtension(path) - defer ext.Shutdown(context.Background()) - - wq := createTestQueue(ext, 5) - require.Equal(t, 0, wq.Size()) - - traces := newTraces(1, 10) - req := newTracesRequest(context.Background(), traces, nopTracePusher()) + for i := 0; i < 100; i++ { + ext := createStorageExtension(path) + defer ext.Shutdown(context.Background()) + + wq := createTestQueue(ext, 5) + require.Equal(t, 0, wq.Size()) + + traces := newTraces(1, 10) + req := newTracesRequest(context.Background(), traces, nopTracePusher()) + + for i := 0; i < 10; i++ { + result := wq.Produce(req) + if i < 6 { + require.True(t, result) + } else { + require.False(t, result) + } - for i := 0; i < 10; i++ { - result := wq.Produce(req) - if i < 5 { - require.True(t, result) - } - // Depending if loop already picked the first element into the channel, - // the 6th item can be either accepted or not, so let's skip it and make - // sure the items above 6th are not accepted. 
- if i > 5 { - require.False(t, result) + // Lets make sure the loop picks the first element into the channel, + // so the capacity could be used in full + if i == 0 { + require.Eventually(t, func() bool { + return wq.Size() == 0 + }, 1000*time.Millisecond, 10*time.Millisecond) + } } + require.Equal(t, 5, wq.Size()) } - require.Equal(t, 5, wq.Size()) } func TestPersistentQueue_Close(t *testing.T) { From 829b0feb46e1e91dcba59695411667b0351aa9a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20Ma=C5=82ek?= Date: Wed, 30 Jun 2021 17:03:15 +0200 Subject: [PATCH 06/24] Remove unnecessary for loop --- exporter/exporterhelper/persistent_storage.go | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/exporter/exporterhelper/persistent_storage.go b/exporter/exporterhelper/persistent_storage.go index 72dc0d30148..f6970cb3139 100644 --- a/exporter/exporterhelper/persistent_storage.go +++ b/exporter/exporterhelper/persistent_storage.go @@ -164,20 +164,18 @@ func (pcs *persistentContiguousStorage) loop() { } for { - for { - req, found := pcs.getNextItem(context.Background()) - if found { - select { - case <-pcs.stopChan: - return - case pcs.reqChan <- req: - } - } else { - select { - case <-pcs.stopChan: - return - case <-time.After(pcs.retryDelay): - } + req, found := pcs.getNextItem(context.Background()) + if found { + select { + case <-pcs.stopChan: + return + case pcs.reqChan <- req: + } + } else { + select { + case <-pcs.stopChan: + return + case <-time.After(pcs.retryDelay): } } } From bd683d89fad8a8ab19f687e3805fb26fe156fe07 Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Mon, 12 Jul 2021 15:42:39 +0200 Subject: [PATCH 07/24] Update to recent core version --- exporter/exporterhelper/persistent_queue_test.go | 2 +- exporter/exporterhelper/queued_retry_test.go | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/exporter/exporterhelper/persistent_queue_test.go b/exporter/exporterhelper/persistent_queue_test.go index 524c1aef4ab..f0f05d970d6 100644 --- a/exporter/exporterhelper/persistent_queue_test.go +++ b/exporter/exporterhelper/persistent_queue_test.go @@ -27,8 +27,8 @@ import ( "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/config" - "go.opentelemetry.io/collector/consumer/pdata" "go.opentelemetry.io/collector/extension/storage" + "go.opentelemetry.io/collector/model/pdata" ) func createTestQueue(extension storage.Extension, capacity int) *persistentQueue { diff --git a/exporter/exporterhelper/queued_retry_test.go b/exporter/exporterhelper/queued_retry_test.go index 98973d4b9e4..5f772fd9999 100644 --- a/exporter/exporterhelper/queued_retry_test.go +++ b/exporter/exporterhelper/queued_retry_test.go @@ -32,6 +32,7 @@ import ( "go.opentelemetry.io/collector/component/componenttest" "go.opentelemetry.io/collector/consumer/consumererror" "go.opentelemetry.io/collector/internal/testdata" + "go.opentelemetry.io/collector/model/otlp" "go.opentelemetry.io/collector/model/pdata" "go.opentelemetry.io/collector/obsreport/obsreporttest" ) @@ -414,7 +415,7 @@ func (m *mockRequest) export(ctx context.Context) error { } func (m *mockRequest) marshal() ([]byte, error) { - return pdata.NewTraces().ToOtlpProtoBytes() + return otlp.NewProtobufTracesMarshaler().MarshalTraces(pdata.NewTraces()) } func (m *mockRequest) onError(error) request { From 24bc134956594f3a12b2a06dd65cb4573294fed6 Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Mon, 19 Jul 2021 13:59:00 +0200 Subject: [PATCH 08/24] Use batching in persistent storage 
--- exporter/exporterhelper/common.go | 2 +- exporter/exporterhelper/persistent_storage.go | 243 +++++++----------- .../persistent_storage_batch.go | 229 +++++++++++++++++ .../persistent_storage_batch_test.go | 70 +++++ .../exporterhelper/persistent_storage_test.go | 75 ++++-- exporter/exporterhelper/queued_retry.go | 4 +- 6 files changed, 446 insertions(+), 177 deletions(-) create mode 100644 exporter/exporterhelper/persistent_storage_batch.go create mode 100644 exporter/exporterhelper/persistent_storage_batch_test.go diff --git a/exporter/exporterhelper/common.go b/exporter/exporterhelper/common.go index 509a16c9295..9e15e780b49 100644 --- a/exporter/exporterhelper/common.go +++ b/exporter/exporterhelper/common.go @@ -40,7 +40,7 @@ func DefaultTimeoutSettings() TimeoutSettings { } } -// request is an abstraction of an individual request (batch of data) independent of the type of the data (traces, metrics, logs). +// request is an abstraction of an individual request (batchStruct of data) independent of the type of the data (traces, metrics, logs). type request interface { // context returns the Context of the requests. context() context.Context diff --git a/exporter/exporterhelper/persistent_storage.go b/exporter/exporterhelper/persistent_storage.go index f6970cb3139..211611c4043 100644 --- a/exporter/exporterhelper/persistent_storage.go +++ b/exporter/exporterhelper/persistent_storage.go @@ -15,9 +15,7 @@ package exporterhelper import ( - "bytes" "context" - "encoding/binary" "errors" "fmt" "sync" @@ -98,8 +96,9 @@ const ( ) var ( - errStoringItemToQueue = errors.New("item could not be stored to persistent queue") - errUpdatingIndex = errors.New("index could not be updated") + errValueNotSet = errors.New("value not set") + + errKeyNotPresentInBatch = errors.New("key was not present in get batchStruct") ) // newPersistentContiguousStorage creates a new file-storage extension backed queue. 
It needs to be initialized separately @@ -127,23 +126,28 @@ func newPersistentContiguousStorage(ctx context.Context, queueName string, logge return wcs } -func initPersistentContiguousStorage(ctx context.Context, wcs *persistentContiguousStorage) { - readIndexIf := wcs._clientGet(ctx, readIndexKey, bytesToItemIndex) - if readIndexIf == nil { - wcs.logger.Debug("failed getting read index, starting with a new one", - zap.String(zapQueueNameKey, wcs.queueName)) - wcs.readIndex = 0 - } else { - wcs.readIndex = readIndexIf.(itemIndex) +func initPersistentContiguousStorage(ctx context.Context, pcs *persistentContiguousStorage) { + var writeIndex itemIndex + var readIndex itemIndex + batch, err := newBatch(pcs).get(readIndexKey, writeIndexKey).execute(ctx) + + if err == nil { + readIndex, err = batch.getItemIndexResult(readIndexKey) + } + + if err == nil { + writeIndex, err = batch.getItemIndexResult(writeIndexKey) } - writeIndexIf := wcs._clientGet(ctx, writeIndexKey, bytesToItemIndex) - if writeIndexIf == nil { - wcs.logger.Debug("failed getting write index, starting with a new one", - zap.String(zapQueueNameKey, wcs.queueName)) - wcs.writeIndex = 0 + if err != nil { + pcs.logger.Debug("failed getting read/write index, starting with new ones", + zap.String(zapQueueNameKey, pcs.queueName), + zap.Error(err)) + pcs.readIndex = 0 + pcs.writeIndex = 0 } else { - wcs.writeIndex = writeIndexIf.(itemIndex) + pcs.readIndex = readIndex + pcs.writeIndex = writeIndex } } @@ -218,13 +222,11 @@ func (pcs *persistentContiguousStorage) put(req request) error { ctx := context.Background() itemKey := pcs.itemKey(pcs.writeIndex) - if !pcs._clientSet(ctx, itemKey, req, requestToBytes) { - return errStoringItemToQueue - } - pcs.writeIndex++ - if !pcs._clientSet(ctx, writeIndexKey, pcs.writeIndex, itemIndexToBytes) { - return errUpdatingIndex + + _, err := newBatch(pcs).setItemIndex(writeIndexKey, pcs.writeIndex).setRequest(itemKey, req).execute(ctx) + if err != nil { + return err } return nil @@ -243,8 +245,13 @@ func (pcs *persistentContiguousStorage) getNextItem(ctx context.Context) (reques pcs.updateReadIndex(ctx) pcs.itemProcessingStart(ctx, index) - req := pcs._clientGet(ctx, pcs.itemKey(index), pcs.bytesToRequest).(request) - if req == nil { + batch, err := newBatch(pcs).get(pcs.itemKey(index)).execute(ctx) + if err != nil { + return nil, false + } + + req, err := batch.getRequestResult(pcs.itemKey(index)) + if err != nil || req == nil { return nil, false } @@ -263,18 +270,18 @@ func (pcs *persistentContiguousStorage) getNextItem(ctx context.Context) (reques // and moves the items back to the queue func (pcs *persistentContiguousStorage) retrieveUnprocessedItems(ctx context.Context) []request { var reqs []request + var processedItems []itemIndex + pcs.mu.Lock() defer pcs.mu.Unlock() - pcs.logger.Debug("checking if there are items left by consumers") - processedItemsIf := pcs._clientGet(ctx, currentlyProcessedItemsKey, bytesToItemIndexArray) - if processedItemsIf == nil { - return reqs + pcs.logger.Debug("checking if there are items left by consumers", zap.String(zapQueueNameKey, pcs.queueName)) + batch, err := newBatch(pcs).get(currentlyProcessedItemsKey).execute(ctx) + if err == nil { + processedItems, err = batch.getItemIndexArrayResult(currentlyProcessedItemsKey) } - processedItems, ok := processedItemsIf.([]itemIndex) - if !ok { - pcs.logger.Warn("failed fetching list of unprocessed items", - zap.String(zapQueueNameKey, pcs.queueName)) + if err != nil { + pcs.logger.Warn("could not fetch items left by 
consumers", zap.String(zapQueueNameKey, pcs.queueName)) return reqs } @@ -285,10 +292,30 @@ func (pcs *persistentContiguousStorage) retrieveUnprocessedItems(ctx context.Con pcs.logger.Debug("no items left for processing by consumers") } - for _, it := range processedItems { - req := pcs._clientGet(ctx, pcs.itemKey(it), pcs.bytesToRequest).(request) - pcs._clientDelete(ctx, pcs.itemKey(it)) - reqs = append(reqs, req) + reqs = make([]request, len(processedItems)) + keys := make([]string, len(processedItems)) + batch = newBatch(pcs) + for i, it := range processedItems { + keys[i] = pcs.itemKey(it) + batch. + get(keys[i]). + delete(keys[i]) + } + + _, err = batch.execute(ctx) + if err != nil { + pcs.logger.Warn("failed cleaning items left by consumers", zap.String(zapQueueNameKey, pcs.queueName)) + return reqs + } + + for i, key := range keys { + req, err := batch.getRequestResult(key) + if err != nil { + pcs.logger.Warn("failed unmarshalling item", + zap.String(zapQueueNameKey, pcs.queueName), zap.String(zapKey, key)) + } else { + reqs[i] = req + } } return reqs @@ -297,7 +324,13 @@ func (pcs *persistentContiguousStorage) retrieveUnprocessedItems(ctx context.Con // itemProcessingStart appends the item to the list of currently processed items func (pcs *persistentContiguousStorage) itemProcessingStart(ctx context.Context, index itemIndex) { pcs.currentlyProcessedItems = append(pcs.currentlyProcessedItems, index) - pcs._clientSet(ctx, currentlyProcessedItemsKey, pcs.currentlyProcessedItems, itemIndexArrayToBytes) + _, err := newBatch(pcs). + setItemIndexArray(currentlyProcessedItemsKey, pcs.currentlyProcessedItems). + execute(ctx) + if err != nil { + pcs.logger.Warn("failed updating currently processed items", + zap.String(zapQueueNameKey, pcs.queueName), zap.Error(err)) + } } // itemProcessingFinish removes the item from the list of currently processed items and deletes it from the persistent queue @@ -310,131 +343,27 @@ func (pcs *persistentContiguousStorage) itemProcessingFinish(ctx context.Context } pcs.currentlyProcessedItems = updatedProcessedItems - pcs._clientSet(ctx, currentlyProcessedItemsKey, pcs.currentlyProcessedItems, itemIndexArrayToBytes) - pcs._clientDelete(ctx, pcs.itemKey(index)) -} - -func (pcs *persistentContiguousStorage) updateReadIndex(ctx context.Context) { - pcs._clientSet(ctx, readIndexKey, pcs.readIndex, itemIndexToBytes) -} - -func (pcs *persistentContiguousStorage) itemKey(index itemIndex) string { - return fmt.Sprintf("%d", index) -} - -func (pcs *persistentContiguousStorage) _clientSet(ctx context.Context, key string, value interface{}, marshal func(interface{}) ([]byte, error)) bool { - valueBytes, err := marshal(value) - if err != nil { - pcs.logger.Warn("failed marshaling item", - zap.String(zapQueueNameKey, pcs.queueName), zap.String(zapKey, key), zap.Error(err)) - return false - } - - return pcs._clientSetBuf(ctx, key, valueBytes) -} - -func (pcs *persistentContiguousStorage) _clientGet(ctx context.Context, key string, unmarshal func([]byte) (interface{}, error)) interface{} { - valueBytes := pcs._clientGetBuf(ctx, key) - if valueBytes == nil { - return nil - } - - item, err := unmarshal(valueBytes) - if err != nil { - pcs.logger.Warn("failed unmarshaling item", - zap.String(zapQueueNameKey, pcs.queueName), zap.String(zapKey, key), zap.Error(err)) - return nil - } - - return item -} - -func (pcs *persistentContiguousStorage) _clientDelete(ctx context.Context, key string) { - err := pcs.client.Delete(ctx, key) - if err != nil { - pcs.logger.Warn("failed 
deleting item", - zap.String(zapQueueNameKey, pcs.queueName), zap.String(zapKey, key)) - } -} - -func (pcs *persistentContiguousStorage) _clientGetBuf(ctx context.Context, key string) []byte { - buf, err := pcs.client.Get(ctx, key) - if err != nil { - pcs.logger.Debug("error when getting item from persistent storage", - zap.String(zapQueueNameKey, pcs.queueName), zap.String(zapKey, key), zap.Error(err)) - return nil - } - return buf -} - -func (pcs *persistentContiguousStorage) _clientSetBuf(ctx context.Context, key string, buf []byte) bool { - err := pcs.client.Set(ctx, key, buf) - if err != nil { - pcs.logger.Debug("error when storing item to persistent storage", - zap.String(zapQueueNameKey, pcs.queueName), zap.String(zapKey, key), zap.Error(err)) - return false - } - return true -} - -func itemIndexToBytes(val interface{}) ([]byte, error) { - var buf bytes.Buffer - err := binary.Write(&buf, binary.LittleEndian, val) + _, err := newBatch(pcs). + setItemIndexArray(currentlyProcessedItemsKey, pcs.currentlyProcessedItems). + delete(pcs.itemKey(index)). + execute(ctx) if err != nil { - return nil, err + pcs.logger.Warn("failed updating currently processed items", + zap.String(zapQueueNameKey, pcs.queueName), zap.Error(err)) } - return buf.Bytes(), err } -func bytesToItemIndex(b []byte) (interface{}, error) { - var val itemIndex - err := binary.Read(bytes.NewReader(b), binary.LittleEndian, &val) - if err != nil { - return val, err - } - return val, nil -} - -func itemIndexArrayToBytes(arr interface{}) ([]byte, error) { - var buf bytes.Buffer - count := 0 - - if arr != nil { - arrItemIndex, ok := arr.([]itemIndex) - if ok { - count = len(arrItemIndex) - } - } - - err := binary.Write(&buf, binary.LittleEndian, uint32(count)) - if err != nil { - return nil, err - } - - err = binary.Write(&buf, binary.LittleEndian, arr) - if err != nil { - return nil, err - } - return buf.Bytes(), err -} +func (pcs *persistentContiguousStorage) updateReadIndex(ctx context.Context) { + _, err := newBatch(pcs). + setItemIndex(readIndexKey, pcs.readIndex). + execute(ctx) -func bytesToItemIndexArray(b []byte) (interface{}, error) { - var size uint32 - reader := bytes.NewReader(b) - err := binary.Read(reader, binary.LittleEndian, &size) if err != nil { - return nil, err + pcs.logger.Warn("failed updating read index", + zap.String(zapQueueNameKey, pcs.queueName), zap.Error(err)) } - - val := make([]itemIndex, size) - err = binary.Read(reader, binary.LittleEndian, &val) - return val, err -} - -func requestToBytes(req interface{}) ([]byte, error) { - return req.(request).marshal() } -func (pcs *persistentContiguousStorage) bytesToRequest(b []byte) (interface{}, error) { - return pcs.unmarshaler(b) +func (pcs *persistentContiguousStorage) itemKey(index itemIndex) string { + return fmt.Sprintf("%d", index) } diff --git a/exporter/exporterhelper/persistent_storage_batch.go b/exporter/exporterhelper/persistent_storage_batch.go new file mode 100644 index 00000000000..eb2a73f27a6 --- /dev/null +++ b/exporter/exporterhelper/persistent_storage_batch.go @@ -0,0 +1,229 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package exporterhelper + +import ( + "bytes" + "context" + "encoding/binary" + "sync" + + "go.uber.org/zap" + + "go.opentelemetry.io/collector/extension/storage" +) + +// batchStruct provides convenience capabilities for creating and processing storage extension batches +type batchStruct struct { + logger *zap.Logger + pcs *persistentContiguousStorage + + mu sync.Mutex + + operations []storage.Operation + getOperations map[string]storage.Operation +} + +func newBatch(pcs *persistentContiguousStorage) *batchStruct { + return &batchStruct{ + logger: pcs.logger, + pcs: pcs, + operations: []storage.Operation{}, + getOperations: map[string]storage.Operation{}, + } +} + +// execute runs the provided operations in order +func (bof *batchStruct) execute(ctx context.Context) (*batchStruct, error) { + bof.mu.Lock() + defer bof.mu.Unlock() + + err := bof.pcs.client.Batch(ctx, bof.operations...) + if err != nil { + return nil, err + } + + return bof, nil +} + +// set adds a Set operation to the batch +func (bof *batchStruct) set(key string, value interface{}, marshal func(interface{}) ([]byte, error)) *batchStruct { + bof.mu.Lock() + defer bof.mu.Unlock() + + valueBytes, err := marshal(value) + if err != nil { + bof.logger.Warn("failed marshaling item, skipping it", zap.String(zapKey, key), zap.Error(err)) + } + + bof.operations = append(bof.operations, storage.SetOperation(key, valueBytes)) + return bof +} + +// get adds a Get operation to the batch. After executing, its result will be available through getResult +func (bof *batchStruct) get(keys ...string) *batchStruct { + bof.mu.Lock() + defer bof.mu.Unlock() + + for _, key := range keys { + op := storage.GetOperation(key) + bof.getOperations[key] = op + bof.operations = append(bof.operations, op) + } + + return bof +} + +// delete adds a Delete operation to the batch +func (bof *batchStruct) delete(keys ...string) *batchStruct { + bof.mu.Lock() + defer bof.mu.Unlock() + + for _, key := range keys { + bof.operations = append(bof.operations, storage.DeleteOperation(key)) + } + + return bof +} + +// getResult returns the result of a Get operation for a given key using the provided unmarshal function. 
+// It should be called after execute +func (bof *batchStruct) getResult(key string, unmarshal func([]byte) (interface{}, error)) (interface{}, error) { + op := bof.getOperations[key] + if op == nil { + return nil, errKeyNotPresentInBatch + } + + if op.Value == nil { + return nil, nil + } + + return unmarshal(op.Value) +} + +// getRequestResult returns the result of a Get operation as a request +func (bof *batchStruct) getRequestResult(key string) (request, error) { + reqIf, err := bof.getResult(key, bof.bytesToRequest) + if err != nil { + return nil, err + } + + return reqIf.(request), nil +} + +// getItemIndexResult returns the result of a Get operation as an itemIndex +func (bof *batchStruct) getItemIndexResult(key string) (itemIndex, error) { + itemIndexIf, err := bof.getResult(key, bytesToItemIndex) + if err != nil { + return itemIndex(0), err + } + + if itemIndexIf == nil { + return itemIndex(0), errValueNotSet + } + + return itemIndexIf.(itemIndex), nil +} + +// getItemIndexArrayResult returns the result of a Get operation as a itemIndexArray +func (bof *batchStruct) getItemIndexArrayResult(key string) ([]itemIndex, error) { + itemIndexArrIf, err := bof.getResult(key, bytesToItemIndexArray) + if err != nil { + return nil, err + } + + if itemIndexArrIf == nil { + return nil, nil + } + + return itemIndexArrIf.([]itemIndex), nil +} + +// setRequest adds Set operation over a given request to the batch +func (bof *batchStruct) setRequest(key string, value request) *batchStruct { + return bof.set(key, value, requestToBytes) +} + +// setItemIndex adds Set operation over a given itemIndex to the batch +func (bof *batchStruct) setItemIndex(key string, value itemIndex) *batchStruct { + return bof.set(key, value, itemIndexToBytes) +} + +// setItemIndexArray adds Set operation over a given itemIndex array to the batch +func (bof *batchStruct) setItemIndexArray(key string, value []itemIndex) *batchStruct { + return bof.set(key, value, itemIndexArrayToBytes) +} + +func itemIndexToBytes(val interface{}) ([]byte, error) { + var buf bytes.Buffer + err := binary.Write(&buf, binary.LittleEndian, val) + if err != nil { + return nil, err + } + return buf.Bytes(), err +} + +func bytesToItemIndex(b []byte) (interface{}, error) { + var val itemIndex + err := binary.Read(bytes.NewReader(b), binary.LittleEndian, &val) + if err != nil { + return val, err + } + return val, nil +} + +func itemIndexArrayToBytes(arr interface{}) ([]byte, error) { + var buf bytes.Buffer + count := 0 + + if arr != nil { + arrItemIndex, ok := arr.([]itemIndex) + if ok { + count = len(arrItemIndex) + } + } + + err := binary.Write(&buf, binary.LittleEndian, uint32(count)) + if err != nil { + return nil, err + } + + err = binary.Write(&buf, binary.LittleEndian, arr) + if err != nil { + return nil, err + } + return buf.Bytes(), err +} + +func bytesToItemIndexArray(b []byte) (interface{}, error) { + var size uint32 + reader := bytes.NewReader(b) + err := binary.Read(reader, binary.LittleEndian, &size) + if err != nil { + return nil, err + } + + val := make([]itemIndex, size) + err = binary.Read(reader, binary.LittleEndian, &val) + return val, err +} + +func requestToBytes(req interface{}) ([]byte, error) { + return req.(request).marshal() +} + +func (bof *batchStruct) bytesToRequest(b []byte) (interface{}, error) { + return bof.pcs.unmarshaler(b) +} diff --git a/exporter/exporterhelper/persistent_storage_batch_test.go b/exporter/exporterhelper/persistent_storage_batch_test.go new file mode 100644 index 00000000000..41ada268c29 --- 
/dev/null +++ b/exporter/exporterhelper/persistent_storage_batch_test.go @@ -0,0 +1,70 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package exporterhelper + +import ( + "context" + "os" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestPersistentStorageBatch_Operations(t *testing.T) { + path := createTemporaryDirectory() + defer os.RemoveAll(path) + + ext := createStorageExtension(path) + client := createTestClient(ext) + ps := createTestPersistentStorage(client) + + itemIndexValue := itemIndex(123) + itemIndexArrayValue := []itemIndex{itemIndex(1), itemIndex(2)} + + _, err := newBatch(ps). + setItemIndex("index", itemIndexValue). + setItemIndexArray("arr", itemIndexArrayValue). + execute(context.Background()) + + require.NoError(t, err) + + batch, err := newBatch(ps). + get("index", "arr"). + execute(context.Background()) + require.NoError(t, err) + + retrievedItemIndexValue, err := batch.getItemIndexResult("index") + require.NoError(t, err) + require.Equal(t, itemIndexValue, retrievedItemIndexValue) + + retrievedItemIndexArrayValue, err := batch.getItemIndexArrayResult("arr") + require.NoError(t, err) + require.Equal(t, itemIndexArrayValue, retrievedItemIndexArrayValue) + + _, err = newBatch(ps).delete("index", "arr").execute(context.Background()) + require.NoError(t, err) + + batch, err = newBatch(ps). + get("index", "arr"). 
+ execute(context.Background()) + require.NoError(t, err) + + _, err = batch.getItemIndexResult("index") + require.Error(t, err, errValueNotSet) + + retrievedItemIndexArrayValue, err = batch.getItemIndexArrayResult("arr") + require.NoError(t, err) + require.Nil(t, retrievedItemIndexArrayValue) +} diff --git a/exporter/exporterhelper/persistent_storage_test.go b/exporter/exporterhelper/persistent_storage_test.go index 12301d4b6e2..31327d8c007 100644 --- a/exporter/exporterhelper/persistent_storage_test.go +++ b/exporter/exporterhelper/persistent_storage_test.go @@ -21,6 +21,7 @@ import ( "io/ioutil" "os" "reflect" + "sync" "testing" "time" @@ -75,42 +76,52 @@ func TestPersistentStorage_CurrentlyProcessedItems(t *testing.T) { require.NoError(t, err) } - requireItemIndexesEqual(t, ps, []itemIndex{0}) + // Item index 0 is currently in unbuffered channel + requireCurrentItemIndexesEqual(t, ps, []itemIndex{0}) + // Now, this will take item 0 and pull item 1 into the unbuffered channel readReq := getItemFromChannel(t, ps) require.Equal(t, req.(*tracesRequest).td, readReq.(*tracesRequest).td) + requireCurrentItemIndexesEqual(t, ps, []itemIndex{0, 1}) - requireItemIndexesEqual(t, ps, []itemIndex{0, 1}) - + // This takes item 1 from channel and pulls another one (item 2) into the unbuffered channel secondReadReq := getItemFromChannel(t, ps) - requireItemIndexesEqual(t, ps, []itemIndex{0, 1, 2}) + requireCurrentItemIndexesEqual(t, ps, []itemIndex{0, 1, 2}) + // Lets mark item 1 as finished, it will remove it from the currently processed items list secondReadReq.onProcessingFinished() - requireItemIndexesEqual(t, ps, []itemIndex{0, 2}) + requireCurrentItemIndexesEqual(t, ps, []itemIndex{0, 2}) - // Reload the storage and check how many items are there, which, after the current one is fetched should go to 3 + // Reload the storage. Since items 0 and 2 were not finished, those should be requeued at the end. + // The queue should be essentially {3,4,0,2} out of which item "3" should be pulled right away into + // the unbuffered channel. Check how many items are there, which, after the current one is fetched should go to 3. 
newPs := createTestPersistentStorage(client) require.Eventually(t, func() bool { return newPs.size() == 3 }, 500*time.Millisecond, 10*time.Millisecond) - requireItemIndexesEqual(t, newPs, []itemIndex{3}) + requireCurrentItemIndexesEqual(t, newPs, []itemIndex{3}) + // We should be able to pull all remaining items now for i := 0; i < 4; i++ { req := getItemFromChannel(t, newPs) req.onProcessingFinished() } - requireItemIndexesEqual(t, newPs, nil) + // The queue should be now empty + requireCurrentItemIndexesEqual(t, newPs, nil) require.Eventually(t, func() bool { return newPs.size() == 0 }, 500*time.Millisecond, 10*time.Millisecond) // The writeIndex should be now set accordingly require.Equal(t, 7, int(newPs.writeIndex)) + // There should be no items left in the storage for i := 0; i < int(newPs.writeIndex); i++ { - require.Nil(t, newPs._clientGetBuf(context.Background(), newPs.itemKey(itemIndex(i)))) + bb, err := client.Get(context.Background(), newPs.itemKey(itemIndex(i))) + require.NoError(t, err) + require.Nil(t, bb) } } @@ -131,7 +142,7 @@ func TestPersistentStorage_MetricsReported(t *testing.T) { } _ = getItemFromChannel(t, ps) - requireItemIndexesEqual(t, ps, []itemIndex{0, 1}) + requireCurrentItemIndexesEqual(t, ps, []itemIndex{0, 1}) checkValueForProducer(t, []tag.Tag{{Key: exporterTag, Value: "foo"}}, int64(2), "exporter/processed_batches_size") ps.stop() @@ -277,7 +288,7 @@ func getItemFromChannel(t *testing.T, pcs *persistentContiguousStorage) request return readReq } -func requireItemIndexesEqual(t *testing.T, pcs *persistentContiguousStorage, compare []itemIndex) { +func requireCurrentItemIndexesEqual(t *testing.T, pcs *persistentContiguousStorage, compare []itemIndex) { require.Eventually(t, func() bool { pcs.mu.Lock() defer pcs.mu.Unlock() @@ -310,28 +321,58 @@ func newMockStorageClient() storage.Client { } type mockStorageClient struct { - st map[string][]byte + st map[string][]byte + mux sync.Mutex } -func (m mockStorageClient) Get(_ context.Context, s string) ([]byte, error) { +func (m *mockStorageClient) Get(_ context.Context, s string) ([]byte, error) { + m.mux.Lock() + defer m.mux.Unlock() + val, found := m.st[s] if !found { - return []byte{}, errors.New("key not found") + return nil, nil } return val, nil } -func (m mockStorageClient) Set(_ context.Context, s string, bytes []byte) error { +func (m *mockStorageClient) Set(_ context.Context, s string, bytes []byte) error { + m.mux.Lock() + defer m.mux.Unlock() + m.st[s] = bytes return nil } -func (m mockStorageClient) Delete(_ context.Context, s string) error { +func (m *mockStorageClient) Delete(_ context.Context, s string) error { + m.mux.Lock() + defer m.mux.Unlock() + delete(m.st, s) return nil } -func (m mockStorageClient) Close(_ context.Context) error { +func (m *mockStorageClient) Close(_ context.Context) error { + return nil +} + +func (m *mockStorageClient) Batch(_ context.Context, ops ...storage.Operation) error { + m.mux.Lock() + defer m.mux.Unlock() + + for _, op := range ops { + switch op.Type { + case storage.Get: + op.Value = m.st[op.Key] + case storage.Set: + m.st[op.Key] = op.Value + case storage.Delete: + delete(m.st, op.Key) + default: + return errors.New("wrong operation type") + } + } + return nil } diff --git a/exporter/exporterhelper/queued_retry.go b/exporter/exporterhelper/queued_retry.go index fa4e7f05218..00214aaccfc 100644 --- a/exporter/exporterhelper/queued_retry.go +++ b/exporter/exporterhelper/queued_retry.go @@ -97,7 +97,7 @@ type RetrySettings struct { // MaxInterval is the upper bound 
on backoff interval. Once this value is reached the delay between // consecutive retries will always be `MaxInterval`. MaxInterval time.Duration `mapstructure:"max_interval"` - // MaxElapsedTime is the maximum amount of time (including retries) spent trying to send a request/batch. + // MaxElapsedTime is the maximum amount of time (including retries) spent trying to send a request/batchStruct. // Once this value is reached, the data is discarded. MaxElapsedTime time.Duration `mapstructure:"max_elapsed_time"` } @@ -423,7 +423,7 @@ func (rs *retrySender) send(req request) error { backoffDelay := expBackoff.NextBackOff() if backoffDelay == backoff.Stop { - // throw away the batch + // throw away the batchStruct err = fmt.Errorf("max elapsed time expired %w", err) return rs.onTemporaryFailure(req, err) } From bc806fedc70fa9de5cf6b3bcd977a1a10fa96a9c Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Mon, 19 Jul 2021 22:13:50 +0200 Subject: [PATCH 09/24] Make sure empty requests are ignored --- exporter/exporterhelper/persistent_storage.go | 5 +++++ .../exporterhelper/persistent_storage_test.go | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/exporter/exporterhelper/persistent_storage.go b/exporter/exporterhelper/persistent_storage.go index 211611c4043..99f5fc1d8a1 100644 --- a/exporter/exporterhelper/persistent_storage.go +++ b/exporter/exporterhelper/persistent_storage.go @@ -216,6 +216,11 @@ func (pcs *persistentContiguousStorage) stop() { // put marshals the request and puts it into the persistent queue func (pcs *persistentContiguousStorage) put(req request) error { + // Nil requests are ignored + if req == nil { + return nil + } + pcs.mu.Lock() defer pcs.mu.Unlock() diff --git a/exporter/exporterhelper/persistent_storage_test.go b/exporter/exporterhelper/persistent_storage_test.go index 31327d8c007..6e51fcb9f88 100644 --- a/exporter/exporterhelper/persistent_storage_test.go +++ b/exporter/exporterhelper/persistent_storage_test.go @@ -199,6 +199,25 @@ func TestPersistentStorage_RepeatPutCloseReadClose(t *testing.T) { ext.Shutdown(context.Background()) } +func TestPersistentStorage_EmptyRequest(t *testing.T) { + path := createTemporaryDirectory() + defer os.RemoveAll(path) + + ext := createStorageExtension(path) + client := createTestClient(ext) + ps := createTestPersistentStorage(client) + + require.Equal(t, 0, ps.size()) + + err := ps.put(nil) + require.NoError(t, err) + + require.Equal(t, 0, ps.size()) + + err = ext.Shutdown(context.Background()) + require.NoError(t, err) +} + func BenchmarkPersistentStorage_TraceSpans(b *testing.B) { cases := []struct { numTraces int From 89253e716da7a7affa2e4ddab46767e87cc3acd7 Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Wed, 25 Aug 2021 13:52:11 +0200 Subject: [PATCH 10/24] Better logging and overall cleanup (addressing PR comments) --- exporter/exporterhelper/common.go | 15 ++--- exporter/exporterhelper/logs.go | 2 +- exporter/exporterhelper/metrics.go | 2 +- exporter/exporterhelper/persistent_queue.go | 6 +- exporter/exporterhelper/persistent_storage.go | 55 +++++++++++-------- .../persistent_storage_batch.go | 13 +++-- .../exporterhelper/persistent_storage_test.go | 2 +- exporter/exporterhelper/queued_retry.go | 10 ++-- exporter/exporterhelper/traces.go | 2 +- 9 files changed, 54 insertions(+), 53 deletions(-) diff --git a/exporter/exporterhelper/common.go b/exporter/exporterhelper/common.go index 9e15e780b49..a7685eb6baa 100644 --- a/exporter/exporterhelper/common.go +++ b/exporter/exporterhelper/common.go @@ 
-40,7 +40,7 @@ func DefaultTimeoutSettings() TimeoutSettings { } } -// request is an abstraction of an individual request (batchStruct of data) independent of the type of the data (traces, metrics, logs). +// request is an abstraction of an individual request (batch of data) independent of the type of the data (traces, metrics, logs). type request interface { // context returns the Context of the requests. context() context.Context @@ -54,8 +54,9 @@ type request interface { count() int // marshal serializes the current request into a byte stream marshal() ([]byte, error) - // onProcessingFinished provides capability to do the cleanup (e.g. remove item from persistent queue) + // onProcessingFinished calls the optional callback function to handle cleanup after all processing is finished onProcessingFinished() + // setOnProcessingFinished allows to set an optional callback function to do the cleanup (e.g. remove the item from persistent queue) setOnProcessingFinished(callback func()) } @@ -178,15 +179,7 @@ type baseExporter struct { qrSender *queuedRetrySender } -type signalType string - -const ( - signalLogs = signalType("logs") - signalMetrics = signalType("metrics") - signalTraces = signalType("traces") -) - -func newBaseExporter(cfg config.Exporter, set component.ExporterCreateSettings, bs *baseSettings, signal signalType, reqUnnmarshaler requestUnmarshaler) *baseExporter { +func newBaseExporter(cfg config.Exporter, set component.ExporterCreateSettings, bs *baseSettings, signal config.DataType, reqUnnmarshaler requestUnmarshaler) *baseExporter { be := &baseExporter{ Component: componenthelper.New(bs.componentOptions...), } diff --git a/exporter/exporterhelper/logs.go b/exporter/exporterhelper/logs.go index 8ba5d19ed2b..3f475f09ae3 100644 --- a/exporter/exporterhelper/logs.go +++ b/exporter/exporterhelper/logs.go @@ -99,7 +99,7 @@ func NewLogsExporter( } bs := fromOptions(options...) - be := newBaseExporter(cfg, set, bs, signalLogs, newLogsRequestUnmarshalerFunc(pusher)) + be := newBaseExporter(cfg, set, bs, config.LogsDataType, newLogsRequestUnmarshalerFunc(pusher)) be.wrapConsumerSender(func(nextSender requestSender) requestSender { return &logsExporterWithObservability{ obsrep: be.obsrep, diff --git a/exporter/exporterhelper/metrics.go b/exporter/exporterhelper/metrics.go index 513a9ed0766..96c9b9975cd 100644 --- a/exporter/exporterhelper/metrics.go +++ b/exporter/exporterhelper/metrics.go @@ -99,7 +99,7 @@ func NewMetricsExporter( } bs := fromOptions(options...) 
- be := newBaseExporter(cfg, set, bs, signalMetrics, newMetricsRequestUnmarshalerFunc(pusher)) + be := newBaseExporter(cfg, set, bs, config.MetricsDataType, newMetricsRequestUnmarshalerFunc(pusher)) be.wrapConsumerSender(func(nextSender requestSender) requestSender { return &metricsSenderWithObservability{ obsrep: be.obsrep, diff --git a/exporter/exporterhelper/persistent_queue.go b/exporter/exporterhelper/persistent_queue.go index 09f863af334..04315158adc 100644 --- a/exporter/exporterhelper/persistent_queue.go +++ b/exporter/exporterhelper/persistent_queue.go @@ -37,7 +37,7 @@ type persistentQueue struct { storage persistentStorage } -// newPersistentQueue creates a new queue backed by file storage +// newPersistentQueue creates a new queue backed by file storage; name parameter must be a unique value that identifies the queue func newPersistentQueue(ctx context.Context, name string, capacity int, logger *zap.Logger, client storage.Client, unmarshaler requestUnmarshaler) *persistentQueue { return &persistentQueue{ logger: logger, @@ -50,7 +50,6 @@ func newPersistentQueue(ctx context.Context, name string, capacity int, logger * // StartConsumers starts the given number of consumers which will be consuming items func (pq *persistentQueue) StartConsumers(num int, callback func(item interface{})) { pq.numWorkers = num - var startWG sync.WaitGroup factory := func() queue.Consumer { return queue.ConsumerFunc(callback) @@ -58,9 +57,7 @@ func (pq *persistentQueue) StartConsumers(num int, callback func(item interface{ for i := 0; i < pq.numWorkers; i++ { pq.stopWG.Add(1) - startWG.Add(1) go func() { - startWG.Done() defer pq.stopWG.Done() consumer := factory() @@ -74,7 +71,6 @@ func (pq *persistentQueue) StartConsumers(num int, callback func(item interface{ } }() } - startWG.Wait() } // Produce adds an item to the queue and returns true if it was accepted diff --git a/exporter/exporterhelper/persistent_storage.go b/exporter/exporterhelper/persistent_storage.go index 99f5fc1d8a1..b3597cf377f 100644 --- a/exporter/exporterhelper/persistent_storage.go +++ b/exporter/exporterhelper/persistent_storage.go @@ -17,7 +17,7 @@ package exporterhelper import ( "context" "errors" - "fmt" + "strconv" "sync" "time" @@ -101,9 +101,11 @@ var ( errKeyNotPresentInBatch = errors.New("key was not present in get batchStruct") ) -// newPersistentContiguousStorage creates a new file-storage extension backed queue. It needs to be initialized separately +// newPersistentContiguousStorage creates a new file-storage extension backed queue; +// queueName parameter must be a unique value that identifies the queue. +// The queue needs to be initialized separately using initPersistentContiguousStorage. 
func newPersistentContiguousStorage(ctx context.Context, queueName string, logger *zap.Logger, client storage.Client, unmarshaler requestUnmarshaler) *persistentContiguousStorage { - wcs := &persistentContiguousStorage{ + pcs := &persistentContiguousStorage{ logger: logger, client: client, queueName: queueName, @@ -113,17 +115,17 @@ func newPersistentContiguousStorage(ctx context.Context, queueName string, logge retryDelay: defaultRetryDelay, } - initPersistentContiguousStorage(ctx, wcs) + initPersistentContiguousStorage(ctx, pcs) err := currentlyProcessedBatchesGauge.UpsertEntry(func() int64 { - return int64(wcs.numberOfCurrentlyProcessedItems()) - }, metricdata.NewLabelValue(wcs.queueName)) + return int64(pcs.numberOfCurrentlyProcessedItems()) + }, metricdata.NewLabelValue(pcs.queueName)) if err != nil { logger.Error("failed to create number of currently processed items metric", zap.Error(err)) } - go wcs.loop() - return wcs + go pcs.loop() + return pcs } func initPersistentContiguousStorage(ctx context.Context, pcs *persistentContiguousStorage) { @@ -140,7 +142,7 @@ func initPersistentContiguousStorage(ctx context.Context, pcs *persistentContigu } if err != nil { - pcs.logger.Debug("failed getting read/write index, starting with new ones", + pcs.logger.Error("failed getting read/write index, starting with new ones", zap.String(zapQueueNameKey, pcs.queueName), zap.Error(err)) pcs.readIndex = 0 @@ -162,9 +164,17 @@ func (pcs *persistentContiguousStorage) loop() { errCount++ } } - pcs.logger.Info("moved items for processing back to queue", - zap.String(zapQueueNameKey, pcs.queueName), - zap.Int(zapNumberOfItems, len(reqs)), zap.Int(zapErrorCount, errCount)) + if errCount > 0 { + pcs.logger.Error("errors occurred while moving items for processing back to queue", + zap.String(zapQueueNameKey, pcs.queueName), + zap.Int(zapNumberOfItems, len(reqs)), zap.Int(zapErrorCount, errCount)) + + } else { + pcs.logger.Info("moved items for processing back to queue", + zap.String(zapQueueNameKey, pcs.queueName), + zap.Int(zapNumberOfItems, len(reqs))) + + } } for { @@ -207,10 +217,10 @@ func (pcs *persistentContiguousStorage) numberOfCurrentlyProcessedItems() int { func (pcs *persistentContiguousStorage) stop() { pcs.logger.Debug("stopping persistentContiguousStorage", zap.String(zapQueueNameKey, pcs.queueName)) pcs.stopOnce.Do(func() { + close(pcs.stopChan) _ = currentlyProcessedBatchesGauge.UpsertEntry(func() int64 { - return int64(0) + return int64(pcs.numberOfCurrentlyProcessedItems()) }, metricdata.NewLabelValue(pcs.queueName)) - close(pcs.stopChan) }) } @@ -230,11 +240,8 @@ func (pcs *persistentContiguousStorage) put(req request) error { pcs.writeIndex++ _, err := newBatch(pcs).setItemIndex(writeIndexKey, pcs.writeIndex).setRequest(itemKey, req).execute(ctx) - if err != nil { - return err - } - return nil + return err } // getNextItem pulls the next available item from the persistent storage; if none is found, returns (nil, false) @@ -242,7 +249,7 @@ func (pcs *persistentContiguousStorage) getNextItem(ctx context.Context) (reques pcs.mu.Lock() defer pcs.mu.Unlock() - if pcs.readIndex < pcs.writeIndex { + if pcs.readIndex != pcs.writeIndex { index := pcs.readIndex // Increase here, so even if errors happen below, it always iterates pcs.readIndex++ @@ -286,7 +293,7 @@ func (pcs *persistentContiguousStorage) retrieveUnprocessedItems(ctx context.Con processedItems, err = batch.getItemIndexArrayResult(currentlyProcessedItemsKey) } if err != nil { - pcs.logger.Warn("could not fetch items left by 
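+// On disk, each request is stored under its item index rendered as a decimal string,
+// while the read index ("ri"), the write index ("wi") and the list of currently
+// processed items ("pi") are kept under dedicated keys, which allows the queue to be
+// resumed after a collector restart.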
consumers", zap.String(zapQueueNameKey, pcs.queueName)) + pcs.logger.Error("could not fetch items left by consumers", zap.String(zapQueueNameKey, pcs.queueName), zap.Error(err)) return reqs } @@ -309,7 +316,7 @@ func (pcs *persistentContiguousStorage) retrieveUnprocessedItems(ctx context.Con _, err = batch.execute(ctx) if err != nil { - pcs.logger.Warn("failed cleaning items left by consumers", zap.String(zapQueueNameKey, pcs.queueName)) + pcs.logger.Warn("failed cleaning items left by consumers", zap.String(zapQueueNameKey, pcs.queueName), zap.Error(err)) return reqs } @@ -317,7 +324,7 @@ func (pcs *persistentContiguousStorage) retrieveUnprocessedItems(ctx context.Con req, err := batch.getRequestResult(key) if err != nil { pcs.logger.Warn("failed unmarshalling item", - zap.String(zapQueueNameKey, pcs.queueName), zap.String(zapKey, key)) + zap.String(zapQueueNameKey, pcs.queueName), zap.String(zapKey, key), zap.Error(err)) } else { reqs[i] = req } @@ -364,11 +371,11 @@ func (pcs *persistentContiguousStorage) updateReadIndex(ctx context.Context) { execute(ctx) if err != nil { - pcs.logger.Warn("failed updating read index", + pcs.logger.Debug("failed updating read index", zap.String(zapQueueNameKey, pcs.queueName), zap.Error(err)) } } func (pcs *persistentContiguousStorage) itemKey(index itemIndex) string { - return fmt.Sprintf("%d", index) + return strconv.FormatUint(uint64(index), 10) } diff --git a/exporter/exporterhelper/persistent_storage_batch.go b/exporter/exporterhelper/persistent_storage_batch.go index eb2a73f27a6..70629ed544b 100644 --- a/exporter/exporterhelper/persistent_storage_batch.go +++ b/exporter/exporterhelper/persistent_storage_batch.go @@ -18,6 +18,7 @@ import ( "bytes" "context" "encoding/binary" + "errors" "sync" "go.uber.org/zap" @@ -25,6 +26,8 @@ import ( "go.opentelemetry.io/collector/extension/storage" ) +var errItemIndexArrInvalidDataType = errors.New("invalid data type, expected []itemIndex") + // batchStruct provides convenience capabilities for creating and processing storage extension batches type batchStruct struct { logger *zap.Logger @@ -65,7 +68,7 @@ func (bof *batchStruct) set(key string, value interface{}, marshal func(interfac valueBytes, err := marshal(value) if err != nil { - bof.logger.Warn("failed marshaling item, skipping it", zap.String(zapKey, key), zap.Error(err)) + bof.logger.Debug("failed marshaling item, skipping it", zap.String(zapKey, key), zap.Error(err)) } bof.operations = append(bof.operations, storage.SetOperation(key, valueBytes)) @@ -186,16 +189,18 @@ func bytesToItemIndex(b []byte) (interface{}, error) { func itemIndexArrayToBytes(arr interface{}) ([]byte, error) { var buf bytes.Buffer - count := 0 + size := 0 if arr != nil { arrItemIndex, ok := arr.([]itemIndex) if ok { - count = len(arrItemIndex) + size = len(arrItemIndex) + } else { + return nil, errItemIndexArrInvalidDataType } } - err := binary.Write(&buf, binary.LittleEndian, uint32(count)) + err := binary.Write(&buf, binary.LittleEndian, uint32(size)) if err != nil { return nil, err } diff --git a/exporter/exporterhelper/persistent_storage_test.go b/exporter/exporterhelper/persistent_storage_test.go index 6e51fcb9f88..c29e26d6dee 100644 --- a/exporter/exporterhelper/persistent_storage_test.go +++ b/exporter/exporterhelper/persistent_storage_test.go @@ -146,7 +146,7 @@ func TestPersistentStorage_MetricsReported(t *testing.T) { checkValueForProducer(t, []tag.Tag{{Key: exporterTag, Value: "foo"}}, int64(2), "exporter/processed_batches_size") ps.stop() - checkValueForProducer(t, 
[]tag.Tag{{Key: exporterTag, Value: "foo"}}, int64(0), "exporter/processed_batches_size") + checkValueForProducer(t, []tag.Tag{{Key: exporterTag, Value: "foo"}}, int64(2), "exporter/processed_batches_size") } func TestPersistentStorage_RepeatPutCloseReadClose(t *testing.T) { diff --git a/exporter/exporterhelper/queued_retry.go b/exporter/exporterhelper/queued_retry.go index 00214aaccfc..a588ff3fc59 100644 --- a/exporter/exporterhelper/queued_retry.go +++ b/exporter/exporterhelper/queued_retry.go @@ -97,7 +97,7 @@ type RetrySettings struct { // MaxInterval is the upper bound on backoff interval. Once this value is reached the delay between // consecutive retries will always be `MaxInterval`. MaxInterval time.Duration `mapstructure:"max_interval"` - // MaxElapsedTime is the maximum amount of time (including retries) spent trying to send a request/batchStruct. + // MaxElapsedTime is the maximum amount of time (including retries) spent trying to send a request/batch. // Once this value is reached, the data is discarded. MaxElapsedTime time.Duration `mapstructure:"max_elapsed_time"` } @@ -114,7 +114,7 @@ func DefaultRetrySettings() RetrySettings { type queuedRetrySender struct { id config.ComponentID - signal signalType + signal config.DataType cfg QueueSettings consumerSender requestSender queue consumersQueue @@ -144,7 +144,7 @@ func createSampledLogger(logger *zap.Logger) *zap.Logger { return logger.WithOptions(opts) } -func newQueuedRetrySender(id config.ComponentID, signal signalType, qCfg QueueSettings, rCfg RetrySettings, reqUnmarshaler requestUnmarshaler, nextSender requestSender, logger *zap.Logger) *queuedRetrySender { +func newQueuedRetrySender(id config.ComponentID, signal config.DataType, qCfg QueueSettings, rCfg RetrySettings, reqUnmarshaler requestUnmarshaler, nextSender requestSender, logger *zap.Logger) *queuedRetrySender { retryStopCh := make(chan struct{}) sampledLogger := createSampledLogger(logger) traceAttr := attribute.String(obsmetrics.ExporterKey, id.String()) @@ -212,7 +212,7 @@ func (qrs *queuedRetrySender) onTemporaryFailure(req request, err error) error { return err } -func getStorageClient(ctx context.Context, host component.Host, id config.ComponentID, signal signalType) (*storage.Client, error) { +func getStorageClient(ctx context.Context, host component.Host, id config.ComponentID, signal config.DataType) (*storage.Client, error) { var storageExtension storage.Extension for _, ext := range host.GetExtensions() { if se, ok := ext.(storage.Extension); ok { @@ -423,7 +423,7 @@ func (rs *retrySender) send(req request) error { backoffDelay := expBackoff.NextBackOff() if backoffDelay == backoff.Stop { - // throw away the batchStruct + // throw away the batch err = fmt.Errorf("max elapsed time expired %w", err) return rs.onTemporaryFailure(req, err) } diff --git a/exporter/exporterhelper/traces.go b/exporter/exporterhelper/traces.go index 9507769218f..7f0ef0dbc5a 100644 --- a/exporter/exporterhelper/traces.go +++ b/exporter/exporterhelper/traces.go @@ -100,7 +100,7 @@ func NewTracesExporter( } bs := fromOptions(options...) 
- be := newBaseExporter(cfg, set, bs, signalTraces, newTraceRequestUnmarshalerFunc(pusher)) + be := newBaseExporter(cfg, set, bs, config.TracesDataType, newTraceRequestUnmarshalerFunc(pusher)) be.wrapConsumerSender(func(nextSender requestSender) requestSender { return &tracesExporterWithObservability{ obsrep: be.obsrep, From 6412dca3fedc8f2f158eeea8b53bbb9e70ffe5bf Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Fri, 27 Aug 2021 13:31:07 +0200 Subject: [PATCH 11/24] Update persistent queue to use internal Bounded Queue --- exporter/exporterhelper/persistent_queue.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/exporter/exporterhelper/persistent_queue.go b/exporter/exporterhelper/persistent_queue.go index 04315158adc..a10b9bab0a5 100644 --- a/exporter/exporterhelper/persistent_queue.go +++ b/exporter/exporterhelper/persistent_queue.go @@ -18,9 +18,9 @@ import ( "context" "sync" - "github.com/jaegertracing/jaeger/pkg/queue" "go.uber.org/zap" + "go.opentelemetry.io/collector/exporter/exporterhelper/internal" "go.opentelemetry.io/collector/extension/storage" ) @@ -51,8 +51,8 @@ func newPersistentQueue(ctx context.Context, name string, capacity int, logger * func (pq *persistentQueue) StartConsumers(num int, callback func(item interface{})) { pq.numWorkers = num - factory := func() queue.Consumer { - return queue.ConsumerFunc(callback) + factory := func() internal.Consumer { + return internal.ConsumerFunc(callback) } for i := 0; i < pq.numWorkers; i++ { From 1bb552e2d564805aa479801f8f900b1035eac3d7 Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Fri, 27 Aug 2021 14:13:15 +0200 Subject: [PATCH 12/24] Do not operation if marshaling failed --- exporter/exporterhelper/persistent_storage_batch.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/exporter/exporterhelper/persistent_storage_batch.go b/exporter/exporterhelper/persistent_storage_batch.go index 70629ed544b..342d08135ff 100644 --- a/exporter/exporterhelper/persistent_storage_batch.go +++ b/exporter/exporterhelper/persistent_storage_batch.go @@ -69,9 +69,10 @@ func (bof *batchStruct) set(key string, value interface{}, marshal func(interfac valueBytes, err := marshal(value) if err != nil { bof.logger.Debug("failed marshaling item, skipping it", zap.String(zapKey, key), zap.Error(err)) + } else { + bof.operations = append(bof.operations, storage.SetOperation(key, valueBytes)) } - bof.operations = append(bof.operations, storage.SetOperation(key, valueBytes)) return bof } From f331906126008b33e51e218d21508e1cdcb945f9 Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Mon, 30 Aug 2021 16:55:02 +0200 Subject: [PATCH 13/24] Better batch handling and removal of unnecessary mutex --- exporter/exporterhelper/persistent_storage.go | 29 ++++++++++++------- .../persistent_storage_batch.go | 15 ---------- 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/exporter/exporterhelper/persistent_storage.go b/exporter/exporterhelper/persistent_storage.go index b3597cf377f..7e2bb10e465 100644 --- a/exporter/exporterhelper/persistent_storage.go +++ b/exporter/exporterhelper/persistent_storage.go @@ -306,22 +306,31 @@ func (pcs *persistentContiguousStorage) retrieveUnprocessedItems(ctx context.Con reqs = make([]request, len(processedItems)) keys := make([]string, len(processedItems)) - batch = newBatch(pcs) + retrieveBatch := newBatch(pcs) + cleanupBatch := newBatch(pcs) for i, it := range processedItems { keys[i] = pcs.itemKey(it) - batch. - get(keys[i]). 
- delete(keys[i]) + retrieveBatch.get(keys[i]) + cleanupBatch.delete(keys[i]) } - _, err = batch.execute(ctx) - if err != nil { - pcs.logger.Warn("failed cleaning items left by consumers", zap.String(zapQueueNameKey, pcs.queueName), zap.Error(err)) + _, retrieveErr := retrieveBatch.execute(ctx) + _, cleanupErr := cleanupBatch.execute(ctx) + + if retrieveErr != nil { + pcs.logger.Warn("failed retrieving items left by consumers", zap.String(zapQueueNameKey, pcs.queueName), zap.Error(retrieveErr)) + } + + if cleanupErr != nil { + pcs.logger.Debug("failed cleaning items left by consumers", zap.String(zapQueueNameKey, pcs.queueName), zap.Error(cleanupErr)) + } + + if retrieveErr != nil { return reqs } for i, key := range keys { - req, err := batch.getRequestResult(key) + req, err := retrieveBatch.getRequestResult(key) if err != nil { pcs.logger.Warn("failed unmarshalling item", zap.String(zapQueueNameKey, pcs.queueName), zap.String(zapKey, key), zap.Error(err)) @@ -340,7 +349,7 @@ func (pcs *persistentContiguousStorage) itemProcessingStart(ctx context.Context, setItemIndexArray(currentlyProcessedItemsKey, pcs.currentlyProcessedItems). execute(ctx) if err != nil { - pcs.logger.Warn("failed updating currently processed items", + pcs.logger.Debug("failed updating currently processed items", zap.String(zapQueueNameKey, pcs.queueName), zap.Error(err)) } } @@ -360,7 +369,7 @@ func (pcs *persistentContiguousStorage) itemProcessingFinish(ctx context.Context delete(pcs.itemKey(index)). execute(ctx) if err != nil { - pcs.logger.Warn("failed updating currently processed items", + pcs.logger.Debug("failed updating currently processed items", zap.String(zapQueueNameKey, pcs.queueName), zap.Error(err)) } } diff --git a/exporter/exporterhelper/persistent_storage_batch.go b/exporter/exporterhelper/persistent_storage_batch.go index 342d08135ff..0c17fa7d031 100644 --- a/exporter/exporterhelper/persistent_storage_batch.go +++ b/exporter/exporterhelper/persistent_storage_batch.go @@ -19,7 +19,6 @@ import ( "context" "encoding/binary" "errors" - "sync" "go.uber.org/zap" @@ -33,8 +32,6 @@ type batchStruct struct { logger *zap.Logger pcs *persistentContiguousStorage - mu sync.Mutex - operations []storage.Operation getOperations map[string]storage.Operation } @@ -50,9 +47,6 @@ func newBatch(pcs *persistentContiguousStorage) *batchStruct { // execute runs the provided operations in order func (bof *batchStruct) execute(ctx context.Context) (*batchStruct, error) { - bof.mu.Lock() - defer bof.mu.Unlock() - err := bof.pcs.client.Batch(ctx, bof.operations...) if err != nil { return nil, err @@ -63,9 +57,6 @@ func (bof *batchStruct) execute(ctx context.Context) (*batchStruct, error) { // set adds a Set operation to the batch func (bof *batchStruct) set(key string, value interface{}, marshal func(interface{}) ([]byte, error)) *batchStruct { - bof.mu.Lock() - defer bof.mu.Unlock() - valueBytes, err := marshal(value) if err != nil { bof.logger.Debug("failed marshaling item, skipping it", zap.String(zapKey, key), zap.Error(err)) @@ -78,9 +69,6 @@ func (bof *batchStruct) set(key string, value interface{}, marshal func(interfac // get adds a Get operation to the batch. 
After executing, its result will be available through getResult func (bof *batchStruct) get(keys ...string) *batchStruct { - bof.mu.Lock() - defer bof.mu.Unlock() - for _, key := range keys { op := storage.GetOperation(key) bof.getOperations[key] = op @@ -92,9 +80,6 @@ func (bof *batchStruct) get(keys ...string) *batchStruct { // delete adds a Delete operation to the batch func (bof *batchStruct) delete(keys ...string) *batchStruct { - bof.mu.Lock() - defer bof.mu.Unlock() - for _, key := range keys { bof.operations = append(bof.operations, storage.DeleteOperation(key)) } From 5545f0c3f0fe8313195d2fe6ea69478d762aa546 Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Mon, 30 Aug 2021 22:32:26 +0200 Subject: [PATCH 14/24] Replace polling with signaling through channel --- exporter/exporterhelper/persistent_queue.go | 2 +- .../exporterhelper/persistent_queue_test.go | 2 +- exporter/exporterhelper/persistent_storage.go | 58 +++++++++++-------- .../exporterhelper/persistent_storage_test.go | 2 +- 4 files changed, 36 insertions(+), 28 deletions(-) diff --git a/exporter/exporterhelper/persistent_queue.go b/exporter/exporterhelper/persistent_queue.go index a10b9bab0a5..615e181b5ab 100644 --- a/exporter/exporterhelper/persistent_queue.go +++ b/exporter/exporterhelper/persistent_queue.go @@ -43,7 +43,7 @@ func newPersistentQueue(ctx context.Context, name string, capacity int, logger * logger: logger, stopChan: make(chan struct{}), capacity: capacity, - storage: newPersistentContiguousStorage(ctx, name, logger, client, unmarshaler), + storage: newPersistentContiguousStorage(ctx, name, capacity, logger, client, unmarshaler), } } diff --git a/exporter/exporterhelper/persistent_queue_test.go b/exporter/exporterhelper/persistent_queue_test.go index f0f05d970d6..4f1e74e2bf5 100644 --- a/exporter/exporterhelper/persistent_queue_test.go +++ b/exporter/exporterhelper/persistent_queue_test.go @@ -65,7 +65,7 @@ func TestPersistentQueue_Capacity(t *testing.T) { require.False(t, result) } - // Lets make sure the loop picks the first element into the channel, + // Let's make sure the loop picks the first element into the channel, // so the capacity could be used in full if i == 0 { require.Eventually(t, func() bool { diff --git a/exporter/exporterhelper/persistent_storage.go b/exporter/exporterhelper/persistent_storage.go index 7e2bb10e465..493e4040b4e 100644 --- a/exporter/exporterhelper/persistent_storage.go +++ b/exporter/exporterhelper/persistent_storage.go @@ -19,7 +19,6 @@ import ( "errors" "strconv" "sync" - "time" "go.opencensus.io/metric/metricdata" "go.uber.org/zap" @@ -33,7 +32,7 @@ type persistentStorage interface { put(req request) error // get returns the next available request; note that the channel is unbuffered get() <-chan request - // size returns the current size of the storage with items waiting for processing + // size returns the current size of the persistent storage with items waiting for processing size() int // stop gracefully stops the storage stop() @@ -67,12 +66,13 @@ type persistentContiguousStorage struct { logger *zap.Logger queueName string client storage.Client - retryDelay time.Duration unmarshaler requestUnmarshaler - reqChan chan request - stopOnce sync.Once + putChan chan struct{} stopChan chan struct{} + stopOnce sync.Once + + reqChan chan request mu sync.Mutex readIndex itemIndex @@ -88,8 +88,6 @@ const ( zapErrorCount = "errorCount" zapNumberOfItems = "numberOfItems" - defaultRetryDelay = 100 * time.Millisecond - readIndexKey = "ri" writeIndexKey = "wi" 
currentlyProcessedItemsKey = "pi" @@ -104,18 +102,19 @@ var ( // newPersistentContiguousStorage creates a new file-storage extension backed queue; // queueName parameter must be a unique value that identifies the queue. // The queue needs to be initialized separately using initPersistentContiguousStorage. -func newPersistentContiguousStorage(ctx context.Context, queueName string, logger *zap.Logger, client storage.Client, unmarshaler requestUnmarshaler) *persistentContiguousStorage { +func newPersistentContiguousStorage(ctx context.Context, queueName string, channelCapacity int, logger *zap.Logger, client storage.Client, unmarshaler requestUnmarshaler) *persistentContiguousStorage { pcs := &persistentContiguousStorage{ logger: logger, client: client, queueName: queueName, unmarshaler: unmarshaler, + putChan: make(chan struct{}, channelCapacity), reqChan: make(chan request), stopChan: make(chan struct{}), - retryDelay: defaultRetryDelay, } initPersistentContiguousStorage(ctx, pcs) + unprocessedReqs := pcs.retrieveUnprocessedItems(context.Background()) err := currentlyProcessedBatchesGauge.UpsertEntry(func() int64 { return int64(pcs.numberOfCurrentlyProcessedItems()) @@ -124,7 +123,18 @@ func newPersistentContiguousStorage(ctx context.Context, queueName string, logge logger.Error("failed to create number of currently processed items metric", zap.Error(err)) } + // We start the loop first so in case there are more elements in the persistent storage than the capacity, + // it does not get blocked on initialization + go pcs.loop() + + // Make sure the leftover requests are handled + pcs.enqueueUnprocessedReqs(unprocessedReqs) + // Make sure the communication channel is loaded up + for i := 0; i < pcs.size(); i++ { + pcs.putChan <- struct{}{} + } + return pcs } @@ -153,10 +163,7 @@ func initPersistentContiguousStorage(ctx context.Context, pcs *persistentContigu } } -// loop is the main loop that handles fetching items from the persistent buffer -func (pcs *persistentContiguousStorage) loop() { - // We want to run it here so it's not blocking - reqs := pcs.retrieveUnprocessedItems(context.Background()) +func (pcs *persistentContiguousStorage) enqueueUnprocessedReqs(reqs []request) { if len(reqs) > 0 { errCount := 0 for _, req := range reqs { @@ -176,20 +183,18 @@ func (pcs *persistentContiguousStorage) loop() { } } +} +// loop is the main loop that handles fetching items from the persistent buffer +func (pcs *persistentContiguousStorage) loop() { for { - req, found := pcs.getNextItem(context.Background()) - if found { - select { - case <-pcs.stopChan: - return - case pcs.reqChan <- req: - } - } else { - select { - case <-pcs.stopChan: - return - case <-time.After(pcs.retryDelay): + select { + case <-pcs.stopChan: + return + case <-pcs.putChan: + req, found := pcs.getNextItem(context.Background()) + if found { + pcs.reqChan <- req } } } @@ -241,6 +246,9 @@ func (pcs *persistentContiguousStorage) put(req request) error { _, err := newBatch(pcs).setItemIndex(writeIndexKey, pcs.writeIndex).setRequest(itemKey, req).execute(ctx) + // Inform the loop that there's some data to process + pcs.putChan <- struct{}{} + return err } diff --git a/exporter/exporterhelper/persistent_storage_test.go b/exporter/exporterhelper/persistent_storage_test.go index c29e26d6dee..c6e00b24a0a 100644 --- a/exporter/exporterhelper/persistent_storage_test.go +++ b/exporter/exporterhelper/persistent_storage_test.go @@ -49,7 +49,7 @@ func createTestClient(extension storage.Extension) storage.Client { func 
createTestPersistentStorage(client storage.Client) *persistentContiguousStorage { logger, _ := zap.NewDevelopment() - return newPersistentContiguousStorage(context.Background(), "foo", logger, client, newTraceRequestUnmarshalerFunc(nopTracePusher())) + return newPersistentContiguousStorage(context.Background(), "foo", 1000, logger, client, newTraceRequestUnmarshalerFunc(nopTracePusher())) } func createTemporaryDirectory() string { From 249c678abcd66395198c5031c4a0ff5f902ddcae Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Tue, 31 Aug 2021 13:14:37 +0200 Subject: [PATCH 15/24] Remove persistent queue mutexes --- exporter/exporterhelper/persistent_queue.go | 16 ++------- exporter/exporterhelper/persistent_storage.go | 33 ++++++++++++------- .../exporterhelper/persistent_storage_test.go | 18 ++++++---- 3 files changed, 36 insertions(+), 31 deletions(-) diff --git a/exporter/exporterhelper/persistent_queue.go b/exporter/exporterhelper/persistent_queue.go index 615e181b5ab..3915514b7d8 100644 --- a/exporter/exporterhelper/persistent_queue.go +++ b/exporter/exporterhelper/persistent_queue.go @@ -30,11 +30,8 @@ type persistentQueue struct { stopWG sync.WaitGroup stopOnce sync.Once stopChan chan struct{} - capacity int numWorkers int - - produceMu sync.Mutex - storage persistentStorage + storage persistentStorage } // newPersistentQueue creates a new queue backed by file storage; name parameter must be a unique value that identifies the queue @@ -42,8 +39,7 @@ func newPersistentQueue(ctx context.Context, name string, capacity int, logger * return &persistentQueue{ logger: logger, stopChan: make(chan struct{}), - capacity: capacity, - storage: newPersistentContiguousStorage(ctx, name, capacity, logger, client, unmarshaler), + storage: newPersistentContiguousStorage(ctx, name, uint64(capacity), logger, client, unmarshaler), } } @@ -75,12 +71,6 @@ func (pq *persistentQueue) StartConsumers(num int, callback func(item interface{ // Produce adds an item to the queue and returns true if it was accepted func (pq *persistentQueue) Produce(item interface{}) bool { - pq.produceMu.Lock() - defer pq.produceMu.Unlock() - - if pq.storage.size() >= pq.capacity { - return false - } err := pq.storage.put(item.(request)) return err == nil } @@ -96,5 +86,5 @@ func (pq *persistentQueue) Stop() { // Size returns the current depth of the queue, excluding the item already in the storage channel (if any) func (pq *persistentQueue) Size() int { - return pq.storage.size() + return int(pq.storage.size()) } diff --git a/exporter/exporterhelper/persistent_storage.go b/exporter/exporterhelper/persistent_storage.go index 493e4040b4e..c28e72bde63 100644 --- a/exporter/exporterhelper/persistent_storage.go +++ b/exporter/exporterhelper/persistent_storage.go @@ -19,6 +19,7 @@ import ( "errors" "strconv" "sync" + "sync/atomic" "go.opencensus.io/metric/metricdata" "go.uber.org/zap" @@ -33,7 +34,7 @@ type persistentStorage interface { // get returns the next available request; note that the channel is unbuffered get() <-chan request // size returns the current size of the persistent storage with items waiting for processing - size() int + size() uint64 // stop gracefully stops the storage stop() } @@ -71,6 +72,7 @@ type persistentContiguousStorage struct { putChan chan struct{} stopChan chan struct{} stopOnce sync.Once + capacity uint64 reqChan chan request @@ -78,6 +80,8 @@ type persistentContiguousStorage struct { readIndex itemIndex writeIndex itemIndex currentlyProcessedItems []itemIndex + + itemsCount uint64 } type 
itemIndex uint64 @@ -94,21 +98,22 @@ const ( ) var ( - errValueNotSet = errors.New("value not set") - + errMaxCapacityReached = errors.New("max capacity reached") + errValueNotSet = errors.New("value not set") errKeyNotPresentInBatch = errors.New("key was not present in get batchStruct") ) // newPersistentContiguousStorage creates a new file-storage extension backed queue; // queueName parameter must be a unique value that identifies the queue. // The queue needs to be initialized separately using initPersistentContiguousStorage. -func newPersistentContiguousStorage(ctx context.Context, queueName string, channelCapacity int, logger *zap.Logger, client storage.Client, unmarshaler requestUnmarshaler) *persistentContiguousStorage { +func newPersistentContiguousStorage(ctx context.Context, queueName string, capacity uint64, logger *zap.Logger, client storage.Client, unmarshaler requestUnmarshaler) *persistentContiguousStorage { pcs := &persistentContiguousStorage{ logger: logger, client: client, queueName: queueName, unmarshaler: unmarshaler, - putChan: make(chan struct{}, channelCapacity), + capacity: capacity, + putChan: make(chan struct{}, capacity), reqChan: make(chan request), stopChan: make(chan struct{}), } @@ -131,7 +136,7 @@ func newPersistentContiguousStorage(ctx context.Context, queueName string, chann // Make sure the leftover requests are handled pcs.enqueueUnprocessedReqs(unprocessedReqs) // Make sure the communication channel is loaded up - for i := 0; i < pcs.size(); i++ { + for i := uint64(0); i < pcs.size(); i++ { pcs.putChan <- struct{}{} } @@ -161,6 +166,8 @@ func initPersistentContiguousStorage(ctx context.Context, pcs *persistentContigu pcs.readIndex = readIndex pcs.writeIndex = writeIndex } + + atomic.StoreUint64(&pcs.itemsCount, uint64(pcs.writeIndex-pcs.readIndex)) } func (pcs *persistentContiguousStorage) enqueueUnprocessedReqs(reqs []request) { @@ -206,10 +213,8 @@ func (pcs *persistentContiguousStorage) get() <-chan request { } // size returns the number of currently available items, which were not picked by consumers yet -func (pcs *persistentContiguousStorage) size() int { - pcs.mu.Lock() - defer pcs.mu.Unlock() - return int(pcs.writeIndex - pcs.readIndex) +func (pcs *persistentContiguousStorage) size() uint64 { + return atomic.LoadUint64(&pcs.itemsCount) } // numberOfCurrentlyProcessedItems returns the count of batches for which processing started but hasn't finish yet @@ -239,11 +244,16 @@ func (pcs *persistentContiguousStorage) put(req request) error { pcs.mu.Lock() defer pcs.mu.Unlock() - ctx := context.Background() + if pcs.size() >= pcs.capacity { + pcs.logger.Warn("maximum queue capacity reached", zap.String(zapQueueNameKey, pcs.queueName)) + return errMaxCapacityReached + } itemKey := pcs.itemKey(pcs.writeIndex) pcs.writeIndex++ + atomic.StoreUint64(&pcs.itemsCount, uint64(pcs.writeIndex-pcs.readIndex)) + ctx := context.Background() _, err := newBatch(pcs).setItemIndex(writeIndexKey, pcs.writeIndex).setRequest(itemKey, req).execute(ctx) // Inform the loop that there's some data to process @@ -261,6 +271,7 @@ func (pcs *persistentContiguousStorage) getNextItem(ctx context.Context) (reques index := pcs.readIndex // Increase here, so even if errors happen below, it always iterates pcs.readIndex++ + atomic.StoreUint64(&pcs.itemsCount, uint64(pcs.writeIndex-pcs.readIndex)) pcs.updateReadIndex(ctx) pcs.itemProcessingStart(ctx, index) diff --git a/exporter/exporterhelper/persistent_storage_test.go b/exporter/exporterhelper/persistent_storage_test.go index 
c6e00b24a0a..b03e47286fd 100644 --- a/exporter/exporterhelper/persistent_storage_test.go +++ b/exporter/exporterhelper/persistent_storage_test.go @@ -47,9 +47,13 @@ func createTestClient(extension storage.Extension) storage.Client { return client } -func createTestPersistentStorage(client storage.Client) *persistentContiguousStorage { +func createTestPersistentStorageWithCapacity(client storage.Client, capacity uint64) *persistentContiguousStorage { logger, _ := zap.NewDevelopment() - return newPersistentContiguousStorage(context.Background(), "foo", 1000, logger, client, newTraceRequestUnmarshalerFunc(nopTracePusher())) + return newPersistentContiguousStorage(context.Background(), "foo", capacity, logger, client, newTraceRequestUnmarshalerFunc(nopTracePusher())) +} + +func createTestPersistentStorage(client storage.Client) *persistentContiguousStorage { + return createTestPersistentStorageWithCapacity(client, 1000) } func createTemporaryDirectory() string { @@ -160,7 +164,7 @@ func TestPersistentStorage_RepeatPutCloseReadClose(t *testing.T) { ext := createStorageExtension(path) client := createTestClient(ext) ps := createTestPersistentStorage(client) - require.Equal(t, 0, ps.size()) + require.Equal(t, uint64(0), ps.size()) // Put two elements err := ps.put(req) @@ -186,7 +190,7 @@ func TestPersistentStorage_RepeatPutCloseReadClose(t *testing.T) { readReq = getItemFromChannel(t, ps) require.Equal(t, req.(*tracesRequest).td, readReq.(*tracesRequest).td) - require.Equal(t, 0, ps.size()) + require.Equal(t, uint64(0), ps.size()) err = ext.Shutdown(context.Background()) require.NoError(t, err) @@ -207,12 +211,12 @@ func TestPersistentStorage_EmptyRequest(t *testing.T) { client := createTestClient(ext) ps := createTestPersistentStorage(client) - require.Equal(t, 0, ps.size()) + require.Equal(t, uint64(0), ps.size()) err := ps.put(nil) require.NoError(t, err) - require.Equal(t, 0, ps.size()) + require.Equal(t, uint64(0), ps.size()) err = ext.Shutdown(context.Background()) require.NoError(t, err) @@ -243,7 +247,7 @@ func BenchmarkPersistentStorage_TraceSpans(b *testing.B) { defer os.RemoveAll(path) ext := createStorageExtension(path) client := createTestClient(ext) - ps := createTestPersistentStorage(client) + ps := createTestPersistentStorageWithCapacity(client, 10000000) traces := newTraces(c.numTraces, c.numSpansPerTrace) req := newTracesRequest(context.Background(), traces, nopTracePusher()) From 7b3794e16f370c8af3afe5225767e15b2510d8f2 Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Tue, 31 Aug 2021 13:23:34 +0200 Subject: [PATCH 16/24] Mute logs during benchmarks --- exporter/exporterhelper/persistent_storage_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/exporter/exporterhelper/persistent_storage_test.go b/exporter/exporterhelper/persistent_storage_test.go index b03e47286fd..c7ee2c4137e 100644 --- a/exporter/exporterhelper/persistent_storage_test.go +++ b/exporter/exporterhelper/persistent_storage_test.go @@ -47,13 +47,13 @@ func createTestClient(extension storage.Extension) storage.Client { return client } -func createTestPersistentStorageWithCapacity(client storage.Client, capacity uint64) *persistentContiguousStorage { - logger, _ := zap.NewDevelopment() +func createTestPersistentStorageWithLoggingAndCapacity(client storage.Client, logger *zap.Logger, capacity uint64) *persistentContiguousStorage { return newPersistentContiguousStorage(context.Background(), "foo", capacity, logger, client, newTraceRequestUnmarshalerFunc(nopTracePusher())) } func 
createTestPersistentStorage(client storage.Client) *persistentContiguousStorage { - return createTestPersistentStorageWithCapacity(client, 1000) + logger, _ := zap.NewDevelopment() + return createTestPersistentStorageWithLoggingAndCapacity(client, logger, 1000) } func createTemporaryDirectory() string { @@ -247,7 +247,7 @@ func BenchmarkPersistentStorage_TraceSpans(b *testing.B) { defer os.RemoveAll(path) ext := createStorageExtension(path) client := createTestClient(ext) - ps := createTestPersistentStorageWithCapacity(client, 10000000) + ps := createTestPersistentStorageWithLoggingAndCapacity(client, zap.NewNop(), 10000000) traces := newTraces(c.numTraces, c.numSpansPerTrace) req := newTracesRequest(context.Background(), traces, nopTracePusher()) From 64b713a4a9a68e1fc9a2bc0169b32378951c2495 Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Tue, 31 Aug 2021 14:45:44 +0200 Subject: [PATCH 17/24] Cleanup error messages --- exporter/exporterhelper/persistent_storage.go | 40 ++++++++++--------- .../persistent_storage_batch.go | 2 +- .../exporterhelper/persistent_storage_test.go | 4 +- exporter/exporterhelper/queued_retry.go | 2 +- 4 files changed, 26 insertions(+), 22 deletions(-) diff --git a/exporter/exporterhelper/persistent_storage.go b/exporter/exporterhelper/persistent_storage.go index c28e72bde63..7831c15a27a 100644 --- a/exporter/exporterhelper/persistent_storage.go +++ b/exporter/exporterhelper/persistent_storage.go @@ -157,9 +157,13 @@ func initPersistentContiguousStorage(ctx context.Context, pcs *persistentContigu } if err != nil { - pcs.logger.Error("failed getting read/write index, starting with new ones", - zap.String(zapQueueNameKey, pcs.queueName), - zap.Error(err)) + if err == errValueNotSet { + pcs.logger.Info("Initializing new persistent queue", zap.String(zapQueueNameKey, pcs.queueName)) + } else { + pcs.logger.Error("Failed getting read/write index, starting with new ones", + zap.String(zapQueueNameKey, pcs.queueName), + zap.Error(err)) + } pcs.readIndex = 0 pcs.writeIndex = 0 } else { @@ -179,12 +183,12 @@ func (pcs *persistentContiguousStorage) enqueueUnprocessedReqs(reqs []request) { } } if errCount > 0 { - pcs.logger.Error("errors occurred while moving items for processing back to queue", + pcs.logger.Error("Errors occurred while moving items for processing back to queue", zap.String(zapQueueNameKey, pcs.queueName), zap.Int(zapNumberOfItems, len(reqs)), zap.Int(zapErrorCount, errCount)) } else { - pcs.logger.Info("moved items for processing back to queue", + pcs.logger.Info("Moved items for processing back to queue", zap.String(zapQueueNameKey, pcs.queueName), zap.Int(zapNumberOfItems, len(reqs))) @@ -217,7 +221,7 @@ func (pcs *persistentContiguousStorage) size() uint64 { return atomic.LoadUint64(&pcs.itemsCount) } -// numberOfCurrentlyProcessedItems returns the count of batches for which processing started but hasn't finish yet +// numberOfCurrentlyProcessedItems returns the count of batches for which processing started but hasn't finished yet func (pcs *persistentContiguousStorage) numberOfCurrentlyProcessedItems() int { pcs.mu.Lock() defer pcs.mu.Unlock() @@ -225,7 +229,7 @@ func (pcs *persistentContiguousStorage) numberOfCurrentlyProcessedItems() int { } func (pcs *persistentContiguousStorage) stop() { - pcs.logger.Debug("stopping persistentContiguousStorage", zap.String(zapQueueNameKey, pcs.queueName)) + pcs.logger.Debug("Stopping persistentContiguousStorage", zap.String(zapQueueNameKey, pcs.queueName)) pcs.stopOnce.Do(func() { close(pcs.stopChan) _ = 
currentlyProcessedBatchesGauge.UpsertEntry(func() int64 { @@ -245,7 +249,7 @@ func (pcs *persistentContiguousStorage) put(req request) error { defer pcs.mu.Unlock() if pcs.size() >= pcs.capacity { - pcs.logger.Warn("maximum queue capacity reached", zap.String(zapQueueNameKey, pcs.queueName)) + pcs.logger.Warn("Maximum queue capacity reached", zap.String(zapQueueNameKey, pcs.queueName)) return errMaxCapacityReached } @@ -306,21 +310,21 @@ func (pcs *persistentContiguousStorage) retrieveUnprocessedItems(ctx context.Con pcs.mu.Lock() defer pcs.mu.Unlock() - pcs.logger.Debug("checking if there are items left by consumers", zap.String(zapQueueNameKey, pcs.queueName)) + pcs.logger.Debug("Checking if there are items left by consumers", zap.String(zapQueueNameKey, pcs.queueName)) batch, err := newBatch(pcs).get(currentlyProcessedItemsKey).execute(ctx) if err == nil { processedItems, err = batch.getItemIndexArrayResult(currentlyProcessedItemsKey) } if err != nil { - pcs.logger.Error("could not fetch items left by consumers", zap.String(zapQueueNameKey, pcs.queueName), zap.Error(err)) + pcs.logger.Error("Could not fetch items left by consumers", zap.String(zapQueueNameKey, pcs.queueName), zap.Error(err)) return reqs } if len(processedItems) > 0 { - pcs.logger.Info("fetching items left for processing by consumers", + pcs.logger.Info("Fetching items left for processing by consumers", zap.String(zapQueueNameKey, pcs.queueName), zap.Int(zapNumberOfItems, len(processedItems))) } else { - pcs.logger.Debug("no items left for processing by consumers") + pcs.logger.Debug("No items left for processing by consumers") } reqs = make([]request, len(processedItems)) @@ -337,11 +341,11 @@ func (pcs *persistentContiguousStorage) retrieveUnprocessedItems(ctx context.Con _, cleanupErr := cleanupBatch.execute(ctx) if retrieveErr != nil { - pcs.logger.Warn("failed retrieving items left by consumers", zap.String(zapQueueNameKey, pcs.queueName), zap.Error(retrieveErr)) + pcs.logger.Warn("Failed retrieving items left by consumers", zap.String(zapQueueNameKey, pcs.queueName), zap.Error(retrieveErr)) } if cleanupErr != nil { - pcs.logger.Debug("failed cleaning items left by consumers", zap.String(zapQueueNameKey, pcs.queueName), zap.Error(cleanupErr)) + pcs.logger.Debug("Failed cleaning items left by consumers", zap.String(zapQueueNameKey, pcs.queueName), zap.Error(cleanupErr)) } if retrieveErr != nil { @@ -351,7 +355,7 @@ func (pcs *persistentContiguousStorage) retrieveUnprocessedItems(ctx context.Con for i, key := range keys { req, err := retrieveBatch.getRequestResult(key) if err != nil { - pcs.logger.Warn("failed unmarshalling item", + pcs.logger.Warn("Failed unmarshalling item", zap.String(zapQueueNameKey, pcs.queueName), zap.String(zapKey, key), zap.Error(err)) } else { reqs[i] = req @@ -368,7 +372,7 @@ func (pcs *persistentContiguousStorage) itemProcessingStart(ctx context.Context, setItemIndexArray(currentlyProcessedItemsKey, pcs.currentlyProcessedItems). execute(ctx) if err != nil { - pcs.logger.Debug("failed updating currently processed items", + pcs.logger.Debug("Failed updating currently processed items", zap.String(zapQueueNameKey, pcs.queueName), zap.Error(err)) } } @@ -388,7 +392,7 @@ func (pcs *persistentContiguousStorage) itemProcessingFinish(ctx context.Context delete(pcs.itemKey(index)). 
execute(ctx) if err != nil { - pcs.logger.Debug("failed updating currently processed items", + pcs.logger.Debug("Failed updating currently processed items", zap.String(zapQueueNameKey, pcs.queueName), zap.Error(err)) } } @@ -399,7 +403,7 @@ func (pcs *persistentContiguousStorage) updateReadIndex(ctx context.Context) { execute(ctx) if err != nil { - pcs.logger.Debug("failed updating read index", + pcs.logger.Debug("Failed updating read index", zap.String(zapQueueNameKey, pcs.queueName), zap.Error(err)) } } diff --git a/exporter/exporterhelper/persistent_storage_batch.go b/exporter/exporterhelper/persistent_storage_batch.go index 0c17fa7d031..ed77695bc8f 100644 --- a/exporter/exporterhelper/persistent_storage_batch.go +++ b/exporter/exporterhelper/persistent_storage_batch.go @@ -59,7 +59,7 @@ func (bof *batchStruct) execute(ctx context.Context) (*batchStruct, error) { func (bof *batchStruct) set(key string, value interface{}, marshal func(interface{}) ([]byte, error)) *batchStruct { valueBytes, err := marshal(value) if err != nil { - bof.logger.Debug("failed marshaling item, skipping it", zap.String(zapKey, key), zap.Error(err)) + bof.logger.Debug("Failed marshaling item, skipping it", zap.String(zapKey, key), zap.Error(err)) } else { bof.operations = append(bof.operations, storage.SetOperation(key, valueBytes)) } diff --git a/exporter/exporterhelper/persistent_storage_test.go b/exporter/exporterhelper/persistent_storage_test.go index c7ee2c4137e..36cbddbfaa0 100644 --- a/exporter/exporterhelper/persistent_storage_test.go +++ b/exporter/exporterhelper/persistent_storage_test.go @@ -147,10 +147,10 @@ func TestPersistentStorage_MetricsReported(t *testing.T) { _ = getItemFromChannel(t, ps) requireCurrentItemIndexesEqual(t, ps, []itemIndex{0, 1}) - checkValueForProducer(t, []tag.Tag{{Key: exporterTag, Value: "foo"}}, int64(2), "exporter/processed_batches_size") + checkValueForProducer(t, []tag.Tag{{Key: exporterTag, Value: "foo"}}, int64(2), "exporter/currently_processed_batches") ps.stop() - checkValueForProducer(t, []tag.Tag{{Key: exporterTag, Value: "foo"}}, int64(2), "exporter/processed_batches_size") + checkValueForProducer(t, []tag.Tag{{Key: exporterTag, Value: "foo"}}, int64(2), "exporter/currently_processed_batches") } func TestPersistentStorage_RepeatPutCloseReadClose(t *testing.T) { diff --git a/exporter/exporterhelper/queued_retry.go b/exporter/exporterhelper/queued_retry.go index a588ff3fc59..5c7ab030d0b 100644 --- a/exporter/exporterhelper/queued_retry.go +++ b/exporter/exporterhelper/queued_retry.go @@ -47,7 +47,7 @@ var ( metric.WithUnit(metricdata.UnitDimensionless)) currentlyProcessedBatchesGauge, _ = r.AddInt64DerivedGauge( - obsmetrics.ExporterKey+"/processed_batches_size", + obsmetrics.ExporterKey+"/currently_processed_batches", metric.WithDescription("Number of currently processed batches"), metric.WithLabelKeys(obsmetrics.ExporterKey), metric.WithUnit(metricdata.UnitDimensionless)) From 647e7bf25a0b7eb870c4b6a2c49a502edf059bef Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Thu, 2 Sep 2021 21:24:53 +0200 Subject: [PATCH 18/24] Use enable_unstable build flag to control inclusion of persistent buffering capability --- Makefile | 13 +- Makefile.Common | 8 + exporter/exporterhelper/README.md | 4 +- exporter/exporterhelper/common.go | 4 +- exporter/exporterhelper/persistent_queue.go | 3 + .../exporterhelper/persistent_queue_test.go | 3 + exporter/exporterhelper/persistent_storage.go | 3 + .../persistent_storage_batch.go | 3 + .../persistent_storage_batch_test.go | 3 + 
.../exporterhelper/persistent_storage_test.go |   3 +
 exporter/exporterhelper/queued_retry.go       | 197 +---------------
 exporter/exporterhelper/queued_retry_v1.go    | 116 ++++++++++
 exporter/exporterhelper/queued_retry_v2.go    | 212 ++++++++++++++++++
 13 files changed, 374 insertions(+), 198 deletions(-)
 create mode 100644 exporter/exporterhelper/queued_retry_v1.go
 create mode 100644 exporter/exporterhelper/queued_retry_v2.go

diff --git a/Makefile b/Makefile
index f2a3b198590..4af74b12a4c 100644
--- a/Makefile
+++ b/Makefile
@@ -66,7 +66,7 @@ gotestinstall:
 
 .PHONY: gotest
 gotest:
-	@$(MAKE) for-all CMD="make test"
+	@$(MAKE) for-all CMD="make test test-unstable"
 
 .PHONY: gobenchmark
 gobenchmark:
@@ -81,7 +81,7 @@ gotest-with-cover:
 
 .PHONY: golint
 golint:
-	@$(MAKE) for-all CMD="make lint"
+	@$(MAKE) for-all CMD="make lint lint-unstable"
 
 .PHONY: goimpi
 goimpi:
@@ -148,6 +148,11 @@ otelcol:
 	go generate ./...
 	$(MAKE) build-binary-internal
 
+.PHONY: otelcol-unstable
+otelcol-unstable:
+	go generate ./...
+	$(MAKE) build-binary-internal-unstable
+
 .PHONY: run
 run:
 	GO111MODULE=on go run --race ./cmd/otelcol/... --config ${RUN_CONFIG} ${RUN_ARGS}
@@ -237,6 +242,10 @@ binaries-windows_amd64:
 build-binary-internal:
 	GO111MODULE=on CGO_ENABLED=0 go build -trimpath -o ./bin/otelcol_$(GOOS)_$(GOARCH)$(EXTENSION) $(BUILD_INFO) ./cmd/otelcol
 
+.PHONY: build-binary-internal-unstable
+build-binary-internal-unstable:
+	GO111MODULE=on CGO_ENABLED=0 go build -trimpath -o ./bin/otelcol_$(GOOS)_$(GOARCH)$(EXTENSION)_unstable $(BUILD_INFO) -tags enable_unstable ./cmd/otelcol
+
 .PHONY: deb-rpm-package
 %-package: ARCH ?= amd64
 %-package:
diff --git a/Makefile.Common b/Makefile.Common
index 85d282b2d54..02a144f3a2f 100644
--- a/Makefile.Common
+++ b/Makefile.Common
@@ -12,6 +12,10 @@ IMPI=impi
 test:
 	@echo $(ALL_PKGS) | xargs -n 10 $(GOTEST) $(GOTEST_OPT)
 
+.PHONY: test-unstable
+test-unstable:
+	@echo $(ALL_PKGS) | xargs -n 10 $(GOTEST) $(GOTEST_OPT) -tags enable_unstable
+
 .PHONY: benchmark
 benchmark:
 	$(GOTEST) -bench=. -run=notests ./...
@@ -25,6 +29,10 @@ fmt:
 lint:
 	$(LINT) run --allow-parallel-runners
 
+.PHONY: lint-unstable
+lint-unstable:
+	$(LINT) run --allow-parallel-runners --build-tags enable_unstable
+
 .PHONY: impi
 impi:
 	@$(IMPI) --local go.opentelemetry.io/collector --scheme stdThirdPartyLocal ./...
diff --git a/exporter/exporterhelper/README.md b/exporter/exporterhelper/README.md
index 6ec4d676b3a..c58ab80fa6e 100644
--- a/exporter/exporterhelper/README.md
+++ b/exporter/exporterhelper/README.md
@@ -21,7 +21,7 @@ The following configuration options can be modified:
     User should calculate this as `num_seconds * requests_per_second` where:
     - `num_seconds` is the number of seconds to buffer in case of a backend outage
     - `requests_per_second` is the average number of requests per seconds.
-  - `persistent_storage_enabled` (default = false): When set, enables persistence via a file storage extension
+  - `persistent_storage_enabled` (default = false): When set, enables persistence via a file storage extension (note, the `enable_unstable` compilation flag needs to be enabled first)
 - `resource_to_telemetry_conversion`
   - `enabled` (default = false): If `enabled` is `true`, all the resource attributes will be converted to metric labels by default.
 - `timeout` (default = 5s): Time to wait per individual attempt to send data to a backend.
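For orientation, the `sending_queue` options above map onto the helper's `QueueSettings` struct. Below is a minimal sketch of setting them in code; it assumes `QueueSettings` exposes a `PersistentStorageEnabled` field as used later in this patch series (which may only be honored when the collector is built with `enable_unstable`), and the rest of the exporter wiring is omitted:

```go
package main

import "go.opentelemetry.io/collector/exporter/exporterhelper"

func main() {
	// Sketch only: mirrors the documented sending_queue keys.
	// Persistence additionally requires a file storage extension in the
	// pipeline; the PersistentStorageEnabled field is an assumption based
	// on how this patch series uses it, not a stable public API.
	qCfg := exporterhelper.QueueSettings{
		Enabled:                  true,
		NumConsumers:             10,   // consumers draining the queue
		QueueSize:                5000, // num_seconds * requests_per_second
		PersistentStorageEnabled: true,
	}
	_ = qCfg
}
```

In collector YAML this corresponds to `persistent_storage_enabled: true` under the exporter's `sending_queue` block.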
@@ -34,7 +34,7 @@ The full list of settings exposed for this helper exporter are documented [here]
 
 When `persistent_storage_enabled` is set to true, the queue is being buffered to disk by the 
 [file storage extension](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/extension/storage/filestorage).
-It currently can be enabled only in OpenTelemetry Collector Contrib.
+It currently can be enabled only in OpenTelemetry Collector Contrib. Also, the `enable_unstable` compilation flag needs to be enabled.
 
 ```
 
diff --git a/exporter/exporterhelper/common.go b/exporter/exporterhelper/common.go
index a7685eb6baa..0b961b055a6 100644
--- a/exporter/exporterhelper/common.go
+++ b/exporter/exporterhelper/common.go
@@ -179,7 +179,7 @@ type baseExporter struct {
 	qrSender *queuedRetrySender
 }
 
-func newBaseExporter(cfg config.Exporter, set component.ExporterCreateSettings, bs *baseSettings, signal config.DataType, reqUnnmarshaler requestUnmarshaler) *baseExporter {
+func newBaseExporter(cfg config.Exporter, set component.ExporterCreateSettings, bs *baseSettings, signal config.DataType, reqUnmarshaler requestUnmarshaler) *baseExporter {
 	be := &baseExporter{
 		Component: componenthelper.New(bs.componentOptions...),
 	}
@@ -189,7 +189,7 @@ func newBaseExporter(cfg config.Exporter, set component.ExporterCreateSettings,
 		ExporterID:             cfg.ID(),
 		ExporterCreateSettings: set,
 	})
-	be.qrSender = newQueuedRetrySender(cfg.ID(), signal, bs.QueueSettings, bs.RetrySettings, reqUnnmarshaler, &timeoutSender{cfg: bs.TimeoutSettings}, set.Logger)
+	be.qrSender = newQueuedRetrySender(cfg.ID(), signal, bs.QueueSettings, bs.RetrySettings, reqUnmarshaler, &timeoutSender{cfg: bs.TimeoutSettings}, set.Logger)
 	be.sender = be.qrSender
 
 	return be
diff --git a/exporter/exporterhelper/persistent_queue.go b/exporter/exporterhelper/persistent_queue.go
index 3915514b7d8..d74e93cca86 100644
--- a/exporter/exporterhelper/persistent_queue.go
+++ b/exporter/exporterhelper/persistent_queue.go
@@ -12,6 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build enable_unstable
+// +build enable_unstable
+
 package exporterhelper
 
 import (
diff --git a/exporter/exporterhelper/persistent_queue_test.go b/exporter/exporterhelper/persistent_queue_test.go
index 4f1e74e2bf5..d012087341c 100644
--- a/exporter/exporterhelper/persistent_queue_test.go
+++ b/exporter/exporterhelper/persistent_queue_test.go
@@ -12,6 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build enable_unstable
+// +build enable_unstable
+
 package exporterhelper
 
 import (
diff --git a/exporter/exporterhelper/persistent_storage.go b/exporter/exporterhelper/persistent_storage.go
index 7831c15a27a..5b65ceacfde 100644
--- a/exporter/exporterhelper/persistent_storage.go
+++ b/exporter/exporterhelper/persistent_storage.go
@@ -12,6 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build enable_unstable
+// +build enable_unstable
+
 package exporterhelper
 
 import (
diff --git a/exporter/exporterhelper/persistent_storage_batch.go b/exporter/exporterhelper/persistent_storage_batch.go
index ed77695bc8f..189a563993a 100644
--- a/exporter/exporterhelper/persistent_storage_batch.go
+++ b/exporter/exporterhelper/persistent_storage_batch.go
@@ -12,6 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+//go:build enable_unstable +// +build enable_unstable + package exporterhelper import ( diff --git a/exporter/exporterhelper/persistent_storage_batch_test.go b/exporter/exporterhelper/persistent_storage_batch_test.go index 41ada268c29..29773e62042 100644 --- a/exporter/exporterhelper/persistent_storage_batch_test.go +++ b/exporter/exporterhelper/persistent_storage_batch_test.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build enable_unstable +// +build enable_unstable + package exporterhelper import ( diff --git a/exporter/exporterhelper/persistent_storage_test.go b/exporter/exporterhelper/persistent_storage_test.go index 36cbddbfaa0..9b39c8213e2 100644 --- a/exporter/exporterhelper/persistent_storage_test.go +++ b/exporter/exporterhelper/persistent_storage_test.go @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build enable_unstable +// +build enable_unstable + package exporterhelper import ( diff --git a/exporter/exporterhelper/queued_retry.go b/exporter/exporterhelper/queued_retry.go index 5c7ab030d0b..81e761ed755 100644 --- a/exporter/exporterhelper/queued_retry.go +++ b/exporter/exporterhelper/queued_retry.go @@ -29,11 +29,7 @@ import ( "go.uber.org/zap" "go.uber.org/zap/zapcore" - "go.opentelemetry.io/collector/component" - "go.opentelemetry.io/collector/config" "go.opentelemetry.io/collector/consumer/consumererror" - "go.opentelemetry.io/collector/exporter/exporterhelper/internal" - "go.opentelemetry.io/collector/extension/storage" "go.opentelemetry.io/collector/internal/obsreportconfig/obsmetrics" ) @@ -46,15 +42,7 @@ var ( metric.WithLabelKeys(obsmetrics.ExporterKey), metric.WithUnit(metricdata.UnitDimensionless)) - currentlyProcessedBatchesGauge, _ = r.AddInt64DerivedGauge( - obsmetrics.ExporterKey+"/currently_processed_batches", - metric.WithDescription("Number of currently processed batches"), - metric.WithLabelKeys(obsmetrics.ExporterKey), - metric.WithUnit(metricdata.UnitDimensionless)) - - errNoStorageClient = errors.New("no storage client extension found") - errMultipleStorageClients = errors.New("multiple storage extensions found") - errSendingQueueIsFull = errors.New("sending_queue is full") + errSendingQueueIsFull = errors.New("sending_queue is full") ) func init() { @@ -112,19 +100,6 @@ func DefaultRetrySettings() RetrySettings { } } -type queuedRetrySender struct { - id config.ComponentID - signal config.DataType - cfg QueueSettings - consumerSender requestSender - queue consumersQueue - retryStopCh chan struct{} - traceAttributes []attribute.KeyValue - logger *zap.Logger - requeuingEnabled bool - requestUnmarshaler requestUnmarshaler -} - func createSampledLogger(logger *zap.Logger) *zap.Logger { if logger.Core().Enabled(zapcore.DebugLevel) { // Debugging is enabled. Don't do any sampling. 
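The large removal that follows is the heart of this patch: construction of the queued-retry sender moves out of `queued_retry.go` into two mutually exclusive files chosen by Go build constraints, so the persistent path is only compiled in when `-tags enable_unstable` is passed (see the Makefile targets above). A minimal sketch of that gating pattern; the file name and helper below are illustrative, not part of the actual change:

```go
// Hypothetical file queue_factory_stable.go, compiled only when the
// enable_unstable tag is absent.
//go:build !enable_unstable
// +build !enable_unstable

package exporterhelper

import "go.opentelemetry.io/collector/exporter/exporterhelper/internal"

// newQueueForSettings is an illustrative helper: without enable_unstable
// only the in-memory bounded queue is available.
func newQueueForSettings(qCfg QueueSettings) consumersQueue {
	return internal.NewBoundedQueue(qCfg.QueueSize, func(item interface{}) {})
}
```

A sibling file guarded by `//go:build enable_unstable` would construct the persistent queue instead, which is how `queued_retry_v1.go` and `queued_retry_v2.go` split the responsibility below.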
@@ -144,147 +119,6 @@ func createSampledLogger(logger *zap.Logger) *zap.Logger { return logger.WithOptions(opts) } -func newQueuedRetrySender(id config.ComponentID, signal config.DataType, qCfg QueueSettings, rCfg RetrySettings, reqUnmarshaler requestUnmarshaler, nextSender requestSender, logger *zap.Logger) *queuedRetrySender { - retryStopCh := make(chan struct{}) - sampledLogger := createSampledLogger(logger) - traceAttr := attribute.String(obsmetrics.ExporterKey, id.String()) - - qrs := &queuedRetrySender{ - id: id, - signal: signal, - cfg: qCfg, - retryStopCh: retryStopCh, - traceAttributes: []attribute.KeyValue{traceAttr}, - logger: sampledLogger, - requestUnmarshaler: reqUnmarshaler, - } - - qrs.consumerSender = &retrySender{ - traceAttribute: traceAttr, - cfg: rCfg, - nextSender: nextSender, - stopCh: retryStopCh, - logger: sampledLogger, - // Following three functions are provided in such way because they actually depend on queuedRetrySender - onSuccess: qrs.onSuccess, - onTemporaryFailure: qrs.onTemporaryFailure, - onPermanentFailure: qrs.onPermanentFailure, - } - - if !qCfg.PersistentStorageEnabled { - qrs.queue = internal.NewBoundedQueue(qrs.cfg.QueueSize, func(item interface{}) {}) - } - // The Persistent Queue is initialized separately as it needs extra information about the component - - return qrs -} - -func (qrs *queuedRetrySender) onSuccess(_ request, _ error) error { - return nil -} - -func (qrs *queuedRetrySender) onPermanentFailure(_ request, err error) error { - return err -} - -func (qrs *queuedRetrySender) onTemporaryFailure(req request, err error) error { - if !qrs.requeuingEnabled || qrs.queue == nil { - qrs.logger.Error( - "Exporting failed. No more retries left. Dropping data.", - zap.Error(err), - zap.Int("dropped_items", req.count()), - ) - return err - } - - if qrs.queue.Produce(req) { - qrs.logger.Error( - "Exporting failed. Putting back to the end of the queue.", - zap.Error(err), - ) - } else { - qrs.logger.Error( - "Exporting failed. Queue did not accept requeuing request. 
Dropping data.", - zap.Error(err), - zap.Int("dropped_items", req.count()), - ) - } - return err -} - -func getStorageClient(ctx context.Context, host component.Host, id config.ComponentID, signal config.DataType) (*storage.Client, error) { - var storageExtension storage.Extension - for _, ext := range host.GetExtensions() { - if se, ok := ext.(storage.Extension); ok { - if storageExtension != nil { - return nil, errMultipleStorageClients - } - storageExtension = se - } - } - - if storageExtension == nil { - return nil, errNoStorageClient - } - - client, err := storageExtension.GetClient(ctx, component.KindExporter, id, string(signal)) - if err != nil { - return nil, err - } - - return &client, err -} - -// initializePersistentQueue uses extra information for initialization available from component.Host -func (qrs *queuedRetrySender) initializePersistentQueue(ctx context.Context, host component.Host) error { - if qrs.cfg.PersistentStorageEnabled { - storageClient, err := getStorageClient(ctx, host, qrs.id, qrs.signal) - if err != nil { - return err - } - - qrs.queue = newPersistentQueue(ctx, qrs.fullName(), qrs.cfg.QueueSize, qrs.logger, *storageClient, qrs.requestUnmarshaler) - - // TODO: this can be further exposed as a config param rather than relying on a type of queue - qrs.requeuingEnabled = true - } - - return nil -} - -func (qrs *queuedRetrySender) fullName() string { - if qrs.signal == "" { - return qrs.id.String() - } - return fmt.Sprintf("%s-%s", qrs.id.String(), qrs.signal) -} - -// start is invoked during service startup. -func (qrs *queuedRetrySender) start(ctx context.Context, host component.Host) error { - err := qrs.initializePersistentQueue(ctx, host) - if err != nil { - return err - } - - qrs.queue.StartConsumers(qrs.cfg.NumConsumers, func(item interface{}) { - req := item.(request) - _ = qrs.consumerSender.send(req) - req.onProcessingFinished() - }) - - // Start reporting queue length metric - if qrs.cfg.Enabled { - err := queueSizeGauge.UpsertEntry(func() int64 { - return int64(qrs.queue.Size()) - }, metricdata.NewLabelValue(qrs.fullName())) - if err != nil { - return fmt.Errorf("failed to create retry queue size metric: %v", err) - } - } - - return nil -} - // send implements the requestSender interface func (qrs *queuedRetrySender) send(req request) error { if !qrs.cfg.Enabled { @@ -316,25 +150,6 @@ func (qrs *queuedRetrySender) send(req request) error { return nil } -// shutdown is invoked during service shutdown. -func (qrs *queuedRetrySender) shutdown() { - // Cleanup queue metrics reporting - if qrs.cfg.Enabled { - _ = queueSizeGauge.UpsertEntry(func() int64 { - return int64(0) - }, metricdata.NewLabelValue(qrs.fullName())) - } - - // First Stop the retry goroutines, so that unblocks the queue numWorkers. - close(qrs.retryStopCh) - - // Stop the queued sender, this will drain the queue and will call the retry (which is stopped) that will only - // try once every request. - if qrs.queue != nil { - qrs.queue.Stop() - } -} - // TODO: Clean this by forcing all exporters to return an internal error type that always include the information about retries. 
type throttleRetry struct { err error @@ -357,7 +172,7 @@ func NewThrottleRetry(err error, delay time.Duration) error { } } -type onRequestHandlingFinishedFunc func(request, error) error +type onRequestHandlingFinishedFunc func(*zap.Logger, request, error) error type retrySender struct { traceAttribute attribute.KeyValue @@ -365,9 +180,7 @@ type retrySender struct { nextSender requestSender stopCh chan struct{} logger *zap.Logger - onSuccess onRequestHandlingFinishedFunc onTemporaryFailure onRequestHandlingFinishedFunc - onPermanentFailure onRequestHandlingFinishedFunc } // send implements the requestSender interface @@ -404,7 +217,7 @@ func (rs *retrySender) send(req request) error { err := rs.nextSender.send(req) if err == nil { - return rs.onSuccess(req, nil) + return nil } // Immediately drop data on permanent errors. @@ -414,7 +227,7 @@ func (rs *retrySender) send(req request) error { zap.Error(err), zap.Int("dropped_items", req.count()), ) - return rs.onPermanentFailure(req, err) + return err } // Give the request a chance to extract signal data to retry if only some data @@ -425,7 +238,7 @@ func (rs *retrySender) send(req request) error { if backoffDelay == backoff.Stop { // throw away the batch err = fmt.Errorf("max elapsed time expired %w", err) - return rs.onTemporaryFailure(req, err) + return rs.onTemporaryFailure(rs.logger, req, err) } throttleErr := throttleRetry{} diff --git a/exporter/exporterhelper/queued_retry_v1.go b/exporter/exporterhelper/queued_retry_v1.go new file mode 100644 index 00000000000..a1d42bc4115 --- /dev/null +++ b/exporter/exporterhelper/queued_retry_v1.go @@ -0,0 +1,116 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//go:build !enable_unstable +// +build !enable_unstable + +package exporterhelper + +import ( + "context" + "fmt" + + "go.opencensus.io/metric/metricdata" + "go.opentelemetry.io/otel/attribute" + "go.uber.org/zap" + + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/config" + "go.opentelemetry.io/collector/exporter/exporterhelper/internal" + "go.opentelemetry.io/collector/internal/obsreportconfig/obsmetrics" +) + +// queued_retry_v1 includes the code for memory-backed (original) queued retry helper only +// enabled when "enable_unstable" build tag is not set + +type queuedRetrySender struct { + fullName string + cfg QueueSettings + consumerSender requestSender + queue consumersQueue + retryStopCh chan struct{} + traceAttributes []attribute.KeyValue + logger *zap.Logger +} + +func newQueuedRetrySender(id config.ComponentID, _ config.DataType, qCfg QueueSettings, rCfg RetrySettings, _ requestUnmarshaler, nextSender requestSender, logger *zap.Logger) *queuedRetrySender { + retryStopCh := make(chan struct{}) + sampledLogger := createSampledLogger(logger) + traceAttr := attribute.String(obsmetrics.ExporterKey, id.String()) + return &queuedRetrySender{ + fullName: id.String(), + cfg: qCfg, + consumerSender: &retrySender{ + traceAttribute: traceAttr, + cfg: rCfg, + nextSender: nextSender, + stopCh: retryStopCh, + logger: sampledLogger, + onTemporaryFailure: onTemporaryFailure, + }, + queue: internal.NewBoundedQueue(qCfg.QueueSize, func(item interface{}) {}), + retryStopCh: retryStopCh, + traceAttributes: []attribute.KeyValue{traceAttr}, + logger: sampledLogger, + } +} + +func onTemporaryFailure(logger *zap.Logger, req request, err error) error { + logger.Error( + "Exporting failed. No more retries left. Dropping data.", + zap.Error(err), + zap.Int("dropped_items", req.count()), + ) + return err +} + +// start is invoked during service startup. +func (qrs *queuedRetrySender) start(ctx context.Context, host component.Host) error { + qrs.queue.StartConsumers(qrs.cfg.NumConsumers, func(item interface{}) { + req := item.(request) + _ = qrs.consumerSender.send(req) + req.onProcessingFinished() + }) + + // Start reporting queue length metric + if qrs.cfg.Enabled { + err := queueSizeGauge.UpsertEntry(func() int64 { + return int64(qrs.queue.Size()) + }, metricdata.NewLabelValue(qrs.fullName)) + if err != nil { + return fmt.Errorf("failed to create retry queue size metric: %v", err) + } + } + + return nil +} + +// shutdown is invoked during service shutdown. +func (qrs *queuedRetrySender) shutdown() { + // Cleanup queue metrics reporting + if qrs.cfg.Enabled { + _ = queueSizeGauge.UpsertEntry(func() int64 { + return int64(0) + }, metricdata.NewLabelValue(qrs.fullName)) + } + + // First Stop the retry goroutines, so that unblocks the queue numWorkers. + close(qrs.retryStopCh) + + // Stop the queued sender, this will drain the queue and will call the retry (which is stopped) that will only + // try once every request. + if qrs.queue != nil { + qrs.queue.Stop() + } +} diff --git a/exporter/exporterhelper/queued_retry_v2.go b/exporter/exporterhelper/queued_retry_v2.go new file mode 100644 index 00000000000..5ab3b38130a --- /dev/null +++ b/exporter/exporterhelper/queued_retry_v2.go @@ -0,0 +1,212 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build enable_unstable +// +build enable_unstable + +package exporterhelper + +import ( + "context" + "errors" + "fmt" + + "go.opencensus.io/metric" + "go.opencensus.io/metric/metricdata" + "go.opentelemetry.io/otel/attribute" + "go.uber.org/zap" + + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/config" + "go.opentelemetry.io/collector/exporter/exporterhelper/internal" + "go.opentelemetry.io/collector/extension/storage" + "go.opentelemetry.io/collector/internal/obsreportconfig/obsmetrics" +) + +// queued_retry_v2 includes the code for both memory-backed and persistent-storage backed queued retry helpers +// enabled by setting "enable_unstable" build tag + +var ( + currentlyProcessedBatchesGauge, _ = r.AddInt64DerivedGauge( + obsmetrics.ExporterKey+"/currently_processed_batches", + metric.WithDescription("Number of currently processed batches"), + metric.WithLabelKeys(obsmetrics.ExporterKey), + metric.WithUnit(metricdata.UnitDimensionless)) + + errNoStorageClient = errors.New("no storage client extension found") + errMultipleStorageClients = errors.New("multiple storage extensions found") +) + +type queuedRetrySender struct { + id config.ComponentID + signal config.DataType + cfg QueueSettings + consumerSender requestSender + queue consumersQueue + retryStopCh chan struct{} + traceAttributes []attribute.KeyValue + logger *zap.Logger + requeuingEnabled bool + requestUnmarshaler requestUnmarshaler +} + +func (qrs *queuedRetrySender) fullName() string { + if qrs.signal == "" { + return qrs.id.String() + } + return fmt.Sprintf("%s-%s", qrs.id.String(), qrs.signal) +} + +func newQueuedRetrySender(id config.ComponentID, signal config.DataType, qCfg QueueSettings, rCfg RetrySettings, reqUnmarshaler requestUnmarshaler, nextSender requestSender, logger *zap.Logger) *queuedRetrySender { + retryStopCh := make(chan struct{}) + sampledLogger := createSampledLogger(logger) + traceAttr := attribute.String(obsmetrics.ExporterKey, id.String()) + + qrs := &queuedRetrySender{ + id: id, + signal: signal, + cfg: qCfg, + retryStopCh: retryStopCh, + traceAttributes: []attribute.KeyValue{traceAttr}, + logger: sampledLogger, + requestUnmarshaler: reqUnmarshaler, + } + + qrs.consumerSender = &retrySender{ + traceAttribute: traceAttr, + cfg: rCfg, + nextSender: nextSender, + stopCh: retryStopCh, + logger: sampledLogger, + // Following three functions actually depend on queuedRetrySender + onTemporaryFailure: qrs.onTemporaryFailure, + } + + if !qCfg.PersistentStorageEnabled { + qrs.queue = internal.NewBoundedQueue(qrs.cfg.QueueSize, func(item interface{}) {}) + } + // The Persistent Queue is initialized separately as it needs extra information about the component + + return qrs +} + +func getStorageClient(ctx context.Context, host component.Host, id config.ComponentID, signal config.DataType) (*storage.Client, error) { + var storageExtension storage.Extension + for _, ext := range host.GetExtensions() { + if se, ok := ext.(storage.Extension); ok { + if storageExtension != nil { + return nil, errMultipleStorageClients + } + storageExtension = se + } + } + + if 
storageExtension == nil { + return nil, errNoStorageClient + } + + client, err := storageExtension.GetClient(ctx, component.KindExporter, id, string(signal)) + if err != nil { + return nil, err + } + + return &client, err +} + +// initializePersistentQueue uses extra information for initialization available from component.Host +func (qrs *queuedRetrySender) initializePersistentQueue(ctx context.Context, host component.Host) error { + if qrs.cfg.PersistentStorageEnabled { + storageClient, err := getStorageClient(ctx, host, qrs.id, qrs.signal) + if err != nil { + return err + } + + qrs.queue = newPersistentQueue(ctx, qrs.fullName(), qrs.cfg.QueueSize, qrs.logger, *storageClient, qrs.requestUnmarshaler) + + // TODO: this can be further exposed as a config param rather than relying on a type of queue + qrs.requeuingEnabled = true + } + + return nil +} + +func (qrs *queuedRetrySender) onTemporaryFailure(logger *zap.Logger, req request, err error) error { + if !qrs.requeuingEnabled || qrs.queue == nil { + logger.Error( + "Exporting failed. No more retries left. Dropping data.", + zap.Error(err), + zap.Int("dropped_items", req.count()), + ) + return err + } + + if qrs.queue.Produce(req) { + logger.Error( + "Exporting failed. Putting back to the end of the queue.", + zap.Error(err), + ) + } else { + logger.Error( + "Exporting failed. Queue did not accept requeuing request. Dropping data.", + zap.Error(err), + zap.Int("dropped_items", req.count()), + ) + } + return err +} + +// start is invoked during service startup. +func (qrs *queuedRetrySender) start(ctx context.Context, host component.Host) error { + err := qrs.initializePersistentQueue(ctx, host) + if err != nil { + return err + } + + qrs.queue.StartConsumers(qrs.cfg.NumConsumers, func(item interface{}) { + req := item.(request) + _ = qrs.consumerSender.send(req) + req.onProcessingFinished() + }) + + // Start reporting queue length metric + if qrs.cfg.Enabled { + err := queueSizeGauge.UpsertEntry(func() int64 { + return int64(qrs.queue.Size()) + }, metricdata.NewLabelValue(qrs.fullName())) + if err != nil { + return fmt.Errorf("failed to create retry queue size metric: %v", err) + } + } + + return nil +} + +// shutdown is invoked during service shutdown. +func (qrs *queuedRetrySender) shutdown() { + // Cleanup queue metrics reporting + if qrs.cfg.Enabled { + _ = queueSizeGauge.UpsertEntry(func() int64 { + return int64(0) + }, metricdata.NewLabelValue(qrs.fullName())) + } + + // First Stop the retry goroutines, so that unblocks the queue numWorkers. + close(qrs.retryStopCh) + + // Stop the queued sender, this will drain the queue and will call the retry (which is stopped) that will only + // try once every request. + if qrs.queue != nil { + qrs.queue.Stop() + } +} From e35bb59aacd5e24ac37a76527960c1e5023e5a4d Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Fri, 3 Sep 2021 10:32:50 +0200 Subject: [PATCH 19/24] Use more clear terminology: "dispatch" rather than "processing" --- exporter/exporterhelper/README.md | 2 +- exporter/exporterhelper/persistent_storage.go | 102 +++++++++--------- .../exporterhelper/persistent_storage_test.go | 24 ++--- exporter/exporterhelper/queued_retry_v2.go | 6 +- 4 files changed, 67 insertions(+), 67 deletions(-) diff --git a/exporter/exporterhelper/README.md b/exporter/exporterhelper/README.md index c58ab80fa6e..3989af09512 100644 --- a/exporter/exporterhelper/README.md +++ b/exporter/exporterhelper/README.md @@ -60,7 +60,7 @@ It currently can be enabled only in OpenTelemetry Collector Contrib. 
Also, `unab ▲ │ │ └─────────────┘ │ │ │ │ currently │ ┌─Consumer #4─┐ - │ processed │ │ ┌───┐ │ Temporary + │ dispatched │ │ ┌───┐ │ Temporary │ └───►│ │ 4 │ ├───► failure │ │ └───┘ │ │ │ │ │ │ diff --git a/exporter/exporterhelper/persistent_storage.go b/exporter/exporterhelper/persistent_storage.go index 5b65ceacfde..ff3cf8a51c7 100644 --- a/exporter/exporterhelper/persistent_storage.go +++ b/exporter/exporterhelper/persistent_storage.go @@ -48,7 +48,7 @@ type persistentStorage interface { // Read index describes which item needs to be read next. // When Write index = Read index, no elements are in the queue. // -// The items currently processed by consumers are not deleted until the processing is finished. +// The items currently dispatched by consumers are not deleted until the processing is finished. // Their list is stored under a separate key. // // @@ -62,7 +62,7 @@ type persistentStorage interface { // ▲ ▲ x | x // │ │ x | xxxx deleted // │ │ x | -// write read x └── currently processed item +// write read x └── currently dispatched item // index index x // xxxx deleted // @@ -79,10 +79,10 @@ type persistentContiguousStorage struct { reqChan chan request - mu sync.Mutex - readIndex itemIndex - writeIndex itemIndex - currentlyProcessedItems []itemIndex + mu sync.Mutex + readIndex itemIndex + writeIndex itemIndex + currentlyDispatchedItems []itemIndex itemsCount uint64 } @@ -95,9 +95,9 @@ const ( zapErrorCount = "errorCount" zapNumberOfItems = "numberOfItems" - readIndexKey = "ri" - writeIndexKey = "wi" - currentlyProcessedItemsKey = "pi" + readIndexKey = "ri" + writeIndexKey = "wi" + currentlyDispatchedItemsKey = "di" ) var ( @@ -122,13 +122,13 @@ func newPersistentContiguousStorage(ctx context.Context, queueName string, capac } initPersistentContiguousStorage(ctx, pcs) - unprocessedReqs := pcs.retrieveUnprocessedItems(context.Background()) + notDispatchedReqs := pcs.retrieveNotDispatchedReqs(context.Background()) - err := currentlyProcessedBatchesGauge.UpsertEntry(func() int64 { - return int64(pcs.numberOfCurrentlyProcessedItems()) + err := currentlyDispatchedBatchesGauge.UpsertEntry(func() int64 { + return int64(pcs.numberOfCurrentlyDispatchedItems()) }, metricdata.NewLabelValue(pcs.queueName)) if err != nil { - logger.Error("failed to create number of currently processed items metric", zap.Error(err)) + logger.Error("failed to create number of currently dispatched items metric", zap.Error(err)) } // We start the loop first so in case there are more elements in the persistent storage than the capacity, @@ -137,7 +137,7 @@ func newPersistentContiguousStorage(ctx context.Context, queueName string, capac go pcs.loop() // Make sure the leftover requests are handled - pcs.enqueueUnprocessedReqs(unprocessedReqs) + pcs.enqueueNotDispatchedReqs(notDispatchedReqs) // Make sure the communication channel is loaded up for i := uint64(0); i < pcs.size(); i++ { pcs.putChan <- struct{}{} @@ -177,7 +177,7 @@ func initPersistentContiguousStorage(ctx context.Context, pcs *persistentContigu atomic.StoreUint64(&pcs.itemsCount, uint64(pcs.writeIndex-pcs.readIndex)) } -func (pcs *persistentContiguousStorage) enqueueUnprocessedReqs(reqs []request) { +func (pcs *persistentContiguousStorage) enqueueNotDispatchedReqs(reqs []request) { if len(reqs) > 0 { errCount := 0 for _, req := range reqs { @@ -186,12 +186,12 @@ func (pcs *persistentContiguousStorage) enqueueUnprocessedReqs(reqs []request) { } } if errCount > 0 { - pcs.logger.Error("Errors occurred while moving items for processing back to queue", + 
pcs.logger.Error("Errors occurred while moving items for dispatching back to queue", zap.String(zapQueueNameKey, pcs.queueName), zap.Int(zapNumberOfItems, len(reqs)), zap.Int(zapErrorCount, errCount)) } else { - pcs.logger.Info("Moved items for processing back to queue", + pcs.logger.Info("Moved items for dispatching back to queue", zap.String(zapQueueNameKey, pcs.queueName), zap.Int(zapNumberOfItems, len(reqs))) @@ -224,19 +224,19 @@ func (pcs *persistentContiguousStorage) size() uint64 { return atomic.LoadUint64(&pcs.itemsCount) } -// numberOfCurrentlyProcessedItems returns the count of batches for which processing started but hasn't finished yet -func (pcs *persistentContiguousStorage) numberOfCurrentlyProcessedItems() int { +// numberOfCurrentlyDispatchedItems returns the count of batches for which processing started but hasn't finished yet +func (pcs *persistentContiguousStorage) numberOfCurrentlyDispatchedItems() int { pcs.mu.Lock() defer pcs.mu.Unlock() - return len(pcs.currentlyProcessedItems) + return len(pcs.currentlyDispatchedItems) } func (pcs *persistentContiguousStorage) stop() { pcs.logger.Debug("Stopping persistentContiguousStorage", zap.String(zapQueueNameKey, pcs.queueName)) pcs.stopOnce.Do(func() { close(pcs.stopChan) - _ = currentlyProcessedBatchesGauge.UpsertEntry(func() int64 { - return int64(pcs.numberOfCurrentlyProcessedItems()) + _ = currentlyDispatchedBatchesGauge.UpsertEntry(func() int64 { + return int64(pcs.numberOfCurrentlyDispatchedItems()) }, metricdata.NewLabelValue(pcs.queueName)) }) } @@ -281,7 +281,7 @@ func (pcs *persistentContiguousStorage) getNextItem(ctx context.Context) (reques atomic.StoreUint64(&pcs.itemsCount, uint64(pcs.writeIndex-pcs.readIndex)) pcs.updateReadIndex(ctx) - pcs.itemProcessingStart(ctx, index) + pcs.itemDispatchingStart(ctx, index) batch, err := newBatch(pcs).get(pcs.itemKey(index)).execute(ctx) if err != nil { @@ -296,7 +296,7 @@ func (pcs *persistentContiguousStorage) getNextItem(ctx context.Context) (reques req.setOnProcessingFinished(func() { pcs.mu.Lock() defer pcs.mu.Unlock() - pcs.itemProcessingFinish(ctx, index) + pcs.itemDispatchingFinish(ctx, index) }) return req, true } @@ -304,37 +304,37 @@ func (pcs *persistentContiguousStorage) getNextItem(ctx context.Context) (reques return nil, false } -// retrieveUnprocessedItems gets the items for which processing was not finished, cleans the storage +// retrieveNotDispatchedReqs gets the items for which sending was not finished, cleans the storage // and moves the items back to the queue -func (pcs *persistentContiguousStorage) retrieveUnprocessedItems(ctx context.Context) []request { +func (pcs *persistentContiguousStorage) retrieveNotDispatchedReqs(ctx context.Context) []request { var reqs []request - var processedItems []itemIndex + var dispatchedItems []itemIndex pcs.mu.Lock() defer pcs.mu.Unlock() - pcs.logger.Debug("Checking if there are items left by consumers", zap.String(zapQueueNameKey, pcs.queueName)) - batch, err := newBatch(pcs).get(currentlyProcessedItemsKey).execute(ctx) + pcs.logger.Debug("Checking if there are items left for dispatch by consumers", zap.String(zapQueueNameKey, pcs.queueName)) + batch, err := newBatch(pcs).get(currentlyDispatchedItemsKey).execute(ctx) if err == nil { - processedItems, err = batch.getItemIndexArrayResult(currentlyProcessedItemsKey) + dispatchedItems, err = batch.getItemIndexArrayResult(currentlyDispatchedItemsKey) } if err != nil { - pcs.logger.Error("Could not fetch items left by consumers", zap.String(zapQueueNameKey, 
pcs.queueName), zap.Error(err)) + pcs.logger.Error("Could not fetch items left for dispatch by consumers", zap.String(zapQueueNameKey, pcs.queueName), zap.Error(err)) return reqs } - if len(processedItems) > 0 { - pcs.logger.Info("Fetching items left for processing by consumers", - zap.String(zapQueueNameKey, pcs.queueName), zap.Int(zapNumberOfItems, len(processedItems))) + if len(dispatchedItems) > 0 { + pcs.logger.Info("Fetching items left for dispatch by consumers", + zap.String(zapQueueNameKey, pcs.queueName), zap.Int(zapNumberOfItems, len(dispatchedItems))) } else { - pcs.logger.Debug("No items left for processing by consumers") + pcs.logger.Debug("No items left for dispatch by consumers") } - reqs = make([]request, len(processedItems)) - keys := make([]string, len(processedItems)) + reqs = make([]request, len(dispatchedItems)) + keys := make([]string, len(dispatchedItems)) retrieveBatch := newBatch(pcs) cleanupBatch := newBatch(pcs) - for i, it := range processedItems { + for i, it := range dispatchedItems { keys[i] = pcs.itemKey(it) retrieveBatch.get(keys[i]) cleanupBatch.delete(keys[i]) @@ -368,34 +368,34 @@ func (pcs *persistentContiguousStorage) retrieveUnprocessedItems(ctx context.Con return reqs } -// itemProcessingStart appends the item to the list of currently processed items -func (pcs *persistentContiguousStorage) itemProcessingStart(ctx context.Context, index itemIndex) { - pcs.currentlyProcessedItems = append(pcs.currentlyProcessedItems, index) +// itemDispatchingStart appends the item to the list of currently dispatched items +func (pcs *persistentContiguousStorage) itemDispatchingStart(ctx context.Context, index itemIndex) { + pcs.currentlyDispatchedItems = append(pcs.currentlyDispatchedItems, index) _, err := newBatch(pcs). - setItemIndexArray(currentlyProcessedItemsKey, pcs.currentlyProcessedItems). + setItemIndexArray(currentlyDispatchedItemsKey, pcs.currentlyDispatchedItems). execute(ctx) if err != nil { - pcs.logger.Debug("Failed updating currently processed items", + pcs.logger.Debug("Failed updating currently dispatched items", zap.String(zapQueueNameKey, pcs.queueName), zap.Error(err)) } } -// itemProcessingFinish removes the item from the list of currently processed items and deletes it from the persistent queue -func (pcs *persistentContiguousStorage) itemProcessingFinish(ctx context.Context, index itemIndex) { - var updatedProcessedItems []itemIndex - for _, it := range pcs.currentlyProcessedItems { +// itemDispatchingFinish removes the item from the list of currently dispatched items and deletes it from the persistent queue +func (pcs *persistentContiguousStorage) itemDispatchingFinish(ctx context.Context, index itemIndex) { + var updatedDispatchedItems []itemIndex + for _, it := range pcs.currentlyDispatchedItems { if it != index { - updatedProcessedItems = append(updatedProcessedItems, it) + updatedDispatchedItems = append(updatedDispatchedItems, it) } } - pcs.currentlyProcessedItems = updatedProcessedItems + pcs.currentlyDispatchedItems = updatedDispatchedItems _, err := newBatch(pcs). - setItemIndexArray(currentlyProcessedItemsKey, pcs.currentlyProcessedItems). + setItemIndexArray(currentlyDispatchedItemsKey, pcs.currentlyDispatchedItems). delete(pcs.itemKey(index)). 
execute(ctx) if err != nil { - pcs.logger.Debug("Failed updating currently processed items", + pcs.logger.Debug("Failed updating currently dispatched items", zap.String(zapQueueNameKey, pcs.queueName), zap.Error(err)) } } diff --git a/exporter/exporterhelper/persistent_storage_test.go b/exporter/exporterhelper/persistent_storage_test.go index 9b39c8213e2..72420ddc0cf 100644 --- a/exporter/exporterhelper/persistent_storage_test.go +++ b/exporter/exporterhelper/persistent_storage_test.go @@ -84,20 +84,20 @@ func TestPersistentStorage_CurrentlyProcessedItems(t *testing.T) { } // Item index 0 is currently in unbuffered channel - requireCurrentItemIndexesEqual(t, ps, []itemIndex{0}) + requireCurrentlyDispatchedItemsEqual(t, ps, []itemIndex{0}) // Now, this will take item 0 and pull item 1 into the unbuffered channel readReq := getItemFromChannel(t, ps) require.Equal(t, req.(*tracesRequest).td, readReq.(*tracesRequest).td) - requireCurrentItemIndexesEqual(t, ps, []itemIndex{0, 1}) + requireCurrentlyDispatchedItemsEqual(t, ps, []itemIndex{0, 1}) // This takes item 1 from channel and pulls another one (item 2) into the unbuffered channel secondReadReq := getItemFromChannel(t, ps) - requireCurrentItemIndexesEqual(t, ps, []itemIndex{0, 1, 2}) + requireCurrentlyDispatchedItemsEqual(t, ps, []itemIndex{0, 1, 2}) - // Lets mark item 1 as finished, it will remove it from the currently processed items list + // Lets mark item 1 as finished, it will remove it from the currently dispatched items list secondReadReq.onProcessingFinished() - requireCurrentItemIndexesEqual(t, ps, []itemIndex{0, 2}) + requireCurrentlyDispatchedItemsEqual(t, ps, []itemIndex{0, 2}) // Reload the storage. Since items 0 and 2 were not finished, those should be requeued at the end. // The queue should be essentially {3,4,0,2} out of which item "3" should be pulled right away into @@ -107,7 +107,7 @@ func TestPersistentStorage_CurrentlyProcessedItems(t *testing.T) { return newPs.size() == 3 }, 500*time.Millisecond, 10*time.Millisecond) - requireCurrentItemIndexesEqual(t, newPs, []itemIndex{3}) + requireCurrentlyDispatchedItemsEqual(t, newPs, []itemIndex{3}) // We should be able to pull all remaining items now for i := 0; i < 4; i++ { @@ -116,7 +116,7 @@ func TestPersistentStorage_CurrentlyProcessedItems(t *testing.T) { } // The queue should be now empty - requireCurrentItemIndexesEqual(t, newPs, nil) + requireCurrentlyDispatchedItemsEqual(t, newPs, nil) require.Eventually(t, func() bool { return newPs.size() == 0 }, 500*time.Millisecond, 10*time.Millisecond) @@ -149,11 +149,11 @@ func TestPersistentStorage_MetricsReported(t *testing.T) { } _ = getItemFromChannel(t, ps) - requireCurrentItemIndexesEqual(t, ps, []itemIndex{0, 1}) - checkValueForProducer(t, []tag.Tag{{Key: exporterTag, Value: "foo"}}, int64(2), "exporter/currently_processed_batches") + requireCurrentlyDispatchedItemsEqual(t, ps, []itemIndex{0, 1}) + checkValueForProducer(t, []tag.Tag{{Key: exporterTag, Value: "foo"}}, int64(2), "exporter/currently_dispatched_batches") ps.stop() - checkValueForProducer(t, []tag.Tag{{Key: exporterTag, Value: "foo"}}, int64(2), "exporter/currently_processed_batches") + checkValueForProducer(t, []tag.Tag{{Key: exporterTag, Value: "foo"}}, int64(2), "exporter/currently_dispatched_batches") } func TestPersistentStorage_RepeatPutCloseReadClose(t *testing.T) { @@ -314,11 +314,11 @@ func getItemFromChannel(t *testing.T, pcs *persistentContiguousStorage) request return readReq } -func requireCurrentItemIndexesEqual(t *testing.T, pcs 
*persistentContiguousStorage, compare []itemIndex) { +func requireCurrentlyDispatchedItemsEqual(t *testing.T, pcs *persistentContiguousStorage, compare []itemIndex) { require.Eventually(t, func() bool { pcs.mu.Lock() defer pcs.mu.Unlock() - return reflect.DeepEqual(pcs.currentlyProcessedItems, compare) + return reflect.DeepEqual(pcs.currentlyDispatchedItems, compare) }, 500*time.Millisecond, 10*time.Millisecond) } diff --git a/exporter/exporterhelper/queued_retry_v2.go b/exporter/exporterhelper/queued_retry_v2.go index 5ab3b38130a..52d5048df85 100644 --- a/exporter/exporterhelper/queued_retry_v2.go +++ b/exporter/exporterhelper/queued_retry_v2.go @@ -38,9 +38,9 @@ import ( // enabled by setting "enable_unstable" build tag var ( - currentlyProcessedBatchesGauge, _ = r.AddInt64DerivedGauge( - obsmetrics.ExporterKey+"/currently_processed_batches", - metric.WithDescription("Number of currently processed batches"), + currentlyDispatchedBatchesGauge, _ = r.AddInt64DerivedGauge( + obsmetrics.ExporterKey+"/currently_dispatched_batches", + metric.WithDescription("Number of batches that are currently being sent"), metric.WithLabelKeys(obsmetrics.ExporterKey), metric.WithUnit(metricdata.UnitDimensionless)) From 7484784c1afc33b1c1adfe7d47f86b17840ccdb8 Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Fri, 3 Sep 2021 10:57:20 +0200 Subject: [PATCH 20/24] Doc clarification on enable_unstable flag --- exporter/exporterhelper/README.md | 43 ++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/exporter/exporterhelper/README.md b/exporter/exporterhelper/README.md index 3989af09512..3c5d6c5afeb 100644 --- a/exporter/exporterhelper/README.md +++ b/exporter/exporterhelper/README.md @@ -21,7 +21,8 @@ The following configuration options can be modified: User should calculate this as `num_seconds * requests_per_second` where: - `num_seconds` is the number of seconds to buffer in case of a backend outage - `requests_per_second` is the average number of requests per seconds. - - `persistent_storage_enabled` (default = false): When set, enables persistence via a file storage extension (note, `unable_unstable` compilation flag needs to be enabled first) + - `persistent_storage_enabled` (default = false): When set, enables persistence via a file storage extension + (note, `unable_unstable` build tag needs to be enabled first, see below for more details) - `resource_to_telemetry_conversion` - `enabled` (default = false): If `enabled` is `true`, all the resource attributes will be converted to metric labels by default. - `timeout` (default = 5s): Time to wait per individual attempt to send data to a backend. @@ -32,10 +33,13 @@ The full list of settings exposed for this helper exporter are documented [here] **Status: under development** -When `persistent_storage_enabled` is set to true, the queue is being buffered to disk by the -[file storage extension](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/extension/storage/filestorage). -It currently can be enabled only in OpenTelemetry Collector Contrib. Also, `unable_unstable` compilation flag needs to be enabled. +> :warning: The capability is under development and currently can be enabled only in OpenTelemetry +> Collector Contrib with `unable_unstable` build tag set. +When `persistent_storage_enabled` is set to true, the queue is being buffered to disk using +[file storage extension](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/extension/storage/filestorage). 
+If collector instance is killed while having some items in the persistent queue, on restart the items are being picked and +the exporting is continued. ``` ┌─Consumer #1─┐ @@ -70,4 +74,35 @@ It currently can be enabled only in OpenTelemetry Collector Contrib. Also, `unab │ │ │ │ └────────────────────────────────────── Requeuing ◄────── Retry limit exceeded ───┘ +``` + +Example: + +``` +receivers: + otlp: + protocols: + grpc: +exporters: + otlp: + endpoint: + sending_queue: + persistent_storage_enabled: true +extensions: + file_storage: + directory: /var/lib/storage/otc + timeout: 10s +service: + extensions: [file_storage] + pipelines: + metrics: + receivers: [otlp] + exporters: [otlp] + logs: + receivers: [otlp] + exporters: [otlp] + traces: + receivers: [otlp] + exporters: [otlp] + ``` \ No newline at end of file From b7a78a365de2508b438fc5369df23c2b153b0780 Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Fri, 3 Sep 2021 11:48:43 +0200 Subject: [PATCH 21/24] Fix small typo in README.md Co-authored-by: Dominik Rosiek --- exporter/exporterhelper/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exporter/exporterhelper/README.md b/exporter/exporterhelper/README.md index 3c5d6c5afeb..0a43c33f8a0 100644 --- a/exporter/exporterhelper/README.md +++ b/exporter/exporterhelper/README.md @@ -22,7 +22,7 @@ The following configuration options can be modified: - `num_seconds` is the number of seconds to buffer in case of a backend outage - `requests_per_second` is the average number of requests per seconds. - `persistent_storage_enabled` (default = false): When set, enables persistence via a file storage extension - (note, `unable_unstable` build tag needs to be enabled first, see below for more details) + (note, `enable_unstable` build tag needs to be enabled first, see below for more details) - `resource_to_telemetry_conversion` - `enabled` (default = false): If `enabled` is `true`, all the resource attributes will be converted to metric labels by default. - `timeout` (default = 5s): Time to wait per individual attempt to send data to a backend. @@ -34,7 +34,7 @@ The full list of settings exposed for this helper exporter are documented [here] **Status: under development** > :warning: The capability is under development and currently can be enabled only in OpenTelemetry -> Collector Contrib with `unable_unstable` build tag set. +> Collector Contrib with `enable_unstable` build tag set. When `persistent_storage_enabled` is set to true, the queue is being buffered to disk using [file storage extension](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/extension/storage/filestorage). 
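For reference, the `enable_unstable` tag mentioned in the README above is an ordinary Go build tag, so a local collector binary with the experimental queued-retry/persistent-queue code compiled in can be produced roughly as follows (a minimal sketch: it reuses the `GO111MODULE`/`CGO_ENABLED`/`-trimpath` flags and the `./cmd/otelcol` entry point from this repository's Makefile, while the output path is illustrative only):

```
# Build a collector binary that includes the files guarded by the
# "enable_unstable" build tag (the experimental persistent queue code).
# The -tags flag is the standard Go mechanism for selecting build-tagged files.
GO111MODULE=on CGO_ENABLED=0 go build -trimpath -tags enable_unstable -o ./bin/otelcol_unstable ./cmd/otelcol
```

The dedicated `*-unstable` Makefile targets and CI steps introduced in the next patch wrap this same idea for each supported OS/architecture pair.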
From 9ca46a07bd9ec82c9162deb8c4b8ea73714bfecf Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Fri, 3 Sep 2021 17:34:56 +0200 Subject: [PATCH 22/24] Include unstable collector in build artifacts --- .github/workflows/build-and-test.yml | 10 ++++++++++ Makefile | 23 +++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index d1121755aa1..564f881d990 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -167,3 +167,13 @@ jobs: with: name: collector-binaries path: ./bin.tar + - name: Build Unstable Collector for All Architectures + run: make binaries-all-sys-unstable + - name: Create Unstable Collector Binaries Archive + run: tar -cvf bin-unstable.tar ./bin/*unstable + - name: Upload Unstable Collector Binaries + uses: actions/upload-artifact@v2.2.4 + with: + name: collector-binaries-unstable + path: ./bin-unstable.tar + diff --git a/Makefile b/Makefile index 4af74b12a4c..4b555117e90 100644 --- a/Makefile +++ b/Makefile @@ -218,6 +218,9 @@ docker-otelcol: .PHONY: binaries-all-sys binaries-all-sys: binaries-darwin_amd64 binaries-darwin_arm64 binaries-linux_amd64 binaries-linux_arm64 binaries-windows_amd64 +.PHONY: binaries-all-sys-unstable +binaries-all-sys-unstable: binaries-darwin_amd64-unstable binaries-darwin_arm64-unstable binaries-linux_amd64-unstable binaries-linux_arm64-unstable binaries-windows_amd64-unstable + .PHONY: binaries-darwin_amd64 binaries-darwin_amd64: GOOS=darwin GOARCH=amd64 $(MAKE) build-binary-internal @@ -238,6 +241,26 @@ binaries-linux_arm64: binaries-windows_amd64: GOOS=windows GOARCH=amd64 EXTENSION=.exe $(MAKE) build-binary-internal +.PHONY: binaries-darwin_amd64 +binaries-darwin_amd64: + GOOS=darwin GOARCH=amd64 $(MAKE) build-binary-internal + +.PHONY: binaries-darwin_arm64-unstable +binaries-darwin_arm64-unstable: + GOOS=darwin GOARCH=arm64 $(MAKE) build-binary-internal-unstable + +.PHONY: binaries-linux_amd64-unstable +binaries-linux_amd64-unstable: + GOOS=linux GOARCH=amd64 $(MAKE) build-binary-internal-unstable + +.PHONY: binaries-linux_arm64-unstable +binaries-linux_arm64-unstable: + GOOS=linux GOARCH=arm64 $(MAKE) build-binary-internal-unstable + +.PHONY: binaries-windows_amd64-unstable +binaries-windows_amd64-unstable: + GOOS=windows GOARCH=amd64 EXTENSION=.exe $(MAKE) build-binary-internal-unstable + .PHONY: build-binary-internal build-binary-internal: GO111MODULE=on CGO_ENABLED=0 go build -trimpath -o ./bin/otelcol_$(GOOS)_$(GOARCH)$(EXTENSION) $(BUILD_INFO) ./cmd/otelcol From 532603125e99a19400a6e6f84683717b7e9a9ad8 Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Fri, 3 Sep 2021 17:37:29 +0200 Subject: [PATCH 23/24] Rename queued_retry experimental and inmemory implementation files --- .../{queued_retry_v2.go => queued_retry_experimental.go} | 2 +- .../{queued_retry_v1.go => queued_retry_inmemory.go} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename exporter/exporterhelper/{queued_retry_v2.go => queued_retry_experimental.go} (98%) rename exporter/exporterhelper/{queued_retry_v1.go => queued_retry_inmemory.go} (97%) diff --git a/exporter/exporterhelper/queued_retry_v2.go b/exporter/exporterhelper/queued_retry_experimental.go similarity index 98% rename from exporter/exporterhelper/queued_retry_v2.go rename to exporter/exporterhelper/queued_retry_experimental.go index 52d5048df85..08a928e80a5 100644 --- a/exporter/exporterhelper/queued_retry_v2.go +++ 
b/exporter/exporterhelper/queued_retry_experimental.go @@ -34,7 +34,7 @@ import ( "go.opentelemetry.io/collector/internal/obsreportconfig/obsmetrics" ) -// queued_retry_v2 includes the code for both memory-backed and persistent-storage backed queued retry helpers +// queued_retry_experimental includes the code for both memory-backed and persistent-storage backed queued retry helpers // enabled by setting "enable_unstable" build tag var ( diff --git a/exporter/exporterhelper/queued_retry_v1.go b/exporter/exporterhelper/queued_retry_inmemory.go similarity index 97% rename from exporter/exporterhelper/queued_retry_v1.go rename to exporter/exporterhelper/queued_retry_inmemory.go index a1d42bc4115..bfa067ee315 100644 --- a/exporter/exporterhelper/queued_retry_v1.go +++ b/exporter/exporterhelper/queued_retry_inmemory.go @@ -31,7 +31,7 @@ import ( "go.opentelemetry.io/collector/internal/obsreportconfig/obsmetrics" ) -// queued_retry_v1 includes the code for memory-backed (original) queued retry helper only +// queued_retry_inmemory includes the code for memory-backed (original) queued retry helper only // enabled when "enable_unstable" build tag is not set type queuedRetrySender struct { From 0a65246ac8e6f97d092230526a0c49a1ab650e6a Mon Sep 17 00:00:00 2001 From: Przemek Maciolek Date: Fri, 3 Sep 2021 17:43:01 +0200 Subject: [PATCH 24/24] Fix error in Makefile --- .circleci/config.yml | 2 +- Makefile | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 4619b0aab99..c85f6b1daab 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -114,7 +114,7 @@ jobs: - attach_to_workspace - run: name: Build collector for all archs - command: grep ^binaries-all-sys Makefile|fmt -w 1|tail -n +2|circleci tests split|xargs make + command: grep ^binaries-all-sys Makefile|fmt -w 1|grep -v binaries-all-sys|circleci tests split|xargs make - run: name: Log checksums to console command: shasum -a 256 bin/* diff --git a/Makefile b/Makefile index 4b555117e90..a9830786ea9 100644 --- a/Makefile +++ b/Makefile @@ -241,9 +241,9 @@ binaries-linux_arm64: binaries-windows_amd64: GOOS=windows GOARCH=amd64 EXTENSION=.exe $(MAKE) build-binary-internal -.PHONY: binaries-darwin_amd64 -binaries-darwin_amd64: - GOOS=darwin GOARCH=amd64 $(MAKE) build-binary-internal +.PHONY: binaries-darwin_amd64-unstable +binaries-darwin_amd64-unstable: + GOOS=darwin GOARCH=amd64 $(MAKE) build-binary-internal-unstable .PHONY: binaries-darwin_arm64-unstable binaries-darwin_arm64-unstable: