Skip to content

Commit e08b3b6

Browse files
committed
Configurable option to skip ads with 500 error on first entries block
A new boolean config option `Ingest.Skip500EntriesError` allows the indexer to skip advertisements when the publisher returns a 500 response with "failed to sync first entry". This value is run-time reloadable. The purpose of this is to skip ads that the publisher will never deliver content for. There is an issue with providers where they will not return entries from some advertisements, and permanently return a 500 result when trying to sync the entries.
1 parent 885bd54 commit e08b3b6

File tree

4 files changed

+26
-2
lines changed

4 files changed

+26
-2
lines changed

command/daemon.go

+1
Original file line numberDiff line numberDiff line change
@@ -612,6 +612,7 @@ func reloadConfig(cfgPath string, ingester *ingest.Ingester, reg *registry.Regis
612612

613613
if ingester != nil {
614614
ingester.RunWorkers(cfg.Ingest.IngestWorkerCount)
615+
ingester.Skip500EntriesError(cfg.Ingest.Skip500EntriesError)
615616
}
616617

617618
err = setLoggingConfig(cfg.Logging)

config/ingest.go

+7-2
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,9 @@ type Ingest struct {
5252
// IngestWorkerCount sets how many ingest worker goroutines to spawn. This
5353
// controls how many concurrent ingest from different providers we can handle.
5454
IngestWorkerCount int
55-
// MaxAsyncConcurrency sets the maximum number of concurrent asynchronous syncs
56-
// (started by announce messages). Set -1 for unlimited, 0 for default.
55+
// MaxAsyncConcurrency sets the maximum number of concurrent asynchronous
56+
// syncs (started by announce messages). Set -1 for unlimited, 0 for
57+
// default. This value is reloadable.
5758
MaxAsyncConcurrency int
5859
// MinimumKeyLength causes any multihash, that has a digest length less than
5960
// this, to be ignored.
@@ -69,6 +70,10 @@ type Ingest struct {
6970
// the announce so that other indexers can also receive it. This is always
7071
// false if configured to use an assigner.
7172
ResendDirectAnnounce bool
73+
// Skip500EntriesError, when true, skips advertisements for which the
74+
// publisher returns a 500 status code and an error message "failed to sync
75+
// first entry". This value is reloadable.
76+
Skip500EntriesError bool
7277
// SyncSegmentDepthLimit is the depth limit of a single sync in a series of
7378
// calls that collectively sync advertisements or their entries. The value
7479
// -1 disables the segmentation where the sync will be done in a single call

doc/config.md

+1
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,6 @@ The storetheindex daemon can reload some portions of its config without restarti
1313
- `Indexer.ConfigCheckInterval`
1414
- `Indexer.ShutdownTimeout`
1515
- `Ingest.IngestWorkerCount`
16+
- `Ingest.Skip500EntriesError`
1617
- `Logging`
1718
- `Peering`

internal/ingest/ingest.go

+17
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"fmt"
77
"io/fs"
88
"path"
9+
"strings"
910
"sync"
1011
"sync/atomic"
1112
"time"
@@ -166,6 +167,8 @@ type Ingester struct {
166167

167168
// Ingest rates
168169
ingestRates *rate.Map
170+
171+
skip500EntsErr atomic.Bool
169172
}
170173

171174
// NewIngester creates a new Ingester that uses a dagsync Subscriber to handle
@@ -208,6 +211,8 @@ func NewIngester(cfg config.Ingest, h host.Host, idxr indexer.Interface, reg *re
208211
return nil, err
209212
}
210213

214+
ing.skip500EntsErr.Store(cfg.Skip500EntriesError)
215+
211216
// Create and start subscriber. This also registers the storage hook to
212217
// index data as it is received.
213218
sub, err := dagsync.NewSubscriber(h, ing.dsTmp, ing.lsys, cfg.PubSubTopic,
@@ -258,6 +263,10 @@ func (ing *Ingester) MultihashesFromMirror() uint64 {
258263
return ing.mhsFromMirror.Load()
259264
}
260265

266+
// Skip500EntriesError enables or disables skipping of advertisements whose
// entries sync fails with a 500 status and a "failed to sync first entry"
// error. The flag is kept in an atomic.Bool, so it can be updated at run time
// (for example on config reload); the ingest worker logic loads it before
// processing an ad stack.
func (ing *Ingester) Skip500EntriesError(skip bool) {
267+
ing.skip500EntsErr.Store(skip)
268+
}
269+
261270
func (ing *Ingester) generalDagsyncBlockHook(_ peer.ID, c cid.Cid, actions dagsync.SegmentSyncActions) {
262271
// The only kind of block we should get by loading CIDs here should be
263272
// Advertisement.
@@ -1114,6 +1123,8 @@ func (ing *Ingester) ingestWorkerLogic(ctx context.Context, provider peer.ID, as
11141123
}
11151124
}
11161125

1126+
skip500EntsErr := ing.skip500EntsErr.Load()
1127+
11171128
total := len(assignment.adInfos)
11181129
log.Infow("Running worker on ad stack", "headAdCid", headAdCid, "numAdsToProcess", total)
11191130
var count int
@@ -1181,6 +1192,12 @@ func (ing *Ingester) ingestWorkerLogic(ctx context.Context, provider peer.ID, as
11811192
log.Errorw("Skipping ad because of a permanent error", "adCid", ai.cid, "err", err, "errKind", adIngestErr.state)
11821193
stats.Record(context.Background(), metrics.AdIngestSkippedCount.M(1))
11831194
err = nil
1195+
case adIngestSyncEntriesErr:
1196+
if skip500EntsErr && strings.Contains(err.Error(), "failed to sync first entry") && strings.Contains(err.Error(), ": 500") {
1197+
log.Errorw("Skipping ad because of a permanent 500 error", "adCid", ai.cid, "err", err, "errKind", adIngestErr.state)
1198+
stats.Record(context.Background(), metrics.AdIngestSkippedCount.M(1))
1199+
err = nil
1200+
}
11841201
}
11851202
stats.RecordWithOptions(context.Background(),
11861203
stats.WithMeasurements(metrics.AdIngestErrorCount.M(1)),

0 commit comments

Comments
 (0)