Skip to content

Commit e769bdf

Browse files
Kubuxu and Stebalien authored
fix(f3): fix hot loop in F3 participation (#12575)
* fix(f3): fix hot loop in F3 participation * fix(f3): Correct lease expiration log message * feat(f3): log not-before and not-after for new leases * test(f3): make the f3-enabled test wait for at least one lease to expire --------- Signed-off-by: Jakub Sztandera <[email protected]> Co-authored-by: Steven Allen <[email protected]>
1 parent a62fbc1 commit e769bdf

File tree

3 files changed

+32
-13
lines changed

3 files changed

+32
-13
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
- Update `EthGetBlockByNumber` to return a pointer to ethtypes.EthBlock or nil for null rounds. ([filecoin-project/lotus#12529](https://github.com/filecoin-project/lotus/pull/12529))
1919
- Reduce size of embedded genesis CAR files by removing WASM actor blocks and compressing with zstd. This reduces the `lotus` binary size by approximately 10 MiB. ([filecoin-project/lotus#12439](https://github.com/filecoin-project/lotus/pull/12439))
2020
- Add ChainSafe operated Calibration archival node to the bootstrap list ([filecoin-project/lotus#12517](https://github.com/filecoin-project/lotus/pull/12517))
21+
- Fix hot loop in F3 participation API ([filecoin-project/lotus#12575](https://github.com/filecoin-project/lotus/pull/12575))
2122

2223
## Bug Fixes
2324

itests/f3_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ func TestF3_Enabled(t *testing.T) {
4646
blocktime := 100 * time.Millisecond
4747
e := setup(t, blocktime)
4848

49-
e.waitTillF3Instance(3, 25*time.Second)
49+
e.waitTillF3Instance(modules.F3LeaseTerm+1, 40*time.Second)
5050
}
5151

5252
// Test that checks that F3 can be rebootstrapped by changing the manifest

node/modules/storageminer.go

+30-12
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ import (
4747
"github.com/filecoin-project/lotus/storage/wdpost"
4848
)
4949

50+
// F3LeaseTerm is the number of instances the miner will attempt to lease from nodes.
51+
const F3LeaseTerm = 5
52+
5053
type UuidWrapper struct {
5154
v1api.FullNode
5255
}
@@ -380,15 +383,28 @@ func newF3Participator(node v1api.FullNode, participant dtypes.MinerAddress, bac
380383

381384
func (p *f3Participator) participate(ctx context.Context) error {
382385
for ctx.Err() == nil {
383-
if ticket, err := p.tryGetF3ParticipationTicket(ctx); err != nil {
384-
return err
385-
} else if lease, participating, err := p.tryF3Participate(ctx, ticket); err != nil {
386+
start := time.Now()
387+
ticket, err := p.tryGetF3ParticipationTicket(ctx)
388+
if err != nil {
386389
return err
387-
} else if !participating {
388-
continue
389-
} else if err := p.awaitLeaseExpiry(ctx, lease); err != nil {
390+
}
391+
lease, participating, err := p.tryF3Participate(ctx, ticket)
392+
if err != nil {
390393
return err
391394
}
395+
if participating {
396+
if err := p.awaitLeaseExpiry(ctx, lease); err != nil {
397+
return err
398+
}
399+
}
400+
const minPeriod = 500 * time.Millisecond
401+
if sinceLastLoop := time.Since(start); sinceLastLoop < minPeriod {
402+
select {
403+
case <-time.After(minPeriod - sinceLastLoop):
404+
case <-ctx.Done():
405+
return ctx.Err()
406+
}
407+
}
392408
log.Info("Restarting F3 participation")
393409
}
394410
return ctx.Err()
@@ -449,7 +465,11 @@ func (p *f3Participator) tryF3Participate(ctx context.Context, ticket api.F3Part
449465
p.backOff(ctx)
450466
continue
451467
default:
452-
log.Infow("Successfully acquired F3 participation lease.", "issuer", lease.Issuer, "expiry", lease.ValidityTerm)
468+
log.Infow("Successfully acquired F3 participation lease.",
469+
"issuer", lease.Issuer,
470+
"not-before", lease.FromInstance,
471+
"not-after", lease.FromInstance+lease.ValidityTerm,
472+
)
453473
p.previousTicket = ticket
454474
return lease, true, nil
455475
}
@@ -485,8 +505,8 @@ func (p *f3Participator) awaitLeaseExpiry(ctx context.Context, lease api.F3Parti
485505
}
486506
log.Errorw("Failed to check F3 progress while awaiting lease expiry. Retrying after backoff.", "attempts", p.backoff.Attempt(), "backoff", p.backoff.Duration(), "err", err)
487507
p.backOff(ctx)
488-
case progress.ID+2 >= lease.ValidityTerm:
489-
log.Infof("F3 progressed (%d) to within two instances of lease expiry (%d). Restarting participation.", progress.ID, lease.ValidityTerm)
508+
case progress.ID+2 >= lease.FromInstance+lease.ValidityTerm:
509+
log.Infof("F3 progressed (%d) to within two instances of lease expiry (%d+%d). Restarting participation.", progress.ID, lease.FromInstance, lease.ValidityTerm)
490510
return nil
491511
default:
492512
remainingInstanceLease := lease.ValidityTerm - progress.ID
@@ -529,8 +549,6 @@ func F3Participation(mctx helpers.MetricsCtx, lc fx.Lifecycle, node v1api.FullNo
529549
// checkProgressInterval defines the duration between progress checks in normal operation mode.
530550
// This interval is used when there are no errors in retrieving the current progress.
531551
checkProgressInterval = 10 * time.Second
532-
// leaseTerm The number of instances the miner will attempt to lease from nodes.
533-
leaseTerm = 5
534552
)
535553

536554
participator := newF3Participator(
@@ -543,7 +561,7 @@ func F3Participation(mctx helpers.MetricsCtx, lc fx.Lifecycle, node v1api.FullNo
543561
},
544562
checkProgressMaxAttempts,
545563
checkProgressInterval,
546-
leaseTerm,
564+
F3LeaseTerm,
547565
)
548566

549567
ctx, cancel := context.WithCancel(mctx)

0 commit comments

Comments
 (0)