Skip to content

Commit d50d94b

Browse files
committed
Fix participation lease removal for wrong network
When the manifest changes, depending on the timing, it is possible for newly generated valid leases to be removed if the signing loop attempts to sign messages that result from the previous network progressing. Here is an example scenario, in order, that was causing itests to fail: * participants get a lease for network A up to instance 5 * network A progresses to instance 6 * the manifest changes the network name to B * participants get a new lease for network B up to instance 5 * the sign loop receives a message from network A, instance 6 * `getParticipantsByInstance` lazily removes the leases, since it only checks the instance * the node ends up with no participants and gets stuck. To fix this: 1) check whether the participants asked for are within the current network, and if not, refuse to participate; 2) check the network name, as well as the instance, when lazily removing expired leases.
1 parent 1b15207 commit d50d94b

File tree

2 files changed

+15
-4
lines changed

2 files changed

+15
-4
lines changed

chain/lf3/f3.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ func (fff *F3) runSigningLoop(ctx context.Context) {
155155
clear(alreadyParticipated)
156156
}
157157

158-
participants := fff.leaser.getParticipantsByInstance(mb.Payload.Instance)
158+
participants := fff.leaser.getParticipantsByInstance(mb.NetworkName, mb.Payload.Instance)
159159
for _, id := range participants {
160160
if _, ok := alreadyParticipated[id]; ok {
161161
continue

chain/lf3/participation_lease.go

+14-3
Original file line numberDiff line numberDiff line change
@@ -112,15 +112,26 @@ func (l *leaser) participate(ticket api.F3ParticipationTicket) (api.F3Participat
112112
return newLease, nil
113113
}
114114

115-
func (l *leaser) getParticipantsByInstance(instance uint64) []uint64 {
115+
func (l *leaser) getParticipantsByInstance(network gpbft.NetworkName, instance uint64) []uint64 {
116116
l.mutex.Lock()
117117
defer l.mutex.Unlock()
118+
currentManifest, _ := l.status()
119+
currentNetwork := currentManifest.NetworkName
120+
if currentNetwork != network {
121+
log.Warnf("no participants for network: current network (%s) does not match requested network (%s) at instance %d", currentNetwork, network, instance)
122+
return nil
123+
}
118124
var participants []uint64
119125
for id, lease := range l.leases {
120-
if instance > lease.ToInstance() {
126+
if currentNetwork != lease.Network {
127+
// Lazily delete any lease that does not belong to network, likely acquired from
128+
// prior manifests.
129+
delete(l.leases, id)
130+
log.Warnf("lost F3 participation lease for miner %d at instance %d due to network mismatch: %s != %s", id, instance, currentNetwork, lease.Network)
131+
} else if instance > lease.ToInstance() {
121132
// Lazily delete the expired leases.
122133
delete(l.leases, id)
123-
log.Warnf("lost F3 participation lease for miner %d", id)
134+
log.Warnf("lost F3 participation lease for miner %d due to instance (%d) > lease to instance (%d)", id, instance, lease.ToInstance())
124135
} else {
125136
participants = append(participants, id)
126137
}

0 commit comments

Comments
 (0)