From b202cc895f9159f17fb26de37a4649a7780ebaae Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 16 Aug 2022 12:52:26 +0200 Subject: [PATCH 01/65] Start work on batching in dispute-distribution. --- .../dispute-distribution/src/receiver/mod.rs | 2 +- .../src/node/disputes/dispute-distribution.md | 24 +++++++++++++------ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/node/network/dispute-distribution/src/receiver/mod.rs b/node/network/dispute-distribution/src/receiver/mod.rs index 9193947e78d1..47d8f11e5553 100644 --- a/node/network/dispute-distribution/src/receiver/mod.rs +++ b/node/network/dispute-distribution/src/receiver/mod.rs @@ -58,7 +58,7 @@ const COST_INVALID_SIGNATURE: Rep = Rep::Malicious("Signatures were invalid."); const COST_INVALID_CANDIDATE: Rep = Rep::Malicious("Reported candidate was not available."); const COST_NOT_A_VALIDATOR: Rep = Rep::CostMajor("Reporting peer was not a validator."); -/// How many statement imports we want to issue in parallel: +/// How many statement imports we want to issue in parallel (for different candidates): pub const MAX_PARALLEL_IMPORTS: usize = 10; /// State for handling incoming `DisputeRequest` messages. diff --git a/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md b/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md index b63ea2bdcbf0..90fcb523a0ee 100644 --- a/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md +++ b/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md @@ -192,10 +192,7 @@ Considered attack vectors: won't be able to import any valid disputes and we could run out of resources, if we tried to process them all in parallel. -For tackling 1, we make sure to not occupy resources before we don't know a -candidate is available. So we will not record statements to disk until we -recovered availability for the candidate or know by some other means that the -dispute is legit. 
+1 is handled by the dispute-coordinator. For 2, we will pick up on any dispute on restart, so assuming that any realistic memory filling attack will take some time, we should be able to participate in a @@ -209,11 +206,10 @@ substrate handles incoming requests in a somewhat fair way. Still we want some defense mechanisms, at the very least we need to make sure to not exhaust resources. -The dispute coordinator will notify us on import about unavailable candidates or +The dispute coordinator will notify us on import about problematic candidates or otherwise invalid imports and we can disconnect from such peers/decrease their reputation drastically. This alone should get us quite far with regards to queue -monopolization, as availability recovery is expected to fail relatively quickly -for unavailable data. +monopolization. Still if those spam messages come at a very high rate, we might still run out of resources if we immediately call `DisputeCoordinatorMessage::ImportStatements` @@ -268,6 +264,20 @@ blocked a peer. For valid disputes, incoming requests can become bursty. On the other hand we will also be very quick in processing them. A channel size of 100 requests seems plenty and should be able to handle bursts adequately. +### Import Performance Considerations + +The dispute coordinator is not optimized for importing votes individually and +will expose very bad import performance in that case. Therefore we will batch +together votes over some time and import votes in batches. + +To trigger participation immediately, we will always import the very first +message for a candidate immediately, but we will keep a record that we have +started an import for that candidate and will batch up more votes for that +candidate over some time and then import them all at once. + +If we ignore duplicate votes, even if imports keep trickling in - it is bounded, +because at some point all validators have voted. 
+ ### Node Startup On startup we need to check with the dispute coordinator for any ongoing From 192d73b4e416410d2ab925c7fdffca5802d99b8a Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Wed, 17 Aug 2022 12:56:58 +0200 Subject: [PATCH 02/65] Guide work. --- .../src/node/disputes/dispute-distribution.md | 222 +++++++++--------- 1 file changed, 106 insertions(+), 116 deletions(-) diff --git a/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md b/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md index 90fcb523a0ee..8733e411a2a7 100644 --- a/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md +++ b/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md @@ -15,6 +15,13 @@ This design should result in a protocol that is: ## Protocol +Distributing disputes needs to be a reliable protocol. We would like to make as +sure as possible that our vote got properly delivered to all concerned +validators. For this to work, this subsystem won't be gossip based, but instead +will use a request/response protocol for application level confirmations. The +request will be the payload (the actual votes/statements), the response will +be the confirmation. See [below][#wire-format]. + ### Input [`DisputeDistributionMessage`][DisputeDistributionMessage] @@ -107,16 +114,7 @@ struct VotesResponse { } ``` -## Functionality - -Distributing disputes needs to be a reliable protocol. We would like to make as -sure as possible that our vote got properly delivered to all concerned -validators. For this to work, this subsystem won't be gossip based, but instead -will use a request/response protocol for application level confirmations. The -request will be the payload (the actual votes/statements), the response will -be the confirmation. See [above][#wire-format]. 
- -### Starting a Dispute +## Starting a Dispute A dispute is initiated once a node sends the first `DisputeRequest` wire message, which must contain an "invalid" vote and a "valid" vote. @@ -132,7 +130,7 @@ conflicting votes available, hence we have a valid dispute. Nodes will still need to check whether the disputing votes are somewhat current and not some stale ones. -### Participating in a Dispute +## Participating in a Dispute Upon receiving a `DisputeRequest` message, a dispute distribution will trigger the import of the received votes via the dispute coordinator @@ -144,13 +142,13 @@ except that if the local node deemed the candidate valid, the `SendDispute` message will contain a valid vote signed by our node and will contain the initially received `Invalid` vote. -Note, that we rely on the coordinator to check availability for spam protection -(see below). +Note, that we rely on the coordinator to check validity of a dispute for spam +protection (see below). -### Sending of messages +## Sending of messages Starting and participating in a dispute are pretty similar from the perspective -of dispute distribution. Once we receive a `SendDispute` message we try to make +of dispute distribution. Once we receive a `SendDispute` message, we try to make sure to get the data out. We keep track of all the parachain validators that should see the message, which are all the parachain validators of the session where the dispute happened as they will want to participate in the dispute. In @@ -159,110 +157,102 @@ session (which might be the same or not and may change during the dispute). Those authorities will not participate in the dispute, but need to see the statements so they can include them in blocks. -We keep track of connected parachain validators and authorities and will issue -warnings in the logs if connected nodes are less than two thirds of the -corresponding sets. We also only consider a message transmitted, once we -received a confirmation message. 
If not, we will keep retrying getting that -message out as long as the dispute is deemed alive. To determine whether a -dispute is still alive we will issue a +### Reliability + +We only consider a message transmitted, once we received a confirmation message. +If not, we will keep retrying getting that message out as long as the dispute is +deemed alive. To determine whether a dispute is still alive we will issue a `DisputeCoordinatorMessage::ActiveDisputes` message before each retry run. Once a dispute is no longer live, we will clean up the state accordingly. -### Reception & Spam Considerations - -Because we are not forwarding foreign statements, spam is less of an issue in -comparison to gossip based systems. Rate limiting should be implemented at the -substrate level, see -[#7750](https://github.com/paritytech/substrate/issues/7750). Still we should -make sure that it is not possible via spamming to prevent a dispute concluding -or worse from getting noticed. - -Considered attack vectors: - -1. Invalid disputes (candidate does not exist) could make us - run out of resources. E.g. if we recorded every statement, we could run out - of disk space eventually. -2. An attacker can just flood us with notifications on any notification - protocol, assuming flood protection is not effective enough, our unbounded - buffers can fill up and we will run out of memory eventually. -3. An attacker could participate in a valid dispute, but send its votes multiple - times. -4. Attackers could spam us at a high rate with invalid disputes. Our incoming - queue of requests could get monopolized by those malicious requests and we - won't be able to import any valid disputes and we could run out of resources, - if we tried to process them all in parallel. - -1 is handled by the dispute-coordinator. 
- -For 2, we will pick up on any dispute on restart, so assuming that any realistic -memory filling attack will take some time, we should be able to participate in a -dispute under such attacks. - -Importing/discarding redundant votes should be pretty quick, so measures with -regards to 4 should suffice to prevent 3, from doing any real harm. - -For 4, full monopolization of the incoming queue should not be possible assuming -substrate handles incoming requests in a somewhat fair way. Still we want some -defense mechanisms, at the very least we need to make sure to not exhaust -resources. - -The dispute coordinator will notify us on import about problematic candidates or -otherwise invalid imports and we can disconnect from such peers/decrease their -reputation drastically. This alone should get us quite far with regards to queue -monopolization. - -Still if those spam messages come at a very high rate, we might still run out of -resources if we immediately call `DisputeCoordinatorMessage::ImportStatements` -on each one of them. Secondly with our assumption of 1/3 dishonest validators, -getting rid of all of them will take some time, depending on reputation timeouts -some of them might even be able to reconnect eventually. - -To mitigate those issues we will process dispute messages with a maximum -parallelism `N`. We initiate import processes for up to `N` candidates in -parallel. Once we reached `N` parallel requests we will start back pressuring on -the incoming requests. This saves us from resource exhaustion. - -To reduce impact of malicious nodes further, we can keep track from which nodes the -currently importing statements came from and will drop requests from nodes that -already have imports in flight. - -Honest nodes are not expected to send dispute statements at a high rate, but -even if they did: - -- we will import at least the first one and if it is valid it will trigger a - dispute, preventing finality. 
-- Chances are good that the first sent candidate from a peer is indeed the - oldest one (if they differ in age at all). -- for the dropped request any honest node will retry sending. -- there will be other nodes notifying us about that dispute as well. -- honest votes have a speed advantage on average. Apart from the very first - dispute statement for a candidate, which might cause the availability recovery - process, imports of honest votes will be super fast, while for spam imports - they will always take some time as we have to wait for availability to fail. - -So this general rate limit, that we drop requests from same peers if they come -faster than we can import the statements should not cause any problems for -honest nodes and is in their favor. - -Size of `N`: The larger `N` the better we can handle distributed flood attacks -(see previous paragraph), but we also get potentially more availability recovery -processes happening at the same time, which slows down the individual processes. -And we rather want to have one finish quickly than lots slowly at the same time. -On the other hand, valid disputes are expected to be rare, so if we ever exhaust -`N` it is very likely that this is caused by spam and spam recoveries don't cost -too much bandwidth due to empty responses. - -Considering that an attacker would need to attack many nodes in parallel to have -any effect, an `N` of 10 seems to be a good compromise. For honest requests, most -of those imports will likely concern the same candidate, and for dishonest ones -we get to disconnect from up to ten colluding adversaries at a time. - -For the size of the channel for incoming requests: Due to dropping of repeated -requests from same nodes we can make the channel relatively large without fear -of lots of spam requests sitting there wasting our time, even after we already -blocked a peer. For valid disputes, incoming requests can become bursty. On the -other hand we will also be very quick in processing them. 
A channel size of 100 -requests seems plenty and should be able to handle bursts adequately. +### Order + +We assume `SendDispute` messages are coming in an order of importance, hence +`dispute-distribution` will make sure to send out network messages in the same +order, even on retry. + +### Rate Limit + +For spam protection (see below), we employ an artifical rate limiting on sending +out messages in order to not hit the rate limit at the receiving side, which +would result in our messages getting dropped and our reputation getting reduced. + +## Reception + +As we shall see the receiving side is mostly about handling spam and ensuring +the dispute-coordinator learns about disputes as fast as possible. + +Goals for the receiving side: + +1. Get new disputes to the dispute-coordinator as fast as possible, so + prioritization can happen properly. +2. Batch votes per disputes as much as possible for good import performance. +3. Prevent malicous nodes exhausting node resources by sending lots of messages. +4. Prevent malicous nodes from sending so many messages/(fake) disputes, + preventing us from concluding good ones. + +Goal 1 and 2 seem to be conflicting, but an easy compromise is possible: When +learning about a new dispute, we will import the vote immediately, making the +dispute coordinator aware and also getting immediate feedback on the validity. +Then if valid we can batch further incoming votes, with less time constraints as +the dispute-coordinator already knows about the dispute. + +Goal 3 and 4 are obviously very related and both can easily be solved via rate +limiting as we shall see below. Rate limits should already be implemented at the +substrate level, but [are +not](https://github.com/paritytech/substrate/issues/7750) at the time of +writing. But even if they were, the enforce substrate limits would likely not be +configurable and thus be still to high for our needs as we can rely on the +following observations: + +1. 
Each honest validator will only send one message (apart from duplicates on + timeout) per candidate/dispute. +2. An honest validator needs to fully recover availability and validate the + candidate for casting a vote. + +With these two observations, we can conclude that honest validators will usually +not send messages at a high rate. We can therefore enforce conservative rate +limits and thus minimize harm spamming malicious nodes can have. + +Before we dive into how rate limiting solves all spam issues elegantly, let's +further discuss that honest behaviour further: + +What about session changes? Here we might have to inform a new validator set of +lots of already existing disputes at once. + +With observation 1) and a rate limit that is per peer, we are still good: + +Let's assume a rate limit of one message per 200ms per sender. This means 5 +messages from each validator per second. 5 messages means 5 disputes! +Conclusively, we will be able to conclude 5 disputes per second - no matter what +malicious actors are doing. This is assuming dispute messages are sent ordered, +but even if not perfectly ordered: On average it will be 5 disputes per second. + +This is good enough! All those disputes are valid ones and will result in +slashing. Let's assume all of them conclude `valid`, and we slash 1% on those. +This will still mean that nodes get slashed 100% in just 20 seconds. + +In addition participation is expected to take longer, which means on average we +can import/conclude disputes faster than they are generated - regardless of +dispute spam! + +For nodes that have been offline for a while, the same argument as for session +changes holds, but matters even less: We assume 2/3 of nodes to be online, so +even if the worst case 1/3 offline happens and they could not import votes fast +enough (as argued above, they in fact can) it would not matter for consensus. 
+ +### Rate Limiting + +As suggested previously, rate limiting allows to mitigate all threats that come +from malicous actors trying to overwhelm the system in order to get away +without a slash. In this section we will explain how in greater detail. + +### "Unlimited" Imports + +We want to make sure the dispute coordinator learns about _all_ disputes, so it +can prioritize correctly. + +Rate limiting also helps with this goal. Some math. ### Import Performance Considerations From a2f8fde33bcc6c58135fe8dfe8707bbcc8498868 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 18 Aug 2022 17:51:00 +0200 Subject: [PATCH 03/65] More guide changes. Still very much WIP. --- .../src/node/disputes/dispute-distribution.md | 117 +++++++++++++++--- 1 file changed, 98 insertions(+), 19 deletions(-) diff --git a/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md b/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md index 8733e411a2a7..a81d140642c8 100644 --- a/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md +++ b/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md @@ -187,9 +187,11 @@ Goals for the receiving side: 1. Get new disputes to the dispute-coordinator as fast as possible, so prioritization can happen properly. 2. Batch votes per disputes as much as possible for good import performance. -3. Prevent malicous nodes exhausting node resources by sending lots of messages. -4. Prevent malicous nodes from sending so many messages/(fake) disputes, +3. Prevent malicious nodes exhausting node resources by sending lots of messages. +4. Prevent malicious nodes from sending so many messages/(fake) disputes, preventing us from concluding good ones. +5. Limit ability of malicious nodes of delaying the vote import due to batching + logic. 
Goal 1 and 2 seem to be conflicting, but an easy compromise is possible: When learning about a new dispute, we will import the vote immediately, making the @@ -199,11 +201,10 @@ the dispute-coordinator already knows about the dispute. Goal 3 and 4 are obviously very related and both can easily be solved via rate limiting as we shall see below. Rate limits should already be implemented at the -substrate level, but [are -not](https://github.com/paritytech/substrate/issues/7750) at the time of -writing. But even if they were, the enforce substrate limits would likely not be -configurable and thus be still to high for our needs as we can rely on the -following observations: +substrate level, but [are not](https://github.com/paritytech/substrate/issues/7750) +at the time of writing. But even if they were, the enforced substrate limits would +likely not be configurable and thus would still be to high for our needs as we can +rely on the following observations: 1. Each honest validator will only send one message (apart from duplicates on timeout) per candidate/dispute. @@ -232,9 +233,18 @@ This is good enough! All those disputes are valid ones and will result in slashing. Let's assume all of them conclude `valid`, and we slash 1% on those. This will still mean that nodes get slashed 100% in just 20 seconds. -In addition participation is expected to take longer, which means on average we -can import/conclude disputes faster than they are generated - regardless of -dispute spam! +One could also think that in addition participation is expected to take longer, +which means on average we can import/conclude disputes faster than they are +generated - regardless of dispute spam. Unfortunately this is not necessarily +true: There might be parachains with very light load where recovery and +validation can be accomplished very quickly - maybe faster than we can import +those disputes. 
+ +This is probably an argument for not imposing a too low rate limit, although the +issue is more general. Even without any rate limit, if an attacker generates +disputes at a very high rate, nodes will be having trouble keeping participation +up, hence the problem should be mitigated at a [more fundamental +layer](https://github.com/paritytech/polkadot/issues/5898). For nodes that have been offline for a while, the same argument as for session changes holds, but matters even less: We assume 2/3 of nodes to be online, so @@ -244,15 +254,84 @@ enough (as argued above, they in fact can) it would not matter for consensus. ### Rate Limiting As suggested previously, rate limiting allows to mitigate all threats that come -from malicous actors trying to overwhelm the system in order to get away -without a slash. In this section we will explain how in greater detail. - -### "Unlimited" Imports - -We want to make sure the dispute coordinator learns about _all_ disputes, so it -can prioritize correctly. - -Rate limiting also helps with this goal. Some math. +from malicious actors trying to overwhelm the system in order to get away without +a slash, when it comes to dispute-distribution. In this section we will explain +how in greater detail. + +The idea is to open a queue with limited size for each peer. We will process +incoming messages as fast as we can by doing the following: + +1. Check that the sending peer is actually a valid authority - otherwise drop + message and decrease reputation/disconnect. +2. Put message on the peer's queue, if queue is full - drop it. + +Every `RATE_LIMIT` seconds (or rather milliseconds), we pause processing +incoming requests to go a full circle and process one message from each queue. +Processing means `Batching` as explained in the next section. + +### Batching + +To achieve goal 2 we will batch incoming votes/messages together before passing +them on as a single batch to the `dispute-coordinator`. 
To adhere to goal 1 as +well, we will do the following: + +1. For an incoming message, we check whether we have an existing batch for that + candidate, if not we import directly to the dispute-coordinator, as we have + to assume this is concerning a new dispute. +2. We open a batch and start collecting incoming messages for that candidate, + instead of immediately forwarding. +4. We keep collecting votes in the batch until we received less than + `MIN_KEEP_BATCH_ALIVE_VOTES` unique votes in the last `BATCH_COLLECTING_INTERVAL`. This is + important to accommodate for goal 5 and also 3. +5. We send the whole batch to the dispute-coordinator. + +This together with rate limiting explained above ensures we will be able to +process valid disputes: We can limit the number of simultaneous existing batches +to some high value, but can be rather certain that this limit will never be +reached - hence we won't drop valid disputes. + +Let's assume `MIN_KEEP_BATCH_ALIVE_VOTES` is 10, `BATCH_COLLECTING_INTERVAL` +is `500ms` and above `RATE_LIMIT` is `100ms`. 1/3 of validators are malicious, +so for 1000 this means around 330 malicious actors worst case. + +All those actors can send a message every `100ms`, that is 10 per second. This +means at the begin of an attack they can open up around 3300 batches. Each +containing two votes. So memory usage is still negligible. In reality it is even +less, as we also demand 10 new votes to trickle in per batch in order to keep it +alive, every `500ms`. Hence for the first second, each batch requires 20 votes +each. Each message is 2 votes, so this means 10 messages per batch. Hence to +keep those batches alive 10 attackers are needed for each batch. This reduces +the number of opened batches by a factor of 10: So we only have 330 batches in 1 +second - each containing 20 votes. + +The next second: In order to further grow memory usage, attackers have to +maintain 10 messages per batch and second. 
Number of batches equals the number +of attackers, each has 10 messages per second, all are needed to maintain the +batches in memory. Therefore we have a hard cap of around 330 (number of +malicious nodes) open batches. Each can be filled with number of malicious +actor's votes. So 330 batches with each 330 votes: Let's assume approximately 100 +bytes per signature/vote. This results in a worst case memory usage of 330 * 330 +* 100 ~= 10 MiB. + +For 10_000 validators, we are already in the Gigabyte range, which means that +with a validator set that large we might want to be more strict with the rate limit or +require a larger rate of incoming votes per batch to keep them alive. + +For a thousand validators a limit on batches of around 1000 should never be +reached in practice. Hence due to rate limiting we have a very good chance to +not ever having to drop a potential valid dispute due to some resource limit. + +Further safe guards: The dispute-coordinator actually confirms/denies imports. +So once we receive a denial by the dispute-coordinator for the initial imported +votes, we can opt into flushing the batch immediately and importing the votes. +This swaps memory usage for more CPU usage, but if that import is deemed invalid +again we can immediately decrease the reputation of the sending peers, so this +should be a net win. + +Instead of filling batches to maximize memory usage, attackers could also try to +overwhelm the dispute coordinator by only sending votes for new candidates all +the time. This attack vector is mitigated by decreasing the peer's reputation on +denial of the invalid imports by the coordinator. ### Import Performance Considerations From 5e2f4a5e28d5172e9d12851b6c9afac73d7b44db Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Aug 2022 12:08:36 +0200 Subject: [PATCH 04/65] Finish guide changes. 
--- .../src/node/disputes/dispute-distribution.md | 23 ++++--------------- 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md b/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md index a81d140642c8..b34dddcffb6f 100644 --- a/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md +++ b/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md @@ -173,7 +173,7 @@ order, even on retry. ### Rate Limit -For spam protection (see below), we employ an artifical rate limiting on sending +For spam protection (see below), we employ an artificial rate limiting on sending out messages in order to not hit the rate limit at the receiving side, which would result in our messages getting dropped and our reputation getting reduced. @@ -288,7 +288,7 @@ well, we will do the following: This together with rate limiting explained above ensures we will be able to process valid disputes: We can limit the number of simultaneous existing batches to some high value, but can be rather certain that this limit will never be -reached - hence we won't drop valid disputes. +reached - hence we won't drop valid disputes: Let's assume `MIN_KEEP_BATCH_ALIVE_VOTES` is 10, `BATCH_COLLECTING_INTERVAL` is `500ms` and above `RATE_LIMIT` is `100ms`. 1/3 of validators are malicious, @@ -330,22 +330,9 @@ should be a net win. Instead of filling batches to maximize memory usage, attackers could also try to overwhelm the dispute coordinator by only sending votes for new candidates all -the time. This attack vector is mitigated by decreasing the peer's reputation on -denial of the invalid imports by the coordinator. - -### Import Performance Considerations - -The dispute coordinator is not optimized for importing votes individually and -will expose very bad import performance in that case. Therefore we will batch -together votes over some time and import votes in batches. 
- -To trigger participation immediately, we will always import the very first -message for a candidate immediately, but we will keep a record that we have -started an import for that candidate and will batch up more votes for that -candidate over some time and then import them all at once. - -If we ignore duplicate votes, even if imports keep trickling in - it is bounded, -because at some point all validators have voted. +the time. This attack vector is mitigated also by above rate limit and +decreasing the peer's reputation on denial of the invalid imports by the +coordinator. ### Node Startup From 585ec6df8eb6f35b6371cc1cbc9508e8dd3f114a Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 22 Aug 2022 13:02:12 +0200 Subject: [PATCH 05/65] Clarification --- .../src/node/disputes/dispute-distribution.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md b/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md index b34dddcffb6f..dd59821e2955 100644 --- a/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md +++ b/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md @@ -161,7 +161,8 @@ statements so they can include them in blocks. We only consider a message transmitted, once we received a confirmation message. If not, we will keep retrying getting that message out as long as the dispute is -deemed alive. To determine whether a dispute is still alive we will issue a +deemed alive. To determine whether a dispute is still alive we will the +`dispute-coordinator` for a list of all still active disputes via a `DisputeCoordinatorMessage::ActiveDisputes` message before each retry run. Once a dispute is no longer live, we will clean up the state accordingly. 
From 7e0291003ac737d9e94227fa98d240ee6183708c Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Aug 2022 10:07:37 +0200 Subject: [PATCH 06/65] Adjust argument about slashing. --- .../src/node/disputes/dispute-distribution.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md b/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md index dd59821e2955..b0d873a9df76 100644 --- a/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md +++ b/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md @@ -231,10 +231,11 @@ malicious actors are doing. This is assuming dispute messages are sent ordered, but even if not perfectly ordered: On average it will be 5 disputes per second. This is good enough! All those disputes are valid ones and will result in -slashing. Let's assume all of them conclude `valid`, and we slash 1% on those. -This will still mean that nodes get slashed 100% in just 20 seconds. +slashing and disabling of validators. Let's assume all of them conclude `valid`, +and we disable validators only after 100 raised concluding valid disputes, we +would still start disabling misbehaving validators in only 20 seconds. -One could also think that in addition participation is expected to take longer, +One could also think that in addition participation is expected to take longer, which means on average we can import/conclude disputes faster than they are generated - regardless of dispute spam. Unfortunately this is not necessarily true: There might be parachains with very light load where recovery and @@ -242,7 +243,7 @@ validation can be accomplished very quickly - maybe faster than we can import those disputes. This is probably an argument for not imposing a too low rate limit, although the -issue is more general. 
Even without any rate limit, if an attacker generates +issue is more general: Even without any rate limit, if an attacker generates disputes at a very high rate, nodes will be having trouble keeping participation up, hence the problem should be mitigated at a [more fundamental layer](https://github.com/paritytech/polkadot/issues/5898). From 4e170e2aaeebf6191de550b6ede3a0756034d877 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 23 Aug 2022 11:54:26 +0200 Subject: [PATCH 07/65] WIP: Add constants to receiver. --- .../dispute-distribution/src/receiver/mod.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/node/network/dispute-distribution/src/receiver/mod.rs b/node/network/dispute-distribution/src/receiver/mod.rs index 47d8f11e5553..8161cda17987 100644 --- a/node/network/dispute-distribution/src/receiver/mod.rs +++ b/node/network/dispute-distribution/src/receiver/mod.rs @@ -58,8 +58,17 @@ const COST_INVALID_SIGNATURE: Rep = Rep::Malicious("Signatures were invalid."); const COST_INVALID_CANDIDATE: Rep = Rep::Malicious("Reported candidate was not available."); const COST_NOT_A_VALIDATOR: Rep = Rep::CostMajor("Reporting peer was not a validator."); -/// How many statement imports we want to issue in parallel (for different candidates): -pub const MAX_PARALLEL_IMPORTS: usize = 10; +/// How many votes must have arrived in the last `BATCH_COLLECTING_INTERVAL` +/// +/// in order for a batch to stay alive and not get flushed/imported to the dispute-coordinator. +/// +/// This ensures a timely import once of batches. +pub const MIN_KEEP_BATCH_ALIVE_VOTES: u32 = 10; + +/// Time we allow to pass for new votes to trickle in. +/// +/// See `MIN_KEEP_BATCH_ALIVE_VOTES` above. +pub const BATCH_COLLECTING_INTERVAL: Duration = Duration::from_millis(500); /// State for handling incoming `DisputeRequest` messages. 
/// From 362ebaea0d1689b319146fe0e8ba5697063b5feb Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Wed, 24 Aug 2022 10:16:05 +0200 Subject: [PATCH 08/65] Maintain order of disputes. --- node/network/dispute-distribution/Cargo.toml | 1 + .../network/dispute-distribution/src/sender/mod.rs | 14 +++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/node/network/dispute-distribution/Cargo.toml b/node/network/dispute-distribution/Cargo.toml index 321c66cc789f..0488235fd304 100644 --- a/node/network/dispute-distribution/Cargo.toml +++ b/node/network/dispute-distribution/Cargo.toml @@ -21,6 +21,7 @@ sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "maste thiserror = "1.0.31" fatality = "0.0.6" lru = "0.7.7" +indexmap = "1.9.1" [dev-dependencies] async-trait = "0.1.53" diff --git a/node/network/dispute-distribution/src/sender/mod.rs b/node/network/dispute-distribution/src/sender/mod.rs index 5312528b413e..0b0de1fbc1ad 100644 --- a/node/network/dispute-distribution/src/sender/mod.rs +++ b/node/network/dispute-distribution/src/sender/mod.rs @@ -18,6 +18,7 @@ use std::collections::{hash_map::Entry, HashMap, HashSet}; use futures::channel::{mpsc, oneshot}; +use indexmap::IndexMap; use polkadot_node_network_protocol::request_response::v1::DisputeRequest; use polkadot_node_primitives::{CandidateVotes, DisputeMessage, SignedDisputeStatement}; use polkadot_node_subsystem::{messages::DisputeCoordinatorMessage, overseer, ActiveLeavesUpdate}; @@ -54,7 +55,9 @@ pub struct DisputeSender { active_sessions: HashMap, /// All ongoing dispute sendings this subsystem is aware of. - disputes: HashMap, + /// + /// Using an `IndexMap` so items can be iterated in the order of insertion. + disputes: IndexMap>, /// Sender to be cloned for `SendTask`s. 
tx: mpsc::Sender, @@ -134,10 +137,11 @@ impl DisputeSender { let active_disputes: HashSet<_> = active_disputes.into_iter().map(|(_, c)| c).collect(); - // Cleanup obsolete senders: + // Cleanup obsolete senders (retain keeps order of remaining elements): self.disputes .retain(|candidate_hash, _| active_disputes.contains(candidate_hash)); + // Iterates in order of insertion: for dispute in self.disputes.values_mut() { if have_new_sessions || dispute.has_failed_sends() { dispute @@ -146,7 +150,11 @@ impl DisputeSender { } } - // This should only be non-empty on startup, but if not - we got you covered: + // This should only be non-empty on startup, but if not - we got you covered. + // + // Initial order will not be maintained in that case, but that should be fine as disputes + // recovered at startup will be relatively "old" anyway and we assume that no more than a + // third of the validators will go offline at any point in time anyway. for dispute in unknown_disputes { self.start_send_for_dispute(ctx, runtime, dispute).await? } From 68c7073066224c48d1810843c23f9d3c9d7a2d05 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Wed, 24 Aug 2022 14:21:20 +0200 Subject: [PATCH 09/65] dispute-distribuion sender Rate limit. 
--- Cargo.lock | 21 ++--- node/network/dispute-distribution/Cargo.toml | 1 + node/network/dispute-distribution/src/lib.rs | 29 ++++++- .../dispute-distribution/src/sender/mod.rs | 84 +++++++++++++++++-- .../src/sender/send_task.rs | 21 ++++- 5 files changed, 134 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e281a65b102d..e30ac16f26a4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2311,9 +2311,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.21" +version = "0.3.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" +checksum = "2bfc52cbddcfd745bf1740338492bb0bd83d76c67b445f91c5fb29fae29ecaa1" dependencies = [ "futures-core", "futures-sink", @@ -2321,9 +2321,9 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.21" +version = "0.3.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" +checksum = "d2acedae88d38235936c3922476b10fced7b2b68136f5e3c03c2d5be348a1115" [[package]] name = "futures-executor" @@ -2382,9 +2382,9 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.21" +version = "0.3.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" +checksum = "ca0bae1fe9752cf7fd9b0064c674ae63f97b37bc714d745cbde0afb7ec4e6765" [[package]] name = "futures-task" @@ -2899,9 +2899,9 @@ checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683" [[package]] name = "indexmap" -version = "1.9.0" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c6392766afd7964e2531940894cffe4bd8d7d17dbc3c1c4857040fd4b33bdb3" +checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" dependencies = [ "autocfg", "hashbrown 0.12.0", @@ -6197,6 +6197,7 @@ 
dependencies = [ "fatality", "futures", "futures-timer", + "indexmap", "lazy_static", "lru 0.7.7", "parity-scale-codec", @@ -11625,9 +11626,9 @@ version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 0.1.10", + "cfg-if 1.0.0", "digest 0.10.3", - "rand 0.7.3", + "rand 0.8.5", "static_assertions", ] diff --git a/node/network/dispute-distribution/Cargo.toml b/node/network/dispute-distribution/Cargo.toml index 0488235fd304..f7de49d64cbd 100644 --- a/node/network/dispute-distribution/Cargo.toml +++ b/node/network/dispute-distribution/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" [dependencies] futures = "0.3.21" +futures-timer = "3.0.2" gum = { package = "tracing-gum", path = "../../gum" } derive_more = "0.99.17" parity-scale-codec = { version = "3.1.5", features = ["std"] } diff --git a/node/network/dispute-distribution/src/lib.rs b/node/network/dispute-distribution/src/lib.rs index aefd66e0ae79..2f6641fe3569 100644 --- a/node/network/dispute-distribution/src/lib.rs +++ b/node/network/dispute-distribution/src/lib.rs @@ -24,6 +24,8 @@ //! The sender is responsible for getting our vote out, see [`sender`]. The receiver handles //! incoming [`DisputeRequest`]s and offers spam protection, see [`receiver`]. +use std::time::Duration; + use futures::{channel::mpsc, FutureExt, StreamExt, TryFutureExt}; use polkadot_node_network_protocol::authority_discovery::AuthorityDiscovery; @@ -57,6 +59,8 @@ use self::sender::{DisputeSender, TaskFinish}; /// ## The receiver [`DisputesReceiver`] /// +/// TODO: Obsolete: +/// /// The receiving side is implemented as `DisputesReceiver` and is run as a separate long running task within /// this subsystem ([`DisputesReceiver::run`]). /// @@ -93,6 +97,20 @@ pub use metrics::Metrics; const LOG_TARGET: &'static str = "parachain::dispute-distribution"; +/// Rate limit on the `receiver` side. 
+///
+/// If messages from one peer come in at a higher rate than every `RECEIVE_RATE_LIMIT` on average, we
+/// start dropping messages from that peer to enforce that limit.
+pub const RECEIVE_RATE_LIMIT: Duration = Duration::from_millis(100);
+
+/// Rate limit on the `sender` side.
+///
+/// In order to not hit the `RECEIVE_RATE_LIMIT` on the receiving side, we limit our sending rate as
+/// well.
+///
+/// We add 50ms extra, just to have some safe margin to the `RECEIVE_RATE_LIMIT`.
+pub const SEND_RATE_LIMIT: Duration = RECEIVE_RATE_LIMIT.saturating_add(Duration::from_millis(50));
+
 /// The dispute distribution subsystem.
 pub struct DisputeDistributionSubsystem {
 	/// Easy and efficient runtime access for this subsystem.
@@ -172,6 +190,12 @@ where
 		ctx.spawn("disputes-receiver", receiver.run().boxed())
 			.map_err(FatalError::SpawnTask)?;
 
+		// Process messages for sending side.
+		//
+		// Note: We want the sender to be rate limited and we are currently taking advantage of the
+		// fact that the root task of this subsystem is only concerned with sending: Functions of
+		// `DisputeSender` might back pressure if the rate limit is hit, which will slow down this
+		// loop. If this fact ever changes, we will likely need another task.
 		loop {
 			let message = MuxedMessage::receive(&mut ctx, &mut self.sender_rx).await;
 			match message {
@@ -247,9 +271,10 @@ impl MuxedMessage {
 		// ends.
 		let from_overseer = ctx.recv().fuse();
 		futures::pin_mut!(from_overseer, from_sender);
-		futures::select!(
-			msg = from_overseer => MuxedMessage::Subsystem(msg.map_err(FatalError::SubsystemReceive)),
+		// We select biased to make sure we finish up loose ends, before starting new work.
+ futures::select_biased!( msg = from_sender.next() => MuxedMessage::Sender(msg), + msg = from_overseer => MuxedMessage::Subsystem(msg.map_err(FatalError::SubsystemReceive)), ) } } diff --git a/node/network/dispute-distribution/src/sender/mod.rs b/node/network/dispute-distribution/src/sender/mod.rs index 0b0de1fbc1ad..588eb1e7bc4f 100644 --- a/node/network/dispute-distribution/src/sender/mod.rs +++ b/node/network/dispute-distribution/src/sender/mod.rs @@ -14,11 +14,15 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use std::collections::{hash_map::Entry, HashMap, HashSet}; +use std::{ + collections::{HashMap, HashSet}, + time::Duration, +}; use futures::channel::{mpsc, oneshot}; -use indexmap::IndexMap; +use futures_timer::Delay; +use indexmap::{map::Entry, IndexMap}; use polkadot_node_network_protocol::request_response::v1::DisputeRequest; use polkadot_node_primitives::{CandidateVotes, DisputeMessage, SignedDisputeStatement}; use polkadot_node_subsystem::{messages::DisputeCoordinatorMessage, overseer, ActiveLeavesUpdate}; @@ -29,6 +33,9 @@ use polkadot_primitives::v2::{CandidateHash, DisputeStatement, Hash, SessionInde /// /// It is going to spawn real tasks as it sees fit for getting the votes of the particular dispute /// out. +/// +/// As we assume disputes have a priority, we start sending for disputes in the order +/// `start_sender` got called. mod send_task; use send_task::SendTask; pub use send_task::TaskFinish; @@ -38,13 +45,15 @@ mod error; pub use error::{Error, FatalError, JfyiError, Result}; use self::error::JfyiErrorResult; -use crate::{Metrics, LOG_TARGET}; +use crate::{Metrics, LOG_TARGET, SEND_RATE_LIMIT}; /// The `DisputeSender` keeps track of all ongoing disputes we need to send statements out. /// /// For each dispute a `SendTask` is responsible for sending to the concerned validators for that /// particular dispute. 
The `DisputeSender` keeps track of those tasks, informs them about new
 /// sessions/validator sets and cleans them up when they become obsolete.
+///
+/// The unit of work for the `DisputeSender` is a dispute, represented by `SendTask`s.
 pub struct DisputeSender {
 	/// All heads we currently consider active.
 	active_heads: Vec,
@@ -57,11 +66,14 @@ pub struct DisputeSender {
 	/// All ongoing dispute sendings this subsystem is aware of.
 	///
 	/// Using an `IndexMap` so items can be iterated in the order of insertion.
-	disputes: IndexMap,
+	disputes: IndexMap,
 
 	/// Sender to be cloned for `SendTask`s.
 	tx: mpsc::Sender,
 
+	/// Future for delaying too frequent creation of dispute sending tasks.
+	rate_limit: RateLimit,
+
 	/// Metrics for reporting stats about sent requests.
 	metrics: Metrics,
 }
@@ -73,13 +85,17 @@ impl DisputeSender {
 		Self {
 			active_heads: Vec::new(),
 			active_sessions: HashMap::new(),
-			disputes: HashMap::new(),
+			disputes: IndexMap::new(),
 			tx,
+			rate_limit: RateLimit::new(),
 			metrics,
 		}
 	}
 
 	/// Create a `SendTask` for a particular new dispute.
+	///
+	/// This function is rate-limited by `SEND_RATE_LIMIT`. It will block if called too frequently
+	/// in order to maintain the limit.
 	pub async fn start_sender(
 		&mut self,
 		ctx: &mut Context,
@@ -106,6 +122,9 @@ impl DisputeSender {
 				vacant.insert(send_task);
 			},
 		}
+
+		self.rate_limit.limit().await;
+
 		Ok(())
 	}
 
@@ -115,6 +134,8 @@
 	/// - Get new authorities to send messages to.
 	/// - Get rid of obsolete tasks and disputes.
 	/// - Get dispute sending started in case we missed one for some reason (e.g. on node startup)
+	///
+	/// This function ensures the `SEND_RATE_LIMIT`, therefore it might block.
 	pub async fn update_leaves(
 		&mut self,
 		ctx: &mut Context,
@@ -141,14 +162,27 @@ impl DisputeSender {
 		self.disputes
 			.retain(|candidate_hash, _| active_disputes.contains(candidate_hash));
 
+		// Replace with limit, just in case of an error.
+		let mut rate_limit = self.rate_limit.replace_with_limit();
+
 		// Iterates in order of insertion:
 		for dispute in self.disputes.values_mut() {
 			if have_new_sessions || dispute.has_failed_sends() {
-				dispute
+				let sends_happened = dispute
 					.refresh_sends(ctx, runtime, &self.active_sessions, &self.metrics)
 					.await?;
+				// Only rate limit if we actually sent something out _and_ it was not just because
+				// of errors on previous sends.
+				//
+				// Reasoning: It would not be acceptable to slow down the whole subsystem, just
+				// because of a few bad peers having problems. It is actually better to risk
+				// running into their rate limit in that case and accept a minor reputation change.
+				if sends_happened && have_new_sessions {
+					rate_limit.limit().await;
+				}
 			}
 		}
+		self.rate_limit = rate_limit;
 
 		// This should only be non-empty on startup, but if not - we got you covered.
 		//
@@ -156,7 +190,8 @@ impl DisputeSender {
 		// recovered at startup will be relatively "old" anyway and we assume that no more than a
 		// third of the validators will go offline at any point in time anyway.
 		for dispute in unknown_disputes {
-			self.start_send_for_dispute(ctx, runtime, dispute).await?
+			self.start_send_for_dispute(ctx, runtime, dispute).await?;
+			self.rate_limit.limit().await;
 		}
 		Ok(())
 	}
@@ -325,6 +360,41 @@ impl DisputeSender {
 	}
 }
 
+/// Rate limiting logic.
+struct RateLimit {
+	limit: Delay,
+}
+
+impl RateLimit {
+	/// Create new `RateLimit` that is immediately ready.
+	fn new() -> Self {
+		// Start with an empty duration, as there has not been any previous call.
+		Self { limit: Delay::new(Duration::new(0, 0)) }
+	}
+
+	/// Initialized with actual `SEND_RATE_LIMIT` duration.
+	fn new_limit() -> Self {
+		Self { limit: Delay::new(SEND_RATE_LIMIT) }
+	}
+
+	/// Take the `RateLimit` and replace with one that immediately is ready.
+	fn take(&mut self) -> Self {
+		std::mem::replace(self, RateLimit::new())
+	}
+
+	/// Replace the `RateLimit` with `Self::new_limit()`.
+ fn replace_with_limit(&mut self) -> Self { + std::mem::replace(self, Self::new_limit()) + } + + /// Wait until ready and prepare for next call. + async fn limit(&mut self) { + let old = self.take(); + old.limit.await; + *self = Self::new_limit(); + } +} + /// Retrieve the currently active sessions. /// /// List is all indices of all active sessions together with the head that was used for the query. diff --git a/node/network/dispute-distribution/src/sender/send_task.rs b/node/network/dispute-distribution/src/sender/send_task.rs index a2b8cdcf7441..e564a4d95838 100644 --- a/node/network/dispute-distribution/src/sender/send_task.rs +++ b/node/network/dispute-distribution/src/sender/send_task.rs @@ -42,6 +42,8 @@ use crate::{ /// Delivery status for a particular dispute. /// /// Keeps track of all the validators that have to be reached for a dispute. +/// +/// The unit of work for a `SendTask` is an authority/validator. pub struct SendTask { /// The request we are supposed to get out to all parachain validators of the dispute's session /// and to all current authorities. @@ -100,6 +102,10 @@ impl TaskResult { #[overseer::contextbounds(DisputeDistribution, prefix = self::overseer)] impl SendTask { /// Initiates sending a dispute message to peers. + /// + /// Creation of new `SendTask`s is subject to rate limiting. As each `SendTask` will trigger + /// sending a message to each validator, hence for employing a per-peer rate limit, we need to + /// limit the construction of new `SendTask`s. pub async fn new( ctx: &mut Context, runtime: &mut RuntimeInfo, @@ -118,15 +124,22 @@ impl SendTask { /// /// This function is called at construction and should also be called whenever a session change /// happens and on a regular basis to ensure we are retrying failed attempts. + /// + /// This might resend to validators and is thus subject to any rate limiting we might want. 
+ /// Calls to this function for different instances should be rate limited according to + /// `SEND_RATE_LIMIT`. + /// + /// Returns: `True` if this call resulted in new requests. pub async fn refresh_sends( &mut self, ctx: &mut Context, runtime: &mut RuntimeInfo, active_sessions: &HashMap, metrics: &Metrics, - ) -> Result<()> { + ) -> Result { let new_authorities = self.get_relevant_validators(ctx, runtime, active_sessions).await?; + // Note this will also contain all authorities for which sending failed previously: let add_authorities = new_authorities .iter() .filter(|a| !self.deliveries.contains_key(a)) @@ -141,12 +154,14 @@ impl SendTask { send_requests(ctx, self.tx.clone(), add_authorities, self.request.clone(), metrics) .await?; + let was_empty = new_statuses.is_empty(); + self.has_failed_sends = false; self.deliveries.extend(new_statuses.into_iter()); - Ok(()) + Ok(!was_empty) } - /// Whether any sends have failed since the last refreshed. + /// Whether any sends have failed since the last refresh. pub fn has_failed_sends(&self) -> bool { self.has_failed_sends } From 56519a79748238b38ff93218549a00c1df6703da Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 25 Aug 2022 09:56:28 +0200 Subject: [PATCH 10/65] Cleanup --- node/network/dispute-distribution/src/sender/mod.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/node/network/dispute-distribution/src/sender/mod.rs b/node/network/dispute-distribution/src/sender/mod.rs index 588eb1e7bc4f..cd2df7272469 100644 --- a/node/network/dispute-distribution/src/sender/mod.rs +++ b/node/network/dispute-distribution/src/sender/mod.rs @@ -162,9 +162,6 @@ impl DisputeSender { self.disputes .retain(|candidate_hash, _| active_disputes.contains(candidate_hash)); - // Replace with limit, just in case of an error. 
- let mut rate_limit = self.rate_limit.replace_with_limit(); - // Iterates in order of insertion: for dispute in self.disputes.values_mut() { if have_new_sessions || dispute.has_failed_sends() { @@ -178,11 +175,10 @@ impl DisputeSender { // because of a few bad peers having problems. It is actually better to risk // running into their rate limit in that case and accept a minor reputation change. if sends_happened && have_new_sessions { - rate_limit.limit().await; + self.rate_limit.limit().await; } } } - self.rate_limit = rate_limit; // This should only be non-empty on startup, but if not - we got you covered. // @@ -361,6 +357,8 @@ impl DisputeSender { } /// Rate limiting logic. +/// +/// Suitable for the sending side. struct RateLimit { limit: Delay, } From b3ab280c495dbe004e6f04eab6c9a239de381170 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 25 Aug 2022 16:03:30 +0200 Subject: [PATCH 11/65] WIP: dispute-distribution receiver. - [ ] Rate limiting - [ ] Batching --- .../network/dispute-distribution/src/error.rs | 2 + .../src/receiver/error.rs | 6 +- .../dispute-distribution/src/receiver/mod.rs | 185 ++++++++++++------ .../src/receiver/peer_queues.rs | 94 +++++++++ 4 files changed, 228 insertions(+), 59 deletions(-) create mode 100644 node/network/dispute-distribution/src/receiver/peer_queues.rs diff --git a/node/network/dispute-distribution/src/error.rs b/node/network/dispute-distribution/src/error.rs index cc5be11602f4..660a10fe08ce 100644 --- a/node/network/dispute-distribution/src/error.rs +++ b/node/network/dispute-distribution/src/error.rs @@ -57,6 +57,8 @@ pub type Result = std::result::Result; pub type FatalResult = std::result::Result; +pub type JfyiResult = std::result::Result; + /// Utility for eating top level errors and log them. /// /// We basically always want to try and continue on error. 
This utility function is meant to diff --git a/node/network/dispute-distribution/src/receiver/error.rs b/node/network/dispute-distribution/src/receiver/error.rs index ce578cc8e0f9..e3e161925178 100644 --- a/node/network/dispute-distribution/src/receiver/error.rs +++ b/node/network/dispute-distribution/src/receiver/error.rs @@ -21,6 +21,7 @@ use fatality::Nested; use polkadot_node_network_protocol::{request_response::incoming, PeerId}; use polkadot_node_subsystem_util::runtime; +use polkadot_primitives::v2::AuthorityDiscoveryId; use crate::LOG_TARGET; @@ -49,11 +50,14 @@ pub enum Error { #[error("Peer {0} attempted to participate in dispute and is not a validator.")] NotAValidator(PeerId), + + #[error("Authority {0} sent messages at a too high rate.")] + AuthorityFlooding(AuthorityDiscoveryId), } pub type Result = std::result::Result; -pub type JfyiErrorResult = std::result::Result; +pub type JfyiResult = std::result::Result; /// Utility for eating top level errors and log them. /// diff --git a/node/network/dispute-distribution/src/receiver/mod.rs b/node/network/dispute-distribution/src/receiver/mod.rs index 8161cda17987..3745674fc4be 100644 --- a/node/network/dispute-distribution/src/receiver/mod.rs +++ b/node/network/dispute-distribution/src/receiver/mod.rs @@ -15,18 +15,20 @@ // along with Polkadot. If not, see . 
use std::{ - collections::HashSet, + collections::{HashMap, HashSet, VecDeque}, pin::Pin, task::{Context, Poll}, + time::Duration, }; use futures::{ channel::oneshot, - future::{poll_fn, BoxFuture}, - pin_mut, + future::{poll_fn, BoxFuture, Fuse}, + pin_mut, select_biased, stream::{FusedStream, FuturesUnordered, StreamExt}, Future, FutureExt, Stream, }; +use futures_timer::Delay; use lru::LruCache; use polkadot_node_network_protocol::{ @@ -44,19 +46,32 @@ use polkadot_node_subsystem::{ overseer, }; use polkadot_node_subsystem_util::{runtime, runtime::RuntimeInfo}; +use polkadot_primitives::v2::AuthorityDiscoveryId; use crate::{ metrics::{FAILED, SUCCEEDED}, - Metrics, LOG_TARGET, + Metrics, LOG_TARGET, RECEIVE_RATE_LIMIT, }; mod error; -use self::error::{log_error, JfyiError, JfyiErrorResult, Result}; + +/// Queues for incoming requests by peers. +mod peer_queues; + +use self::{ + error::{log_error, JfyiError, JfyiResult, Result}, + peer_queues::PeerQueues, +}; const COST_INVALID_REQUEST: Rep = Rep::CostMajor("Received message could not be decoded."); const COST_INVALID_SIGNATURE: Rep = Rep::Malicious("Signatures were invalid."); const COST_INVALID_CANDIDATE: Rep = Rep::Malicious("Reported candidate was not available."); const COST_NOT_A_VALIDATOR: Rep = Rep::CostMajor("Reporting peer was not a validator."); +/// Mildly punish peers exceeding their rate limit. +/// +/// For honest peers this should rarely happen, but if it happens we would not want to disconnect +/// too quickly. Minor cost should suffice for disconnecting any real flooder. +const COST_APPARENT_FLOOD: Rep = Rep::CostMinor("Peer exceeded the rate limit."); /// How many votes must have arrived in the last `BATCH_COLLECTING_INTERVAL` /// @@ -84,19 +99,18 @@ pub struct DisputesReceiver { /// Channel to retrieve incoming requests from. receiver: IncomingRequestReceiver, + /// Rate limiting queue for each peer (only authorities). 
+ peer_queues: PeerQueues, + + /// Delay timer for establishing the rate limit. + rate_limit: Fuse, + /// Authority discovery service: authority_discovery: AD, /// Imports currently being processed. pending_imports: PendingImports, - /// We keep record of the last banned peers. - /// - /// This is needed because once we ban a peer, we will very likely still have pending requests - /// in the incoming channel - we should not waste time recovering availability for those, as we - /// already know the peer is malicious. - banned_peers: LruCache, - /// Log received requests. metrics: Metrics, } @@ -110,7 +124,7 @@ enum MuxedMessage { /// - We need to make sure responses are actually sent (therefore we need to await futures /// promptly). /// - We need to update `banned_peers` accordingly to the result. - ConfirmedImport(JfyiErrorResult<(PeerId, ImportStatementsResult)>), + ConfirmedImport(JfyiResult<(PeerId, ImportStatementsResult)>), /// A new request has arrived and should be handled. NewRequest(IncomingRequest), @@ -122,6 +136,11 @@ impl MuxedMessage { pending_requests: &mut IncomingRequestReceiver, ) -> Result { poll_fn(|ctx| { + // In case of Ready(None), we want to wait for pending requests: + if let Poll::Ready(Some(v)) = pending_imports.poll_next_unpin(ctx) { + return Poll::Ready(Ok(Self::ConfirmedImport(v))) + } + let next_req = pending_requests.recv(|| vec![COST_INVALID_REQUEST]); pin_mut!(next_req); if let Poll::Ready(r) = next_req.poll(ctx) { @@ -130,11 +149,6 @@ impl MuxedMessage { Ok(v) => Poll::Ready(Ok(Self::NewRequest(v))), } } - // In case of Ready(None) return `Pending` below - we want to wait for the next request - // in that case. 
- if let Poll::Ready(Some(v)) = pending_imports.poll_next_unpin(ctx) { - return Poll::Ready(Ok(Self::ConfirmedImport(v))) - } Poll::Pending }) .await @@ -161,11 +175,12 @@ where runtime, sender, receiver, + peer_queues: PeerQueues::new(), + rate_limit: Delay::new(RECEIVE_RATE_LIMIT).fuse(), authority_discovery, pending_imports: PendingImports::new(), // Size of MAX_PARALLEL_IMPORTS ensures we are going to immediately get rid of any // malicious requests still pending in the incoming queue. - banned_peers: LruCache::new(MAX_PARALLEL_IMPORTS), metrics, } } @@ -191,45 +206,35 @@ where /// Actual work happening here. async fn run_inner(&mut self) -> Result<()> { - let msg = MuxedMessage::receive(&mut self.pending_imports, &mut self.receiver).await?; - - let incoming = match msg { - // We need to clean up futures, to make sure responses are sent: - MuxedMessage::ConfirmedImport(m_bad) => { - self.ban_bad_peer(m_bad)?; - return Ok(()) - }, - MuxedMessage::NewRequest(req) => req, + let msg = if self.peer_queues.is_empty() { + // No point to wake on timeout: + Some(MuxedMessage::receive(&mut self.pending_imports, &mut self.receiver).await?) + } else { + self.wait_for_message_or_timeout().await? 
}; - self.metrics.on_received_request(); - - let peer = incoming.peer; - - // Only accept messages from validators: - if self.authority_discovery.get_authority_ids_by_peer_id(peer).await.is_none() { - incoming - .send_outgoing_response(OutgoingResponse { - result: Err(()), - reputation_changes: vec![COST_NOT_A_VALIDATOR], - sent_feedback: None, - }) - .map_err(|_| JfyiError::SendResponse(peer))?; - - return Err(JfyiError::NotAValidator(peer).into()) - } + if let Some(msg) = msg { + let incoming = match msg { + // We need to clean up futures, to make sure responses are sent: + MuxedMessage::ConfirmedImport(m_bad) => { + self.ban_bad_peer(m_bad)?; + return Ok(()) + }, + MuxedMessage::NewRequest(req) => req, + }; - // Immediately drop requests from peers that already have requests in flight or have - // been banned recently (flood protection): - if self.pending_imports.peer_is_pending(&peer) || self.banned_peers.contains(&peer) { - gum::trace!( - target: LOG_TARGET, - ?peer, - "Dropping message from peer (banned/pending import)" - ); + self.metrics.on_received_request(); + self.dispatch_to_queues(incoming).await?; + // Wait for more messages: return Ok(()) } + // Let's actually process messages, that made it through the rate limit: + // + // Batch: + // - Collect votes - get rid of duplicates. + // - Keep track of import rate. + // - Flush if import rate is not matched // Wait for a free slot: if self.pending_imports.len() >= MAX_PARALLEL_IMPORTS as usize { // Wait for one to finish: @@ -241,6 +246,71 @@ where self.start_import(incoming).await } + /// Wait for a message or the `rate_limit` timeout to hit (if there is one). + /// + /// In case a message got received `rate_limit` will be populated by this function. This way we + /// only wake on timeouts if there are actually any messages to process. + /// + /// In case of timeout we return Ok(None). 
+ async fn wait_for_message_or_timeout(&mut self) -> Result> { + // We already have messages to process - rate limiting activated: + let rcv_msg = MuxedMessage::receive(&mut self.pending_imports, &mut self.receiver).fuse(); + pin_mut!(rcv_msg); + let mut timeout = Pin::new(&mut self.rate_limit); + let result = select_biased!( + () = timeout => None, + msg = rcv_msg => Some(msg?), + ); + if result.is_none() { + // Timeout hit - we need a new Delay (started immediately so the following processing + // does not further decrease allowed rate (assuming processing takes less than + // `RECEIVE_RATE_LIMIT`): + self.rate_limit = Delay::new(RECEIVE_RATE_LIMIT).fuse(); + } + Ok(result) + } + + /// Process incoming requests. + /// + /// - Check sender is authority + /// - Dispatch message to corresponding queue in `peer_queues`. + /// - If queue is full, drop message and change reputation of sender. + async fn dispatch_to_queues(&mut self, req: IncomingRequest) -> JfyiResult<()> { + let peer = req.peer; + // Only accept messages from validators, in case there are multiple `AuthorityId`s, we + // just take the first one. On session boundaries this might allow validators to double + // their rate limit for a short period of time, which seems acceptable. 
+ let authority_id = match self + .authority_discovery + .get_authority_ids_by_peer_id(peer) + .await + .and_then(|s| s.into_iter().next()) + { + None => { + req.send_outgoing_response(OutgoingResponse { + result: Err(()), + reputation_changes: vec![COST_NOT_A_VALIDATOR], + sent_feedback: None, + }) + .map_err(|_| JfyiError::SendResponse(peer))?; + return Err(JfyiError::NotAValidator(peer).into()) + }, + Some(auth_id) => auth_id, + }; + + // Queue request: + if let Err((authority_id, req)) = self.peer_queues.push_req(authority_id, req) { + req.send_outgoing_response(OutgoingResponse { + result: Err(()), + reputation_changes: vec![COST_APPARENT_FLOOD], + sent_feedback: None, + }) + .map_err(|_| JfyiError::SendResponse(peer))?; + return Err(JfyiError::AuthorityFlooding(authority_id)) + } + Ok(()) + } + /// Start importing votes for the given request. async fn start_import(&mut self, incoming: IncomingRequest) -> Result<()> { let IncomingRequest { peer, payload, pending_response } = incoming; @@ -291,8 +361,8 @@ where /// In addition we report import metrics. fn ban_bad_peer( &mut self, - result: JfyiErrorResult<(PeerId, ImportStatementsResult)>, - ) -> JfyiErrorResult<()> { + result: JfyiResult<(PeerId, ImportStatementsResult)>, + ) -> JfyiResult<()> { match result? { (_, ImportStatementsResult::ValidImport) => { self.metrics.on_imported(SUCCEEDED); @@ -309,8 +379,7 @@ where /// Manage pending imports in a way that preserves invariants. struct PendingImports { /// Futures in flight. - futures: - FuturesUnordered)>>, + futures: FuturesUnordered)>>, /// Peers whose requests are currently in flight. 
peers: HashSet, } @@ -348,7 +417,7 @@ impl PendingImports { } impl Stream for PendingImports { - type Item = JfyiErrorResult<(PeerId, ImportStatementsResult)>; + type Item = JfyiResult<(PeerId, ImportStatementsResult)>; fn poll_next(mut self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll> { match Pin::new(&mut self.futures).poll_next(ctx) { Poll::Pending => Poll::Pending, @@ -375,7 +444,7 @@ async fn respond_to_request( peer: PeerId, handled: oneshot::Receiver, pending_response: OutgoingResponseSender, -) -> JfyiErrorResult { +) -> JfyiResult { let result = handled.await.map_err(|_| JfyiError::ImportCanceled(peer))?; let response = match result { diff --git a/node/network/dispute-distribution/src/receiver/peer_queues.rs b/node/network/dispute-distribution/src/receiver/peer_queues.rs new file mode 100644 index 000000000000..154e440ecd42 --- /dev/null +++ b/node/network/dispute-distribution/src/receiver/peer_queues.rs @@ -0,0 +1,94 @@ +// Copyright 2022 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use std::collections::{hash_map::Entry, HashMap, VecDeque}; + +use polkadot_node_network_protocol::request_response::{v1::DisputeRequest, IncomingRequest}; +use polkadot_primitives::v2::AuthorityDiscoveryId; + +/// How many messages we are willing to queue per peer (validator). 
+/// +/// The larger this value is, the larger bursts are allowed to be without us dropping messages. On +/// the flipside we should this gets allocated per validator, so for a size of 10 this will result +/// in 10_000 * size_of(IncomingRequest). +/// +/// `PEER_QUEUE_CAPACITY` must not be 0 for obvious reasons. +pub const PEER_QUEUE_CAPACITY: usize = 10; + +/// Queues for messages from authority peers. +/// +/// Two invariants are ensured: +/// +/// 1. No queue will ever have more than `PEER_QUEUE_CAPACITY` elements. +/// 2. There are no empty queues. Whenever a queue gets empty, it is removed. This way checking +/// whether there are any messages queued is cheap. +pub struct PeerQueues { + queues: HashMap>>, +} + +impl PeerQueues { + /// New empty `PeerQueues` + pub fn new() -> Self { + Self { queues: HashMap::new() } + } + + /// Push an incoming request for a given authority. + /// + /// Returns: `Ok(())` if succeeded, `Err((args))` if capacity is reached. + pub fn push_req( + &mut self, + peer: AuthorityDiscoveryId, + req: IncomingRequest, + ) -> Result<(), (AuthorityDiscoveryId, IncomingRequest)> { + let queue = match self.queues.entry(peer) { + Entry::Vacant(vacant) => { + vacant.insert(VecDeque::new()) + }, + Entry::Occupied(occupied) => { + if occupied.get().len() >= PEER_QUEUE_CAPACITY { + return Err((occupied.key().clone(), req)) + } + occupied.get_mut() + }, + }; + queue.push_back(req); + Ok(()) + } + + /// Pop all heads and return them for processing. + pub fn pop_reqs(&mut self) -> Vec> { + let mut heads = Vec::with_capacity(self.queues.len()); + let mut new_queues = HashMap::new(); + for (k, queue) in self.queues.into_iter() { + let front = queue.pop_front(); + debug_assert!(front.is_some(), "Invariant that queues are never empty is broken."); + + if let Some(front) = front { + heads.push(front); + } + if !queue.is_empty() { + new_queues.insert(k, queue); + } + } + self.queues = new_queues; + heads + } + + /// Whether or not all queues are empty. 
+ pub fn is_empty(&self) -> bool { + self.queues.is_empty() + } +} From 4b4df00e1d32f3a92c39024b2f92296f17ab6704 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 25 Aug 2022 17:16:14 +0200 Subject: [PATCH 12/65] WIP: Batching. --- .../src/receiver/batch.rs | 71 +++++++++++++++++++ .../dispute-distribution/src/receiver/mod.rs | 7 ++ 2 files changed, 78 insertions(+) create mode 100644 node/network/dispute-distribution/src/receiver/batch.rs diff --git a/node/network/dispute-distribution/src/receiver/batch.rs b/node/network/dispute-distribution/src/receiver/batch.rs new file mode 100644 index 000000000000..897c7bedbe1a --- /dev/null +++ b/node/network/dispute-distribution/src/receiver/batch.rs @@ -0,0 +1,71 @@ +// Copyright 2022 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use std::collections::HashMap; + +use polkadot_node_primitives::SignedDisputeStatement; +use polkadot_primitives::v2::{ValidatorIndex, CandidateReceipt, CandidateHash}; + + +struct Batch { + /// The actual candidate this batch is concerned with. + candidate_receipt: CandidateReceipt, + + /// Cache `CandidateHash` to do efficient sanity checks. + candidate_hash: CandidateHash, + + /// All valid votes received in this batch so far. 
+	///
+	/// We differentiate between valid and invalid votes, so we can detect (and drop) duplicates,
+	/// while still allowing validators to equivocate.
+	///
+	/// Detecting and rejecting duplicates is crucial in order to effectively enforce
+	/// `MIN_KEEP_BATCH_ALIVE_VOTES` per `BATCH_COLLECTING_INTERVAL`. If we would count duplicates
+	/// here, the mechanism would be broken.
+	valid_votes: HashMap,
+
+	/// All invalid votes received in this batch so far.
+	invalid_votes: HashMap,
+
+	/// How many votes have been batched in the last `BATCH_COLLECTING_INTERVAL`?
+	votes_batched_since_last_tick: u32,
+}
+
+impl Batch {
+	/// Create a new empty batch based on the given `CandidateReceipt`.
+	pub fn new(candidate_receipt: CandidateReceipt) -> Self {
+		Self {
+			candidate_hash: candidate_receipt.hash(),
+			candidate_receipt,
+			valid_votes: HashMap::new(),
+			invalid_votes: HashMap::new(),
+			votes_batched_since_last_tick: 0,
+		}
+	}
+
+	/// Import votes into the batch.
+	pub fn import_votes(&mut self, receipt: CandidateReceipt, valid_vote: (SignedDisputeStatement, ValidatorIndex), invalid_vote: (SignedDisputeStatement, ValidatorIndex)) {
+		debug_assert!(valid_vote.0.candidate_hash() == invalid_vote.0.candidate_hash());
+		debug_assert!(valid_vote.0.candidate_hash() == &self.candidate_hash);
+
+		if self.valid_votes.insert(valid_vote.1, valid_vote.0).is_none() {
+			self.votes_batched_since_last_tick += 1;
+		}
+		if self.invalid_votes.insert(invalid_vote.1, invalid_vote.0).is_none() {
+			self.votes_batched_since_last_tick += 1;
+		}
+	}
+}
diff --git a/node/network/dispute-distribution/src/receiver/mod.rs b/node/network/dispute-distribution/src/receiver/mod.rs
index 3745674fc4be..4bd024f3b30e 100644
--- a/node/network/dispute-distribution/src/receiver/mod.rs
+++ b/node/network/dispute-distribution/src/receiver/mod.rs
@@ -58,6 +58,9 @@ mod error;
 /// Queues for incoming requests by peers.
 mod peer_queues;
 
+/// Batch imports together.
+mod batch; + use self::{ error::{log_error, JfyiError, JfyiResult, Result}, peer_queues::PeerQueues, @@ -236,6 +239,10 @@ where // - Keep track of import rate. // - Flush if import rate is not matched // Wait for a free slot: + // + // struct Batch { + // + // } if self.pending_imports.len() >= MAX_PARALLEL_IMPORTS as usize { // Wait for one to finish: let r = self.pending_imports.next().await; From 5816c8d28d133d314a9063a0278ac8c923506e0e Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 25 Aug 2022 17:17:10 +0200 Subject: [PATCH 13/65] fmt --- .../network/dispute-distribution/src/receiver/batch.rs | 10 +++++++--- node/network/dispute-distribution/src/receiver/mod.rs | 5 +---- .../dispute-distribution/src/receiver/peer_queues.rs | 4 +--- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/node/network/dispute-distribution/src/receiver/batch.rs b/node/network/dispute-distribution/src/receiver/batch.rs index 897c7bedbe1a..32a19d69893d 100644 --- a/node/network/dispute-distribution/src/receiver/batch.rs +++ b/node/network/dispute-distribution/src/receiver/batch.rs @@ -17,8 +17,7 @@ use std::collections::HashMap; use polkadot_node_primitives::SignedDisputeStatement; -use polkadot_primitives::v2::{ValidatorIndex, CandidateReceipt, CandidateHash}; - +use polkadot_primitives::v2::{CandidateHash, CandidateReceipt, ValidatorIndex}; struct Batch { /// The actual candidate this batch is concerned with. @@ -57,7 +56,12 @@ impl Batch { } /// Import votes into the batch. 
- pub fn import_votes(&mut self, receipt: CandidateReceipt, valid_vote: (SignedDisputeStatement, ValidatorIndex), invalid_vote: (SignedDisputeStatement, ValidatorIndex)) { + pub fn import_votes( + &mut self, + receipt: CandidateReceipt, + valid_vote: (SignedDisputeStatement, ValidatorIndex), + invalid_vote: (SignedDisputeStatement, ValidatorIndex), + ) { debug_assert!(valid_vote.0.candidate_hash() == invalid_vote.0.candidate_hash()); debug_assert!(valid_vote.0.candidate_hash() == &self.candidate_hash); diff --git a/node/network/dispute-distribution/src/receiver/mod.rs b/node/network/dispute-distribution/src/receiver/mod.rs index 4bd024f3b30e..e8ca9cf121a8 100644 --- a/node/network/dispute-distribution/src/receiver/mod.rs +++ b/node/network/dispute-distribution/src/receiver/mod.rs @@ -239,10 +239,7 @@ where // - Keep track of import rate. // - Flush if import rate is not matched // Wait for a free slot: - // - // struct Batch { - // - // } + // if self.pending_imports.len() >= MAX_PARALLEL_IMPORTS as usize { // Wait for one to finish: let r = self.pending_imports.next().await; diff --git a/node/network/dispute-distribution/src/receiver/peer_queues.rs b/node/network/dispute-distribution/src/receiver/peer_queues.rs index 154e440ecd42..a33b8e83c2e3 100644 --- a/node/network/dispute-distribution/src/receiver/peer_queues.rs +++ b/node/network/dispute-distribution/src/receiver/peer_queues.rs @@ -54,9 +54,7 @@ impl PeerQueues { req: IncomingRequest, ) -> Result<(), (AuthorityDiscoveryId, IncomingRequest)> { let queue = match self.queues.entry(peer) { - Entry::Vacant(vacant) => { - vacant.insert(VecDeque::new()) - }, + Entry::Vacant(vacant) => vacant.insert(VecDeque::new()), Entry::Occupied(occupied) => { if occupied.get().len() >= PEER_QUEUE_CAPACITY { return Err((occupied.key().clone(), req)) From a821efcf85c7a1d93b615417a5db1205c4f5af19 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 26 Aug 2022 17:25:12 +0200 Subject: [PATCH 14/65] Update `PeerQueues` to 
maintain more invariants. --- .../src/receiver/peer_queues.rs | 64 ++++++++++++++++--- 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/node/network/dispute-distribution/src/receiver/peer_queues.rs b/node/network/dispute-distribution/src/receiver/peer_queues.rs index a33b8e83c2e3..dd29a0be68b3 100644 --- a/node/network/dispute-distribution/src/receiver/peer_queues.rs +++ b/node/network/dispute-distribution/src/receiver/peer_queues.rs @@ -16,9 +16,13 @@ use std::collections::{hash_map::Entry, HashMap, VecDeque}; +use futures::future::{pending, Fuse}; +use futures_timer::Delay; use polkadot_node_network_protocol::request_response::{v1::DisputeRequest, IncomingRequest}; use polkadot_primitives::v2::AuthorityDiscoveryId; +use crate::RECEIVE_RATE_LIMIT; + /// How many messages we are willing to queue per peer (validator). /// /// The larger this value is, the larger bursts are allowed to be without us dropping messages. On @@ -28,21 +32,28 @@ use polkadot_primitives::v2::AuthorityDiscoveryId; /// `PEER_QUEUE_CAPACITY` must not be 0 for obvious reasons. pub const PEER_QUEUE_CAPACITY: usize = 10; -/// Queues for messages from authority peers. +/// Queues for messages from authority peers for rate limiting. /// -/// Two invariants are ensured: +/// Invariants ensured: /// /// 1. No queue will ever have more than `PEER_QUEUE_CAPACITY` elements. /// 2. There are no empty queues. Whenever a queue gets empty, it is removed. This way checking /// whether there are any messages queued is cheap. +/// 3. As long as not empty, `pop_reqs` will, if called in sequence, not return `Ready` more often +/// than once for every `RECEIVE_RATE_LIMIT`, but it will always return Ready eventually. +/// 4. If empty `pop_reqs` will never return `Ready`, but will always be `Pending`. pub struct PeerQueues { + /// Actual queues. queues: HashMap>>, + + /// Delay timer for establishing the rate limit. 
+ rate_limit_waker: Option, } impl PeerQueues { - /// New empty `PeerQueues` + /// New empty `PeerQueues`. pub fn new() -> Self { - Self { queues: HashMap::new() } + Self { queues: HashMap::new(), rate_limit_waker: None } } /// Push an incoming request for a given authority. @@ -63,14 +74,26 @@ impl PeerQueues { }, }; queue.push_back(req); + + // We have at least one element to process - rate limit waker needs to exist now: + self.ensure_waker(); Ok(()) } /// Pop all heads and return them for processing. - pub fn pop_reqs(&mut self) -> Vec> { + /// + /// This gets one message from each peer that has sent at least one. + /// + /// This function is rate limited, if called in sequence it will not return more often than + /// every `RECEIVE_RATE_LIMIT`. + /// + /// NOTE: If empty this function will not return `Ready` at all, but will always be `Pending`. + pub async fn pop_reqs(&mut self) -> Vec> { + self.wait_for_waker().await; + let mut heads = Vec::with_capacity(self.queues.len()); - let mut new_queues = HashMap::new(); - for (k, queue) in self.queues.into_iter() { + let old_queues = std::mem::replace(&mut self.queues, HashMap::new()); + for (k, mut queue) in old_queues.into_iter() { let front = queue.pop_front(); debug_assert!(front.is_some(), "Invariant that queues are never empty is broken."); @@ -78,10 +101,15 @@ impl PeerQueues { heads.push(front); } if !queue.is_empty() { - new_queues.insert(k, queue); + self.queues.insert(k, queue); } } - self.queues = new_queues; + + if !self.is_empty() { + // Still not empty - we should get woken at some point. + self.ensure_waker(); + } + heads } @@ -89,4 +117,22 @@ impl PeerQueues { pub fn is_empty(&self) -> bool { self.queues.is_empty() } + + /// Ensure there is an active waker. + /// + /// Checks whether one exists and if not creates one. + fn ensure_waker(&mut self) -> &mut Delay { + self.rate_limit_waker.get_or_insert(Delay::new(RECEIVE_RATE_LIMIT)) + } + + /// Wait for waker if it exists, or be `Pending` forever. 
+ /// + /// Afterwards it gets set back to `None`. + async fn wait_for_waker(&mut self) { + match self.rate_limit_waker.as_mut() { + None => pending().await, + Some(waker) => waker.await, + } + self.rate_limit_waker = None; + } } From 3b718173dd0fe864056225c360806569f52e189e Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 26 Aug 2022 17:26:17 +0200 Subject: [PATCH 15/65] WIP: Batching. --- .../src/receiver/batch.rs | 141 +++++++++- .../src/receiver/error.rs | 3 + .../dispute-distribution/src/receiver/mod.rs | 246 +++++++++++++----- 3 files changed, 311 insertions(+), 79 deletions(-) diff --git a/node/network/dispute-distribution/src/receiver/batch.rs b/node/network/dispute-distribution/src/receiver/batch.rs index 32a19d69893d..3e0af4a0f32c 100644 --- a/node/network/dispute-distribution/src/receiver/batch.rs +++ b/node/network/dispute-distribution/src/receiver/batch.rs @@ -14,12 +14,92 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use std::collections::HashMap; +use std::{ + cmp::{Ord, Ordering}, + collections::{hash_map, BTreeMap, BinaryHeap, HashMap, HashSet}, + time::Instant, +}; +use futures_timer::Delay; +use polkadot_node_network_protocol::request_response::{ + incoming::OutgoingResponseSender, v1::DisputeRequest, +}; use polkadot_node_primitives::SignedDisputeStatement; use polkadot_primitives::v2::{CandidateHash, CandidateReceipt, ValidatorIndex}; -struct Batch { +use super::BATCH_COLLECTING_INTERVAL; + +/// TODO: Limit number of batches + +// - Batches can be added very rate limit timeout. +// - They have to be checked every BATCH_COLLECTING_INTERVAL. +// - We can get the earliest next wakeup - keep ordered list of wakeups! Then we always know when +// the next one comes - only needs to get updated on insert. - Tada! 
+struct Batches {
+	batches: HashMap,
+	check_waker: Option,
+	pending_wakes: BinaryHeap,
+}
+
+/// Represents some batch waiting for its next tick to happen at `next_tick`.
+///
+/// This is an internal type meant to be used in the `pending_wakes` `BinaryHeap` field of
+/// `Batches`. It provides an `Ord` instance, that sorts descending with regard to `Instant` (so we
+/// get a `min-heap` with the earliest `Instant` at the top).
+#[derive(Eq, PartialEq)]
+struct PendingWake {
+	candidate_hash: CandidateHash,
+	next_tick: Instant,
+}
+
+/// A found batch is either really found or got created so it can be found.
+enum FoundBatch<'a> {
+	/// Batch just got created.
+	Created(&'a mut Batch),
+	/// Batch already existed.
+	Found(&'a mut Batch),
+}
+
+impl Batches {
+	/// Create new empty `Batches`.
+	pub fn new() -> Self {
+		Self { batches: HashMap::new(), check_waker: None, pending_wakes: BinaryHeap::new() }
+	}
+
+	/// Find a particular batch.
+	///
+	/// That is either find it, or we create it as reflected by the result `FoundBatch`.
+	pub fn find_batch(
+		&mut self,
+		candidate_hash: CandidateHash,
+		candidate_receipt: CandidateReceipt,
+	) -> FoundBatch {
+		debug_assert!(candidate_hash == candidate_receipt.hash());
+		match self.batches.entry(candidate_hash) {
+			hash_map::Entry::Vacant(vacant) =>
+				FoundBatch::Created(vacant.insert(Batch::new(candidate_receipt))),
+			hash_map::Entry::Occupied(occupied) => FoundBatch::Found(occupied.get_mut()),
+		}
+	}
+	// Next steps:
+	//
+	// - Make sure binary heap above stays current.
+	// - Use head of binary heap to schedule next wakeup.
+	// - Provide function that provides imports delayed by the wakeup future (similar to rate
+	//   limiting).
+	// - Important: Direct updating of last_tick of `Batch` has to be forbidden as this would break
+	//   our binary heap. Instead all updates have to go through `Batches`.
+}
+
+/// A batch of votes to be imported into the `dispute-coordinator`.
+/// +/// Vote imports are way more efficient when performed in batches, hence we batch together incoming +/// votes until the rate of incoming votes falls below a threshold, then we import into the dispute +/// coordinator. +/// +/// A `Batch` keeps track of the votes to be imported and the current incoming rate, on rate update +/// it will "flush" in case the incoming rate dropped too low, preparing the import. +pub struct Batch { /// The actual candidate this batch is concerned with. candidate_receipt: CandidateReceipt, @@ -41,35 +121,82 @@ struct Batch { /// How many votes have been batched in the last `BATCH_COLLECTING_INTERVAL`? votes_batched_since_last_tick: u32, + + /// Timestamp of creation or last time we checked incoming rate. + last_tick: Instant, + + /// Requesters waiting for a response. + pending_responses: Vec>, } impl Batch { /// Create a new empty batch based on the given `CandidateReceipt`. - pub fn new(candidate_receipt: CandidateReceipt) -> Self { + /// + /// To create a `Batch` use Batches::find_batch`. + fn new(candidate_receipt: CandidateReceipt) -> Self { Self { candidate_hash: candidate_receipt.hash(), candidate_receipt, valid_votes: HashMap::new(), invalid_votes: HashMap::new(), votes_batched_since_last_tick: 0, + last_tick: Instant::now(), + pending_responses: Vec::new(), } } - /// Import votes into the batch. - pub fn import_votes( + /// Add votes from a validator into the batch. + /// + /// The statements are supposed to be the valid and invalid statements received in a + /// `DisputeRequest`. + /// + /// The given `pending_response` is the corresponding response sender. If at least one of the + /// votes is new as far as this batch is concerned we record the pending_response, for later + /// use. In case both votes are known already, we return the response sender as an `Err` value. 
+ pub fn add_votes( &mut self, - receipt: CandidateReceipt, valid_vote: (SignedDisputeStatement, ValidatorIndex), invalid_vote: (SignedDisputeStatement, ValidatorIndex), - ) { + pending_response: OutgoingResponseSender, + ) -> Result<(), OutgoingResponseSender> { debug_assert!(valid_vote.0.candidate_hash() == invalid_vote.0.candidate_hash()); debug_assert!(valid_vote.0.candidate_hash() == &self.candidate_hash); + let mut duplicate = true; + if self.valid_votes.insert(valid_vote.1, valid_vote.0).is_none() { self.votes_batched_since_last_tick += 1; + duplicate = false; } if self.invalid_votes.insert(invalid_vote.1, invalid_vote.0).is_none() { self.votes_batched_since_last_tick += 1; + duplicate = false; + } + + if duplicate { + Err(pending_response) + } else { + Ok(()) + } + } + + /// When the next "tick" is supposed to happen. + fn time_next_tick(&self) -> Instant { + self.last_tick + BATCH_COLLECTING_INTERVAL + } +} + +impl PartialOrd for PendingWake { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} +impl Ord for PendingWake { + fn cmp(&self, other: &Self) -> Ordering { + // Reverse order for min-heap: + match other.next_tick.cmp(&self.next_tick) { + Ordering::Equal => other.candidate_hash.cmp(&self.candidate_hash), + o => o, } } } diff --git a/node/network/dispute-distribution/src/receiver/error.rs b/node/network/dispute-distribution/src/receiver/error.rs index e3e161925178..575e74f18ef6 100644 --- a/node/network/dispute-distribution/src/receiver/error.rs +++ b/node/network/dispute-distribution/src/receiver/error.rs @@ -45,6 +45,9 @@ pub enum Error { #[error("Dispute request with invalid signatures, from peer {0}.")] InvalidSignature(PeerId), + #[error("Received votes from peer {0} have been completely redundant.")] + RedundantMessage(PeerId), + #[error("Import of dispute got canceled for peer {0} - import failed for some reason.")] ImportCanceled(PeerId), diff --git a/node/network/dispute-distribution/src/receiver/mod.rs 
b/node/network/dispute-distribution/src/receiver/mod.rs index e8ca9cf121a8..98e978d12b5e 100644 --- a/node/network/dispute-distribution/src/receiver/mod.rs +++ b/node/network/dispute-distribution/src/receiver/mod.rs @@ -15,7 +15,7 @@ // along with Polkadot. If not, see . use std::{ - collections::{HashMap, HashSet, VecDeque}, + collections::{hash_map::Entry, HashMap, HashSet, VecDeque}, pin::Pin, task::{Context, Poll}, time::Duration, @@ -46,7 +46,7 @@ use polkadot_node_subsystem::{ overseer, }; use polkadot_node_subsystem_util::{runtime, runtime::RuntimeInfo}; -use polkadot_primitives::v2::AuthorityDiscoveryId; +use polkadot_primitives::v2::{AuthorityDiscoveryId, CandidateHash}; use crate::{ metrics::{FAILED, SUCCEEDED}, @@ -55,13 +55,14 @@ use crate::{ mod error; -/// Queues for incoming requests by peers. +/// Rate limiting queues for incoming requests by peers. mod peer_queues; /// Batch imports together. mod batch; use self::{ + batch::Batch, error::{log_error, JfyiError, JfyiResult, Result}, peer_queues::PeerQueues, }; @@ -85,13 +86,10 @@ pub const MIN_KEEP_BATCH_ALIVE_VOTES: u32 = 10; /// Time we allow to pass for new votes to trickle in. /// -/// See `MIN_KEEP_BATCH_ALIVE_VOTES` above. +/// See `MIN_KEEP_BATCH_ALIVE_VOTES` above. Must be greater or equal to `RECEIVE_RATE_LIMIT`. pub const BATCH_COLLECTING_INTERVAL: Duration = Duration::from_millis(500); /// State for handling incoming `DisputeRequest` messages. -/// -/// This is supposed to run as its own task in order to easily impose back pressure on the incoming -/// request channel and at the same time to drop flood messages as fast as possible. pub struct DisputesReceiver { /// Access to session information. runtime: RuntimeInfo, @@ -105,8 +103,15 @@ pub struct DisputesReceiver { /// Rate limiting queue for each peer (only authorities). peer_queues: PeerQueues, - /// Delay timer for establishing the rate limit. - rate_limit: Fuse, + /// Time to check on batches whether they are ready for import. 
+ check_batches_waker: Option>, + + /// Currently active batches of imports per candidate. + /// + /// We use an `IndexMap` here as the order of insertion is important. + /// + /// We rely on `time_next_tick()` of the oldest (first inserted + batches: HashMap, /// Authority discovery service: authority_discovery: AD, @@ -131,31 +136,15 @@ enum MuxedMessage { /// A new request has arrived and should be handled. NewRequest(IncomingRequest), -} -impl MuxedMessage { - async fn receive( - pending_imports: &mut PendingImports, - pending_requests: &mut IncomingRequestReceiver, - ) -> Result { - poll_fn(|ctx| { - // In case of Ready(None), we want to wait for pending requests: - if let Poll::Ready(Some(v)) = pending_imports.poll_next_unpin(ctx) { - return Poll::Ready(Ok(Self::ConfirmedImport(v))) - } + /// Rate limit timer hit - is is time to process one row of messages. + RateLimitedReady(Vec>), - let next_req = pending_requests.recv(|| vec![COST_INVALID_REQUEST]); - pin_mut!(next_req); - if let Poll::Ready(r) = next_req.poll(ctx) { - return match r { - Err(e) => Poll::Ready(Err(incoming::Error::from(e).into())), - Ok(v) => Poll::Ready(Ok(Self::NewRequest(v))), - } - } - Poll::Pending - }) - .await - } + /// It is time to check batches. + /// + /// Every `BATCH_COLLECTING_INTERVAL` we check whether less than `MIN_KEEP_BATCH_ALIVE_VOTES` + /// new votes arrived, if so the batch is ready for import. + WakeCheckBatches, } impl DisputesReceiver @@ -179,7 +168,8 @@ where sender, receiver, peer_queues: PeerQueues::new(), - rate_limit: Delay::new(RECEIVE_RATE_LIMIT).fuse(), + check_batches_waker: None, + batches: HashMap::new(), authority_discovery, pending_imports: PendingImports::new(), // Size of MAX_PARALLEL_IMPORTS ensures we are going to immediately get rid of any @@ -207,29 +197,39 @@ where } } - /// Actual work happening here. + /// Actual work happening here in three phases: + /// + /// 1. Receive and queue incoming messages until the rate limit timer hits. + /// 2. 
Do import/batching for the head of all queues. + /// 3. Check and flush any ready batches. async fn run_inner(&mut self) -> Result<()> { - let msg = if self.peer_queues.is_empty() { - // No point to wake on timeout: - Some(MuxedMessage::receive(&mut self.pending_imports, &mut self.receiver).await?) - } else { - self.wait_for_message_or_timeout().await? - }; + let msg = self.receive_message().await?; - if let Some(msg) = msg { - let incoming = match msg { - // We need to clean up futures, to make sure responses are sent: - MuxedMessage::ConfirmedImport(m_bad) => { - self.ban_bad_peer(m_bad)?; - return Ok(()) - }, - MuxedMessage::NewRequest(req) => req, - }; - - self.metrics.on_received_request(); - self.dispatch_to_queues(incoming).await?; - // Wait for more messages: - return Ok(()) + match msg { + MuxedMessage::NewRequest(req) => { + // Phase 1: + self.metrics.on_received_request(); + self.dispatch_to_queues(req).await?; + }, + MuxedMessage::RateLimitedReady(reqs) => { + // Phase 2: + for req in reqs { + // No early return - we cannot cancel imports of one peer, because the import of + // another failed: + match log_error(self.start_import_or_batch(req).await) { + Ok(()) => {}, + Err(fatal) => return Err(fatal.into()), + } + } + }, + MuxedMessage::WakeCheckBatches => { + // Phase 3: + }, + MuxedMessage::ConfirmedImport(m_bad) => { + // Handle import confirmation: + self.ban_bad_peer(m_bad)?; + return Ok(()) + }, } // Let's actually process messages, that made it through the rate limit: @@ -247,33 +247,91 @@ where } // All good - initiate import. - self.start_import(incoming).await + self.start_import_or_batch(incoming).await + } + + /// Receive one `MuxedMessage`. 
+ async fn receive_message(&mut self) -> Result { + poll_fn(|ctx| { + // In case of Ready(None), we want to wait for pending requests: + if let Poll::Ready(Some(v)) = self.pending_imports.poll_next_unpin(ctx) { + return Poll::Ready(Ok(MuxedMessage::ConfirmedImport(v))) + } + + let rate_limited = self.peer_queues.pop_reqs(); + pin_mut!(rate_limited); + if let Poll::Ready(reqs) = rate_limited.poll(ctx) { + return Poll::Ready(Ok(MuxedMessage::RateLimitedReady(reqs))) + } + + if let Some(timer) = self.check_batches_waker.as_mut() { + if let Poll::Ready(()) = Pin::new(timer).poll(ctx) { + return Poll::Ready(Ok(MuxedMessage::WakeCheckBatches)) + } + } + + let next_req = self.receiver.recv(|| vec![COST_INVALID_REQUEST]); + pin_mut!(next_req); + if let Poll::Ready(r) = next_req.poll(ctx) { + return match r { + Err(e) => Poll::Ready(Err(incoming::Error::from(e).into())), + Ok(v) => Poll::Ready(Ok(MuxedMessage::NewRequest(v))), + } + } + Poll::Pending + }) + .await } - /// Wait for a message or the `rate_limit` timeout to hit (if there is one). + /// Wait for a message or one of `rate_limit` or `batch_check` timeout to hit (if there is one). /// /// In case a message got received `rate_limit` will be populated by this function. This way we /// only wake on timeouts if there are actually any messages to process. /// - /// In case of timeout we return Ok(None). + /// In case of any timeout we return Ok(None). 
async fn wait_for_message_or_timeout(&mut self) -> Result> { - // We already have messages to process - rate limiting activated: let rcv_msg = MuxedMessage::receive(&mut self.pending_imports, &mut self.receiver).fuse(); + + let mut timeout = if let Some(timeout) = self.waker.as_mut() { + Pin::new(timeout) + } else { + // No need to get woken: + return Ok(Some(rcv_msg.await?)) + }; + pin_mut!(rcv_msg); - let mut timeout = Pin::new(&mut self.rate_limit); let result = select_biased!( () = timeout => None, msg = rcv_msg => Some(msg?), ); + if result.is_none() { // Timeout hit - we need a new Delay (started immediately so the following processing // does not further decrease allowed rate (assuming processing takes less than // `RECEIVE_RATE_LIMIT`): - self.rate_limit = Delay::new(RECEIVE_RATE_LIMIT).fuse(); + self.waker = self.get_new_waker(); } + Ok(result) } + /// Get a new waker. + /// + /// Ensure we wake up again when needed: + fn get_new_waker(&self) -> Option> { + // Ensure assumption this code makes: + debug_assert!(RECEIVE_RATE_LIMIT <= BATCH_COLLECTING_INTERVAL); + // Messages to process? + if !self.peer_queues.is_empty() { + return Some(Delay::new(RECEIVE_RATE_LIMIT).fuse()) + } + if !self.batches.is_empty() { + return Some(Delay::new(BATCH_COLLECTING_INTERVAL).fuse()) + } + // No need to wake at all: + return None + } + /// Process incoming requests. /// /// - Check sender is authority @@ -315,8 +373,14 @@ where Ok(()) } - /// Start importing votes for the given request. - async fn start_import(&mut self, incoming: IncomingRequest) -> Result<()> { + /// Start importing votes for the given request or batch. + /// + /// Signature check and in case we already have an existing batch we import to that batch, + /// otherwise import to `dispute-coordinator` directly and open a batch. 
+ async fn start_import_or_batch( + &mut self, + incoming: IncomingRequest, + ) -> Result<()> { let IncomingRequest { peer, payload, pending_response } = incoming; let info = self @@ -346,17 +410,55 @@ where Ok(votes) => votes, }; - let (pending_confirmation, confirmation_rx) = oneshot::channel(); - self.sender - .send_message(DisputeCoordinatorMessage::ImportStatements { - candidate_receipt, - session: valid_vote.0.session_index(), - statements: vec![valid_vote, invalid_vote], - pending_confirmation: Some(pending_confirmation), - }) - .await; + match self.batches.entry(*valid_vote.0.candidate_hash()) { + Entry::Vacant(vacant) => { + vacant.insert(Batch::new(candidate_receipt.clone())); + // There was no entry yet - start import immediately: + let (pending_confirmation, confirmation_rx) = oneshot::channel(); + self.sender + .send_message(DisputeCoordinatorMessage::ImportStatements { + candidate_receipt, + session: valid_vote.0.session_index(), + statements: vec![valid_vote, invalid_vote], + pending_confirmation: Some(pending_confirmation), + }) + .await; + + self.pending_imports.push(peer, confirmation_rx, pending_response); + }, + Entry::Occupied(mut occupied) => { + // Just import to batch: + let batch_result = + occupied.get_mut().add_votes(valid_vote, invalid_vote, pending_response); + + if let Err(pending_response) = batch_result { + // We don't expect honest peers to send redundant votes within a single batch, + // as the timeout for retry is much higher. Still we don't want to punish the + // node as it might not be the node's fault. Some other (malicious) node could have been + // faster sending the same votes in order to harm the reputation of that honest + // node. Given that we already have a rate limit, if a validator chooses to + // waste available rate with redundant votes - so be it. The actual dispute + // resolution is uneffected. 
+ gum::debug!( + target: LOG_TARGET, + "Peer sent completely redundant votes within a single batch - that looks fishy!", + ); + pending_response + .send_outgoing_response(OutgoingResponse { + // While we have seen duplicate votes, we cannot confirm as we don't + // know yet whether the batch is going to be confirmed, so we assume + // the worst. We don't want to push the pending response to the batch + // either as that would be unbounded, only limited by the rate limit. + result: Err(()), + reputation_changes: Vec::new(), + sent_feedback: None, + }) + .map_err(|_| JfyiError::SendResponse(peer))?; + return Err(From::from(JfyiError::RedundantMessage(peer))) + } + }, + } - self.pending_imports.push(peer, confirmation_rx, pending_response); Ok(()) } From 909569ba5c6c95335e10ddc910029386c008d2d5 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 26 Aug 2022 18:10:35 +0200 Subject: [PATCH 16/65] Small cleanup --- .../dispute-distribution/src/receiver/mod.rs | 51 +------------------ .../src/receiver/peer_queues.rs | 4 +- 2 files changed, 3 insertions(+), 52 deletions(-) diff --git a/node/network/dispute-distribution/src/receiver/mod.rs b/node/network/dispute-distribution/src/receiver/mod.rs index 98e978d12b5e..52bd5f6e3f9a 100644 --- a/node/network/dispute-distribution/src/receiver/mod.rs +++ b/node/network/dispute-distribution/src/receiver/mod.rs @@ -283,55 +283,6 @@ where .await } - /// Wait for a message or one of `rate_limit` or `batch_check` timeout to hit (if there is one). - /// - /// In case a message got received `rate_limit` will be populated by this function. This way we - /// only wake on timeouts if there are actually any messages to process. - /// - /// In case of any timeout we return Ok(None). 
- async fn wait_for_message_or_timeout(&mut self) -> Result> { - let rcv_msg = MuxedMessage::receive(&mut self.pending_imports, &mut self.receiver).fuse(); - - let mut timeout = if let Some(timeout) = self.waker.as_mut() { - Pin::new(timeout) - } else { - // No need to get woken: - return Ok(Some(rcv_msg.await?)) - }; - - pin_mut!(rcv_msg); - let result = select_biased!( - () = timeout => None, - msg = rcv_msg => Some(msg?), - ); - - if result.is_none() { - // Timeout hit - we need a new Delay (started immediately so the following processing - // does not further decrease allowed rate (assuming processing takes less than - // `RECEIVE_RATE_LIMIT`): - self.waker = self.get_new_waker(); - } - - Ok(result) - } - - /// Get a new waker. - /// - /// Ensure we wake up again when needed: - fn get_new_waker(&self) -> Option> { - // Ensure assumption this code makes: - debug_assert!(RECEIVE_RATE_LIMIT <= BATCH_COLLECTING_INTERVAL); - // Messages to process? - if !self.peer_queues.is_empty() { - return Some(Delay::new(RECEIVE_RATE_LIMIT).fuse()) - } - if !self.batches.is_empty() { - return Some(Delay::new(BATCH_COLLECTING_INTERVAL).fuse()) - } - // No need to wake at all: - return None - } - /// Process incoming requests. /// /// - Check sender is authority @@ -438,7 +389,7 @@ where // faster sending the same votes in order to harm the reputation of that honest // node. Given that we already have a rate limit, if a validator chooses to // waste available rate with redundant votes - so be it. The actual dispute - // resolution is uneffected. + // resolution is unaffected. 
gum::debug!( target: LOG_TARGET, "Peer sent completely redundant votes within a single batch - that looks fishy!", diff --git a/node/network/dispute-distribution/src/receiver/peer_queues.rs b/node/network/dispute-distribution/src/receiver/peer_queues.rs index dd29a0be68b3..9be45f4a7785 100644 --- a/node/network/dispute-distribution/src/receiver/peer_queues.rs +++ b/node/network/dispute-distribution/src/receiver/peer_queues.rs @@ -26,8 +26,8 @@ use crate::RECEIVE_RATE_LIMIT; /// How many messages we are willing to queue per peer (validator). /// /// The larger this value is, the larger bursts are allowed to be without us dropping messages. On -/// the flipside we should this gets allocated per validator, so for a size of 10 this will result -/// in 10_000 * size_of(IncomingRequest). +/// the flip side we should this gets allocated per validator, so for a size of 10 this will result +/// in 10_000 * size_of(`IncomingRequest`). /// /// `PEER_QUEUE_CAPACITY` must not be 0 for obvious reasons. pub const PEER_QUEUE_CAPACITY: usize = 10; From fa14c43364df6361f57c9c5a8a490c97626e4dfe Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 30 Aug 2022 12:37:44 +0200 Subject: [PATCH 17/65] Batching logic. 
--- .../src/receiver/{ => batches}/batch.rs | 188 +++++++++--------- .../src/receiver/batches/mod.rs | 154 ++++++++++++++ .../src/receiver/batches/waiting_queue.rs | 130 ++++++++++++ 3 files changed, 380 insertions(+), 92 deletions(-) rename node/network/dispute-distribution/src/receiver/{ => batches}/batch.rs (54%) create mode 100644 node/network/dispute-distribution/src/receiver/batches/mod.rs create mode 100644 node/network/dispute-distribution/src/receiver/batches/waiting_queue.rs diff --git a/node/network/dispute-distribution/src/receiver/batch.rs b/node/network/dispute-distribution/src/receiver/batches/batch.rs similarity index 54% rename from node/network/dispute-distribution/src/receiver/batch.rs rename to node/network/dispute-distribution/src/receiver/batches/batch.rs index 3e0af4a0f32c..c5142fedb9a7 100644 --- a/node/network/dispute-distribution/src/receiver/batch.rs +++ b/node/network/dispute-distribution/src/receiver/batches/batch.rs @@ -14,82 +14,18 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use std::{ - cmp::{Ord, Ordering}, - collections::{hash_map, BTreeMap, BinaryHeap, HashMap, HashSet}, - time::Instant, -}; +use std::{collections::HashMap, time::Instant}; -use futures_timer::Delay; +use gum::CandidateHash; use polkadot_node_network_protocol::request_response::{ incoming::OutgoingResponseSender, v1::DisputeRequest, }; use polkadot_node_primitives::SignedDisputeStatement; -use polkadot_primitives::v2::{CandidateHash, CandidateReceipt, ValidatorIndex}; - -use super::BATCH_COLLECTING_INTERVAL; - -/// TODO: Limit number of batches - -// - Batches can be added very rate limit timeout. -// - They have to be checked every BATCH_COLLECTING_INTERVAL. -// - We can get the earliest next wakeup - keep ordered list of wakeups! Then we always know when -// the next one comes - only needs to get updated on insert. - Tada! 
-struct Batches { - batches: HashMap, - check_waker: Option, - pending_wakes: BinaryHeap, -} - -/// Represents some batch waiting for its next tick to happen at `next_tick`. -/// -/// This is an internal type meant to be used in the `pending_wakes` `BinaryHeap` field of -/// `Batches`. It provides an `Ord` instance, that sorts descending with regard to `Instant` (so we -/// get a `min-heap` with the earliest `Instant` at the top. -#[derive(Eq, PartialEq)] -struct PendingWake { - candidate_hash: CandidateHash, - next_tick: Instant, -} +use polkadot_primitives::v2::{CandidateReceipt, ValidatorIndex}; -/// A found batch is either really found or got created so it can be found. -enum FoundBatch<'a> { - /// Batch just got created. - Created(&'a mut Batch), - /// Batch already existed. - Found(&'a mut Batch), -} +use crate::receiver::{BATCH_COLLECTING_INTERVAL, MIN_KEEP_BATCH_ALIVE_VOTES}; -impl Batches { - /// Create new empty `Batches`. - pub fn new() -> Self { - Self { batches: HashMap::new(), check_waker: None, pending_wakes: BinaryHeap::new() } - } - - /// Find a particular batch. - /// - /// That is either find it, or we create it as reflected by the result `FoundBatch`. - pub fn find_batch( - &mut self, - candidate_hash: CandidateHash, - candidate_receipt: CandidateReceipt, - ) -> FoundBatch { - debug_assert!(candidate_hash == candidate_receipt.hash()); - match self.batches.entry(candidate_hash) { - hash_map::Entry::Vacant(vacant) => - FoundBatch::Created(vacant.insert(Batch::new(candidate_receipt))), - hash_map::Entry::Occupied(occupied) => FoundBatch::Found(occupied.get_mut()), - } - } - // Next steps: - // - // - Make sure binary heap above stays current. - // - Use head of binary heap to schedule next wakeup. - // - Provide funtion that provides imports delayed by the wakeup future (similar to rate - // limiting). - // - Important: Direct updating of last_tick of `Batch` has to be forbidden as this would break - // our binary heap. 
Instead all updates have to go through `Batches` -} +use super::MAX_BATCH_LIFETIME; /// A batch of votes to be imported into the `dispute-coordinator`. /// @@ -103,7 +39,7 @@ pub struct Batch { /// The actual candidate this batch is concerned with. candidate_receipt: CandidateReceipt, - /// Cache `CandidateHash` to do efficient sanity checks. + /// Cache of `CandidateHash` (candidate_receipt.hash()). candidate_hash: CandidateHash, /// All valid votes received in this batch so far. @@ -119,30 +55,77 @@ pub struct Batch { /// All invalid votes received in this batch so far. invalid_votes: HashMap, - /// How many votes have been batched in the last `BATCH_COLLECTING_INTERVAL`? + /// How many votes have been batched since the last tick/creation. votes_batched_since_last_tick: u32, - /// Timestamp of creation or last time we checked incoming rate. - last_tick: Instant, + /// Expiry time for the batch. + /// + /// By this time the lastest this batch will get flushed. + best_before: Instant, /// Requesters waiting for a response. pending_responses: Vec>, } +/// Result of checking a batch every `BATCH_COLLECTING_INTERVAL`. +pub(super) enum TickResult { + /// Batch is still alive, please call `tick` again at the given `Instant`. + Alive(Batch, Instant), + /// Batch is done, ready for import! + Done(PreparedImport), +} + +/// Ready for import. +pub struct PreparedImport { + pub candidate_receipt: CandidateReceipt, + pub statements: Vec<(SignedDisputeStatement, ValidatorIndex)>, + pub pending_responses: Vec>, +} + +impl From for PreparedImport { + fn from(batch: Batch) -> Self { + let Batch { candidate_receipt, valid_votes, invalid_votes, pending_responses, .. } = batch; + + let statements = valid_votes + .into_iter() + .chain(invalid_votes.into_iter()) + .map(|(index, statement)| (statement, index)) + .collect(); + + Self { candidate_receipt, statements, pending_responses } + } +} + impl Batch { /// Create a new empty batch based on the given `CandidateReceipt`. 
 	///
 	/// To create a `Batch` use Batches::find_batch`.
-	fn new(candidate_receipt: CandidateReceipt) -> Self {
-		Self {
+	///
+	/// Arguments:
+	///
+	/// * `candidate_receipt` - The candidate this batch is meant to track votes for.
+	/// * `now` - current timestamp for calculating the first tick.
+	///
+	/// Returns:
+	///
+	/// A batch and the first `Instant` at which you are supposed to call `tick`.
+	///
+	pub(super) fn new(candidate_receipt: CandidateReceipt, now: Instant) -> (Self, Instant) {
+		let s = Self {
 			candidate_hash: candidate_receipt.hash(),
 			candidate_receipt,
 			valid_votes: HashMap::new(),
 			invalid_votes: HashMap::new(),
 			votes_batched_since_last_tick: 0,
-			last_tick: Instant::now(),
+			best_before: Instant::now() + MAX_BATCH_LIFETIME,
 			pending_responses: Vec::new(),
-		}
+		};
+		(s, s.calculate_next_tick(now))
+	}
+
+	/// Hash of the candidate this batch is batching votes for.
+	pub fn candidate_hash(&self) -> &CandidateHash {
+		&self.candidate_hash
 	}
 
 	/// Add votes from a validator into the batch.
@@ -180,23 +163,44 @@ impl Batch {
 		}
 	}
 
-	/// When the next "tick" is supposed to happen.
-	fn time_next_tick(&self) -> Instant {
-		self.last_tick + BATCH_COLLECTING_INTERVAL
+	/// Check batch for liveness.
+	///
+	/// This function is supposed to be called at instants given at construction and as returned as
+	/// part of `TickResult`.
+	pub fn tick(self, now: Instant) -> TickResult {
+		if self.votes_batched_since_last_tick >= MIN_KEEP_BATCH_ALIVE_VOTES &&
+			now < self.best_before
+		{
+			// Still good:
+			let next_tick = self.calculate_next_tick(now);
+			// Reset counter:
+			self.votes_batched_since_last_tick = 0;
+			TickResult::Alive(self, next_tick)
+		} else {
+			TickResult::Done(PreparedImport::from(self))
+		}
 	}
-}
 
-impl PartialOrd for PendingWake {
-	fn partial_cmp(&self, other: &Self) -> Option {
-		Some(self.cmp(other))
-	}
-}
-impl Ord for PendingWake {
-	fn cmp(&self, other: &Self) -> Ordering {
-		// Reverse order for min-heap:
-		match other.next_tick.cmp(&self.next_tick) {
-			Ordering::Equal => other.candidate_hash.cmp(&self.candidate_hash),
-			o => o,
+	/// Calculate when the next tick should happen.
+	///
+	/// This will usually return `now + BATCH_COLLECTING_INTERVAL`, except if the lifetime of this batch
+	/// would exceed `MAX_BATCH_LIFETIME`.
+	///
+	/// # Arguments
+	///
+	/// * `now` - The current time.
+	fn calculate_next_tick(&self, now: Instant) -> Instant {
+		let next_tick = now + BATCH_COLLECTING_INTERVAL;
+		if next_tick < self.best_before {
+			next_tick
+		} else {
+			self.best_before
 		}
 	}
 }
+
+// Test tick behaviour:
+// - If less than `MIN_KEEP_BATCH_ALIVE_VOTES` trickled in since last tick - batch should become
+//   done.
+// - If batch surpassed its `best_before` it should become done.
+// - Batch does not count duplicate votes.
diff --git a/node/network/dispute-distribution/src/receiver/batches/mod.rs b/node/network/dispute-distribution/src/receiver/batches/mod.rs
new file mode 100644
index 000000000000..b2d7edfa864e
--- /dev/null
+++ b/node/network/dispute-distribution/src/receiver/batches/mod.rs
@@ -0,0 +1,154 @@
+// Copyright 2022 Parity Technologies (UK) Ltd.
+// This file is part of Polkadot.
+ +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use std::{ + collections::{hash_map, HashMap}, + time::{Duration, Instant}, +}; + +use futures::future::pending; + +use polkadot_node_network_protocol::request_response::DISPUTE_REQUEST_TIMEOUT; +use polkadot_primitives::v2::{CandidateHash, CandidateReceipt}; + +use crate::receiver::batches::{batch::TickResult, waiting_queue::PendingWake}; + +pub use self::batch::{Batch, PreparedImport}; +use self::waiting_queue::WaitingQueue; + +use super::BATCH_COLLECTING_INTERVAL; + +/// A single batch (per candidate) as managed by `Batches`. +mod batch; + +/// Simple abstraction queue events in time and wait for them to become ready. +mod waiting_queue; + +/// Safe-guard in case votes trickle in real slow. +/// +/// If the batch life time exceeded the time the sender is willing to wait for a confirmation, we +/// would trigger pointless re-sends. +const MAX_BATCH_LIFETIME: Duration = DISPUTE_REQUEST_TIMEOUT - Duration::from_secs(2); + +/// TODO: Limit number of batches + +// - Batches can be added very rate limit timeout. +// - They have to be checked every BATCH_COLLECTING_INTERVAL. +// - We can get the earliest next wakeup - keep ordered list of wakeups! Then we always know when +// the next one comes - only needs to get updated on insert. - Tada! +/// Manage batches. +/// +/// - Batches can be found via `find_batch()` in order to add votes to them/check they exist. 
+/// - Batches can be checked for being ready for flushing in order to import contained votes. +struct Batches { + /// The batches we manage. + /// + /// Kept invariants: + /// For each entry in `batches`, there exists an entry in `waiting_queue` as well - we wait on + /// all batches! + batches: HashMap, + /// Waiting queue for waiting for batches to become ready for `tick`. + /// + /// Kept invariants by `Batches`: + /// For each entry in the waiting_queue there exists a corresponding entry in `batches`. + waiting_queue: WaitingQueue, +} + +/// A found batch is either really found or got created so it can be found. +enum FoundBatch<'a> { + /// Batch just got created. + Created(&'a mut Batch), + /// Batch already existed. + Found(&'a mut Batch), +} + +impl Batches { + /// Create new empty `Batches`. + pub fn new() -> Self { + debug_assert!( + MAX_BATCH_LIFETIME > BATCH_COLLECTING_INTERVAL, + "Unexpectedly low `MAX_BATCH_LIFETIME`, please check parameters." + ); + Self { batches: HashMap::new(), waiting_queue: WaitingQueue::new() } + } + + /// Find a particular batch. + /// + /// That is either find it, or we create it as reflected by the result `FoundBatch`. + pub fn find_batch( + &mut self, + candidate_hash: CandidateHash, + candidate_receipt: CandidateReceipt, + ) -> FoundBatch { + debug_assert!(candidate_hash == candidate_receipt.hash()); + match self.batches.entry(candidate_hash) { + hash_map::Entry::Vacant(vacant) => { + let now = Instant::now(); + let (created, ready_at) = Batch::new(candidate_receipt, now); + let pending_wake = PendingWake { payload: candidate_hash, ready_at }; + self.waiting_queue.push(pending_wake); + FoundBatch::Created(vacant.insert(created)) + }, + hash_map::Entry::Occupied(occupied) => FoundBatch::Found(occupied.get_mut()), + } + } + + /// Wait for the next `tick` to check for ready batches. 
+ /// + /// This function blocks (returns `Poll::Pending`) until at least one batch can be + /// checked for readiness meaning that `BATCH_COLLECTING_INTERVAL` has passed since the last + /// check for that batch or it reached end of life. + /// + /// If this `Batches` instance is empty (does not actually contain any batches), then this + /// function will always return `Poll::Pending`. + /// + /// Returns: A `Vec` of all `PreparedImport`s from batches that became ready. + pub async fn wait_tick(&mut self) -> Vec { + let now = Instant::now(); + + let mut imports = Vec::new(); + + // Wait for at least one batch to become ready: + self.waiting_queue.wait_ready(now); + + // Process all ready waits: + while let Some(wake) = self.waiting_queue.pop_ready(now) { + let batch = self.batches.remove(&wake.payload); + debug_assert!( + batch.is_some(), + "Entries referenced in `waiting_queue` are supposed to exist!" + ); + let batch = match batch { + None => return pending().await, + Some(batch) => batch, + }; + match batch.tick(now) { + TickResult::Done(import) => { + imports.push(import); + }, + TickResult::Alive(old_batch, next_tick) => { + let pending_wake = PendingWake { payload: wake.payload, ready_at: next_tick }; + self.waiting_queue.push(pending_wake); + self.batches.insert(wake.payload, old_batch); + }, + } + } + imports + } +} + +// Tests: +// - Make sure waker gets updated to earlier event in case an earlier event gets inserted. diff --git a/node/network/dispute-distribution/src/receiver/batches/waiting_queue.rs b/node/network/dispute-distribution/src/receiver/batches/waiting_queue.rs new file mode 100644 index 000000000000..cbb493faf7aa --- /dev/null +++ b/node/network/dispute-distribution/src/receiver/batches/waiting_queue.rs @@ -0,0 +1,130 @@ +// Copyright 2022 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. 
+
+// Polkadot is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Polkadot is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Polkadot. If not, see .
+
+use std::{cmp::Ordering, collections::BinaryHeap, time::Instant};
+
+use futures::future::pending;
+use futures_timer::Delay;
+
+/// Wait asynchronously for given `Instant`s one after the other.
+///
+/// `PendingWake`s can be inserted and `WaitingQueue` makes sure to wake calls to `pop` so those
+/// structs can be processed when they are deemed ready.
+pub struct WaitingQueue {
+	/// All pending wakes we are supposed to wait on in order.
+	pending_wakes: BinaryHeap>,
+	/// Wait for next `PendingWake`.
+	waker: Option,
+}
+
+/// Represents some event waiting to be processed at `ready_at`.
+///
+/// This is an event in `WaitingQueue`. It provides an `Ord` instance, that sorts descending with
+/// regard to `Instant` (so we get a `min-heap` with the earliest `Instant` at the top).
+#[derive(Eq, PartialEq)]
+pub struct PendingWake {
+	pub payload: Payload,
+	pub ready_at: Instant,
+}
+
+impl WaitingQueue {
+	/// Get a new empty `WaitingQueue`.
+	///
+	/// If you call `pop` on this queue immediately it will always return `Poll::Pending`.
+	pub fn new() -> Self {
+		Self { pending_wakes: BinaryHeap::new(), waker: None }
+	}
+
+	/// Push a `PendingWake`.
+	///
+	/// The next call to `pop` will make sure to wake soon enough to process that new event in a
+	/// timely manner.
+ pub fn push(&mut self, wake: PendingWake) { + self.pending_wakes.push(wake); + // Reset waker as it is potentially obsolete now: + self.waker = None; + } + + /// Pop an item from the queue if ready. + /// + /// Whether ready or not is determined based on the passed timestamp `now` which should be the + /// current time as returned by `Instant::now()` + /// + /// Returns: The next `PendingWake` after it became ready. `pop` will wait for that + /// asynchronously to happen if it is not already the case. If there is no more item, this call + /// will wait forever (return Poll::Pending without scheduling a wake). + pub async fn pop(&mut self, now: Instant) -> PendingWake { + self.wait_ready(now).await; + self.pending_wakes + .pop() + .expect("We just waited for an item to become ready. qed.") + } + + /// Pop the next ready item. + /// + /// In contrast to `pop` this function does not wait, if nothing is ready right now as + /// determined by the passed `now` timestamp, this function simply returns `None`. + pub fn pop_ready(&mut self, now: Instant) -> Option> { + let is_ready = self.pending_wakes.peek().map_or(false, |p| p.ready_at <= now); + if is_ready { + Some(self.pending_wakes.pop().expect("We just peeked. qed.")) + } else { + None + } + } + + /// Don't pop, just wait until something is ready. + /// + /// Once this function returns `Poll::Ready(())` `pop_ready()` will return `Some`. + /// + /// Behaviour of this function is equal to `pop` otherwise, only difference is that the next + /// item is not actually popped. + pub async fn wait_ready(&mut self, now: Instant) { + if let Some(waker) = &mut self.waker { + // Previous timer was not done yet. 
+ waker.await + } + loop { + let next_waiting = self.pending_wakes.peek(); + let is_ready = next_waiting.map_or(false, |p| p.ready_at <= now); + if !is_ready { + self.waker = next_waiting.map(|p| Delay::new(p.ready_at.duration_since(now))); + match &mut self.waker { + None => return pending().await, + Some(waker) => waker.await, + } + } else { + return + } + } + } +} + +impl PartialOrd> for PendingWake { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for PendingWake { + fn cmp(&self, other: &Self) -> Ordering { + // Reverse order for min-heap: + match other.ready_at.cmp(&self.ready_at) { + Ordering::Equal => other.payload.cmp(&self.payload), + o => o, + } + } +} From 89d622d0c5a85d34db4ea781ced0547c81eaa846 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Tue, 30 Aug 2022 13:11:25 +0200 Subject: [PATCH 18/65] Some integration work. --- .../src/receiver/batches/batch.rs | 5 + .../src/receiver/batches/mod.rs | 6 +- .../dispute-distribution/src/receiver/mod.rs | 119 ++++++++++-------- .../protocol/src/request_response/mod.rs | 8 +- 4 files changed, 79 insertions(+), 59 deletions(-) diff --git a/node/network/dispute-distribution/src/receiver/batches/batch.rs b/node/network/dispute-distribution/src/receiver/batches/batch.rs index c5142fedb9a7..ae6975422572 100644 --- a/node/network/dispute-distribution/src/receiver/batches/batch.rs +++ b/node/network/dispute-distribution/src/receiver/batches/batch.rs @@ -128,6 +128,11 @@ impl Batch { &self.candidate_hash } + /// Receipt of the candidate this batch is batching votes for. + pub fn candidate_receipt(&self) -> &CandidateReceipt { + &self.candidate_receipt + } + /// Add votes from a validator into the batch. 
/// /// The statements are supposed to be the valid and invalid statements received in a diff --git a/node/network/dispute-distribution/src/receiver/batches/mod.rs b/node/network/dispute-distribution/src/receiver/batches/mod.rs index b2d7edfa864e..5e6bc756327b 100644 --- a/node/network/dispute-distribution/src/receiver/batches/mod.rs +++ b/node/network/dispute-distribution/src/receiver/batches/mod.rs @@ -53,7 +53,7 @@ const MAX_BATCH_LIFETIME: Duration = DISPUTE_REQUEST_TIMEOUT - Duration::from_se /// /// - Batches can be found via `find_batch()` in order to add votes to them/check they exist. /// - Batches can be checked for being ready for flushing in order to import contained votes. -struct Batches { +pub struct Batches { /// The batches we manage. /// /// Kept invariants: @@ -68,7 +68,7 @@ struct Batches { } /// A found batch is either really found or got created so it can be found. -enum FoundBatch<'a> { +pub enum FoundBatch<'a> { /// Batch just got created. Created(&'a mut Batch), /// Batch already existed. @@ -116,7 +116,7 @@ impl Batches { /// function will always return `Poll::Pending`. /// /// Returns: A `Vec` of all `PreparedImport`s from batches that became ready. - pub async fn wait_tick(&mut self) -> Vec { + pub async fn check_batches(&mut self) -> Vec { let now = Instant::now(); let mut imports = Vec::new(); diff --git a/node/network/dispute-distribution/src/receiver/mod.rs b/node/network/dispute-distribution/src/receiver/mod.rs index 52bd5f6e3f9a..f365b6877256 100644 --- a/node/network/dispute-distribution/src/receiver/mod.rs +++ b/node/network/dispute-distribution/src/receiver/mod.rs @@ -15,7 +15,7 @@ // along with Polkadot. If not, see . 
use std::{ - collections::{hash_map::Entry, HashMap, HashSet, VecDeque}, + collections::HashSet, pin::Pin, task::{Context, Poll}, time::Duration, @@ -23,13 +23,11 @@ use std::{ use futures::{ channel::oneshot, - future::{poll_fn, BoxFuture, Fuse}, - pin_mut, select_biased, + future::{poll_fn, BoxFuture}, + pin_mut, stream::{FusedStream, FuturesUnordered, StreamExt}, Future, FutureExt, Stream, }; -use futures_timer::Delay; -use lru::LruCache; use polkadot_node_network_protocol::{ authority_discovery::AuthorityDiscovery, @@ -46,11 +44,10 @@ use polkadot_node_subsystem::{ overseer, }; use polkadot_node_subsystem_util::{runtime, runtime::RuntimeInfo}; -use polkadot_primitives::v2::{AuthorityDiscoveryId, CandidateHash}; use crate::{ metrics::{FAILED, SUCCEEDED}, - Metrics, LOG_TARGET, RECEIVE_RATE_LIMIT, + Metrics, LOG_TARGET, }; mod error; @@ -59,10 +56,10 @@ mod error; mod peer_queues; /// Batch imports together. -mod batch; +mod batches; use self::{ - batch::Batch, + batches::{Batches, FoundBatch, PreparedImport}, error::{log_error, JfyiError, JfyiResult, Result}, peer_queues::PeerQueues, }; @@ -103,20 +100,15 @@ pub struct DisputesReceiver { /// Rate limiting queue for each peer (only authorities). peer_queues: PeerQueues, - /// Time to check on batches whether they are ready for import. - check_batches_waker: Option>, - /// Currently active batches of imports per candidate. - /// - /// We use an `IndexMap` here as the order of insertion is important. - /// - /// We rely on `time_next_tick()` of the oldest (first inserted - batches: HashMap, + batches: Batches, /// Authority discovery service: authority_discovery: AD, /// Imports currently being processed. + /// + /// TODO: Flush batches on invalid result of first vote import. pending_imports: PendingImports, /// Log received requests. @@ -138,13 +130,17 @@ enum MuxedMessage { NewRequest(IncomingRequest), /// Rate limit timer hit - is is time to process one row of messages. 
- RateLimitedReady(Vec>), + /// + /// This is the result of calling self.peer_queues.pop_reqs(). + WakePeerQueuesPopReqs(Vec>), /// It is time to check batches. /// /// Every `BATCH_COLLECTING_INTERVAL` we check whether less than `MIN_KEEP_BATCH_ALIVE_VOTES` /// new votes arrived, if so the batch is ready for import. - WakeCheckBatches, + /// + /// This is the result of calling self.batches.check_batches(). + WakeCheckBatches(Vec), } impl DisputesReceiver @@ -168,8 +164,7 @@ where sender, receiver, peer_queues: PeerQueues::new(), - check_batches_waker: None, - batches: HashMap::new(), + batches: Batches::new(), authority_discovery, pending_imports: PendingImports::new(), // Size of MAX_PARALLEL_IMPORTS ensures we are going to immediately get rid of any @@ -211,7 +206,7 @@ where self.metrics.on_received_request(); self.dispatch_to_queues(req).await?; }, - MuxedMessage::RateLimitedReady(reqs) => { + MuxedMessage::WakePeerQueuesPopReqs(reqs) => { // Phase 2: for req in reqs { // No early return - we cannot cancel imports of one peer, because the import of @@ -222,35 +217,23 @@ where } } }, - MuxedMessage::WakeCheckBatches => { + MuxedMessage::WakeCheckBatches(ready_imports) => { // Phase 3: + self.import_ready_batches(ready_imports).await?; }, MuxedMessage::ConfirmedImport(m_bad) => { // Handle import confirmation: self.ban_bad_peer(m_bad)?; - return Ok(()) }, } - // Let's actually process messages, that made it through the rate limit: - // - // Batch: - // - Collect votes - get rid of duplicates. - // - Keep track of import rate. - // - Flush if import rate is not matched - // Wait for a free slot: - // - if self.pending_imports.len() >= MAX_PARALLEL_IMPORTS as usize { - // Wait for one to finish: - let r = self.pending_imports.next().await; - self.ban_bad_peer(r.expect("pending_imports.len() is greater 0. qed."))?; - } - - // All good - initiate import. - self.start_import_or_batch(incoming).await + Ok(()) } /// Receive one `MuxedMessage`. 
+ ///
+ ///
+ /// Dispatches events to messages as they happen.
 async fn receive_message(&mut self) -> Result<MuxedMessage> {
 poll_fn(|ctx| {
 // In case of Ready(None), we want to wait for pending requests:
@@ -260,14 +243,16 @@ where
 let rate_limited = self.peer_queues.pop_reqs();
 pin_mut!(rate_limited);
 
+ // We poll rate_limit before batches, so we don't unnecessarily delay importing to
+ // batches.
 if let Poll::Ready(reqs) = rate_limited.poll(ctx) {
- return Poll::Ready(Ok(MuxedMessage::RateLimitedReady(reqs)))
+ return Poll::Ready(Ok(MuxedMessage::WakePeerQueuesPopReqs(reqs)))
 }
 
- if let Some(timer) = self.check_batches_waker.as_mut() {
- if let Poll::Ready(()) = Pin::new(timer).poll(ctx) {
- return Poll::Ready(Ok(MuxedMessage::WakeCheckBatches))
- }
+ let ready_batches = self.batches.check_batches();
+ pin_mut!(ready_batches);
+ if let Poll::Ready(ready_batches) = ready_batches.poll(ctx) {
+ return Poll::Ready(Ok(MuxedMessage::WakeCheckBatches(ready_batches)))
 }
 
 let next_req = self.receiver.recv(|| vec![COST_INVALID_REQUEST]);
@@ -361,14 +346,13 @@ where
 Ok(votes) => votes,
 };
 
- match self.batches.entry(*valid_vote.0.candidate_hash()) {
- Entry::Vacant(vacant) => {
- vacant.insert(Batch::new(candidate_receipt.clone()));
+ match self.batches.find_batch(*valid_vote.0.candidate_hash(), candidate_receipt) {
+ FoundBatch::Created(batch) => {
 // There was no entry yet - start import immediately:
 let (pending_confirmation, confirmation_rx) = oneshot::channel();
 self.sender
 .send_message(DisputeCoordinatorMessage::ImportStatements {
- candidate_receipt,
+ candidate_receipt: batch.candidate_receipt().clone(),
 session: valid_vote.0.session_index(),
 statements: vec![valid_vote, invalid_vote],
 pending_confirmation: Some(pending_confirmation),
@@ -377,10 +361,8 @@ where
 self.pending_imports.push(peer, confirmation_rx, pending_response);
 },
- Entry::Occupied(mut occupied) => {
- // Just import to batch:
- let batch_result =
- occupied.get_mut().add_votes(valid_vote, invalid_vote, 
pending_response); + FoundBatch::Found(batch) => { + let batch_result = batch.add_votes(valid_vote, invalid_vote, pending_response); if let Err(pending_response) = batch_result { // We don't expect honest peers to send redundant votes within a single batch, @@ -413,6 +395,37 @@ where Ok(()) } + /// Trigger import into the dispute-coordinator of ready batches (`PreparedImport`s). + async fn import_ready_batches(&mut self, ready_imports: Vec) -> Result<()> { + for import in ready_imports { + let PreparedImport { candidate_receipt, statements, pending_responses } = import; + let session_index = match statements.iter().next() { + None => { + gum::debug!( + target: LOG_TARGET, + candidate_hash = ?candidate_receipt.hash(), + "Not importing empty batch" + ); + continue + }, + Some(vote) => vote.0.session_index(), + }; + + let (pending_confirmation, confirmation_rx) = oneshot::channel(); + self.sender + .send_message(DisputeCoordinatorMessage::ImportStatements { + candidate_receipt, + session: session_index, + statements, + pending_confirmation: Some(pending_confirmation), + }) + .await; + // TODO: + // Confirmation has to trigger response senders: + } + unimplemented!("WIP") + } + /// Await an import and ban any misbehaving peers. /// /// In addition we report import metrics. diff --git a/node/network/protocol/src/request_response/mod.rs b/node/network/protocol/src/request_response/mod.rs index fb955286990e..f4e0dd11f479 100644 --- a/node/network/protocol/src/request_response/mod.rs +++ b/node/network/protocol/src/request_response/mod.rs @@ -121,6 +121,10 @@ const POV_RESPONSE_SIZE: u64 = MAX_POV_SIZE as u64 + 10_000; /// This is `MAX_CODE_SIZE` plus some additional space for protocol overhead. const STATEMENT_RESPONSE_SIZE: u64 = MAX_CODE_SIZE as u64 + 10_000; +/// We can have relative large timeouts here, there is no value of hitting a +/// timeout as we want to get statements through to each node in any case. 
+pub const DISPUTE_REQUEST_TIMEOUT: Duration = Duration::from_secs(12);
+
 impl Protocol {
 /// Get a configuration for a given Request response protocol.
 ///
@@ -194,9 +198,7 @@ impl Protocol {
 /// Responses are just confirmation, in essence not even a bit. So 100 seems
 /// plenty.
 max_response_size: 100,
- /// We can have relative large timeouts here, there is no value of hitting a
- /// timeout as we want to get statements through to each node in any case.
- request_timeout: Duration::from_secs(12),
+ request_timeout: DISPUTE_REQUEST_TIMEOUT,
 inbound_queue: Some(tx),
 },
 };
From 855d6da0cfa3f67b80208a3068e5630bce27ae63 Mon Sep 17 00:00:00 2001
From: Tsvetomir Dimitrov <tsvetomir@parity.io>
Date: Sat, 14 May 2022 18:15:34 +0300
Subject: [PATCH 19/65] Add `DisputeStatus` to `DisputeCoordinatorMessage::RecentDisputes`

The new signature of the message is:
```
RecentDisputes(oneshot::Sender<Vec<(SessionIndex, CandidateHash, DisputeStatus)>>),
```
As part of the change also add `DisputeStatus` to `polkadot_node_primitives`.
---
 node/core/dispute-coordinator/src/db/v1.rs | 2 +-
 .../dispute-coordinator/src/initialized.rs | 10 +-
 node/core/dispute-coordinator/src/status.rs | 100 +---------------
 node/primitives/src/disputes/mod.rs | 2 +
 node/primitives/src/disputes/status.rs | 112 ++++++++++++++++++
 node/primitives/src/lib.rs | 4 +-
 node/subsystem-types/src/messages.rs | 6 +-
 7 files changed, 128 insertions(+), 108 deletions(-)
 create mode 100644 node/primitives/src/disputes/status.rs

diff --git a/node/core/dispute-coordinator/src/db/v1.rs b/node/core/dispute-coordinator/src/db/v1.rs
index 4d33949db644..2c643d341de2 100644
--- a/node/core/dispute-coordinator/src/db/v1.rs
+++ b/node/core/dispute-coordinator/src/db/v1.rs
@@ -16,6 +16,7 @@
 //! `V1` database for the dispute coordinator.
+use polkadot_node_primitives::DisputeStatus; use polkadot_node_subsystem::{SubsystemError, SubsystemResult}; use polkadot_node_subsystem_util::database::{DBTransaction, Database}; use polkadot_primitives::v2::{ @@ -31,7 +32,6 @@ use crate::{ backend::{Backend, BackendWriteOp, OverlayedBackend}, error::{FatalError, FatalResult}, metrics::Metrics, - status::DisputeStatus, DISPUTE_WINDOW, LOG_TARGET, }; diff --git a/node/core/dispute-coordinator/src/initialized.rs b/node/core/dispute-coordinator/src/initialized.rs index e37459dc5142..c1b34ce134cc 100644 --- a/node/core/dispute-coordinator/src/initialized.rs +++ b/node/core/dispute-coordinator/src/initialized.rs @@ -26,8 +26,8 @@ use futures::{ use sc_keystore::LocalKeystore; use polkadot_node_primitives::{ - CandidateVotes, DisputeMessage, DisputeMessageCheckError, SignedDisputeStatement, - DISPUTE_WINDOW, + CandidateVotes, DisputeMessage, DisputeMessageCheckError, DisputeStatus, + SignedDisputeStatement, Timestamp, DISPUTE_WINDOW, }; use polkadot_node_subsystem::{ messages::{ @@ -49,7 +49,7 @@ use crate::{ error::{log_error, Error, FatalError, FatalResult, JfyiError, JfyiResult, Result}, import::{CandidateEnvironment, CandidateVoteState}, metrics::Metrics, - status::{get_active_with_status, Clock, DisputeStatus, Timestamp}, + status::{get_active_with_status, Clock}, DisputeCoordinatorSubsystem, LOG_TARGET, }; @@ -599,7 +599,9 @@ impl Initialized { }; gum::trace!(target: LOG_TARGET, "Loaded recent disputes from db"); - let _ = tx.send(recent_disputes.keys().cloned().collect()); + let _ = tx.send( + recent_disputes.into_iter().map(|(k, v)| (k.0, k.1, v)).collect::>(), + ); }, DisputeCoordinatorMessage::ActiveDisputes(tx) => { // Return error if session information is missing. 
diff --git a/node/core/dispute-coordinator/src/status.rs b/node/core/dispute-coordinator/src/status.rs index d2ad551bd9ad..8a0e2de524d1 100644 --- a/node/core/dispute-coordinator/src/status.rs +++ b/node/core/dispute-coordinator/src/status.rs @@ -14,10 +14,9 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use std::time::{SystemTime, UNIX_EPOCH}; - -use parity_scale_codec::{Decode, Encode}; +use polkadot_node_primitives::{DisputeStatus, Timestamp}; use polkadot_primitives::v2::{CandidateHash, SessionIndex}; +use std::time::{SystemTime, UNIX_EPOCH}; use crate::LOG_TARGET; @@ -27,101 +26,6 @@ use crate::LOG_TARGET; /// disputes. pub const ACTIVE_DURATION_SECS: Timestamp = 180; -/// Timestamp based on the 1 Jan 1970 UNIX base, which is persistent across node restarts and OS reboots. -pub type Timestamp = u64; - -/// The status of dispute. This is a state machine which can be altered by the -/// helper methods. -#[derive(Debug, Clone, Copy, Encode, Decode, PartialEq)] -pub enum DisputeStatus { - /// The dispute is active and unconcluded. - #[codec(index = 0)] - Active, - /// The dispute has been concluded in favor of the candidate - /// since the given timestamp. - #[codec(index = 1)] - ConcludedFor(Timestamp), - /// The dispute has been concluded against the candidate - /// since the given timestamp. - /// - /// This takes precedence over `ConcludedFor` in the case that - /// both are true, which is impossible unless a large amount of - /// validators are participating on both sides. - #[codec(index = 2)] - ConcludedAgainst(Timestamp), - /// Dispute has been confirmed (more than `byzantine_threshold` have already participated/ or - /// we have seen the candidate included already/participated successfully ourselves). - #[codec(index = 3)] - Confirmed, -} - -impl DisputeStatus { - /// Initialize the status to the active state. 
- pub fn active() -> DisputeStatus { - DisputeStatus::Active - } - - /// Move status to confirmed status, if not yet concluded/confirmed already. - pub fn confirm(self) -> DisputeStatus { - match self { - DisputeStatus::Active => DisputeStatus::Confirmed, - DisputeStatus::Confirmed => DisputeStatus::Confirmed, - DisputeStatus::ConcludedFor(_) | DisputeStatus::ConcludedAgainst(_) => self, - } - } - - /// Check whether the dispute is not a spam dispute. - pub fn is_confirmed_concluded(&self) -> bool { - match self { - &DisputeStatus::Confirmed | - &DisputeStatus::ConcludedFor(_) | - DisputeStatus::ConcludedAgainst(_) => true, - &DisputeStatus::Active => false, - } - } - - /// Transition the status to a new status after observing the dispute has concluded for the candidate. - /// This may be a no-op if the status was already concluded. - pub fn concluded_for(self, now: Timestamp) -> DisputeStatus { - match self { - DisputeStatus::Active | DisputeStatus::Confirmed => DisputeStatus::ConcludedFor(now), - DisputeStatus::ConcludedFor(at) => DisputeStatus::ConcludedFor(std::cmp::min(at, now)), - against => against, - } - } - - /// Transition the status to a new status after observing the dispute has concluded against the candidate. - /// This may be a no-op if the status was already concluded. - pub fn concluded_against(self, now: Timestamp) -> DisputeStatus { - match self { - DisputeStatus::Active | DisputeStatus::Confirmed => - DisputeStatus::ConcludedAgainst(now), - DisputeStatus::ConcludedFor(at) => - DisputeStatus::ConcludedAgainst(std::cmp::min(at, now)), - DisputeStatus::ConcludedAgainst(at) => - DisputeStatus::ConcludedAgainst(std::cmp::min(at, now)), - } - } - - /// Whether the disputed candidate is possibly invalid. 
- pub fn is_possibly_invalid(&self) -> bool { - match self { - DisputeStatus::Active | - DisputeStatus::Confirmed | - DisputeStatus::ConcludedAgainst(_) => true, - DisputeStatus::ConcludedFor(_) => false, - } - } - - /// Yields the timestamp this dispute concluded at, if any. - pub fn concluded_at(&self) -> Option { - match self { - DisputeStatus::Active | DisputeStatus::Confirmed => None, - DisputeStatus::ConcludedFor(at) | DisputeStatus::ConcludedAgainst(at) => Some(*at), - } - } -} - /// Get active disputes as iterator, preserving its `DisputeStatus`. pub fn get_active_with_status( recent_disputes: impl Iterator, diff --git a/node/primitives/src/disputes/mod.rs b/node/primitives/src/disputes/mod.rs index ec7bb6abc3b7..051ec0233258 100644 --- a/node/primitives/src/disputes/mod.rs +++ b/node/primitives/src/disputes/mod.rs @@ -30,6 +30,8 @@ use polkadot_primitives::v2::{ /// `DisputeMessage` and related types. mod message; pub use message::{DisputeMessage, Error as DisputeMessageCheckError, UncheckedDisputeMessage}; +mod status; +pub use status::{DisputeStatus, Timestamp}; /// A checked dispute statement from an associated validator. #[derive(Debug, Clone)] diff --git a/node/primitives/src/disputes/status.rs b/node/primitives/src/disputes/status.rs new file mode 100644 index 000000000000..14ebcf0855d9 --- /dev/null +++ b/node/primitives/src/disputes/status.rs @@ -0,0 +1,112 @@ +// Copyright 2017-2022 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+ +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use parity_scale_codec::{Decode, Encode}; + +/// Timestamp based on the 1 Jan 1970 UNIX base, which is persistent across node restarts and OS reboots. +pub type Timestamp = u64; + +/// The status of dispute. This is a state machine which can be altered by the +/// helper methods. +#[derive(Debug, Clone, Copy, Encode, Decode, PartialEq)] +pub enum DisputeStatus { + /// The dispute is active and unconcluded. + #[codec(index = 0)] + Active, + /// The dispute has been concluded in favor of the candidate + /// since the given timestamp. + #[codec(index = 1)] + ConcludedFor(Timestamp), + /// The dispute has been concluded against the candidate + /// since the given timestamp. + /// + /// This takes precedence over `ConcludedFor` in the case that + /// both are true, which is impossible unless a large amount of + /// validators are participating on both sides. + #[codec(index = 2)] + ConcludedAgainst(Timestamp), + /// Dispute has been confirmed (more than `byzantine_threshold` have already participated/ or + /// we have seen the candidate included already/participated successfully ourselves). + #[codec(index = 3)] + Confirmed, +} + +impl DisputeStatus { + /// Initialize the status to the active state. + pub fn active() -> DisputeStatus { + DisputeStatus::Active + } + + /// Move status to confirmed status, if not yet concluded/confirmed already. + pub fn confirm(self) -> DisputeStatus { + match self { + DisputeStatus::Active => DisputeStatus::Confirmed, + DisputeStatus::Confirmed => DisputeStatus::Confirmed, + DisputeStatus::ConcludedFor(_) | DisputeStatus::ConcludedAgainst(_) => self, + } + } + + /// Check whether the dispute is not a spam dispute. 
+ pub fn is_confirmed_concluded(&self) -> bool { + match self { + &DisputeStatus::Confirmed | + &DisputeStatus::ConcludedFor(_) | + DisputeStatus::ConcludedAgainst(_) => true, + &DisputeStatus::Active => false, + } + } + + /// Transition the status to a new status after observing the dispute has concluded for the candidate. + /// This may be a no-op if the status was already concluded. + pub fn concluded_for(self, now: Timestamp) -> DisputeStatus { + match self { + DisputeStatus::Active | DisputeStatus::Confirmed => DisputeStatus::ConcludedFor(now), + DisputeStatus::ConcludedFor(at) => DisputeStatus::ConcludedFor(std::cmp::min(at, now)), + against => against, + } + } + + /// Transition the status to a new status after observing the dispute has concluded against the candidate. + /// This may be a no-op if the status was already concluded. + pub fn concluded_against(self, now: Timestamp) -> DisputeStatus { + match self { + DisputeStatus::Active | DisputeStatus::Confirmed => + DisputeStatus::ConcludedAgainst(now), + DisputeStatus::ConcludedFor(at) => + DisputeStatus::ConcludedAgainst(std::cmp::min(at, now)), + DisputeStatus::ConcludedAgainst(at) => + DisputeStatus::ConcludedAgainst(std::cmp::min(at, now)), + } + } + + /// Whether the disputed candidate is possibly invalid. + pub fn is_possibly_invalid(&self) -> bool { + match self { + DisputeStatus::Active | + DisputeStatus::Confirmed | + DisputeStatus::ConcludedAgainst(_) => true, + DisputeStatus::ConcludedFor(_) => false, + } + } + + /// Yields the timestamp this dispute concluded at, if any. 
+ pub fn concluded_at(&self) -> Option { + match self { + DisputeStatus::Active | DisputeStatus::Confirmed => None, + DisputeStatus::ConcludedFor(at) | DisputeStatus::ConcludedAgainst(at) => Some(*at), + } + } +} diff --git a/node/primitives/src/lib.rs b/node/primitives/src/lib.rs index 882b75a0e81f..17bc5129a664 100644 --- a/node/primitives/src/lib.rs +++ b/node/primitives/src/lib.rs @@ -46,8 +46,8 @@ pub mod approval; /// Disputes related types. pub mod disputes; pub use disputes::{ - CandidateVotes, DisputeMessage, DisputeMessageCheckError, InvalidDisputeVote, - SignedDisputeStatement, UncheckedDisputeMessage, ValidDisputeVote, + CandidateVotes, DisputeMessage, DisputeMessageCheckError, DisputeStatus, InvalidDisputeVote, + SignedDisputeStatement, Timestamp, UncheckedDisputeMessage, ValidDisputeVote, }; // For a 16-ary Merkle Prefix Trie, we can expect at most 16 32-byte hashes per node diff --git a/node/subsystem-types/src/messages.rs b/node/subsystem-types/src/messages.rs index 10a5201cc524..c896454594b9 100644 --- a/node/subsystem-types/src/messages.rs +++ b/node/subsystem-types/src/messages.rs @@ -35,8 +35,8 @@ use polkadot_node_network_protocol::{ use polkadot_node_primitives::{ approval::{BlockApprovalMeta, IndirectAssignmentCert, IndirectSignedApprovalVote}, AvailableData, BabeEpoch, BlockWeight, CandidateVotes, CollationGenerationConfig, - CollationSecondedSignal, DisputeMessage, ErasureChunk, PoV, SignedDisputeStatement, - SignedFullStatement, ValidationResult, + CollationSecondedSignal, DisputeMessage, DisputeStatus, ErasureChunk, PoV, + SignedDisputeStatement, SignedFullStatement, ValidationResult, }; use polkadot_primitives::v2::{ AuthorityDiscoveryId, BackedCandidate, BlockNumber, CandidateEvent, CandidateHash, @@ -271,7 +271,7 @@ pub enum DisputeCoordinatorMessage { /// Fetch a list of all recent disputes the co-ordinator is aware of. /// These are disputes which have occurred any time in recent sessions, /// and which may have already concluded. 
- RecentDisputes(oneshot::Sender>), + RecentDisputes(oneshot::Sender>), /// Fetch a list of all active disputes that the coordinator is aware of. /// These disputes are either not yet concluded or recently concluded. ActiveDisputes(oneshot::Sender>), From 396187e9ae95379c057d07d163a414ea105ff88a Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Wed, 11 May 2022 14:39:53 +0300 Subject: [PATCH 20/65] Move dummy_signature() in primitives/test-helpers --- node/network/approval-distribution/Cargo.toml | 1 + node/network/approval-distribution/src/tests.rs | 5 +---- primitives/test-helpers/Cargo.toml | 1 + primitives/test-helpers/src/lib.rs | 4 ++++ 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/node/network/approval-distribution/Cargo.toml b/node/network/approval-distribution/Cargo.toml index 68fae906c21e..e5c2865e4afe 100644 --- a/node/network/approval-distribution/Cargo.toml +++ b/node/network/approval-distribution/Cargo.toml @@ -21,6 +21,7 @@ sp-core = { git = "https://github.com/paritytech/substrate", branch = "master", polkadot-node-subsystem-util = { path = "../../subsystem-util" } polkadot-node-subsystem-test-helpers = { path = "../../subsystem-test-helpers" } +polkadot-primitives-test-helpers = { path = "../../../primitives/test-helpers" } assert_matches = "1.4.0" schnorrkel = { version = "0.9.1", default-features = false } diff --git a/node/network/approval-distribution/src/tests.rs b/node/network/approval-distribution/src/tests.rs index b3d44bfe8c1e..90827f7d9e44 100644 --- a/node/network/approval-distribution/src/tests.rs +++ b/node/network/approval-distribution/src/tests.rs @@ -23,6 +23,7 @@ use polkadot_node_primitives::approval::{ }; use polkadot_node_subsystem::messages::{network_bridge_event, AllMessages, ApprovalCheckError}; use polkadot_node_subsystem_test_helpers as test_helpers; +use polkadot_primitives_test_helpers::dummy_signature; use polkadot_node_subsystem_util::TimeoutExt as _; use 
polkadot_primitives::v2::{AuthorityDiscoveryId, BlakeTwo256, HashT}; use rand::SeedableRng; @@ -32,10 +33,6 @@ use std::time::Duration; type VirtualOverseer = test_helpers::TestSubsystemContextHandle; -fn dummy_signature() -> polkadot_primitives::v2::ValidatorSignature { - sp_core::crypto::UncheckedFrom::unchecked_from([1u8; 64]) -} - fn test_harness>( mut state: State, test_fn: impl FnOnce(VirtualOverseer) -> T, diff --git a/primitives/test-helpers/Cargo.toml b/primitives/test-helpers/Cargo.toml index dd5e2ded6ae0..944e32333fd9 100644 --- a/primitives/test-helpers/Cargo.toml +++ b/primitives/test-helpers/Cargo.toml @@ -8,5 +8,6 @@ edition = "2021" sp-keyring = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-application-crypto = { package = "sp-application-crypto", git = "https://github.com/paritytech/substrate", branch = "master", default-features = false } sp-runtime = { git = "https://github.com/paritytech/substrate", branch = "master" } +sp-core = { git = "https://github.com/paritytech/substrate", branch = "master", features = ["std"] } polkadot-primitives = { path = "../" } rand = "0.8.5" diff --git a/primitives/test-helpers/src/lib.rs b/primitives/test-helpers/src/lib.rs index 02ba009b13cc..8873d69cdb2f 100644 --- a/primitives/test-helpers/src/lib.rs +++ b/primitives/test-helpers/src/lib.rs @@ -255,3 +255,7 @@ impl rand::RngCore for AlwaysZeroRng { Ok(()) } } + +pub fn dummy_signature() -> polkadot_primitives::v2::ValidatorSignature { + sp_core::crypto::UncheckedFrom::unchecked_from([1u8; 64]) +} From 23013f48b05441d9e4db60aefa6d1decb4880a36 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Wed, 25 May 2022 14:07:16 +0300 Subject: [PATCH 21/65] Enable staging runtime api on Rococo --- runtime/rococo/Cargo.toml | 2 +- runtime/rococo/src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/rococo/Cargo.toml b/runtime/rococo/Cargo.toml index 0ca78c98d2af..cc42af8255db 100644 --- 
a/runtime/rococo/Cargo.toml +++ b/runtime/rococo/Cargo.toml @@ -64,7 +64,7 @@ rococo-runtime-constants = { package = "rococo-runtime-constants", path = "./con runtime-common = { package = "polkadot-runtime-common", path = "../common", default-features = false } primitives = { package = "polkadot-primitives", path = "../../primitives", default-features = false } polkadot-parachain = { path = "../../parachain", default-features = false } -runtime-parachains = { package = "polkadot-runtime-parachains", path = "../parachains", default-features = false } +runtime-parachains = { package = "polkadot-runtime-parachains", path = "../parachains", default-features = false, features = ["vstaging"] } xcm = { package = "xcm", path = "../../xcm", default-features = false } xcm-executor = { package = "xcm-executor", path = "../../xcm/xcm-executor", default-features = false } diff --git a/runtime/rococo/src/lib.rs b/runtime/rococo/src/lib.rs index 14f67b162774..a4cace51c42b 100644 --- a/runtime/rococo/src/lib.rs +++ b/runtime/rococo/src/lib.rs @@ -1100,7 +1100,7 @@ sp_api::impl_runtime_apis! 
{ } fn staging_get_disputes() -> Vec<(SessionIndex, CandidateHash, DisputeState)> { - unimplemented!() + runtime_parachains::runtime_api_impl::vstaging::get_session_disputes::() } } From 140c1e838fe22f4e2148703fea115318f452a871 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Mon, 16 May 2022 09:56:44 +0300 Subject: [PATCH 22/65] Implementation * Move disputes to separate module * Vote prioritisation * Duplicates handling * Double vote handling * Unit tests * Logs and metrics * Code review feedback * Fix ACTIVE/INACTIVE separation and update partition names * Add `fn dispute_is_inactive` to node primitives and refactor `fn get_active_with_status()` logic * Keep the 'old' logic if the staging api is not enabled * Fix some comments in tests * Add warning message if there are any inactive_unknown_onchain disputes * Add file headers and remove `use super::*;` usage outside tests * Adding doc comments * Fix test methods names --- Cargo.lock | 2 + node/core/dispute-coordinator/src/status.rs | 15 +- node/core/dispute-coordinator/src/tests.rs | 3 +- node/core/provisioner/Cargo.toml | 2 +- node/core/provisioner/src/disputes/mod.rs | 53 ++ .../src/disputes/with_staging_api/mod.rs | 439 +++++++++++ .../src/disputes/with_staging_api/tests.rs | 679 ++++++++++++++++++ .../src/disputes/without_staging_api/mod.rs | 192 +++++ node/core/provisioner/src/lib.rs | 295 +------- node/core/provisioner/src/metrics.rs | 51 ++ node/core/provisioner/src/onchain_disputes.rs | 77 -- node/core/provisioner/src/tests.rs | 402 +---------- .../approval-distribution/src/tests.rs | 2 +- node/primitives/src/disputes/mod.rs | 2 +- node/primitives/src/disputes/status.rs | 15 + node/primitives/src/lib.rs | 5 +- 16 files changed, 1455 insertions(+), 779 deletions(-) create mode 100644 node/core/provisioner/src/disputes/mod.rs create mode 100644 node/core/provisioner/src/disputes/with_staging_api/mod.rs create mode 100644 node/core/provisioner/src/disputes/with_staging_api/tests.rs create mode 100644 
node/core/provisioner/src/disputes/without_staging_api/mod.rs delete mode 100644 node/core/provisioner/src/onchain_disputes.rs diff --git a/Cargo.lock b/Cargo.lock index bab0155beab3..a06119d698a8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6014,6 +6014,7 @@ dependencies = [ "polkadot-node-subsystem-test-helpers", "polkadot-node-subsystem-util", "polkadot-primitives", + "polkadot-primitives-test-helpers", "rand 0.8.5", "rand_chacha 0.3.1", "rand_core 0.5.1", @@ -6916,6 +6917,7 @@ dependencies = [ "polkadot-primitives", "rand 0.8.5", "sp-application-crypto", + "sp-core", "sp-keyring", "sp-runtime", ] diff --git a/node/core/dispute-coordinator/src/status.rs b/node/core/dispute-coordinator/src/status.rs index 8a0e2de524d1..6332c3653274 100644 --- a/node/core/dispute-coordinator/src/status.rs +++ b/node/core/dispute-coordinator/src/status.rs @@ -14,29 +14,18 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use polkadot_node_primitives::{DisputeStatus, Timestamp}; +use polkadot_node_primitives::{dispute_is_inactive, DisputeStatus, Timestamp}; use polkadot_primitives::v2::{CandidateHash, SessionIndex}; use std::time::{SystemTime, UNIX_EPOCH}; use crate::LOG_TARGET; -/// The choice here is fairly arbitrary. But any dispute that concluded more than a few minutes ago -/// is not worth considering anymore. Changing this value has little to no bearing on consensus, -/// and really only affects the work that the node might do on startup during periods of many -/// disputes. -pub const ACTIVE_DURATION_SECS: Timestamp = 180; - /// Get active disputes as iterator, preserving its `DisputeStatus`. 
pub fn get_active_with_status( recent_disputes: impl Iterator, now: Timestamp, ) -> impl Iterator { - recent_disputes.filter_map(move |(disputed, status)| { - status - .concluded_at() - .filter(|at| *at + ACTIVE_DURATION_SECS < now) - .map_or(Some((disputed, status)), |_| None) - }) + recent_disputes.filter(move |(_, status)| !dispute_is_inactive(status, &now)) } pub trait Clock: Send + Sync { diff --git a/node/core/dispute-coordinator/src/tests.rs b/node/core/dispute-coordinator/src/tests.rs index 39fdc3a037e5..ff85319599ce 100644 --- a/node/core/dispute-coordinator/src/tests.rs +++ b/node/core/dispute-coordinator/src/tests.rs @@ -49,6 +49,7 @@ use sp_keyring::Sr25519Keyring; use sp_keystore::{SyncCryptoStore, SyncCryptoStorePtr}; use ::test_helpers::{dummy_candidate_receipt_bad_sig, dummy_digest, dummy_hash}; +use polkadot_node_primitives::{Timestamp, ACTIVE_DURATION_SECS}; use polkadot_node_subsystem::{ jaeger, messages::{AllMessages, BlockDescription, RuntimeApiMessage, RuntimeApiRequest}, @@ -66,7 +67,7 @@ use crate::{ backend::Backend, metrics::Metrics, participation::{participation_full_happy_path, participation_missing_availability}, - status::{Clock, Timestamp, ACTIVE_DURATION_SECS}, + status::Clock, Config, DisputeCoordinatorSubsystem, }; diff --git a/node/core/provisioner/Cargo.toml b/node/core/provisioner/Cargo.toml index 2f5c1f9aa5dd..7a06a615154e 100644 --- a/node/core/provisioner/Cargo.toml +++ b/node/core/provisioner/Cargo.toml @@ -13,8 +13,8 @@ polkadot-primitives = { path = "../../../primitives" } polkadot-node-primitives = { path = "../../primitives" } polkadot-node-subsystem = { path = "../../subsystem" } polkadot-node-subsystem-util = { path = "../../subsystem-util" } -futures-timer = "3.0.2" rand = "0.8.5" +futures-timer = "3.0.2" fatality = "0.0.6" [dev-dependencies] diff --git a/node/core/provisioner/src/disputes/mod.rs b/node/core/provisioner/src/disputes/mod.rs new file mode 100644 index 000000000000..f36f35671857 --- /dev/null +++ 
b/node/core/provisioner/src/disputes/mod.rs @@ -0,0 +1,53 @@ +// Copyright 2017-2022 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +//! The disputes module is responsible for selecting dispute votes to be sent with the inherent data. It contains two +//! different implementations, extracted in two separate modules - `with_staging_api` and `without_staging_api`. The +//! active one is controlled with a feature flag (`staging-client`). The entrypoint to these implementations is the +//! `select_disputes` function. Refer to the documentation of the modules for more details about each implementation. + +use crate::LOG_TARGET; +use futures::channel::oneshot; +use polkadot_node_primitives::CandidateVotes; +use polkadot_node_subsystem::{messages::DisputeCoordinatorMessage, overseer}; +use polkadot_primitives::v2::{CandidateHash, SessionIndex}; + +/// Request the relevant dispute statements for a set of disputes identified by `CandidateHash` and the `SessionIndex`. +async fn request_votes( + sender: &mut impl overseer::ProvisionerSenderTrait, + disputes_to_query: Vec<(SessionIndex, CandidateHash)>, +) -> Vec<(SessionIndex, CandidateHash, CandidateVotes)> { + let (tx, rx) = oneshot::channel(); + // Bounded by block production - `ProvisionerMessage::RequestInherentData`. 
+ sender.send_unbounded_message(DisputeCoordinatorMessage::QueryCandidateVotes( + disputes_to_query, + tx, + )); + + match rx.await { + Ok(v) => v, + Err(oneshot::Canceled) => { + gum::warn!(target: LOG_TARGET, "Unable to query candidate votes"); + Vec::new() + }, + } +} + +#[cfg(feature = "staging-client")] +pub(crate) mod with_staging_api; + +#[cfg(not(feature = "staging-client"))] +pub(crate) mod without_staging_api; diff --git a/node/core/provisioner/src/disputes/with_staging_api/mod.rs b/node/core/provisioner/src/disputes/with_staging_api/mod.rs new file mode 100644 index 000000000000..3e9bcb342b82 --- /dev/null +++ b/node/core/provisioner/src/disputes/with_staging_api/mod.rs @@ -0,0 +1,439 @@ +// Copyright 2017-2022 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +//! This module uses different approach for selecting dispute votes. It queries the Runtime +//! about the votes already known onchain and tries to select only relevant votes. Refer to +//! the documentation of `select_disputes` for more details about the actual implementation. 
+ +use crate::{ + error::{Error, GetOnchainDisputesError}, + metrics, LOG_TARGET, +}; +use futures::channel::oneshot; +use polkadot_node_primitives::{dispute_is_inactive, CandidateVotes, DisputeStatus, Timestamp}; +use polkadot_node_subsystem::{ + errors::RuntimeApiError, + messages::{DisputeCoordinatorMessage, RuntimeApiMessage, RuntimeApiRequest}, + overseer, ActivatedLeaf, +}; +use polkadot_primitives::v2::{ + supermajority_threshold, CandidateHash, DisputeState, DisputeStatement, DisputeStatementSet, + Hash, InvalidDisputeStatementKind, MultiDisputeStatementSet, SessionIndex, + ValidDisputeStatementKind, ValidatorIndex, +}; +use rand as _; +use std::{ + collections::{BTreeMap, HashMap}, + time::{SystemTime, UNIX_EPOCH}, +}; + +#[cfg(test)] +mod tests; + +/// The maximum number of disputes Provisioner will include in the inherent data. +/// Serves as a protection not to flood the Runtime with excessive data. +pub const MAX_DISPUTE_VOTES_FORWARDED_TO_RUNTIME: usize = 200 * 1_000; +// The magic numbers are: `estimated validators count` * `estimated disputes per validator` + +/// Implements the `select_disputes` function which selects dispute votes which should +/// be sent to the Runtime. +/// +/// # How the prioritization works +/// +/// Generally speaking disputes can be described as: +/// * Active vs Inactive +/// * Known vs Unknown onchain +/// * Offchain vs Onchain +/// * Concluded onchain vs Unconcluded onchain +/// +/// Provisioner fetches all disputes from `dispute-coordinator` and separates them in multiple partitions. +/// Please refer to `struct PartitionedDisputes` for details about the actual partitions. +/// Each partition has got a priority implicitly assigned to it and the disputes are selected based on thus +/// priority (e.g. disputes in partition 1, then if there is space - disputes from partition 2 and so on). +/// +/// # Votes selection +/// +/// Besides the prioritization described above the votes in each partition are filtered too. 
Provisioner +/// fetches all onchain votes and filters them out from all partitions. As a result the Runtime receives +/// only fresh votes (votes it didn't know about). +/// +/// # How the onchain votes are fetched +/// +/// The logic outlined above relies on `RuntimeApiRequest::StagingDisputes` message from the Runtime staging API. +/// If the staging API is not enabled - the same logic is executed with empty onchain votes set. Effectively this +/// means that all disputes are partitioned in groups 2 or 4 and all votes are sent to the Runtime. +pub async fn select_disputes( + sender: &mut Sender, + metrics: &metrics::Metrics, + leaf: &ActivatedLeaf, +) -> Result +where + Sender: overseer::ProvisionerSenderTrait, +{ + gum::trace!(target: LOG_TARGET, ?leaf, "Selecting disputes for inherent data"); + + // Fetch the onchain disputes. We'll do a prioritization based on them. + let onchain = match get_onchain_disputes(sender, leaf.hash.clone()).await { + Ok(r) => r, + Err(GetOnchainDisputesError::NotSupported(runtime_api_err, relay_parent)) => { + gum::debug!( + target: LOG_TARGET, + ?runtime_api_err, + ?relay_parent, + "Can't fetch onchain disputes, because runtime version is not recent enough. Will continue with empty onchain disputes set.", + ); + HashMap::new() + }, + Err(GetOnchainDisputesError::Channel) => { + // This error usually means the node is shutting down. Log just in case. + gum::debug!( + target: LOG_TARGET, + "Channel error occurred while fetching onchain disputes. Will continue with empty onchain disputes set.", + ); + HashMap::new() + }, + Err(GetOnchainDisputesError::Execution(runtime_api_err, parent_hash)) => { + gum::warn!( + target: LOG_TARGET, + ?runtime_api_err, + ?parent_hash, + "Unexpected execution error occurred while fetching onchain votes. 
Will continue with empty onchain disputes set.", + ); + HashMap::new() + }, + }; + + let recent_disputes = request_disputes(sender).await; + gum::trace!( + target: LOG_TARGET, + ?leaf, + "Got {} recent disputes and {} onchain disputes. Runtime supports staging API: {}", + recent_disputes.len(), + onchain.len(), + if cfg!(feature = "staging-client") { true } else { false } + ); + + let partitioned = partition_recent_disputes(recent_disputes, &onchain); + metrics.on_partition_recent_disputes(&partitioned); + + if partitioned.inactive_unknown_onchain.len() > 0 { + gum::warn!( + target: LOG_TARGET, + ?leaf, + "Got {} inactive unknown onchain disputes. This should not happen!", + partitioned.inactive_unknown_onchain.len() + ); + } + let result = vote_selection(sender, partitioned, &onchain).await; + + process_selected_disputes(metrics, result) +} + +/// Selects dispute votes from `PartitionedDispites` which should be sent to the runtime. Votes which +/// are already onchain are filtered out. Result should be sorted by `(SessionIndex, CandidateHash)` +/// which is enforced by the `BTreeMap`. This is a requirement from the runtime. 
+async fn vote_selection( + sender: &mut Sender, + partitioned: PartitionedDisputes, + onchain: &HashMap<(SessionIndex, CandidateHash), DisputeState>, +) -> BTreeMap<(SessionIndex, CandidateHash), CandidateVotes> +where + Sender: overseer::ProvisionerSenderTrait, +{ + const BATCH_SIZE: usize = 1_100; + + // fetch in batches until there are enough votes + let mut disputes = partitioned.into_iter().collect::>(); + let mut total_votes_len = 0; + let mut result = BTreeMap::new(); + while !disputes.is_empty() { + let batch_size = std::cmp::min(BATCH_SIZE, disputes.len()); + let batch = Vec::from_iter(disputes.drain(0..batch_size)); + + // Filter votes which are already onchain + let votes = super::request_votes(sender, batch) + .await + .into_iter() + .map(|(session_index, candidate_hash, mut votes)| { + let onchain_state = + if let Some(onchain_state) = onchain.get(&(session_index, candidate_hash)) { + onchain_state + } else { + // onchain knows nothing about this dispute - add all votes + return (session_index, candidate_hash, votes) + }; + + votes.valid.retain(|validator_idx, (statement_kind, _)| { + is_vote_worth_to_keep(validator_idx, statement_kind, &onchain_state) + }); + votes.invalid.retain(|validator_idx, (statement_kind, _)| { + is_vote_worth_to_keep(validator_idx, statement_kind, &onchain_state) + }); + (session_index, candidate_hash, votes) + }) + .collect::>(); + + // Check if votes are within the limit + for (session_index, candidate_hash, selected_votes) in votes { + let votes_len = selected_votes.valid.len() + selected_votes.invalid.len(); + if votes_len + total_votes_len > MAX_DISPUTE_VOTES_FORWARDED_TO_RUNTIME { + // we are done - no more votes can be added + return result + } + result.insert((session_index, candidate_hash), selected_votes); + total_votes_len += votes_len + } + } + + result +} + +/// Contains disputes by partitions. Check the field comments for further details. 
+#[derive(Default)] +pub(crate) struct PartitionedDisputes { + /// Concluded and inactive disputes which are completely unknown for the Runtime. + /// Hopefully this should never happen. + /// Will be sent to the Runtime with FIRST priority. + pub inactive_unknown_onchain: Vec<(SessionIndex, CandidateHash)>, + /// Active disputes completely unknown onchain. + /// Will be sent to the Runtime with SECOND priority. + pub active_unknown_onchain: Vec<(SessionIndex, CandidateHash)>, + /// Active disputes unconcluded onchain. + /// Will be sent to the Runtime with THIRD priority. + pub active_unconcluded_onchain: Vec<(SessionIndex, CandidateHash)>, + /// Active disputes concluded onchain. New votes are not that important for + /// this partition. + /// Will be sent to the Runtime with FOURTH priority. + pub active_concluded_onchain: Vec<(SessionIndex, CandidateHash)>, + /// Inactive disputes which are known onchain. These are not + /// interesting and won't be sent to the Runtime. + pub inactive_known_onchain: Vec<(SessionIndex, CandidateHash)>, +} + +impl PartitionedDisputes { + fn new() -> PartitionedDisputes { + Default::default() + } + + fn into_iter(self) -> impl Iterator { + self.inactive_unknown_onchain + .into_iter() + .chain(self.active_unknown_onchain.into_iter()) + .chain(self.active_unconcluded_onchain.into_iter()) + .chain(self.active_concluded_onchain.into_iter()) + // inactive_known_onchain is dropped on purpose + } +} + +fn secs_since_epoch() -> Timestamp { + match SystemTime::now().duration_since(UNIX_EPOCH) { + Ok(d) => d.as_secs(), + Err(e) => { + gum::warn!( + target: LOG_TARGET, + err = ?e, + "Error getting system time." 
+ ); + 0 + }, + } +} + +fn partition_recent_disputes( + recent: Vec<(SessionIndex, CandidateHash, DisputeStatus)>, + onchain: &HashMap<(SessionIndex, CandidateHash), DisputeState>, +) -> PartitionedDisputes { + let mut partitioned = PartitionedDisputes::new(); + + // Drop any duplicates + let unique_recent = recent + .into_iter() + .map(|(session_index, candidate_hash, dispute_state)| { + ((session_index, candidate_hash), dispute_state) + }) + .collect::>(); + + // Split ACTIVE from CONCLUDED disputes + let (active, inactive): ( + Vec<(SessionIndex, CandidateHash, DisputeStatus)>, + Vec<(SessionIndex, CandidateHash, DisputeStatus)>, + ) = unique_recent + .into_iter() + .map(|((session_index, candidate_hash), dispute_state)| { + (session_index, candidate_hash, dispute_state) + }) + .partition(|(_, _, status)| !dispute_is_inactive(status, &secs_since_epoch())); + + // Split ACTIVE in three groups... + for (session_index, candidate_hash, _) in active { + match onchain.get(&(session_index, candidate_hash)) { + Some(d) => { + // Check if there are enough onchain votes for or against to conclude the dispute + let supermajority = supermajority_threshold(d.validators_for.len()); + if d.validators_for.count_ones() >= supermajority || + d.validators_against.count_ones() >= supermajority + { + partitioned.active_concluded_onchain.push((session_index, candidate_hash)); + } else { + partitioned.active_unconcluded_onchain.push((session_index, candidate_hash)); + } + }, + None => partitioned.active_unknown_onchain.push((session_index, candidate_hash)), + }; + } + + // ... 
and INACTIVE in two more + for (session_index, candidate_hash, _) in inactive { + match onchain.get(&(session_index, candidate_hash)) { + Some(_) => partitioned.inactive_known_onchain.push((session_index, candidate_hash)), + None => partitioned.inactive_unknown_onchain.push((session_index, candidate_hash)), + } + } + + partitioned +} + +// Helper trait to obtain the value of vote for `InvalidDisputeStatementKind` and `ValidDisputeStatementKind`. +// The alternative was to pass a bool to `fn is_vote_worth_to_keep` explicitly but it's pointless as the value is already 'encoded' in the type. +trait VoteType { + fn vote_value() -> bool; +} + +impl VoteType for InvalidDisputeStatementKind { + fn vote_value() -> bool { + false + } +} + +impl VoteType for ValidDisputeStatementKind { + fn vote_value() -> bool { + true + } +} + +/// Determines if a vote is worth to be kept, based on the onchain disputes +fn is_vote_worth_to_keep( + validator_index: &ValidatorIndex, + _: &T, + onchain_state: &DisputeState, +) -> bool { + let offchain_vote = T::vote_value(); + let in_validators_for = onchain_state + .validators_for + .get(validator_index.0 as usize) + .as_deref() + .copied() + .unwrap_or(false); + let in_validators_against = onchain_state + .validators_against + .get(validator_index.0 as usize) + .as_deref() + .copied() + .unwrap_or(false); + + if in_validators_for && in_validators_against { + // The validator has double voted and runtime knows about this. Ignore this vote. + return false + } + + if offchain_vote && in_validators_against || !offchain_vote && in_validators_for { + // offchain vote differs from the onchain vote + // we need this vote to punish the offending validator + return true + } + + // The vote is valid. Return true if it is not seen onchain. + !in_validators_for && !in_validators_against +} + +/// Request disputes identified by `CandidateHash` and the `SessionIndex`. 
+async fn request_disputes( + sender: &mut impl overseer::ProvisionerSenderTrait, +) -> Vec<(SessionIndex, CandidateHash, DisputeStatus)> { + let (tx, rx) = oneshot::channel(); + let msg = DisputeCoordinatorMessage::RecentDisputes(tx); + + // Bounded by block production - `ProvisionerMessage::RequestInherentData`. + sender.send_unbounded_message(msg); + + let recent_disputes = rx.await.unwrap_or_else(|err| { + gum::warn!(target: LOG_TARGET, err=?err, "Unable to gather recent disputes"); + Vec::new() + }); + recent_disputes +} + +// This function produces the return value for `pub fn select_disputes()` +fn process_selected_disputes( + metrics: &metrics::Metrics, + dispute_candidate_votes: BTreeMap<(SessionIndex, CandidateHash), CandidateVotes>, +) -> Result { + // Transform all `CandidateVotes` into `MultiDisputeStatementSet`. + Ok(dispute_candidate_votes + .into_iter() + .map(|((session_index, candidate_hash), votes)| { + let valid_statements = votes + .valid + .into_iter() + .map(|(i, (s, sig))| (DisputeStatement::Valid(s), i, sig)); + + let invalid_statements = votes + .invalid + .into_iter() + .map(|(i, (s, sig))| (DisputeStatement::Invalid(s), i, sig)); + + metrics.inc_valid_statements_by(valid_statements.len()); + metrics.inc_invalid_statements_by(invalid_statements.len()); + metrics.inc_dispute_statement_sets_by(1); + + DisputeStatementSet { + candidate_hash, + session: session_index, + statements: valid_statements.chain(invalid_statements).collect(), + } + }) + .collect()) +} + +/// Gets the on-chain disputes at a given block number and returns them as a `HashMap` so that searching in them is cheap. 
+pub async fn get_onchain_disputes( + sender: &mut Sender, + relay_parent: Hash, +) -> Result, GetOnchainDisputesError> +where + Sender: overseer::ProvisionerSenderTrait, +{ + gum::trace!(target: LOG_TARGET, ?relay_parent, "Fetching on-chain disputes"); + let (tx, rx) = oneshot::channel(); + sender + .send_message(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::StagingDisputes(tx), + )) + .await; + + rx.await + .map_err(|_| GetOnchainDisputesError::Channel) + .and_then(|res| { + res.map_err(|e| match e { + RuntimeApiError::Execution { .. } => + GetOnchainDisputesError::Execution(e, relay_parent), + RuntimeApiError::NotSupported { .. } => + GetOnchainDisputesError::NotSupported(e, relay_parent), + }) + }) + .map(|v| v.into_iter().map(|e| ((e.0, e.1), e.2)).collect()) +} diff --git a/node/core/provisioner/src/disputes/with_staging_api/tests.rs b/node/core/provisioner/src/disputes/with_staging_api/tests.rs new file mode 100644 index 000000000000..bfd896a6534a --- /dev/null +++ b/node/core/provisioner/src/disputes/with_staging_api/tests.rs @@ -0,0 +1,679 @@ +// Copyright 2017-2022 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . 
+ +use super::super::{ + super::{tests::common::test_harness, *}, + with_staging_api::*, +}; +use bitvec::prelude::*; +use futures::channel::mpsc; +use polkadot_node_primitives::{CandidateVotes, DisputeStatus}; +use polkadot_node_subsystem::messages::{ + AllMessages, DisputeCoordinatorMessage, RuntimeApiMessage, RuntimeApiRequest, +}; +use polkadot_node_subsystem_test_helpers::TestSubsystemSender; +use polkadot_primitives::v2::{ + CandidateHash, DisputeState, InvalidDisputeStatementKind, SessionIndex, + ValidDisputeStatementKind, ValidatorSignature, +}; +use std::sync::Arc; +use test_helpers; + +// +// Unit tests for various functions +// +#[test] +fn should_keep_vote_behaves() { + let onchain_state = DisputeState { + validators_for: bitvec![u8, Lsb0; 1, 0, 1, 0, 1], + validators_against: bitvec![u8, Lsb0; 0, 1, 0, 0, 1], + start: 1, + concluded_at: None, + }; + + let local_valid_known = (ValidatorIndex(0), ValidDisputeStatementKind::Explicit); + let local_valid_unknown = (ValidatorIndex(3), ValidDisputeStatementKind::Explicit); + + let local_invalid_known = (ValidatorIndex(1), InvalidDisputeStatementKind::Explicit); + let local_invalid_unknown = (ValidatorIndex(3), InvalidDisputeStatementKind::Explicit); + + assert_eq!( + is_vote_worth_to_keep(&local_valid_known.0, &local_valid_known.1, &onchain_state), + false + ); + assert_eq!( + is_vote_worth_to_keep(&local_valid_unknown.0, &local_valid_unknown.1, &onchain_state), + true + ); + assert_eq!( + is_vote_worth_to_keep(&local_invalid_known.0, &local_invalid_known.1, &onchain_state), + false + ); + assert_eq!( + is_vote_worth_to_keep(&local_invalid_unknown.0, &local_invalid_unknown.1, &onchain_state), + true + ); + + //double voting - onchain knows + let local_double_vote_onchain_knows = + (ValidatorIndex(4), InvalidDisputeStatementKind::Explicit); + assert_eq!( + is_vote_worth_to_keep( + &local_double_vote_onchain_knows.0, + &local_double_vote_onchain_knows.1, + &onchain_state + ), + false + ); + + //double voting - 
onchain doesn't know + let local_double_vote_onchain_doesnt_knows = + (ValidatorIndex(0), InvalidDisputeStatementKind::Explicit); + assert_eq!( + is_vote_worth_to_keep( + &local_double_vote_onchain_doesnt_knows.0, + &local_double_vote_onchain_doesnt_knows.1, + &onchain_state + ), + true + ); + + // empty onchain state + let empty_onchain_state = DisputeState { + validators_for: BitVec::new(), + validators_against: BitVec::new(), + start: 1, + concluded_at: None, + }; + assert_eq!( + is_vote_worth_to_keep( + &local_double_vote_onchain_doesnt_knows.0, + &local_double_vote_onchain_doesnt_knows.1, + &empty_onchain_state + ), + true + ); +} + +#[test] +fn partitioning_happy_case() { + let mut input = Vec::<(SessionIndex, CandidateHash, DisputeStatus)>::new(); + let mut onchain = HashMap::<(u32, CandidateHash), DisputeState>::new(); + + // Create one dispute for each partition + + let unconcluded_onchain = (0, CandidateHash(Hash::random()), DisputeStatus::Active); + input.push(unconcluded_onchain.clone()); + onchain.insert( + (unconcluded_onchain.0, unconcluded_onchain.1.clone()), + DisputeState { + validators_for: bitvec![u8, Lsb0; 1, 1, 1, 0, 0, 0, 0, 0, 0], + validators_against: bitvec![u8, Lsb0; 0, 0, 0, 0, 0, 0, 0, 0, 0], + start: 1, + concluded_at: None, + }, + ); + + let unknown_onchain = (1, CandidateHash(Hash::random()), DisputeStatus::Active); + input.push(unknown_onchain.clone()); + + let concluded_onchain = (2, CandidateHash(Hash::random()), DisputeStatus::Active); + input.push(concluded_onchain.clone()); + onchain.insert( + (concluded_onchain.0, concluded_onchain.1.clone()), + DisputeState { + validators_for: bitvec![u8, Lsb0; 1, 1, 1, 1, 1, 1, 1, 1, 0], + validators_against: bitvec![u8, Lsb0; 0, 0, 0, 0, 0, 0, 0, 0, 0], + start: 1, + concluded_at: None, + }, + ); + + let concluded_known_onchain = + (3, CandidateHash(Hash::random()), DisputeStatus::ConcludedFor(0)); + input.push(concluded_known_onchain.clone()); + onchain.insert( + 
(concluded_known_onchain.0, concluded_known_onchain.1.clone()), + DisputeState { + validators_for: bitvec![u8, Lsb0; 1, 1, 1, 1, 1, 1, 1, 1, 1], + validators_against: bitvec![u8, Lsb0; 0, 0, 0, 0, 0, 0, 0, 0, 0], + start: 1, + concluded_at: None, + }, + ); + + let concluded_unknown_onchain = + (4, CandidateHash(Hash::random()), DisputeStatus::ConcludedFor(0)); + input.push(concluded_unknown_onchain.clone()); + + let result = partition_recent_disputes(input, &onchain); + + assert_eq!(result.active_unconcluded_onchain.len(), 1); + assert_eq!( + result.active_unconcluded_onchain.get(0).unwrap(), + &(unconcluded_onchain.0, unconcluded_onchain.1) + ); + + assert_eq!(result.active_unknown_onchain.len(), 1); + assert_eq!( + result.active_unknown_onchain.get(0).unwrap(), + &(unknown_onchain.0, unknown_onchain.1) + ); + + assert_eq!(result.active_concluded_onchain.len(), 1); + assert_eq!( + result.active_concluded_onchain.get(0).unwrap(), + &(concluded_onchain.0, concluded_onchain.1) + ); + + assert_eq!(result.inactive_known_onchain.len(), 1); + assert_eq!( + result.inactive_known_onchain.get(0).unwrap(), + &(concluded_known_onchain.0, concluded_known_onchain.1) + ); + + assert_eq!(result.inactive_unknown_onchain.len(), 1); + assert_eq!( + result.inactive_unknown_onchain.get(0).unwrap(), + &(concluded_unknown_onchain.0, concluded_unknown_onchain.1) + ); +} + +// This test verifies the double voting behavior. Currently we don't care if a supermajority is achieved with or +// without the 'help' of a double vote (a validator voting for and against at the same time). This makes the test +// a bit pointless but anyway I'm leaving it here to make this decision explicit and have the test code ready in +// case this behavior needs to be further tested in the future. 
+// Link to the PR with the discussions: https://github.com/paritytech/polkadot/pull/5567 +#[test] +fn partitioning_doubled_onchain_vote() { + let mut input = Vec::<(SessionIndex, CandidateHash, DisputeStatus)>::new(); + let mut onchain = HashMap::<(u32, CandidateHash), DisputeState>::new(); + + // Dispute A relies on a 'double onchain vote' to conclude. Validator with index 0 has voted both `for` and `against`. + // Despite that this dispute should be considered 'can conclude onchain'. + let dispute_a = (3, CandidateHash(Hash::random()), DisputeStatus::Active); + // Dispute B has supermajority + 1 votes, so the doubled onchain vote doesn't affect it. It should be considered + // as 'can conclude onchain'. + let dispute_b = (4, CandidateHash(Hash::random()), DisputeStatus::Active); + input.push(dispute_a.clone()); + input.push(dispute_b.clone()); + onchain.insert( + (dispute_a.0, dispute_a.1.clone()), + DisputeState { + validators_for: bitvec![u8, Lsb0; 1, 1, 1, 1, 1, 1, 1, 0, 0], + validators_against: bitvec![u8, Lsb0; 1, 0, 0, 0, 0, 0, 0, 0, 0], + start: 1, + concluded_at: None, + }, + ); + onchain.insert( + (dispute_b.0, dispute_b.1.clone()), + DisputeState { + validators_for: bitvec![u8, Lsb0; 1, 1, 1, 1, 1, 1, 1, 1, 0], + validators_against: bitvec![u8, Lsb0; 1, 0, 0, 0, 0, 0, 0, 0, 0], + start: 1, + concluded_at: None, + }, + ); + + let result = partition_recent_disputes(input, &onchain); + + assert_eq!(result.active_unconcluded_onchain.len(), 0); + assert_eq!(result.active_concluded_onchain.len(), 2); +} + +#[test] +fn partitioning_duplicated_dispute() { + let mut input = Vec::<(SessionIndex, CandidateHash, DisputeStatus)>::new(); + let mut onchain = HashMap::<(u32, CandidateHash), DisputeState>::new(); + + let some_dispute = (3, CandidateHash(Hash::random()), DisputeStatus::ConcludedFor(0)); + input.push(some_dispute.clone()); + input.push(some_dispute.clone()); + onchain.insert( + (some_dispute.0, some_dispute.1.clone()), + DisputeState { + validators_for: 
bitvec![u8, Lsb0; 1, 1, 1, 1, 1, 1, 1, 1, 1], + validators_against: bitvec![u8, Lsb0; 0, 0, 0, 0, 0, 0, 0, 0, 0], + start: 1, + concluded_at: None, + }, + ); + + let result = partition_recent_disputes(input, &onchain); + + assert_eq!(result.inactive_known_onchain.len(), 1); + assert_eq!(result.inactive_known_onchain.get(0).unwrap(), &(some_dispute.0, some_dispute.1)); +} + +// +// end-to-end tests for select_disputes() +// + +async fn mock_overseer( + mut receiver: mpsc::UnboundedReceiver, + disputes_db: &mut TestDisputes, + vote_queries_count: &mut usize, +) { + while let Some(from_job) = receiver.next().await { + match from_job { + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + _, + RuntimeApiRequest::StagingDisputes(sender), + )) => { + let _ = sender.send(Ok(disputes_db + .onchain_disputes + .clone() + .into_iter() + .map(|(k, v)| (k.0, k.1, v)) + .collect::>())); + }, + AllMessages::RuntimeApi(_) => panic!("Unexpected RuntimeApi request"), + AllMessages::DisputeCoordinator(DisputeCoordinatorMessage::RecentDisputes(sender)) => { + let _ = sender.send(disputes_db.local_disputes.clone()); + }, + AllMessages::DisputeCoordinator(DisputeCoordinatorMessage::QueryCandidateVotes( + disputes, + sender, + )) => { + *vote_queries_count += 1; + let mut res = Vec::new(); + for d in disputes.iter() { + let v = disputes_db.votes_db.get(d).unwrap().clone(); + res.push((d.0, d.1, v)); + } + + let _ = sender.send(res); + }, + _ => panic!("Unexpected message: {:?}", from_job), + } + } +} + +fn leaf() -> ActivatedLeaf { + ActivatedLeaf { + hash: Hash::repeat_byte(0xAA), + number: 0xAA, + status: LeafStatus::Fresh, + span: Arc::new(jaeger::Span::Disabled), + } +} + +struct TestDisputes { + pub local_disputes: Vec<(SessionIndex, CandidateHash, DisputeStatus)>, + pub votes_db: HashMap<(SessionIndex, CandidateHash), CandidateVotes>, + pub onchain_disputes: HashMap<(u32, CandidateHash), DisputeState>, + validators_count: usize, +} + +impl TestDisputes { + pub fn 
new(validators_count: usize) -> TestDisputes { + TestDisputes { + local_disputes: Vec::<(SessionIndex, CandidateHash, DisputeStatus)>::new(), + votes_db: HashMap::<(SessionIndex, CandidateHash), CandidateVotes>::new(), + onchain_disputes: HashMap::<(u32, CandidateHash), DisputeState>::new(), + validators_count, + } + } + + // Offchain disputes are on node side + fn add_offchain_dispute( + &mut self, + dispute: (SessionIndex, CandidateHash, DisputeStatus), + local_votes_count: usize, + dummy_receipt: CandidateReceipt, + ) { + self.local_disputes.push(dispute.clone()); + self.votes_db.insert( + (dispute.0, dispute.1), + CandidateVotes { + candidate_receipt: dummy_receipt, + valid: TestDisputes::generate_local_votes( + ValidDisputeStatementKind::Explicit, + 0, + local_votes_count, + ), + invalid: BTreeMap::new(), + }, + ); + } + + fn add_onchain_dispute( + &mut self, + dispute: (SessionIndex, CandidateHash, DisputeStatus), + onchain_votes_count: usize, + ) { + let concluded_at = match dispute.2 { + DisputeStatus::Active | DisputeStatus::Confirmed => None, + DisputeStatus::ConcludedAgainst(_) | DisputeStatus::ConcludedFor(_) => Some(1), + }; + self.onchain_disputes.insert( + (dispute.0, dispute.1.clone()), + DisputeState { + validators_for: TestDisputes::generate_bitvec( + self.validators_count, + 0, + onchain_votes_count, + ), + validators_against: bitvec![u8, Lsb0; 0; self.validators_count], + start: 1, + concluded_at, + }, + ); + } + + pub fn add_unconfirmed_disputes_concluded_onchain( + &mut self, + dispute_count: usize, + ) -> (u32, usize) { + let local_votes_count = self.validators_count / 100 * 90; + let onchain_votes_count = self.validators_count / 100 * 80; + let session_idx = 0; + let lf = leaf(); + let dummy_receipt = test_helpers::dummy_candidate_receipt(lf.hash.clone()); + for _ in 0..dispute_count { + let d = (session_idx, CandidateHash(Hash::random()), DisputeStatus::Active); + self.add_offchain_dispute(d.clone(), local_votes_count, 
dummy_receipt.clone()); + self.add_onchain_dispute(d, onchain_votes_count); + } + + (session_idx, (local_votes_count - onchain_votes_count) * dispute_count) + } + + pub fn add_unconfirmed_disputes_unconcluded_onchain( + &mut self, + dispute_count: usize, + ) -> (u32, usize) { + let local_votes_count = self.validators_count / 100 * 90; + let onchain_votes_count = self.validators_count / 100 * 40; + let session_idx = 1; + let lf = leaf(); + let dummy_receipt = test_helpers::dummy_candidate_receipt(lf.hash.clone()); + for _ in 0..dispute_count { + let d = (session_idx, CandidateHash(Hash::random()), DisputeStatus::Active); + self.add_offchain_dispute(d.clone(), local_votes_count, dummy_receipt.clone()); + self.add_onchain_dispute(d, onchain_votes_count); + } + + (session_idx, (local_votes_count - onchain_votes_count) * dispute_count) + } + + pub fn add_unconfirmed_disputes_unknown_onchain( + &mut self, + dispute_count: usize, + ) -> (u32, usize) { + let local_votes_count = self.validators_count / 100 * 70; + let session_idx = 2; + let lf = leaf(); + let dummy_receipt = test_helpers::dummy_candidate_receipt(lf.hash.clone()); + for _ in 0..dispute_count { + let d = (session_idx, CandidateHash(Hash::random()), DisputeStatus::Active); + self.add_offchain_dispute(d.clone(), local_votes_count, dummy_receipt.clone()); + } + (session_idx, local_votes_count * dispute_count) + } + + pub fn add_concluded_disputes_known_onchain(&mut self, dispute_count: usize) -> (u32, usize) { + let local_votes_count = self.validators_count / 100 * 80; + let onchain_votes_count = self.validators_count / 100 * 75; + let session_idx = 3; + let lf = leaf(); + let dummy_receipt = test_helpers::dummy_candidate_receipt(lf.hash.clone()); + for _ in 0..dispute_count { + let d = (session_idx, CandidateHash(Hash::random()), DisputeStatus::ConcludedFor(0)); + self.add_offchain_dispute(d.clone(), local_votes_count, dummy_receipt.clone()); + self.add_onchain_dispute(d, onchain_votes_count); + } + 
(session_idx, (local_votes_count - onchain_votes_count) * dispute_count)
+	}
+
+	pub fn add_concluded_disputes_unknown_onchain(&mut self, dispute_count: usize) -> (u32, usize) {
+		let local_votes_count = self.validators_count / 100 * 80;
+		let session_idx = 4;
+		let lf = leaf();
+		let dummy_receipt = test_helpers::dummy_candidate_receipt(lf.hash.clone());
+		for _ in 0..dispute_count {
+			let d = (session_idx, CandidateHash(Hash::random()), DisputeStatus::ConcludedFor(0));
+			self.add_offchain_dispute(d.clone(), local_votes_count, dummy_receipt.clone());
+		}
+		(session_idx, local_votes_count * dispute_count)
+	}
+
+	fn generate_local_votes<T: Clone>(
+		statement_kind: T,
+		start_idx: usize,
+		count: usize,
+	) -> BTreeMap<ValidatorIndex, (T, ValidatorSignature)> {
+		assert!(start_idx < count);
+		(start_idx..count)
+			.map(|idx| {
+				(
+					ValidatorIndex(idx as u32),
+					(statement_kind.clone(), test_helpers::dummy_signature()),
+				)
+			})
+			.collect::<BTreeMap<_, _>>()
+	}
+
+	fn generate_bitvec(
+		validator_count: usize,
+		start_idx: usize,
+		count: usize,
+	) -> BitVec<u8, Lsb0> {
+		assert!(start_idx < count);
+		assert!(start_idx + count < validator_count);
+		let mut res = bitvec![u8, Lsb0; 0; validator_count];
+		for idx in start_idx..count {
+			res.set(idx, true);
+		}
+
+		res
+	}
+}
+
+#[test]
+fn normal_flow() {
+	const VALIDATOR_COUNT: usize = 100;
+	const DISPUTES_PER_BATCH: usize = 10;
+	const ACCEPTABLE_RUNTIME_VOTES_QUERIES_COUNT: usize = 1;
+
+	let mut input = TestDisputes::new(VALIDATOR_COUNT);
+
+	// active, concluded onchain
+	let (third_idx, third_votes) =
+		input.add_unconfirmed_disputes_concluded_onchain(DISPUTES_PER_BATCH);
+
+	// active unconcluded onchain
+	let (first_idx, first_votes) =
+		input.add_unconfirmed_disputes_unconcluded_onchain(DISPUTES_PER_BATCH);
+
+	// concluded disputes unknown onchain
+	let (fifth_idx, fifth_votes) = input.add_concluded_disputes_unknown_onchain(DISPUTES_PER_BATCH);
+
+	// concluded disputes known onchain
+	let (fourth_idx, _) = input.add_concluded_disputes_known_onchain(DISPUTES_PER_BATCH);
+
+	// 
active disputes unknown onchain + let (second_idx, second_votes) = + input.add_unconfirmed_disputes_unknown_onchain(DISPUTES_PER_BATCH); + + let metrics = metrics::Metrics::new_dummy(); + let mut vote_queries: usize = 0; + test_harness( + |r| mock_overseer(r, &mut input, &mut vote_queries), + |mut tx: TestSubsystemSender| async move { + let lf = leaf(); + let result = select_disputes(&mut tx, &metrics, &lf).await.unwrap(); + + assert!(!result.is_empty()); + + if cfg!(feature = "staging-client") { + assert_eq!(result.len(), 4 * DISPUTES_PER_BATCH); + + // Naive checks that the result is partitioned correctly + let (first_batch, rest): (Vec, Vec) = + result.into_iter().partition(|d| d.session == first_idx); + assert_eq!(first_batch.len(), DISPUTES_PER_BATCH); + + let (second_batch, rest): (Vec, Vec) = + rest.into_iter().partition(|d| d.session == second_idx); + assert_eq!(second_batch.len(), DISPUTES_PER_BATCH); + + let (third_batch, rest): (Vec, Vec) = + rest.into_iter().partition(|d| d.session == third_idx); + assert_eq!(third_batch.len(), DISPUTES_PER_BATCH); + + let (fifth_batch, rest): (Vec, Vec) = + rest.into_iter().partition(|d| d.session == fifth_idx); + assert_eq!(fifth_batch.len(), DISPUTES_PER_BATCH); + + // Ensure there are no more disputes - fourth_batch should be dropped + assert_eq!(rest.len(), 0); + + assert_eq!( + first_batch.iter().map(|d| d.statements.len()).fold(0, |acc, v| acc + v), + first_votes + ); + assert_eq!( + second_batch.iter().map(|d| d.statements.len()).fold(0, |acc, v| acc + v), + second_votes + ); + assert_eq!( + third_batch.iter().map(|d| d.statements.len()).fold(0, |acc, v| acc + v), + third_votes + ); + assert_eq!( + fifth_batch.iter().map(|d| d.statements.len()).fold(0, |acc, v| acc + v), + fifth_votes + ); + } else { + assert_eq!(result.len(), 5 * DISPUTES_PER_BATCH); + for i in 0..3 * DISPUTES_PER_BATCH { + assert_ne!(result.get(i).unwrap().session, fourth_idx); + assert_ne!(result.get(i).unwrap().session, fifth_idx); + } + } + 
}, + ); + assert!(vote_queries <= ACCEPTABLE_RUNTIME_VOTES_QUERIES_COUNT); +} + +#[test] +fn many_batches() { + const VALIDATOR_COUNT: usize = 100; + const DISPUTES_PER_PARTITION: usize = 1000; + // Around 4_000 disputes are generated. `BATCH_SIZE` is 1_100. + const ACCEPTABLE_RUNTIME_VOTES_QUERIES_COUNT: usize = 4; + + let mut input = TestDisputes::new(VALIDATOR_COUNT); + + // active which can conclude onchain + input.add_unconfirmed_disputes_concluded_onchain(DISPUTES_PER_PARTITION); + + // active which can't conclude onchain + input.add_unconfirmed_disputes_unconcluded_onchain(DISPUTES_PER_PARTITION); + + //concluded disputes unknown onchain + input.add_concluded_disputes_unknown_onchain(DISPUTES_PER_PARTITION); + + // concluded disputes known onchain + input.add_concluded_disputes_known_onchain(DISPUTES_PER_PARTITION); + + // active disputes unknown onchain + input.add_unconfirmed_disputes_unknown_onchain(DISPUTES_PER_PARTITION); + + let metrics = metrics::Metrics::new_dummy(); + let mut vote_queries: usize = 0; + test_harness( + |r| mock_overseer(r, &mut input, &mut vote_queries), + |mut tx: TestSubsystemSender| async move { + let lf = leaf(); + let result = select_disputes(&mut tx, &metrics, &lf).await.unwrap(); + + assert!(!result.is_empty()); + + let vote_count = result.iter().map(|d| d.statements.len()).fold(0, |acc, v| acc + v); + + assert!( + MAX_DISPUTE_VOTES_FORWARDED_TO_RUNTIME - VALIDATOR_COUNT <= vote_count && + vote_count <= MAX_DISPUTE_VOTES_FORWARDED_TO_RUNTIME, + "vote_count: {}", + vote_count + ); + }, + ); + + assert!( + vote_queries <= ACCEPTABLE_RUNTIME_VOTES_QUERIES_COUNT, + "vote_queries: {} ACCEPTABLE_RUNTIME_VOTES_QUERIES_COUNT: {}", + vote_queries, + ACCEPTABLE_RUNTIME_VOTES_QUERIES_COUNT + ); +} + +#[test] +fn votes_above_limit() { + const VALIDATOR_COUNT: usize = 100; + const DISPUTES_PER_PARTITION: usize = 5_000; + const ACCEPTABLE_RUNTIME_VOTES_QUERIES_COUNT: usize = 4; + + let mut input = TestDisputes::new(VALIDATOR_COUNT); + + // 
active which can conclude onchain + let (_, second_votes) = + input.add_unconfirmed_disputes_concluded_onchain(DISPUTES_PER_PARTITION); + + // active which can't conclude onchain + let (_, first_votes) = + input.add_unconfirmed_disputes_unconcluded_onchain(DISPUTES_PER_PARTITION); + + //concluded disputes unknown onchain + let (_, third_votes) = input.add_concluded_disputes_unknown_onchain(DISPUTES_PER_PARTITION); + + assert!( + first_votes + second_votes + third_votes > 3 * MAX_DISPUTE_VOTES_FORWARDED_TO_RUNTIME, + "Total relevant votes generated: {}", + first_votes + second_votes + third_votes + ); + + let metrics = metrics::Metrics::new_dummy(); + let mut vote_queries: usize = 0; + test_harness( + |r| mock_overseer(r, &mut input, &mut vote_queries), + |mut tx: TestSubsystemSender| async move { + let lf = leaf(); + let result = select_disputes(&mut tx, &metrics, &lf).await.unwrap(); + + assert!(!result.is_empty()); + + let vote_count = result.iter().map(|d| d.statements.len()).fold(0, |acc, v| acc + v); + + assert!( + MAX_DISPUTE_VOTES_FORWARDED_TO_RUNTIME - VALIDATOR_COUNT <= vote_count && + vote_count <= MAX_DISPUTE_VOTES_FORWARDED_TO_RUNTIME, + "vote_count: {}", + vote_count + ); + }, + ); + + assert!( + vote_queries <= ACCEPTABLE_RUNTIME_VOTES_QUERIES_COUNT, + "vote_queries: {} ACCEPTABLE_RUNTIME_VOTES_QUERIES_COUNT: {}", + vote_queries, + ACCEPTABLE_RUNTIME_VOTES_QUERIES_COUNT + ); +} diff --git a/node/core/provisioner/src/disputes/without_staging_api/mod.rs b/node/core/provisioner/src/disputes/without_staging_api/mod.rs new file mode 100644 index 000000000000..3bf5cac340d2 --- /dev/null +++ b/node/core/provisioner/src/disputes/without_staging_api/mod.rs @@ -0,0 +1,192 @@ +// Copyright 2017-2022 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. 
+ +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +//! This module selects all RECENT disputes, fetches the votes for them from dispute-coordinator and +//! returns them as MultiDisputeStatementSet. If the RECENT disputes are more than +//! `MAX_DISPUTES_FORWARDED_TO_RUNTIME` constant - the ACTIVE disputes plus a random selection of +//! RECENT disputes (up to `MAX_DISPUTES_FORWARDED_TO_RUNTIME`) are returned instead. +//! If the ACTIVE disputes are also above `MAX_DISPUTES_FORWARDED_TO_RUNTIME` limit - a random selection +//! of them is generated. + +use crate::{error::Error, metrics, LOG_TARGET}; +use futures::channel::oneshot; +use polkadot_node_subsystem::{messages::DisputeCoordinatorMessage, overseer}; +use polkadot_primitives::v2::{ + CandidateHash, DisputeStatement, DisputeStatementSet, MultiDisputeStatementSet, SessionIndex, +}; +use std::collections::HashSet; + +#[derive(Debug)] +enum RequestType { + /// Query recent disputes, could be an excessive amount. + Recent, + /// Query the currently active and very recently concluded disputes. + Active, +} + +/// Request open disputes identified by `CandidateHash` and the `SessionIndex`. 
+
+async fn request_disputes(
+	sender: &mut impl overseer::ProvisionerSenderTrait,
+	active_or_recent: RequestType,
+) -> Vec<(SessionIndex, CandidateHash)> {
+	let disputes = match active_or_recent {
+		RequestType::Recent => {
+			let (tx, rx) = oneshot::channel();
+			let msg = DisputeCoordinatorMessage::RecentDisputes(tx);
+			sender.send_unbounded_message(msg);
+			let recent_disputes = match rx.await {
+				Ok(r) => r,
+				Err(oneshot::Canceled) => {
+					gum::warn!(
+						target: LOG_TARGET,
+						"Unable to gather {:?} disputes",
+						active_or_recent
+					);
+					Vec::new()
+				},
+			};
+			recent_disputes
+				.into_iter()
+				.map(|(session_idx, candidate_hash, _)| (session_idx, candidate_hash))
+				.collect::<Vec<_>>()
+		},
+		RequestType::Active => {
+			let (tx, rx) = oneshot::channel();
+			let msg = DisputeCoordinatorMessage::ActiveDisputes(tx);
+			sender.send_unbounded_message(msg);
+			let active_disputes = match rx.await {
+				Ok(r) => r,
+				Err(oneshot::Canceled) => {
+					gum::warn!(
+						target: LOG_TARGET,
+						"Unable to gather {:?} disputes",
+						active_or_recent
+					);
+					Vec::new()
+				},
+			};
+			active_disputes
+		},
+	};
+
+	disputes
+}
+
+/// Extend `acc` with up to `n` randomly picked items of `extension` that are not yet present in `acc` (no repetitions added).
+
+fn extend_by_random_subset_without_repetition(
+	acc: &mut Vec<(SessionIndex, CandidateHash)>,
+	extension: Vec<(SessionIndex, CandidateHash)>,
+	n: usize,
+) {
+	use rand::Rng;
+
+	let lut = acc.iter().cloned().collect::<HashSet<_>>();
+
+	let mut unique_new =
+		extension.into_iter().filter(|recent| !lut.contains(recent)).collect::<Vec<_>>();
+
+	// we can simply add all
+	if unique_new.len() <= n {
+		acc.extend(unique_new)
+	} else {
+		acc.reserve(n);
+		let mut rng = rand::thread_rng();
+		for _ in 0..n {
+			let idx = rng.gen_range(0..unique_new.len());
+			acc.push(unique_new.swap_remove(idx));
+		}
+	}
+	// ensure the result stays ordered by session index
+	acc.sort_unstable_by(|a, b| a.0.cmp(&b.0));
+}
+
+pub async fn select_disputes<Sender>(
+	sender: &mut Sender,
+	metrics: &metrics::Metrics,
+) -> Result<MultiDisputeStatementSet, Error>
+where
+	Sender: overseer::ProvisionerSenderTrait,
+{
+	/// The maximum number of disputes Provisioner will include in the inherent data.
+	/// Serves as a protection not to flood the Runtime with excessive data.
+	const MAX_DISPUTES_FORWARDED_TO_RUNTIME: usize = 1_000;
+
+	// We use `RecentDisputes` instead of `ActiveDisputes` because redundancy is fine.
+	// It's heavier than `ActiveDisputes` but ensures that everything from the dispute
+	// window gets on-chain, unlike `ActiveDisputes`.
+	// In case of an overload condition, we limit ourselves to active disputes, and fill up to the
+	// upper bound of disputes to pass to wasm `fn create_inherent_data`.
+	// If the active ones are already exceeding the bounds, randomly select a subset.
+ let recent = request_disputes(sender, RequestType::Recent).await; + let disputes = if recent.len() > MAX_DISPUTES_FORWARDED_TO_RUNTIME { + gum::warn!( + target: LOG_TARGET, + "Recent disputes are excessive ({} > {}), reduce to active ones, and selected", + recent.len(), + MAX_DISPUTES_FORWARDED_TO_RUNTIME + ); + let mut active = request_disputes(sender, RequestType::Active).await; + let n_active = active.len(); + let active = if active.len() > MAX_DISPUTES_FORWARDED_TO_RUNTIME { + let mut picked = Vec::with_capacity(MAX_DISPUTES_FORWARDED_TO_RUNTIME); + extend_by_random_subset_without_repetition( + &mut picked, + active, + MAX_DISPUTES_FORWARDED_TO_RUNTIME, + ); + picked + } else { + extend_by_random_subset_without_repetition( + &mut active, + recent, + MAX_DISPUTES_FORWARDED_TO_RUNTIME.saturating_sub(n_active), + ); + active + }; + active + } else { + recent + }; + + // Load all votes for all disputes from the coordinator. + let dispute_candidate_votes = super::request_votes(sender, disputes).await; + + // Transform all `CandidateVotes` into `MultiDisputeStatementSet`. 
+ Ok(dispute_candidate_votes + .into_iter() + .map(|(session_index, candidate_hash, votes)| { + let valid_statements = votes + .valid + .into_iter() + .map(|(i, (s, sig))| (DisputeStatement::Valid(s), i, sig)); + + let invalid_statements = votes + .invalid + .into_iter() + .map(|(i, (s, sig))| (DisputeStatement::Invalid(s), i, sig)); + + metrics.inc_valid_statements_by(valid_statements.len()); + metrics.inc_invalid_statements_by(invalid_statements.len()); + metrics.inc_dispute_statement_sets_by(1); + + DisputeStatementSet { + candidate_hash, + session: session_index, + statements: valid_statements.chain(invalid_statements).collect(), + } + }) + .collect()) +} diff --git a/node/core/provisioner/src/lib.rs b/node/core/provisioner/src/lib.rs index 0f3099c7df33..9c1e3605e1b6 100644 --- a/node/core/provisioner/src/lib.rs +++ b/node/core/provisioner/src/lib.rs @@ -25,12 +25,11 @@ use futures::{ }; use futures_timer::Delay; -use polkadot_node_primitives::CandidateVotes; use polkadot_node_subsystem::{ jaeger, messages::{ - CandidateBackingMessage, ChainApiMessage, DisputeCoordinatorMessage, ProvisionableData, - ProvisionerInherentData, ProvisionerMessage, + CandidateBackingMessage, ChainApiMessage, ProvisionableData, ProvisionerInherentData, + ProvisionerMessage, }, overseer, ActivatedLeaf, ActiveLeavesUpdate, FromOrchestra, LeafStatus, OverseerSignal, PerLeafSpan, SpawnedSubsystem, SubsystemError, @@ -39,15 +38,14 @@ use polkadot_node_subsystem_util::{ request_availability_cores, request_persisted_validation_data, TimeoutExt, }; use polkadot_primitives::v2::{ - BackedCandidate, BlockNumber, CandidateHash, CandidateReceipt, CoreState, DisputeState, - DisputeStatement, DisputeStatementSet, Hash, MultiDisputeStatementSet, OccupiedCoreAssumption, - SessionIndex, SignedAvailabilityBitfield, ValidatorIndex, + BackedCandidate, BlockNumber, CandidateReceipt, CoreState, Hash, OccupiedCoreAssumption, + SignedAvailabilityBitfield, ValidatorIndex, }; -use std::collections::{BTreeMap, 
HashMap, HashSet}; +use std::collections::{BTreeMap, HashMap}; +mod disputes; mod error; mod metrics; -mod onchain_disputes; pub use self::metrics::*; use error::{Error, FatalResult}; @@ -361,7 +359,13 @@ async fn send_inherent_data( relay_parent = ?leaf.hash, "Selecting disputes" ); - let disputes = select_disputes(from_job, metrics, leaf).await?; + + #[cfg(feature = "staging-client")] + let disputes = disputes::with_staging_api::select_disputes(from_job, metrics, leaf).await?; + + #[cfg(not(feature = "staging-client"))] + let disputes = disputes::without_staging_api::select_disputes(from_job, metrics).await?; + gum::trace!( target: LOG_TARGET, relay_parent = ?leaf.hash, @@ -676,276 +680,3 @@ fn bitfields_indicate_availability( 3 * availability.count_ones() >= 2 * availability.len() } - -#[derive(Debug)] -enum RequestType { - /// Query recent disputes, could be an excessive amount. - Recent, - /// Query the currently active and very recently concluded disputes. - Active, -} - -/// Request open disputes identified by `CandidateHash` and the `SessionIndex`. -async fn request_disputes( - sender: &mut impl overseer::ProvisionerSenderTrait, - active_or_recent: RequestType, -) -> Vec<(SessionIndex, CandidateHash)> { - let (tx, rx) = oneshot::channel(); - let msg = match active_or_recent { - RequestType::Recent => DisputeCoordinatorMessage::RecentDisputes(tx), - RequestType::Active => DisputeCoordinatorMessage::ActiveDisputes(tx), - }; - // Bounded by block production - `ProvisionerMessage::RequestInherentData`. - sender.send_unbounded_message(msg); - - let recent_disputes = match rx.await { - Ok(r) => r, - Err(oneshot::Canceled) => { - gum::warn!(target: LOG_TARGET, "Unable to gather {:?} disputes", active_or_recent); - Vec::new() - }, - }; - recent_disputes -} - -/// Request the relevant dispute statements for a set of disputes identified by `CandidateHash` and the `SessionIndex`. 
-async fn request_votes( - sender: &mut impl overseer::ProvisionerSenderTrait, - disputes_to_query: Vec<(SessionIndex, CandidateHash)>, -) -> Vec<(SessionIndex, CandidateHash, CandidateVotes)> { - // No need to send dummy request, if nothing to request: - if disputes_to_query.is_empty() { - gum::trace!(target: LOG_TARGET, "No disputes, nothing to request - returning empty `Vec`."); - - return Vec::new() - } - let (tx, rx) = oneshot::channel(); - // Bounded by block production - `ProvisionerMessage::RequestInherentData`. - sender.send_unbounded_message(DisputeCoordinatorMessage::QueryCandidateVotes( - disputes_to_query, - tx, - )); - - match rx.await { - Ok(v) => v, - Err(oneshot::Canceled) => { - gum::warn!(target: LOG_TARGET, "Unable to query candidate votes"); - Vec::new() - }, - } -} - -/// Extend `acc` by `n` random, picks of not-yet-present in `acc` items of `recent` without repetition and additions of recent. -fn extend_by_random_subset_without_repetition( - acc: &mut Vec<(SessionIndex, CandidateHash)>, - extension: Vec<(SessionIndex, CandidateHash)>, - n: usize, -) { - use rand::Rng; - - let lut = acc.iter().cloned().collect::>(); - - let mut unique_new = - extension.into_iter().filter(|recent| !lut.contains(recent)).collect::>(); - - // we can simply add all - if unique_new.len() <= n { - acc.extend(unique_new) - } else { - acc.reserve(n); - let mut rng = rand::thread_rng(); - for _ in 0..n { - let idx = rng.gen_range(0..unique_new.len()); - acc.push(unique_new.swap_remove(idx)); - } - } - // assure sorting stays candid according to session index - acc.sort_unstable_by(|a, b| a.0.cmp(&b.0)); -} - -/// The maximum number of disputes Provisioner will include in the inherent data. -/// Serves as a protection not to flood the Runtime with excessive data. 
-const MAX_DISPUTES_FORWARDED_TO_RUNTIME: usize = 1_000; - -async fn select_disputes( - sender: &mut impl overseer::ProvisionerSenderTrait, - metrics: &metrics::Metrics, - _leaf: &ActivatedLeaf, -) -> Result { - // Helper lambda - // Gets the active disputes as input and partitions it in seen and unseen disputes by the Runtime - // Returns as much unseen disputes as possible and optionally some seen disputes up to `MAX_DISPUTES_FORWARDED_TO_RUNTIME` limit. - let generate_unseen_active_subset = - |active: Vec<(SessionIndex, CandidateHash)>, - onchain: HashMap<(SessionIndex, CandidateHash), DisputeState>| - -> Vec<(SessionIndex, CandidateHash)> { - let (seen_onchain, mut unseen_onchain): ( - Vec<(SessionIndex, CandidateHash)>, - Vec<(SessionIndex, CandidateHash)>, - ) = active.into_iter().partition(|d| onchain.contains_key(d)); - - if unseen_onchain.len() > MAX_DISPUTES_FORWARDED_TO_RUNTIME { - // Even unseen on-chain don't fit within the limit. Add as many as possible. - let mut unseen_subset = Vec::with_capacity(MAX_DISPUTES_FORWARDED_TO_RUNTIME); - extend_by_random_subset_without_repetition( - &mut unseen_subset, - unseen_onchain, - MAX_DISPUTES_FORWARDED_TO_RUNTIME, - ); - unseen_subset - } else { - // Add all unseen onchain disputes and as much of the seen ones as there is space. - let n_unseen_onchain = unseen_onchain.len(); - extend_by_random_subset_without_repetition( - &mut unseen_onchain, - seen_onchain, - MAX_DISPUTES_FORWARDED_TO_RUNTIME.saturating_sub(n_unseen_onchain), - ); - unseen_onchain - } - }; - - // Helper lambda - // Extends the active disputes with recent ones up to `MAX_DISPUTES_FORWARDED_TO_RUNTIME` limit. Unseen recent disputes are prioritised. 
- let generate_active_and_unseen_recent_subset = - |recent: Vec<(SessionIndex, CandidateHash)>, - mut active: Vec<(SessionIndex, CandidateHash)>, - onchain: HashMap<(SessionIndex, CandidateHash), DisputeState>| - -> Vec<(SessionIndex, CandidateHash)> { - let mut n_active = active.len(); - // All active disputes can be sent. Fill the rest of the space with recent ones. - // We assume there is not enough space for all recent disputes. So we prioritise the unseen ones. - let (seen_onchain, unseen_onchain): ( - Vec<(SessionIndex, CandidateHash)>, - Vec<(SessionIndex, CandidateHash)>, - ) = recent.into_iter().partition(|d| onchain.contains_key(d)); - - extend_by_random_subset_without_repetition( - &mut active, - unseen_onchain, - MAX_DISPUTES_FORWARDED_TO_RUNTIME.saturating_sub(n_active), - ); - n_active = active.len(); - - if n_active < MAX_DISPUTES_FORWARDED_TO_RUNTIME { - // Looks like we can add some of the seen disputes too - extend_by_random_subset_without_repetition( - &mut active, - seen_onchain, - MAX_DISPUTES_FORWARDED_TO_RUNTIME.saturating_sub(n_active), - ); - } - active - }; - - gum::trace!( - target: LOG_TARGET, - relay_parent = ?_leaf.hash, - "Request recent disputes" - ); - - // We use `RecentDisputes` instead of `ActiveDisputes` because redundancy is fine. - // It's heavier than `ActiveDisputes` but ensures that everything from the dispute - // window gets on-chain, unlike `ActiveDisputes`. - // In case of an overload condition, we limit ourselves to active disputes, and fill up to the - // upper bound of disputes to pass to wasm `fn create_inherent_data`. - // If the active ones are already exceeding the bounds, randomly select a subset. 
- let recent = request_disputes(sender, RequestType::Recent).await; - - gum::trace!( - target: LOG_TARGET, - relay_paent = ?_leaf.hash, - "Received recent disputes" - ); - - gum::trace!( - target: LOG_TARGET, - relay_paent = ?_leaf.hash, - "Request on chain disputes" - ); - - // On chain disputes are fetched from the runtime. We want to prioritise the inclusion of unknown - // disputes in the inherent data. The call relies on staging Runtime API. If the staging API is not - // enabled in the binary an empty set is generated which doesn't affect the rest of the logic. - let onchain = match onchain_disputes::get_onchain_disputes(sender, _leaf.hash.clone()).await { - Ok(r) => r, - Err(e) => { - gum::debug!( - target: LOG_TARGET, - ?e, - "Can't fetch onchain disputes. Will continue with empty onchain disputes set.", - ); - HashMap::new() - }, - }; - - gum::trace!( - target: LOG_TARGET, - relay_paent = ?_leaf.hash, - "Received on chain disputes" - ); - - gum::trace!( - target: LOG_TARGET, - relay_paent = ?_leaf.hash, - "Filtering disputes" - ); - - let disputes = if recent.len() > MAX_DISPUTES_FORWARDED_TO_RUNTIME { - gum::warn!( - target: LOG_TARGET, - "Recent disputes are excessive ({} > {}), reduce to active ones, and selected", - recent.len(), - MAX_DISPUTES_FORWARDED_TO_RUNTIME - ); - let active = request_disputes(sender, RequestType::Active).await; - if active.len() > MAX_DISPUTES_FORWARDED_TO_RUNTIME { - generate_unseen_active_subset(active, onchain) - } else { - generate_active_and_unseen_recent_subset(recent, active, onchain) - } - } else { - recent - }; - - gum::trace!( - target: LOG_TARGET, - relay_paent = ?_leaf.hash, - "Calling `request_votes`" - ); - - // Load all votes for all disputes from the coordinator. - let dispute_candidate_votes = request_votes(sender, disputes).await; - - gum::trace!( - target: LOG_TARGET, - relay_paent = ?_leaf.hash, - "Finished `request_votes`" - ); - - // Transform all `CandidateVotes` into `MultiDisputeStatementSet`. 
- Ok(dispute_candidate_votes - .into_iter() - .map(|(session_index, candidate_hash, votes)| { - let valid_statements = votes - .valid - .into_iter() - .map(|(i, (s, sig))| (DisputeStatement::Valid(s), i, sig)); - - let invalid_statements = votes - .invalid - .into_iter() - .map(|(i, (s, sig))| (DisputeStatement::Invalid(s), i, sig)); - - metrics.inc_valid_statements_by(valid_statements.len()); - metrics.inc_invalid_statements_by(invalid_statements.len()); - metrics.inc_dispute_statement_sets_by(1); - - DisputeStatementSet { - candidate_hash, - session: session_index, - statements: valid_statements.chain(invalid_statements).collect(), - } - }) - .collect()) -} diff --git a/node/core/provisioner/src/metrics.rs b/node/core/provisioner/src/metrics.rs index 829293dfed53..e3991f88d664 100644 --- a/node/core/provisioner/src/metrics.rs +++ b/node/core/provisioner/src/metrics.rs @@ -14,6 +14,8 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . +#[cfg(feature = "staging-client")] +use crate::disputes::with_staging_api::PartitionedDisputes; use polkadot_node_subsystem_util::metrics::{self, prometheus}; #[derive(Clone)] @@ -28,6 +30,10 @@ struct MetricsInner { /// 4 hours on Polkadot. The metrics are updated only when the node authors a block, so values vary across nodes. inherent_data_dispute_statement_sets: prometheus::Counter, inherent_data_dispute_statements: prometheus::CounterVec, + + #[cfg(feature = "staging-client")] + // The disputes received from `disputes-coordinator` by partition + partitioned_disputes: prometheus::CounterVec, } /// Provisioner metrics. 
@@ -95,6 +101,40 @@ impl Metrics {
 				.inc_by(disputes.try_into().unwrap_or(0));
 		}
 	}
+
+	#[cfg(feature = "staging-client")]
+	pub(crate) fn on_partition_recent_disputes(&self, disputes: &PartitionedDisputes) {
+		if let Some(metrics) = &self.0 {
+			let PartitionedDisputes {
+				active_unconcluded_onchain: cant_conclude_onchain,
+				active_unknown_onchain: unknown_onchain,
+				active_concluded_onchain: can_conclude_onchain,
+				inactive_known_onchain: concluded_known_onchain,
+				inactive_unknown_onchain: concluded_unknown_onchain,
+			} = disputes;
+
+			metrics
+				.partitioned_disputes
+				.with_label_values(&["cant_conclude_onchain"])
+				.inc_by(cant_conclude_onchain.len().try_into().unwrap_or(0));
+			metrics
+				.partitioned_disputes
+				.with_label_values(&["unknown_onchain"])
+				.inc_by(unknown_onchain.len().try_into().unwrap_or(0));
+			metrics
+				.partitioned_disputes
+				.with_label_values(&["can_conclude_onchain"])
+				.inc_by(can_conclude_onchain.len().try_into().unwrap_or(0));
+			metrics
+				.partitioned_disputes
+				.with_label_values(&["concluded_known_onchain"])
+				.inc_by(concluded_known_onchain.len().try_into().unwrap_or(0));
+			metrics
+				.partitioned_disputes
+				.with_label_values(&["concluded_unknown_onchain"])
+				.inc_by(concluded_unknown_onchain.len().try_into().unwrap_or(0));
+		}
+	}
 }
 
 impl metrics::Metrics for Metrics {
@@ -150,6 +190,17 @@ impl metrics::Metrics for Metrics {
 				)?,
 				registry,
 			)?,
+			#[cfg(feature = "staging-client")]
+			partitioned_disputes: prometheus::register(
+				prometheus::CounterVec::new(
+					prometheus::Opts::new(
+						"polkadot_parachain_provisioner_partitioned_disputes",
+						"Number of disputes partitioned by type",
+					),
+					&["partition"],
+				)?,
+				&registry,
+			)?,
 		};
 		Ok(Metrics(Some(metrics)))
 	}
 }
diff --git a/node/core/provisioner/src/onchain_disputes.rs b/node/core/provisioner/src/onchain_disputes.rs
deleted file mode 100644
index 6810f512173f..000000000000
--- a/node/core/provisioner/src/onchain_disputes.rs
+++ /dev/null
@@ -1,77 +0,0 @@
-// Copyright 2017-2022 Parity Technologies (UK) Ltd.
-// This file is part of Polkadot. - -// Polkadot is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. - -// Polkadot is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. - -// You should have received a copy of the GNU General Public License -// along with Polkadot. If not, see . - -use crate::error::GetOnchainDisputesError; -use polkadot_node_subsystem::overseer; -use polkadot_primitives::v2::{CandidateHash, DisputeState, Hash, SessionIndex}; -use std::collections::HashMap; - -pub async fn get_onchain_disputes( - _sender: &mut Sender, - _relay_parent: Hash, -) -> Result, GetOnchainDisputesError> -where - Sender: overseer::ProvisionerSenderTrait, -{ - let _onchain = Result::< - HashMap<(SessionIndex, CandidateHash), DisputeState>, - GetOnchainDisputesError, - >::Ok(HashMap::new()); - #[cfg(feature = "staging-client")] - let _onchain = self::staging_impl::get_onchain_disputes(_sender, _relay_parent).await; - - _onchain -} - -// Merge this module with the outer (current one) when promoting to stable -#[cfg(feature = "staging-client")] -mod staging_impl { - use super::*; // remove this when promoting to stable - use crate::LOG_TARGET; - use futures::channel::oneshot; - use polkadot_node_subsystem::{ - errors::RuntimeApiError, - messages::{RuntimeApiMessage, RuntimeApiRequest}, - SubsystemSender, - }; - - /// Gets the on-chain disputes at a given block number and returns them as a `HashSet` so that searching in them is cheap. 
- pub async fn get_onchain_disputes( - sender: &mut impl SubsystemSender, - relay_parent: Hash, - ) -> Result, GetOnchainDisputesError> { - gum::trace!(target: LOG_TARGET, ?relay_parent, "Fetching on-chain disputes"); - let (tx, rx) = oneshot::channel(); - sender - .send_message( - RuntimeApiMessage::Request(relay_parent, RuntimeApiRequest::StagingDisputes(tx)) - .into(), - ) - .await; - - rx.await - .map_err(|_| GetOnchainDisputesError::Channel) - .and_then(|res| { - res.map_err(|e| match e { - RuntimeApiError::Execution { .. } => - GetOnchainDisputesError::Execution(e, relay_parent), - RuntimeApiError::NotSupported { .. } => - GetOnchainDisputesError::NotSupported(e, relay_parent), - }) - }) - .map(|v| v.into_iter().map(|e| ((e.0, e.1), e.2)).collect()) - } -} diff --git a/node/core/provisioner/src/tests.rs b/node/core/provisioner/src/tests.rs index d0ca425210ed..08eba8eabe80 100644 --- a/node/core/provisioner/src/tests.rs +++ b/node/core/provisioner/src/tests.rs @@ -195,7 +195,7 @@ mod select_availability_bitfields { } } -mod common { +pub(crate) mod common { use super::super::*; use futures::channel::mpsc; use polkadot_node_subsystem::messages::AllMessages; @@ -497,403 +497,3 @@ mod select_candidates { ) } } - -mod select_disputes { - use super::{super::*, common::test_harness}; - use futures::channel::mpsc; - use polkadot_node_subsystem::{ - messages::{AllMessages, DisputeCoordinatorMessage, RuntimeApiMessage, RuntimeApiRequest}, - RuntimeApiError, - }; - use polkadot_node_subsystem_test_helpers::TestSubsystemSender; - use polkadot_primitives::v2::DisputeState; - use std::sync::Arc; - use test_helpers; - - // Global Test Data - fn recent_disputes(len: usize) -> Vec<(SessionIndex, CandidateHash)> { - let mut res = Vec::with_capacity(len); - for _ in 0..len { - res.push((0, CandidateHash(Hash::random()))); - } - - res - } - - // same as recent_disputes() but with SessionIndex set to 1 - fn active_disputes(len: usize) -> Vec<(SessionIndex, CandidateHash)> { - let 
mut res = Vec::with_capacity(len); - for _ in 0..len { - res.push((1, CandidateHash(Hash::random()))); - } - - res - } - - fn leaf() -> ActivatedLeaf { - ActivatedLeaf { - hash: Hash::repeat_byte(0xAA), - number: 0xAA, - status: LeafStatus::Fresh, - span: Arc::new(jaeger::Span::Disabled), - } - } - - async fn mock_overseer( - leaf: ActivatedLeaf, - mut receiver: mpsc::UnboundedReceiver, - onchain_disputes: Result, RuntimeApiError>, - recent_disputes: Vec<(SessionIndex, CandidateHash)>, - active_disputes: Vec<(SessionIndex, CandidateHash)>, - ) { - while let Some(from_job) = receiver.next().await { - match from_job { - AllMessages::RuntimeApi(RuntimeApiMessage::Request( - _, - RuntimeApiRequest::StagingDisputes(sender), - )) => { - let _ = sender.send(onchain_disputes.clone()); - }, - AllMessages::RuntimeApi(_) => panic!("Unexpected RuntimeApi request"), - AllMessages::DisputeCoordinator(DisputeCoordinatorMessage::RecentDisputes( - sender, - )) => { - let _ = sender.send(recent_disputes.clone()); - }, - AllMessages::DisputeCoordinator(DisputeCoordinatorMessage::ActiveDisputes( - sender, - )) => { - let _ = sender.send(active_disputes.clone()); - }, - AllMessages::DisputeCoordinator( - DisputeCoordinatorMessage::QueryCandidateVotes(disputes, sender), - ) => { - let mut res = Vec::new(); - let v = CandidateVotes { - candidate_receipt: test_helpers::dummy_candidate_receipt(leaf.hash.clone()), - valid: BTreeMap::new(), - invalid: BTreeMap::new(), - }; - for r in disputes.iter() { - res.push((r.0, r.1, v.clone())); - } - - let _ = sender.send(res); - }, - _ => panic!("Unexpected message: {:?}", from_job), - } - } - } - - #[test] - fn recent_disputes_are_withing_onchain_limit() { - const RECENT_DISPUTES_SIZE: usize = 10; - let metrics = metrics::Metrics::new_dummy(); - let onchain_disputes = Ok(Vec::new()); - let active_disputes = Vec::new(); - let recent_disputes = recent_disputes(RECENT_DISPUTES_SIZE); - - let recent_disputes_overseer = recent_disputes.clone(); - 
test_harness( - |r| { - mock_overseer( - leaf(), - r, - onchain_disputes, - recent_disputes_overseer, - active_disputes, - ) - }, - |mut tx: TestSubsystemSender| async move { - let lf = leaf(); - let disputes = select_disputes(&mut tx, &metrics, &lf).await.unwrap(); - - assert!(!disputes.is_empty()); - - let result = disputes.iter().zip(recent_disputes.iter()); - // We should get all recent disputes. - for (d, r) in result { - assert_eq!(d.session, r.0); - assert_eq!(d.candidate_hash, r.1); - } - }, - ) - } - - #[test] - fn recent_disputes_are_too_much_but_active_are_within_limit() { - const RECENT_DISPUTES_SIZE: usize = MAX_DISPUTES_FORWARDED_TO_RUNTIME + 10; - const ACTIVE_DISPUTES_SIZE: usize = MAX_DISPUTES_FORWARDED_TO_RUNTIME; - let metrics = metrics::Metrics::new_dummy(); - let onchain_disputes = Ok(Vec::new()); - let recent_disputes = recent_disputes(RECENT_DISPUTES_SIZE); - let active_disputes = active_disputes(ACTIVE_DISPUTES_SIZE); - - let active_disputes_overseer = active_disputes.clone(); - test_harness( - |r| { - mock_overseer( - leaf(), - r, - onchain_disputes, - recent_disputes, - active_disputes_overseer, - ) - }, - |mut tx: TestSubsystemSender| async move { - let lf = leaf(); - let disputes = select_disputes(&mut tx, &metrics, &lf).await.unwrap(); - - assert!(!disputes.is_empty()); - - let result = disputes.iter().zip(active_disputes.iter()); - // We should get all active disputes. 
- for (d, r) in result { - assert_eq!(d.session, r.0); - assert_eq!(d.candidate_hash, r.1); - } - }, - ) - } - - #[test] - fn recent_disputes_are_too_much_but_active_are_less_than_the_limit() { - // In this case all active disputes + a random set of recent disputes should be returned - const RECENT_DISPUTES_SIZE: usize = MAX_DISPUTES_FORWARDED_TO_RUNTIME + 10; - const ACTIVE_DISPUTES_SIZE: usize = MAX_DISPUTES_FORWARDED_TO_RUNTIME - 10; - let metrics = metrics::Metrics::new_dummy(); - let onchain_disputes = Ok(Vec::new()); - let recent_disputes = recent_disputes(RECENT_DISPUTES_SIZE); - let active_disputes = active_disputes(ACTIVE_DISPUTES_SIZE); - - let active_disputes_overseer = active_disputes.clone(); - test_harness( - |r| { - mock_overseer( - leaf(), - r, - onchain_disputes, - recent_disputes, - active_disputes_overseer, - ) - }, - |mut tx: TestSubsystemSender| async move { - let lf = leaf(); - let disputes = select_disputes(&mut tx, &metrics, &lf).await.unwrap(); - - assert!(!disputes.is_empty()); - - // Recent disputes are generated with `SessionIndex` = 0 - let (res_recent, res_active): (Vec, Vec) = - disputes.into_iter().partition(|d| d.session == 0); - - // It should be good enough the count the number of active disputes and not compare them one by one. Checking the exact values is already covered by the previous tests. - assert_eq!(res_active.len(), active_disputes.len()); // We have got all active disputes - assert_eq!(res_active.len() + res_recent.len(), MAX_DISPUTES_FORWARDED_TO_RUNTIME); - // And some recent ones. - }, - ) - } - - //These tests rely on staging Runtime functions so they are separated and compiled conditionally. 
- #[cfg(feature = "staging-client")] - mod staging_tests { - use super::*; - - fn dummy_dispute_state() -> DisputeState { - DisputeState { - validators_for: BitVec::new(), - validators_against: BitVec::new(), - start: 0, - concluded_at: None, - } - } - - #[test] - fn recent_disputes_are_too_much_active_fits_test_recent_prioritisation() { - // In this case recent disputes are above `MAX_DISPUTES_FORWARDED_TO_RUNTIME` limit and the active ones are below it. - // The expected behaviour is to send all active disputes and extend the set with recent ones. During the extension the disputes unknown for the Runtime are added with priority. - const RECENT_DISPUTES_SIZE: usize = MAX_DISPUTES_FORWARDED_TO_RUNTIME + 10; - const ACTIVE_DISPUTES_SIZE: usize = MAX_DISPUTES_FORWARDED_TO_RUNTIME - 10; - const ONCHAIN_DISPUTE_SIZE: usize = RECENT_DISPUTES_SIZE - 9; - let metrics = metrics::Metrics::new_dummy(); - let recent_disputes = recent_disputes(RECENT_DISPUTES_SIZE); - let active_disputes = active_disputes(ACTIVE_DISPUTES_SIZE); - let onchain_disputes: Result< - Vec<(SessionIndex, CandidateHash, DisputeState)>, - RuntimeApiError, - > = Ok(Vec::from(&recent_disputes[0..ONCHAIN_DISPUTE_SIZE]) - .iter() - .map(|(session_index, candidate_hash)| { - (*session_index, candidate_hash.clone(), dummy_dispute_state()) - }) - .collect()); - let active_disputes_overseer = active_disputes.clone(); - let recent_disputes_overseer = recent_disputes.clone(); - test_harness( - |r| { - mock_overseer( - leaf(), - r, - onchain_disputes, - recent_disputes_overseer, - active_disputes_overseer, - ) - }, - |mut tx: TestSubsystemSender| async move { - let lf = leaf(); - let disputes = select_disputes(&mut tx, &metrics, &lf).await.unwrap(); - - assert!(!disputes.is_empty()); - - // Recent disputes are generated with `SessionIndex` = 0 - let (res_recent, res_active): ( - Vec, - Vec, - ) = disputes.into_iter().partition(|d| d.session == 0); - - // It should be good enough the count the number of the 
disputes and not compare them one by one as this was already covered in other tests. - assert_eq!(res_active.len(), active_disputes.len()); // We've got all active disputes. - assert_eq!( - res_recent.len(), - MAX_DISPUTES_FORWARDED_TO_RUNTIME - active_disputes.len() - ); // And some recent ones. - - // Check if the recent disputes were unknown for the Runtime. - let expected_recent_disputes = - Vec::from(&recent_disputes[ONCHAIN_DISPUTE_SIZE..]); - let res_recent_set: HashSet<(SessionIndex, CandidateHash)> = HashSet::from_iter( - res_recent.iter().map(|d| (d.session, d.candidate_hash)), - ); - - // Explicitly check that all unseen disputes are sent to the Runtime. - for d in &expected_recent_disputes { - assert_eq!(res_recent_set.contains(d), true); - } - }, - ) - } - - #[test] - fn active_disputes_are_too_much_test_active_prioritisation() { - // In this case the active disputes are above the `MAX_DISPUTES_FORWARDED_TO_RUNTIME` limit so the unseen ones should be prioritised. - const RECENT_DISPUTES_SIZE: usize = MAX_DISPUTES_FORWARDED_TO_RUNTIME + 10; - const ACTIVE_DISPUTES_SIZE: usize = MAX_DISPUTES_FORWARDED_TO_RUNTIME + 10; - const ONCHAIN_DISPUTE_SIZE: usize = ACTIVE_DISPUTES_SIZE - 9; - - let metrics = metrics::Metrics::new_dummy(); - let recent_disputes = recent_disputes(RECENT_DISPUTES_SIZE); - let active_disputes = active_disputes(ACTIVE_DISPUTES_SIZE); - let onchain_disputes: Result< - Vec<(SessionIndex, CandidateHash, DisputeState)>, - RuntimeApiError, - > = Ok(Vec::from(&active_disputes[0..ONCHAIN_DISPUTE_SIZE]) - .iter() - .map(|(session_index, candidate_hash)| { - (*session_index, candidate_hash.clone(), dummy_dispute_state()) - }) - .collect()); - let active_disputes_overseer = active_disputes.clone(); - let recent_disputes_overseer = recent_disputes.clone(); - test_harness( - |r| { - mock_overseer( - leaf(), - r, - onchain_disputes, - recent_disputes_overseer, - active_disputes_overseer, - ) - }, - |mut tx: TestSubsystemSender| async move { - let 
lf = leaf(); - let disputes = select_disputes(&mut tx, &metrics, &lf).await.unwrap(); - - assert!(!disputes.is_empty()); - - // Recent disputes are generated with `SessionIndex` = 0 - let (res_recent, res_active): ( - Vec, - Vec, - ) = disputes.into_iter().partition(|d| d.session == 0); - - // It should be good enough the count the number of the disputes and not compare them one by one - assert_eq!(res_recent.len(), 0); // We expect no recent disputes - assert_eq!(res_active.len(), MAX_DISPUTES_FORWARDED_TO_RUNTIME); - - let expected_active_disputes = - Vec::from(&active_disputes[ONCHAIN_DISPUTE_SIZE..]); - let res_active_set: HashSet<(SessionIndex, CandidateHash)> = HashSet::from_iter( - res_active.iter().map(|d| (d.session, d.candidate_hash)), - ); - - // Explicitly check that the unseen disputes are delivered to the Runtime. - for d in &expected_active_disputes { - assert_eq!(res_active_set.contains(d), true); - } - }, - ) - } - - #[test] - fn active_disputes_are_too_much_and_are_all_unseen() { - // In this case there are a lot of active disputes unseen by the Runtime. The focus of the test is to verify that in such cases known disputes are NOT sent to the Runtime. 
- const RECENT_DISPUTES_SIZE: usize = MAX_DISPUTES_FORWARDED_TO_RUNTIME + 10; - const ACTIVE_DISPUTES_SIZE: usize = MAX_DISPUTES_FORWARDED_TO_RUNTIME + 5; - const ONCHAIN_DISPUTE_SIZE: usize = 5; - - let metrics = metrics::Metrics::new_dummy(); - let recent_disputes = recent_disputes(RECENT_DISPUTES_SIZE); - let active_disputes = active_disputes(ACTIVE_DISPUTES_SIZE); - let onchain_disputes: Result< - Vec<(SessionIndex, CandidateHash, DisputeState)>, - RuntimeApiError, - > = Ok(Vec::from(&active_disputes[0..ONCHAIN_DISPUTE_SIZE]) - .iter() - .map(|(session_index, candidate_hash)| { - (*session_index, candidate_hash.clone(), dummy_dispute_state()) - }) - .collect()); - let active_disputes_overseer = active_disputes.clone(); - let recent_disputes_overseer = recent_disputes.clone(); - test_harness( - |r| { - mock_overseer( - leaf(), - r, - onchain_disputes, - recent_disputes_overseer, - active_disputes_overseer, - ) - }, - |mut tx: TestSubsystemSender| async move { - let lf = leaf(); - let disputes = select_disputes(&mut tx, &metrics, &lf).await.unwrap(); - assert!(!disputes.is_empty()); - - // Recent disputes are generated with `SessionIndex` = 0 - let (res_recent, res_active): ( - Vec, - Vec, - ) = disputes.into_iter().partition(|d| d.session == 0); - - // It should be good enough the count the number of the disputes and not compare them one by one - assert_eq!(res_recent.len(), 0); - assert_eq!(res_active.len(), MAX_DISPUTES_FORWARDED_TO_RUNTIME); - - // For sure we don't want to see any of this disputes in the result - let unexpected_active_disputes = - Vec::from(&active_disputes[0..ONCHAIN_DISPUTE_SIZE]); - let res_active_set: HashSet<(SessionIndex, CandidateHash)> = HashSet::from_iter( - res_active.iter().map(|d| (d.session, d.candidate_hash)), - ); - - // Verify that the result DOESN'T contain known disputes (because there is an excessive number of unknown onces). 
- for d in &unexpected_active_disputes { - assert_eq!(res_active_set.contains(d), false); - } - }, - ) - } - } -} diff --git a/node/network/approval-distribution/src/tests.rs b/node/network/approval-distribution/src/tests.rs index 90827f7d9e44..a96a89bb58eb 100644 --- a/node/network/approval-distribution/src/tests.rs +++ b/node/network/approval-distribution/src/tests.rs @@ -23,9 +23,9 @@ use polkadot_node_primitives::approval::{ }; use polkadot_node_subsystem::messages::{network_bridge_event, AllMessages, ApprovalCheckError}; use polkadot_node_subsystem_test_helpers as test_helpers; -use polkadot_primitives_test_helpers::dummy_signature; use polkadot_node_subsystem_util::TimeoutExt as _; use polkadot_primitives::v2::{AuthorityDiscoveryId, BlakeTwo256, HashT}; +use polkadot_primitives_test_helpers::dummy_signature; use rand::SeedableRng; use sp_authority_discovery::AuthorityPair as AuthorityDiscoveryPair; use sp_core::crypto::Pair as PairT; diff --git a/node/primitives/src/disputes/mod.rs b/node/primitives/src/disputes/mod.rs index 051ec0233258..ee047c7bcc22 100644 --- a/node/primitives/src/disputes/mod.rs +++ b/node/primitives/src/disputes/mod.rs @@ -31,7 +31,7 @@ use polkadot_primitives::v2::{ mod message; pub use message::{DisputeMessage, Error as DisputeMessageCheckError, UncheckedDisputeMessage}; mod status; -pub use status::{DisputeStatus, Timestamp}; +pub use status::{dispute_is_inactive, DisputeStatus, Timestamp, ACTIVE_DURATION_SECS}; /// A checked dispute statement from an associated validator. #[derive(Debug, Clone)] diff --git a/node/primitives/src/disputes/status.rs b/node/primitives/src/disputes/status.rs index 14ebcf0855d9..52d003ab7c7f 100644 --- a/node/primitives/src/disputes/status.rs +++ b/node/primitives/src/disputes/status.rs @@ -110,3 +110,18 @@ impl DisputeStatus { } } } + +/// The choice here is fairly arbitrary. But any dispute that concluded more than a few minutes ago +/// is not worth considering anymore. 
Changing this value has little to no bearing on consensus, +/// and really only affects the work that the node might do on startup during periods of many +/// disputes. +pub const ACTIVE_DURATION_SECS: Timestamp = 180; + +/// Checks if a dispute is inactive: returns true only if the dispute has concluded AND the +/// conclusion happened more than `ACTIVE_DURATION_SECS` ago. Ongoing disputes and disputes +/// that concluded recently are still considered active. +pub fn dispute_is_inactive(status: &DisputeStatus, now: &Timestamp) -> bool { + let at = status.concluded_at(); + + at.is_some() && at.unwrap() + ACTIVE_DURATION_SECS < *now +} diff --git a/node/primitives/src/lib.rs b/node/primitives/src/lib.rs index 17bc5129a664..895ed2732024 100644 --- a/node/primitives/src/lib.rs +++ b/node/primitives/src/lib.rs @@ -46,8 +46,9 @@ pub mod approval; /// Disputes related types. pub mod disputes; pub use disputes::{ - CandidateVotes, DisputeMessage, DisputeMessageCheckError, DisputeStatus, InvalidDisputeVote, - SignedDisputeStatement, Timestamp, UncheckedDisputeMessage, ValidDisputeVote, + dispute_is_inactive, CandidateVotes, DisputeMessage, DisputeMessageCheckError, DisputeStatus, + InvalidDisputeVote, SignedDisputeStatement, Timestamp, UncheckedDisputeMessage, + ValidDisputeVote, ACTIVE_DURATION_SECS, }; // For a 16-ary Merkle Prefix Trie, we can expect at most 16 32-byte hashes per node From 040d1fbe93de41b1d96e8b89763ec8bdce3d82f0 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Wed, 24 Aug 2022 11:50:50 +0300 Subject: [PATCH 23/65] Fix staging api usage --- node/core/provisioner/src/disputes/mod.rs | 2 - .../src/disputes/with_staging_api/tests.rs | 88 +++++++++---------- node/core/provisioner/src/lib.rs | 69 +++++++++++++-- node/core/provisioner/src/metrics.rs | 4 - 4 files changed, 102 insertions(+), 61 deletions(-) diff --git a/node/core/provisioner/src/disputes/mod.rs b/node/core/provisioner/src/disputes/mod.rs index f36f35671857..f89d2b974809 100644 ---
a/node/core/provisioner/src/disputes/mod.rs +++ b/node/core/provisioner/src/disputes/mod.rs @@ -46,8 +46,6 @@ async fn request_votes( } } -#[cfg(feature = "staging-client")] pub(crate) mod with_staging_api; -#[cfg(not(feature = "staging-client"))] pub(crate) mod without_staging_api; diff --git a/node/core/provisioner/src/disputes/with_staging_api/tests.rs b/node/core/provisioner/src/disputes/with_staging_api/tests.rs index bfd896a6534a..6eb74cb044d4 100644 --- a/node/core/provisioner/src/disputes/with_staging_api/tests.rs +++ b/node/core/provisioner/src/disputes/with_staging_api/tests.rs @@ -503,8 +503,8 @@ fn normal_flow() { //concluded disputes unknown onchain let (fifth_idx, fifth_votes) = input.add_concluded_disputes_unknown_onchain(DISPUTES_PER_BATCH); - // concluded disputes known onchain - let (fourth_idx, _) = input.add_concluded_disputes_known_onchain(DISPUTES_PER_BATCH); + // concluded disputes known onchain - these should be ignored + let (_, _) = input.add_concluded_disputes_known_onchain(DISPUTES_PER_BATCH); // active disputes unknown onchain let (second_idx, second_votes) = @@ -520,52 +520,44 @@ fn normal_flow() { assert!(!result.is_empty()); - if cfg!(feature = "staging-client") { - assert_eq!(result.len(), 4 * DISPUTES_PER_BATCH); - - // Naive checks that the result is partitioned correctly - let (first_batch, rest): (Vec, Vec) = - result.into_iter().partition(|d| d.session == first_idx); - assert_eq!(first_batch.len(), DISPUTES_PER_BATCH); - - let (second_batch, rest): (Vec, Vec) = - rest.into_iter().partition(|d| d.session == second_idx); - assert_eq!(second_batch.len(), DISPUTES_PER_BATCH); - - let (third_batch, rest): (Vec, Vec) = - rest.into_iter().partition(|d| d.session == third_idx); - assert_eq!(third_batch.len(), DISPUTES_PER_BATCH); - - let (fifth_batch, rest): (Vec, Vec) = - rest.into_iter().partition(|d| d.session == fifth_idx); - assert_eq!(fifth_batch.len(), DISPUTES_PER_BATCH); - - // Ensure there are no more disputes - fourth_batch 
should be dropped - assert_eq!(rest.len(), 0); - - assert_eq!( - first_batch.iter().map(|d| d.statements.len()).fold(0, |acc, v| acc + v), - first_votes - ); - assert_eq!( - second_batch.iter().map(|d| d.statements.len()).fold(0, |acc, v| acc + v), - second_votes - ); - assert_eq!( - third_batch.iter().map(|d| d.statements.len()).fold(0, |acc, v| acc + v), - third_votes - ); - assert_eq!( - fifth_batch.iter().map(|d| d.statements.len()).fold(0, |acc, v| acc + v), - fifth_votes - ); - } else { - assert_eq!(result.len(), 5 * DISPUTES_PER_BATCH); - for i in 0..3 * DISPUTES_PER_BATCH { - assert_ne!(result.get(i).unwrap().session, fourth_idx); - assert_ne!(result.get(i).unwrap().session, fifth_idx); - } - } + assert_eq!(result.len(), 4 * DISPUTES_PER_BATCH); + + // Naive checks that the result is partitioned correctly + let (first_batch, rest): (Vec, Vec) = + result.into_iter().partition(|d| d.session == first_idx); + assert_eq!(first_batch.len(), DISPUTES_PER_BATCH); + + let (second_batch, rest): (Vec, Vec) = + rest.into_iter().partition(|d| d.session == second_idx); + assert_eq!(second_batch.len(), DISPUTES_PER_BATCH); + + let (third_batch, rest): (Vec, Vec) = + rest.into_iter().partition(|d| d.session == third_idx); + assert_eq!(third_batch.len(), DISPUTES_PER_BATCH); + + let (fifth_batch, rest): (Vec, Vec) = + rest.into_iter().partition(|d| d.session == fifth_idx); + assert_eq!(fifth_batch.len(), DISPUTES_PER_BATCH); + + // Ensure there are no more disputes - fourth_batch should be dropped + assert_eq!(rest.len(), 0); + + assert_eq!( + first_batch.iter().map(|d| d.statements.len()).fold(0, |acc, v| acc + v), + first_votes + ); + assert_eq!( + second_batch.iter().map(|d| d.statements.len()).fold(0, |acc, v| acc + v), + second_votes + ); + assert_eq!( + third_batch.iter().map(|d| d.statements.len()).fold(0, |acc, v| acc + v), + third_votes + ); + assert_eq!( + fifth_batch.iter().map(|d| d.statements.len()).fold(0, |acc, v| acc + v), + fifth_votes + ); }, ); 
assert!(vote_queries <= ACCEPTABLE_RUNTIME_VOTES_QUERIES_COUNT); diff --git a/node/core/provisioner/src/lib.rs b/node/core/provisioner/src/lib.rs index 9c1e3605e1b6..5b80cda09741 100644 --- a/node/core/provisioner/src/lib.rs +++ b/node/core/provisioner/src/lib.rs @@ -29,10 +29,10 @@ use polkadot_node_subsystem::{ jaeger, messages::{ CandidateBackingMessage, ChainApiMessage, ProvisionableData, ProvisionerInherentData, - ProvisionerMessage, + ProvisionerMessage, RuntimeApiMessage, RuntimeApiRequest, }, overseer, ActivatedLeaf, ActiveLeavesUpdate, FromOrchestra, LeafStatus, OverseerSignal, - PerLeafSpan, SpawnedSubsystem, SubsystemError, + PerLeafSpan, RuntimeApiError, SpawnedSubsystem, SubsystemError, }; use polkadot_node_subsystem_util::{ request_availability_cores, request_persisted_validation_data, TimeoutExt, @@ -60,6 +60,8 @@ const SEND_INHERENT_DATA_TIMEOUT: std::time::Duration = core::time::Duration::fr const LOG_TARGET: &str = "parachain::provisioner"; +const STAGING_RUNTIME_VERSION_REQUIREMENT: u32 = 3; + /// The provisioner subsystem. 
pub struct ProvisionerSubsystem { metrics: Metrics, @@ -360,11 +362,13 @@ async fn send_inherent_data( "Selecting disputes" ); - #[cfg(feature = "staging-client")] - let disputes = disputes::with_staging_api::select_disputes(from_job, metrics, leaf).await?; - - #[cfg(not(feature = "staging-client"))] - let disputes = disputes::without_staging_api::select_disputes(from_job, metrics).await?; + let disputes = + match has_staging_runtime(from_job, leaf.hash.clone(), STAGING_RUNTIME_VERSION_REQUIREMENT) + .await + { + true => disputes::with_staging_api::select_disputes(from_job, metrics, leaf).await?, + false => disputes::without_staging_api::select_disputes(from_job, metrics).await?, + }; gum::trace!( target: LOG_TARGET, @@ -680,3 +684,54 @@ fn bitfields_indicate_availability( 3 * availability.count_ones() >= 2 * availability.len() } + +async fn has_staging_runtime( + sender: &mut impl overseer::ProvisionerSenderTrait, + relay_parent: Hash, + required_runtime_version: u32, +) -> bool { + gum::trace!(target: LOG_TARGET, ?relay_parent, "Fetching runtime version"); + + let (tx, rx) = oneshot::channel(); + sender + .send_message(RuntimeApiMessage::Request(relay_parent, RuntimeApiRequest::Version(tx))) + .await; + + match rx.await { + Result::Ok(Ok(runtime_version)) => { + gum::trace!( + target: LOG_TARGET, + ?relay_parent, + ?runtime_version, + ?required_runtime_version, + "Fetched runtime version" + ); + runtime_version >= required_runtime_version + }, + Result::Ok(Err(RuntimeApiError::Execution { source: error, .. })) => { + gum::trace!( + target: LOG_TARGET, + ?relay_parent, + ?error, + "Execution error while fetching runtime version" + ); + false + }, + Result::Ok(Err(RuntimeApiError::NotSupported { .. 
})) => { + gum::trace!( + target: LOG_TARGET, + ?relay_parent, + "NotSupported error while fetching runtime version" + ); + false + }, + Result::Err(_) => { + gum::trace!( + target: LOG_TARGET, + ?relay_parent, + "Cancelled error while fetching runtime version" + ); + false + }, + } +} diff --git a/node/core/provisioner/src/metrics.rs b/node/core/provisioner/src/metrics.rs index e3991f88d664..23c3a84de228 100644 --- a/node/core/provisioner/src/metrics.rs +++ b/node/core/provisioner/src/metrics.rs @@ -14,7 +14,6 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -#[cfg(feature = "staging-client")] use crate::disputes::with_staging_api::PartitionedDisputes; use polkadot_node_subsystem_util::metrics::{self, prometheus}; @@ -31,7 +30,6 @@ struct MetricsInner { inherent_data_dispute_statement_sets: prometheus::Counter, inherent_data_dispute_statements: prometheus::CounterVec, - #[cfg(feature = "staging-client")] // The disputes received from `disputes-coordinator` by partition partitioned_disputes: prometheus::CounterVec, } @@ -102,7 +100,6 @@ impl Metrics { } } - #[cfg(feature = "staging-client")] pub(crate) fn on_partition_recent_disputes(&self, disputes: &PartitionedDisputes) { if let Some(metrics) = &self.0 { let PartitionedDisputes { @@ -190,7 +187,6 @@ impl metrics::Metrics for Metrics { )?, registry, )?, - #[cfg(feature = "staging-client")] partitioned_disputes: prometheus::register( prometheus::CounterVec::new( prometheus::Opts::new( From b8f65667da168be07d47fa6bca89b0a1d98cfe51 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Wed, 24 Aug 2022 15:39:48 +0300 Subject: [PATCH 24/65] Fix `get_disputes` runtime function implementation --- node/core/runtime-api/src/lib.rs | 2 +- node/core/runtime-api/src/tests.rs | 4 ---- node/subsystem-types/src/runtime_client.rs | 6 +++--- primitives/src/runtime_api.rs | 4 ++-- runtime/kusama/src/lib.rs | 13 ++++--------- runtime/polkadot/src/lib.rs | 13 
++++--------- runtime/rococo/src/lib.rs | 3 ++- runtime/test-runtime/src/lib.rs | 4 ---- runtime/westend/src/lib.rs | 3 ++- 9 files changed, 18 insertions(+), 34 deletions(-) diff --git a/node/core/runtime-api/src/lib.rs b/node/core/runtime-api/src/lib.rs index a815b76a8d7c..3ee143d87918 100644 --- a/node/core/runtime-api/src/lib.rs +++ b/node/core/runtime-api/src/lib.rs @@ -500,6 +500,6 @@ where Request::ValidationCodeHash(para, assumption, sender) => query!(ValidationCodeHash, validation_code_hash(para, assumption), ver = 2, sender), Request::StagingDisputes(sender) => - query!(StagingDisputes, staging_get_disputes(), ver = 2, sender), + query!(StagingDisputes, get_disputes(), ver = 2, sender), } } diff --git a/node/core/runtime-api/src/tests.rs b/node/core/runtime-api/src/tests.rs index b1a1bba73769..563749319d3d 100644 --- a/node/core/runtime-api/src/tests.rs +++ b/node/core/runtime-api/src/tests.rs @@ -196,10 +196,6 @@ sp_api::mock_impl_runtime_apis! { ) -> Option { self.validation_code_hash.get(¶).map(|c| c.clone()) } - - fn staging_get_disputes() -> Vec<(SessionIndex, CandidateHash, DisputeState)> { - unimplemented!() - } } impl BabeApi for MockRuntimeApi { diff --git a/node/subsystem-types/src/runtime_client.rs b/node/subsystem-types/src/runtime_client.rs index 2aa9e2bffb82..b7e458fdd6d1 100644 --- a/node/subsystem-types/src/runtime_client.rs +++ b/node/subsystem-types/src/runtime_client.rs @@ -186,7 +186,7 @@ pub trait RuntimeApiSubsystemClient { /// Returns all onchain disputes. /// This is a staging method! Do not use on production runtimes! 
- async fn staging_get_disputes( + async fn get_disputes( &self, at: Hash, ) -> Result)>, ApiError>; @@ -375,10 +375,10 @@ where self.runtime_api().session_info_before_version_2(&BlockId::Hash(at), index) } - async fn staging_get_disputes( + async fn get_disputes( &self, at: Hash, ) -> Result)>, ApiError> { - self.runtime_api().staging_get_disputes(&BlockId::Hash(at)) + self.runtime_api().get_disputes(&BlockId::Hash(at)) } } diff --git a/primitives/src/runtime_api.rs b/primitives/src/runtime_api.rs index 84d2cf0ec4ca..2c95b2b0eb26 100644 --- a/primitives/src/runtime_api.rs +++ b/primitives/src/runtime_api.rs @@ -153,7 +153,7 @@ sp_api::decl_runtime_apis! { /***** STAGING *****/ /// Returns all onchain disputes. - /// This is a staging method! Do not use on production runtimes! - fn staging_get_disputes() -> Vec<(v2::SessionIndex, v2::CandidateHash, v2::DisputeState)>; + #[api_version(3)] + fn get_disputes() -> Vec<(v2::SessionIndex, v2::CandidateHash, v2::DisputeState)>; } } diff --git a/runtime/kusama/src/lib.rs b/runtime/kusama/src/lib.rs index 250dcfa6868a..330040b343f1 100644 --- a/runtime/kusama/src/lib.rs +++ b/runtime/kusama/src/lib.rs @@ -23,11 +23,10 @@ use pallet_transaction_payment::CurrencyAdapter; use parity_scale_codec::{Decode, Encode, MaxEncodedLen}; use primitives::v2::{ - AccountId, AccountIndex, Balance, BlockNumber, CandidateEvent, CandidateHash, - CommittedCandidateReceipt, CoreState, DisputeState, GroupRotationInfo, Hash, Id as ParaId, - InboundDownwardMessage, InboundHrmpMessage, Moment, Nonce, OccupiedCoreAssumption, - PersistedValidationData, ScrapedOnChainVotes, SessionInfo, Signature, ValidationCode, - ValidationCodeHash, ValidatorId, ValidatorIndex, + AccountId, AccountIndex, Balance, BlockNumber, CandidateEvent, CommittedCandidateReceipt, + CoreState, GroupRotationInfo, Hash, Id as ParaId, InboundDownwardMessage, InboundHrmpMessage, + Moment, Nonce, OccupiedCoreAssumption, PersistedValidationData, ScrapedOnChainVotes, + SessionInfo, 
Signature, ValidationCode, ValidationCodeHash, ValidatorId, ValidatorIndex, }; use runtime_common::{ auctions, claims, crowdloan, impl_runtime_weights, impls::DealWithFees, paras_registrar, @@ -1670,10 +1669,6 @@ sp_api::impl_runtime_apis! { { parachains_runtime_api_impl::validation_code_hash::(para_id, assumption) } - - fn staging_get_disputes() -> Vec<(SessionIndex, CandidateHash, DisputeState)> { - unimplemented!() - } } impl beefy_primitives::BeefyApi for Runtime { diff --git a/runtime/polkadot/src/lib.rs b/runtime/polkadot/src/lib.rs index d429ea849915..c0fc2ced9f23 100644 --- a/runtime/polkadot/src/lib.rs +++ b/runtime/polkadot/src/lib.rs @@ -54,11 +54,10 @@ use pallet_session::historical as session_historical; use pallet_transaction_payment::{FeeDetails, RuntimeDispatchInfo}; use parity_scale_codec::{Decode, Encode, MaxEncodedLen}; use primitives::v2::{ - AccountId, AccountIndex, Balance, BlockNumber, CandidateEvent, CandidateHash, - CommittedCandidateReceipt, CoreState, DisputeState, GroupRotationInfo, Hash, Id as ParaId, - InboundDownwardMessage, InboundHrmpMessage, Moment, Nonce, OccupiedCoreAssumption, - PersistedValidationData, ScrapedOnChainVotes, SessionInfo, Signature, ValidationCode, - ValidationCodeHash, ValidatorId, ValidatorIndex, + AccountId, AccountIndex, Balance, BlockNumber, CandidateEvent, CommittedCandidateReceipt, + CoreState, GroupRotationInfo, Hash, Id as ParaId, InboundDownwardMessage, InboundHrmpMessage, + Moment, Nonce, OccupiedCoreAssumption, PersistedValidationData, ScrapedOnChainVotes, + SessionInfo, Signature, ValidationCode, ValidationCodeHash, ValidatorId, ValidatorIndex, }; use sp_core::OpaqueMetadata; use sp_mmr_primitives as mmr; @@ -1799,10 +1798,6 @@ sp_api::impl_runtime_apis! 
{ { parachains_runtime_api_impl::validation_code_hash::(para_id, assumption) } - - fn staging_get_disputes() -> Vec<(SessionIndex, CandidateHash, DisputeState)> { - unimplemented!() - } } impl beefy_primitives::BeefyApi for Runtime { diff --git a/runtime/rococo/src/lib.rs b/runtime/rococo/src/lib.rs index a4cace51c42b..48e9552af023 100644 --- a/runtime/rococo/src/lib.rs +++ b/runtime/rococo/src/lib.rs @@ -1004,6 +1004,7 @@ sp_api::impl_runtime_apis! { } } + #[api_version(3)] impl primitives::runtime_api::ParachainHost for Runtime { fn validators() -> Vec { runtime_api_impl::validators::() @@ -1099,7 +1100,7 @@ sp_api::impl_runtime_apis! { runtime_api_impl::validation_code_hash::(para_id, assumption) } - fn staging_get_disputes() -> Vec<(SessionIndex, CandidateHash, DisputeState)> { + fn get_disputes() -> Vec<(SessionIndex, CandidateHash, DisputeState)> { runtime_parachains::runtime_api_impl::vstaging::get_session_disputes::() } } diff --git a/runtime/test-runtime/src/lib.rs b/runtime/test-runtime/src/lib.rs index d18a2c9bb95c..d5bdbf1e0b07 100644 --- a/runtime/test-runtime/src/lib.rs +++ b/runtime/test-runtime/src/lib.rs @@ -905,10 +905,6 @@ sp_api::impl_runtime_apis! { { runtime_impl::validation_code_hash::(para_id, assumption) } - - fn staging_get_disputes() -> Vec<(SessionIndex, CandidateHash, DisputeState)> { - polkadot_runtime_parachains::runtime_api_impl::vstaging::get_session_disputes::() - } } impl beefy_primitives::BeefyApi for Runtime { diff --git a/runtime/westend/src/lib.rs b/runtime/westend/src/lib.rs index 9bdde2ea5f38..db06daa5fa96 100644 --- a/runtime/westend/src/lib.rs +++ b/runtime/westend/src/lib.rs @@ -1276,6 +1276,7 @@ sp_api::impl_runtime_apis! { } } + #[api_version(3)] impl primitives::runtime_api::ParachainHost for Runtime { fn validators() -> Vec { parachains_runtime_api_impl::validators::() @@ -1374,7 +1375,7 @@ sp_api::impl_runtime_apis! 
{ parachains_runtime_api_impl::validation_code_hash::(para_id, assumption) } - fn staging_get_disputes() -> Vec<(SessionIndex, CandidateHash, DisputeState)> { + fn get_disputes() -> Vec<(SessionIndex, CandidateHash, DisputeState)> { runtime_parachains::runtime_api_impl::vstaging::get_session_disputes::() } } From 1be798383da114cefdf1c51661e17ebf6acf8a28 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Wed, 24 Aug 2022 17:11:01 +0300 Subject: [PATCH 25/65] Fix compilation error --- node/core/runtime-api/src/tests.rs | 10 +++++----- runtime/test-runtime/src/lib.rs | 11 +++++------ 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/node/core/runtime-api/src/tests.rs b/node/core/runtime-api/src/tests.rs index 563749319d3d..9c14561b1f48 100644 --- a/node/core/runtime-api/src/tests.rs +++ b/node/core/runtime-api/src/tests.rs @@ -23,11 +23,11 @@ use polkadot_node_subsystem_test_helpers::make_subsystem_context; use polkadot_primitives::{ runtime_api::ParachainHost, v2::{ - AuthorityDiscoveryId, Block, BlockNumber, CandidateEvent, CandidateHash, - CommittedCandidateReceipt, CoreState, DisputeState, GroupRotationInfo, Id as ParaId, - InboundDownwardMessage, InboundHrmpMessage, OccupiedCoreAssumption, - PersistedValidationData, PvfCheckStatement, ScrapedOnChainVotes, SessionIndex, SessionInfo, - ValidationCode, ValidationCodeHash, ValidatorId, ValidatorIndex, ValidatorSignature, + AuthorityDiscoveryId, Block, CandidateEvent, CommittedCandidateReceipt, CoreState, + GroupRotationInfo, Id as ParaId, InboundDownwardMessage, InboundHrmpMessage, + OccupiedCoreAssumption, PersistedValidationData, PvfCheckStatement, ScrapedOnChainVotes, + SessionIndex, SessionInfo, ValidationCode, ValidationCodeHash, ValidatorId, ValidatorIndex, + ValidatorSignature, }, }; use sp_api::ProvideRuntimeApi; diff --git a/runtime/test-runtime/src/lib.rs b/runtime/test-runtime/src/lib.rs index d5bdbf1e0b07..f2b84b5f0616 100644 --- a/runtime/test-runtime/src/lib.rs +++ 
b/runtime/test-runtime/src/lib.rs @@ -45,12 +45,11 @@ use pallet_session::historical as session_historical; use pallet_transaction_payment::{FeeDetails, RuntimeDispatchInfo}; use polkadot_runtime_parachains::reward_points::RewardValidatorsWithEraPoints; use primitives::v2::{ - AccountId, AccountIndex, Balance, BlockNumber, CandidateEvent, CandidateHash, - CommittedCandidateReceipt, CoreState, DisputeState, GroupRotationInfo, Hash as HashT, - Id as ParaId, InboundDownwardMessage, InboundHrmpMessage, Moment, Nonce, - OccupiedCoreAssumption, PersistedValidationData, ScrapedOnChainVotes, - SessionInfo as SessionInfoData, Signature, ValidationCode, ValidationCodeHash, ValidatorId, - ValidatorIndex, + AccountId, AccountIndex, Balance, BlockNumber, CandidateEvent, CommittedCandidateReceipt, + CoreState, GroupRotationInfo, Hash as HashT, Id as ParaId, InboundDownwardMessage, + InboundHrmpMessage, Moment, Nonce, OccupiedCoreAssumption, PersistedValidationData, + ScrapedOnChainVotes, SessionInfo as SessionInfoData, Signature, ValidationCode, + ValidationCodeHash, ValidatorId, ValidatorIndex, }; use runtime_common::{ claims, impl_runtime_weights, paras_sudo_wrapper, BlockHashCount, BlockLength, From 60c1ddba04ae17104dfdf17f362f5ae67e455de6 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Thu, 25 Aug 2022 14:33:55 +0300 Subject: [PATCH 26/65] Fix arithmetic operations in tests --- .../src/disputes/with_staging_api/tests.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/node/core/provisioner/src/disputes/with_staging_api/tests.rs b/node/core/provisioner/src/disputes/with_staging_api/tests.rs index 6eb74cb044d4..a67aab69eb02 100644 --- a/node/core/provisioner/src/disputes/with_staging_api/tests.rs +++ b/node/core/provisioner/src/disputes/with_staging_api/tests.rs @@ -379,8 +379,8 @@ impl TestDisputes { &mut self, dispute_count: usize, ) -> (u32, usize) { - let local_votes_count = self.validators_count / 100 * 90; - let 
onchain_votes_count = self.validators_count / 100 * 80; + let local_votes_count = self.validators_count * 90 / 100; + let onchain_votes_count = self.validators_count * 80 / 100; let session_idx = 0; let lf = leaf(); let dummy_receipt = test_helpers::dummy_candidate_receipt(lf.hash.clone()); @@ -397,8 +397,8 @@ impl TestDisputes { &mut self, dispute_count: usize, ) -> (u32, usize) { - let local_votes_count = self.validators_count / 100 * 90; - let onchain_votes_count = self.validators_count / 100 * 40; + let local_votes_count = self.validators_count * 90 / 100; + let onchain_votes_count = self.validators_count * 40 / 100; let session_idx = 1; let lf = leaf(); let dummy_receipt = test_helpers::dummy_candidate_receipt(lf.hash.clone()); @@ -415,7 +415,7 @@ impl TestDisputes { &mut self, dispute_count: usize, ) -> (u32, usize) { - let local_votes_count = self.validators_count / 100 * 70; + let local_votes_count = self.validators_count * 70 / 100; let session_idx = 2; let lf = leaf(); let dummy_receipt = test_helpers::dummy_candidate_receipt(lf.hash.clone()); @@ -427,8 +427,8 @@ impl TestDisputes { } pub fn add_concluded_disputes_known_onchain(&mut self, dispute_count: usize) -> (u32, usize) { - let local_votes_count = self.validators_count / 100 * 80; - let onchain_votes_count = self.validators_count / 100 * 75; + let local_votes_count = self.validators_count * 80 / 100; + let onchain_votes_count = self.validators_count * 75 / 100; let session_idx = 3; let lf = leaf(); let dummy_receipt = test_helpers::dummy_candidate_receipt(lf.hash.clone()); @@ -441,7 +441,7 @@ impl TestDisputes { } pub fn add_concluded_disputes_unknown_onchain(&mut self, dispute_count: usize) -> (u32, usize) { - let local_votes_count = self.validators_count / 100 * 80; + let local_votes_count = self.validators_count * 80 / 100; let session_idx = 4; let lf = leaf(); let dummy_receipt = test_helpers::dummy_candidate_receipt(lf.hash.clone()); From e1654e7702bc83b065bb21da8eaf1a2c99cef67e Mon Sep 17 
00:00:00 2001 From: Tsvetomir Dimitrov Date: Thu, 25 Aug 2022 14:36:43 +0300 Subject: [PATCH 27/65] Use smaller test data --- .../src/disputes/with_staging_api/mod.rs | 6 +++++ .../src/disputes/with_staging_api/tests.rs | 23 +++++++++++-------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/node/core/provisioner/src/disputes/with_staging_api/mod.rs b/node/core/provisioner/src/disputes/with_staging_api/mod.rs index 3e9bcb342b82..6630fe942131 100644 --- a/node/core/provisioner/src/disputes/with_staging_api/mod.rs +++ b/node/core/provisioner/src/disputes/with_staging_api/mod.rs @@ -45,7 +45,10 @@ mod tests; /// The maximum number of disputes Provisioner will include in the inherent data. /// Serves as a protection not to flood the Runtime with excessive data. +#[cfg(not(test))] pub const MAX_DISPUTE_VOTES_FORWARDED_TO_RUNTIME: usize = 200 * 1_000; +#[cfg(test)] +pub const MAX_DISPUTE_VOTES_FORWARDED_TO_RUNTIME: usize = 200; // The magic numbers are: `estimated validators count` * `estimated disputes per validator` /// Implements the `select_disputes` function which selects dispute votes which should @@ -153,7 +156,10 @@ async fn vote_selection( where Sender: overseer::ProvisionerSenderTrait, { + #[cfg(not(test))] const BATCH_SIZE: usize = 1_100; + #[cfg(test)] + const BATCH_SIZE: usize = 11; // fetch in batches until there are enough votes let mut disputes = partitioned.into_iter().collect::>(); diff --git a/node/core/provisioner/src/disputes/with_staging_api/tests.rs b/node/core/provisioner/src/disputes/with_staging_api/tests.rs index a67aab69eb02..c8c30d9dbd9e 100644 --- a/node/core/provisioner/src/disputes/with_staging_api/tests.rs +++ b/node/core/provisioner/src/disputes/with_staging_api/tests.rs @@ -415,7 +415,7 @@ impl TestDisputes { &mut self, dispute_count: usize, ) -> (u32, usize) { - let local_votes_count = self.validators_count * 70 / 100; + let local_votes_count = self.validators_count * 90 / 100; let session_idx = 2; let lf = leaf(); let 
dummy_receipt = test_helpers::dummy_candidate_receipt(lf.hash.clone()); @@ -427,7 +427,7 @@ impl TestDisputes { } pub fn add_concluded_disputes_known_onchain(&mut self, dispute_count: usize) -> (u32, usize) { - let local_votes_count = self.validators_count * 80 / 100; + let local_votes_count = self.validators_count * 90 / 100; let onchain_votes_count = self.validators_count * 75 / 100; let session_idx = 3; let lf = leaf(); @@ -441,7 +441,7 @@ impl TestDisputes { } pub fn add_concluded_disputes_unknown_onchain(&mut self, dispute_count: usize) -> (u32, usize) { - let local_votes_count = self.validators_count * 80 / 100; + let local_votes_count = self.validators_count * 90 / 100; let session_idx = 4; let lf = leaf(); let dummy_receipt = test_helpers::dummy_candidate_receipt(lf.hash.clone()); @@ -486,8 +486,8 @@ impl TestDisputes { #[test] fn normal_flow() { - const VALIDATOR_COUNT: usize = 100; - const DISPUTES_PER_BATCH: usize = 10; + const VALIDATOR_COUNT: usize = 10; + const DISPUTES_PER_BATCH: usize = 2; const ACCEPTABLE_RUNTIME_VOTES_QUERIES_COUNT: usize = 1; let mut input = TestDisputes::new(VALIDATOR_COUNT); @@ -565,9 +565,12 @@ fn normal_flow() { #[test] fn many_batches() { - const VALIDATOR_COUNT: usize = 100; - const DISPUTES_PER_PARTITION: usize = 1000; - // Around 4_000 disputes are generated. `BATCH_SIZE` is 1_100. 
+ const VALIDATOR_COUNT: usize = 10; + const DISPUTES_PER_PARTITION: usize = 10; + + // 10 disputes per partition * 4 partitions = 40 disputes + // BATCH_SIZE = 11 + // => There should be no more than 40 / 11 queries ( ~4 ) const ACCEPTABLE_RUNTIME_VOTES_QUERIES_COUNT: usize = 4; let mut input = TestDisputes::new(VALIDATOR_COUNT); @@ -618,8 +621,8 @@ fn many_batches() { #[test] fn votes_above_limit() { - const VALIDATOR_COUNT: usize = 100; - const DISPUTES_PER_PARTITION: usize = 5_000; + const VALIDATOR_COUNT: usize = 10; + const DISPUTES_PER_PARTITION: usize = 50; const ACCEPTABLE_RUNTIME_VOTES_QUERIES_COUNT: usize = 4; let mut input = TestDisputes::new(VALIDATOR_COUNT); From e3179cbb736a1411ba5a779d1a47f01d3a23eb47 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Thu, 25 Aug 2022 15:33:19 +0300 Subject: [PATCH 28/65] Rename `RuntimeApiRequest::StagingDisputes` to `RuntimeApiRequest::Disputes` --- .../provisioner/src/disputes/with_staging_api/mod.rs | 7 ++++--- .../provisioner/src/disputes/with_staging_api/tests.rs | 2 +- node/core/provisioner/src/error.rs | 2 +- node/core/runtime-api/src/cache.rs | 2 +- node/core/runtime-api/src/lib.rs | 10 +++++----- node/subsystem-types/src/messages.rs | 4 ++-- 6 files changed, 14 insertions(+), 13 deletions(-) diff --git a/node/core/provisioner/src/disputes/with_staging_api/mod.rs b/node/core/provisioner/src/disputes/with_staging_api/mod.rs index 6630fe942131..4f0adeb74d99 100644 --- a/node/core/provisioner/src/disputes/with_staging_api/mod.rs +++ b/node/core/provisioner/src/disputes/with_staging_api/mod.rs @@ -75,7 +75,7 @@ pub const MAX_DISPUTE_VOTES_FORWARDED_TO_RUNTIME: usize = 200; /// /// # How the onchain votes are fetched /// -/// The logic outlined above relies on `RuntimeApiRequest::StagingDisputes` message from the Runtime staging API. +/// The logic outlined above relies on `RuntimeApiRequest::Disputes` message from the Runtime staging API. 
/// If the staging API is not enabled - the same logic is executed with empty onchain votes set. Effectively this /// means that all disputes are partitioned in groups 2 or 4 and all votes are sent to the Runtime. pub async fn select_disputes( @@ -92,7 +92,8 @@ where let onchain = match get_onchain_disputes(sender, leaf.hash.clone()).await { Ok(r) => r, Err(GetOnchainDisputesError::NotSupported(runtime_api_err, relay_parent)) => { - gum::debug!( + // Runtime version is checked before calling this method, so the error below should never happen! + gum::error!( target: LOG_TARGET, ?runtime_api_err, ?relay_parent, @@ -427,7 +428,7 @@ where sender .send_message(RuntimeApiMessage::Request( relay_parent, - RuntimeApiRequest::StagingDisputes(tx), + RuntimeApiRequest::Disputes(tx), )) .await; diff --git a/node/core/provisioner/src/disputes/with_staging_api/tests.rs b/node/core/provisioner/src/disputes/with_staging_api/tests.rs index c8c30d9dbd9e..c03a2612acd2 100644 --- a/node/core/provisioner/src/disputes/with_staging_api/tests.rs +++ b/node/core/provisioner/src/disputes/with_staging_api/tests.rs @@ -272,7 +272,7 @@ async fn mock_overseer( match from_job { AllMessages::RuntimeApi(RuntimeApiMessage::Request( _, - RuntimeApiRequest::StagingDisputes(sender), + RuntimeApiRequest::Disputes(sender), )) => { let _ = sender.send(Ok(disputes_db .onchain_disputes diff --git a/node/core/provisioner/src/error.rs b/node/core/provisioner/src/error.rs index 05e437854eac..9411a580b6ee 100644 --- a/node/core/provisioner/src/error.rs +++ b/node/core/provisioner/src/error.rs @@ -89,7 +89,7 @@ pub enum GetOnchainDisputesError { Execution(#[source] RuntimeApiError, Hash), #[error( - "runtime doesn't support RuntimeApiRequest::Disputes/RuntimeApiRequest::StagingDisputes for parent {1}" + "runtime doesn't support RuntimeApiRequest::Disputes for parent {1}" )] NotSupported(#[source] RuntimeApiError, Hash), } diff --git a/node/core/runtime-api/src/cache.rs b/node/core/runtime-api/src/cache.rs 
index 6f5fdc5d4657..0fe9b74dc86d 100644 --- a/node/core/runtime-api/src/cache.rs +++ b/node/core/runtime-api/src/cache.rs @@ -463,5 +463,5 @@ pub(crate) enum RequestResult { SubmitPvfCheckStatement(Hash, PvfCheckStatement, ValidatorSignature, ()), ValidationCodeHash(Hash, ParaId, OccupiedCoreAssumption, Option), Version(Hash, u32), - StagingDisputes(Hash, Vec<(SessionIndex, CandidateHash, DisputeState)>), + Disputes(Hash, Vec<(SessionIndex, CandidateHash, DisputeState)>), } diff --git a/node/core/runtime-api/src/lib.rs b/node/core/runtime-api/src/lib.rs index 3ee143d87918..297cc8db9c65 100644 --- a/node/core/runtime-api/src/lib.rs +++ b/node/core/runtime-api/src/lib.rs @@ -153,7 +153,7 @@ where .cache_validation_code_hash((relay_parent, para_id, assumption), hash), Version(relay_parent, version) => self.requests_cache.cache_version(relay_parent, version), - StagingDisputes(relay_parent, disputes) => + Disputes(relay_parent, disputes) => self.requests_cache.cache_disputes(relay_parent, disputes), } } @@ -256,8 +256,8 @@ where Request::ValidationCodeHash(para, assumption, sender) => query!(validation_code_hash(para, assumption), sender) .map(|sender| Request::ValidationCodeHash(para, assumption, sender)), - Request::StagingDisputes(sender) => - query!(disputes(), sender).map(|sender| Request::StagingDisputes(sender)), + Request::Disputes(sender) => + query!(disputes(), sender).map(|sender| Request::Disputes(sender)), } } @@ -499,7 +499,7 @@ where }, Request::ValidationCodeHash(para, assumption, sender) => query!(ValidationCodeHash, validation_code_hash(para, assumption), ver = 2, sender), - Request::StagingDisputes(sender) => - query!(StagingDisputes, get_disputes(), ver = 2, sender), + Request::Disputes(sender) => + query!(Disputes, get_disputes(), ver = 2, sender), } } diff --git a/node/subsystem-types/src/messages.rs b/node/subsystem-types/src/messages.rs index c896454594b9..458110939936 100644 --- a/node/subsystem-types/src/messages.rs +++ 
b/node/subsystem-types/src/messages.rs @@ -699,8 +699,8 @@ pub enum RuntimeApiRequest { OccupiedCoreAssumption, RuntimeApiSender>, ), - /// Returns all on-chain disputes at given block number. - StagingDisputes( + /// Returns all on-chain disputes at given block number. Available in v3. + Disputes( RuntimeApiSender)>>, ), } From 7e3bab7051d1843f0dfa0215c1131b82d2ba59b9 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Thu, 25 Aug 2022 16:25:37 +0300 Subject: [PATCH 29/65] Remove `staging-client` feature flag --- Cargo.toml | 1 - cli/Cargo.toml | 1 - node/core/provisioner/Cargo.toml | 3 --- node/core/provisioner/src/disputes/with_staging_api/mod.rs | 3 +-- node/service/Cargo.toml | 2 -- 5 files changed, 1 insertion(+), 9 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index cf238625e2d6..0e662030faa8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -199,7 +199,6 @@ try-runtime = [ "polkadot-cli/try-runtime" ] fast-runtime = [ "polkadot-cli/fast-runtime" ] runtime-metrics = [ "polkadot-cli/runtime-metrics" ] pyroscope = ["polkadot-cli/pyroscope"] -staging-client = ["polkadot-cli/staging-client"] # Configuration for building a .deb package - for use with `cargo-deb` [package.metadata.deb] diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 1a161db7916d..0f009e2afb81 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -74,4 +74,3 @@ rococo-native = ["service/rococo-native"] malus = ["full-node", "service/malus"] runtime-metrics = ["service/runtime-metrics", "polkadot-node-metrics/runtime-metrics"] -staging-client = ["service/staging-client"] diff --git a/node/core/provisioner/Cargo.toml b/node/core/provisioner/Cargo.toml index 7a06a615154e..47e519087723 100644 --- a/node/core/provisioner/Cargo.toml +++ b/node/core/provisioner/Cargo.toml @@ -22,6 +22,3 @@ sp-application-crypto = { git = "https://github.com/paritytech/substrate", branc sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" } polkadot-node-subsystem-test-helpers = { path = 
"../../subsystem-test-helpers" } test-helpers = { package = "polkadot-primitives-test-helpers", path = "../../../primitives/test-helpers" } - -[features] -staging-client = [] diff --git a/node/core/provisioner/src/disputes/with_staging_api/mod.rs b/node/core/provisioner/src/disputes/with_staging_api/mod.rs index 4f0adeb74d99..7b75d1e2facd 100644 --- a/node/core/provisioner/src/disputes/with_staging_api/mod.rs +++ b/node/core/provisioner/src/disputes/with_staging_api/mod.rs @@ -124,10 +124,9 @@ where gum::trace!( target: LOG_TARGET, ?leaf, - "Got {} recent disputes and {} onchain disputes. Runtime supports staging API: {}", + "Got {} recent disputes and {} onchain disputes.", recent_disputes.len(), onchain.len(), - if cfg!(feature = "staging-client") { true } else { false } ); let partitioned = partition_recent_disputes(recent_disputes, &onchain); diff --git a/node/service/Cargo.toml b/node/service/Cargo.toml index a3873d9cf2da..43c246675762 100644 --- a/node/service/Cargo.toml +++ b/node/service/Cargo.toml @@ -200,5 +200,3 @@ runtime-metrics = [ "polkadot-runtime/runtime-metrics", "polkadot-runtime-parachains/runtime-metrics" ] - -staging-client = ["polkadot-node-core-provisioner/staging-client"] From 24ae8cea1f8473b219b00e793a6652a3406e4c94 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Thu, 25 Aug 2022 16:31:36 +0300 Subject: [PATCH 30/65] fmt --- node/core/provisioner/src/disputes/with_staging_api/mod.rs | 5 +---- node/core/provisioner/src/error.rs | 4 +--- node/core/runtime-api/src/lib.rs | 3 +-- node/subsystem-types/src/messages.rs | 4 +--- 4 files changed, 4 insertions(+), 12 deletions(-) diff --git a/node/core/provisioner/src/disputes/with_staging_api/mod.rs b/node/core/provisioner/src/disputes/with_staging_api/mod.rs index 7b75d1e2facd..6886b3a47792 100644 --- a/node/core/provisioner/src/disputes/with_staging_api/mod.rs +++ b/node/core/provisioner/src/disputes/with_staging_api/mod.rs @@ -425,10 +425,7 @@ where gum::trace!(target: LOG_TARGET, 
?relay_parent, "Fetching on-chain disputes"); let (tx, rx) = oneshot::channel(); sender - .send_message(RuntimeApiMessage::Request( - relay_parent, - RuntimeApiRequest::Disputes(tx), - )) + .send_message(RuntimeApiMessage::Request(relay_parent, RuntimeApiRequest::Disputes(tx))) .await; rx.await diff --git a/node/core/provisioner/src/error.rs b/node/core/provisioner/src/error.rs index 9411a580b6ee..9fb958c4f339 100644 --- a/node/core/provisioner/src/error.rs +++ b/node/core/provisioner/src/error.rs @@ -88,9 +88,7 @@ pub enum GetOnchainDisputesError { #[error("runtime execution error occurred while fetching onchain disputes for parent {1}")] Execution(#[source] RuntimeApiError, Hash), - #[error( - "runtime doesn't support RuntimeApiRequest::Disputes for parent {1}" - )] + #[error("runtime doesn't support RuntimeApiRequest::Disputes for parent {1}")] NotSupported(#[source] RuntimeApiError, Hash), } diff --git a/node/core/runtime-api/src/lib.rs b/node/core/runtime-api/src/lib.rs index 297cc8db9c65..cbae6b266422 100644 --- a/node/core/runtime-api/src/lib.rs +++ b/node/core/runtime-api/src/lib.rs @@ -499,7 +499,6 @@ where }, Request::ValidationCodeHash(para, assumption, sender) => query!(ValidationCodeHash, validation_code_hash(para, assumption), ver = 2, sender), - Request::Disputes(sender) => - query!(Disputes, get_disputes(), ver = 2, sender), + Request::Disputes(sender) => query!(Disputes, get_disputes(), ver = 2, sender), } } diff --git a/node/subsystem-types/src/messages.rs b/node/subsystem-types/src/messages.rs index 458110939936..73063ed27c9e 100644 --- a/node/subsystem-types/src/messages.rs +++ b/node/subsystem-types/src/messages.rs @@ -700,9 +700,7 @@ pub enum RuntimeApiRequest { RuntimeApiSender>, ), /// Returns all on-chain disputes at given block number. Available in v3. - Disputes( - RuntimeApiSender)>>, - ), + Disputes(RuntimeApiSender)>>), } /// A message to the Runtime API subsystem. 
From 00bd01ba12456cd554ac9c62b0a3a47e5a6fe53c Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Thu, 25 Aug 2022 16:46:09 +0300 Subject: [PATCH 31/65] Remove `vstaging` feature flag --- runtime/parachains/Cargo.toml | 1 - runtime/parachains/src/runtime_api_impl/mod.rs | 1 - runtime/rococo/Cargo.toml | 2 +- runtime/test-runtime/Cargo.toml | 2 +- runtime/westend/Cargo.toml | 2 +- 5 files changed, 3 insertions(+), 5 deletions(-) diff --git a/runtime/parachains/Cargo.toml b/runtime/parachains/Cargo.toml index b913adcf1639..becbf26e68e0 100644 --- a/runtime/parachains/Cargo.toml +++ b/runtime/parachains/Cargo.toml @@ -109,4 +109,3 @@ try-runtime = [ "pallet-vesting/try-runtime", ] runtime-metrics = ["sp-tracing/with-tracing", "polkadot-runtime-metrics/runtime-metrics"] -vstaging = [] diff --git a/runtime/parachains/src/runtime_api_impl/mod.rs b/runtime/parachains/src/runtime_api_impl/mod.rs index 603b6c4cb385..e11728f032ee 100644 --- a/runtime/parachains/src/runtime_api_impl/mod.rs +++ b/runtime/parachains/src/runtime_api_impl/mod.rs @@ -21,5 +21,4 @@ //! to a v2 would be done. 
pub mod v2; -#[cfg(feature = "vstaging")] pub mod vstaging; diff --git a/runtime/rococo/Cargo.toml b/runtime/rococo/Cargo.toml index cc42af8255db..0ca78c98d2af 100644 --- a/runtime/rococo/Cargo.toml +++ b/runtime/rococo/Cargo.toml @@ -64,7 +64,7 @@ rococo-runtime-constants = { package = "rococo-runtime-constants", path = "./con runtime-common = { package = "polkadot-runtime-common", path = "../common", default-features = false } primitives = { package = "polkadot-primitives", path = "../../primitives", default-features = false } polkadot-parachain = { path = "../../parachain", default-features = false } -runtime-parachains = { package = "polkadot-runtime-parachains", path = "../parachains", default-features = false, features = ["vstaging"] } +runtime-parachains = { package = "polkadot-runtime-parachains", path = "../parachains", default-features = false } xcm = { package = "xcm", path = "../../xcm", default-features = false } xcm-executor = { package = "xcm-executor", path = "../../xcm/xcm-executor", default-features = false } diff --git a/runtime/test-runtime/Cargo.toml b/runtime/test-runtime/Cargo.toml index 2313be141443..3779390b2365 100644 --- a/runtime/test-runtime/Cargo.toml +++ b/runtime/test-runtime/Cargo.toml @@ -58,7 +58,7 @@ runtime-common = { package = "polkadot-runtime-common", path = "../common", defa primitives = { package = "polkadot-primitives", path = "../../primitives", default-features = false } pallet-xcm = { path = "../../xcm/pallet-xcm", default-features = false } polkadot-parachain = { path = "../../parachain", default-features = false } -polkadot-runtime-parachains = { path = "../parachains", default-features = false, features = ["vstaging"]} +polkadot-runtime-parachains = { path = "../parachains", default-features = false } xcm-builder = { path = "../../xcm/xcm-builder", default-features = false } xcm-executor = { path = "../../xcm/xcm-executor", default-features = false } xcm = { path = "../../xcm", default-features = false } diff --git 
a/runtime/westend/Cargo.toml b/runtime/westend/Cargo.toml index 0e0a6dbce3f7..3509cfaeda1f 100644 --- a/runtime/westend/Cargo.toml +++ b/runtime/westend/Cargo.toml @@ -87,7 +87,7 @@ hex-literal = { version = "0.3.4", optional = true } runtime-common = { package = "polkadot-runtime-common", path = "../common", default-features = false } primitives = { package = "polkadot-primitives", path = "../../primitives", default-features = false } polkadot-parachain = { path = "../../parachain", default-features = false } -runtime-parachains = { package = "polkadot-runtime-parachains", path = "../parachains", default-features = false, features = ["vstaging"] } +runtime-parachains = { package = "polkadot-runtime-parachains", path = "../parachains", default-features = false } xcm = { package = "xcm", path = "../../xcm", default-features = false } xcm-executor = { package = "xcm-executor", path = "../../xcm/xcm-executor", default-features = false } From c771aa618220f2988ce012e9caeaf2984f39903b Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Thu, 25 Aug 2022 16:57:00 +0300 Subject: [PATCH 32/65] Some comments regarding the staging api --- primitives/src/lib.rs | 6 +++--- primitives/src/vstaging/mod.rs | 2 +- runtime/parachains/src/runtime_api_impl/mod.rs | 11 ++++++++--- runtime/parachains/src/runtime_api_impl/vstaging.rs | 2 +- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/primitives/src/lib.rs b/primitives/src/lib.rs index 121f7cb40d23..168b5795b040 100644 --- a/primitives/src/lib.rs +++ b/primitives/src/lib.rs @@ -22,9 +22,9 @@ // `v2` is currently the latest stable version of the runtime API. pub mod v2; -// The 'staging' version is special - while other versions are set in stone, -// the staging version is malleable. Once it's released, it gets the next -// version number. +// The 'staging' version is special - it contains primitives which are +// still in development. Once they are considered stable, they will be +// moved to a new versioned module. 
pub mod vstaging; // `runtime_api` contains the actual API implementation. It contains stable and diff --git a/primitives/src/vstaging/mod.rs b/primitives/src/vstaging/mod.rs index 2f29ffbe60b7..64671bd48a60 100644 --- a/primitives/src/vstaging/mod.rs +++ b/primitives/src/vstaging/mod.rs @@ -16,4 +16,4 @@ //! Staging Primitives. -// Put any primitives used by staging API functions here +// Put any primitives used by staging APIs functions here diff --git a/runtime/parachains/src/runtime_api_impl/mod.rs b/runtime/parachains/src/runtime_api_impl/mod.rs index e11728f032ee..deee7c7827bb 100644 --- a/runtime/parachains/src/runtime_api_impl/mod.rs +++ b/runtime/parachains/src/runtime_api_impl/mod.rs @@ -17,8 +17,13 @@ //! Runtime API implementations for Parachains. //! //! These are exposed as different modules using different sets of primitives. -//! At the moment there is only a v2 module and it is not completely clear how migration -//! to a v2 would be done. - +//! At the moment there is a v2 module for the current stable api and +//! vstaging module for all staging methods. +//! When new version of the stable api is released it will be based on v2 and +//! will contain methods from vstaging. +//! The promotion consists of the following steps: +//! 1. Bump the version of the stable module (e.g. v2 becomes v3) +//! 2. Move methods from vstaging to v3. The new stable version can include some +//! or all methods from vstaging. pub mod v2; pub mod vstaging; diff --git a/runtime/parachains/src/runtime_api_impl/vstaging.rs b/runtime/parachains/src/runtime_api_impl/vstaging.rs index 8715cdc53121..7ae235c8133a 100644 --- a/runtime/parachains/src/runtime_api_impl/vstaging.rs +++ b/runtime/parachains/src/runtime_api_impl/vstaging.rs @@ -14,7 +14,7 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -// Put implementations of functions from staging API here. +//! Put implementations of functions from staging APIs here. 
use crate::disputes; use primitives::v2::{CandidateHash, DisputeState, SessionIndex}; From 543b4ef2ccd8a5474d0193c6f48a8a4c171280f2 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Thu, 25 Aug 2022 17:13:00 +0300 Subject: [PATCH 33/65] Rename dispute selection modules in provisioner with_staging_api -> prioritized_selection without_staging_api -> random_selection --- node/core/provisioner/src/disputes/mod.rs | 12 ++++++----- .../mod.rs | 0 .../tests.rs | 2 +- .../mod.rs | 0 node/core/provisioner/src/lib.rs | 21 +++++++++++-------- node/core/provisioner/src/metrics.rs | 2 +- 6 files changed, 21 insertions(+), 16 deletions(-) rename node/core/provisioner/src/disputes/{with_staging_api => prioritized_selection}/mod.rs (100%) rename node/core/provisioner/src/disputes/{with_staging_api => prioritized_selection}/tests.rs (99%) rename node/core/provisioner/src/disputes/{without_staging_api => random_selection}/mod.rs (100%) diff --git a/node/core/provisioner/src/disputes/mod.rs b/node/core/provisioner/src/disputes/mod.rs index f89d2b974809..404e800702b1 100644 --- a/node/core/provisioner/src/disputes/mod.rs +++ b/node/core/provisioner/src/disputes/mod.rs @@ -15,9 +15,11 @@ // along with Polkadot. If not, see . //! The disputes module is responsible for selecting dispute votes to be sent with the inherent data. It contains two -//! different implementations, extracted in two separate modules - `with_staging_api` and `without_staging_api`. The -//! active one is controlled with a feature flag (`staging-client`). The entrypoint to these implementations is the -//! `select_disputes` function. Refer to the documentation of the modules for more details about each implementation. +//! different implementations, extracted in two separate modules - `random_selection` and `prioritized_selection`. Which +//! implementation will be executed depends on the version of the runtime. Runtime v2 supports `random_selection`. Runtime +//! v3 and above - `prioritized_selection`. 
The entrypoint to these implementations is the `select_disputes` function. +//! prioritized_selection` is considered superior and will be the default one in the future. Refer to the documentation of +//! the modules for more details about each implementation. use crate::LOG_TARGET; use futures::channel::oneshot; @@ -46,6 +48,6 @@ async fn request_votes( } } -pub(crate) mod with_staging_api; +pub(crate) mod prioritized_selection; -pub(crate) mod without_staging_api; +pub(crate) mod random_selection; diff --git a/node/core/provisioner/src/disputes/with_staging_api/mod.rs b/node/core/provisioner/src/disputes/prioritized_selection/mod.rs similarity index 100% rename from node/core/provisioner/src/disputes/with_staging_api/mod.rs rename to node/core/provisioner/src/disputes/prioritized_selection/mod.rs diff --git a/node/core/provisioner/src/disputes/with_staging_api/tests.rs b/node/core/provisioner/src/disputes/prioritized_selection/tests.rs similarity index 99% rename from node/core/provisioner/src/disputes/with_staging_api/tests.rs rename to node/core/provisioner/src/disputes/prioritized_selection/tests.rs index c03a2612acd2..ee566e362b7a 100644 --- a/node/core/provisioner/src/disputes/with_staging_api/tests.rs +++ b/node/core/provisioner/src/disputes/prioritized_selection/tests.rs @@ -16,7 +16,7 @@ use super::super::{ super::{tests::common::test_harness, *}, - with_staging_api::*, + prioritized_selection::*, }; use bitvec::prelude::*; use futures::channel::mpsc; diff --git a/node/core/provisioner/src/disputes/without_staging_api/mod.rs b/node/core/provisioner/src/disputes/random_selection/mod.rs similarity index 100% rename from node/core/provisioner/src/disputes/without_staging_api/mod.rs rename to node/core/provisioner/src/disputes/random_selection/mod.rs diff --git a/node/core/provisioner/src/lib.rs b/node/core/provisioner/src/lib.rs index 5b80cda09741..33f76dc95da9 100644 --- a/node/core/provisioner/src/lib.rs +++ b/node/core/provisioner/src/lib.rs @@ -60,7 +60,7 
@@ const SEND_INHERENT_DATA_TIMEOUT: std::time::Duration = core::time::Duration::fr const LOG_TARGET: &str = "parachain::provisioner"; -const STAGING_RUNTIME_VERSION_REQUIREMENT: u32 = 3; +const PRIORITIZED_SELECTION_RUNTIME_VERSION_REQUIREMENT: u32 = 3; /// The provisioner subsystem. pub struct ProvisionerSubsystem { @@ -362,13 +362,16 @@ async fn send_inherent_data( "Selecting disputes" ); - let disputes = - match has_staging_runtime(from_job, leaf.hash.clone(), STAGING_RUNTIME_VERSION_REQUIREMENT) - .await - { - true => disputes::with_staging_api::select_disputes(from_job, metrics, leaf).await?, - false => disputes::without_staging_api::select_disputes(from_job, metrics).await?, - }; + let disputes = match has_required_runtime( + from_job, + leaf.hash.clone(), + PRIORITIZED_SELECTION_RUNTIME_VERSION_REQUIREMENT, + ) + .await + { + true => disputes::prioritized_selection::select_disputes(from_job, metrics, leaf).await?, + false => disputes::random_selection::select_disputes(from_job, metrics).await?, + }; gum::trace!( target: LOG_TARGET, @@ -685,7 +688,7 @@ fn bitfields_indicate_availability( 3 * availability.count_ones() >= 2 * availability.len() } -async fn has_staging_runtime( +async fn has_required_runtime( sender: &mut impl overseer::ProvisionerSenderTrait, relay_parent: Hash, required_runtime_version: u32, diff --git a/node/core/provisioner/src/metrics.rs b/node/core/provisioner/src/metrics.rs index 23c3a84de228..4498f66fc7e0 100644 --- a/node/core/provisioner/src/metrics.rs +++ b/node/core/provisioner/src/metrics.rs @@ -14,7 +14,7 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . 
-use crate::disputes::with_staging_api::PartitionedDisputes; +use crate::disputes::prioritized_selection::PartitionedDisputes; use polkadot_node_subsystem_util::metrics::{self, prometheus}; #[derive(Clone)] From 98ee76f7d098f754613433358bdc3d1be8f1ec50 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Thu, 25 Aug 2022 17:45:34 +0300 Subject: [PATCH 34/65] Comments for staging api --- primitives/src/runtime_api.rs | 102 ++++++++++++++++++++++++++++------ 1 file changed, 84 insertions(+), 18 deletions(-) diff --git a/primitives/src/runtime_api.rs b/primitives/src/runtime_api.rs index 2c95b2b0eb26..5acb9f041c10 100644 --- a/primitives/src/runtime_api.rs +++ b/primitives/src/runtime_api.rs @@ -18,31 +18,97 @@ //! of the Runtime API exposed from the Runtime to the Host. //! //! The functions in trait ParachainHost` can be part of the stable API -//! (which is versioned) or they can be staging (aka unstable functions). +//! (which is versioned) or they can be staging (aka unstable/testing +//! functions). //! -//! All stable API functions should use primitives from the latest version. -//! In the time of writing of this document - this is v2. So for example: -//! ```ignore -//! fn validators() -> Vec; -//! ``` -//! indicates a function from the stable v2 API. +//! The separation outlined above is achieved with the versioned api feature +//! of `decl_runtime_apis!` and `impl_runtime_apis!`. Before moving on let's +//! see a quick example about how api versioning works. //! -//! On the other hand a staging function's name should be prefixed with -//! `staging_` like this: -//! ```ignore -//! fn staging_get_disputes() -> Vec<(vstaging::SessionIndex, vstaging::CandidateHash, vstaging::DisputeState)>; +//! # Runtime api versioning crash course +//! +//! The versioning is achieved with the `api_version` attribute. It can be +//! placed on: +//! * trait declaration - represents the base version of the api. +//! 
* method declaration (inside a trait declaration) - represents a versioned +//! method, which is not available in the base version. +//! * trait implementation - represents which version of the api is being +//! implemented. +//! +//! Let's see a quick example: +//! +//! ```rust(ignore) +//! sp_api::decl_runtime_apis! { +//! #[api_version(2)] +//! pub trait MyApi { +//! fn fn1(); +//! fn fn2(); +//! #[api_version(3)] +//! fn fn3(); +//! #[api_version(4)] +//! fn fn4(); +//! } +//! } +//! +//! struct Runtime {} +//! +//! sp_api::impl_runtime_apis! { +//! #[api_version(3)] +//! impl self::MyApi for Runtime { +//! fn fn1() {} +//! fn fn2() {} +//! fn fn3() {} +//! } +//! } //! ``` +//! A new api named `MyApi` is declared with `decl_runtime_apis!`. The trait declaration +//! has got an `api_version` attribute which represents its base version - 2 in this case. +//! +//! The api has got three methods - `fn1`, `fn2`, `fn3` and `fn4`. `fn3` and `fn4` has got +//! an `api_version` attribute which makes them versioned methods. These methods do not exist +//! in the base version of the api. Behind the scenes the declaration above creates three +//! runtime apis: +//! * MyApiV2 with `fn1` and `fn2` +//! * MyApiV3 with `fn1`, `fn2` and `fn3`. +//! * MyApiV4 with `fn1`, `fn2`, `fn3` and `fn4`. //! -//! How a staging function becomes stable? +//! Please note that v4 contains all methods from v3, v3 all methods from v2 and so on. //! -//! Once a staging function is ready to be versioned the `renamed` macro -//! should be used to rename it and version it. For the example above: +//! Back to our example. At the end runtime api is implemented for `struct Runtime` with +//! `impl_runtime_apis` macro. `api_version` attribute is attached to the impl block which +//! means that a version different from the base one is being implemented - in our case this +//! is v3. +//! +//! This version of the api contains three methods so the `impl` block has got definitions +//! for them. 
Note that `fn4` is not implemented as it is not part of this version of the api.
+//! `impl_runtime_apis` generates a default implementation for it calling `unimplemented!()`.
+//!
+//! Hopefully this should be all you need to know in order to use versioned methods in the node.
+//! For more details about how the api versioning works refer to `sp_api`
+//! documentation [here](https://docs.substrate.io/rustdocs/latest/sp_api/macro.decl_runtime_apis.html).
+//!
+//! # How versioned methods are used for `ParachainHost`
+//!
+//! Let's introduce two types of `ParachainHost` api implementation:
+//! * stable - used on stable production networks like Polkadot and Kusama. There is only one
+//! stable api at a single point in time.
+//! * staging - used on test networks like Westend or Rococo. Depending on the development needs
+//! there can be zero, one or multiple staging apis.
+//!
+//! The stable version of `ParachainHost` is indicated by the base version of the api. Any staging
+//! method must use `api_version` attribute so that it is assigned to a specific version of a
+//! staging api. This way in a single declaration one can see what's the stable version of
+//! `ParachainHost` and what staging versions/functions are available.
+//!
+//! All stable api functions should use primitives from the latest version.
+//! In the time of writing of this document - this is v2. So for example:
 //! ```ignore
-//! #[renamed("staging_get_session_disputes", 3)]
-//! fn get_session_disputes() -> Vec<(v3::SessionIndex, v3::CandidateHash, v3::DisputeState)>;
+//! fn validators() -> Vec;
 //! ```
-//! For more details about how the API versioning works refer to `spi_api`
-//! documentation [here](https://docs.substrate.io/rustdocs/latest/sp_api/macro.decl_runtime_apis.html).
+//! indicates a function from the stable v2 API.
+//!
+//! All staging api functions should use primitives from vstaging. They should be clearly separated
+//! from the stable primitives.
use crate::v2; use parity_scale_codec::{Decode, Encode}; From 1884262e29536200eec4be0da7e4467ce4386091 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Thu, 25 Aug 2022 17:58:13 +0300 Subject: [PATCH 35/65] Comments --- .../core/provisioner/src/disputes/prioritized_selection/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/node/core/provisioner/src/disputes/prioritized_selection/mod.rs b/node/core/provisioner/src/disputes/prioritized_selection/mod.rs index 6886b3a47792..3a96a2c66034 100644 --- a/node/core/provisioner/src/disputes/prioritized_selection/mod.rs +++ b/node/core/provisioner/src/disputes/prioritized_selection/mod.rs @@ -46,10 +46,9 @@ mod tests; /// The maximum number of disputes Provisioner will include in the inherent data. /// Serves as a protection not to flood the Runtime with excessive data. #[cfg(not(test))] -pub const MAX_DISPUTE_VOTES_FORWARDED_TO_RUNTIME: usize = 200 * 1_000; +pub const MAX_DISPUTE_VOTES_FORWARDED_TO_RUNTIME: usize = 200_000; #[cfg(test)] pub const MAX_DISPUTE_VOTES_FORWARDED_TO_RUNTIME: usize = 200; -// The magic numbers are: `estimated validators count` * `estimated disputes per validator` /// Implements the `select_disputes` function which selects dispute votes which should /// be sent to the Runtime. 
From e6a684340dea8a8422f85e4b29fdfb7f47106f97 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Thu, 25 Aug 2022 21:23:22 +0300 Subject: [PATCH 36/65] Additional logging --- .../provisioner/src/disputes/prioritized_selection/mod.rs | 6 +++++- node/core/provisioner/src/disputes/random_selection/mod.rs | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/node/core/provisioner/src/disputes/prioritized_selection/mod.rs b/node/core/provisioner/src/disputes/prioritized_selection/mod.rs index 3a96a2c66034..da7f051fa19b 100644 --- a/node/core/provisioner/src/disputes/prioritized_selection/mod.rs +++ b/node/core/provisioner/src/disputes/prioritized_selection/mod.rs @@ -85,7 +85,11 @@ pub async fn select_disputes( where Sender: overseer::ProvisionerSenderTrait, { - gum::trace!(target: LOG_TARGET, ?leaf, "Selecting disputes for inherent data"); + gum::trace!( + target: LOG_TARGET, + ?leaf, + "Selecting disputes for inherent data using prioritized selection" + ); // Fetch the onchain disputes. We'll do a prioritization based on them. let onchain = match get_onchain_disputes(sender, leaf.hash.clone()).await { diff --git a/node/core/provisioner/src/disputes/random_selection/mod.rs b/node/core/provisioner/src/disputes/random_selection/mod.rs index 3bf5cac340d2..96a00801cbac 100644 --- a/node/core/provisioner/src/disputes/random_selection/mod.rs +++ b/node/core/provisioner/src/disputes/random_selection/mod.rs @@ -120,6 +120,8 @@ pub async fn select_disputes( where Sender: overseer::ProvisionerSenderTrait, { + gum::trace!(target: LOG_TARGET, "Selecting disputes for inherent data using random selection"); + /// The maximum number of disputes Provisioner will include in the inherent data. /// Serves as a protection not to flood the Runtime with excessive data. 
const MAX_DISPUTES_FORWARDED_TO_RUNTIME: usize = 1_000; From 5f8b4622ddc751e39e88e0282b13b1502bdf995c Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Mon, 29 Aug 2022 22:30:36 +0300 Subject: [PATCH 37/65] Code review feedback process_selected_disputes -> into_multi_dispute_statement_set typo In trait VoteType: vote_value -> is_valid --- .../src/disputes/prioritized_selection/mod.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/node/core/provisioner/src/disputes/prioritized_selection/mod.rs b/node/core/provisioner/src/disputes/prioritized_selection/mod.rs index da7f051fa19b..82eef0416e6f 100644 --- a/node/core/provisioner/src/disputes/prioritized_selection/mod.rs +++ b/node/core/provisioner/src/disputes/prioritized_selection/mod.rs @@ -145,10 +145,10 @@ where } let result = vote_selection(sender, partitioned, &onchain).await; - process_selected_disputes(metrics, result) + into_multi_dispute_statement_set(metrics, result) } -/// Selects dispute votes from `PartitionedDispites` which should be sent to the runtime. Votes which +/// Selects dispute votes from `PartitionedDisputes` which should be sent to the runtime. Votes which /// are already onchain are filtered out. Result should be sorted by `(SessionIndex, CandidateHash)` /// which is enforced by the `BTreeMap`. This is a requirement from the runtime. async fn vote_selection( @@ -318,17 +318,17 @@ fn partition_recent_disputes( // Helper trait to obtain the value of vote for `InvalidDisputeStatementKind` and `ValidDisputeStatementKind`. // The alternative was to pass a bool to `fn is_vote_worth_to_keep` explicitly but it's pointless as the value is already 'encoded' in the type. 
trait VoteType { - fn vote_value() -> bool; + fn is_valid() -> bool; } impl VoteType for InvalidDisputeStatementKind { - fn vote_value() -> bool { + fn is_valid() -> bool { false } } impl VoteType for ValidDisputeStatementKind { - fn vote_value() -> bool { + fn is_valid() -> bool { true } } @@ -339,7 +339,7 @@ fn is_vote_worth_to_keep( _: &T, onchain_state: &DisputeState, ) -> bool { - let offchain_vote = T::vote_value(); + let offchain_vote = T::is_valid(); let in_validators_for = onchain_state .validators_for .get(validator_index.0 as usize) @@ -386,7 +386,7 @@ async fn request_disputes( } // This function produces the return value for `pub fn select_disputes()` -fn process_selected_disputes( +fn into_multi_dispute_statement_set( metrics: &metrics::Metrics, dispute_candidate_votes: BTreeMap<(SessionIndex, CandidateHash), CandidateVotes>, ) -> Result { From 95b0096cfd65d99b2d267aaf096bb0614fdd1777 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Tue, 30 Aug 2022 10:12:56 +0300 Subject: [PATCH 38/65] Code review feedback --- node/core/runtime-api/src/lib.rs | 2 +- node/primitives/src/disputes/status.rs | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/node/core/runtime-api/src/lib.rs b/node/core/runtime-api/src/lib.rs index cbae6b266422..b9bde6cc07ae 100644 --- a/node/core/runtime-api/src/lib.rs +++ b/node/core/runtime-api/src/lib.rs @@ -499,6 +499,6 @@ where }, Request::ValidationCodeHash(para, assumption, sender) => query!(ValidationCodeHash, validation_code_hash(para, assumption), ver = 2, sender), - Request::Disputes(sender) => query!(Disputes, get_disputes(), ver = 2, sender), + Request::Disputes(sender) => query!(Disputes, get_disputes(), ver = 3, sender), } } diff --git a/node/primitives/src/disputes/status.rs b/node/primitives/src/disputes/status.rs index 52d003ab7c7f..44aed9b78e20 100644 --- a/node/primitives/src/disputes/status.rs +++ b/node/primitives/src/disputes/status.rs @@ -117,9 +117,7 @@ impl DisputeStatus { /// 
disputes. pub const ACTIVE_DURATION_SECS: Timestamp = 180; -/// Checks if dispute is inactive. Returns true if EITHER of the following statements is valid: -/// - The dispute has concluded OR -/// - The dispute has been active for duration more than ACTIVE_DURATION_SECS +/// Returns true if the dispute has concluded for longer than ACTIVE_DURATION_SECS pub fn dispute_is_inactive(status: &DisputeStatus, now: &Timestamp) -> bool { let at = status.concluded_at(); From 0f9cf6aca33e65008b3f3928e762481603a69a07 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Tue, 30 Aug 2022 10:19:38 +0300 Subject: [PATCH 39/65] Fix metrics --- node/core/provisioner/src/metrics.rs | 30 ++++++++++++++-------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/node/core/provisioner/src/metrics.rs b/node/core/provisioner/src/metrics.rs index 4498f66fc7e0..7385f774cf42 100644 --- a/node/core/provisioner/src/metrics.rs +++ b/node/core/provisioner/src/metrics.rs @@ -103,33 +103,33 @@ impl Metrics { pub(crate) fn on_partition_recent_disputes(&self, disputes: &PartitionedDisputes) { if let Some(metrics) = &self.0 { let PartitionedDisputes { - active_unconcluded_onchain: cant_conclude_onchain, - active_unknown_onchain: unknown_onchain, - active_concluded_onchain: can_conclude_onchain, - inactive_known_onchain: concluded_known_onchain, - inactive_unknown_onchain: concluded_unknown_onchain, + active_unconcluded_onchain, + active_unknown_onchain, + active_concluded_onchain, + inactive_known_onchain, + inactive_unknown_onchain, } = disputes; metrics .partitioned_disputes - .with_label_values(&["cant_conclude_onchain"]) - .inc_by(cant_conclude_onchain.len().try_into().unwrap_or(0)); + .with_label_values(&["active_unconcluded_onchain"]) + .inc_by(active_unconcluded_onchain.len().try_into().unwrap_or(0)); metrics .partitioned_disputes - .with_label_values(&["unknown_onchain"]) - .inc_by(unknown_onchain.len().try_into().unwrap_or(0)); + 
.with_label_values(&["active_unknown_onchain"]) + .inc_by(active_unknown_onchain.len().try_into().unwrap_or(0)); metrics .partitioned_disputes - .with_label_values(&["can_conclude_onchain"]) - .inc_by(can_conclude_onchain.len().try_into().unwrap_or(0)); + .with_label_values(&["active_concluded_onchain"]) + .inc_by(active_concluded_onchain.len().try_into().unwrap_or(0)); metrics .partitioned_disputes - .with_label_values(&["concluded_known_onchain"]) - .inc_by(concluded_known_onchain.len().try_into().unwrap_or(0)); + .with_label_values(&["inactive_known_onchain"]) + .inc_by(inactive_known_onchain.len().try_into().unwrap_or(0)); metrics .partitioned_disputes - .with_label_values(&["unknown_onchain"]) - .inc_by(concluded_unknown_onchain.len().try_into().unwrap_or(0)); + .with_label_values(&["inactive_unknown_onchain"]) + .inc_by(inactive_unknown_onchain.len().try_into().unwrap_or(0)); } } } From 308a3637cfdbb7e87225f99564ca76ad09c91af1 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Tue, 30 Aug 2022 10:26:39 +0300 Subject: [PATCH 40/65] get_disputes -> disputes --- node/core/runtime-api/src/lib.rs | 2 +- node/subsystem-types/src/runtime_client.rs | 6 +++--- primitives/src/runtime_api.rs | 2 +- runtime/rococo/src/lib.rs | 2 +- runtime/westend/src/lib.rs | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/node/core/runtime-api/src/lib.rs b/node/core/runtime-api/src/lib.rs index b9bde6cc07ae..91340f11df91 100644 --- a/node/core/runtime-api/src/lib.rs +++ b/node/core/runtime-api/src/lib.rs @@ -499,6 +499,6 @@ where }, Request::ValidationCodeHash(para, assumption, sender) => query!(ValidationCodeHash, validation_code_hash(para, assumption), ver = 2, sender), - Request::Disputes(sender) => query!(Disputes, get_disputes(), ver = 3, sender), + Request::Disputes(sender) => query!(Disputes, disputes(), ver = 3, sender), } } diff --git a/node/subsystem-types/src/runtime_client.rs b/node/subsystem-types/src/runtime_client.rs index 
b7e458fdd6d1..259c94fd4e51 100644 --- a/node/subsystem-types/src/runtime_client.rs +++ b/node/subsystem-types/src/runtime_client.rs @@ -186,7 +186,7 @@ pub trait RuntimeApiSubsystemClient { /// Returns all onchain disputes. /// This is a staging method! Do not use on production runtimes! - async fn get_disputes( + async fn disputes( &self, at: Hash, ) -> Result)>, ApiError>; @@ -375,10 +375,10 @@ where self.runtime_api().session_info_before_version_2(&BlockId::Hash(at), index) } - async fn get_disputes( + async fn disputes( &self, at: Hash, ) -> Result)>, ApiError> { - self.runtime_api().get_disputes(&BlockId::Hash(at)) + self.runtime_api().disputes(&BlockId::Hash(at)) } } diff --git a/primitives/src/runtime_api.rs b/primitives/src/runtime_api.rs index 5acb9f041c10..d0d0b7220bb9 100644 --- a/primitives/src/runtime_api.rs +++ b/primitives/src/runtime_api.rs @@ -220,6 +220,6 @@ sp_api::decl_runtime_apis! { /// Returns all onchain disputes. #[api_version(3)] - fn get_disputes() -> Vec<(v2::SessionIndex, v2::CandidateHash, v2::DisputeState)>; + fn disputes() -> Vec<(v2::SessionIndex, v2::CandidateHash, v2::DisputeState)>; } } diff --git a/runtime/rococo/src/lib.rs b/runtime/rococo/src/lib.rs index 48e9552af023..64ad3646e6e1 100644 --- a/runtime/rococo/src/lib.rs +++ b/runtime/rococo/src/lib.rs @@ -1100,7 +1100,7 @@ sp_api::impl_runtime_apis! { runtime_api_impl::validation_code_hash::(para_id, assumption) } - fn get_disputes() -> Vec<(SessionIndex, CandidateHash, DisputeState)> { + fn disputes() -> Vec<(SessionIndex, CandidateHash, DisputeState)> { runtime_parachains::runtime_api_impl::vstaging::get_session_disputes::() } } diff --git a/runtime/westend/src/lib.rs b/runtime/westend/src/lib.rs index db06daa5fa96..6d08049910e3 100644 --- a/runtime/westend/src/lib.rs +++ b/runtime/westend/src/lib.rs @@ -1375,7 +1375,7 @@ sp_api::impl_runtime_apis! 
{ parachains_runtime_api_impl::validation_code_hash::(para_id, assumption) } - fn get_disputes() -> Vec<(SessionIndex, CandidateHash, DisputeState)> { + fn disputes() -> Vec<(SessionIndex, CandidateHash, DisputeState)> { runtime_parachains::runtime_api_impl::vstaging::get_session_disputes::() } } From a969eba8c309d640d25151c65354b3b07e1e74f6 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Tue, 30 Aug 2022 10:38:04 +0300 Subject: [PATCH 41/65] Get time only once during partitioning --- .../provisioner/src/disputes/prioritized_selection/mod.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/node/core/provisioner/src/disputes/prioritized_selection/mod.rs b/node/core/provisioner/src/disputes/prioritized_selection/mod.rs index 82eef0416e6f..1effdec0d930 100644 --- a/node/core/provisioner/src/disputes/prioritized_selection/mod.rs +++ b/node/core/provisioner/src/disputes/prioritized_selection/mod.rs @@ -275,7 +275,8 @@ fn partition_recent_disputes( }) .collect::>(); - // Split ACTIVE from CONCLUDED disputes + // Split recent disputes in ACTIVE and INACTIVE + let time_now = &secs_since_epoch(); let (active, inactive): ( Vec<(SessionIndex, CandidateHash, DisputeStatus)>, Vec<(SessionIndex, CandidateHash, DisputeStatus)>, @@ -284,7 +285,7 @@ fn partition_recent_disputes( .map(|((session_index, candidate_hash), dispute_state)| { (session_index, candidate_hash, dispute_state) }) - .partition(|(_, _, status)| !dispute_is_inactive(status, &secs_since_epoch())); + .partition(|(_, _, status)| !dispute_is_inactive(status, time_now)); // Split ACTIVE in three groups... 
for (session_index, candidate_hash, _) in active { From 57e1b8889a6969ceba09201a102befb502efda49 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Tue, 30 Aug 2022 16:23:23 +0300 Subject: [PATCH 42/65] Fix partitioning --- .../src/disputes/prioritized_selection/mod.rs | 59 ++++++---- .../disputes/prioritized_selection/tests.rs | 104 ++++++++++++------ node/core/provisioner/src/metrics.rs | 27 +++-- 3 files changed, 121 insertions(+), 69 deletions(-) diff --git a/node/core/provisioner/src/disputes/prioritized_selection/mod.rs b/node/core/provisioner/src/disputes/prioritized_selection/mod.rs index 1effdec0d930..5316da5cb9db 100644 --- a/node/core/provisioner/src/disputes/prioritized_selection/mod.rs +++ b/node/core/provisioner/src/disputes/prioritized_selection/mod.rs @@ -74,9 +74,9 @@ pub const MAX_DISPUTE_VOTES_FORWARDED_TO_RUNTIME: usize = 200; /// /// # How the onchain votes are fetched /// -/// The logic outlined above relies on `RuntimeApiRequest::Disputes` message from the Runtime staging API. -/// If the staging API is not enabled - the same logic is executed with empty onchain votes set. Effectively this -/// means that all disputes are partitioned in groups 2 or 4 and all votes are sent to the Runtime. +/// The logic outlined above relies on `RuntimeApiRequest::Disputes` message from the Runtime. The user +/// check the Runtime version before calling `select_disputes`. If the function is used with old runtime +/// an error is logged and the logic will continue with empty onchain votes HashMap. pub async fn select_disputes( sender: &mut Sender, metrics: &metrics::Metrics, @@ -217,19 +217,26 @@ pub(crate) struct PartitionedDisputes { /// Hopefully this should never happen. /// Will be sent to the Runtime with FIRST priority. pub inactive_unknown_onchain: Vec<(SessionIndex, CandidateHash)>, - /// Active disputes completely unknown onchain. + /// Disputes which are INACTIVE locally but they are unconcluded for the Runtime. 
+	/// A dispute can have enough local votes to conclude and at the same time the
+	/// Runtime knows nothing about it and treats it as unconcluded. This discrepancy
+	/// should be treated with high priority.
 	/// Will be sent to the Runtime with SECOND priority.
+	pub inactive_unconcluded_onchain: Vec<(SessionIndex, CandidateHash)>,
+	/// Active disputes completely unknown onchain.
+	/// Will be sent to the Runtime with THIRD priority.
 	pub active_unknown_onchain: Vec<(SessionIndex, CandidateHash)>,
 	/// Active disputes unconcluded onchain.
-	/// Will be sent to the Runtime with THIRD priority.
+	/// Will be sent to the Runtime with FOURTH priority.
 	pub active_unconcluded_onchain: Vec<(SessionIndex, CandidateHash)>,
 	/// Active disputes concluded onchain. New votes are not that important for
 	/// this partition.
-	/// Will be sent to the Runtime with FOURTH priority.
+	/// Will be sent to the Runtime with FIFTH priority.
 	pub active_concluded_onchain: Vec<(SessionIndex, CandidateHash)>,
-	/// Inactive disputes which are known onchain. These are not
-	/// interesting and won't be sent to the Runtime.
-	pub inactive_known_onchain: Vec<(SessionIndex, CandidateHash)>,
+	/// Inactive disputes which have concluded onchain. These are not interesting and
+	/// won't be sent to the Runtime.
+ /// Will be DROPPED + pub inactive_concluded_onchain: Vec<(SessionIndex, CandidateHash)>, } impl PartitionedDisputes { @@ -240,10 +247,11 @@ impl PartitionedDisputes { fn into_iter(self) -> impl Iterator { self.inactive_unknown_onchain .into_iter() + .chain(self.inactive_unconcluded_onchain.into_iter()) .chain(self.active_unknown_onchain.into_iter()) .chain(self.active_unconcluded_onchain.into_iter()) .chain(self.active_concluded_onchain.into_iter()) - // inactive_known_onchain is dropped on purpose + // inactive_concluded_onchain is dropped on purpose } } @@ -261,6 +269,14 @@ fn secs_since_epoch() -> Timestamp { } } +fn concluded_onchain(onchain_state: &DisputeState) -> bool { + // Check if there are enough onchain votes for or against to conclude the dispute + let supermajority = supermajority_threshold(onchain_state.validators_for.len()); + + onchain_state.validators_for.count_ones() >= supermajority || + onchain_state.validators_against.count_ones() >= supermajority +} + fn partition_recent_disputes( recent: Vec<(SessionIndex, CandidateHash, DisputeStatus)>, onchain: &HashMap<(SessionIndex, CandidateHash), DisputeState>, @@ -290,25 +306,24 @@ fn partition_recent_disputes( // Split ACTIVE in three groups... 
for (session_index, candidate_hash, _) in active { match onchain.get(&(session_index, candidate_hash)) { - Some(d) => { - // Check if there are enough onchain votes for or against to conclude the dispute - let supermajority = supermajority_threshold(d.validators_for.len()); - if d.validators_for.count_ones() >= supermajority || - d.validators_against.count_ones() >= supermajority - { - partitioned.active_concluded_onchain.push((session_index, candidate_hash)); - } else { - partitioned.active_unconcluded_onchain.push((session_index, candidate_hash)); - } + Some(d) => match concluded_onchain(d) { + true => partitioned.active_concluded_onchain.push((session_index, candidate_hash)), + false => + partitioned.active_unconcluded_onchain.push((session_index, candidate_hash)), }, None => partitioned.active_unknown_onchain.push((session_index, candidate_hash)), }; } - // ... and INACTIVE in two more + // ... and INACTIVE in three more for (session_index, candidate_hash, _) in inactive { match onchain.get(&(session_index, candidate_hash)) { - Some(_) => partitioned.inactive_known_onchain.push((session_index, candidate_hash)), + Some(onchain_state) => match concluded_onchain(onchain_state) { + true => + partitioned.inactive_concluded_onchain.push((session_index, candidate_hash)), + false => + partitioned.inactive_unconcluded_onchain.push((session_index, candidate_hash)), + }, None => partitioned.inactive_unknown_onchain.push((session_index, candidate_hash)), } } diff --git a/node/core/provisioner/src/disputes/prioritized_selection/tests.rs b/node/core/provisioner/src/disputes/prioritized_selection/tests.rs index ee566e362b7a..6b7f99a1df07 100644 --- a/node/core/provisioner/src/disputes/prioritized_selection/tests.rs +++ b/node/core/provisioner/src/disputes/prioritized_selection/tests.rs @@ -20,7 +20,7 @@ use super::super::{ }; use bitvec::prelude::*; use futures::channel::mpsc; -use polkadot_node_primitives::{CandidateVotes, DisputeStatus}; +use 
polkadot_node_primitives::{CandidateVotes, DisputeStatus, ACTIVE_DURATION_SECS}; use polkadot_node_subsystem::messages::{ AllMessages, DisputeCoordinatorMessage, RuntimeApiMessage, RuntimeApiRequest, }; @@ -112,13 +112,24 @@ fn should_keep_vote_behaves() { fn partitioning_happy_case() { let mut input = Vec::<(SessionIndex, CandidateHash, DisputeStatus)>::new(); let mut onchain = HashMap::<(u32, CandidateHash), DisputeState>::new(); + let time_now = secs_since_epoch(); // Create one dispute for each partition + let inactive_unknown_onchain = ( + 0, + CandidateHash(Hash::random()), + DisputeStatus::ConcludedFor(time_now - ACTIVE_DURATION_SECS * 2), + ); + input.push(inactive_unknown_onchain.clone()); - let unconcluded_onchain = (0, CandidateHash(Hash::random()), DisputeStatus::Active); - input.push(unconcluded_onchain.clone()); + let inactive_unconcluded_onchain = ( + 1, + CandidateHash(Hash::random()), + DisputeStatus::ConcludedFor(time_now - ACTIVE_DURATION_SECS * 2), + ); + input.push(inactive_unconcluded_onchain.clone()); onchain.insert( - (unconcluded_onchain.0, unconcluded_onchain.1.clone()), + (inactive_unconcluded_onchain.0, inactive_unconcluded_onchain.1.clone()), DisputeState { validators_for: bitvec![u8, Lsb0; 1, 1, 1, 0, 0, 0, 0, 0, 0], validators_against: bitvec![u8, Lsb0; 0, 0, 0, 0, 0, 0, 0, 0, 0], @@ -127,68 +138,86 @@ fn partitioning_happy_case() { }, ); - let unknown_onchain = (1, CandidateHash(Hash::random()), DisputeStatus::Active); - input.push(unknown_onchain.clone()); + let active_unknown_onchain = (2, CandidateHash(Hash::random()), DisputeStatus::Active); + input.push(active_unknown_onchain.clone()); - let concluded_onchain = (2, CandidateHash(Hash::random()), DisputeStatus::Active); - input.push(concluded_onchain.clone()); + let active_unconcluded_onchain = (3, CandidateHash(Hash::random()), DisputeStatus::Active); + input.push(active_unconcluded_onchain.clone()); onchain.insert( - (concluded_onchain.0, concluded_onchain.1.clone()), + 
(active_unconcluded_onchain.0, active_unconcluded_onchain.1.clone()), DisputeState { - validators_for: bitvec![u8, Lsb0; 1, 1, 1, 1, 1, 1, 1, 1, 0], + validators_for: bitvec![u8, Lsb0; 1, 1, 1, 0, 0, 0, 0, 0, 0], validators_against: bitvec![u8, Lsb0; 0, 0, 0, 0, 0, 0, 0, 0, 0], start: 1, concluded_at: None, }, ); - let concluded_known_onchain = - (3, CandidateHash(Hash::random()), DisputeStatus::ConcludedFor(0)); - input.push(concluded_known_onchain.clone()); + let active_concluded_onchain = (4, CandidateHash(Hash::random()), DisputeStatus::Active); + input.push(active_concluded_onchain.clone()); onchain.insert( - (concluded_known_onchain.0, concluded_known_onchain.1.clone()), + (active_concluded_onchain.0, active_concluded_onchain.1.clone()), DisputeState { - validators_for: bitvec![u8, Lsb0; 1, 1, 1, 1, 1, 1, 1, 1, 1], + validators_for: bitvec![u8, Lsb0; 1, 1, 1, 1, 1, 1, 1, 1, 0], validators_against: bitvec![u8, Lsb0; 0, 0, 0, 0, 0, 0, 0, 0, 0], start: 1, - concluded_at: None, + concluded_at: Some(3), }, ); - let concluded_unknown_onchain = - (4, CandidateHash(Hash::random()), DisputeStatus::ConcludedFor(0)); - input.push(concluded_unknown_onchain.clone()); + let inactive_concluded_onchain = ( + 5, + CandidateHash(Hash::random()), + DisputeStatus::ConcludedFor(time_now - ACTIVE_DURATION_SECS * 2), + ); + input.push(inactive_concluded_onchain.clone()); + onchain.insert( + (inactive_concluded_onchain.0, inactive_concluded_onchain.1.clone()), + DisputeState { + validators_for: bitvec![u8, Lsb0; 1, 1, 1, 1, 1, 1, 1, 0, 0], + validators_against: bitvec![u8, Lsb0; 0, 0, 0, 0, 0, 0, 0, 0, 0], + start: 1, + concluded_at: Some(3), + }, + ); let result = partition_recent_disputes(input, &onchain); - assert_eq!(result.active_unconcluded_onchain.len(), 1); + // Check results + assert_eq!(result.inactive_unknown_onchain.len(), 1); assert_eq!( - result.active_unconcluded_onchain.get(0).unwrap(), - &(unconcluded_onchain.0, unconcluded_onchain.1) + 
result.inactive_unknown_onchain.get(0).unwrap(), + &(inactive_unknown_onchain.0, inactive_unknown_onchain.1) + ); + + assert_eq!(result.inactive_unconcluded_onchain.len(), 1); + assert_eq!( + result.inactive_unconcluded_onchain.get(0).unwrap(), + &(inactive_unconcluded_onchain.0, inactive_unconcluded_onchain.1) ); assert_eq!(result.active_unknown_onchain.len(), 1); assert_eq!( result.active_unknown_onchain.get(0).unwrap(), - &(unknown_onchain.0, unknown_onchain.1) + &(active_unknown_onchain.0, active_unknown_onchain.1) ); - assert_eq!(result.active_concluded_onchain.len(), 1); + assert_eq!(result.active_unconcluded_onchain.len(), 1); assert_eq!( - result.active_concluded_onchain.get(0).unwrap(), - &(concluded_onchain.0, concluded_onchain.1) + result.active_unconcluded_onchain.get(0).unwrap(), + &(active_unconcluded_onchain.0, active_unconcluded_onchain.1) ); - assert_eq!(result.inactive_known_onchain.len(), 1); + assert_eq!(result.active_concluded_onchain.len(), 1); assert_eq!( - result.inactive_known_onchain.get(0).unwrap(), - &(concluded_known_onchain.0, concluded_known_onchain.1) + result.active_concluded_onchain.get(0).unwrap(), + &(active_concluded_onchain.0, active_concluded_onchain.1) ); - assert_eq!(result.inactive_unknown_onchain.len(), 1); + assert_eq!(result.inactive_concluded_onchain.len(), 1); assert_eq!( - result.inactive_unknown_onchain.get(0).unwrap(), - &(concluded_unknown_onchain.0, concluded_unknown_onchain.1) + result.inactive_concluded_onchain.get(0).unwrap(), + &(inactive_concluded_onchain.0, inactive_concluded_onchain.1) ); } @@ -240,13 +269,13 @@ fn partitioning_duplicated_dispute() { let mut input = Vec::<(SessionIndex, CandidateHash, DisputeStatus)>::new(); let mut onchain = HashMap::<(u32, CandidateHash), DisputeState>::new(); - let some_dispute = (3, CandidateHash(Hash::random()), DisputeStatus::ConcludedFor(0)); + let some_dispute = (3, CandidateHash(Hash::random()), DisputeStatus::Active); input.push(some_dispute.clone()); 
input.push(some_dispute.clone()); onchain.insert( (some_dispute.0, some_dispute.1.clone()), DisputeState { - validators_for: bitvec![u8, Lsb0; 1, 1, 1, 1, 1, 1, 1, 1, 1], + validators_for: bitvec![u8, Lsb0; 1, 1, 1, 0, 0, 0, 0, 0, 0], validators_against: bitvec![u8, Lsb0; 0, 0, 0, 0, 0, 0, 0, 0, 0], start: 1, concluded_at: None, @@ -255,8 +284,11 @@ fn partitioning_duplicated_dispute() { let result = partition_recent_disputes(input, &onchain); - assert_eq!(result.inactive_known_onchain.len(), 1); - assert_eq!(result.inactive_known_onchain.get(0).unwrap(), &(some_dispute.0, some_dispute.1)); + assert_eq!(result.active_unconcluded_onchain.len(), 1); + assert_eq!( + result.active_unconcluded_onchain.get(0).unwrap(), + &(some_dispute.0, some_dispute.1) + ); } // diff --git a/node/core/provisioner/src/metrics.rs b/node/core/provisioner/src/metrics.rs index 7385f774cf42..aaa57fcc9da5 100644 --- a/node/core/provisioner/src/metrics.rs +++ b/node/core/provisioner/src/metrics.rs @@ -103,33 +103,38 @@ impl Metrics { pub(crate) fn on_partition_recent_disputes(&self, disputes: &PartitionedDisputes) { if let Some(metrics) = &self.0 { let PartitionedDisputes { - active_unconcluded_onchain, + inactive_unknown_onchain, + inactive_unconcluded_onchain: inactive_unconcluded_known_onchain, active_unknown_onchain, + active_unconcluded_onchain, active_concluded_onchain, - inactive_known_onchain, - inactive_unknown_onchain, + inactive_concluded_onchain: inactive_concluded_known_onchain, } = disputes; metrics .partitioned_disputes - .with_label_values(&["active_unconcluded_onchain"]) - .inc_by(active_unconcluded_onchain.len().try_into().unwrap_or(0)); + .with_label_values(&["inactive_unknown_onchain"]) + .inc_by(inactive_unknown_onchain.len().try_into().unwrap_or(0)); + metrics + .partitioned_disputes + .with_label_values(&["inactive_unconcluded_known_onchain"]) + .inc_by(inactive_unconcluded_known_onchain.len().try_into().unwrap_or(0)); metrics .partitioned_disputes 
.with_label_values(&["active_unknown_onchain"]) .inc_by(active_unknown_onchain.len().try_into().unwrap_or(0)); metrics .partitioned_disputes - .with_label_values(&["active_concluded_onchain"]) - .inc_by(active_concluded_onchain.len().try_into().unwrap_or(0)); + .with_label_values(&["active_unconcluded_onchain"]) + .inc_by(active_unconcluded_onchain.len().try_into().unwrap_or(0)); metrics .partitioned_disputes - .with_label_values(&["inactive_known_onchain"]) - .inc_by(inactive_known_onchain.len().try_into().unwrap_or(0)); + .with_label_values(&["active_concluded_onchain"]) + .inc_by(active_concluded_onchain.len().try_into().unwrap_or(0)); metrics .partitioned_disputes - .with_label_values(&["inactive_unknown_onchain"]) - .inc_by(inactive_unknown_onchain.len().try_into().unwrap_or(0)); + .with_label_values(&["inactive_concluded_known_onchain"]) + .inc_by(inactive_concluded_known_onchain.len().try_into().unwrap_or(0)); } } } From db34125a105f68c8544a459201277e89cfc9c2d1 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Wed, 31 Aug 2022 10:02:39 +0300 Subject: [PATCH 43/65] Comments --- runtime/parachains/src/runtime_api_impl/mod.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/runtime/parachains/src/runtime_api_impl/mod.rs b/runtime/parachains/src/runtime_api_impl/mod.rs index deee7c7827bb..c045b4747868 100644 --- a/runtime/parachains/src/runtime_api_impl/mod.rs +++ b/runtime/parachains/src/runtime_api_impl/mod.rs @@ -23,7 +23,8 @@ //! will contain methods from vstaging. //! The promotion consists of the following steps: //! 1. Bump the version of the stable module (e.g. v2 becomes v3) -//! 2. Move methods from vstaging to v3. The new stable version can include some -//! or all methods from vstaging. +//! 2. Move methods from vstaging to v3. The new stable version should include +//! all methods from vstaging tagged with the new version number (e.g. all +//! v3 methods). 
pub mod v2; pub mod vstaging; From d2af967d4a00af4e2a2d185ee0da82e949bb2de1 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Wed, 31 Aug 2022 10:43:46 +0300 Subject: [PATCH 44/65] Reduce the number of hardcoded api versions --- node/core/provisioner/src/lib.rs | 3 ++- node/core/runtime-api/src/lib.rs | 8 +++++--- node/subsystem-types/src/messages.rs | 7 +++++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/node/core/provisioner/src/lib.rs b/node/core/provisioner/src/lib.rs index 33f76dc95da9..4c14d9977f75 100644 --- a/node/core/provisioner/src/lib.rs +++ b/node/core/provisioner/src/lib.rs @@ -60,7 +60,8 @@ const SEND_INHERENT_DATA_TIMEOUT: std::time::Duration = core::time::Duration::fr const LOG_TARGET: &str = "parachain::provisioner"; -const PRIORITIZED_SELECTION_RUNTIME_VERSION_REQUIREMENT: u32 = 3; +const PRIORITIZED_SELECTION_RUNTIME_VERSION_REQUIREMENT: u32 = + RuntimeApiRequest::DISPUTES_RUNTIME_REQ; /// The provisioner subsystem. pub struct ProvisionerSubsystem { diff --git a/node/core/runtime-api/src/lib.rs b/node/core/runtime-api/src/lib.rs index 91340f11df91..4dad23a9c0be 100644 --- a/node/core/runtime-api/src/lib.rs +++ b/node/core/runtime-api/src/lib.rs @@ -351,8 +351,9 @@ where let _timer = metrics.time_make_runtime_api_request(); macro_rules! 
query { - ($req_variant:ident, $api_name:ident ($($param:expr),*), ver = $version:literal, $sender:expr) => {{ + ($req_variant:ident, $api_name:ident ($($param:expr),*), ver = $version:expr, $sender:expr) => {{ let sender = $sender; + let version: u32 = $version; // enforce type for the version expression let runtime_version = client.api_version_parachain_host(relay_parent).await .unwrap_or_else(|e| { gum::warn!( @@ -370,7 +371,7 @@ where 0 }); - let res = if runtime_version >= $version { + let res = if runtime_version >= version { client.$api_name(relay_parent $(, $param.clone() )*).await .map_err(|e| RuntimeApiError::Execution { runtime_api_name: stringify!($api_name), @@ -499,6 +500,7 @@ where }, Request::ValidationCodeHash(para, assumption, sender) => query!(ValidationCodeHash, validation_code_hash(para, assumption), ver = 2, sender), - Request::Disputes(sender) => query!(Disputes, disputes(), ver = 3, sender), + Request::Disputes(sender) => + query!(Disputes, disputes(), ver = Request::DISPUTES_RUNTIME_REQ, sender), } } diff --git a/node/subsystem-types/src/messages.rs b/node/subsystem-types/src/messages.rs index 73063ed27c9e..aa88162e8505 100644 --- a/node/subsystem-types/src/messages.rs +++ b/node/subsystem-types/src/messages.rs @@ -703,6 +703,13 @@ pub enum RuntimeApiRequest { Disputes(RuntimeApiSender)>>), } +impl RuntimeApiRequest { + /// Runtime version requirements for each message + + /// `Disputes` + pub const DISPUTES_RUNTIME_REQ: u32 = 3; +} + /// A message to the Runtime API subsystem. #[derive(Debug)] pub enum RuntimeApiMessage { From 401ffb8ddc76127a4f40f0cd8c38d96065bd80d8 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Wed, 31 Aug 2022 12:32:47 +0200 Subject: [PATCH 45/65] Finish. 
Missing: Tests --- .../network/dispute-distribution/src/error.rs | 2 - .../src/receiver/batches/batch.rs | 41 +-- .../src/receiver/batches/mod.rs | 26 +- .../src/receiver/batches/waiting_queue.rs | 22 +- .../src/receiver/error.rs | 16 +- .../dispute-distribution/src/receiver/mod.rs | 262 ++++++++---------- .../src/receiver/peer_queues.rs | 4 +- .../dispute-distribution/src/sender/mod.rs | 5 - .../src/node/disputes/dispute-distribution.md | 13 +- 9 files changed, 178 insertions(+), 213 deletions(-) diff --git a/node/network/dispute-distribution/src/error.rs b/node/network/dispute-distribution/src/error.rs index 660a10fe08ce..cc5be11602f4 100644 --- a/node/network/dispute-distribution/src/error.rs +++ b/node/network/dispute-distribution/src/error.rs @@ -57,8 +57,6 @@ pub type Result = std::result::Result; pub type FatalResult = std::result::Result; -pub type JfyiResult = std::result::Result; - /// Utility for eating top level errors and log them. /// /// We basically always want to try and continue on error. This utility function is meant to diff --git a/node/network/dispute-distribution/src/receiver/batches/batch.rs b/node/network/dispute-distribution/src/receiver/batches/batch.rs index ae6975422572..496ae7f7407b 100644 --- a/node/network/dispute-distribution/src/receiver/batches/batch.rs +++ b/node/network/dispute-distribution/src/receiver/batches/batch.rs @@ -17,8 +17,9 @@ use std::{collections::HashMap, time::Instant}; use gum::CandidateHash; -use polkadot_node_network_protocol::request_response::{ - incoming::OutgoingResponseSender, v1::DisputeRequest, +use polkadot_node_network_protocol::{ + request_response::{incoming::OutgoingResponseSender, v1::DisputeRequest}, + PeerId, }; use polkadot_node_primitives::SignedDisputeStatement; use polkadot_primitives::v2::{CandidateReceipt, ValidatorIndex}; @@ -64,7 +65,7 @@ pub struct Batch { best_before: Instant, /// Requesters waiting for a response. 
- pending_responses: Vec>, + requesters: Vec<(PeerId, OutgoingResponseSender)>, } /// Result of checking a batch every `BATCH_COLLECTING_INTERVAL`. @@ -79,12 +80,19 @@ pub(super) enum TickResult { pub struct PreparedImport { pub candidate_receipt: CandidateReceipt, pub statements: Vec<(SignedDisputeStatement, ValidatorIndex)>, - pub pending_responses: Vec>, + /// Information about original requesters. + pub requesters: Vec<(PeerId, OutgoingResponseSender)>, } impl From for PreparedImport { fn from(batch: Batch) -> Self { - let Batch { candidate_receipt, valid_votes, invalid_votes, pending_responses, .. } = batch; + let Batch { + candidate_receipt, + valid_votes, + invalid_votes, + requesters: pending_responses, + .. + } = batch; let statements = valid_votes .into_iter() @@ -92,7 +100,7 @@ impl From for PreparedImport { .map(|(index, statement)| (statement, index)) .collect(); - Self { candidate_receipt, statements, pending_responses } + Self { candidate_receipt, statements, requesters: pending_responses } } } @@ -118,14 +126,10 @@ impl Batch { invalid_votes: HashMap::new(), votes_batched_since_last_tick: 0, best_before: Instant::now() + MAX_BATCH_LIFETIME, - pending_responses: Vec::new(), + requesters: Vec::new(), }; - (s, s.calculate_next_tick(now)) - } - - /// Hash of the candidate this batch is batching votes for. - pub fn candidate_hash(&self) -> &CandidateHash { - &self.candidate_hash + let next_tick = s.calculate_next_tick(now); + (s, next_tick) } /// Receipt of the candidate this batch is batching votes for. @@ -138,13 +142,15 @@ impl Batch { /// The statements are supposed to be the valid and invalid statements received in a /// `DisputeRequest`. /// - /// The given `pending_response` is the corresponding response sender. If at least one of the - /// votes is new as far as this batch is concerned we record the pending_response, for later - /// use. In case both votes are known already, we return the response sender as an `Err` value. 
+ /// The given `pending_response` is the corresponding response sender for responding to `peer`. + /// If at least one of the votes is new as far as this batch is concerned we record the + /// pending_response, for later use. In case both votes are known already, we return the + /// response sender as an `Err` value. pub fn add_votes( &mut self, valid_vote: (SignedDisputeStatement, ValidatorIndex), invalid_vote: (SignedDisputeStatement, ValidatorIndex), + peer: PeerId, pending_response: OutgoingResponseSender, ) -> Result<(), OutgoingResponseSender> { debug_assert!(valid_vote.0.candidate_hash() == invalid_vote.0.candidate_hash()); @@ -164,6 +170,7 @@ impl Batch { if duplicate { Err(pending_response) } else { + self.requesters.push((peer, pending_response)); Ok(()) } } @@ -172,7 +179,7 @@ impl Batch { /// /// This function is supposed to be called at instants given at construction and as returned as /// part of `TickResult`. - pub fn tick(self, now: Instant) -> TickResult { + pub(super) fn tick(mut self, now: Instant) -> TickResult { if self.votes_batched_since_last_tick >= MIN_KEEP_BATCH_ALIVE_VOTES && now < self.best_before { diff --git a/node/network/dispute-distribution/src/receiver/batches/mod.rs b/node/network/dispute-distribution/src/receiver/batches/mod.rs index 5e6bc756327b..c3abf029f120 100644 --- a/node/network/dispute-distribution/src/receiver/batches/mod.rs +++ b/node/network/dispute-distribution/src/receiver/batches/mod.rs @@ -29,7 +29,10 @@ use crate::receiver::batches::{batch::TickResult, waiting_queue::PendingWake}; pub use self::batch::{Batch, PreparedImport}; use self::waiting_queue::WaitingQueue; -use super::BATCH_COLLECTING_INTERVAL; +use super::{ + error::{JfyiError, JfyiResult}, + BATCH_COLLECTING_INTERVAL, +}; /// A single batch (per candidate) as managed by `Batches`. 
mod batch; @@ -41,7 +44,12 @@ mod waiting_queue; /// /// If the batch life time exceeded the time the sender is willing to wait for a confirmation, we /// would trigger pointless re-sends. -const MAX_BATCH_LIFETIME: Duration = DISPUTE_REQUEST_TIMEOUT - Duration::from_secs(2); +const MAX_BATCH_LIFETIME: Duration = DISPUTE_REQUEST_TIMEOUT.saturating_sub(Duration::from_secs(2)); + +/// Limit the number of batches that can be alive at any given time. +/// +/// Reasoning for this number, see guide. +pub const MAX_BATCHES: usize = 1000; /// TODO: Limit number of batches @@ -92,9 +100,12 @@ impl Batches { &mut self, candidate_hash: CandidateHash, candidate_receipt: CandidateReceipt, - ) -> FoundBatch { + ) -> JfyiResult { + if self.batches.len() >= MAX_BATCHES { + return Err(JfyiError::MaxBatchLimitReached) + } debug_assert!(candidate_hash == candidate_receipt.hash()); - match self.batches.entry(candidate_hash) { + let result = match self.batches.entry(candidate_hash) { hash_map::Entry::Vacant(vacant) => { let now = Instant::now(); let (created, ready_at) = Batch::new(candidate_receipt, now); @@ -102,8 +113,9 @@ impl Batches { self.waiting_queue.push(pending_wake); FoundBatch::Created(vacant.insert(created)) }, - hash_map::Entry::Occupied(occupied) => FoundBatch::Found(occupied.get_mut()), - } + hash_map::Entry::Occupied(occupied) => FoundBatch::Found(occupied.into_mut()), + }; + Ok(result) } /// Wait for the next `tick` to check for ready batches. 
@@ -122,7 +134,7 @@ impl Batches { let mut imports = Vec::new(); // Wait for at least one batch to become ready: - self.waiting_queue.wait_ready(now); + self.waiting_queue.wait_ready(now).await; // Process all ready waits: while let Some(wake) = self.waiting_queue.pop_ready(now) { diff --git a/node/network/dispute-distribution/src/receiver/batches/waiting_queue.rs b/node/network/dispute-distribution/src/receiver/batches/waiting_queue.rs index cbb493faf7aa..d69ebc1c4279 100644 --- a/node/network/dispute-distribution/src/receiver/batches/waiting_queue.rs +++ b/node/network/dispute-distribution/src/receiver/batches/waiting_queue.rs @@ -58,21 +58,6 @@ impl WaitingQueue { self.waker = None; } - /// Pop an item from the queue if ready. - /// - /// Whether ready or not is determined based on the passed timestamp `now` which should be the - /// current time as returned by `Instant::now()` - /// - /// Returns: The next `PendingWake` after it became ready. `pop` will wait for that - /// asynchronously to happen if it is not already the case. If there is no more item, this call - /// will wait forever (return Poll::Pending without scheduling a wake). - pub async fn pop(&mut self, now: Instant) -> PendingWake { - self.wait_ready(now).await; - self.pending_wakes - .pop() - .expect("We just waited for an item to become ready. qed.") - } - /// Pop the next ready item. /// /// In contrast to `pop` this function does not wait, if nothing is ready right now as @@ -90,8 +75,11 @@ impl WaitingQueue { /// /// Once this function returns `Poll::Ready(())` `pop_ready()` will return `Some`. /// - /// Behaviour of this function is equal to `pop` otherwise, only difference is that the next - /// item is not actually popped. + /// Whether ready or not is determined based on the passed timestamp `now` which should be the + /// current time as returned by `Instant::now()` + /// + /// This function waits asynchronously for an item to become ready. 
If there is no more item, + /// this call will wait forever (return Poll::Pending without scheduling a wake). pub async fn wait_ready(&mut self, now: Instant) { if let Some(waker) = &mut self.waker { // Previous timer was not done yet. diff --git a/node/network/dispute-distribution/src/receiver/error.rs b/node/network/dispute-distribution/src/receiver/error.rs index 575e74f18ef6..4b780d2fa037 100644 --- a/node/network/dispute-distribution/src/receiver/error.rs +++ b/node/network/dispute-distribution/src/receiver/error.rs @@ -19,6 +19,7 @@ use fatality::Nested; +use gum::CandidateHash; use polkadot_node_network_protocol::{request_response::incoming, PeerId}; use polkadot_node_subsystem_util::runtime; use polkadot_primitives::v2::AuthorityDiscoveryId; @@ -36,8 +37,8 @@ pub enum Error { #[error("Retrieving next incoming request failed.")] IncomingRequest(#[from] incoming::Error), - #[error("Sending back response to peer {0} failed.")] - SendResponse(PeerId), + #[error("Sending back response to peers {0:#?} failed.")] + SendResponses(Vec), #[error("Changing peer's ({0}) reputation failed.")] SetPeerReputation(PeerId), @@ -48,12 +49,19 @@ pub enum Error { #[error("Received votes from peer {0} have been completely redundant.")] RedundantMessage(PeerId), - #[error("Import of dispute got canceled for peer {0} - import failed for some reason.")] - ImportCanceled(PeerId), + #[error("Import of dispute got canceled for candidate {0} - import failed for some reason.")] + ImportCanceled(CandidateHash), #[error("Peer {0} attempted to participate in dispute and is not a validator.")] NotAValidator(PeerId), + #[error("Force flush for batch that could not be found attempted, candidate hash: {0}")] + ForceFlushBatchDoesNotExist(CandidateHash), + + // should never happen in practice: + #[error("We needed to drop messages, because we reached limit on concurrent batches.")] + MaxBatchLimitReached, + #[error("Authority {0} sent messages at a too high rate.")] 
AuthorityFlooding(AuthorityDiscoveryId), } diff --git a/node/network/dispute-distribution/src/receiver/mod.rs b/node/network/dispute-distribution/src/receiver/mod.rs index f365b6877256..1f39efd27b67 100644 --- a/node/network/dispute-distribution/src/receiver/mod.rs +++ b/node/network/dispute-distribution/src/receiver/mod.rs @@ -15,7 +15,6 @@ // along with Polkadot. If not, see . use std::{ - collections::HashSet, pin::Pin, task::{Context, Poll}, time::Duration, @@ -23,12 +22,13 @@ use std::{ use futures::{ channel::oneshot, - future::{poll_fn, BoxFuture}, + future::poll_fn, pin_mut, - stream::{FusedStream, FuturesUnordered, StreamExt}, - Future, FutureExt, Stream, + stream::{FuturesUnordered, StreamExt}, + Future, }; +use gum::CandidateHash; use polkadot_node_network_protocol::{ authority_discovery::AuthorityDiscovery, request_response::{ @@ -45,10 +45,7 @@ use polkadot_node_subsystem::{ }; use polkadot_node_subsystem_util::{runtime, runtime::RuntimeInfo}; -use crate::{ - metrics::{FAILED, SUCCEEDED}, - Metrics, LOG_TARGET, -}; +use crate::{Metrics, LOG_TARGET}; mod error; @@ -66,7 +63,8 @@ use self::{ const COST_INVALID_REQUEST: Rep = Rep::CostMajor("Received message could not be decoded."); const COST_INVALID_SIGNATURE: Rep = Rep::Malicious("Signatures were invalid."); -const COST_INVALID_CANDIDATE: Rep = Rep::Malicious("Reported candidate was not available."); +const COST_INVALID_IMPORT: Rep = + Rep::Malicious("Import was deemed invalid by dispute-coordinator."); const COST_NOT_A_VALIDATOR: Rep = Rep::CostMajor("Reporting peer was not a validator."); /// Mildly punish peers exceeding their rate limit. /// @@ -107,9 +105,7 @@ pub struct DisputesReceiver { authority_discovery: AD, /// Imports currently being processed. - /// - /// TODO: Flush batches on invalid result of first vote import. - pending_imports: PendingImports, + pending_imports: FuturesUnordered, /// Log received requests. 
metrics: Metrics, @@ -123,8 +119,8 @@ enum MuxedMessage { /// /// - We need to make sure responses are actually sent (therefore we need to await futures /// promptly). - /// - We need to update `banned_peers` accordingly to the result. - ConfirmedImport(JfyiResult<(PeerId, ImportStatementsResult)>), + /// - We need to punish peers whose import got rejected. + ConfirmedImport(ImportResult), /// A new request has arrived and should be handled. NewRequest(IncomingRequest), @@ -166,7 +162,7 @@ where peer_queues: PeerQueues::new(), batches: Batches::new(), authority_discovery, - pending_imports: PendingImports::new(), + pending_imports: FuturesUnordered::new(), // Size of MAX_PARALLEL_IMPORTS ensures we are going to immediately get rid of any // malicious requests still pending in the incoming queue. metrics, @@ -219,11 +215,11 @@ where }, MuxedMessage::WakeCheckBatches(ready_imports) => { // Phase 3: - self.import_ready_batches(ready_imports).await?; + self.import_ready_batches(ready_imports).await; }, - MuxedMessage::ConfirmedImport(m_bad) => { - // Handle import confirmation: - self.ban_bad_peer(m_bad)?; + MuxedMessage::ConfirmedImport(import_result) => { + // Confirm imports to requesters/punish them on invalid imports: + send_responses_to_requesters(import_result).await?; }, } @@ -238,7 +234,7 @@ where poll_fn(|ctx| { // In case of Ready(None), we want to wait for pending requests: if let Poll::Ready(Some(v)) = self.pending_imports.poll_next_unpin(ctx) { - return Poll::Ready(Ok(MuxedMessage::ConfirmedImport(v))) + return Poll::Ready(Ok(MuxedMessage::ConfirmedImport(v?))) } let rate_limited = self.peer_queues.pop_reqs(); @@ -290,7 +286,7 @@ where reputation_changes: vec![COST_NOT_A_VALIDATOR], sent_feedback: None, }) - .map_err(|_| JfyiError::SendResponse(peer))?; + .map_err(|_| JfyiError::SendResponses(vec![peer]))?; return Err(JfyiError::NotAValidator(peer).into()) }, Some(auth_id) => auth_id, @@ -303,7 +299,7 @@ where reputation_changes: 
vec![COST_APPARENT_FLOOD], sent_feedback: None, }) - .map_err(|_| JfyiError::SendResponse(peer))?; + .map_err(|_| JfyiError::SendResponses(vec![peer]))?; return Err(JfyiError::AuthorityFlooding(authority_id)) } Ok(()) @@ -346,23 +342,19 @@ where Ok(votes) => votes, }; - match self.batches.find_batch(*valid_vote.0.candidate_hash(), candidate_receipt) { + match self.batches.find_batch(*valid_vote.0.candidate_hash(), candidate_receipt)? { FoundBatch::Created(batch) => { // There was no entry yet - start import immediately: - let (pending_confirmation, confirmation_rx) = oneshot::channel(); - self.sender - .send_message(DisputeCoordinatorMessage::ImportStatements { - candidate_receipt: batch.candidate_receipt().clone(), - session: valid_vote.0.session_index(), - statements: vec![valid_vote, invalid_vote], - pending_confirmation: Some(pending_confirmation), - }) - .await; - - self.pending_imports.push(peer, confirmation_rx, pending_response); + let import = PreparedImport { + candidate_receipt: batch.candidate_receipt().clone(), + statements: vec![valid_vote, invalid_vote], + requesters: vec![(peer, pending_response)], + }; + self.start_import(import).await; }, FoundBatch::Found(batch) => { - let batch_result = batch.add_votes(valid_vote, invalid_vote, pending_response); + let batch_result = + batch.add_votes(valid_vote, invalid_vote, peer, pending_response); if let Err(pending_response) = batch_result { // We don't expect honest peers to send redundant votes within a single batch, @@ -386,7 +378,7 @@ where reputation_changes: Vec::new(), sent_feedback: None, }) - .map_err(|_| JfyiError::SendResponse(peer))?; + .map_err(|_| JfyiError::SendResponses(vec![peer]))?; return Err(From::from(JfyiError::RedundantMessage(peer))) } }, @@ -396,143 +388,107 @@ where } /// Trigger import into the dispute-coordinator of ready batches (`PreparedImport`s). 
- async fn import_ready_batches(&mut self, ready_imports: Vec) -> Result<()> { + async fn import_ready_batches(&mut self, ready_imports: Vec) { for import in ready_imports { - let PreparedImport { candidate_receipt, statements, pending_responses } = import; - let session_index = match statements.iter().next() { - None => { - gum::debug!( - target: LOG_TARGET, - candidate_hash = ?candidate_receipt.hash(), - "Not importing empty batch" - ); - continue - }, - Some(vote) => vote.0.session_index(), - }; - - let (pending_confirmation, confirmation_rx) = oneshot::channel(); - self.sender - .send_message(DisputeCoordinatorMessage::ImportStatements { - candidate_receipt, - session: session_index, - statements, - pending_confirmation: Some(pending_confirmation), - }) - .await; - // TODO: - // Confirmation has to trigger response senders: + self.start_import(import).await; } - unimplemented!("WIP") } - /// Await an import and ban any misbehaving peers. - /// - /// In addition we report import metrics. - fn ban_bad_peer( - &mut self, - result: JfyiResult<(PeerId, ImportStatementsResult)>, - ) -> JfyiResult<()> { - match result? { - (_, ImportStatementsResult::ValidImport) => { - self.metrics.on_imported(SUCCEEDED); - }, - (bad_peer, ImportStatementsResult::InvalidImport) => { - self.metrics.on_imported(FAILED); - self.banned_peers.put(bad_peer, ()); + /// Start import and add response receiver to `pending_imports`. + async fn start_import(&mut self, import: PreparedImport) { + let PreparedImport { candidate_receipt, statements, requesters } = import; + let (session_index, candidate_hash) = match statements.iter().next() { + None => { + gum::debug!( + target: LOG_TARGET, + candidate_hash = ?candidate_receipt.hash(), + "Not importing empty batch" + ); + return }, - } - Ok(()) - } -} - -/// Manage pending imports in a way that preserves invariants. -struct PendingImports { - /// Futures in flight. - futures: FuturesUnordered)>>, - /// Peers whose requests are currently in flight. 
- peers: HashSet, -} - -impl PendingImports { - pub fn new() -> Self { - Self { futures: FuturesUnordered::new(), peers: HashSet::new() } - } + Some(vote) => (vote.0.session_index(), vote.0.candidate_hash().clone()), + }; - pub fn push( - &mut self, - peer: PeerId, - handled: oneshot::Receiver, - pending_response: OutgoingResponseSender, - ) { - self.peers.insert(peer); - self.futures.push( - async move { - let r = respond_to_request(peer, handled, pending_response).await; - (peer, r) - } - .boxed(), - ) - } + let (pending_confirmation, confirmation_rx) = oneshot::channel(); + self.sender + .send_message(DisputeCoordinatorMessage::ImportStatements { + candidate_receipt, + session: session_index, + statements, + pending_confirmation: Some(pending_confirmation), + }) + .await; - /// Returns the number of contained futures. - pub fn len(&self) -> usize { - self.futures.len() - } + let pending = + PendingImport { candidate_hash, requesters, pending_response: confirmation_rx }; - /// Check whether a peer has a pending import. 
- pub fn peer_is_pending(&self, peer: &PeerId) -> bool { - self.peers.contains(peer) + self.pending_imports.push(pending); } } -impl Stream for PendingImports { - type Item = JfyiResult<(PeerId, ImportStatementsResult)>; - fn poll_next(mut self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll> { - match Pin::new(&mut self.futures).poll_next(ctx) { - Poll::Pending => Poll::Pending, - Poll::Ready(None) => Poll::Ready(None), - Poll::Ready(Some((peer, result))) => { - self.peers.remove(&peer); - Poll::Ready(Some(result.map(|r| (peer, r)))) - }, - } - } -} -impl FusedStream for PendingImports { - fn is_terminated(&self) -> bool { - self.futures.is_terminated() - } -} +async fn send_responses_to_requesters(import_result: ImportResult) -> JfyiResult<()> { + let ImportResult { requesters, result } = import_result; -// Future for `PendingImports` -// -// - Wait for import -// - Punish peer -// - Deliver result -async fn respond_to_request( - peer: PeerId, - handled: oneshot::Receiver, - pending_response: OutgoingResponseSender, -) -> JfyiResult { - let result = handled.await.map_err(|_| JfyiError::ImportCanceled(peer))?; - - let response = match result { - ImportStatementsResult::ValidImport => OutgoingResponse { + let mk_response = match result { + ImportStatementsResult::ValidImport => || OutgoingResponse { result: Ok(DisputeResponse::Confirmed), reputation_changes: Vec::new(), sent_feedback: None, }, - ImportStatementsResult::InvalidImport => OutgoingResponse { + ImportStatementsResult::InvalidImport => || OutgoingResponse { result: Err(()), - reputation_changes: vec![COST_INVALID_CANDIDATE], + reputation_changes: vec![COST_INVALID_IMPORT], sent_feedback: None, }, }; - pending_response - .send_outgoing_response(response) - .map_err(|_| JfyiError::SendResponse(peer))?; + let mut sending_failed_for = Vec::new(); + for (peer, pending_response) in requesters { + if let Err(()) = pending_response.send_outgoing_response(mk_response()) { + sending_failed_for.push(peer); + } + } 
+ + if !sending_failed_for.is_empty() { + Err(JfyiError::SendResponses(sending_failed_for)) + } else { + Ok(()) + } +} + +/// A future that resolves into an `ImportResult` when ready. +/// +/// This future is used on import calls for the response receiver to: +/// - Keep track of concerned `CandidateHash` so we can flush batches if needed. +/// - Keep track of requesting peers so we can confirm the import/punish them on invalid imports. +struct PendingImport { + candidate_hash: CandidateHash, + requesters: Vec<(PeerId, OutgoingResponseSender)>, + pending_response: oneshot::Receiver, +} + +/// A `PendingImport` becomes an `ImportResult` once done. +struct ImportResult { + /// Requesters of that import. + requesters: Vec<(PeerId, OutgoingResponseSender)>, + /// Actual result of the import. + result: ImportStatementsResult, +} + +impl PendingImport { + async fn wait_for_result(&mut self) -> JfyiResult { + let result = (&mut self.pending_response) + .await + .map_err(|_| JfyiError::ImportCanceled(self.candidate_hash))?; + Ok(ImportResult { requesters: std::mem::take(&mut self.requesters), result }) + } +} - Ok(result) +impl Future for PendingImport { + type Output = JfyiResult; + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let fut = self.wait_for_result(); + pin_mut!(fut); + fut.poll(cx) + } } diff --git a/node/network/dispute-distribution/src/receiver/peer_queues.rs b/node/network/dispute-distribution/src/receiver/peer_queues.rs index 9be45f4a7785..1004b1fec05e 100644 --- a/node/network/dispute-distribution/src/receiver/peer_queues.rs +++ b/node/network/dispute-distribution/src/receiver/peer_queues.rs @@ -16,7 +16,7 @@ use std::collections::{hash_map::Entry, HashMap, VecDeque}; -use futures::future::{pending, Fuse}; +use futures::future::pending; use futures_timer::Delay; use polkadot_node_network_protocol::request_response::{v1::DisputeRequest, IncomingRequest}; use polkadot_primitives::v2::AuthorityDiscoveryId; @@ -70,7 +70,7 @@ impl 
PeerQueues { if occupied.get().len() >= PEER_QUEUE_CAPACITY { return Err((occupied.key().clone(), req)) } - occupied.get_mut() + occupied.into_mut() }, }; queue.push_back(req); diff --git a/node/network/dispute-distribution/src/sender/mod.rs b/node/network/dispute-distribution/src/sender/mod.rs index cd2df7272469..018b101e430b 100644 --- a/node/network/dispute-distribution/src/sender/mod.rs +++ b/node/network/dispute-distribution/src/sender/mod.rs @@ -380,11 +380,6 @@ impl RateLimit { std::mem::replace(self, RateLimit::new()) } - /// Replace the `RateLimit` and replace `Self::new_limit()`. - fn replace_with_limit(&mut self) -> Self { - std::mem::replace(self, Self::new_limit()) - } - /// Wait until ready and prepare for next call. async fn limit(&mut self) { let old = self.take(); diff --git a/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md b/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md index b0d873a9df76..dc69876aba9a 100644 --- a/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md +++ b/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md @@ -323,12 +323,13 @@ For a thousand validators a limit on batches of around 1000 should never be reached in practice. Hence due to rate limiting we have a very good chance to not ever having to drop a potential valid dispute due to some resource limit. -Further safe guards: The dispute-coordinator actually confirms/denies imports. -So once we receive a denial by the dispute-coordinator for the initial imported -votes, we can opt into flushing the batch immediately and importing the votes. -This swaps memory usage for more CPU usage, but if that import is deemed invalid -again we can immediately decrease the reputation of the sending peers, so this -should be a net win. +Further safe guards are possible: The dispute-coordinator actually +confirms/denies imports. 
So once we receive a denial by the dispute-coordinator +for the initial imported votes, we can opt into flushing the batch immediately +and importing the votes. This swaps memory usage for more CPU usage, but if that +import is deemed invalid again we can immediately decrease the reputation of the +sending peers, so this should be a net win. For the time being we punt on this +for simplicity. Instead of filling batches to maximize memory usage, attackers could also try to overwhelm the dispute coordinator by only sending votes for new candidates all From 1366d905f0ad75d775e194e66e30bae4fd67221f Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Wed, 31 Aug 2022 17:15:55 +0300 Subject: [PATCH 46/65] Code review feedback --- .../src/disputes/prioritized_selection/mod.rs | 12 ++++++------ node/core/provisioner/src/lib.rs | 2 +- node/core/runtime-api/src/lib.rs | 2 +- node/subsystem-types/src/messages.rs | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/node/core/provisioner/src/disputes/prioritized_selection/mod.rs b/node/core/provisioner/src/disputes/prioritized_selection/mod.rs index 5316da5cb9db..e43be6fa9f1d 100644 --- a/node/core/provisioner/src/disputes/prioritized_selection/mod.rs +++ b/node/core/provisioner/src/disputes/prioritized_selection/mod.rs @@ -318,12 +318,12 @@ fn partition_recent_disputes( // ... 
and INACTIVE in three more for (session_index, candidate_hash, _) in inactive { match onchain.get(&(session_index, candidate_hash)) { - Some(onchain_state) => match concluded_onchain(onchain_state) { - true => - partitioned.inactive_concluded_onchain.push((session_index, candidate_hash)), - false => - partitioned.inactive_unconcluded_onchain.push((session_index, candidate_hash)), - }, + Some(onchain_state) => + if concluded_onchain(onchain_state) { + partitioned.inactive_concluded_onchain.push((session_index, candidate_hash)); + } else { + partitioned.inactive_unconcluded_onchain.push((session_index, candidate_hash)); + }, None => partitioned.inactive_unknown_onchain.push((session_index, candidate_hash)), } } diff --git a/node/core/provisioner/src/lib.rs b/node/core/provisioner/src/lib.rs index 4c14d9977f75..fc6fa3e18ddd 100644 --- a/node/core/provisioner/src/lib.rs +++ b/node/core/provisioner/src/lib.rs @@ -61,7 +61,7 @@ const SEND_INHERENT_DATA_TIMEOUT: std::time::Duration = core::time::Duration::fr const LOG_TARGET: &str = "parachain::provisioner"; const PRIORITIZED_SELECTION_RUNTIME_VERSION_REQUIREMENT: u32 = - RuntimeApiRequest::DISPUTES_RUNTIME_REQ; + RuntimeApiRequest::DISPUTES_RUNTIME_REQUIREMENT; /// The provisioner subsystem. 
pub struct ProvisionerSubsystem { diff --git a/node/core/runtime-api/src/lib.rs b/node/core/runtime-api/src/lib.rs index 4dad23a9c0be..36355b5759e6 100644 --- a/node/core/runtime-api/src/lib.rs +++ b/node/core/runtime-api/src/lib.rs @@ -501,6 +501,6 @@ where Request::ValidationCodeHash(para, assumption, sender) => query!(ValidationCodeHash, validation_code_hash(para, assumption), ver = 2, sender), Request::Disputes(sender) => - query!(Disputes, disputes(), ver = Request::DISPUTES_RUNTIME_REQ, sender), + query!(Disputes, disputes(), ver = Request::DISPUTES_RUNTIME_REQUIREMENT, sender), } } diff --git a/node/subsystem-types/src/messages.rs b/node/subsystem-types/src/messages.rs index aa88162e8505..c37f773b3839 100644 --- a/node/subsystem-types/src/messages.rs +++ b/node/subsystem-types/src/messages.rs @@ -707,7 +707,7 @@ impl RuntimeApiRequest { /// Runtime version requirements for each message /// `Disputes` - pub const DISPUTES_RUNTIME_REQ: u32 = 3; + pub const DISPUTES_RUNTIME_REQUIREMENT: u32 = 3; } /// A message to the Runtime API subsystem. From 3dd204178e1ee1d7ec5503d31034cef80b11283e Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 1 Sep 2022 09:18:05 +0200 Subject: [PATCH 47/65] Typo. --- node/network/dispute-distribution/src/receiver/batches/batch.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/network/dispute-distribution/src/receiver/batches/batch.rs b/node/network/dispute-distribution/src/receiver/batches/batch.rs index 496ae7f7407b..d302ceab74b2 100644 --- a/node/network/dispute-distribution/src/receiver/batches/batch.rs +++ b/node/network/dispute-distribution/src/receiver/batches/batch.rs @@ -48,7 +48,7 @@ pub struct Batch { /// We differentiate between valid and invalid votes, so we can detect (and drop) duplicates, /// while still allowing validators to equivocate. 
/// - /// Detecting and rejecting duplicats is crucial in order to effectively enforce + /// Detecting and rejecting duplicates is crucial in order to effectively enforce /// `MIN_KEEP_BATCH_ALIVE_VOTES` per `BATCH_COLLECTING_INTERVAL`. If we would count duplicates /// here, the mechanism would be broken. valid_votes: HashMap, From aeef11657c308b236b1028f490c6cfb324ae9cc1 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Thu, 1 Sep 2022 10:39:06 +0300 Subject: [PATCH 48/65] Unused import --- node/core/provisioner/src/disputes/prioritized_selection/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/node/core/provisioner/src/disputes/prioritized_selection/mod.rs b/node/core/provisioner/src/disputes/prioritized_selection/mod.rs index e43be6fa9f1d..056fd61b7227 100644 --- a/node/core/provisioner/src/disputes/prioritized_selection/mod.rs +++ b/node/core/provisioner/src/disputes/prioritized_selection/mod.rs @@ -34,7 +34,6 @@ use polkadot_primitives::v2::{ Hash, InvalidDisputeStatementKind, MultiDisputeStatementSet, SessionIndex, ValidDisputeStatementKind, ValidatorIndex, }; -use rand as _; use std::{ collections::{BTreeMap, HashMap}, time::{SystemTime, UNIX_EPOCH}, From f9fc6c885688c870759d27b2f56318a1f066e9bf Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 1 Sep 2022 12:07:43 +0200 Subject: [PATCH 49/65] Docs. 
--- node/network/dispute-distribution/src/lib.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/node/network/dispute-distribution/src/lib.rs b/node/network/dispute-distribution/src/lib.rs index 2f6641fe3569..56683d94e0c2 100644 --- a/node/network/dispute-distribution/src/lib.rs +++ b/node/network/dispute-distribution/src/lib.rs @@ -59,8 +59,6 @@ use self::sender::{DisputeSender, TaskFinish}; /// ## The receiver [`DisputesReceiver`] /// -/// TODO: Obsolete: -/// /// The receiving side is implemented as `DisputesReceiver` and is run as a separate long running task within /// this subsystem ([`DisputesReceiver::run`]). /// @@ -68,16 +66,19 @@ use self::sender::{DisputeSender, TaskFinish}; /// via a dedicated channel and forwarding them to the dispute coordinator via /// `DisputeCoordinatorMessage::ImportStatements`. Being the interface to the network and untrusted /// nodes, the reality is not that simple of course. Before importing statements the receiver will -/// make sure as good as it can to filter out malicious/unwanted/spammy requests. For this it does -/// the following: +/// batch up imports as well as possible for efficient imports while maintaining timely dispute +/// resolution and handling of spamming validators: /// /// - Drop all messages from non validator nodes, for this it requires the [`AuthorityDiscovery`] /// service. -/// - Drop messages from a node, if we are already importing a message from that node (flood). -/// - Drop messages from nodes, that provided us messages where the statement import failed. +/// - Drop messages from a node, if it sends at a too high rate. +/// - Filter out duplicate messages (over some period of time). /// - Drop any obviously invalid votes (invalid signatures for example). /// - Ban peers whose votes were deemed invalid. 
/// +/// In general dispute-distribution works on limiting the work the dispute-coordinator will have to +/// do, while at the same time making it aware of new disputes as fast as possible. +/// /// For successfully imported votes, we will confirm the receipt of the message back to the sender. /// This way a received confirmation guarantees, that the vote has been stored to disk by the /// receiver. From 43d946d3ad26f7ed67a4fded8f19bac50e681330 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 1 Sep 2022 13:29:53 +0200 Subject: [PATCH 50/65] Report missing metric. --- node/network/dispute-distribution/src/metrics.rs | 7 +++++-- .../dispute-distribution/src/receiver/mod.rs | 13 +++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/node/network/dispute-distribution/src/metrics.rs b/node/network/dispute-distribution/src/metrics.rs index 3f717bd105c3..aa2feeaad3a0 100644 --- a/node/network/dispute-distribution/src/metrics.rs +++ b/node/network/dispute-distribution/src/metrics.rs @@ -72,9 +72,12 @@ impl Metrics { } /// Statements have been imported. 
- pub fn on_imported(&self, label: &'static str) { + pub fn on_imported(&self, label: &'static str, num_requests: usize) { if let Some(metrics) = &self.0 { - metrics.imported_requests.with_label_values(&[label]).inc() + metrics + .imported_requests + .with_label_values(&[label]) + .inc_by(num_requests as u64) } } diff --git a/node/network/dispute-distribution/src/receiver/mod.rs b/node/network/dispute-distribution/src/receiver/mod.rs index 1f39efd27b67..37435ac464a4 100644 --- a/node/network/dispute-distribution/src/receiver/mod.rs +++ b/node/network/dispute-distribution/src/receiver/mod.rs @@ -46,6 +46,10 @@ use polkadot_node_subsystem::{ use polkadot_node_subsystem_util::{runtime, runtime::RuntimeInfo}; use crate::{Metrics, LOG_TARGET}; +use crate::{ + metrics::{FAILED, SUCCEEDED}, + Metrics, LOG_TARGET, +}; mod error; @@ -218,6 +222,7 @@ where self.import_ready_batches(ready_imports).await; }, MuxedMessage::ConfirmedImport(import_result) => { + self.update_imported_requests_metrics(&import_result); // Confirm imports to requesters/punish them on invalid imports: send_responses_to_requesters(import_result).await?; }, @@ -424,6 +429,14 @@ where self.pending_imports.push(pending); } + + fn update_imported_requests_metrics(&self, result: &ImportResult) { + let label = match result.result { + ImportStatementsResult::ValidImport => SUCCEEDED, + ImportStatementsResult::InvalidImport => FAILED, + }; + self.metrics.on_imported(label, result.requesters.len()); + } } async fn send_responses_to_requesters(import_result: ImportResult) -> JfyiResult<()> { From 3e5ceaa81cc673eb20b4103b0e20c7856476ca01 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Thu, 1 Sep 2022 13:30:28 +0200 Subject: [PATCH 51/65] Doc pass. 
--- .../src/receiver/batches/batch.rs | 8 +++---- .../src/receiver/batches/mod.rs | 23 ++++++++++++------- .../src/receiver/batches/waiting_queue.rs | 16 ++++++------- .../src/receiver/error.rs | 2 +- .../dispute-distribution/src/receiver/mod.rs | 15 ++++++------ .../src/receiver/peer_queues.rs | 10 ++++---- .../dispute-distribution/src/sender/mod.rs | 6 ++--- .../src/sender/send_task.rs | 9 ++++---- 8 files changed, 47 insertions(+), 42 deletions(-) diff --git a/node/network/dispute-distribution/src/receiver/batches/batch.rs b/node/network/dispute-distribution/src/receiver/batches/batch.rs index d302ceab74b2..3d39b4a687b3 100644 --- a/node/network/dispute-distribution/src/receiver/batches/batch.rs +++ b/node/network/dispute-distribution/src/receiver/batches/batch.rs @@ -61,7 +61,7 @@ pub struct Batch { /// Expiry time for the batch. /// - /// By this time the lastest this batch will get flushed. + /// By this time the latest this batch will get flushed. best_before: Instant, /// Requesters waiting for a response. @@ -111,8 +111,8 @@ impl Batch { /// /// Arguments: /// - /// * candidate_recipt - The candidate this batch is meant to track votes for. - /// * `now` - current timestamp for calculating the first tick. + /// * `candidate_receipt` - The candidate this batch is meant to track votes for. + /// * `now` - current time stamp for calculating the first tick. /// /// Returns: /// @@ -214,5 +214,5 @@ impl Batch { // Test tick behaviour: // - If less than `MIN_KEEP_BATCH_ALIVE_VOTES` trickled in since last tick - batch should become // done. -// - If batch surpased its `best_before` it should become done. +// - If batch surpassed its `best_before` it should become done. // - Batch does not count duplicate votes. 
diff --git a/node/network/dispute-distribution/src/receiver/batches/mod.rs b/node/network/dispute-distribution/src/receiver/batches/mod.rs index c3abf029f120..c887e9e7c7c7 100644 --- a/node/network/dispute-distribution/src/receiver/batches/mod.rs +++ b/node/network/dispute-distribution/src/receiver/batches/mod.rs @@ -24,7 +24,10 @@ use futures::future::pending; use polkadot_node_network_protocol::request_response::DISPUTE_REQUEST_TIMEOUT; use polkadot_primitives::v2::{CandidateHash, CandidateReceipt}; -use crate::receiver::batches::{batch::TickResult, waiting_queue::PendingWake}; +use crate::{ + receiver::batches::{batch::TickResult, waiting_queue::PendingWake}, + LOG_TARGET, +}; pub use self::batch::{Batch, PreparedImport}; use self::waiting_queue::WaitingQueue; @@ -37,7 +40,7 @@ use super::{ /// A single batch (per candidate) as managed by `Batches`. mod batch; -/// Simple abstraction queue events in time and wait for them to become ready. +/// Queue events in time and wait for them to become ready. mod waiting_queue; /// Safe-guard in case votes trickle in real slow. @@ -51,12 +54,6 @@ const MAX_BATCH_LIFETIME: Duration = DISPUTE_REQUEST_TIMEOUT.saturating_sub(Dura /// Reasoning for this number, see guide. pub const MAX_BATCHES: usize = 1000; -/// TODO: Limit number of batches - -// - Batches can be added very rate limit timeout. -// - They have to be checked every BATCH_COLLECTING_INTERVAL. -// - We can get the earliest next wakeup - keep ordered list of wakeups! Then we always know when -// the next one comes - only needs to get updated on insert. - Tada! /// Manage batches. /// /// - Batches can be found via `find_batch()` in order to add votes to them/check they exist. @@ -149,9 +146,19 @@ impl Batches { }; match batch.tick(now) { TickResult::Done(import) => { + gum::trace!( + target: LOG_TARGET, + candidate_hash = ?wake.payload, + "Batch became ready." 
+ ); imports.push(import); }, TickResult::Alive(old_batch, next_tick) => { + gum::trace!( + target: LOG_TARGET, + candidate_hash = ?wake.payload, + "Batch found to be still alive on check." + ); let pending_wake = PendingWake { payload: wake.payload, ready_at: next_tick }; self.waiting_queue.push(pending_wake); self.batches.insert(wake.payload, old_batch); diff --git a/node/network/dispute-distribution/src/receiver/batches/waiting_queue.rs b/node/network/dispute-distribution/src/receiver/batches/waiting_queue.rs index d69ebc1c4279..dff9e4d428e1 100644 --- a/node/network/dispute-distribution/src/receiver/batches/waiting_queue.rs +++ b/node/network/dispute-distribution/src/receiver/batches/waiting_queue.rs @@ -21,8 +21,8 @@ use futures_timer::Delay; /// Wait asynchronously for given `Instant`s one after the other. /// -/// `PendingWake`s can be inserted and `WaitingQueue` makes sure to wake calls to `pop` so those -/// structs can be processed when they are deemed ready. +/// `PendingWake`s can be inserted and `WaitingQueue` makes `wait_ready()` to always wait for the +/// next `Instant` in the queue. pub struct WaitingQueue { /// All pending wakes we are supposed to wait on in order. pending_wakes: BinaryHeap>, @@ -33,7 +33,7 @@ pub struct WaitingQueue { /// Represents some event waiting to be processed at `ready_at`. /// /// This is an event in `WaitingQueue`. It provides an `Ord` instance, that sorts descending with -/// regard to `Instant` (so we get a `min-heap` with the earliest `Instant` at the top. +/// regard to `Instant` (so we get a `min-heap` with the earliest `Instant` at the top). #[derive(Eq, PartialEq)] pub struct PendingWake { pub payload: Payload, @@ -43,14 +43,14 @@ pub struct PendingWake { impl WaitingQueue { /// Get a new empty `WaitingQueue`. /// - /// If you call `pop` on this queue immediately it will always return `Poll::Pending`. + /// If you call `pop` on this queue immediately, it will always return `Poll::Pending`. 
pub fn new() -> Self { Self { pending_wakes: BinaryHeap::new(), waker: None } } /// Push a `PendingWake`. /// - /// The next call to `pop` will make sure to wake soon enough to process that newly event in a + /// The next call to `wait_ready` will make sure to wake soon enough to process that new event in a /// timely manner. pub fn push(&mut self, wake: PendingWake) { self.pending_wakes.push(wake); @@ -60,8 +60,8 @@ impl WaitingQueue { /// Pop the next ready item. /// - /// In contrast to `pop` this function does not wait, if nothing is ready right now as - /// determined by the passed `now` timestamp, this function simply returns `None`. + /// This function does not wait, if nothing is ready right now as determined by the passed + /// `now` time stamp, this function simply returns `None`. pub fn pop_ready(&mut self, now: Instant) -> Option> { let is_ready = self.pending_wakes.peek().map_or(false, |p| p.ready_at <= now); if is_ready { @@ -75,7 +75,7 @@ impl WaitingQueue { /// /// Once this function returns `Poll::Ready(())` `pop_ready()` will return `Some`. /// - /// Whether ready or not is determined based on the passed timestamp `now` which should be the + /// Whether ready or not is determined based on the passed time stamp `now` which should be the /// current time as returned by `Instant::now()` /// /// This function waits asynchronously for an item to become ready. 
If there is no more item, diff --git a/node/network/dispute-distribution/src/receiver/error.rs b/node/network/dispute-distribution/src/receiver/error.rs index 4b780d2fa037..4477335440d0 100644 --- a/node/network/dispute-distribution/src/receiver/error.rs +++ b/node/network/dispute-distribution/src/receiver/error.rs @@ -58,7 +58,7 @@ pub enum Error { #[error("Force flush for batch that could not be found attempted, candidate hash: {0}")] ForceFlushBatchDoesNotExist(CandidateHash), - // shoud never happen in practice: + // Should never happen in practice: #[error("We needed to drop messages, because we reached limit on concurrent batches.")] MaxBatchLimitReached, diff --git a/node/network/dispute-distribution/src/receiver/mod.rs b/node/network/dispute-distribution/src/receiver/mod.rs index 37435ac464a4..0fb4213562ac 100644 --- a/node/network/dispute-distribution/src/receiver/mod.rs +++ b/node/network/dispute-distribution/src/receiver/mod.rs @@ -45,7 +45,6 @@ use polkadot_node_subsystem::{ }; use polkadot_node_subsystem_util::{runtime, runtime::RuntimeInfo}; -use crate::{Metrics, LOG_TARGET}; use crate::{ metrics::{FAILED, SUCCEEDED}, Metrics, LOG_TARGET, @@ -80,12 +79,13 @@ const COST_APPARENT_FLOOD: Rep = Rep::CostMinor("Peer exceeded the rate limit.") /// /// in order for a batch to stay alive and not get flushed/imported to the dispute-coordinator. /// -/// This ensures a timely import once of batches. +/// This ensures a timely import of batches. pub const MIN_KEEP_BATCH_ALIVE_VOTES: u32 = 10; /// Time we allow to pass for new votes to trickle in. /// -/// See `MIN_KEEP_BATCH_ALIVE_VOTES` above. Must be greater or equal to `RECEIVE_RATE_LIMIT`. +/// See `MIN_KEEP_BATCH_ALIVE_VOTES` above. +/// Should be greater or equal to `RECEIVE_RATE_LIMIT` (there is no point in checking any faster). pub const BATCH_COLLECTING_INTERVAL: Duration = Duration::from_millis(500); /// State for handling incoming `DisputeRequest` messages. 
@@ -108,7 +108,7 @@ pub struct DisputesReceiver { /// Authority discovery service: authority_discovery: AD, - /// Imports currently being processed. + /// Imports currently being processed by the `dispute-coordinator`. pending_imports: FuturesUnordered, /// Log received requests. @@ -167,8 +167,6 @@ where batches: Batches::new(), authority_discovery, pending_imports: FuturesUnordered::new(), - // Size of MAX_PARALLEL_IMPORTS ensures we are going to immediately get rid of any - // malicious requests still pending in the incoming queue. metrics, } } @@ -471,8 +469,9 @@ async fn send_responses_to_requesters(import_result: ImportResult) -> JfyiResult /// A future that resolves into an `ImportResult` when ready. /// -/// This future is used on import calls for the response receiver to: -/// - Keep track of concerned `CandidateHash` so we can flush batches if needed. +/// This future is used on `dispute-coordinator` import messages for the oneshot response receiver +/// to: +/// - Keep track of concerned `CandidateHash` for reporting errors. /// - Keep track of requesting peers so we can confirm the import/punish them on invalid imports. struct PendingImport { candidate_hash: CandidateHash, diff --git a/node/network/dispute-distribution/src/receiver/peer_queues.rs b/node/network/dispute-distribution/src/receiver/peer_queues.rs index 1004b1fec05e..bda06e02bc9c 100644 --- a/node/network/dispute-distribution/src/receiver/peer_queues.rs +++ b/node/network/dispute-distribution/src/receiver/peer_queues.rs @@ -26,8 +26,8 @@ use crate::RECEIVE_RATE_LIMIT; /// How many messages we are willing to queue per peer (validator). /// /// The larger this value is, the larger bursts are allowed to be without us dropping messages. On -/// the flip side we should this gets allocated per validator, so for a size of 10 this will result -/// in 10_000 * size_of(`IncomingRequest`). 
+/// the flip side this gets allocated per validator, so for a size of 10 this will result +/// in 10_000 * size_of(`IncomingRequest`) in the worst case. /// /// `PEER_QUEUE_CAPACITY` must not be 0 for obvious reasons. pub const PEER_QUEUE_CAPACITY: usize = 10; @@ -75,7 +75,7 @@ impl PeerQueues { }; queue.push_back(req); - // We have at least one element to process - rate limit waker needs to exist now: + // We have at least one element to process - rate limit `waker` needs to exist now: self.ensure_waker(); Ok(()) } @@ -118,14 +118,14 @@ impl PeerQueues { self.queues.is_empty() } - /// Ensure there is an active waker. + /// Ensure there is an active `waker`. /// /// Checks whether one exists and if not creates one. fn ensure_waker(&mut self) -> &mut Delay { self.rate_limit_waker.get_or_insert(Delay::new(RECEIVE_RATE_LIMIT)) } - /// Wait for waker if it exists, or be `Pending` forever. + /// Wait for `waker` if it exists, or be `Pending` forever. /// /// Afterwards it gets set back to `None`. async fn wait_for_waker(&mut self) { diff --git a/node/network/dispute-distribution/src/sender/mod.rs b/node/network/dispute-distribution/src/sender/mod.rs index 018b101e430b..6d419b3eae33 100644 --- a/node/network/dispute-distribution/src/sender/mod.rs +++ b/node/network/dispute-distribution/src/sender/mod.rs @@ -40,7 +40,7 @@ mod send_task; use send_task::SendTask; pub use send_task::TaskFinish; -/// Error and [`Result`] type for sender +/// Error and [`Result`] type for sender. mod error; pub use error::{Error, FatalError, JfyiError, Result}; @@ -53,7 +53,7 @@ use crate::{Metrics, LOG_TARGET, SEND_RATE_LIMIT}; /// particular dispute. The `DisputeSender` keeps track of those tasks, informs them about new /// sessions/validator sets and cleans them up when they become obsolete. /// -/// The unit of work for the `DisputeSender` is a dispute, represended by `SendTask`s. +/// The unit of work for the `DisputeSender` is a dispute, represented by `SendTask`s. 
pub struct DisputeSender { /// All heads we currently consider active. active_heads: Vec, @@ -168,7 +168,7 @@ impl DisputeSender { let sends_happened = dispute .refresh_sends(ctx, runtime, &self.active_sessions, &self.metrics) .await?; - // Only rate limit if we actually sent someting out _and_ it was not just because + // Only rate limit if we actually sent something out _and_ it was not just because // of errors on previous sends. // // Reasoning: It would not be acceptable to slow down the whole subsystem, just diff --git a/node/network/dispute-distribution/src/sender/send_task.rs b/node/network/dispute-distribution/src/sender/send_task.rs index e564a4d95838..89b5c099bde9 100644 --- a/node/network/dispute-distribution/src/sender/send_task.rs +++ b/node/network/dispute-distribution/src/sender/send_task.rs @@ -45,12 +45,12 @@ use crate::{ /// /// The unit of work for a `SendTask` is an authority/validator. pub struct SendTask { - /// The request we are supposed to get out to all parachain validators of the dispute's session + /// The request we are supposed to get out to all `parachain` validators of the dispute's session /// and to all current authorities. request: DisputeRequest, /// The set of authorities we need to send our messages to. This set will change at session - /// boundaries. It will always be at least the parachain validators of the session where the + /// boundaries. It will always be at least the `parachain` validators of the session where the /// dispute happened and the authorities of the current sessions as determined by active heads. deliveries: HashMap, @@ -208,9 +208,8 @@ impl SendTask { /// Determine all validators that should receive the given dispute requests. /// - /// This is all parachain validators of the session the candidate occurred and all authorities + /// This is all `parachain` validators of the session the candidate occurred and all authorities /// of all currently active sessions, determined by currently active heads. 
- async fn get_relevant_validators( &self, ctx: &mut Context, @@ -308,7 +307,7 @@ async fn wait_response_task( gum::debug!( target: LOG_TARGET, %err, - "Failed to notify susystem about dispute sending result." + "Failed to notify subsystem about dispute sending result." ); } } From 0e11f4f71f6c7c8ba091960dba24ca2414327c51 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 2 Sep 2022 11:29:08 +0200 Subject: [PATCH 52/65] Tests for waiting_queue. --- .../src/receiver/batches/mod.rs | 3 - .../src/receiver/batches/waiting_queue.rs | 114 ++++++++++++++++-- 2 files changed, 101 insertions(+), 16 deletions(-) diff --git a/node/network/dispute-distribution/src/receiver/batches/mod.rs b/node/network/dispute-distribution/src/receiver/batches/mod.rs index c887e9e7c7c7..6163d523a4c6 100644 --- a/node/network/dispute-distribution/src/receiver/batches/mod.rs +++ b/node/network/dispute-distribution/src/receiver/batches/mod.rs @@ -168,6 +168,3 @@ impl Batches { imports } } - -// Tests: -// - Make sure waker gets updated to earlier event in case an earlier event gets inserted. diff --git a/node/network/dispute-distribution/src/receiver/batches/waiting_queue.rs b/node/network/dispute-distribution/src/receiver/batches/waiting_queue.rs index dff9e4d428e1..95e739b3cd07 100644 --- a/node/network/dispute-distribution/src/receiver/batches/waiting_queue.rs +++ b/node/network/dispute-distribution/src/receiver/batches/waiting_queue.rs @@ -19,6 +19,8 @@ use std::{cmp::Ordering, collections::BinaryHeap, time::Instant}; use futures::future::pending; use futures_timer::Delay; +use crate::LOG_TARGET; + /// Wait asynchronously for given `Instant`s one after the other. /// /// `PendingWake`s can be inserted and `WaitingQueue` makes `wait_ready()` to always wait for the @@ -73,7 +75,8 @@ impl WaitingQueue { /// Don't pop, just wait until something is ready. /// - /// Once this function returns `Poll::Ready(())` `pop_ready()` will return `Some`. 
+ /// Once this function returns `Poll::Ready(())` `pop_ready()` will return `Some`, if passed + /// the same `Instant`. /// /// Whether ready or not is determined based on the passed time stamp `now` which should be the /// current time as returned by `Instant::now()` @@ -85,18 +88,17 @@ impl WaitingQueue { // Previous timer was not done yet. waker.await } - loop { - let next_waiting = self.pending_wakes.peek(); - let is_ready = next_waiting.map_or(false, |p| p.ready_at <= now); - if !is_ready { - self.waker = next_waiting.map(|p| Delay::new(p.ready_at.duration_since(now))); - match &mut self.waker { - None => return pending().await, - Some(waker) => waker.await, - } - } else { - return - } + + let next_waiting = self.pending_wakes.peek(); + let is_ready = next_waiting.map_or(false, |p| p.ready_at <= now); + if is_ready { + return + } + + self.waker = next_waiting.map(|p| Delay::new(p.ready_at.duration_since(now))); + match &mut self.waker { + None => return pending().await, + Some(waker) => waker.await, } } } @@ -116,3 +118,89 @@ impl Ord for PendingWake { } } } +#[cfg(test)] +mod tests { + use std::{ + task::Poll, + time::{Duration, Instant}, + }; + + use assert_matches::assert_matches; + use futures::{future::poll_fn, pin_mut, Future}; + + use crate::LOG_TARGET; + + use super::{PendingWake, WaitingQueue}; + + #[test] + fn wait_ready_waits_for_earliest_event_always() { + sp_tracing::try_init_simple(); + let mut queue = WaitingQueue::new(); + let now = Instant::now(); + let start = now; + queue.push(PendingWake { payload: 1u32, ready_at: now + Duration::from_millis(3) }); + // Push another one in order: + queue.push(PendingWake { payload: 2u32, ready_at: now + Duration::from_millis(5) }); + // Push one out of order: + queue.push(PendingWake { payload: 0u32, ready_at: now + Duration::from_millis(1) }); + // Push another one at same timestamp (should become ready at the same time) + queue.push(PendingWake { payload: 10u32, ready_at: now + Duration::from_millis(1) 
}); + + futures::executor::block_on(async move { + // No time passed yet - nothing should be ready. + assert!(queue.pop_ready(now).is_none(), "No time has passed, nothing should be ready"); + + // Receive them in order at expected times: + queue.wait_ready(now).await; + gum::trace!(target: LOG_TARGET, "After first wait."); + + let now = start + Duration::from_millis(1); + assert!(Instant::now() - start >= Duration::from_millis(1)); + assert_eq!(queue.pop_ready(now).map(|p| p.payload), Some(0u32)); + // One more should be ready: + assert_eq!(queue.pop_ready(now).map(|p| p.payload), Some(10u32)); + assert!(queue.pop_ready(now).is_none(), "No more entry expected to be ready."); + + queue.wait_ready(now).await; + gum::trace!(target: LOG_TARGET, "After second wait."); + let now = start + Duration::from_millis(3); + assert!(Instant::now() - start >= Duration::from_millis(3)); + assert_eq!(queue.pop_ready(now).map(|p| p.payload), Some(1u32)); + assert!(queue.pop_ready(now).is_none(), "No more entry expected to be ready."); + + // Push in between wait: + poll_fn(|cx| { + let fut = queue.wait_ready(now); + pin_mut!(fut); + assert_matches!(fut.poll(cx), Poll::Pending); + Poll::Ready(()) + }) + .await; + queue.push(PendingWake { payload: 3u32, ready_at: start + Duration::from_millis(4) }); + + queue.wait_ready(now).await; + // Newly pushed element should have become ready: + gum::trace!(target: LOG_TARGET, "After third wait."); + let now = start + Duration::from_millis(4); + assert!(Instant::now() - start >= Duration::from_millis(4)); + assert_eq!(queue.pop_ready(now).map(|p| p.payload), Some(3u32)); + assert!(queue.pop_ready(now).is_none(), "No more entry expected to be ready."); + + queue.wait_ready(now).await; + gum::trace!(target: LOG_TARGET, "After fourth wait."); + let now = start + Duration::from_millis(5); + assert!(Instant::now() - start >= Duration::from_millis(5)); + assert_eq!(queue.pop_ready(now).map(|p| p.payload), Some(2u32)); + 
assert!(queue.pop_ready(now).is_none(), "No more entry expected to be ready."); + + // queue empty - should wait forever now: + poll_fn(|cx| { + let fut = queue.wait_ready(now); + pin_mut!(fut); + assert_matches!(fut.poll(cx), Poll::Pending); + Poll::Ready(()) + }) + .await; + }); + } +} From 2fcebad80815ba80171410e271e0fbebe019e37b Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 2 Sep 2022 15:37:08 +0200 Subject: [PATCH 53/65] Speed up some crypto by 10x. --- Cargo.toml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b23c37d8ad5b..cf188adfe6cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -128,9 +128,9 @@ maintenance = { status = "actively-developed" } # # This list is ordered alphabetically. [profile.dev.package] -blake2b_simd = { opt-level = 3 } blake2 = { opt-level = 3 } blake2-rfc = { opt-level = 3 } +blake2b_simd = { opt-level = 3 } chacha20poly1305 = { opt-level = 3 } cranelift-codegen = { opt-level = 3 } cranelift-wasm = { opt-level = 3 } @@ -141,8 +141,8 @@ curve25519-dalek = { opt-level = 3 } ed25519-dalek = { opt-level = 3 } flate2 = { opt-level = 3 } futures-channel = { opt-level = 3 } -hashbrown = { opt-level = 3 } hash-db = { opt-level = 3 } +hashbrown = { opt-level = 3 } hmac = { opt-level = 3 } httparse = { opt-level = 3 } integer-sqrt = { opt-level = 3 } @@ -154,8 +154,8 @@ libz-sys = { opt-level = 3 } mio = { opt-level = 3 } nalgebra = { opt-level = 3 } num-bigint = { opt-level = 3 } -parking_lot_core = { opt-level = 3 } parking_lot = { opt-level = 3 } +parking_lot_core = { opt-level = 3 } percent-encoding = { opt-level = 3 } primitive-types = { opt-level = 3 } reed-solomon-novelpoly = { opt-level = 3 } @@ -165,6 +165,7 @@ sha2 = { opt-level = 3 } sha3 = { opt-level = 3 } smallvec = { opt-level = 3 } snow = { opt-level = 3 } +substrate-bip39 = {opt-level = 3} twox-hash = { opt-level = 3 } uint = { opt-level = 3 } wasmi = { opt-level = 3 } From 3d444aebac470047434f6d0d31366adfb6bb7736 
Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 2 Sep 2022 16:01:24 +0200 Subject: [PATCH 54/65] Fix redundant import. --- .../dispute-distribution/src/receiver/batches/waiting_queue.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/node/network/dispute-distribution/src/receiver/batches/waiting_queue.rs b/node/network/dispute-distribution/src/receiver/batches/waiting_queue.rs index 95e739b3cd07..3bbaa6376da1 100644 --- a/node/network/dispute-distribution/src/receiver/batches/waiting_queue.rs +++ b/node/network/dispute-distribution/src/receiver/batches/waiting_queue.rs @@ -19,8 +19,6 @@ use std::{cmp::Ordering, collections::BinaryHeap, time::Instant}; use futures::future::pending; use futures_timer::Delay; -use crate::LOG_TARGET; - /// Wait asynchronously for given `Instant`s one after the other. /// /// `PendingWake`s can be inserted and `WaitingQueue` makes `wait_ready()` to always wait for the From e7923a73202bb1a145742acae802a78041d47bfa Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 2 Sep 2022 16:01:40 +0200 Subject: [PATCH 55/65] Add some tracing. --- node/network/dispute-distribution/src/receiver/mod.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/node/network/dispute-distribution/src/receiver/mod.rs b/node/network/dispute-distribution/src/receiver/mod.rs index 0fb4213562ac..eb9ba44d3d8b 100644 --- a/node/network/dispute-distribution/src/receiver/mod.rs +++ b/node/network/dispute-distribution/src/receiver/mod.rs @@ -345,9 +345,16 @@ where Ok(votes) => votes, }; - match self.batches.find_batch(*valid_vote.0.candidate_hash(), candidate_receipt)? { + let candidate_hash = *valid_vote.0.candidate_hash(); + + match self.batches.find_batch(candidate_hash, candidate_receipt)? 
{ FoundBatch::Created(batch) => { // There was no entry yet - start import immediately: + gum::trace!( + target: LOG_TARGET, + ?candidate_hash, + "No batch yet - triggering immediate import" + ); let import = PreparedImport { candidate_receipt: batch.candidate_receipt().clone(), statements: vec![valid_vote, invalid_vote], @@ -356,6 +363,7 @@ where self.start_import(import).await; }, FoundBatch::Found(batch) => { + gum::trace!(target: LOG_TARGET, ?candidate_hash, "Batch exists - batching request"); let batch_result = batch.add_votes(valid_vote, invalid_vote, peer, pending_response); From 8b8f7229db3a5ead07af47f96ef667cde0a11a7b Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 2 Sep 2022 16:02:00 +0200 Subject: [PATCH 56/65] Better sender rate limit --- .../dispute-distribution/src/sender/mod.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/node/network/dispute-distribution/src/sender/mod.rs b/node/network/dispute-distribution/src/sender/mod.rs index 6d419b3eae33..fbec05e7d5ac 100644 --- a/node/network/dispute-distribution/src/sender/mod.rs +++ b/node/network/dispute-distribution/src/sender/mod.rs @@ -102,6 +102,8 @@ impl DisputeSender { runtime: &mut RuntimeInfo, msg: DisputeMessage, ) -> Result<()> { + self.rate_limit.limit().await; + let req: DisputeRequest = msg.into(); let candidate_hash = req.0.candidate_receipt.hash(); match self.disputes.entry(candidate_hash) { @@ -122,9 +124,6 @@ impl DisputeSender { vacant.insert(send_task); }, } - - self.rate_limit.limit().await; - Ok(()) } @@ -163,8 +162,12 @@ impl DisputeSender { .retain(|candidate_hash, _| active_disputes.contains(candidate_hash)); // Iterates in order of insertion: + let mut should_rate_limit = true; for dispute in self.disputes.values_mut() { if have_new_sessions || dispute.has_failed_sends() { + if should_rate_limit { + self.rate_limit.limit().await; + } let sends_happened = dispute .refresh_sends(ctx, runtime, &self.active_sessions, &self.metrics) 
.await?; @@ -174,9 +177,7 @@ impl DisputeSender { // Reasoning: It would not be acceptable to slow down the whole subsystem, just // because of a few bad peers having problems. It is actually better to risk // running into their rate limit in that case and accept a minor reputation change. - if sends_happened && have_new_sessions { - self.rate_limit.limit().await; - } + should_rate_limit = sends_happened && have_new_sessions; } } @@ -186,8 +187,8 @@ impl DisputeSender { // recovered at startup will be relatively "old" anyway and we assume that no more than a // third of the validators will go offline at any point in time anyway. for dispute in unknown_disputes { - self.start_send_for_dispute(ctx, runtime, dispute).await?; self.rate_limit.limit().await; + self.start_send_for_dispute(ctx, runtime, dispute).await?; } Ok(()) } From a79a96d6b56606979e51013e5e9dd5e7fe5c6fa1 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 2 Sep 2022 16:02:34 +0200 Subject: [PATCH 57/65] Some tests. --- .../dispute-distribution/src/tests/mod.rs | 159 ++++++++++++++---- 1 file changed, 127 insertions(+), 32 deletions(-) diff --git a/node/network/dispute-distribution/src/tests/mod.rs b/node/network/dispute-distribution/src/tests/mod.rs index 8ef8286ea197..7f72d6df0af4 100644 --- a/node/network/dispute-distribution/src/tests/mod.rs +++ b/node/network/dispute-distribution/src/tests/mod.rs @@ -17,7 +17,12 @@ //! 
Subsystem unit tests -use std::{collections::HashSet, sync::Arc, task::Poll, time::Duration}; +use std::{ + collections::HashSet, + sync::Arc, + task::Poll, + time::{Duration, Instant}, +}; use assert_matches::assert_matches; use futures::{ @@ -52,7 +57,7 @@ use polkadot_node_subsystem_test_helpers::{ mock::make_ferdie_keystore, subsystem_test_harness, TestSubsystemContextHandle, }; use polkadot_primitives::v2::{ - AuthorityDiscoveryId, CandidateHash, Hash, SessionIndex, SessionInfo, + AuthorityDiscoveryId, CandidateHash, CandidateReceipt, Hash, SessionIndex, SessionInfo, }; use self::mock::{ @@ -60,7 +65,7 @@ use self::mock::{ MOCK_AUTHORITY_DISCOVERY, MOCK_NEXT_SESSION_INDEX, MOCK_NEXT_SESSION_INFO, MOCK_SESSION_INDEX, MOCK_SESSION_INFO, }; -use crate::{DisputeDistributionSubsystem, Metrics, LOG_TARGET}; +use crate::{DisputeDistributionSubsystem, Metrics, LOG_TARGET, SEND_RATE_LIMIT}; /// Useful mock providers. pub mod mock; @@ -72,42 +77,131 @@ fn send_dispute_sends_dispute() { let relay_parent = Hash::random(); let candidate = make_candidate_receipt(relay_parent); - let message = make_dispute_message(candidate.clone(), ALICE_INDEX, FERDIE_INDEX).await; - handle - .send(FromOrchestra::Communication { - msg: DisputeDistributionMessage::SendDispute(message.clone()), - }) - .await; + send_dispute(&mut handle, candidate, true).await; + conclude(&mut handle).await; + }; + test_harness(test); +} + +#[test] +fn send_honors_rate_limit() { + sp_tracing::try_init_simple(); + let test = |mut handle: TestSubsystemContextHandle, _req_cfg| async move { + let _ = handle_subsystem_startup(&mut handle, None).await; + + let relay_parent = Hash::random(); + let candidate = make_candidate_receipt(relay_parent); + let before_request = Instant::now(); + send_dispute(&mut handle, candidate, true).await; + // First send should not be rate limited: + gum::trace!("Passed time: {:#?}", Instant::now().saturating_duration_since(before_request)); + 
assert!(Instant::now().saturating_duration_since(before_request) < SEND_RATE_LIMIT); + + let relay_parent = Hash::random(); + let candidate = make_candidate_receipt(relay_parent); + send_dispute(&mut handle, candidate, false).await; + // Second send should be rate limited: + gum::trace!( + "Passed time for send_dispute: {:#?}", + Instant::now().saturating_duration_since(before_request) + ); + assert!(Instant::now() - before_request >= SEND_RATE_LIMIT); + conclude(&mut handle).await; + }; + test_harness(test); +} + +/// Helper for sending a new dispute to dispute-distribution sender and handling resulting messages. +async fn send_dispute( + handle: &mut TestSubsystemContextHandle, + candidate: CandidateReceipt, + needs_session_info: bool, +) { + let before_request = Instant::now(); + let message = make_dispute_message(candidate.clone(), ALICE_INDEX, FERDIE_INDEX).await; + gum::trace!( + "Passed time for making message: {:#?}", + Instant::now().saturating_duration_since(before_request) + ); + let before_request = Instant::now(); + handle + .send(FromOrchestra::Communication { + msg: DisputeDistributionMessage::SendDispute(message.clone()), + }) + .await; + gum::trace!( + "Passed time for sending message: {:#?}", + Instant::now().saturating_duration_since(before_request) + ); + if needs_session_info { // Requests needed session info: assert_matches!( - handle.recv().await, - AllMessages::RuntimeApi( - RuntimeApiMessage::Request( - hash, - RuntimeApiRequest::SessionInfo(session_index, tx) + handle.recv().await, + AllMessages::RuntimeApi( + RuntimeApiMessage::Request( + hash, + RuntimeApiRequest::SessionInfo(session_index, tx) ) ) => { - assert_eq!(session_index, MOCK_SESSION_INDEX); - assert_eq!( - hash, - message.candidate_receipt().descriptor.relay_parent + assert_eq!(session_index, MOCK_SESSION_INDEX); + assert_eq!( + hash, + message.candidate_receipt().descriptor.relay_parent ); - tx.send(Ok(Some(MOCK_SESSION_INFO.clone()))).expect("Receiver should stay alive."); - 
} + tx.send(Ok(Some(MOCK_SESSION_INFO.clone()))).expect("Receiver should stay alive."); + } ); + } - let expected_receivers = { - let info = &MOCK_SESSION_INFO; - info.discovery_keys - .clone() - .into_iter() - .filter(|a| a != &Sr25519Keyring::Ferdie.public().into()) - .collect() - // All validators are also authorities in the first session, so we are - // done here. - }; - check_sent_requests(&mut handle, expected_receivers, true).await; + let expected_receivers = { + let info = &MOCK_SESSION_INFO; + info.discovery_keys + .clone() + .into_iter() + .filter(|a| a != &Sr25519Keyring::Ferdie.public().into()) + .collect() + // All validators are also authorities in the first session, so we are + // done here. + }; + check_sent_requests(handle, expected_receivers, true).await; +} +// Things to test: +// - Request triggers import +// - Subsequent imports get batched +// - Batch live gets renewed if enough requests trickle in +// - Batch gets flushed if not. +// x Non authority requests get dropped. +// - Sending rate limit is honored. +// - Receiving rate limit is honored. 
+ +#[test] +fn received_non_authorities_are_dropped() { + let test = |mut handle: TestSubsystemContextHandle, + mut req_cfg: RequestResponseConfig| async move { + let req_tx = req_cfg.inbound_queue.as_mut().unwrap(); + let _ = handle_subsystem_startup(&mut handle, None).await; + + let relay_parent = Hash::random(); + let candidate = make_candidate_receipt(relay_parent); + let message = make_dispute_message(candidate.clone(), ALICE_INDEX, FERDIE_INDEX).await; + + // Non validator request should get dropped: + let rx_response = + send_network_dispute_request(req_tx, PeerId::random(), message.clone().into()).await; + + assert_matches!( + rx_response.await, + Ok(resp) => { + let sc_network::config::OutgoingResponse { + result: _, + reputation_changes, + sent_feedback: _, + } = resp; + // Peer should get punished: + assert_eq!(reputation_changes.len(), 1); + } + ); conclude(&mut handle).await; }; test_harness(test); @@ -115,6 +209,8 @@ fn send_dispute_sends_dispute() { #[test] fn received_request_triggers_import() { + sp_tracing::try_init_simple(); + let test = |mut handle: TestSubsystemContextHandle, mut req_cfg: RequestResponseConfig| async move { let req_tx = req_cfg.inbound_queue.as_mut().unwrap(); @@ -140,7 +236,6 @@ fn received_request_triggers_import() { assert_eq!(reputation_changes.len(), 1); } ); - // Nested valid and invalid import. // // Nested requests from same peer should get dropped. 
For the invalid request even From fd24899ced5b64dbe7ea2061609a2aa2210a294b Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Mon, 5 Sep 2022 10:14:41 +0300 Subject: [PATCH 58/65] Comments --- node/core/provisioner/src/metrics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/core/provisioner/src/metrics.rs b/node/core/provisioner/src/metrics.rs index aaa57fcc9da5..b8032a32f89d 100644 --- a/node/core/provisioner/src/metrics.rs +++ b/node/core/provisioner/src/metrics.rs @@ -30,7 +30,7 @@ struct MetricsInner { inherent_data_dispute_statement_sets: prometheus::Counter, inherent_data_dispute_statements: prometheus::CounterVec, - // The disputes received from `disputes-coordinator` by partition + /// The disputes received from `disputes-coordinator` by partition partitioned_disputes: prometheus::CounterVec, } From c1ed61512f81d7ee37a28ad9d2858e3b5eeee78e Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 5 Sep 2022 12:07:38 +0200 Subject: [PATCH 59/65] Tests --- .../src/receiver/batches/batch.rs | 5 +- .../dispute-distribution/src/receiver/mod.rs | 3 + .../src/receiver/peer_queues.rs | 3 + .../dispute-distribution/src/tests/mock.rs | 28 +- .../dispute-distribution/src/tests/mod.rs | 338 ++++++++++++------ 5 files changed, 267 insertions(+), 110 deletions(-) diff --git a/node/network/dispute-distribution/src/receiver/batches/batch.rs b/node/network/dispute-distribution/src/receiver/batches/batch.rs index 3d39b4a687b3..d96de1dfe8c3 100644 --- a/node/network/dispute-distribution/src/receiver/batches/batch.rs +++ b/node/network/dispute-distribution/src/receiver/batches/batch.rs @@ -114,10 +114,7 @@ impl Batch { /// * `candidate_receipt` - The candidate this batch is meant to track votes for. /// * `now` - current time stamp for calculating the first tick. /// - /// Returns: - /// - /// A batch and the first `Instant` you are supposed to call `tick`. - /// + /// Returns: A batch and the first `Instant` you are supposed to call `tick`. 
pub(super) fn new(candidate_receipt: CandidateReceipt, now: Instant) -> (Self, Instant) { let s = Self { candidate_hash: candidate_receipt.hash(), diff --git a/node/network/dispute-distribution/src/receiver/mod.rs b/node/network/dispute-distribution/src/receiver/mod.rs index eb9ba44d3d8b..e97fdb382561 100644 --- a/node/network/dispute-distribution/src/receiver/mod.rs +++ b/node/network/dispute-distribution/src/receiver/mod.rs @@ -80,7 +80,10 @@ const COST_APPARENT_FLOOD: Rep = Rep::CostMinor("Peer exceeded the rate limit.") /// in order for a batch to stay alive and not get flushed/imported to the dispute-coordinator. /// /// This ensures a timely import of batches. +#[cfg(not(test))] pub const MIN_KEEP_BATCH_ALIVE_VOTES: u32 = 10; +#[cfg(test)] +pub const MIN_KEEP_BATCH_ALIVE_VOTES: u32 = 2; /// Time we allow to pass for new votes to trickle in. /// diff --git a/node/network/dispute-distribution/src/receiver/peer_queues.rs b/node/network/dispute-distribution/src/receiver/peer_queues.rs index bda06e02bc9c..1c53ee271f47 100644 --- a/node/network/dispute-distribution/src/receiver/peer_queues.rs +++ b/node/network/dispute-distribution/src/receiver/peer_queues.rs @@ -30,7 +30,10 @@ use crate::RECEIVE_RATE_LIMIT; /// in 10_000 * size_of(`IncomingRequest`) in the worst case. /// /// `PEER_QUEUE_CAPACITY` must not be 0 for obvious reasons. +#[cfg(not(test))] pub const PEER_QUEUE_CAPACITY: usize = 10; +#[cfg(test)] +pub const PEER_QUEUE_CAPACITY: usize = 2; /// Queues for messages from authority peers for rate limiting. 
/// diff --git a/node/network/dispute-distribution/src/tests/mock.rs b/node/network/dispute-distribution/src/tests/mock.rs index 08428d5852cc..aa2a4485d480 100644 --- a/node/network/dispute-distribution/src/tests/mock.rs +++ b/node/network/dispute-distribution/src/tests/mock.rs @@ -20,6 +20,7 @@ use std::{ collections::{HashMap, HashSet}, sync::Arc, + time::Instant, }; use async_trait::async_trait; @@ -38,6 +39,8 @@ use polkadot_primitives::v2::{ }; use polkadot_primitives_test_helpers::dummy_candidate_descriptor; +use crate::LOG_TARGET; + pub const MOCK_SESSION_INDEX: SessionIndex = 1; pub const MOCK_NEXT_SESSION_INDEX: SessionIndex = 2; pub const MOCK_VALIDATORS: [Sr25519Keyring; 6] = [ @@ -54,6 +57,8 @@ pub const MOCK_AUTHORITIES_NEXT_SESSION: [Sr25519Keyring; 2] = pub const FERDIE_INDEX: ValidatorIndex = ValidatorIndex(0); pub const ALICE_INDEX: ValidatorIndex = ValidatorIndex(1); +pub const BOB_INDEX: ValidatorIndex = ValidatorIndex(2); +pub const CHARLIE_INDEX: ValidatorIndex = ValidatorIndex(3); lazy_static! 
{ @@ -148,12 +153,22 @@ pub async fn make_dispute_message( invalid_validator: ValidatorIndex, ) -> DisputeMessage { let candidate_hash = candidate.hash(); + let before_request = Instant::now(); let valid_vote = make_explicit_signed(MOCK_VALIDATORS[valid_validator.0 as usize], candidate_hash, true) .await; + gum::trace!( + "Passed time for valid vote: {:#?}", + Instant::now().saturating_duration_since(before_request) + ); + let before_request = Instant::now(); let invalid_vote = make_explicit_signed(MOCK_VALIDATORS[invalid_validator.0 as usize], candidate_hash, false) .await; + gum::trace!( + "Passed time for invald vote: {:#?}", + Instant::now().saturating_duration_since(before_request) + ); DisputeMessage::from_signed_statements( valid_vote, valid_validator, @@ -206,10 +221,15 @@ impl AuthorityDiscovery for MockAuthorityDiscovery { ) -> Option> { for (a, p) in self.peer_ids.iter() { if p == &peer_id { - return Some(HashSet::from([MOCK_VALIDATORS_DISCOVERY_KEYS - .get(&a) - .unwrap() - .clone()])) + let result = + HashSet::from([MOCK_VALIDATORS_DISCOVERY_KEYS.get(&a).unwrap().clone()]); + gum::trace!( + target: LOG_TARGET, + %peer_id, + ?result, + "Returning authority ids for peer id" + ); + return Some(result) } } diff --git a/node/network/dispute-distribution/src/tests/mod.rs b/node/network/dispute-distribution/src/tests/mod.rs index 7f72d6df0af4..56cdd467fd62 100644 --- a/node/network/dispute-distribution/src/tests/mod.rs +++ b/node/network/dispute-distribution/src/tests/mod.rs @@ -27,7 +27,7 @@ use std::{ use assert_matches::assert_matches; use futures::{ channel::{mpsc, oneshot}, - future::poll_fn, + future::{poll_fn, ready}, pin_mut, Future, SinkExt, }; use futures_timer::Delay; @@ -65,7 +65,11 @@ use self::mock::{ MOCK_AUTHORITY_DISCOVERY, MOCK_NEXT_SESSION_INDEX, MOCK_NEXT_SESSION_INFO, MOCK_SESSION_INDEX, MOCK_SESSION_INFO, }; -use crate::{DisputeDistributionSubsystem, Metrics, LOG_TARGET, SEND_RATE_LIMIT}; +use crate::{ + 
receiver::BATCH_COLLECTING_INTERVAL, + tests::mock::{BOB_INDEX, CHARLIE_INDEX}, + DisputeDistributionSubsystem, Metrics, LOG_TARGET, SEND_RATE_LIMIT, +}; /// Useful mock providers. pub mod mock; @@ -95,7 +99,8 @@ fn send_honors_rate_limit() { send_dispute(&mut handle, candidate, true).await; // First send should not be rate limited: gum::trace!("Passed time: {:#?}", Instant::now().saturating_duration_since(before_request)); - assert!(Instant::now().saturating_duration_since(before_request) < SEND_RATE_LIMIT); + // This test would likely be flaky on CI: + //assert!(Instant::now().saturating_duration_since(before_request) < SEND_RATE_LIMIT); let relay_parent = Hash::random(); let candidate = make_candidate_receipt(relay_parent); @@ -167,13 +172,14 @@ async fn send_dispute( } // Things to test: -// - Request triggers import -// - Subsequent imports get batched -// - Batch live gets renewed if enough requests trickle in -// - Batch gets flushed if not. +// x Request triggers import +// x Subsequent imports get batched +// x Batch gets flushed. +// x Batch gets renewed. // x Non authority requests get dropped. -// - Sending rate limit is honored. -// - Receiving rate limit is honored. +// x Sending rate limit is honored. +// x Receiving rate limit is honored. 
+// x Duplicate requests on batch are dropped #[test] fn received_non_authorities_are_dropped() { @@ -209,8 +215,6 @@ fn received_non_authorities_are_dropped() { #[test] fn received_request_triggers_import() { - sp_tracing::try_init_simple(); - let test = |mut handle: TestSubsystemContextHandle, mut req_cfg: RequestResponseConfig| async move { let req_tx = req_cfg.inbound_queue.as_mut().unwrap(); @@ -220,125 +224,255 @@ fn received_request_triggers_import() { let candidate = make_candidate_receipt(relay_parent); let message = make_dispute_message(candidate.clone(), ALICE_INDEX, FERDIE_INDEX).await; - // Non validator request should get dropped: - let rx_response = - send_network_dispute_request(req_tx, PeerId::random(), message.clone().into()).await; + nested_network_dispute_request( + &mut handle, + req_tx, + MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Alice), + message.clone().into(), + ImportStatementsResult::ValidImport, + true, + move |_handle, _req_tx, _message| ready(()), + ) + .await; - assert_matches!( - rx_response.await, - Ok(resp) => { - let sc_network::config::OutgoingResponse { - result: _, - reputation_changes, - sent_feedback: _, - } = resp; - // Peer should get punished: - assert_eq!(reputation_changes.len(), 1); - } - ); - // Nested valid and invalid import. - // - // Nested requests from same peer should get dropped. For the invalid request even - // subsequent requests should get dropped. 
+ gum::trace!(target: LOG_TARGET, "Concluding."); + conclude(&mut handle).await; + }; + test_harness(test); +} + +#[test] +fn batching_works() { + let test = |mut handle: TestSubsystemContextHandle, + mut req_cfg: RequestResponseConfig| async move { + let req_tx = req_cfg.inbound_queue.as_mut().unwrap(); + let _ = handle_subsystem_startup(&mut handle, None).await; + + let relay_parent = Hash::random(); + let candidate = make_candidate_receipt(relay_parent); + let message = make_dispute_message(candidate.clone(), ALICE_INDEX, FERDIE_INDEX).await; + + // Initial request should get forwarded immediately: nested_network_dispute_request( &mut handle, req_tx, MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Alice), message.clone().into(), - ImportStatementsResult::InvalidImport, + ImportStatementsResult::ValidImport, true, - move |handle, req_tx, message| { - nested_network_dispute_request( - handle, - req_tx, - MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Bob), - message.clone().into(), - ImportStatementsResult::ValidImport, - false, - move |_, req_tx, message| async move { - // Another request from Alice should get dropped (request already in - // flight): - { - let rx_response = send_network_dispute_request( - req_tx, - MOCK_AUTHORITY_DISCOVERY - .get_peer_id_by_authority(Sr25519Keyring::Alice), - message.clone(), - ) - .await; - - assert_matches!( - rx_response.await, - Err(err) => { - gum::trace!( - target: LOG_TARGET, - ?err, - "Request got dropped - other request already in flight" - ); - } - ); - } - // Another request from Bob should get dropped (request already in - // flight): - { - let rx_response = send_network_dispute_request( - req_tx, - MOCK_AUTHORITY_DISCOVERY - .get_peer_id_by_authority(Sr25519Keyring::Bob), - message.clone(), - ) - .await; - - assert_matches!( - rx_response.await, - Err(err) => { - gum::trace!( - target: LOG_TARGET, - ?err, - "Request got dropped - other request already in flight" - ); - } - ); - } 
- }, - ) - }, + move |_handle, _req_tx, _message| ready(()), ) .await; - // Subsequent sends from Alice should fail (peer is banned): - { - let rx_response = send_network_dispute_request( - req_tx, - MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Alice), - message.clone().into(), - ) - .await; + let mut rx_responses = Vec::new(); + + let message = make_dispute_message(candidate.clone(), BOB_INDEX, FERDIE_INDEX).await; + let peer = MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Bob); + rx_responses.push(send_network_dispute_request(req_tx, peer, message.clone().into()).await); + + let message = make_dispute_message(candidate.clone(), CHARLIE_INDEX, FERDIE_INDEX).await; + let peer = MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Charlie); + rx_responses.push(send_network_dispute_request(req_tx, peer, message.clone().into()).await); + gum::trace!("Imported 3 votes into batch"); + + Delay::new(BATCH_COLLECTING_INTERVAL).await; + gum::trace!("Batch should still be alive"); + // Batch should still be alive (2 new votes): + // Let's import two more votes, but fully duplicates - should not extend batch live. 
+ gum::trace!("Importing duplicate votes"); + let mut rx_responses_duplicate = Vec::new(); + let message = make_dispute_message(candidate.clone(), BOB_INDEX, FERDIE_INDEX).await; + let peer = MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Bob); + rx_responses_duplicate + .push(send_network_dispute_request(req_tx, peer, message.clone().into()).await); + + let message = make_dispute_message(candidate.clone(), CHARLIE_INDEX, FERDIE_INDEX).await; + let peer = MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Charlie); + rx_responses_duplicate + .push(send_network_dispute_request(req_tx, peer, message.clone().into()).await); + + for rx_response in rx_responses_duplicate { + assert_matches!( + rx_response.await, + Ok(resp) => { + let sc_network::config::OutgoingResponse { + result, + reputation_changes, + sent_feedback: _, + } = resp; + gum::trace!( + target: LOG_TARGET, + ?reputation_changes, + "Received reputation changes." + ); + // We don't punish on that. 
+ assert_eq!(reputation_changes.len(), 0); + assert_matches!(result, Err(())); + } + ); + } + + Delay::new(BATCH_COLLECTING_INTERVAL).await; + gum::trace!("Batch should be ready now (only duplicates have been added)"); + + let pending_confirmation = assert_matches!( + handle.recv().await, + AllMessages::DisputeCoordinator( + DisputeCoordinatorMessage::ImportStatements { + candidate_receipt: _, + session, + statements, + pending_confirmation: Some(pending_confirmation), + } + ) => { + assert_eq!(session, MOCK_SESSION_INDEX); + assert_eq!(statements.len(), 3); + pending_confirmation + } + ); + pending_confirmation.send(ImportStatementsResult::ValidImport).unwrap(); + + for rx_response in rx_responses { assert_matches!( rx_response.await, - Err(err) => { + Ok(resp) => { + let sc_network::config::OutgoingResponse { + result, + reputation_changes: _, + sent_feedback, + } = resp; + + let result = result.unwrap(); + let decoded = + ::decode(&mut result.as_slice()).unwrap(); + + assert!(decoded == DisputeResponse::Confirmed); + if let Some(sent_feedback) = sent_feedback { + sent_feedback.send(()).unwrap(); + } gum::trace!( target: LOG_TARGET, - ?err, - "Request got dropped - peer is banned." + "Valid import happened." 
); + } ); } - // But should work fine for Bob: + gum::trace!(target: LOG_TARGET, "Concluding."); + conclude(&mut handle).await; + }; + test_harness(test); +} + +#[test] +fn receive_rate_limit_is_enforced() { + let test = |mut handle: TestSubsystemContextHandle, + mut req_cfg: RequestResponseConfig| async move { + let req_tx = req_cfg.inbound_queue.as_mut().unwrap(); + let _ = handle_subsystem_startup(&mut handle, None).await; + + let relay_parent = Hash::random(); + let candidate = make_candidate_receipt(relay_parent); + let message = make_dispute_message(candidate.clone(), ALICE_INDEX, FERDIE_INDEX).await; + + // Initial request should get forwarded immediately: nested_network_dispute_request( &mut handle, req_tx, - MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Bob), + MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Alice), message.clone().into(), ImportStatementsResult::ValidImport, - false, - |_, _, _| async {}, + true, + move |_handle, _req_tx, _message| ready(()), ) .await; + let mut rx_responses = Vec::new(); + + let peer = MOCK_AUTHORITY_DISCOVERY.get_peer_id_by_authority(Sr25519Keyring::Bob); + + let message = make_dispute_message(candidate.clone(), BOB_INDEX, FERDIE_INDEX).await; + rx_responses.push(send_network_dispute_request(req_tx, peer, message.clone().into()).await); + + let message = make_dispute_message(candidate.clone(), CHARLIE_INDEX, FERDIE_INDEX).await; + rx_responses.push(send_network_dispute_request(req_tx, peer, message.clone().into()).await); + + gum::trace!("Import one too much:"); + + let message = make_dispute_message(candidate.clone(), CHARLIE_INDEX, ALICE_INDEX).await; + let rx_response_flood = + send_network_dispute_request(req_tx, peer, message.clone().into()).await; + + assert_matches!( + rx_response_flood.await, + Ok(resp) => { + let sc_network::config::OutgoingResponse { + result: _, + reputation_changes, + sent_feedback: _, + } = resp; + gum::trace!( + target: LOG_TARGET, + 
?reputation_changes, + "Received reputation changes." + ); + // Received punishment for flood: + assert_eq!(reputation_changes.len(), 1); + } + ); + gum::trace!("Need to wait 2 patch intervals:"); + Delay::new(BATCH_COLLECTING_INTERVAL).await; + Delay::new(BATCH_COLLECTING_INTERVAL).await; + + gum::trace!("Batch should be ready now"); + + let pending_confirmation = assert_matches!( + handle.recv().await, + AllMessages::DisputeCoordinator( + DisputeCoordinatorMessage::ImportStatements { + candidate_receipt: _, + session, + statements, + pending_confirmation: Some(pending_confirmation), + } + ) => { + assert_eq!(session, MOCK_SESSION_INDEX); + // Only 3 as fourth was flood: + assert_eq!(statements.len(), 3); + pending_confirmation + } + ); + pending_confirmation.send(ImportStatementsResult::ValidImport).unwrap(); + + for rx_response in rx_responses { + assert_matches!( + rx_response.await, + Ok(resp) => { + let sc_network::config::OutgoingResponse { + result, + reputation_changes: _, + sent_feedback, + } = resp; + + let result = result.unwrap(); + let decoded = + ::decode(&mut result.as_slice()).unwrap(); + + assert!(decoded == DisputeResponse::Confirmed); + if let Some(sent_feedback) = sent_feedback { + sent_feedback.send(()).unwrap(); + } + gum::trace!( + target: LOG_TARGET, + "Valid import happened." 
+ ); + + } + ); + } + gum::trace!(target: LOG_TARGET, "Concluding."); conclude(&mut handle).await; }; From f4c530c8c26e9fa6115c524efefe28151f7c3389 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Mon, 5 Sep 2022 12:33:16 +0200 Subject: [PATCH 60/65] Add logging to rate limiter --- .../dispute-distribution/src/sender/mod.rs | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/node/network/dispute-distribution/src/sender/mod.rs b/node/network/dispute-distribution/src/sender/mod.rs index fbec05e7d5ac..09b902173ede 100644 --- a/node/network/dispute-distribution/src/sender/mod.rs +++ b/node/network/dispute-distribution/src/sender/mod.rs @@ -16,10 +16,16 @@ use std::{ collections::{HashMap, HashSet}, + pin::Pin, + task::Poll, time::Duration, }; -use futures::channel::{mpsc, oneshot}; +use futures::{ + channel::{mpsc, oneshot}, + future::poll_fn, + Future, +}; use futures_timer::Delay; use indexmap::{map::Entry, IndexMap}; @@ -376,15 +382,23 @@ impl RateLimit { Self { limit: Delay::new(SEND_RATE_LIMIT) } } - /// Take the `RateLimit` and replace with one that immediately is ready. - fn take(&mut self) -> Self { - std::mem::replace(self, RateLimit::new()) - } - /// Wait until ready and prepare for next call. 
async fn limit(&mut self) { - let old = self.take(); - old.limit.await; + // Wait for rate limit and add some logging: + poll_fn(|cx| { + let old_limit = Pin::new(&mut self.limit); + match old_limit.poll(cx) { + Poll::Pending => { + gum::debug!( + target: LOG_TARGET, + "Sending rate limit hit, slowing down requests" + ); + Poll::Pending + }, + Poll::Ready(()) => Poll::Ready(()), + } + }) + .await; *self = Self::new_limit(); } } From 795c6c1ce8daa40e6122fd43ffe1933061341e07 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 9 Sep 2022 01:37:03 +0200 Subject: [PATCH 61/65] Update roadmap/implementers-guide/src/node/disputes/dispute-distribution.md Co-authored-by: Tsvetomir Dimitrov --- .../src/node/disputes/dispute-distribution.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md b/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md index dc69876aba9a..c2b046b5971c 100644 --- a/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md +++ b/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md @@ -142,7 +142,7 @@ except that if the local node deemed the candidate valid, the `SendDispute` message will contain a valid vote signed by our node and will contain the initially received `Invalid` vote. -Note, that we rely on the coordinator to check validity of a dispute for spam +Note, that we rely on `dispute-coordinator` to check validity of a dispute for spam protection (see below). 
## Sending of messages From d46be7670012e86cc2554180f893747a6e49c1db Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 9 Sep 2022 01:38:00 +0200 Subject: [PATCH 62/65] Update roadmap/implementers-guide/src/node/disputes/dispute-distribution.md Co-authored-by: Tsvetomir Dimitrov --- .../src/node/disputes/dispute-distribution.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md b/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md index c2b046b5971c..06770326e9bb 100644 --- a/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md +++ b/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md @@ -297,7 +297,7 @@ is `500ms` and above `RATE_LIMIT` is `100ms`. 1/3 of validators are malicious, so for 1000 this means around 330 malicious actors worst case. All those actors can send a message every `100ms`, that is 10 per second. This -means at the begin of an attack they can open up around 3300 batches. Each +means at the beginning of an attack they can open up around 3300 batches. Each containing two votes. So memory usage is still negligible. In reality it is even less, as we also demand 10 new votes to trickle in per batch in order to keep it alive, every `500ms`. 
Hence for the first second, each batch requires 20 votes From 9b8787d83e13e3ee2d0e4132a41d74ca00dced99 Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 9 Sep 2022 01:38:25 +0200 Subject: [PATCH 63/65] Update node/network/dispute-distribution/src/receiver/mod.rs Co-authored-by: Tsvetomir Dimitrov --- node/network/dispute-distribution/src/receiver/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/network/dispute-distribution/src/receiver/mod.rs b/node/network/dispute-distribution/src/receiver/mod.rs index e97fdb382561..ee92100d190f 100644 --- a/node/network/dispute-distribution/src/receiver/mod.rs +++ b/node/network/dispute-distribution/src/receiver/mod.rs @@ -245,7 +245,7 @@ where let rate_limited = self.peer_queues.pop_reqs(); pin_mut!(rate_limited); - // We poll rate_limit before batches, so we don't unecessarily delay importing to + // We poll rate_limit before batches, so we don't unnecessarily delay importing to // batches. if let Poll::Ready(reqs) = rate_limited.poll(ctx) { return Poll::Ready(Ok(MuxedMessage::WakePeerQueuesPopReqs(reqs))) From 9795098956be5187b3d0e04a6ecbc29d29b7bc9f Mon Sep 17 00:00:00 2001 From: Robert Klotzner Date: Fri, 9 Sep 2022 14:02:21 +0200 Subject: [PATCH 64/65] Review feedback. 
--- node/network/dispute-distribution/src/receiver/batches/mod.rs | 2 +- .../src/node/disputes/dispute-distribution.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/node/network/dispute-distribution/src/receiver/batches/mod.rs b/node/network/dispute-distribution/src/receiver/batches/mod.rs index 6163d523a4c6..d9561adb7b83 100644 --- a/node/network/dispute-distribution/src/receiver/batches/mod.rs +++ b/node/network/dispute-distribution/src/receiver/batches/mod.rs @@ -133,7 +133,7 @@ impl Batches { // Wait for at least one batch to become ready: self.waiting_queue.wait_ready(now).await; - // Process all ready waits: + // Process all ready entries: while let Some(wake) = self.waiting_queue.pop_ready(now) { let batch = self.batches.remove(&wake.payload); debug_assert!( diff --git a/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md b/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md index 06770326e9bb..0bd204934ee3 100644 --- a/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md +++ b/roadmap/implementers-guide/src/node/disputes/dispute-distribution.md @@ -161,7 +161,7 @@ statements so they can include them in blocks. We only consider a message transmitted, once we received a confirmation message. If not, we will keep retrying getting that message out as long as the dispute is -deemed alive. To determine whether a dispute is still alive we will the +deemed alive. To determine whether a dispute is still alive we will ask the `dispute-coordinator` for a list of all still active disputes via a `DisputeCoordinatorMessage::ActiveDisputes` message before each retry run. Once a dispute is no longer live, we will clean up the state accordingly. @@ -217,7 +217,7 @@ not send messages at a high rate. We can therefore enforce conservative rate limits and thus minimize harm spamming malicious nodes can have. 
Before we dive into how rate limiting solves all spam issues elegantly, let's -further discuss that honest behaviour further: +discuss that honest behaviour further: What about session changes? Here we might have to inform a new validator set of lots of already existing disputes at once. From 49a7b24cd76f1d038b3476d21b82bc797c7910e0 Mon Sep 17 00:00:00 2001 From: Tsvetomir Dimitrov Date: Wed, 14 Sep 2022 13:44:03 +0300 Subject: [PATCH 65/65] More precise log messages --- node/core/provisioner/src/lib.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/node/core/provisioner/src/lib.rs b/node/core/provisioner/src/lib.rs index fc6fa3e18ddd..0754e2c4c87e 100644 --- a/node/core/provisioner/src/lib.rs +++ b/node/core/provisioner/src/lib.rs @@ -689,12 +689,14 @@ fn bitfields_indicate_availability( 3 * availability.count_ones() >= 2 * availability.len() } +// If we have to be absolutely precise here, this method gets the version of the `ParachainHost` api. +// For brevity we'll just call it 'runtime version'. 
async fn has_required_runtime( sender: &mut impl overseer::ProvisionerSenderTrait, relay_parent: Hash, required_runtime_version: u32, ) -> bool { - gum::trace!(target: LOG_TARGET, ?relay_parent, "Fetching runtime version"); + gum::trace!(target: LOG_TARGET, ?relay_parent, "Fetching ParachainHost runtime api version"); let (tx, rx) = oneshot::channel(); sender @@ -708,7 +710,7 @@ async fn has_required_runtime( ?relay_parent, ?runtime_version, ?required_runtime_version, - "Fetched runtime version" + "Fetched ParachainHost runtime api version" ); runtime_version >= required_runtime_version }, @@ -717,7 +719,7 @@ async fn has_required_runtime( target: LOG_TARGET, ?relay_parent, ?error, - "Execution error while fetching runtime version" + "Execution error while fetching ParachainHost runtime api version" ); false }, @@ -725,7 +727,7 @@ async fn has_required_runtime( gum::trace!( target: LOG_TARGET, ?relay_parent, - "NotSupported error while fetching runtime version" + "NotSupported error while fetching ParachainHost runtime api version" ); false }, @@ -733,7 +735,7 @@ async fn has_required_runtime( gum::trace!( target: LOG_TARGET, ?relay_parent, - "Cancelled error while fetching runtime version" + "Cancelled error while fetching ParachainHost runtime api version" ); false },