Improve Matchmaking finalizers #357
Changes from all commits: ab72bb9, 05c8030, f61d419, b016be6, 59a9706, 8aef089, dc86913, ad57f86
@@ -15,7 +15,7 @@
 from hivemind.p2p import P2P, P2PContext, P2PHandlerError, PeerID, ServicerBase
 from hivemind.proto import averaging_pb2
 from hivemind.utils import TimedStorage, get_dht_time, get_logger, timed_storage
-from hivemind.utils.asyncio import anext
+from hivemind.utils.asyncio import anext, cancel_and_wait

 logger = get_logger(__name__)

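The only change in this hunk (presumably hivemind/averaging/matchmaking.py) is importing the cancel_and_wait helper used by the finalizers below. The helper itself is not shown in this diff; what follows is a minimal sketch of what such a utility typically looks like, under the assumption that it cancels a task and then waits for it to actually finish (the real implementation in hivemind.utils.asyncio may differ in details):

```python
import asyncio
from typing import Awaitable


async def cancel_and_wait(awaitable: Awaitable) -> bool:
    """Cancel a task and wait until it has actually finished.

    Returns True if the task ended up cancelled, False if it had already
    completed. Sketch only - not necessarily the library's implementation.
    """
    task = asyncio.ensure_future(awaitable)
    task.cancel()  # returns False if the task is already done, which is fine
    try:
        await task
        return False
    except asyncio.CancelledError:
        return True
```
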
@@ -127,10 +127,9 @@ async def look_for_group(self, *, data_for_gather: bytes, timeout: Optional[floa
             raise

         finally:
-            if not request_leaders_task.done():
-                request_leaders_task.cancel()
-            if not self.assembled_group.done():
-                self.assembled_group.cancel()
+            await cancel_and_wait(request_leaders_task)
+            self.assembled_group.cancel()

             while len(self.current_followers) > 0:
                 await self.follower_was_discarded.wait()
                 self.follower_was_discarded.clear()

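The behavioral difference in this finalizer: Task.cancel() only requests cancellation and returns immediately, so the old code could leave request_leaders_task still unwinding after look_for_group returned, whereas cancel_and_wait blocks until the task has really finished. A self-contained illustration (the worker/main names are hypothetical, not part of the PR):

```python
import asyncio


async def worker(log: list) -> None:
    try:
        await asyncio.sleep(10)
    finally:
        log.append("worker cleanup ran")  # cleanup we want finished before moving on


async def main() -> None:
    log = []
    task = asyncio.create_task(worker(log))
    await asyncio.sleep(0)  # let the worker start and suspend
    task.cancel()           # only *requests* cancellation
    print(log)              # [] - the worker's cleanup has not run yet
    try:
        await task          # what cancel_and_wait does for us
    except asyncio.CancelledError:
        pass
    print(log)              # ['worker cleanup ran']


asyncio.run(main())
```
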
@@ -229,7 +228,7 @@ async def request_join_group(self, leader: PeerID, expiration_time: DHTExpiratio
             logger.debug(f"{self} - potential leader {leader} did not respond within {self.request_timeout}")
             return None
         except (P2PHandlerError, StopAsyncIteration) as e:
-            logger.error(f"{self} - failed to request potential leader {leader}: {e}")
+            logger.exception(f"{self} - failed to request potential leader {leader}:")
             return None

         finally:

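logger.exception logs the same message at ERROR level but also attaches the traceback of the exception currently being handled, so interpolating {e} into the message is no longer necessary. A quick illustration:

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("demo")

try:
    {}["missing"]
except KeyError:
    # logger.error(f"lookup failed: {e}") would log a single line with no traceback;
    # logger.exception logs the same message plus the full traceback of the
    # exception currently being handled.
    logger.exception("lookup failed:")
```
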
@@ -413,10 +412,9 @@ async def begin_search(self, key_manager: GroupKeyManager, timeout: Optional[flo
         try:
             yield self
         finally:
-            if not update_queue_task.done():
-                update_queue_task.cancel()
-            if declare and not declare_averager_task.done():
-                declare_averager_task.cancel()
+            await cancel_and_wait(update_queue_task)
+            if declare:
+                await cancel_and_wait(declare_averager_task)

             for field in (
                 self.past_attempts,

Review comment: Using …

@@ -477,37 +475,31 @@ def request_expiration_time(self) -> float:
         else:
             return min(get_dht_time() + self.averaging_expiration, self.search_end_time)

-    async def _update_queue_periodically(self, key_manager: GroupKeyManager):
-        try:
-            DISCREPANCY = timed_storage.MAX_DHT_TIME_DISCREPANCY_SECONDS
-            while get_dht_time() < self.search_end_time:
-                new_peers = await key_manager.get_averagers(key_manager.current_key, only_active=True)
-                self.max_assured_time = max(
-                    self.max_assured_time, get_dht_time() + self.averaging_expiration - DISCREPANCY
-                )
+    async def _update_queue_periodically(self, key_manager: GroupKeyManager) -> None:
+        DISCREPANCY = timed_storage.MAX_DHT_TIME_DISCREPANCY_SECONDS
+        while get_dht_time() < self.search_end_time:
+            new_peers = await key_manager.get_averagers(key_manager.current_key, only_active=True)
+            self.max_assured_time = max(
+                self.max_assured_time, get_dht_time() + self.averaging_expiration - DISCREPANCY
+            )

-                self.leader_queue.clear()
-                for peer, peer_expiration_time in new_peers:
-                    if peer == self.peer_id or (peer, peer_expiration_time) in self.past_attempts:
-                        continue
-                    self.leader_queue.store(peer, peer_expiration_time, peer_expiration_time)
-                    self.max_assured_time = max(self.max_assured_time, peer_expiration_time - DISCREPANCY)
+            self.leader_queue.clear()
+            for peer, peer_expiration_time in new_peers:
+                if peer == self.peer_id or (peer, peer_expiration_time) in self.past_attempts:
+                    continue
+                self.leader_queue.store(peer, peer_expiration_time, peer_expiration_time)
+                self.max_assured_time = max(self.max_assured_time, peer_expiration_time - DISCREPANCY)

-                self.update_finished.set()
+            self.update_finished.set()

-                await asyncio.wait(
-                    {self.running.wait(), self.update_triggered.wait()},
-                    return_when=asyncio.ALL_COMPLETED,
-                    timeout=self.search_end_time - get_dht_time() if isfinite(self.search_end_time) else None,
-                )
-                self.update_triggered.clear()
-        except (concurrent.futures.CancelledError, asyncio.CancelledError):
-            return  # note: this is a compatibility layer for python3.7
-        except Exception as e:
-            logger.error(f"{self.peer_id} - caught {type(e)}: {e}")
-            raise
+            await asyncio.wait(
+                {self.running.wait(), self.update_triggered.wait()},
+                return_when=asyncio.ALL_COMPLETED,
+                timeout=self.search_end_time - get_dht_time() if isfinite(self.search_end_time) else None,
+            )
+            self.update_triggered.clear()

-    async def _declare_averager_periodically(self, key_manager: GroupKeyManager):
+    async def _declare_averager_periodically(self, key_manager: GroupKeyManager) -> None:
         async with self.lock_declare:
             try:
                 while True:

Review comment: The code is identical besides removing the …

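The dropped except (concurrent.futures.CancelledError, asyncio.CancelledError) branch was a Python 3.7 shim: since Python 3.8, concurrent.futures.CancelledError is an alias of asyncio.CancelledError, which derives from BaseException, so a blanket except Exception no longer swallows it and cancellation propagates out of the coroutine by itself. A small check of that behavior (assumes Python 3.8 or newer):

```python
import asyncio

# On 3.8+ CancelledError is not an Exception subclass, so `except Exception`
# does not intercept cancellation.
assert not issubclass(asyncio.CancelledError, Exception)


async def loop_forever() -> None:
    try:
        while True:
            await asyncio.sleep(1)
    except Exception:
        # cancellation never lands here on 3.8+
        raise


async def main() -> None:
    task = asyncio.create_task(loop_forever())
    await asyncio.sleep(0)
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        print("cancellation propagated as expected")


asyncio.run(main())
```
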
@@ -521,10 +513,6 @@ async def _declare_averager_periodically(self, key_manager: GroupKeyManager):
                     await asyncio.sleep(self.declared_expiration_time - get_dht_time())
                     if self.running.is_set() and len(self.leader_queue) == 0:
                         await key_manager.update_key_on_not_enough_peers()
-            except (concurrent.futures.CancelledError, asyncio.CancelledError):
-                pass  # note: this is a compatibility layer for python3.7
             except Exception as e:  # note: we catch exceptions here because otherwise they are never printed
                 logger.error(f"{self.peer_id} - caught {type(e)}: {e}")
             finally:
                 if self.declared_group_key is not None:
                     prev_declared_key, prev_expiration_time = self.declared_group_key, self.declared_expiration_time

Review comment: [non-blocking] …
Author reply: I have edited this PR to only remove the … These methods are only awaited in …

@@ -27,7 +27,7 @@
 from hivemind.dht.routing import DHTID, DHTKey, DHTValue, Subkey
 from hivemind.dht.validation import CompositeValidator, RecordValidatorBase
 from hivemind.p2p import P2P, PeerID
-from hivemind.utils import DHTExpiration, MPFuture, ValueWithExpiration, await_cancelled, get_logger, switch_to_uvloop
+from hivemind.utils import DHTExpiration, MPFuture, ValueWithExpiration, get_logger, switch_to_uvloop

 logger = get_logger(__name__)

@@ -261,18 +261,11 @@ def run_coroutine(
     async def _run_coroutine(
         self, coro: Callable[[DHT, DHTNode], Awaitable[ReturnType]], future: MPFuture[ReturnType]
     ):
-        main_task = asyncio.create_task(coro(self, self._node))
-        cancel_task = asyncio.create_task(await_cancelled(future))
         try:
-            await asyncio.wait({main_task, cancel_task}, return_when=asyncio.FIRST_COMPLETED)
-            if future.cancelled():
-                main_task.cancel()
-            else:
-                future.set_result(await main_task)
+            future.set_result(await coro(self, self._node))
         except BaseException as e:
-            logger.exception(f"Caught an exception when running a coroutine: {e}")
-            if not future.done():
-                future.set_exception(e)
+            logger.exception("Caught an exception when running a coroutine:")
+            future.set_exception(e)

     def add_validators(self, record_validators: Iterable[RecordValidatorBase]) -> None:
         if not self._ready.done():

Review comment: Cancels here did not work since the new MPFuture implementation does not support asyncio … They produced the following exception that was accidentally suppressed in … This PR removes them since they are not used, as discussed with @justheuristic.

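After the change, _run_coroutine no longer races the main task against a cancel_task watching the future: it simply awaits the coroutine and forwards the result or exception into the MPFuture. A stand-alone sketch of that pattern, using a plain concurrent.futures.Future in place of hivemind's MPFuture (run_and_report and the other names here are illustrative, not the library's API):

```python
import asyncio
from concurrent.futures import Future
from typing import Awaitable, Callable, TypeVar

T = TypeVar("T")


async def run_and_report(coro_fn: Callable[[], Awaitable[T]], future: Future) -> None:
    """Await a coroutine and push its outcome into a future owned by the caller."""
    try:
        future.set_result(await coro_fn())
    except BaseException as e:  # report crashes to the caller instead of losing them
        future.set_exception(e)


async def main() -> None:
    fut: Future = Future()
    await run_and_report(lambda: asyncio.sleep(0.01, result=42), fut)
    print(fut.result())  # 42


asyncio.run(main())
```
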
Review comment: `.cancel()` just returns False when the awaitable is done.
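This is why the `if not task.done():` guards could be dropped throughout: cancelling an already-finished task (or future) is a harmless no-op that simply returns False. For example:

```python
import asyncio


async def main() -> None:
    task = asyncio.create_task(asyncio.sleep(0))
    await task              # the task is finished at this point
    print(task.done())      # True
    print(task.cancel())    # False - cancelling a finished task does nothing


asyncio.run(main())
```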