Use atom #286

Open · wants to merge 9 commits into base: staging
186 changes: 9 additions & 177 deletions folding/base/miner.py
@@ -1,79 +1,22 @@
# The MIT License (MIT)
# Copyright © 2023 Yuma Rao
# Copyright © 2024 Macrocosmos AI.

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
# the Software.

# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

import time
import asyncio
import threading
import argparse
import traceback

import bittensor as bt

from folding.base.neuron import BaseNeuron
from folding.protocol import PingSynapse
from folding.utils.config import add_miner_args
from folding.utils.logger import logger
from folding.protocol import PingSynapse
from folding.base.neuron import BaseFolding
from folding.__init__ import __spec_version__

from atom.base.miner import BaseMinerNeuron as AtomBaseMinerNeuron


class BaseMinerNeuron(BaseNeuron):
class BaseMinerNeuron(AtomBaseMinerNeuron, BaseFolding):
"""
Base class for Bittensor miners.
"""

@classmethod
def add_args(cls, parser: argparse.ArgumentParser):
super().add_args(parser)
add_miner_args(cls, parser)

def __init__(self, config=None):
super().__init__(config=config)

# Warn if allowing incoming requests from anyone.
if not self.config.blacklist.force_validator_permit:
logger.warning(
"You are allowing non-validators to send requests to your miner. This is a security risk."
)
if self.config.blacklist.allow_non_registered:
logger.warning(
"You are allowing non-registered entities to send requests to your miner. This is a security risk."
)

# The axon handles request processing, allowing validators to send this miner requests.
self.axon = bt.axon(
wallet=self.wallet,
config=self.config,
)

# Attach the functions that are called when servicing a request.
logger.info(f"Attaching forward function to miner axon.")
self.axon.attach(
forward_fn=self.forward,
blacklist_fn=self.blacklist,
priority_fn=self.priority,
).attach(
forward_fn=self.ping_forward, # not sure if we need blacklist on this.
)
logger.info(f"Axon created: {self.axon}")

# Instantiate runners
self.should_exit: bool = False
self.is_running: bool = False
self.thread: threading.Thread = None
self.lock = asyncio.Lock()
def spec_version(self):
return __spec_version__

def ping_forward(self, synapse: PingSynapse):
"""Respond to the validator with the necessary information about serving
Expand All @@ -91,114 +34,3 @@ def ping_forward(self, synapse: PingSynapse):
synapse.can_serve = True
logger.success("Telling validator you can serve ✅")
return synapse

def run(self):
"""
Initiates and manages the main loop for the miner on the Bittensor network. The main loop handles graceful shutdown on keyboard interrupts and logs unforeseen errors.

This function performs the following primary tasks:
1. Check for registration on the Bittensor network.
2. Starts the miner's axon, making it active on the network.
3. Periodically resynchronizes with the chain, updating the metagraph with the latest network state and setting weights.

The miner continues its operations until `should_exit` is set to True or an external interruption occurs.
During each epoch of its operation, the miner waits for new blocks on the Bittensor network, updates its
knowledge of the network (metagraph), and sets its weights. This process ensures the miner remains active
and up-to-date with the network's latest state.

Note:
- The function leverages the global configurations set during the initialization of the miner.
- The miner's axon serves as its interface to the Bittensor network, handling incoming and outgoing requests.

Raises:
KeyboardInterrupt: If the miner is stopped by a manual interruption.
Exception: For unforeseen errors during the miner's operation, which are logged for diagnosis.
"""

# Check that miner is registered on the network.
self.sync()

# Serve passes the axon information to the network + netuid we are hosting on.
# This will auto-update if the axon port or external IP has changed.
logger.info(
f"Serving miner axon {self.axon} on network: {self.config.subtensor.chain_endpoint} with netuid: {self.config.netuid}"
)
self.axon.serve(netuid=self.config.netuid, subtensor=self.subtensor)

# Starts the miner's axon, making it active on the network.
self.axon.start()

logger.info(f"Miner starting at block: {self.block}")

# This loop maintains the miner's operations until intentionally stopped.
try:
while not self.should_exit:
time.sleep(10)
self.sync()

# If someone intentionally stops the miner, it'll safely terminate operations.
except KeyboardInterrupt:
self.axon.stop()
logger.success("Miner killed by keyboard interrupt.")
exit()

# In case of unforeseen errors, the miner will log the error and continue operations.
except Exception as e:
logger.error(traceback.format_exc())

def run_in_background_thread(self):
"""
Starts the miner's operations in a separate background thread.
This is useful for non-blocking operations.
"""
if not self.is_running:
logger.debug("Starting miner in background thread.")
self.should_exit = False
self.thread = threading.Thread(target=self.run, daemon=True)
self.thread.start()
self.is_running = True
logger.debug("Started")

def stop_run_thread(self):
"""
Stops the miner's operations that are running in the background thread.
"""
if self.is_running:
logger.debug("Stopping miner in background thread.")
self.should_exit = True
self.thread.join(5)
self.is_running = False
logger.debug("Stopped")

def __enter__(self):
"""
Starts the miner's operations in a background thread upon entering the context.
This method facilitates the use of the miner in a 'with' statement.
"""
self.run_in_background_thread()
return self

def __exit__(self, exc_type, exc_value, traceback):
"""
Stops the miner's background operations upon exiting the context.
This method facilitates the use of the miner in a 'with' statement.

Args:
exc_type: The type of the exception that caused the context to be exited.
None if the context was exited without an exception.
exc_value: The instance of the exception that caused the context to be exited.
None if the context was exited without an exception.
traceback: A traceback object encoding the stack trace.
None if the context was exited without an exception.
"""
self.stop_run_thread()

def resync_metagraph(self):
"""Resyncs the metagraph and updates the hotkeys and moving averages based on the new metagraph."""
logger.info("resync_metagraph()")

# Sync the metagraph.
self.metagraph.sync(subtensor=self.subtensor)

def set_weights(self):
pass
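
For orientation, a minimal usage sketch, assuming atom's `BaseMinerNeuron` keeps the context-manager semantics of the code deleted above (`__enter__` starts the background run thread, `__exit__` stops it); the `Miner` subclass and its import path are hypothetical:

# Hedged usage sketch; the Miner import path is hypothetical and the
# context-manager behavior is assumed to match the deleted code above.
import time

from neurons.miner import Miner  # hypothetical concrete BaseMinerNeuron subclass

if __name__ == "__main__":
    with Miner() as miner:  # entering the context starts run() in a daemon thread
        while True:
            time.sleep(10)  # main thread idles while the axon serves requests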
177 changes: 4 additions & 173 deletions folding/base/neuron.py
@@ -1,109 +1,13 @@
# The MIT License (MIT)
# Copyright © 2023 Yuma Rao

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
# the Software.

# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

import copy
import bittensor as bt
from abc import ABC, abstractmethod
import os

import openmm
from tenacity import RetryError

# Sync calls set weights and also resyncs the metagraph.
from folding.utils.config import check_config, add_args, config
from folding.utils.misc import ttl_get_block
from folding import __spec_version__ as spec_version
from folding import __OPENMM_VERSION_TAG__
from folding.utils.ops import OpenMMException, load_pkl, write_pkl
from folding.mock import MockSubtensor, MockMetagraph
from folding.utils.logger import logger
from folding.__init__ import __OPENMM_VERSION_TAG__
from folding.utils.ops import load_pkl, write_pkl, OpenMMException


class BaseNeuron(ABC):
"""
Base class for Bittensor miners. This class is abstract and should be inherited by a subclass. It contains the core logic for all neurons: validators and miners.

In addition to creating a wallet, subtensor, and metagraph, this class also handles the synchronization of the network state via a basic checkpointing mechanism based on epoch length.
"""

@classmethod
def check_config(cls, config: "bt.Config"):
check_config(cls, config)

@classmethod
def add_args(cls, parser):
add_args(cls, parser)

@classmethod
def config(cls):
return config(cls)

subtensor: "bt.subtensor"
wallet: "bt.wallet"
metagraph: "bt.metagraph"
spec_version: int = spec_version

@property
def block(self):
return ttl_get_block(self)

def __init__(self, config=None):
base_config = copy.deepcopy(config or BaseNeuron.config())
self.config = self.config()
self.config.merge(base_config)
self.check_config(self.config)

# If a gpu is required, set the device to cuda:N (e.g. cuda:0)
self.device = self.config.neuron.device

# Log the configuration for reference.
logger.info(self.config)

# Build Bittensor objects
# These are core Bittensor classes to interact with the network.
logger.info("Setting up bittensor objects.")

# The wallet holds the cryptographic key pairs for the miner.
if self.config.mock:
self.wallet = bt.MockWallet(config=self.config)
self.subtensor = MockSubtensor(self.config.netuid, wallet=self.wallet)
self.metagraph = MockMetagraph(self.config.netuid, subtensor=self.subtensor)
else:
self.wallet = bt.wallet(config=self.config)
self.subtensor = bt.subtensor(config=self.config)
self.metagraph = self.subtensor.metagraph(self.config.netuid)

# Check OpenMM version if we are not in mock mode.
self.check_openmm_version()
self.setup_wandb_logging()

logger.info(f"Wallet: {self.wallet}")
logger.info(f"Subtensor: {self.subtensor}")
logger.info(f"Metagraph: {self.metagraph}")

# Check if the miner is registered on the Bittensor network before proceeding further.
self.check_registered()

# Each miner gets a unique identity (UID) in the network for differentiation.
self.uid = self.metagraph.hotkeys.index(self.wallet.hotkey.ss58_address)
logger.info(
f"Running neuron on subnet: {self.config.netuid} with uid {self.uid} using network: {self.subtensor.chain_endpoint}"
)
self.step = 0
class BaseFolding:
"""This is the BaseFolding class that is inherited by all validator/miner classes in the folding module."""

def check_openmm_version(self):
"""
@@ -137,76 +41,3 @@ def add_wandb_id(self, pdb_id: str, wandb_id: str):
def remove_wandb_id(self, pdb_id: str):
self.wandb_ids.pop(pdb_id)
write_pkl(self.wandb_ids, f"{self.config.neuron.full_path}/wandb_ids.pkl", "wb")

@abstractmethod
async def forward(self, synapse: bt.Synapse) -> bt.Synapse:
...

def sync(self):
"""
Wrapper for synchronizing the state of the network for the given miner or validator.
"""
# Ensure miner or validator hotkey is still registered on the network.
self.check_registered()

if self.should_sync_metagraph():
self.resync_metagraph()

if self.should_set_weights():
try:
logger.info("Attempting to set weights...")
self.set_weights()
logger.success("Weight setting successful!")
except RetryError as e:
logger.error(
f"Failed to set weights after retry attempts. Skipping for {self.config.neuron.epoch_length} blocks."
)

# Always save state.
self.save_state()

def check_registered(self):
# --- Check for registration.
if not self.subtensor.is_hotkey_registered(
netuid=self.config.netuid,
hotkey_ss58=self.wallet.hotkey.ss58_address,
):
logger.error(
f"Wallet: {self.wallet} is not registered on netuid {self.config.netuid}."
f" Please register the hotkey using `btcli subnets register` before trying again"
)
exit()

def should_sync_metagraph(self):
"""
Check if enough epoch blocks have elapsed since the last checkpoint to sync.
"""
return (
self.block - self.metagraph.last_update[self.uid]
) > self.config.neuron.metagraph_resync_length

def should_set_weights(self) -> bool:
# Don't set weights on initialization.
if self.step == 0:
return False

# Check if enough epoch blocks have elapsed since the last epoch.
if self.config.neuron.disable_set_weights:
return False

# Do not allow weight setting if the neuron is not a validator.
if not self.metagraph.validator_permit[self.uid]:
return False

# Define appropriate logic for when to set weights.
return (
self.block - self.metagraph.last_update[self.uid]
) > self.config.neuron.epoch_length

def save_state(self):
pass

def load_state(self):
logger.warning(
"load_state() not implemented for this neuron. You can implement this function to load model checkpoints or other useful data."
)
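
Taken together, the two diffs reduce this repository's base classes to thin shims over atom. A hedged sketch of the resulting class layout (names taken from the diffs above; what atom actually provides is an assumption):

# Sketch: assumes atom's BaseMinerNeuron now supplies the wallet/axon/
# run-loop machinery deleted from folding/base/miner.py.
from atom.base.miner import BaseMinerNeuron as AtomBaseMinerNeuron

from folding.__init__ import __spec_version__
from folding.base.neuron import BaseFolding


class BaseMinerNeuron(AtomBaseMinerNeuron, BaseFolding):
    """atom contributes the run/sync loop; BaseFolding contributes the
    folding-specific helpers (check_openmm_version, add_wandb_id, ...)."""

    def spec_version(self):
        return __spec_version__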
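
The add_wandb_id/remove_wandb_id helpers kept in BaseFolding persist a pdb_id -> wandb_id map to disk. A standalone sketch of that bookkeeping, assuming load_pkl/write_pkl are thin pickle wrappers (the real ones live in folding.utils.ops):

import os
import pickle
from typing import Dict


def write_pkl(obj, path: str, mode: str = "wb") -> None:
    # Mirrors the (obj, path, mode) call shape seen in the diff above.
    with open(path, mode) as f:
        pickle.dump(obj, f)


def load_pkl(path: str, mode: str = "rb") -> Dict[str, str]:
    with open(path, mode) as f:
        return pickle.load(f)


class WandbIdBook:
    # Standalone stand-in for BaseFolding's pdb_id -> wandb_id bookkeeping.
    def __init__(self, full_path: str):
        self.path = os.path.join(full_path, "wandb_ids.pkl")
        self.wandb_ids: Dict[str, str] = (
            load_pkl(self.path) if os.path.exists(self.path) else {}
        )

    def add_wandb_id(self, pdb_id: str, wandb_id: str) -> None:
        self.wandb_ids[pdb_id] = wandb_id
        write_pkl(self.wandb_ids, self.path, "wb")

    def remove_wandb_id(self, pdb_id: str) -> None:
        self.wandb_ids.pop(pdb_id)
        write_pkl(self.wandb_ids, self.path, "wb")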