diff --git a/.gitignore b/.gitignore
index c894ee73..5dfe5518 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,8 @@ __pycache__/
 *.py[cod]
 *$py.class
 .DS_Store
+**/.DS_Store
+
 # C extensions
 *.so
diff --git a/neurons/miners/__init__.py b/neurons/miners/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/neurons/miners/huggingface/README.md b/neurons/miners/huggingface/README.md
new file mode 100644
index 00000000..2bf4602a
--- /dev/null
+++ b/neurons/miners/huggingface/README.md
@@ -0,0 +1,47 @@
+# Hugging Face Bittensor Miner
+This repository contains a Bittensor Miner integrated with 🤗 Hugging Face pipelines. The miner connects to the Bittensor network, registers its wallet, and serves a Hugging Face model to the network.
+
+## Prerequisites
+
+- Python 3.8+
+- Hugging Face Transformers (https://github.com/huggingface/transformers)
+
+## Installation
+1. Clone the repository
+```bash
+git clone https://github.com/opentensor/prompting.git
+```
+2. Install the required packages for the [repository requirements](../../../requirements.txt) with `pip install -r requirements.txt`
+
+
+For more configuration options related to the wallet, axon, subtensor, logging, and metagraph, please refer to the Bittensor documentation.
+
+## Example Usage
+
+Here are some model examples that could be leveraged by the Hugging Face Miner, alongside the suggested GPU footprint to run each model comfortably:
+| model_id | Default GPU footprint | 8-bit quantization GPU footprint | 4-bit quantization GPU footprint |
+| --- | ---- | ---- | ---- |
+| HuggingFaceH4/zephyr-7b-beta | 18 GB | 12 GB | 7 GB |
+| teknium/OpenHermes-2.5-Mistral-7B | 30 GB | 10 GB | 7 GB |
+| upstage/SOLAR-10.7B-Instruct-v1.0 | 42 GB | 14 GB | 8 GB |
+| mistralai/Mixtral-8x7B-Instruct-v0.1 | 92 GB* | 64 GB* | 30 GB* |
+
+> \* Big models such as Mixtral are very costly to run and optimize, so always bear in mind the trade-offs between model speed, model quality, and infrastructure cost.
+
+
+To run the Hugging Face Bittensor Miner with default settings, use the following command:
+```bash
+python3 neurons/miners/huggingface/miner.py \
+    --wallet.name <> \
+    --wallet.hotkey <> \
+    --neuron.model_id <>
+```
+
+You can also run automatic quantization by adding the flag `--neuron.load_in_8bit` for 8-bit quantization or `--neuron.load_in_4bit` for 4-bit quantization:
+```bash
+python3 neurons/miners/huggingface/miner.py \
+    --wallet.name <> \
+    --wallet.hotkey <> \
+    --neuron.model_id <> \
+    --neuron.load_in_8bit True
+```
\ No newline at end of file
diff --git a/neurons/miners/huggingface/miner.py b/neurons/miners/huggingface/miner.py
new file mode 100644
index 00000000..29eaa640
--- /dev/null
+++ b/neurons/miners/huggingface/miner.py
@@ -0,0 +1,31 @@
+# The MIT License (MIT)
+# Copyright © 2024 Yuma Rao
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
+# the Software.
+ +# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +import time +import bittensor as bt +from prompting.miners import HuggingFaceMiner + + +# This is the main function, which runs the miner. +if __name__ == "__main__": + with HuggingFaceMiner() as miner: + while True: + miner.log_status() + time.sleep(5) + + if miner.should_exit: + bt.logging.warning("Ending miner...") + break diff --git a/neurons/miners/openai/__init__.py b/neurons/miners/openai/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/neurons/miners/openai/miner.py b/neurons/miners/openai/miner.py index a776caba..e8e5a254 100644 --- a/neurons/miners/openai/miner.py +++ b/neurons/miners/openai/miner.py @@ -14,140 +14,9 @@ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. - -import os import time import bittensor as bt -import argparse - -# Bittensor Miner Template: -import prompting -from prompting.protocol import PromptingSynapse - -# import base miner class which takes care of most of the boilerplate -from neurons.miner import Miner - -from langchain.prompts import ChatPromptTemplate -from langchain_core.output_parsers import StrOutputParser -from langchain.chat_models import ChatOpenAI -from dotenv import load_dotenv, find_dotenv -from langchain.callbacks import get_openai_callback - - -class OpenAIMiner(Miner): - """Langchain-based miner which uses OpenAI's API as the LLM. - - You should also install the dependencies for this miner, which can be found in the requirements.txt file in this directory. - """ - - @classmethod - def add_args(cls, parser: argparse.ArgumentParser): - """ - Adds OpenAI-specific arguments to the command line parser. - """ - super().add_args(parser) - - def __init__(self, config=None): - super().__init__(config=config) - - bt.logging.info(f"Initializing with model {self.config.neuron.model_id}...") - - if self.config.wandb.on: - self.identity_tags = ("openai_miner",) + (self.config.neuron.model_id,) - - _ = load_dotenv(find_dotenv()) - api_key = os.environ.get("OPENAI_API_KEY") - - # Set openai key and other args - self.model = ChatOpenAI( - api_key=api_key, - model_name=self.config.neuron.model_id, - max_tokens=self.config.neuron.max_tokens, - temperature=self.config.neuron.temperature, - ) - - self.system_prompt = "You are a friendly chatbot who always responds concisely and helpfully. You are honest about things you don't know." 
- self.accumulated_total_tokens = 0 - self.accumulated_prompt_tokens = 0 - self.accumulated_completion_tokens = 0 - self.accumulated_total_cost = 0 - - def get_cost_logging(self, cb): - bt.logging.info(f"Total Tokens: {cb.total_tokens}") - bt.logging.info(f"Prompt Tokens: {cb.prompt_tokens}") - bt.logging.info(f"Completion Tokens: {cb.completion_tokens}") - bt.logging.info(f"Total Cost (USD): ${round(cb.total_cost,4)}") - - self.accumulated_total_tokens += cb.total_tokens - self.accumulated_prompt_tokens += cb.prompt_tokens - self.accumulated_completion_tokens += cb.completion_tokens - self.accumulated_total_cost += cb.total_cost - - return { - "total_tokens": cb.total_tokens, - "prompt_tokens": cb.prompt_tokens, - "completion_tokens": cb.completion_tokens, - "total_cost": cb.total_cost, - "accumulated_total_tokens": self.accumulated_total_tokens, - "accumulated_prompt_tokens": self.accumulated_prompt_tokens, - "accumulated_completion_tokens": self.accumulated_completion_tokens, - "accumulated_total_cost": self.accumulated_total_cost, - } - - async def forward(self, synapse: PromptingSynapse) -> PromptingSynapse: - """ - Processes the incoming synapse by performing a predefined operation on the input data. - This method should be replaced with actual logic relevant to the miner's purpose. - - Args: - synapse (PromptingSynapse): The synapse object containing the 'dummy_input' data. - - Returns: - PromptingSynapse: The synapse object with the 'dummy_output' field set to twice the 'dummy_input' value. - - The 'forward' function is a placeholder and should be overridden with logic that is appropriate for - the miner's intended operation. This method demonstrates a basic transformation of input data. - """ - try: - with get_openai_callback() as cb: - t0 = time.time() - bt.logging.debug(f"📧 Message received, forwarding synapse: {synapse}") - - prompt = ChatPromptTemplate.from_messages( - [("system", self.system_prompt), ("user", "{input}")] - ) - chain = prompt | self.model | StrOutputParser() - - role = synapse.roles[-1] - message = synapse.messages[-1] - - bt.logging.debug(f"💬 Querying openai: {prompt}") - response = chain.invoke({"role": role, "input": message}) - - synapse.completion = response - synapse_latency = time.time() - t0 - - if self.config.wandb.on: - self.log_event( - timing=synapse_latency, - prompt=message, - completion=response, - system_prompt=self.system_prompt, - extra_info=self.get_cost_logging(cb), - ) - - bt.logging.debug(f"✅ Served Response: {response}") - self.step += 1 - - return synapse - except Exception as e: - bt.logging.error(f"Error in forward: {e}") - synapse.completion = "Error: " + str(e) - finally: - if self.config.neuron.stop_on_forward_exception: - self.should_exit = True - return synapse - +from prompting.miners import OpenAIMiner # This is the main function, which runs the miner. if __name__ == "__main__": diff --git a/neurons/miners/openai/requirements.txt b/neurons/miners/openai/requirements.txt deleted file mode 100644 index 1436c237..00000000 --- a/neurons/miners/openai/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -# TODO: Are we expecting that the miners should install the validator dependency first? -# If so, we need to make it clear on the README. 
Otherwise, we should have a completely separated requirements for the miner -openai==1.9.0 -langchain==0.1.0 -python-dotenv \ No newline at end of file diff --git a/neurons/miners/test/echo.py b/neurons/miners/test/echo.py index 7598aaa5..697dd9f5 100644 --- a/neurons/miners/test/echo.py +++ b/neurons/miners/test/echo.py @@ -14,37 +14,9 @@ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. - import time -import typing import bittensor as bt - -# Bittensor Miner Template: -import prompting -from prompting.protocol import PromptingSynapse - -# import base miner class which takes care of most of the boilerplate -from neurons.miner import Miner - - -class EchoMiner(Miner): - """ - This little fella just repeats the last message it received. - """ - - def __init__(self, config=None): - super().__init__(config=config) - - async def forward(self, synapse: PromptingSynapse) -> PromptingSynapse: - synapse.completion = synapse.messages[-1] - self.step += 1 - return synapse - - async def blacklist(self, synapse: PromptingSynapse) -> typing.Tuple[bool, str]: - return False, "All good here" - - async def priority(self, synapse: PromptingSynapse) -> float: - return 1e6 +from prompting.miners import EchoMiner # This is the main function, which runs the miner. diff --git a/neurons/miners/test/mock.py b/neurons/miners/test/mock.py index 86b076a1..6e2d6716 100644 --- a/neurons/miners/test/mock.py +++ b/neurons/miners/test/mock.py @@ -14,37 +14,9 @@ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. - import time -import typing import bittensor as bt - -# Bittensor Miner Template: -import prompting -from prompting.protocol import PromptingSynapse - -# import base miner class which takes care of most of the boilerplate -from neurons.miner import Miner - - -class MockMiner(Miner): - """ - This little fella responds with a static message. - """ - - def __init__(self, config=None): - super().__init__(config=config) - - async def forward(self, synapse: PromptingSynapse) -> PromptingSynapse: - synapse.completion = f"Hey you reached mock miner {self.config.wallet.hotkey!r}. Please leave a message after the tone.. Beep!" - self.step += 1 - return synapse - - async def blacklist(self, synapse: PromptingSynapse) -> typing.Tuple[bool, str]: - return False, "All good here" - - async def priority(self, synapse: PromptingSynapse) -> float: - return 1e6 +from prompting.miners import MockMiner # This is the main function, which runs the miner. diff --git a/neurons/miners/test/phrase.py b/neurons/miners/test/phrase.py index 7127a251..a3c92984 100644 --- a/neurons/miners/test/phrase.py +++ b/neurons/miners/test/phrase.py @@ -14,49 +14,9 @@ # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. 
- import time -import typing -import argparse import bittensor as bt - -# Bittensor Miner Template: -import prompting -from prompting.protocol import PromptingSynapse - -# import base miner class which takes care of most of the boilerplate -from neurons.miner import Miner - - -class PhraseMiner(Miner): - """ - This little fella responds with whatever phrase you give it. - """ - - @classmethod - def add_args(cls, parser: argparse.ArgumentParser): - super().add_args(parser) - - parser.add_argument( - "--neuron.phrase", - type=str, - help="The phrase to use when running a phrase (test) miner.", - default="Can you please repeat that?", - ) - - def __init__(self, config=None): - super().__init__(config=config) - - async def forward(self, synapse: PromptingSynapse) -> PromptingSynapse: - synapse.completion = self.config.neuron.phrase - self.step += 1 - return synapse - - async def blacklist(self, synapse: PromptingSynapse) -> typing.Tuple[bool, str]: - return False, "All good here" - - async def priority(self, synapse: PromptingSynapse) -> float: - return 1e6 +from prompting.miners import PhraseMiner # This is the main function, which runs the miner. diff --git a/neurons/miners/wiki_agent/README.md b/neurons/miners/wiki_agent/README.md deleted file mode 100644 index 0d0b8a7a..00000000 --- a/neurons/miners/wiki_agent/README.md +++ /dev/null @@ -1,34 +0,0 @@ -# WikiAgent Bittensor Miner -This repository contains a Bittensor Miner that uses a simple ReACT langchain agent to retrieve data from OpenAI's model alongside the wikipedia tool. The miner connects to the Bittensor network, registers its wallet, and serves the GPT model to the network. - -## Prerequisites - -- Python 3.8+ -- OpenAI Python API (https://github.com/openai/openai) - -## Installation - -1. Clone the repository -```bash -git clone https://github.com/opentensor/prompting.git -``` - -2. Install the required packages for the [repository requirements](../../../requirements.txt) with `pip install -r requirements.txt` -3. Install the required packages for the [wikipedia agent miner](requirements.txt) with `pip install -r requirements.txt` -3. Ensure that you have a `.env` file with your `OPENAI_API` key -```.env -echo OPENAI_API_KEY=YOUR-KEY > .env -``` - -For more configuration options related to the wallet, axon, subtensor, logging, and metagraph, please refer to the Bittensor documentation. - -## Example Usage - -To run the WikiAgent Bittensor Miner with default settings, we recommend using the model `gpt-3.5-turbo-16k` or any model with a big context window. You can run the miner using the following command: - -```bash -python3 neurons/miners/wiki_agent/miner.py \ - --wallet.name <> \ - --wallet.hotkey <> - --neuron.model_id gpt-3.5-turbo-16k -``` \ No newline at end of file diff --git a/neurons/miners/wiki_agent/requirements.txt b/neurons/miners/wiki_agent/requirements.txt deleted file mode 100644 index 07f7b80e..00000000 --- a/neurons/miners/wiki_agent/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -# If so, we need to make it clear on the README. 
Otherwise, we should have a completely separated requirements for the miner -openai==0.28 -langchain==0.1.0 -python-dotenv -wikipedia \ No newline at end of file diff --git a/neurons/miners/zephyr/README.md b/neurons/miners/zephyr/README.md deleted file mode 100644 index b3581c16..00000000 --- a/neurons/miners/zephyr/README.md +++ /dev/null @@ -1,39 +0,0 @@ -# Zephyr Bittensor Miner -This repository contains a Bittensor Miner that uses [HuggingFaceH4/zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta). The miner connects to the Bittensor network, registers its wallet, and serves the zephyr model to the network. - -## Prerequisites - -- Python 3.8+ -- OpenAI Python API (https://github.com/openai/openai) - -## Installation -1. Clone the repository -```bash -git clone https://github.com/opentensor/prompting.git -``` -2. Install the required packages for the [repository requirements](../../../requirements.txt) with `pip install -r requirements.txt` -3. Install the required packages for the [wikipedia agent miner](requirements.txt) with `pip install -r requirements.txt` - - -For more configuration options related to the wallet, axon, subtensor, logging, and metagraph, please refer to the Bittensor documentation. - -## Example Usage - -To run the Zephyr Bittensor Miner with default settings, use the following command: -```bash -python3 neurons/miners/zephyr/miner.py \ - --wallet.name <> \ - --wallet.hotkey <> - --neuron.model_id HuggingFaceH4/zephyr-7b-beta -``` - -You will need 18GB of GPU to run this miner in comfortable settings. - -You can also run the quantized version of this model that takes ~10GB of GPU RAM by adding the flag `--neuron.load_quantized`: -```bash -python3 neurons/miners/zephyr/miner.py \ - --wallet.name <> \ - --wallet.hotkey <> - --neuron.model_id HuggingFaceH4/zephyr-7b-beta - --neuron.load_quantized True -``` \ No newline at end of file diff --git a/neurons/miners/zephyr/__init__.py b/neurons/miners/zephyr/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/neurons/validator.py b/neurons/validator.py index be5d328f..86443c67 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -39,7 +39,6 @@ def __init__(self, config=None): self.llm_pipeline = load_pipeline( model_id=self.config.neuron.model_id, - torch_dtype=torch.bfloat16, device=self.device, mock=self.config.mock, ) diff --git a/neurons/miner.py b/prompting/base/prompting_miner.py similarity index 95% rename from neurons/miner.py rename to prompting/base/prompting_miner.py index f9461a94..0ee53337 100644 --- a/neurons/miner.py +++ b/prompting/base/prompting_miner.py @@ -28,7 +28,7 @@ from datetime import datetime -class Miner(BaseMinerNeuron): +class BasePromptingMiner(BaseMinerNeuron): """ Your miner neuron class. You should use this class to define your miner's behavior. In particular, you should replace the forward function with your own logic. You may also want to override the blacklist and priority functions according to your needs. 
@@ -38,7 +38,7 @@ class Miner(BaseMinerNeuron):
     """

     def __init__(self, config=None):
-        super(Miner, self).__init__(config=config)
+        super().__init__(config=config)
         self.identity_tags = None

     async def blacklist(self, synapse: PromptingSynapse) -> typing.Tuple[bool, str]:
@@ -189,15 +189,3 @@ def log_status(self):
         bt.logging.info(
             f"Miner running:: network: {self.subtensor.network} | step: {self.step} | uid: {self.uid} | trust: {m.trust[self.uid]:.3f} | emission {m.emission[self.uid]:.3f}"
         )
-
-
-# This is the main function, which runs the miner.
-if __name__ == "__main__":
-    with Miner() as miner:
-        while True:
-            miner.log_status()
-            time.sleep(5)
-
-            if miner.should_exit:
-                bt.logging.warning("Ending miner...")
-                break
diff --git a/prompting/llm.py b/prompting/llm.py
index 85438c4e..effdd198 100644
--- a/prompting/llm.py
+++ b/prompting/llm.py
@@ -16,7 +16,7 @@
 # DEALINGS IN THE SOFTWARE.

 import time
-
+import torch
 from typing import List, Dict

 import bittensor as bt
@@ -37,21 +37,13 @@ def load_pipeline(
     if not device.startswith("cuda"):
         bt.logging.warning("Only crazy people run this on CPU. It is not recommended.")

-    # model_kwargs torch type definition conflicts with pipeline torch_dtype, so we need to differentiate them
+    # Set the default model torch dtype in case it is not defined
    if model_kwargs is None:
-        llm_pipeline = pipeline(
-            "text-generation",
-            model=model_id,
-            device=device,
-            torch_dtype=torch_dtype,
-        )
-    else:
-        llm_pipeline = pipeline(
-            "text-generation",
-            model=model_id,
-            device_map=device,
-            model_kwargs=model_kwargs,
-        )
+        model_kwargs = dict(torch_dtype=torch.bfloat16)
+
+    llm_pipeline = pipeline(
+        "text-generation", model=model_id, device_map=device, model_kwargs=model_kwargs
+    )

     return llm_pipeline
diff --git a/prompting/miners/__init__.py b/prompting/miners/__init__.py
new file mode 100644
index 00000000..65c7d06c
--- /dev/null
+++ b/prompting/miners/__init__.py
@@ -0,0 +1,10 @@
+# Test miners
+from .echo import EchoMiner
+from .mock import MockMiner
+from .phrase import PhraseMiner
+
+# Real miners
+from .hf_miner import HuggingFaceMiner
+from .openai_miner import OpenAIMiner
+from .agent_miner import AgentMiner
+from .tool_miner import ToolMiner
diff --git a/prompting/miners/agent_miner.py b/prompting/miners/agent_miner.py
new file mode 100644
index 00000000..7d51bacd
--- /dev/null
+++ b/prompting/miners/agent_miner.py
@@ -0,0 +1,113 @@
+# The MIT License (MIT)
+# Copyright © 2024 Yuma Rao
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
+# the Software.
+
+# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+ +import time +import bittensor as bt +import argparse +from deprecation import deprecated + +# Bittensor Miner Template: +from prompting.protocol import PromptingSynapse + +# import base miner class which takes care of most of the boilerplate +from prompting.base.prompting_miner import BasePromptingMiner +from dotenv import load_dotenv, find_dotenv +from prompting.miners.agents import SingleActionAgent, ReactAgent +from langchain.callbacks import get_openai_callback + +@deprecated(deprecated_in="1.1.2", removed_in="2.0", details="AgentMiner is unsupported.") +class AgentMiner(BasePromptingMiner): + """Langchain-based miner which uses OpenAI's API as the LLM. This uses the ReAct framework. + + You should also install the dependencies for this miner, which can be found in the requirements.txt file in this directory. + """ + + @classmethod + def add_args(cls, parser: argparse.ArgumentParser): + """ + Adds OpenAI-specific arguments to the command line parser. + """ + super().add_args(parser) + + parser.add_argument( + "--use_react_agent", + type=bool, + default=False, + help="Flag to enable the ReAct agent", + ) + + def __init__(self, config=None): + super().__init__(config=config) + + bt.logging.info( + f"🤖📖 Initializing wikipedia agent with model {self.config.neuron.model_id}..." + ) + + if self.config.wandb.on: + self.identity_tags = ("wikipedia_agent_miner",) + ( + self.config.neuron.model_id, + ) + + _ = load_dotenv(find_dotenv()) + + if self.config.use_react_agent: + self.agent = ReactAgent( + self.config.neuron.model_id, + self.config.neuron.temperature, + self.config.neuron.max_tokens, + self.config.neuron.load_in_8bits, + self.config.neuron.load_in_4bits, + ) + else: + self.agent = SingleActionAgent( + self.config.neuron.model_id, + self.config.neuron.temperature, + self.config.neuron.max_tokens, + self.config.neuron.load_in_8bits, + self.config.neuron.load_in_4bits, + ) + + self.accumulated_total_tokens = 0 + self.accumulated_prompt_tokens = 0 + self.accumulated_completion_tokens = 0 + self.accumulated_total_cost = 0 + + def get_cost_logging(self, cb): + bt.logging.info(f"Total Tokens: {cb.total_tokens}") + bt.logging.info(f"Prompt Tokens: {cb.prompt_tokens}") + bt.logging.info(f"Completion Tokens: {cb.completion_tokens}") + bt.logging.info(f"Total Cost (USD): ${cb.total_cost}") + + self.accumulated_total_tokens += cb.total_tokens + self.accumulated_prompt_tokens += cb.prompt_tokens + self.accumulated_completion_tokens += cb.completion_tokens + self.accumulated_total_cost += cb.total_cost + + return { + "total_tokens": cb.total_tokens, + "prompt_tokens": cb.prompt_tokens, + "completion_tokens": cb.completion_tokens, + "total_cost": cb.total_cost, + "accumulated_total_tokens": self.accumulated_total_tokens, + "accumulated_prompt_tokens": self.accumulated_prompt_tokens, + "accumulated_completion_tokens": self.accumulated_completion_tokens, + "accumulated_total_cost": self.accumulated_total_cost, + } + + async def forward(self, synapse: PromptingSynapse) -> PromptingSynapse: + self.should_exit = True + return synapse diff --git a/prompting/miners/agents/__init__.py b/prompting/miners/agents/__init__.py new file mode 100644 index 00000000..63cdcde5 --- /dev/null +++ b/prompting/miners/agents/__init__.py @@ -0,0 +1,3 @@ +from .base_agent import BaseAgent +from .single_action_agent import SingleActionAgent +from .react_agent import ReactAgent diff --git a/prompting/miners/agents/base_agent.py b/prompting/miners/agents/base_agent.py new file mode 100644 index 00000000..7335f304 --- 
/dev/null
+++ b/prompting/miners/agents/base_agent.py
@@ -0,0 +1,7 @@
+from abc import ABC
+from deprecation import deprecated
+
+@deprecated(deprecated_in="1.1.2", removed_in="2.0", details="AgentMiner is unsupported.")
+class BaseAgent(ABC):
+    def run(self, input: str) -> str:
+        pass
diff --git a/prompting/miners/agents/react_agent.py b/prompting/miners/agents/react_agent.py
new file mode 100644
index 00000000..09037821
--- /dev/null
+++ b/prompting/miners/agents/react_agent.py
@@ -0,0 +1,60 @@
+import bittensor as bt
+from prompting.miners.agents.utils import load_hf_llm
+from prompting.miners.agents.base_agent import BaseAgent
+from langchain.agents import AgentExecutor, create_react_agent
+from langchain import hub
+from langchain.chat_models import ChatOpenAI
+from langchain.utilities import WikipediaAPIWrapper
+from langchain.agents import Tool
+from langchain.tools import WikipediaQueryRun
+from deprecation import deprecated
+
+@deprecated(deprecated_in="1.1.2", removed_in="2.0", details="AgentMiner is unsupported.")
+class ReactAgent(BaseAgent):
+    def __init__(
+        self,
+        model_id: str,
+        model_temperature: float,
+        max_new_tokens: int = 1024,
+        load_in_8bits: bool = False,
+        load_in_4bits: bool = False,
+    ):
+        self.wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
+        tools = [
+            Tool(
+                name="Wikipedia",
+                func=self.wikipedia.run,
+                description="Useful for when you need to look up a topic, event, country or person on Wikipedia",
+            )
+        ]
+
+        bt.logging.info(
+            f"""Initializing ReAct agent with the following parameters:
+            - model_temperature: {model_temperature}
+            - max_new_tokens: {max_new_tokens}
+            - load_in_8bits: {load_in_8bits}
+            - load_in_4bits: {load_in_4bits}"""
+        )
+
+        prompt = hub.pull("hwchase17/react")
+
+        if "gpt" not in model_id:
+            llm = load_hf_llm(model_id, max_new_tokens, load_in_8bits, load_in_4bits)
+        else:
+            llm = ChatOpenAI(model_name=model_id, temperature=model_temperature)
+
+        # Construct the ReAct agent
+        agent = create_react_agent(llm, tools, prompt)
+
+        # Create an agent executor by passing in the agent and tools
+        self.agent_executor = AgentExecutor(
+            agent=agent,
+            tools=tools,
+            verbose=True,
+            handle_parsing_errors=True,
+            max_iterations=5,
+        )
+
+    def run(self, input: str) -> str:
+        response = self.agent_executor.invoke({"input": input})["output"]
+        return response
diff --git a/neurons/miners/wiki_agent/agent.py b/prompting/miners/agents/single_action_agent.py
similarity index 73%
rename from neurons/miners/wiki_agent/agent.py
rename to prompting/miners/agents/single_action_agent.py
index 5ea1485b..b82731cb 100644
--- a/neurons/miners/wiki_agent/agent.py
+++ b/prompting/miners/agents/single_action_agent.py
@@ -9,12 +9,13 @@ from langchain.schema import AgentAction, AgentFinish, OutputParserException
 import re
 import bittensor as bt
+from prompting.miners.agents.utils import load_hf_llm
+from prompting.miners.agents.base_agent import BaseAgent
 from typing import Union
 from typing import List
 from langchain.prompts import StringPromptTemplate
-from langchain import OpenAI
+from langchain.chat_models import ChatOpenAI
 from langchain.agents import Tool
-from langchain.agents import initialize_agent
 from langchain.chains import LLMChain
 from langchain.agents import (
     Tool,
@@ -22,7 +23,8 @@
     LLMSingleActionAgent,
     AgentOutputParser,
 )
-
+from langchain.tools import WikipediaQueryRun
+from deprecation import deprecated

 # Set up the base template
 template = """Answer the following questions as best you can. You have access to the following tools:
@@ -43,7 +45,7 @@
 Question: {input}
 {agent_scratchpad}"""

-
+@deprecated(deprecated_in="1.1.2", removed_in="2.0", details="AgentMiner is unsupported.")
 # Set up a prompt template
 class CustomPromptTemplate(StringPromptTemplate):
     # The template to use
@@ -69,7 +71,7 @@ def format(self, **kwargs) -> str:
         kwargs["tool_names"] = ", ".join([tool.name for tool in self.tools])
         return self.template.format(**kwargs)

-
+@deprecated(deprecated_in="1.1.2", removed_in="2.0", details="AgentMiner is unsupported.")
 class CustomOutputParser(AgentOutputParser):
     def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
         # Check if agent should finish
@@ -92,13 +94,20 @@ def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
             tool=action, tool_input=action_input.strip(" ").strip('"'), log=llm_output
         )

-
-class WikiAgent:
-    def __init__(self, model_id: str, model_temperature: float):
-        self.wikipedia = WikipediaAPIWrapper()
+@deprecated(deprecated_in="1.1.2", removed_in="2.0", details="AgentMiner is unsupported.")
+class SingleActionAgent(BaseAgent):
+    def __init__(
+        self,
+        model_id: str,
+        model_temperature: float,
+        max_new_tokens: int = 1024,
+        load_in_8bits: bool = False,
+        load_in_4bits: bool = False,
+    ):
+        self.wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
         tools = [
             Tool(
-                name="wikipedia",
+                name="Wikipedia",
                 func=self.wikipedia.run,
                 description="Useful for when you need to look up a topic, country or person on Wikipedia",
             )
         ]
@@ -113,9 +122,18 @@ def __init__(self, model_id: str, model_temperature: float):
         )

         bt.logging.info(
-            f"Initializing agent with model_id: {model_id} and model_temperature: {model_temperature}"
+            f"""Initializing single action agent with the following parameters:
+            - model_id: {model_id}
+            - model_temperature: {model_temperature}
+            - max_new_tokens: {max_new_tokens}
+            - load_in_8bits: {load_in_8bits}
+            - load_in_4bits: {load_in_4bits}"""
         )
-        llm = OpenAI(model_name=model_id, temperature=model_temperature)
+
+        if "gpt" not in model_id:
+            llm = load_hf_llm(model_id, max_new_tokens, load_in_8bits, load_in_4bits)
+        else:
+            llm = ChatOpenAI(model_name=model_id, temperature=model_temperature)

         llm_chain = LLMChain(llm=llm, prompt=prompt)
         output_parser = CustomOutputParser()
@@ -127,8 +145,12 @@ def __init__(self, model_id: str, model_temperature: float):
             allowed_tools=tools,
         )

-        self.agent_executor = AgentExecutor.from_agent_and_tools(
-            agent=agent, tools=tools, verbose=True, handle_parsing_errors=True
+        self.agent_executor = AgentExecutor(
+            agent=agent,
+            tools=tools,
+            verbose=True,
+            handle_parsing_errors=True,
+            max_iterations=5,
         )

     def run(self, input: str) -> str:
diff --git a/prompting/miners/agents/utils.py b/prompting/miners/agents/utils.py
new file mode 100644
index 00000000..aa6fc754
--- /dev/null
+++ b/prompting/miners/agents/utils.py
@@ -0,0 +1,24 @@
+import torch
+from langchain.llms.huggingface_pipeline import HuggingFacePipeline
+
+
+def load_hf_llm(
+    model_id: str, max_new_tokens: int, load_in_8bits: bool, load_in_4bits: bool
+):
+    model_kwargs = {"torch_dtype": torch.float16}
+
+    if load_in_8bits:
+        model_kwargs["load_in_8bit"] = True
+    elif load_in_4bits:
+        model_kwargs["load_in_4bit"] = True
+
+    llm = HuggingFacePipeline.from_model_id(
+        model_id=model_id,
+        task="text-generation",
+        # TODO: Add device from config dynamically
+        device=0,
+        pipeline_kwargs={"max_new_tokens": max_new_tokens},
+        model_kwargs=model_kwargs,
+    )
+
+    return llm
diff --git a/prompting/miners/echo.py
b/prompting/miners/echo.py new file mode 100644 index 00000000..e534b46e --- /dev/null +++ b/prompting/miners/echo.py @@ -0,0 +1,46 @@ +# The MIT License (MIT) +# Copyright © 2024 Yuma Rao + +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the “Software”), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of +# the Software. + +# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +import typing +import bittensor as bt + +# Bittensor Miner Template: +from prompting.protocol import PromptingSynapse + +# import base miner class which takes care of most of the boilerplate +from prompting.base.prompting_miner import BasePromptingMiner + + +class EchoMiner(BasePromptingMiner): + """ + This little fella just repeats the last message it received. + """ + + def __init__(self, config=None): + super().__init__(config=config) + + async def forward(self, synapse: PromptingSynapse) -> PromptingSynapse: + synapse.completion = synapse.messages[-1] + + bt.logging.success(f"✅ Echoing the message {synapse.completion}...") + + return synapse + + async def blacklist(self, synapse: PromptingSynapse) -> typing.Tuple[bool, str]: + return False, "All good here" + + async def priority(self, synapse: PromptingSynapse) -> float: + return 1e6 diff --git a/neurons/miners/zephyr/miner.py b/prompting/miners/hf_miner.py similarity index 73% rename from neurons/miners/zephyr/miner.py rename to prompting/miners/hf_miner.py index 9667f0a4..bdb69aeb 100644 --- a/neurons/miners/zephyr/miner.py +++ b/prompting/miners/hf_miner.py @@ -21,22 +21,21 @@ import bittensor as bt # Bittensor Miner Template: -import prompting from prompting.protocol import PromptingSynapse from prompting.llm import load_pipeline from prompting.llm import HuggingFaceLLM # import base miner class which takes care of most of the boilerplate -from neurons.miner import Miner +from prompting.base.prompting_miner import BasePromptingMiner -class ZephyrMiner(Miner): +class HuggingFaceMiner(BasePromptingMiner): """ Base miner which runs zephyr (https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) This requires a GPU with at least 20GB of memory. To run this miner from the project root directory: - python neurons/miners/zephyr/miner.py --wallet.name --wallet.hotkey --subtensor.network --netuid --axon.port --axon.external_port --logging.debug True --neuron.model_id HuggingFaceH4/zephyr-7b-beta --neuron.system_prompt "Hello, I am a chatbot. I am here to help you with your questions." 
--neuron.max_tokens 64 --neuron.do_sample True --neuron.temperature 0.9 --neuron.top_k 50 --neuron.top_p 0.95 --wandb.on True --wandb.entity sn1 --wandb.project_name miners_experiments + python neurons/miners/huggingface/miner.py --wallet.name --wallet.hotkey --neuron.model_id --subtensor.network --netuid --axon.port --axon.external_port --logging.debug True --neuron.model_id HuggingFaceH4/zephyr-7b-beta --neuron.system_prompt "Hello, I am a chatbot. I am here to help you with your questions." --neuron.max_tokens 64 --neuron.do_sample True --neuron.temperature 0.9 --neuron.top_k 50 --neuron.top_p 0.95 --wandb.on True --wandb.entity sn1 --wandb.project_name miners_experiments """ @classmethod @@ -50,28 +49,42 @@ def __init__(self, config=None): super().__init__(config=config) model_kwargs = None - if self.config.neuron.load_quantized: - bt.logging.info("Loading quantized model...") + if self.config.neuron.load_in_8bit: + bt.logging.info("Loading 8 bit quantized model...") model_kwargs = dict( torch_dtype=torch.float16, load_in_8bit=True, ) + if self.config.neuron.load_in_4bit: + bt.logging.info("Loading 4 bit quantized model...") + model_kwargs = dict( + torch_dtype=torch.float32, + load_in_4bit=True, + ) + if self.config.wandb.on: - self.identity_tags = ("zephyr_miner",) + self.identity_tags = ("hf_miner",) + + if self.config.neuron.load_in_8bit: + self.identity_tags += ("8bit_quantization",) + elif self.config.neuron.load_in_4bit: + self.identity_tags += ("4bit_quantization",) - if self.config.neuron.load_quantized: - self.identity_tags += ("8bits_quantization",) + # Forces model loading behaviour over mock flag + mock = ( + False if self.config.neuron.should_force_model_loading else self.config.mock + ) self.llm_pipeline = load_pipeline( model_id=self.config.neuron.model_id, - torch_dtype=torch.float16, device=self.device, - mock=self.config.mock, + mock=mock, model_kwargs=model_kwargs, ) - self.system_prompt = "You are a friendly chatbot who always responds concisely and helpfully. You are honest about things you don't know." + self.model_id = self.config.neuron.model_id + self.system_prompt = self.config.neuron.system_prompt async def forward(self, synapse: PromptingSynapse) -> PromptingSynapse: """ @@ -93,7 +106,7 @@ async def forward(self, synapse: PromptingSynapse) -> PromptingSynapse: bt.logging.debug(f"📧 Message received, forwarding synapse: {synapse}") prompt = synapse.messages[-1] - bt.logging.debug(f"💬 Querying zephyr: {prompt}") + bt.logging.debug(f"💬 Querying {self.model_id}: {prompt}") response = HuggingFaceLLM( llm_pipeline=self.llm_pipeline, @@ -132,15 +145,3 @@ async def forward(self, synapse: PromptingSynapse) -> PromptingSynapse: if self.config.neuron.stop_on_forward_exception: self.should_exit = True return synapse - - -# This is the main function, which runs the miner. 
-if __name__ == "__main__": - with ZephyrMiner() as miner: - while True: - miner.log_status() - time.sleep(5) - - if miner.should_exit: - bt.logging.warning("Ending miner...") - break diff --git a/prompting/miners/mock.py b/prompting/miners/mock.py new file mode 100644 index 00000000..25b33da3 --- /dev/null +++ b/prompting/miners/mock.py @@ -0,0 +1,45 @@ +# The MIT License (MIT) +# Copyright © 2024 Yuma Rao + +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the “Software”), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of +# the Software. + +# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +import typing +import bittensor as bt + +# Bittensor Miner Template: +from prompting.protocol import PromptingSynapse + +# import base miner class which takes care of most of the boilerplate +from prompting.base.prompting_miner import BasePromptingMiner + + +class MockMiner(BasePromptingMiner): + """ + This little fella responds with a static message. + """ + + def __init__(self, config=None): + super().__init__(config=config) + + async def forward(self, synapse: PromptingSynapse) -> PromptingSynapse: + synapse.completion = f"Hey you reached mock miner {self.config.wallet.hotkey!r}. Please leave a message after the tone.. Beep!" + bt.logging.success(f"✅ Mock miner replied with {synapse.completion}") + + return synapse + + async def blacklist(self, synapse: PromptingSynapse) -> typing.Tuple[bool, str]: + return False, "All good here" + + async def priority(self, synapse: PromptingSynapse) -> float: + return 1e6 diff --git a/neurons/miners/wiki_agent/miner.py b/prompting/miners/openai_miner.py similarity index 73% rename from neurons/miners/wiki_agent/miner.py rename to prompting/miners/openai_miner.py index b3c9bfbc..818b3cf2 100644 --- a/neurons/miners/wiki_agent/miner.py +++ b/prompting/miners/openai_miner.py @@ -15,6 +15,7 @@ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. +import os import time import bittensor as bt import argparse @@ -23,15 +24,18 @@ from prompting.protocol import PromptingSynapse # import base miner class which takes care of most of the boilerplate -from neurons.miner import Miner +from prompting.base.prompting_miner import BasePromptingMiner + +from langchain.prompts import ChatPromptTemplate +from langchain_core.output_parsers import StrOutputParser +from langchain.chat_models import ChatOpenAI from dotenv import load_dotenv, find_dotenv -from agent import WikiAgent from langchain.callbacks import get_openai_callback -class WikipediaAgentMiner(Miner): - """Langchain-based miner which uses OpenAI's API as the LLM. 
This uses the ReAct framework.
-
+class OpenAIMiner(BasePromptingMiner):
+    """Langchain-based miner which uses OpenAI's API as the LLM.
+    This miner does not use any tools or external APIs when processing requests; it relies entirely on the model's own internal representation and world model. In some cases, this can produce lower-quality results.
     You should also install the dependencies for this miner, which can be found in the requirements.txt file in this directory.
     """
@@ -45,20 +49,23 @@ def add_args(cls, parser: argparse.ArgumentParser):
     def __init__(self, config=None):
         super().__init__(config=config)

-        bt.logging.info(
-            f"🤖📖 Initializing wikipedia agent with model {self.config.neuron.model_id}..."
-        )
+        bt.logging.info(f"Initializing with model {self.config.neuron.model_id}...")

         if self.config.wandb.on:
-            self.identity_tags = ("wikipedia_agent_miner",) + (
-                self.config.neuron.model_id,
-            )
+            self.identity_tags = ("openai_miner",) + (self.config.neuron.model_id,)

         _ = load_dotenv(find_dotenv())
-
-        self.agent = WikiAgent(
-            self.config.neuron.model_id, self.config.neuron.temperature
+        api_key = os.environ.get("OPENAI_API_KEY")
+
+        # Set openai key and other args
+        self.model = ChatOpenAI(
+            api_key=api_key,
+            model_name=self.config.neuron.model_id,
+            max_tokens=self.config.neuron.max_tokens,
+            temperature=self.config.neuron.temperature,
         )
+
+        self.system_prompt = self.config.neuron.system_prompt

         self.accumulated_total_tokens = 0
         self.accumulated_prompt_tokens = 0
         self.accumulated_completion_tokens = 0
@@ -68,7 +75,7 @@ def get_cost_logging(self, cb):
         bt.logging.info(f"Total Tokens: {cb.total_tokens}")
         bt.logging.info(f"Prompt Tokens: {cb.prompt_tokens}")
         bt.logging.info(f"Completion Tokens: {cb.completion_tokens}")
-        bt.logging.info(f"Total Cost (USD): ${cb.total_cost}")
+        bt.logging.info(f"Total Cost (USD): ${round(cb.total_cost,4)}")

         self.accumulated_total_tokens += cb.total_tokens
         self.accumulated_prompt_tokens += cb.prompt_tokens
@@ -95,21 +102,23 @@ async def forward(self, synapse: PromptingSynapse) -> PromptingSynapse:
             synapse (PromptingSynapse): The synapse object containing the 'dummy_input' data.

         Returns:
-            PromptingSynapse: The synapse object with the '`dummy_output' field set to twice the 'dummy_input' value.
-
-        The 'forward' function is a placeholder and should be overridden with logic that is appropriate for
-        the miner's intended operation. This method demonstrates a basic transformation of input data.
+ PromptingSynapse: The synapse object with the 'completion' field set to the miner output """ try: with get_openai_callback() as cb: t0 = time.time() bt.logging.debug(f"📧 Message received, forwarding synapse: {synapse}") - message = synapse.messages[-1] + prompt = ChatPromptTemplate.from_messages( + [("system", self.system_prompt), ("user", "{input}")] + ) + chain = prompt | self.model | StrOutputParser() - bt.logging.debug(f"💬 Querying openai and wikipedia: {message}") + role = synapse.roles[-1] + message = synapse.messages[-1] - response = self.agent.run(message) + bt.logging.debug(f"💬 Querying openai: {prompt}") + response = chain.invoke({"role": role, "input": message}) synapse.completion = response synapse_latency = time.time() - t0 @@ -119,13 +128,11 @@ async def forward(self, synapse: PromptingSynapse) -> PromptingSynapse: timing=synapse_latency, prompt=message, completion=response, - system_prompt="", + system_prompt=self.system_prompt, extra_info=self.get_cost_logging(cb), ) bt.logging.debug(f"✅ Served Response: {response}") - self.step += 1 - return synapse except Exception as e: bt.logging.error(f"Error in forward: {e}") @@ -134,15 +141,3 @@ async def forward(self, synapse: PromptingSynapse) -> PromptingSynapse: if self.config.neuron.stop_on_forward_exception: self.should_exit = True return synapse - - -# This is the main function, which runs the miner. -if __name__ == "__main__": - with WikipediaAgentMiner() as miner: - while True: - miner.log_status() - time.sleep(5) - - if miner.should_exit: - bt.logging.warning("Ending miner...") - break diff --git a/prompting/miners/phrase.py b/prompting/miners/phrase.py new file mode 100644 index 00000000..8d4cbc1d --- /dev/null +++ b/prompting/miners/phrase.py @@ -0,0 +1,57 @@ +# The MIT License (MIT) +# Copyright © 2024 Yuma Rao + +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the “Software”), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of +# the Software. + +# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +import typing +import argparse +import bittensor as bt + +# Bittensor Miner Template: +from prompting.protocol import PromptingSynapse + +# import base miner class which takes care of most of the boilerplate +from prompting.base.prompting_miner import BasePromptingMiner + + +class PhraseMiner(BasePromptingMiner): + """ + This little fella responds with whatever phrase you give it. 
+ """ + + @classmethod + def add_args(cls, parser: argparse.ArgumentParser): + super().add_args(parser) + + parser.add_argument( + "--neuron.phrase", + type=str, + help="The phrase to use when running a phrase (test) miner.", + default="Can you please repeat that?", + ) + + def __init__(self, config=None): + super().__init__(config=config) + + async def forward(self, synapse: PromptingSynapse) -> PromptingSynapse: + synapse.completion = self.config.neuron.phrase + bt.logging.success(f"✅ Phrase miner replied with {synapse.completion}") + + return synapse + + async def blacklist(self, synapse: PromptingSynapse) -> typing.Tuple[bool, str]: + return False, "All good here" + + async def priority(self, synapse: PromptingSynapse) -> float: + return 1e6 diff --git a/prompting/miners/tool_miner.py b/prompting/miners/tool_miner.py new file mode 100644 index 00000000..f16479dd --- /dev/null +++ b/prompting/miners/tool_miner.py @@ -0,0 +1,111 @@ +import os +import typing +import argparse +import bittensor as bt +import wikipedia +import time + +# Bittensor Miner Template: +from prompting.protocol import PromptingSynapse + +# import base miner class which takes care of most of the boilerplate +from prompting.base.prompting_miner import BasePromptingMiner +from langchain.chat_models import ChatOpenAI +from dotenv import load_dotenv, find_dotenv +from langchain.callbacks import get_openai_callback +from langchain.prompts import ChatPromptTemplate +from langchain_core.output_parsers import StrOutputParser +from traceback import print_exception +from deprecation import deprecated + +@deprecated(deprecated_in="1.1.2", removed_in="2.0", details="ToolMiner is unsupported.") +class ToolMiner(BasePromptingMiner): + @classmethod + def add_args(cls, parser: argparse.ArgumentParser): + super().add_args(parser) + + def __init__(self, config=None): + super().__init__(config=config) + + bt.logging.info(f"Initializing with model {self.config.neuron.model_id}...") + + if self.config.wandb.on: + self.identity_tags = ("openai_miner",) + (self.config.neuron.model_id,) + + _ = load_dotenv(find_dotenv()) + api_key = os.environ.get("OPENAI_API_KEY") + + # Set openai key and other args + self.model = ChatOpenAI( + api_key=api_key, + model_name=self.config.neuron.model_id, + max_tokens=self.config.neuron.max_tokens, + temperature=self.config.neuron.temperature, + ) + + self.system_prompt = """You are a nice AI assistant that uses the provided context to answer user queries. 
+        ## Context
+        {context}
+        """
+
+    async def forward(self, synapse: PromptingSynapse) -> PromptingSynapse:
+        try:
+            with get_openai_callback() as cb:
+                t0 = time.time()
+                bt.logging.debug(f"📧 Message received, forwarding synapse: {synapse}")
+
+                role = synapse.roles[-1]
+                message = synapse.messages[-1]
+
+                # Message needs to be limited to 300 characters for Wikipedia search, otherwise it will return an error
+                matches = wikipedia.search(message[:300])
+
+                # If we find a match, we add the context to the system prompt
+                if len(matches) > 0:
+                    title = matches[0]
+                    page = wikipedia.page(title)
+                    context = page.content
+
+                    if len(context) > 12_000:
+                        context = context[:12_000]
+
+                    formatted_system_prompt = self.system_prompt.format(context=context)
+                else:
+                    formatted_system_prompt = self.config.neuron.system_prompt
+
+                prompt = ChatPromptTemplate.from_messages(
+                    [("system", formatted_system_prompt), ("user", "{input}")]
+                )
+                chain = prompt | self.model | StrOutputParser()
+
+                bt.logging.debug(f"💬 Querying openai: {prompt}")
+                response = chain.invoke({"role": role, "input": message})
+
+                synapse.completion = response
+                synapse_latency = time.time() - t0
+
+                if self.config.wandb.on:
+                    self.log_event(
+                        timing=synapse_latency,
+                        prompt=message,
+                        completion=response,
+                        system_prompt=self.system_prompt,
+                        extra_info=self.get_cost_logging(cb),
+                    )
+
+                bt.logging.debug(f"✅ Served Response: {response}")
+                return synapse
+        except Exception as e:
+            bt.logging.error(f"Error in forward: {e}")
+            bt.logging.error(print_exception(value=e))
+            synapse.completion = "Error: " + str(e)
+        finally:
+            if self.config.neuron.stop_on_forward_exception:
+                self.should_exit = True
+            return synapse
+
+    async def blacklist(self, synapse: PromptingSynapse) -> typing.Tuple[bool, str]:
+        return False, "All good here"
+
+    async def priority(self, synapse: PromptingSynapse) -> float:
+        return 1e6
diff --git a/prompting/utils/config.py b/prompting/utils/config.py
index 529cb2cf..def252b1 100644
--- a/prompting/utils/config.py
+++ b/prompting/utils/config.py
@@ -22,8 +22,6 @@
 import bittensor as bt
 from loguru import logger

-# TODO: enable 4bit and 8bit precision llms via config
-

 def check_config(cls, config: "bt.Config"):
     r"""Checks/validates the config namespace object."""
@@ -43,7 +41,8 @@ def check_config(cls, config: "bt.Config"):
     if not os.path.exists(config.neuron.full_path):
         os.makedirs(config.neuron.full_path, exist_ok=True)

-    if not config.neuron.dont_save_events:
+    log_level_exists = "EVENTS" in logger._core.levels
+    if not config.neuron.dont_save_events and not log_level_exists:
         # Add custom event logger for the events.
         logger.level("EVENTS", no=38, icon="📝")
         logger.add(
@@ -151,10 +150,17 @@ def add_miner_args(cls, parser):
     )

     parser.add_argument(
-        "--neuron.load_quantized",
+        "--neuron.load_in_8bit",
+        type=str,
+        default=False,
+        help="Load quantized model in 8 bits. Note that this parameter only applies to Hugging Face miners.",
+    )
+
+    parser.add_argument(
+        "--neuron.load_in_4bit",
         type=str,
         default=False,
-        help="Load quantized model.",
+        help="Load quantized model in 4 bits. Note that this parameter only applies to Hugging Face miners.",
     )

     parser.add_argument(
@@ -175,7 +181,7 @@ def add_miner_args(cls, parser):
         "--neuron.system_prompt",
         type=str,
         help="The system prompt to use for the miner.",
-        default="You are a helpful AI assistant. You answer questions, summarize documents, and debug code.
You are always straight to the point and honest.", + default="You are a friendly chatbot who always responds concisely and helpfully. You are honest about things you don't know.", ) parser.add_argument( @@ -213,6 +219,13 @@ def add_miner_args(cls, parser): help="Set miner to stop on forward exception.", ) + parser.add_argument( + "--neuron.should_force_model_loading", + type=bool, + default=False, + help="Force model loading independent of mock flag.", + ) + parser.add_argument( "--wandb.on", type=bool, diff --git a/requirements.txt b/requirements.txt index 66f4ec4b..9185969d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ bittensor==6.6.0 bs4 click==8.1.3 datasets==2.14.6 +deprecation==2.1.0 torch==2.1.1 torchmetrics transformers==4.36.2 @@ -16,4 +17,8 @@ wandb==0.15.10 tenacity antlr4-python3-runtime==4.11 wikipedia +openai==1.9.0 +langchain==0.1.5 +langchainhub==0.1.14 +python-dotenv wikipedia_sections
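
Reviewer note: the consolidated `load_pipeline` in `prompting/llm.py` collapses the old two-branch pipeline construction into a single `device_map`-based call, with the dtype defaulting to bfloat16 when no `model_kwargs` are supplied. A minimal standalone sketch of the equivalent `transformers` call, for sanity-checking the quantization behaviour outside the miner (assumes `transformers` and `torch` are installed, plus `bitsandbytes` for the quantized variants; the model id and device below are placeholders):

```python
import torch
from transformers import pipeline

# Mirrors the new load_pipeline default: no quantization flags -> bfloat16.
model_kwargs = dict(torch_dtype=torch.bfloat16)

# With --neuron.load_in_8bit, hf_miner.py instead builds:
#   model_kwargs = dict(torch_dtype=torch.float16, load_in_8bit=True)
# and with --neuron.load_in_4bit:
#   model_kwargs = dict(torch_dtype=torch.float32, load_in_4bit=True)

llm_pipeline = pipeline(
    "text-generation",
    model="HuggingFaceH4/zephyr-7b-beta",  # placeholder model id
    device_map="cuda:0",                   # placeholder device
    model_kwargs=model_kwargs,
)
```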
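The LCEL chain that `OpenAIMiner.forward` (and `ToolMiner.forward`) builds can likewise be exercised in isolation. A hedged sketch, assuming `langchain==0.1.5` as pinned in requirements.txt, an `OPENAI_API_KEY` in the environment, and a placeholder model name:

```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# The new --neuron.system_prompt default from prompting/utils/config.py.
system_prompt = (
    "You are a friendly chatbot who always responds concisely and helpfully. "
    "You are honest about things you don't know."
)

model = ChatOpenAI(model_name="gpt-3.5-turbo", max_tokens=256, temperature=0.7)

# prompt | model | parser, as composed in OpenAIMiner.forward.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("user", "{input}")]
)
chain = prompt | model | StrOutputParser()

# The miner feeds the last synapse message in as "input".
print(chain.invoke({"input": "What is the Bittensor network?"}))
```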
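Finally, the new `deprecation==2.1.0` dependency drives the `@deprecated(...)` markers on `AgentMiner`, `ToolMiner`, and the agent classes. A small sketch of the expected behaviour (the decorated function below is hypothetical; to my understanding the library emits a `DeprecationWarning` when the decorated callable is invoked):

```python
import warnings
from deprecation import deprecated

# Hypothetical example target; the PR applies the same decorator to classes.
@deprecated(deprecated_in="1.1.2", removed_in="2.0", details="Example is unsupported.")
def legacy_forward() -> str:
    return "ok"

with warnings.catch_warnings(record=True) as caught:
    # DeprecationWarning is ignored by default outside __main__, so opt in.
    warnings.simplefilter("always")
    legacy_forward()

# The recorded message describes the deprecated_in/removed_in versions.
print(caught[0].category.__name__, caught[0].message)
```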