Fix Llama3 miner 4bit flag, update requirements, etc #235

Merged · 18 commits · Jun 5, 2024
13 changes: 7 additions & 6 deletions README.md
@@ -47,8 +47,8 @@ bash install.sh

# Compute Requirements

-1. To run a **validator**, you will need at least 24GB of VRAM.
-2. To run the default huggingface **miner**, you will need at least 18GB of VRAM.
+1. To run a **validator**, you will need at least 62GB of VRAM.
+2. To run the default huggingface **miner**, you will need at least 62GB of VRAM.

</div>

@@ -77,10 +77,11 @@ For ease of use, you can run the scripts as well with PM2. Installation of PM2 i
sudo apt update && sudo apt install jq && sudo apt install npm && sudo npm install pm2 -g && pm2 update
```

-Example of running a SOLAR miner:
+Example of running a Llama3 miner:

```bash
-pm2 start neurons/miners/huggingface/miner.py --interpreter python3 --name solar_miner -- --netuid 1 --subtensor.network finney --wallet.name my_wallet --wallet.hotkey m1 --neuron.model_id casperhansen/llama-3-70b-instruct-awq --axon.port 21988 --logging.debug
+pm2 start neurons/miners/huggingface/miner.py --interpreter python3 --name llama3_miner -- --netuid 1 --subtensor.network finney --wallet.name my_wallet --wallet.hotkey m1 --neuron.model_id casperhansen/llama-3-70b-instruct-awq --neuron.load_in_4bit True --axon.port 21988 --logging.debug
```
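The `--neuron.load_in_4bit True` flag added here exists to cut weight memory. A rough back-of-envelope for a 70B-parameter model (illustrative arithmetic only; it ignores activations, KV cache, and framework overhead):

```python
def weight_gb(n_params: float, bits_per_param: int) -> float:
    # Weights only: params * bits, divided by 8 bits per byte, reported in GB (1e9 bytes).
    return n_params * bits_per_param / 8 / 1e9

n = 70e9  # 70B parameters
print(f"fp16 weights:  {weight_gb(n, 16):.0f} GB")  # 140 GB
print(f"4-bit weights: {weight_gb(n, 4):.0f} GB")   # 35 GB
```

This is why a 4-bit AWQ checkpoint of a 70B model can fit on hardware where the fp16 weights alone would not.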

# Testnet
We highly recommend that you run your miners on testnet before deploying on main. This will give you an opportunity to debug your systems and ensure that you will not lose valuable immunity time. The SN1 testnet is **netuid 61**.
@@ -90,7 +91,7 @@ In order to run on testnet, you will need to go through the same hotkey registra
To run:

```bash
-pm2 start neurons/miners/huggingface/miner.py --interpreter python3 --name solar_miner -- --netuid 61 --subtensor.network test --wallet.name my_test_wallet --wallet.hotkey m1 --neuron.model_id casperhansen/llama-3-70b-instruct-awq --axon.port 21988 --logging.debug
+pm2 start neurons/miners/huggingface/miner.py --interpreter python3 --name llama3_miner -- --netuid 61 --subtensor.network test --wallet.name my_test_wallet --wallet.hotkey m1 --neuron.model_id casperhansen/llama-3-70b-instruct-awq --neuron.load_in_4bit True --axon.port 21988 --logging.debug
```

# Limitations
7 changes: 5 additions & 2 deletions prompting/llms/hf.py
@@ -19,7 +19,7 @@
from typing import List, Dict
import bittensor as bt

-from transformers import Pipeline, pipeline, AutoTokenizer, TextIteratorStreamer
+from transformers import BitsAndBytesConfig, pipeline, AutoTokenizer, TextIteratorStreamer
from prompting.mock import MockPipeline
from prompting.cleaners.cleaner import CleanerPipeline
from transformers import pipeline, TextIteratorStreamer, AutoTokenizer
@@ -83,12 +83,15 @@ def load_hf_pipeline(
             streamer=streamer,
         )
     else:
+        kwargs = model_kwargs.copy()
+        kwargs["bnb_4bit_compute_dtype"] = kwargs.pop("torch_dtype")
+        quant_config = BitsAndBytesConfig(**kwargs)
         llm_pipeline = pipeline(
             "text-generation",
             model=model_id,
             tokenizer=tokenizer,
             device_map=device,
-            model_kwargs=model_kwargs,
+            quant_config=quant_config,
             streamer=streamer,
         )

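The kwargs reshuffle in the hunk above can be sketched in isolation: copy the dict so the caller's `model_kwargs` is untouched, then rename `torch_dtype` to the key that `BitsAndBytesConfig` expects. (A minimal sketch; `build_bnb_kwargs` is a hypothetical helper name, and the literal dict stands in for the real `model_kwargs`.)

```python
def build_bnb_kwargs(model_kwargs: dict) -> dict:
    # Copy first so the caller's dict is not mutated, then move the dtype
    # under the key used for 4-bit compute.
    kwargs = model_kwargs.copy()
    kwargs["bnb_4bit_compute_dtype"] = kwargs.pop("torch_dtype")
    return kwargs

original = {"torch_dtype": "float16", "load_in_4bit": True}
bnb_kwargs = build_bnb_kwargs(original)
print(bnb_kwargs)   # {'load_in_4bit': True, 'bnb_4bit_compute_dtype': 'float16'}
print(original)     # unchanged: {'torch_dtype': 'float16', 'load_in_4bit': True}
```

The copy matters: without it, `pop("torch_dtype")` would silently strip the dtype from the caller's dict on every call.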
2 changes: 1 addition & 1 deletion prompting/utils/config.py
@@ -83,7 +83,7 @@ def add_args(cls, parser):
             "--neuron.llm_max_allowed_memory_in_gb",
             type=int,
             help="The max gpu memory utilization set for initializing the model. This parameter currently reflects on the property `gpu_memory_utilization` of vllm",
-            default=60,
+            default=62,
         )

         parser.add_argument(
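The config change here is a plain argparse default bump. A self-contained sketch of the same pattern (stdlib only; note that argparse keeps the dots in the destination name, so the value must be read back with `getattr` rather than attribute access):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--neuron.llm_max_allowed_memory_in_gb",
    type=int,
    help="Max GPU memory (GB) allowed when initializing the model.",
    default=62,
)

config = parser.parse_args([])  # empty argv: the default applies
print(getattr(config, "neuron.llm_max_allowed_memory_in_gb"))  # 62
```

Passing `--neuron.llm_max_allowed_memory_in_gb 80` on the command line would override the default the usual way.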
5 changes: 3 additions & 2 deletions requirements.txt
Expand Up @@ -6,7 +6,6 @@ datasets==2.14.6
deprecation==2.1.0
torch==2.1.1
torchmetrics
-transformers==4.36.2
pre-commit==3.3.2
git+https://github.com/synapse-alpha/mathgenerator.git@main#egg=mathgenerator
numpy==1.22.0
@@ -24,4 +23,6 @@ python-dotenv
wikipedia_sections
vllm
loguru
-argostranslate
+argostranslate
+transformers==4.41.1
+autoawq==0.2.5
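Pinned lines like `transformers==4.41.1` and `autoawq==0.2.5` follow the simple `name==version` shape. A tiny parser for that shape (a sketch only; `parse_pin` is a hypothetical helper that handles bare `==` pins, not the full requirements grammar with extras, markers, or VCS URLs):

```python
def parse_pin(line: str):
    # Split "name==version"; a bare name (no pin) yields version None.
    name, _, version = line.strip().partition("==")
    return name, (version or None)

print(parse_pin("transformers==4.41.1"))  # ('transformers', '4.41.1')
print(parse_pin("loguru"))                # ('loguru', None)
```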