Features/limit repeated queries #93

Merged · 20 commits · Feb 13, 2024
Changes from all commits
neurons/validator.py (3 changes: 0 additions & 3 deletions)

@@ -24,7 +24,6 @@
 from prompting.llm import load_pipeline
 from prompting.base.validator import BaseValidatorNeuron
 from prompting.rewards import RewardPipeline
-from prompting.utils.uids import check_uid_availability

 class Validator(BaseValidatorNeuron):
     """
@@ -58,8 +57,6 @@ def __init__(self, config=None):
         # Load the reward pipeline
         self.reward_pipeline = RewardPipeline(selected_tasks=self.active_tasks, device=self.device)

-        for i, axon in enumerate(self.metagraph.axons):
-            check_uid_availability(self.metagraph, i, self.config.neuron.vpermit_tao_limit)

     async def forward(self):
         """
prompting/forward.py (2 changes: 1 addition & 1 deletion)

@@ -56,7 +56,7 @@ async def run_step(
     # Record event start time.
     start_time = time.time()
     # Get the list of uids to query for this step.
-    uids = get_random_uids(self, k=k, exclude=exclude or []).to(self.device)
+    uids = get_random_uids(self, exclude=exclude or []).to(self.device)
     axons = [self.metagraph.axons[uid] for uid in uids]

     # Make calls to the network with the prompt.
prompting/utils/config.py (16 changes: 8 additions & 8 deletions)

@@ -348,17 +348,17 @@ def add_validator_args(cls, parser):


     parser.add_argument(
-        "--neuron.query_unique_coldkeys",
-        action="store_true",
-        help="Only query a single hotkey per coldkey.",
-        default=False,
+        "--neuron.unique_coldkey_prob",
+        type=float,
+        help="Probability of querying a given coldkey only once per step.",
+        default=0.9,
     )

     parser.add_argument(
-        "--neuron.query_unique_ips",
-        action="store_true",
-        help="Only query a single hotkey per ip.",
-        default=False,
+        "--neuron.unique_ip_prob",
+        type=float,
+        help="Probability of querying a given ip only once per step.",
+        default=0.9,
     )

 def config(cls):
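The flags keep their dotted names but switch from boolean switches to float probabilities. A minimal, hypothetical sketch with plain argparse (outside bittensor's config wrapper, which exposes these as config.neuron.unique_coldkey_prob and config.neuron.unique_ip_prob):

```python
# Hypothetical standalone sketch of the new flags; the real parser is built
# inside add_validator_args above and wrapped by bittensor's config machinery.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--neuron.unique_coldkey_prob",
    type=float,
    help="Probability of querying a given coldkey only once per step.",
    default=0.9,
)
parser.add_argument(
    "--neuron.unique_ip_prob",
    type=float,
    help="Probability of querying a given ip only once per step.",
    default=0.9,
)

args = parser.parse_args(["--neuron.unique_coldkey_prob", "0.95"])
# argparse keeps the dotted flag name as a literal attribute, so plain
# attribute access won't work here; use getattr with the full dotted name.
print(getattr(args, "neuron.unique_coldkey_prob"))  # 0.95
print(getattr(args, "neuron.unique_ip_prob"))       # 0.9 (default)
```

Setting either probability to 1.0 effectively reproduces the old strict behaviour (never repeat a coldkey or ip within a step), while 0.0 disables the filter entirely; the 0.9 default sits in between.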
prompting/utils/uids.py (113 changes: 50 additions & 63 deletions)

@@ -1,84 +1,71 @@
 import torch
-import random
+import numpy as np
 import bittensor as bt
 from typing import List


-def check_uid_availability(
-    metagraph: "bt.metagraph.Metagraph", uid: int, vpermit_tao_limit: int, coldkeys: set = None, ips: set = None,
-) -> bool:
-    """Check if uid is available. The UID should be available if it is serving and has less than vpermit_tao_limit stake
-    Args:
-        metagraph (:obj: bt.metagraph.Metagraph): Metagraph object
-        uid (int): uid to be checked
-        vpermit_tao_limit (int): Validator permit tao limit
-        coldkeys (set): Set of coldkeys to exclude
-        ips (set): Set of ips to exclude
-    Returns:
-        bool: True if uid is available, False otherwise
-    """
-    # Filter non serving axons.
-    if not metagraph.axons[uid].is_serving:
-        bt.logging.debug(f"uid: {uid} is not serving")
-        return False
-
-    # Filter validator permit > 1024 stake.
-    if metagraph.validator_permit[uid] and metagraph.S[uid] > vpermit_tao_limit:
-        bt.logging.debug(f"uid: {uid} has vpermit and stake ({metagraph.S[uid]}) > {vpermit_tao_limit}")
-        return False
-
-    if coldkeys and metagraph.axons[uid].coldkey in coldkeys:
-        return False
-
-    if ips and metagraph.axons[uid].ip in ips:
-        return False
-
-    # Available otherwise.
-    return True
-
-
 def get_random_uids(
-    self, k: int, exclude: List[int] = None
+    self, exclude: List[int] = None
 ) -> torch.LongTensor:
     """Returns k available random uids from the metagraph.
     Args:
-        k (int): Number of uids to return.
         exclude (List[int]): List of uids to exclude from the random sampling.
     Returns:
         uids (torch.LongTensor): Randomly sampled available uids.
     Notes:
-        If `k` is larger than the number of available `uids`, set `k` to the number of available `uids`.
+        If self.config.neuron.sample_size is larger than the number of available `uids`, the function will return all available `uids`.
     """
-    candidate_uids = []
-    avail_uids = []
-    coldkeys = set()
-    ips = set()
-    for uid in range(self.metagraph.n.item()):
-        if uid == self.uid:
+    uids = []
+    coldkeys = {}
+    ips = {}
+    # shuffled list of all UIDs
+    all_uids = np.random.choice(range(self.metagraph.n.item()), size=self.metagraph.n.item(), replace=False)
+    all_coldkeys = self.metagraph.coldkeys
+    all_ips = [axon.ip for axon in self.metagraph.axons]
+    for uid in all_uids:
+
+        if uid == self.uid or (exclude is not None and uid in exclude):
             continue

-        uid_is_available = check_uid_availability(
-            self.metagraph, uid, self.config.neuron.vpermit_tao_limit, coldkeys, ips,
-        )
-        if not uid_is_available:
+        # Filter non serving axons.
+        if not self.metagraph.axons[uid].is_serving:
+            bt.logging.debug(f"uid: {uid} is not serving")
             continue

-        if self.config.neuron.query_unique_coldkeys:
-            coldkeys.add(self.metagraph.axons[uid].coldkey)
+        # Filter validator permit > 1024 stake.
+        if self.metagraph.validator_permit[uid] and self.metagraph.S[uid] > self.config.neuron.vpermit_tao_limit:
+            bt.logging.debug(f"uid: {uid} has vpermit and stake ({self.metagraph.S[uid]}) > {self.config.neuron.vpermit_tao_limit}")
+            continue
+
+        # get the coldkey for the uid
+        coldkey = all_coldkeys[uid]
+        ip = all_ips[uid]
+        # get the number of times the coldkey has been queried in the current step
+        ck_counts = coldkeys.get(coldkey,0)
+        # get the number of times the ip has been queried in the current step
+        ip_counts = ips.get(ip,0)
+        # if it's already been queried, query again with some smaller probability
+        if ck_counts > 0 or ip_counts > 0:
+
+            # here we use the probability of not querying the same coldkey
+            # for example if unique_coldkey_prob = 0.9 and the coldkey has already been queried 2 times in this forward pass, then the probability of querying the same coldkey again is (1-0.9)^2=0.01
+            ck_threshold = (1-self.config.neuron.unique_coldkey_prob) ** ck_counts
+            ip_threshold = (1-self.config.neuron.unique_ip_prob) ** ip_counts
+
+            # Take the product of the two probabilities as the likelihood of querying the same coldkey and ip again
+            if np.random.random() > ck_threshold * ip_threshold:
+                continue

-        if self.config.neuron.query_unique_ips:
-            ips.add(self.metagraph.axons[uid].ip)
+        coldkeys[coldkey] = coldkeys.get(coldkey, 0) + 1
+        ips[ip] = ips.get(ip, 0) + 1

-        avail_uids.append(uid)
-        if exclude is None or uid not in exclude:
-            candidate_uids.append(uid)
+        uids.append(uid)
+        if len(uids) == self.config.neuron.sample_size:
+            break

-    # Check if candidate_uids contain enough for querying, if not grab all avaliable uids
-    available_uids = candidate_uids
-    if len(candidate_uids) < k:
-        available_uids += random.sample(
-            [uid for uid in avail_uids if uid not in candidate_uids],
-            k - len(candidate_uids),
-        )
-    uids = torch.tensor(random.sample(available_uids, k))
-    return uids
+    self._selected_coldkeys = coldkeys
+    self._selected_ips = ips
+    if len(uids) < self.config.neuron.sample_size:
+        bt.logging.warning(f"Only {len(uids)} uids available for querying, requested {self.config.neuron.sample_size}.")
+    return torch.tensor(uids)
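To make the new acceptance rule concrete: a uid's chance of being queried again decays geometrically with the number of times its coldkey and ip have already been hit in the current step. A self-contained sketch of the same arithmetic (standalone, not imported from the repo):

```python
# Sketch of the repeat-query filter in get_random_uids above.
import numpy as np

UNIQUE_COLDKEY_PROB = 0.9  # config.py defaults
UNIQUE_IP_PROB = 0.9

def keep_uid(ck_counts: int, ip_counts: int) -> bool:
    """Keep a repeated uid only if a uniform draw falls below the product
    of the per-coldkey and per-ip thresholds."""
    ck_threshold = (1 - UNIQUE_COLDKEY_PROB) ** ck_counts
    ip_threshold = (1 - UNIQUE_IP_PROB) ** ip_counts
    return np.random.random() <= ck_threshold * ip_threshold

# Acceptance probability by prior (coldkey, ip) query counts:
#   (1, 0) -> 0.1     first coldkey repeat on a fresh ip
#   (1, 1) -> 0.01    first repeat of both coldkey and ip
#   (2, 2) -> 0.0001  second repeat of both
for ck, ip in [(1, 0), (1, 1), (2, 2)]:
    print((ck, ip), (1 - UNIQUE_COLDKEY_PROB) ** ck * (1 - UNIQUE_IP_PROB) ** ip)
```

Unlike the old query_unique_coldkeys/query_unique_ips switches, this soft filter still lets large coldkey or IP clusters be sampled occasionally; each additional duplicate just becomes an order of magnitude less likely at the defaults.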
tests/test_uids.py (61 changes: 44 additions & 17 deletions)

@@ -1,48 +1,75 @@

 import torch
-import random
 import pytest
 from types import SimpleNamespace
+from typing import List, Set
 from prompting.utils.uids import get_random_uids


-def make_mock_neuron(unique_coldkeys=False, unique_ips=False, vpermit_tao_limit=1000):
+def make_mock_neuron(sample_size, unique_coldkey_prob=0, unique_ip_prob=0, add_validator=False, add_inactive=False, add_validator_above_limit=False):

     axons = [
         SimpleNamespace(coldkey="a", ip="0.0.0.1", is_serving=True),
-        SimpleNamespace(coldkey="a", ip="0.0.0.0", is_serving=True),
+        SimpleNamespace(coldkey="a", ip="0.0.0.2", is_serving=True),
         SimpleNamespace(coldkey="b", ip="0.0.0.1", is_serving=True),
-        SimpleNamespace(coldkey="b", ip="0.0.0.0", is_serving=True),
-        SimpleNamespace(coldkey="c", ip="0.0.0.2", is_serving=True),
+        SimpleNamespace(coldkey="b", ip="0.0.0.2", is_serving=True),
+        SimpleNamespace(coldkey="c", ip="1.0.0.0", is_serving=True),
+        SimpleNamespace(coldkey="d", ip="0.1.0.0", is_serving=True),
     ]
+    if add_validator:
+        axons.append(SimpleNamespace(coldkey="e", ip="0.0.1.0", is_serving=True))
+    if add_inactive:
+        axons.append(SimpleNamespace(coldkey="f", ip="0.0.0.1", is_serving=False))
+    if add_validator_above_limit:
+        axons.append(SimpleNamespace(coldkey="g", ip="1.1.1.1", is_serving=True))

     metagraph = SimpleNamespace(
         axons = axons,
+        coldkeys = [axon.coldkey for axon in axons],
         validator_permit = torch.ones(len(axons), dtype=torch.bool),
         S = torch.zeros(len(axons)),
         n = torch.tensor(len(axons))
     )

+    if add_validator_above_limit:
+        metagraph.S[-1] = 2000
+
     return SimpleNamespace(
-        uid = 4,
+        uid = 6,
         config = SimpleNamespace(
             neuron = SimpleNamespace(
-                vpermit_tao_limit = vpermit_tao_limit,
-                query_unique_coldkeys = unique_coldkeys,
-                query_unique_ips = unique_ips,
+                sample_size = sample_size,
+                vpermit_tao_limit = 1000,
+                unique_coldkey_prob = unique_coldkey_prob,
+                unique_ip_prob = unique_ip_prob,
             )
         ),
         metagraph = metagraph
     )

+ALL_IPS = {'0.0.0.1', '0.0.0.2', '1.0.0.0', '0.1.0.0'}
+ALL_COLDKEYS = {'a', 'b', 'c', 'd'}
+
 @pytest.mark.parametrize(
-    "unique_coldkeys, unique_ips, k, expected_result", [
-        (False, False, 4, [0, 1, 2, 3]),
-        (True, False, 2, [0, 2]),
-        (False, True, 2, [0, 1]),
-        (True, True, 2, [0, 3])
+    "unique_coldkey_prob, unique_ip_prob, sample_size, expected_coldkeys, expected_ips, expected_count", [
+        (0, 0, 10, ALL_COLDKEYS, ALL_IPS, 6),
+        (0, 0, 8, ALL_COLDKEYS, ALL_IPS, 6),
+        (0, 1, 8, None, ALL_IPS, 4),
+        (1, 0, 8, ALL_COLDKEYS, None, 4),
+        (1, 1, 8, ALL_COLDKEYS, ALL_IPS, 4),
     ])
-def test_get_random_uids(unique_coldkeys, unique_ips, k, expected_result):
+@pytest.mark.parametrize('add_validator', [True, False])
+@pytest.mark.parametrize('add_inactive', [True, False])
+@pytest.mark.parametrize('add_validator_above_limit', [True, False])
+@pytest.mark.parametrize('trial', range(5))
+def test_get_random_uids(unique_coldkey_prob: bool, unique_ip_prob: bool, sample_size: int, expected_coldkeys: Set[str], expected_ips: Set[str], expected_count: int, add_validator: bool, add_inactive: bool, add_validator_above_limit: bool, trial: int):

-    mock_neuron = make_mock_neuron(unique_coldkeys, unique_ips)
+    mock_neuron = make_mock_neuron(sample_size=sample_size, unique_coldkey_prob=unique_coldkey_prob, unique_ip_prob=unique_ip_prob, add_validator=add_validator, add_inactive=add_inactive, add_validator_above_limit=add_validator_above_limit)
+    uids = get_random_uids(mock_neuron).tolist()
+    coldkeys = [mock_neuron.metagraph.coldkeys[uid] for uid in uids]
+    ips = [mock_neuron.metagraph.axons[uid].ip for uid in uids]

-    assert sorted(get_random_uids(mock_neuron, k).tolist()) == expected_result, "Incorrect uids returned."
+    assert len(uids) == expected_count
+    assert expected_coldkeys is None or set(coldkeys) == expected_coldkeys
+    assert expected_ips is None or set(ips) == expected_ips
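The stacked parametrize decorators expand to 5 × 2 × 2 × 2 × 5 = 200 test cases. For a quick manual check outside pytest, something along these lines should work from the repo root (a sketch; it assumes the tests directory is importable as a package):

```python
# Manual smoke test mirroring the no-uniqueness-pressure case above.
from tests.test_uids import make_mock_neuron
from prompting.utils.uids import get_random_uids

neuron = make_mock_neuron(sample_size=10)  # unique_*_prob default to 0
uids = get_random_uids(neuron).tolist()
assert len(uids) == 6  # six queryable axons in the base mock
print(sorted(uids))    # [0, 1, 2, 3, 4, 5]
```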