ethereum · ChihChengLiang · Feb 5, 2019 · Jan 30, 2019
diff --git a/eth2_testgen/shuffling/constants.py b/eth2_testgen/shuffling/constants.py
@@ -1,4 +1,5 @@
-
-SHARD_COUNT = 2**10  # 1024
 EPOCH_LENGTH = 2**6  # 64 slots, 6.4 minutes
+FAR_FUTURE_EPOCH = 2**64 - 1  # uint64 max
+SHARD_COUNT = 2**10  # 1024
 TARGET_COMMITTEE_SIZE = 2**8  # 256 validators
+ENTRY_EXIT_DELAY = 2**2  # 4 epochs
diff --git a/eth2_testgen/shuffling/core_helpers.py b/eth2_testgen/shuffling/core_helpers.py
@@ -1,35 +1,39 @@
 """
-copy-pasted from specs
+copy-pasted from specs. Compatible with v0.1:
+https://github.com/ethereum/eth2.0-specs/releases/tag/v0.1
 """
 
-from typing import Any, Dict, List
+from typing import Any, Dict, List, NewType
 
 import yaml
 from eth_typing import Hash32
 
 from constants import EPOCH_LENGTH, SHARD_COUNT, TARGET_COMMITTEE_SIZE
-from enums import ValidatorStatusCode
 from utils import hash
-from yaml_objects import ShardCommittee, ValidatorRecord
+from yaml_objects import Validator
 
+EpochNumber = NewType("EpochNumber", int)
+ValidatorIndex = NewType("ValidatorIndex", int)
+Bytes32 = NewType("Bytes32", bytes)
 
-def is_active_validator(validator: ValidatorRecord) -> bool:
+
+def is_active_validator(validator: Validator, epoch: EpochNumber) -> bool:
     """
-    Checks if ``validator`` is active.
+    Check if ``validator`` is active.
     """
-    return validator.status in [ValidatorStatusCode.ACTIVE, ValidatorStatusCode.ACTIVE_PENDING_EXIT]
+    return validator.activation_epoch <= epoch < validator.exit_epoch
 
 
-def get_active_validator_indices(validators: [ValidatorRecord]) -> List[int]:
+def get_active_validator_indices(validators: List[Validator], epoch: EpochNumber) -> List[ValidatorIndex]:
     """
-    Gets indices of active validators from ``validators``.
+    Get indices of active validators from ``validators``.
     """
-    return [i for i, v in enumerate(validators) if is_active_validator(v)]
+    return [i for i, v in enumerate(validators) if is_active_validator(v, epoch)]
 
 
-def shuffle(values: List[Any], seed: Hash32) -> List[Any]:
+def shuffle(values: List[Any], seed: Bytes32) -> List[Any]:
     """
-    Returns the shuffled ``values`` with ``seed`` as entropy.
+    Return the shuffled ``values`` with ``seed`` as entropy.
     """
     values_count = len(values)
 
@@ -57,8 +61,7 @@ def shuffle(values: List[Any], seed: Hash32) -> List[Any]:
                 break
 
             # Read 3-bytes of `source` as a 24-bit big-endian integer.
-            sample_from_source = int.from_bytes(
-                source[position:position + rand_bytes], 'big')
+            sample_from_source = int.from_bytes(source[position:position + rand_bytes], 'big')
 
             # Sample values greater than or equal to `sample_max` will cause
             # modulo bias when mapped into the `remaining` range.
@@ -78,57 +81,49 @@ def shuffle(values: List[Any], seed: Hash32) -> List[Any]:
     return output
 
 
-def split(values: List[Any], split_count: int) -> List[Any]:
+def split(values: List[Any], split_count: int) -> List[List[Any]]:
     """
     Splits ``values`` into ``split_count`` pieces.
     """
     list_length = len(values)
     return [
-        values[
-            (list_length * i // split_count): (list_length * (i + 1) // split_count)
-        ]
+        values[(list_length * i // split_count): (list_length * (i + 1) // split_count)]
         for i in range(split_count)
     ]
 
-
-def get_new_shuffling(seed: Hash32,
-                      validators: List[ValidatorRecord],
-                      crosslinking_start_shard: int) -> List[List[ShardCommittee]]:
-    """
-    Shuffles ``validators`` into shard committees using ``seed`` as entropy.
-    """
-    active_validator_indices = get_active_validator_indices(validators)
-
-    committees_per_slot = max(
+def get_epoch_committee_count(active_validator_count: int) -> int:
+    return max(
         1,
         min(
             SHARD_COUNT // EPOCH_LENGTH,
-            len(active_validator_indices) // EPOCH_LENGTH // TARGET_COMMITTEE_SIZE,
+            active_validator_count // EPOCH_LENGTH // TARGET_COMMITTEE_SIZE,
         )
-    )
+    ) * EPOCH_LENGTH
 
-    # Shuffle with seed
-    shuffled_active_validator_indices = shuffle(active_validator_indices, seed)
 
-    # Split the shuffled list into epoch_length pieces
-    validators_per_slot = split(
-        shuffled_active_validator_indices, EPOCH_LENGTH)
+def xor(a: bytes, b: bytes) -> bytes:
+    return bytes(i ^ j for (i, j) in zip(a, b))
 
-    output = []
-    for slot, slot_indices in enumerate(validators_per_slot):
-        # Split the shuffled list into committees_per_slot pieces
-        shard_indices = split(slot_indices, committees_per_slot)
 
-        shard_id_start = crosslinking_start_shard + slot * committees_per_slot
+def int_to_bytes32(x) -> bytes:
+    return x.to_bytes(32, 'big')
 
-        shard_committees = [
-            ShardCommittee(
-                shard=(shard_id_start + shard_position) % SHARD_COUNT,
-                committee=indices,
-                total_validator_count=len(active_validator_indices),
-            )
-            for shard_position, indices in enumerate(shard_indices)
-        ]
-        output.append(shard_committees)
 
-    return output
+def get_shuffling(seed: Bytes32, validators: List[Validator], epoch: EpochNumber) -> List[List[ValidatorIndex]]:
+    """
+    Shuffle ``validators`` into crosslink committees seeded by ``seed`` and ``epoch``.
+    Return a list of ``committees_per_epoch`` committees where each
+    committee is itself a list of validator indices.
+    """
+
+    active_validator_indices = get_active_validator_indices(validators, epoch)
+
+    committees_per_epoch = get_epoch_committee_count(
+        len(active_validator_indices))
+
+    # Shuffle
+    seed = xor(seed, int_to_bytes32(epoch))
+    shuffled_active_validator_indices = shuffle(active_validator_indices, seed)
+
+    # Split the shuffled list into committees_per_epoch pieces
+    return split(shuffled_active_validator_indices, committees_per_epoch)
diff --git a/eth2_testgen/shuffling/enums.py b/eth2_testgen/shuffling/enums.py
diff --git a/eth2_testgen/shuffling/tgen_shuffling.py b/eth2_testgen/shuffling/tgen_shuffling.py
@@ -4,10 +4,9 @@
 
 import yaml
 
-from constants import SHARD_COUNT
-from core_helpers import get_new_shuffling
-from enums import ValidatorStatusCode
-from yaml_objects import ShardCommittee, ValidatorRecord
+from constants import ENTRY_EXIT_DELAY, FAR_FUTURE_EPOCH
+from core_helpers import get_shuffling
+from yaml_objects import Validator
 
 
 def noop(self, *args, **kw):
@@ -18,19 +17,22 @@ def noop(self, *args, **kw):
 yaml.emitter.Emitter.process_tag = noop
 
 
-def yaml_ValidatorStatusCode(dumper, data):
-    # Try to deal with enums - otherwise for "ValidatorStatus.Active" you get [1], instead of 1
-    return dumper.represent_data(data.value)
+EPOCH = 1000  # The epoch, also the mean of the normal distribution
 
+# Standard deviation. Around 8% of validators will activate or exit within
+# ENTRY_EXIT_DELAY epochs (inclusive) of EPOCH, thus creating an edge case for
+# validator shuffling.
+RAND_EPOCH_STD = 35
+
+MAX_EXIT_EPOCH = 5000  # Maximum exit_epoch for easier reading
 
-yaml.add_representer(ValidatorStatusCode, yaml_ValidatorStatusCode)
 
 if __name__ == '__main__':
 
     # Order not preserved - https://github.com/yaml/pyyaml/issues/110
     metadata = {
         'title': 'Shuffling Algorithm Tests',
-        'summary': 'Test vectors for shuffling a list based upon a seed using `shuffle`',
+        'summary': 'Test vectors for validator shuffling. Note: only relevant validator fields are defined.',
         'test_suite': 'shuffle',
         'fork': 'tchaikovsky',
         'version': 1.0
@@ -39,24 +41,50 @@ def yaml_ValidatorStatusCode(dumper, data):
     # Config
     random.seed(int("0xEF00BEAC", 16))
     num_cases = 10
-    list_val_state = list(ValidatorStatusCode)
-    test_cases = []
 
+    test_cases = []
     for case in range(num_cases):
         seedhash = bytes(random.randint(0, 255) for byte in range(32))
-        num_val = random.randint(128, 512)
-        validators = [
-            ValidatorRecord(
-                status=random.choice(list_val_state),
-                original_index=num_val)
-            for num_val in range(num_val)
-        ]
+        idx_max = random.randint(128, 512)
+
+        validators = []
+        for idx in range(idx_max):
+            v = Validator(original_index=idx)
+            # 4/5 of all validators are active
+            if random.random() < 0.8:
+                # Choose a normally distributed epoch number
+                rand_epoch = round(random.gauss(EPOCH, RAND_EPOCH_STD))
+
+                # for 1/2 of *active* validators rand_epoch is the activation epoch
+                if random.random() < 0.5:
+                    v.activation_epoch = rand_epoch
+
+                    # 1/4 of active validators will exit in the foreseeable future
+                    if random.random() < 0.5:
+                        v.exit_epoch = random.randint(
+                            rand_epoch + ENTRY_EXIT_DELAY + 1, MAX_EXIT_EPOCH)
+                    # 1/4 of active validators in theory remain in the set indefinitely
+                    else:
+                        v.exit_epoch = FAR_FUTURE_EPOCH
+                # for the other active 1/2 rand_epoch is the exit epoch
+                else:
+                    v.activation_epoch = random.randint(
+                        0, rand_epoch - ENTRY_EXIT_DELAY)
+                    v.exit_epoch = rand_epoch
+
+            # The remaining 1/5 of all validators are not activated
+            else:
+                v.activation_epoch = FAR_FUTURE_EPOCH
+                v.exit_epoch = FAR_FUTURE_EPOCH
+
+            validators.append(v)
+
         input_ = {
-            'validators_status': [v.status.value for v in validators],
-            'crosslinking_start_shard': random.randint(0, SHARD_COUNT)
+            'validators': validators,
+            'epoch': EPOCH
         }
-        output = get_new_shuffling(
-            seedhash, validators, input_['crosslinking_start_shard'])
+        output = get_shuffling(
+            seedhash, validators, input_['epoch'])
 
         test_cases.append({
             'seed': '0x' + seedhash.hex(), 'input': input_, 'output': output

diff --git a/eth2_testgen/shuffling/yaml_objects.py b/eth2_testgen/shuffling/yaml_objects.py
@@ -3,33 +3,15 @@
 import yaml
 
 
-class ValidatorRecord(yaml.YAMLObject):
+class Validator(yaml.YAMLObject):
+    """ 
+    A validator stub containing only the fields relevant for get_shuffling()
+    """
     fields = {
-        # Status code
-        'status': 'ValidatorStatusCode',
+        'activation_epoch': 'uint64',
+        'exit_epoch': 'uint64',
         # Extra index field to ease testing/debugging
-        'original_index': 'uint64'
-    }
-
-    def __init__(self, **kwargs):
-        for k in self.fields.keys():
-            setattr(self, k, kwargs.get(k))
-
-    def __setattr__(self, name: str, value: Any) -> None:
-        super().__setattr__(name, value)
-
-    def __getattribute__(self, name: str) -> Any:
-        return super().__getattribute__(name)
-
-
-class ShardCommittee(yaml.YAMLObject):
-    fields = {
-        # Shard number
-        'shard': 'uint64',
-        # Validator indices
-        'committee': ['uint24'],
-        # Total validator count (for proofs of custody)
-        'total_validator_count': 'uint64',
+        'original_index': 'uint64',
     }
 
     def __init__(self, **kwargs):