Skip to content

Commit

Permalink
Merge pull request #103 from macrocosm-os/dev
Browse files Browse the repository at this point in the history
Release 2.3.0
  • Loading branch information
RusticLuftig authored Nov 1, 2024
2 parents ecd856f + 7bd0ff3 commit 9598e92
Show file tree
Hide file tree
Showing 8 changed files with 425 additions and 244 deletions.
21 changes: 13 additions & 8 deletions constants/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
# Project Constants.
# ---------------------------------

__version__ = "2.2.1"
__version__ = "2.3.0"
version_split = __version__.split(".")
__spec_version__ = (
(1000 * int(version_split[0]))
Expand All @@ -38,6 +38,16 @@
# to start from a fresh state.
VALIDATOR_STATE_VERSION = 3

# Block the subnet was registered.
GENESIS_BLOCK = 3138611
# Define the number of blocks per vali "sync". This cadence is used to align validator behavior for better vtrust.
SYNC_BLOCK_CADENCE = 90
# Rough estimate of the number of seconds per block.
SECONDS_PER_BLOCK = 12
# Any miners with a combined competition weight below this threshold will instead receive 0 weight.
# This is to help vtrust by more quickly deprecating previous top models that are being phased out.
# At 1 eval per 90 blocks, this should mean a model is phased out in ~1.5 epochs.
MIN_WEIGHT_THRESHOLD = 0.005
# The validator WANDB project.
WANDB_PROJECT = "finetuning"
WANDB_ENTITY = "rusticluftig"
Expand All @@ -52,8 +62,6 @@
PROMPTING_MAX_AGE = dt.timedelta(hours=4)
# Minimum number of samples allowed to consider MMLU as an eval task.
MIN_ALLOWED_SAMPLES = 50
# Percentage of prompting miners who must have gotten the question correct to include in the eval set.
PROMPTING_MIN_CORRECT_MINERS = 0
# Minimum stake to consider a validator when checking for miners with weights.
WEIGHT_SYNC_VALI_MIN_STAKE = 100_000
# Minimum percent of weight on a vali for a miner to be considered a top miner.
Expand Down Expand Up @@ -92,9 +100,6 @@
),
}

# Block at which word sorting is included in the competition eval.
WORD_SORTING_BLOCK = 4139465

# Schedule of competitions by block.
COMPETITION_SCHEDULE_BY_BLOCK: List[Tuple[int, List[Competition]]] = [
(
Expand Down Expand Up @@ -134,5 +139,5 @@
scan_top_model_cadence = dt.timedelta(minutes=30)
# validator eval batch min to keep for next loop.
sample_min = 4
# We allow the sample_min per competition + 10 additional models to be held at any one time.
updated_models_limit = sample_min * len(MODEL_CONSTRAINTS_BY_COMPETITION_ID) + 10
# We allow the sample_min per competition + 16 additional models to be held at any one time.
updated_models_limit = sample_min * len(MODEL_CONSTRAINTS_BY_COMPETITION_ID) + 16
1 change: 1 addition & 0 deletions finetune/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@
from . import mining
from . import validation
from . import model
from . import utils
326 changes: 164 additions & 162 deletions finetune/datasets/subnet/prompting_subset_loader.py

Large diffs are not rendered by default.

45 changes: 45 additions & 0 deletions finetune/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import math
import bittensor as bt
import datetime as dt


def get_block_timestamp(subtensor: bt.subtensor, block_number: int) -> dt.datetime:
    """Return a timezone-aware UTC datetime for the timestamp of the given block.

    Args:
        subtensor (bt.subtensor): Subtensor client used to query the chain.
        block_number (int): The block number whose timestamp is returned.
    """
    block_data = subtensor.substrate.get_block(block_number=block_number)
    # By substrate convention the first extrinsic in a block is `timestamp.set`,
    # whose single argument is the block time in milliseconds since the epoch.
    timestamp_ms = block_data["extrinsics"][0]["call"]["call_args"][0]["value"].value
    # Build the aware UTC datetime directly instead of creating a naive
    # local-time datetime and converting with .astimezone(): the round trip
    # through the host timezone is unnecessary and can raise on platforms
    # (e.g. Windows) where pre-epoch local timestamps are unsupported.
    return dt.datetime.fromtimestamp(timestamp_ms / 1000, tz=dt.timezone.utc)


def get_hash_of_block(subtensor: bt.subtensor, block_number: int) -> int:
    """Return the hash of the block at the given block number, as an int.

    The hex block-hash string is parsed directly rather than passed through
    Python's built-in ``hash()``: string hashing is salted per process
    (PYTHONHASHSEED), so ``hash()`` would yield a different value for the same
    block on every validator run, defeating any cross-run agreement.

    Args:
        subtensor (bt.subtensor): Subtensor client used to query the chain.
        block_number (int): The block number whose hash is returned.
    """
    # get_block_hash returns a "0x"-prefixed hex string; int(..., 16) accepts it.
    return int(subtensor.get_block_hash(block_number), 16)


def get_sync_block(block: int, sync_cadence: int, genesis: int = 0) -> int:
    """Return the most recent sync block that falls on or before `block`.

    Args:
        block (int): The block number.
        sync_cadence (int): The cadence of blocks to sync on.
        genesis (int, optional): The genesis block number. Defaults to 0. This
            can be used to synchronize on a subnet's epoch.
    """
    # Count the whole cadence intervals elapsed since genesis, then step back
    # onto the boundary of the interval containing `block`.
    elapsed_intervals = (block - genesis) // sync_cadence
    return genesis + elapsed_intervals * sync_cadence


def get_next_sync_block(block: int, sync_cadence: int, genesis: int = 0) -> int:
    """Return the next sync block that is strictly after `block`.

    Args:
        block (int): The block number.
        sync_cadence (int): The cadence of blocks to sync on.
        genesis (int, optional): The genesis block number. Defaults to 0. This
            can be used to synchronize on a subnet's epoch.
    """
    # Pure integer arithmetic: floor-divide, then step one full cadence
    # forward. This is equivalent to the ceil-based float form but is exact
    # for arbitrarily large block numbers (float division loses precision
    # beyond 2**53), and it is strictly-after by construction — a block that
    # sits exactly on a cadence boundary advances a whole cadence.
    return genesis + ((block - genesis) // sync_cadence + 1) * sync_cadence
8 changes: 1 addition & 7 deletions neurons/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,10 @@ def validator_config():
default=100,
help="Number of blocks to wait before setting weights.",
)
parser.add_argument(
"--latest_prompting_steps",
type=int,
default=500, # Sample more steps since prompting runs this less frequently.
help="Number of most recent Prompting steps to sample data from",
)
parser.add_argument(
"--latest_prompting_samples",
type=int,
default=400,
default=700,
help="Number of most recent Prompting samples to eval against",
)
parser.add_argument(
Expand Down
Loading

0 comments on commit 9598e92

Please sign in to comment.