feat: tune cubic function for scores

tensorplex-labs · Oct 25, 2024 · 0a23e1d · 0a23e1d
1 parent 8e557a7
commit 0a23e1d
Showing 1 changed file with 112 additions and 12 deletions.
diff --git a/commons/scoring.py b/commons/scoring.py
@@ -44,6 +44,59 @@ class Config:
     icc_by_miner: torch.Tensor
 
 
+def _reward_cubic(
+    miner_outputs: np.ndarray,
+    ground_truth: np.ndarray,
+    scaling: float,
+    translation: float,
+    offset: float,
+) -> np.ndarray:
+    """Calculate cubic reward based on miner outputs and ground truth.
+
+    Args:
+        miner_outputs (np.ndarray): 2D array of miner outputs (shape: num_miners x num_completions).
+        ground_truth (np.ndarray): 1D array of ground truth values (shape: num_completions).
+        scaling (float): Scaling factor for the cubic function.
+        translation (float): Translation factor for the cubic function.
+        offset (float): Offset for the cubic function.
+
+    Returns:
+        np.ndarray: Transformed points based on the cubic function.
+    """
+    # ensure ground truth is a column vector for broadcasting
+    # shape: (1, num_completions)
+    ground_truth = ground_truth.reshape(1, -1)
+
+    # ensure dims for broadcasting
+    assert len(ground_truth.shape) == 2
+    assert len(miner_outputs.shape) == 2
+
+    # Shape: (num_miners, num_completions)
+    x = miner_outputs - ground_truth
+
+    # apply the cubic transformation
+    points = (scaling * (x - translation) ** 3 + offset).flatten()
+    logger.debug(f"scoring: cubic reward\n{points}")
+
+    # case where a miner provides the same score for all completions
+    # convert any nans to zero
+    points = np.where(np.isnan(points), 0, points)
+    logger.debug(f"scoring: cubic reward no nans\n{points}")
+
+    # ensure all values are in the range [0, 1]
+    points = minmax_scale(points)
+    logger.debug(f"scoring: cubic reward minmax scaled\n{points}")
+
+    points = points.numpy()
+
+    assert isinstance(points, np.ndarray)
+    return points
+
+
+def _reward_l1_norm(miner_outputs: np.ndarray, ground_truth: np.ndarray):
+    return np.linalg.norm(miner_outputs - ground_truth, axis=1)
+
+
 class Score(BaseModel):
     class Config:
         arbitrary_types_allowed = True
@@ -338,18 +391,31 @@ def ground_truth_score_V1(
         logger.info(f"scoring: Miner outputs\n{miner_outputs}")
         logger.info(f"scoring: Ground truth\n{ground_truth_arr}")
 
-        l1_norm = np.linalg.norm(miner_outputs - ground_truth_arr, axis=1)
-        logger.debug(f"scoring: l1 norm\n{l1_norm}")
-
-        # case where a miner provides the same score for all completions
-        # convert any nans to zero
-        l1_norm = np.where(np.isnan(l1_norm), 0, l1_norm)
+        # l1_norm = np.linalg.norm(miner_outputs - ground_truth_arr, axis=1)
+        # l1_norm = np.linalg.norm(miner_outputs - ground_truth_arr, axis=1)
+        cubic_reward: np.ndarray = _reward_cubic(
+            miner_outputs, ground_truth_arr, 0.04, 7, 2
+        )
+        logger.debug(f"scoring: cubic reward\n{cubic_reward}")
 
-        logger.debug(f"scoring: l1 norm no nans\n{l1_norm}")
         # normalize to ensure sum is 1
-        l1_norm = l1_norm / np.sum(l1_norm)
-        logger.debug(f"scoring: l1 norm normalized (sum=1)\n{l1_norm}")
-        return torch.from_numpy(l1_norm)
+        cubic_reward = cubic_reward / np.sum(cubic_reward)
+
+        logger.debug(f"scoring: cubic reward normalized (sum=1)\n{cubic_reward}")
+
+        # calculate sum for each segment of the cubic reward
+        try:
+            segment_size = len(cubic_reward) // 5
+            segment_sums = [
+                np.sum(cubic_reward[i * segment_size : (i + 1) * segment_size])
+                for i in range(5)
+            ]
+            logger.debug(f"scoring: segment sums\n{segment_sums}")
+        except Exception as e:
+            logger.debug(f"scoring: error calculating segment sums: {e}")
+            pass
+
+        return torch.from_numpy(cubic_reward)
 
     @staticmethod
     def cmp_ground_truth(
@@ -777,13 +843,47 @@ def _test_ground_truth_score_v1():
     import matplotlib.pyplot as plt
 
     scores = Scoring.ground_truth_score_V1(criteria, gt, miner_responses)
-    scores = [score / sum(scores) for score in scores]  # Normalize to ensure sum is 1
+    scores, _ = torch.sort(scores, descending=False)
+    # Check if the sum of scores is 1
+    print(f"{scores=}")
 
     plt.figure(figsize=(10, 6))
-    plt.bar(range(len(scores)), scores)
+    (line,) = plt.plot(range(len(scores)), scores, marker="o")
     plt.xlabel("Miner Response Index")
     plt.ylabel("Score")
     plt.title("Ground Truth Scores")
+
+    annot = plt.gca().annotate(
+        "",
+        xy=(0, 0),
+        xytext=(20, 20),
+        textcoords="offset points",
+        bbox=dict(boxstyle="round", fc="w"),
+        arrowprops=dict(arrowstyle="->"),
+    )
+    annot.set_visible(False)
+
+    def update_annot(line, ind):
+        x, y = line.get_data()
+        annot.xy = (x[ind["ind"][0]], y[ind["ind"][0]])
+        text = f"{y[ind['ind'][0]]:.2f}"
+        annot.set_text(text)
+        annot.get_bbox_patch().set_alpha(0.4)
+
+    def hover(event):
+        vis = annot.get_visible()
+        if event.inaxes == plt.gca():
+            cont, ind = line.contains(event)
+            if cont:
+                update_annot(line, ind)
+                annot.set_visible(True)
+                plt.gcf().canvas.draw_idle()
+            else:
+                if vis:
+                    annot.set_visible(False)
+                    plt.gcf().canvas.draw_idle()
+
+    plt.gcf().canvas.mpl_connect("motion_notify_event", hover)
     plt.show()