Commit 91b24d1
Add new note status history field and update versions
jbaxter committed Jul 3, 2024
1 parent 1b9281a commit 91b24d1
Showing 10 changed files with 101 additions and 49 deletions.
2 changes: 2 additions & 0 deletions sourcecode/scoring/constants.py
@@ -424,6 +424,7 @@ def rater_factor_key(i):
currentGroupStatusKey = "currentGroupStatus"
currentDecidedByKey = "currentDecidedBy"
currentModelingGroupKey = "currentModelingGroup"
timestampMillisOfMostRecentStatusChangeKey = "timestampMillisOfMostRecentStatusChange"

noteStatusHistoryTSVColumnsAndTypes = [
(noteIdKey, np.int64),
@@ -443,6 +444,7 @@ def rater_factor_key(i):
(currentGroupStatusKey, object),
(currentDecidedByKey, object),
(currentModelingGroupKey, np.double), # TODO: int
(timestampMillisOfMostRecentStatusChangeKey, np.double), # double because nullable.
]
noteStatusHistoryTSVColumns = [col for (col, dtype) in noteStatusHistoryTSVColumnsAndTypes]
noteStatusHistoryTSVTypes = [dtype for (col, dtype) in noteStatusHistoryTSVColumnsAndTypes]
2 changes: 1 addition & 1 deletion sourcecode/scoring/helpfulness_scores.py
@@ -155,7 +155,7 @@ def compute_general_helpfulness_scores(

helpfulRatingsOnBadNotesCount = (
helpfulRatingsOnBadNotes[[c.raterParticipantIdKey, c.totalHelpfulHarassmentRatingsPenaltyKey]]
.groupby(c.raterParticipantIdKey)
.groupby(c.raterParticipantIdKey)[[c.totalHelpfulHarassmentRatingsPenaltyKey]]
.sum()
.reset_index()
)
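The added column selection makes the aggregation explicit: only the penalty column is summed, and the result stays a DataFrame. A sketch with stand-in data (the real column names come from scoring.constants):

```python
import pandas as pd

helpfulRatingsOnBadNotes = pd.DataFrame({
  "raterParticipantId": ["r1", "r1", "r2"],
  "totalHelpfulHarassmentRatingsPenalty": [1.0, 2.0, 3.0],
})
helpfulRatingsOnBadNotesCount = (
  helpfulRatingsOnBadNotes[["raterParticipantId", "totalHelpfulHarassmentRatingsPenalty"]]
  .groupby("raterParticipantId")[["totalHelpfulHarassmentRatingsPenalty"]]  # explicit selection
  .sum()
  .reset_index()
)
print(helpfulRatingsOnBadNotesCount)  # r1 -> 3.0, r2 -> 3.0
```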
5 changes: 1 addition & 4 deletions sourcecode/scoring/note_ratings.py
@@ -138,10 +138,7 @@ def get_ratings_before_note_status_and_public_tsv(
)
]

combinedRatingsBeforeStatus = pd.concat(
[ratingsBeforeStatusNewNotes, first5RatingsOldNotes],
unsafeAllowed=c.defaultIndexKey,
)
combinedRatingsBeforeStatus = pd.concat([ratingsBeforeStatusNewNotes, first5RatingsOldNotes])

if logging:
print(
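Per pandas_utils.py below, `unsafeAllowed` is an argument of this repo's patched pandas wrappers, which check type stability and whitelist columns permitted to change dtype; removing it here asserts the plain concat is now type-stable. With stand-in frames the call reduces to:

```python
import pandas as pd

ratingsBeforeStatusNewNotes = pd.DataFrame({"noteId": [1], "helpfulNum": [1.0]})
first5RatingsOldNotes = pd.DataFrame({"noteId": [2], "helpfulNum": [0.0]})
# Identical dtypes on both sides, so no unsafeAllowed escape hatch is needed.
combinedRatingsBeforeStatus = pd.concat([ratingsBeforeStatusNewNotes, first5RatingsOldNotes])
```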
20 changes: 18 additions & 2 deletions sourcecode/scoring/note_status_history.py
@@ -88,6 +88,18 @@ def _update_single_note_status_history(mergedNote, currentTimeMillis, newScoredN
Returns:
row of pd.DataFrame
"""
if mergedNote[c.finalRatingStatusKey] != mergedNote[c.currentLabelKey]:
# Changed status vs. previous run:
mergedNote[c.timestampMillisOfMostRecentStatusChangeKey] = currentTimeMillis
else:
# No change in status vs. previous run
# If the note has not changed status (since the launch of this feature on 2024/07/02),
# then the timestamp of the most recent status change should be set to -1 by default.
if c.timestampMillisOfMostRecentStatusChangeKey not in mergedNote.index:
mergedNote[c.timestampMillisOfMostRecentStatusChangeKey] = -1
elif pd.isna(mergedNote[c.timestampMillisOfMostRecentStatusChangeKey]):
mergedNote[c.timestampMillisOfMostRecentStatusChangeKey] = -1

# Update the current status in accordance with this scoring run.
assert not pd.isna(mergedNote[c.finalRatingStatusKey])
mergedNote[c.currentLabelKey] = mergedNote[c.finalRatingStatusKey]
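The new branch in isolation, as a runnable sketch with stand-in string keys and labels (the module itself uses constants from scoring.constants):

```python
import pandas as pd

def stamp_status_change(mergedNote: pd.Series, currentTimeMillis: int) -> pd.Series:
  if mergedNote["finalRatingStatus"] != mergedNote["currentLabel"]:
    # Status flipped since the previous run: record when it happened.
    mergedNote["timestampMillisOfMostRecentStatusChange"] = currentTimeMillis
  elif ("timestampMillisOfMostRecentStatusChange" not in mergedNote.index
        or pd.isna(mergedNote["timestampMillisOfMostRecentStatusChange"])):
    # No flip and no recorded change yet: default to the -1 sentinel.
    mergedNote["timestampMillisOfMostRecentStatusChange"] = -1
  return mergedNote

note = pd.Series({"finalRatingStatus": "CURRENTLY_RATED_HELPFUL",
                  "currentLabel": "NEEDS_MORE_RATINGS"})
print(stamp_status_change(note, 1720000000000))
```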
@@ -192,11 +204,15 @@ def _check_flips(mergedStatuses: pd.DataFrame, maxCrhChurn: float) -> None:
)
if len(oldCrhNotes) > 0 and len(newCrhNotes) > 0:
# Validate that changes are within allowable bounds.
print(f"new note ratio: {(len(newCrhNotes - oldCrhNotes) / len(oldCrhNotes))}")
print(
f"new note ratio: {(len(newCrhNotes - oldCrhNotes) / len(oldCrhNotes))}. (newCrhNotes={len(newCrhNotes)}, oldCrhNotes={len(oldCrhNotes)}, delta={len(newCrhNotes - oldCrhNotes)}"
)
assert (
(len(newCrhNotes - oldCrhNotes) / len(oldCrhNotes)) < maxCrhChurn
), f"Too many new CRH notes: newCrhNotes={len(newCrhNotes)}, oldCrhNotes={len(oldCrhNotes)}, delta={len(newCrhNotes - oldCrhNotes)}"
print(f"old note ratio: {len(oldCrhNotes - newCrhNotes) / len(oldCrhNotes)}")
print(
f"old note ratio: {len(oldCrhNotes - newCrhNotes) / len(oldCrhNotes)} (newCrhNotes={len(newCrhNotes)}, oldCrhNotes={len(oldCrhNotes)}, delta={len(oldCrhNotes - newCrhNotes)}"
)
assert (
(len(oldCrhNotes - newCrhNotes) / len(oldCrhNotes)) < maxCrhChurn
), f"Too few new CRH notes: newCrhNotes={len(newCrhNotes)}, oldCrhNotes={len(oldCrhNotes)}, delta={len(oldCrhNotes - newCrhNotes)}"
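The ratios are set arithmetic over note IDs: `newCrhNotes - oldCrhNotes` is the set of notes that became CRH this run, and the reverse difference is the set that lost CRH. A stand-in illustration of the bounds being asserted:

```python
oldCrhNotes = {101, 102, 103, 104}
newCrhNotes = {102, 103, 104, 105, 106}
maxCrhChurn = 0.6  # stand-in threshold

newRatio = len(newCrhNotes - oldCrhNotes) / len(oldCrhNotes)  # 2 / 4 = 0.5
oldRatio = len(oldCrhNotes - newCrhNotes) / len(oldCrhNotes)  # 1 / 4 = 0.25
assert newRatio < maxCrhChurn, "too many new CRH notes"
assert oldRatio < maxCrhChurn, "too many notes lost CRH"
```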
34 changes: 6 additions & 28 deletions sourcecode/scoring/pandas_utils.py
@@ -275,28 +275,6 @@ def _safe_merge(*args, **kwargs):
return _safe_merge


def _get_multiindex_types(df: pd.DataFrame):
"""Create a dictionary mapping index columns to dtypes.
Note that this approach may mis-type columns under rare circumstances. See example below:
# left = pd.DataFrame({"idx0": [1, 2], "idx1": [11, 12], "val1": [4, 5]}).set_index(["idx0", "idx1"])
# right = pd.DataFrame({"idx0": [1, 2, 3], "idx2": [21, 22, 23], "val2": [7, 8, 9]}).set_index(["idx0", "idx2"])
# print(dict(left.join(right, how="outer").index.dtypes))
# print(dict(left.join(right, how="outer").reset_index(drop=False).dtypes))
# $> {'idx0': dtype('int64'), 'idx1': dtype('int64'), 'idx2': dtype('int64')}
# $> {'idx0': dtype('int64'), 'idx1': dtype('float64'), 'idx2': dtype('int64'), 'val1': dtype('float64'), 'val2': dtype('int64')}
Unfortunately, Pandas 1.1.5 seems to treat the type of multi-level indexes as "object",
as the "dtypes" property is not defined for MultiIndex and df.index.dtype and
df.index.values.dtype both return dtype('O'). At some point between Pandas 1.1.5
and 2.2.2, the dtypes attribute was defined for MultiIndex and dict(df.index.dtypes)
yields a mapping from index columns to their actual datatype.
"""
dfTypes = dict(df.reset_index(drop=False).dtypes)
indexCols = set(df.index.names)
return {col: dtype for (col, dtype) in dfTypes.items() if col in indexCols}


def safe_join(fail: bool, counter: TypeErrorCounter) -> Callable:
"""Return a modified merge function that checks type stability.
@@ -334,12 +312,12 @@ def _safe_join(*args, **kwargs):
if len(leftFrame.index.names) == 1 and len(rightFrame.index.names) == 1:
match = leftFrame.index.dtype == rightFrame.index.dtype
elif len(leftFrame.index.names) == 1 and len(rightFrame.index.names) > 1:
indexTypes = _get_multiindex_types(rightFrame)
indexTypes = dict(rightFrame.index.dtypes)
name = leftFrame.index.names[0]
assert name in indexTypes, f"{name} not found in {indexTypes}"
match = indexTypes[name] == leftFrame.index.dtype
elif len(leftFrame.index.names) > 1 and len(rightFrame.index.names) == 1:
indexTypes = _get_multiindex_types(leftFrame)
indexTypes = dict(leftFrame.index.dtypes)
name = rightFrame.index.names[0]
assert name in indexTypes, f"{name} not found in {indexTypes}"
match = indexTypes[name] == rightFrame.index.dtype
@@ -350,8 +328,8 @@ def _safe_join(*args, **kwargs):
assert (
len(rightFrame.index.names) > 1
), f"unexpected right: {type(rightFrame.index)}, {rightFrame.index}"
leftIndexTypes = _get_multiindex_types(leftFrame)
rightIndexTypes = _get_multiindex_types(rightFrame)
leftIndexTypes = dict(leftFrame.index.dtypes)
rightIndexTypes = dict(rightFrame.index.dtypes)
match = True
for col in set(leftIndexTypes) & set(rightIndexTypes):
match = match & (leftIndexTypes[col] == rightIndexTypes[col])
@@ -391,11 +369,11 @@ def _safe_join(*args, **kwargs):
leftIndexCols = set(leftFrame.index.names)
rightIndexCols = set(rightFrame.index.names)
if len(leftIndexCols) > 1:
leftDtypes = _get_multiindex_types(leftFrame)
leftDtypes = dict(leftFrame.index.dtypes)
else:
leftDtypes = {leftFrame.index.name: leftFrame.index.dtype}
if len(rightIndexCols) > 1:
rightDtypes = _get_multiindex_types(rightFrame)
rightDtypes = dict(rightFrame.index.dtypes)
else:
rightDtypes = {rightFrame.index.name: rightFrame.index.dtype}
for col in leftIndexCols & rightIndexCols:
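The deleted helper reconstructed index dtypes via a reset_index round-trip; on the pandas version this commit assumes, MultiIndex exposes a dtypes attribute directly, so the helper collapses to dict(df.index.dtypes). A quick check:

```python
import pandas as pd

df = pd.DataFrame({"idx0": [1, 2], "idx1": [11, 12], "val": [4.0, 5.0]})
df = df.set_index(["idx0", "idx1"])
# On recent pandas this maps each index level to its true dtype,
# avoiding the reset_index round-trip (and its mis-typing edge case).
print(dict(df.index.dtypes))  # {'idx0': dtype('int64'), 'idx1': dtype('int64')}
```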
5 changes: 5 additions & 0 deletions sourcecode/scoring/post_selection_similarity.py
@@ -100,6 +100,11 @@ def filter_ratings_by_post_selection_similarity(notes, ratings, postSelectionSim
ratings = pd.concat(
[ratingsWithPostSelectionSimilarityValue, ratingsWithNoPostSelectionSimilarityValue], axis=0
)
ratings.drop(
columns={c.noteAuthorParticipantIdKey, c.raterParticipantIdKey + "_note_author"},
errors="ignore",
inplace=True,
)
return ratings


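The added drop cleans up merge helper columns, and errors="ignore" makes it a no-op for any column that is already absent. A stand-in illustration:

```python
import pandas as pd

ratings = pd.DataFrame({"noteAuthorParticipantId": ["a"], "helpfulNum": [1.0]})
ratings.drop(columns=["noteAuthorParticipantId", "raterParticipantId_note_author"],
             errors="ignore", inplace=True)  # no KeyError for the absent column
print(ratings.columns.tolist())  # ['helpfulNum']
```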
5 changes: 5 additions & 0 deletions sourcecode/scoring/process_data.py
@@ -432,10 +432,12 @@ def write_prescoring_output(
prescoringRaterModelOutput: pd.DataFrame,
noteTopicClassifier: Pipeline,
prescoringMetaOutput: c.PrescoringMetaOutput,
prescoringScoredNotesOutput: Optional[pd.DataFrame],
noteModelOutputPath: str,
raterModelOutputPath: str,
noteTopicClassifierPath: str,
prescoringMetaOutputPath: str,
prescoringScoredNotesOutputPath: Optional[str],
headers: bool = True,
):
prescoringNoteModelOutput = prescoringNoteModelOutput[c.prescoringNoteModelOutputTSVColumns]
@@ -446,6 +448,9 @@ def write_prescoring_output(
assert all(prescoringRaterModelOutput.columns == c.prescoringRaterModelOutputTSVColumns)
write_tsv_local(prescoringRaterModelOutput, raterModelOutputPath, headers=headers)

if prescoringScoredNotesOutput is not None and prescoringScoredNotesOutputPath is not None:
write_tsv_local(prescoringScoredNotesOutput, prescoringScoredNotesOutputPath, headers=headers)

joblib.dump(noteTopicClassifier, noteTopicClassifierPath)
joblib.dump(prescoringMetaOutput, prescoringMetaOutputPath)

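Both the new frame and its path are Optional, and the write happens only when both are supplied. The guard in miniature (to_csv stands in for the repo's write_tsv_local helper):

```python
from typing import Optional
import pandas as pd

def maybe_write_tsv(df: Optional[pd.DataFrame], path: Optional[str]) -> None:
  # Skip silently when either the data or its destination was not provided.
  if df is not None and path is not None:
    df.to_csv(path, sep="\t", index=False)
```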
51 changes: 48 additions & 3 deletions sourcecode/scoring/run_scoring.py
@@ -168,7 +168,7 @@ def _merge_results(
assert len(scoredNotes) == scoredNotesSize, "scoredNotes should not expand"

# Merge auxiliaryNoteInfo
if modelauxiliaryNoteInfo is not None:
if modelauxiliaryNoteInfo is not None and len(modelauxiliaryNoteInfo.columns) > 0:
assert (set(modelauxiliaryNoteInfo.columns) & set(auxiliaryNoteInfo.columns)) == {
c.noteIdKey
}, "column names must be globally unique"
@@ -1005,7 +1005,11 @@ def run_prescoring(
runParallel: bool = True,
dataLoader: Optional[CommunityNotesDataLoader] = None,
useStableInitialization: bool = True,
) -> Tuple[pd.DataFrame, pd.DataFrame, sklearn.pipeline.Pipeline, c.PrescoringMetaOutput]:
pseudoraters: bool = True,
checkFlips: bool = True,
) -> Tuple[
pd.DataFrame, pd.DataFrame, sklearn.pipeline.Pipeline, c.PrescoringMetaOutput, pd.DataFrame
]:
with c.time_block("Logging RAM usage"):
_log_df_info(notes, ratings, noteStatusHistory, userEnrollment)
with c.time_block("Note Topic Assignment"):
@@ -1061,12 +1065,41 @@
c.postSelectionValueKey,
},
)
del prescoringModelResultsFromAllScorers
del scorers

# Prescoring itself is now done. Optionally rerun final_note_scoring to check note status flips.
if checkFlips:
# Rescore all notes. TODO: in the future, consider only rescoring a subset, e.g. unlocked notes.
ratingsToRescore = ratings
notesToRescore = notes
noteStatusHistoryToRescore = noteStatusHistory

scoredNotes, _, _ = run_final_note_scoring(
notes=notesToRescore,
ratings=ratingsToRescore,
noteStatusHistory=noteStatusHistoryToRescore,
userEnrollment=userEnrollment,
seed=seed,
pseudoraters=pseudoraters,
enabledScorers=None,
runParallel=runParallel,
useStableInitialization=useStableInitialization,
prescoringNoteModelOutput=prescoringNoteModelOutput,
prescoringRaterModelOutput=prescoringRaterModelOutput,
noteTopicClassifier=noteTopicClassifierPipe,
prescoringMetaOutput=prescoringMetaOutput,
checkFlips=checkFlips,
)
else:
scoredNotes = None

return (
prescoringNoteModelOutput,
prescoringRaterModelOutput,
noteTopicClassifierPipe,
prescoringMetaOutput,
scoredNotes,
)
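Callers now unpack a five-tuple; the fifth element is None unless checkFlips=True triggered the extra final-scoring pass. A hypothetical invocation, assuming notes, ratings, noteStatusHistory, and userEnrollment frames are in scope and abbreviating the arguments to those visible in this diff:

```python
(prescoringNoteModelOutput,
 prescoringRaterModelOutput,
 noteTopicClassifierPipe,
 prescoringMetaOutput,
 scoredNotes) = run_prescoring(
  notes=notes,
  ratings=ratings,
  noteStatusHistory=noteStatusHistory,
  userEnrollment=userEnrollment,
  checkFlips=False,  # as run_scoring now passes (see below)
)
assert scoredNotes is None  # no flip-check rescore was requested
```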


@@ -1415,7 +1448,16 @@ def run_scoring(
dataLoader: Optional[CommunityNotesDataLoader] = None,
useStableInitialization: bool = True,
writePrescoringScoringOutputCallback: Optional[
Callable[[pd.DataFrame, pd.DataFrame, sklearn.pipeline.Pipeline, c.PrescoringMetaOutput], None]
Callable[
[
pd.DataFrame,
pd.DataFrame,
sklearn.pipeline.Pipeline,
c.PrescoringMetaOutput,
Optional[pd.DataFrame],
],
None,
]
] = None,
cutoffTimestampMillis: Optional[int] = None,
excludeRatingsAfterANoteGotFirstStatusPlusNHours: Optional[int] = None,
@@ -1475,6 +1517,7 @@ def run_scoring(
prescoringRaterModelOutput,
prescoringNoteTopicClassifier,
prescoringMetaOutput,
prescoringScoredNotes,
) = run_prescoring(
notes=prescoringNotesInput,
ratings=prescoringRatingsInput,
Expand All @@ -1485,6 +1528,7 @@ def run_scoring(
runParallel=runParallel,
dataLoader=dataLoader,
useStableInitialization=useStableInitialization,
checkFlips=False,
)

print("We invoked run_scoring and are now in between prescoring and scoring.")
@@ -1495,6 +1539,7 @@ def run_scoring(
prescoringRaterModelOutput,
prescoringNoteTopicClassifier,
prescoringMetaOutput,
prescoringScoredNotes,
)
print("Starting final scoring")

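The callback type gains an Optional[pd.DataFrame] for the prescoring scored notes. A no-op callback conforming to the widened signature (names here are illustrative; PrescoringMetaOutput is the repo's constants type, referenced as a string annotation so nothing extra is imported):

```python
from typing import Optional
import pandas as pd
import sklearn.pipeline

def noop_prescoring_callback(
  noteModelOutput: pd.DataFrame,
  raterModelOutput: pd.DataFrame,
  noteTopicClassifier: sklearn.pipeline.Pipeline,
  prescoringMetaOutput: "c.PrescoringMetaOutput",  # string annotation; c not imported here
  prescoringScoredNotes: Optional[pd.DataFrame],
) -> None:
  pass  # a real callback would persist all five artifacts
```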
18 changes: 8 additions & 10 deletions sourcecode/scoring/scoring_rules.py
@@ -495,9 +495,7 @@ def _get_value(row):

# Filter set of note status updates to only include actionable notes
actionableNotes = noteStats[noteStats["actionable"]][[c.noteIdKey]]
noteStatusUpdates = noteStatusUpdates.merge(
actionableNotes, on=c.noteIdKey, how="inner", unsafeAllowed=c.defaultIndexKey
)
noteStatusUpdates = noteStatusUpdates.merge(actionableNotes, on=c.noteIdKey, how="inner")

# Set note status and return
noteStatusUpdates[statusColumn] = c.currentlyRatedHelpful
@@ -699,9 +697,9 @@ def score_notes(
how="inner",
)
# Validate that all note scores were within the expected range
noteIntercepts = noteStats.merge(
noteIds, on=c.noteIdKey, how="inner", unsafeAllowed=c.defaultIndexKey
)[c.internalNoteInterceptKey]
noteIntercepts = noteStats.merge(noteIds, on=c.noteIdKey, how="inner")[
c.internalNoteInterceptKey
]

assert sum(noteIntercepts > self._expectedMax) == 0, f"""{sum(noteIntercepts > self._expectedMax)} notes (out of {len(noteIntercepts)}) had intercepts above expected maximum of {self._expectedMax}.
The highest was {max(noteIntercepts)}."""
@@ -857,7 +855,9 @@ def apply_scoring_rules(
if additionalColumns is not None:
# Merge any additional columns into current set of new columns
assert {c.noteIdKey} == (set(noteColumns.columns) & set(additionalColumns.columns))
noteColumns = noteColumns.merge(additionalColumns, on=c.noteIdKey, how="outer")
noteColumns = noteColumns.merge(
additionalColumns, on=c.noteIdKey, how="outer", unsafeAllowed=c.defaultIndexKey
)

with c.time_block("Condense noteRules after applying all scoring rules"):
# Having applied all scoring rules, condense noteRules to have one row per note representing
@@ -873,9 +873,7 @@ def apply_scoring_rules(
# Merge note labels, active rules and new columns into noteStats to form scoredNotes
scoredNotes = noteStats.merge(noteLabels, on=c.noteIdKey, how="inner")
scoredNotes = scoredNotes.merge(noteRules, on=c.noteIdKey, how="inner")
scoredNotes = scoredNotes.merge(
noteColumns, on=c.noteIdKey, how="left", unsafeAllowed=c.defaultIndexKey
)
scoredNotes = scoredNotes.merge(noteColumns, on=c.noteIdKey, how="left")
# Add all of the individual model rules to the active rules column
assert len(scoredNotes) == len(noteStats)
# Set boolean columns indicating scoring outcomes
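The unsafeAllowed whitelist moves from the inner merges (now expected to be type-stable) to the outer merge, where NaN introduced by how="outer" can legitimately widen integer columns to float, which is exactly the dtype drift the patched merge would otherwise flag. A stand-in illustration of that widening:

```python
import pandas as pd

noteColumns = pd.DataFrame({"noteId": [1, 2], "flag": [0, 1]})
additionalColumns = pd.DataFrame({"noteId": [2, 3], "extra": [10, 20]})
merged = noteColumns.merge(additionalColumns, on="noteId", how="outer")
# The unmatched rows hold NaN, so both value columns widen to float64.
print(merged.dtypes)
```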
8 changes: 7 additions & 1 deletion sourcecode/scoring/tag_filter.py
@@ -132,5 +132,11 @@ def get_tag_thresholds(ratings: pd.DataFrame, percentile: int) -> Dict[str, float]:
"""
thresholds = {}
for column in c.notHelpfulTagsAdjustedRatioColumns:
thresholds[column] = np.quantile(ratings[column], np.arange(0, 1, 0.01))[percentile]
if len(ratings[column]) == 0:
print(
f"Warning: No ratings for column {column} in get_tag_thresholds. Setting threshold to 0.0 arbitrarily."
)
thresholds[column] = 0.0
else:
thresholds[column] = np.quantile(ratings[column], np.arange(0, 1, 0.01))[percentile]
return thresholds
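Indexing 100 evenly spaced quantiles at `percentile` is equivalent to asking for that percentile directly; the new branch only guards the empty case. A stand-in check:

```python
import numpy as np

values = np.array([0.05, 0.1, 0.2, 0.4, 0.8])
percentile = 95
viaGrid = np.quantile(values, np.arange(0, 1, 0.01))[percentile]
direct = np.quantile(values, 0.95)
assert np.isclose(viaGrid, direct)  # same threshold either way
```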
