Commit 91b24d1
Add new note status history field and update versions
jbaxter committed Jul 3, 2024
1 parent 1b9281a commit 91b24d1
Showing 10 changed files with 101 additions and 49 deletions.
2 changes: 2 additions & 0 deletions sourcecode/scoring/constants.py
@@ -424,6 +424,7 @@ def rater_factor_key(i):
currentGroupStatusKey = "currentGroupStatus"
currentDecidedByKey = "currentDecidedBy"
currentModelingGroupKey = "currentModelingGroup"
timestampMillisOfMostRecentStatusChangeKey = "timestampMillisOfMostRecentStatusChange"

noteStatusHistoryTSVColumnsAndTypes = [
(noteIdKey, np.int64),
@@ -443,6 +444,7 @@ def rater_factor_key(i):
(currentGroupStatusKey, object),
(currentDecidedByKey, object),
(currentModelingGroupKey, np.double), # TODO: int
(timestampMillisOfMostRecentStatusChangeKey, np.double), # double because nullable.
]
noteStatusHistoryTSVColumns = [col for (col, dtype) in noteStatusHistoryTSVColumnsAndTypes]
noteStatusHistoryTSVTypes = [dtype for (col, dtype) in noteStatusHistoryTSVColumnsAndTypes]
2 changes: 1 addition & 1 deletion sourcecode/scoring/helpfulness_scores.py
@@ -155,7 +155,7 @@ def compute_general_helpfulness_scores(

helpfulRatingsOnBadNotesCount = (
helpfulRatingsOnBadNotes[[c.raterParticipantIdKey, c.totalHelpfulHarassmentRatingsPenaltyKey]]
.groupby(c.raterParticipantIdKey)
.groupby(c.raterParticipantIdKey)[[c.totalHelpfulHarassmentRatingsPenaltyKey]]
.sum()
.reset_index()
)
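The added column selection makes the aggregation explicit: only the penalty column is summed, and the result stays a DataFrame. A sketch with stand-in data (the real column names come from scoring.constants):

```python
import pandas as pd

helpfulRatingsOnBadNotes = pd.DataFrame({
  "raterParticipantId": ["r1", "r1", "r2"],
  "totalHelpfulHarassmentRatingsPenalty": [1.0, 2.0, 3.0],
})
helpfulRatingsOnBadNotesCount = (
  helpfulRatingsOnBadNotes[["raterParticipantId", "totalHelpfulHarassmentRatingsPenalty"]]
  .groupby("raterParticipantId")[["totalHelpfulHarassmentRatingsPenalty"]]  # explicit selection
  .sum()
  .reset_index()
)
print(helpfulRatingsOnBadNotesCount)  # r1 -> 3.0, r2 -> 3.0
```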
5 changes: 1 addition & 4 deletions sourcecode/scoring/note_ratings.py
@@ -138,10 +138,7 @@ def get_ratings_before_note_status_and_public_tsv(
)
]

combinedRatingsBeforeStatus = pd.concat(
[ratingsBeforeStatusNewNotes, first5RatingsOldNotes],
unsafeAllowed=c.defaultIndexKey,
)
combinedRatingsBeforeStatus = pd.concat([ratingsBeforeStatusNewNotes, first5RatingsOldNotes])

if logging:
print(
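Per pandas_utils.py below, `unsafeAllowed` is an argument of this repo's patched pandas wrappers, which check type stability and whitelist columns permitted to change dtype; removing it here asserts the plain concat is now type-stable. With stand-in frames the call reduces to:

```python
import pandas as pd

ratingsBeforeStatusNewNotes = pd.DataFrame({"noteId": [1], "helpfulNum": [1.0]})
first5RatingsOldNotes = pd.DataFrame({"noteId": [2], "helpfulNum": [0.0]})
# Identical dtypes on both sides, so no unsafeAllowed escape hatch is needed.
combinedRatingsBeforeStatus = pd.concat([ratingsBeforeStatusNewNotes, first5RatingsOldNotes])
```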
20 changes: 18 additions & 2 deletions sourcecode/scoring/note_status_history.py
@@ -88,6 +88,18 @@ def _update_single_note_status_history(mergedNote, currentTimeMillis, newScoredN
Returns:
row of pd.DataFrame
"""
if mergedNote[c.finalRatingStatusKey] != mergedNote[c.currentLabelKey]:
# Changed status vs. previous run:
mergedNote[c.timestampMillisOfMostRecentStatusChangeKey] = currentTimeMillis
else:
# No change in status vs. previous run
# If the note has not changed status (since the launch of this feature on 2024/07/02),
# then the timestamp of the most recent status change should be set to -1 by default.
if c.timestampMillisOfMostRecentStatusChangeKey not in mergedNote.index:
mergedNote[c.timestampMillisOfMostRecentStatusChangeKey] = -1
elif pd.isna(mergedNote[c.timestampMillisOfMostRecentStatusChangeKey]):
mergedNote[c.timestampMillisOfMostRecentStatusChangeKey] = -1

# Update the current status in accordance with this scoring run.
assert not pd.isna(mergedNote[c.finalRatingStatusKey])
mergedNote[c.currentLabelKey] = mergedNote[c.finalRatingStatusKey]
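The new branch in isolation, as a runnable sketch with stand-in string keys and labels (the module itself uses constants from scoring.constants):

```python
import pandas as pd

def stamp_status_change(mergedNote: pd.Series, currentTimeMillis: int) -> pd.Series:
  if mergedNote["finalRatingStatus"] != mergedNote["currentLabel"]:
    # Status flipped since the previous run: record when it happened.
    mergedNote["timestampMillisOfMostRecentStatusChange"] = currentTimeMillis
  elif ("timestampMillisOfMostRecentStatusChange" not in mergedNote.index
        or pd.isna(mergedNote["timestampMillisOfMostRecentStatusChange"])):
    # No flip and no recorded change yet: default to the -1 sentinel.
    mergedNote["timestampMillisOfMostRecentStatusChange"] = -1
  return mergedNote

note = pd.Series({"finalRatingStatus": "CURRENTLY_RATED_HELPFUL",
                  "currentLabel": "NEEDS_MORE_RATINGS"})
print(stamp_status_change(note, 1720000000000))
```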
@@ -192,11 +204,15 @@ def _check_flips(mergedStatuses: pd.DataFrame, maxCrhChurn: float) -> None:
)
if len(oldCrhNotes) > 0 and len(newCrhNotes) > 0:
# Validate that changes are within allowable bounds.
print(f"new note ratio: {(len(newCrhNotes - oldCrhNotes) / len(oldCrhNotes))}")
print(
f"new note ratio: {(len(newCrhNotes - oldCrhNotes) / len(oldCrhNotes))}. (newCrhNotes={len(newCrhNotes)}, oldCrhNotes={len(oldCrhNotes)}, delta={len(newCrhNotes - oldCrhNotes)}"
)
assert (
(len(newCrhNotes - oldCrhNotes) / len(oldCrhNotes)) < maxCrhChurn
), f"Too many new CRH notes: newCrhNotes={len(newCrhNotes)}, oldCrhNotes={len(oldCrhNotes)}, delta={len(newCrhNotes - oldCrhNotes)}"
print(f"old note ratio: {len(oldCrhNotes - newCrhNotes) / len(oldCrhNotes)}")
print(
f"old note ratio: {len(oldCrhNotes - newCrhNotes) / len(oldCrhNotes)} (newCrhNotes={len(newCrhNotes)}, oldCrhNotes={len(oldCrhNotes)}, delta={len(oldCrhNotes - newCrhNotes)}"
)
assert (
(len(oldCrhNotes - newCrhNotes) / len(oldCrhNotes)) < maxCrhChurn
), f"Too few new CRH notes: newCrhNotes={len(newCrhNotes)}, oldCrhNotes={len(oldCrhNotes)}, delta={len(oldCrhNotes - newCrhNotes)}"
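The ratios are set arithmetic over note IDs: `newCrhNotes - oldCrhNotes` is the set of notes that became CRH this run, and the reverse difference is the set that lost CRH. A stand-in illustration of the bounds being asserted:

```python
oldCrhNotes = {101, 102, 103, 104}
newCrhNotes = {102, 103, 104, 105, 106}
maxCrhChurn = 0.6  # stand-in threshold

newRatio = len(newCrhNotes - oldCrhNotes) / len(oldCrhNotes)  # 2 / 4 = 0.5
oldRatio = len(oldCrhNotes - newCrhNotes) / len(oldCrhNotes)  # 1 / 4 = 0.25
assert newRatio < maxCrhChurn, "too many new CRH notes"
assert oldRatio < maxCrhChurn, "too many notes lost CRH"
```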
34 changes: 6 additions & 28 deletions sourcecode/scoring/pandas_utils.py
@@ -275,28 +275,6 @@ def _safe_merge(*args, **kwargs):
return _safe_merge


def _get_multiindex_types(df: pd.DataFrame):
"""Create a dictionary mapping index columns to dtypes.
Note that this approach may mis-type columns under rare circumstances. See example below:
# left = pd.DataFrame({"idx0": [1, 2], "idx1": [11, 12], "val1": [4, 5]}).set_index(["idx0", "idx1"])
# right = pd.DataFrame({"idx0": [1, 2, 3], "idx2": [21, 22, 23], "val2": [7, 8, 9]}).set_index(["idx0", "idx2"])
# print(dict(left.join(right, how="outer").index.dtypes))
# print(dict(left.join(right, how="outer").reset_index(drop=False).dtypes))
# $> {'idx0': dtype('int64'), 'idx1': dtype('int64'), 'idx2': dtype('int64')}
# $> {'idx0': dtype('int64'), 'idx1': dtype('float64'), 'idx2': dtype('int64'), 'val1': dtype('float64'), 'val2': dtype('int64')}
Unfortunately, Pandas 1.1.5 seems to treat the type of multi-level indexes as "object",
as the "dtypes" property is not defined for MultiIndex and df.index.dtype and
df.index.values.dtype both return dtype('O'). At some point between Pandas 1.1.5
and 2.2.2, the dtypes attribute was defined for MultiIndex and dict(df.index.dtypes)
yields a mapping from index columns to their actual datatype.
"""
dfTypes = dict(df.reset_index(drop=False).dtypes)
indexCols = set(df.index.names)
return {col: dtype for (col, dtype) in dfTypes.items() if col in indexCols}


def safe_join(fail: bool, counter: TypeErrorCounter) -> Callable:
"""Return a modified merge function that checks type stability.
@@ -334,12 +312,12 @@ def _safe_join(*args, **kwargs):
if len(leftFrame.index.names) == 1 and len(rightFrame.index.names) == 1:
match = leftFrame.index.dtype == rightFrame.index.dtype
elif len(leftFrame.index.names) == 1 and len(rightFrame.index.names) > 1:
indexTypes = _get_multiindex_types(rightFrame)
indexTypes = dict(rightFrame.index.dtypes)
name = leftFrame.index.names[0]
assert name in indexTypes, f"{name} not found in {indexTypes}"
match = indexTypes[name] == leftFrame.index.dtype
elif len(leftFrame.index.names) > 1 and len(rightFrame.index.names) == 1:
indexTypes = _get_multiindex_types(leftFrame)
indexTypes = dict(leftFrame.index.dtypes)
name = rightFrame.index.names[0]
assert name in indexTypes, f"{name} not found in {indexTypes}"
match = indexTypes[name] == rightFrame.index.dtype
@@ -350,8 +328,8 @@ def _safe_join(*args, **kwargs):
assert (
len(rightFrame.index.names) > 1
), f"unexpected right: {type(rightFrame.index)}, {rightFrame.index}"
leftIndexTypes = _get_multiindex_types(leftFrame)
rightIndexTypes = _get_multiindex_types(rightFrame)
leftIndexTypes = dict(leftFrame.index.dtypes)
rightIndexTypes = dict(rightFrame.index.dtypes)
match = True
for col in set(leftIndexTypes) & set(rightIndexTypes):
match = match & (leftIndexTypes[col] == rightIndexTypes[col])
@@ -391,11 +369,11 @@ def _safe_join(*args, **kwargs):
leftIndexCols = set(leftFrame.index.names)
rightIndexCols = set(rightFrame.index.names)
if len(leftIndexCols) > 1:
leftDtypes = _get_multiindex_types(leftFrame)
leftDtypes = dict(leftFrame.index.dtypes)
else:
leftDtypes = {leftFrame.index.name: leftFrame.index.dtype}
if len(rightIndexCols) > 1:
rightDtypes = _get_multiindex_types(rightFrame)
rightDtypes = dict(rightFrame.index.dtypes)
else:
rightDtypes = {rightFrame.index.name: rightFrame.index.dtype}
for col in leftIndexCols & rightIndexCols:
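The deleted helper reconstructed index dtypes via a reset_index round-trip; on the pandas version this commit assumes, MultiIndex exposes a dtypes attribute directly, so the helper collapses to dict(df.index.dtypes). A quick check:

```python
import pandas as pd

df = pd.DataFrame({"idx0": [1, 2], "idx1": [11, 12], "val": [4.0, 5.0]})
df = df.set_index(["idx0", "idx1"])
# On recent pandas this maps each index level to its true dtype,
# avoiding the reset_index round-trip (and its mis-typing edge case).
print(dict(df.index.dtypes))  # {'idx0': dtype('int64'), 'idx1': dtype('int64')}
```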
5 changes: 5 additions & 0 deletions sourcecode/scoring/post_selection_similarity.py
@@ -100,6 +100,11 @@ def filter_ratings_by_post_selection_similarity(notes, ratings, postSelectionSim
ratings = pd.concat(
[ratingsWithPostSelectionSimilarityValue, ratingsWithNoPostSelectionSimilarityValue], axis=0
)
ratings.drop(
columns={c.noteAuthorParticipantIdKey, c.raterParticipantIdKey + "_note_author"},
errors="ignore",
inplace=True,
)
return ratings


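The added drop cleans up merge helper columns, and errors="ignore" makes it a no-op for any column that is already absent. A stand-in illustration:

```python
import pandas as pd

ratings = pd.DataFrame({"noteAuthorParticipantId": ["a"], "helpfulNum": [1.0]})
ratings.drop(columns=["noteAuthorParticipantId", "raterParticipantId_note_author"],
             errors="ignore", inplace=True)  # no KeyError for the absent column
print(ratings.columns.tolist())  # ['helpfulNum']
```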
5 changes: 5 additions & 0 deletions sourcecode/scoring/process_data.py
@@ -432,10 +432,12 @@ def write_prescoring_output(
prescoringRaterModelOutput: pd.DataFrame,
noteTopicClassifier: Pipeline,
prescoringMetaOutput: c.PrescoringMetaOutput,
prescoringScoredNotesOutput: Optional[pd.DataFrame],
noteModelOutputPath: str,
raterModelOutputPath: str,
noteTopicClassifierPath: str,
prescoringMetaOutputPath: str,
prescoringScoredNotesOutputPath: Optional[str],
headers: bool = True,
):
prescoringNoteModelOutput = prescoringNoteModelOutput[c.prescoringNoteModelOutputTSVColumns]
@@ -446,6 +448,9 @@ def write_prescoring_output(
assert all(prescoringRaterModelOutput.columns == c.prescoringRaterModelOutputTSVColumns)
write_tsv_local(prescoringRaterModelOutput, raterModelOutputPath, headers=headers)

if prescoringScoredNotesOutput is not None and prescoringScoredNotesOutputPath is not None:
write_tsv_local(prescoringScoredNotesOutput, prescoringScoredNotesOutputPath, headers=headers)

joblib.dump(noteTopicClassifier, noteTopicClassifierPath)
joblib.dump(prescoringMetaOutput, prescoringMetaOutputPath)

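Both the new frame and its path are Optional, and the write happens only when both are supplied. The guard in miniature (to_csv stands in for the repo's write_tsv_local helper):

```python
from typing import Optional
import pandas as pd

def maybe_write_tsv(df: Optional[pd.DataFrame], path: Optional[str]) -> None:
  # Skip silently when either the data or its destination was not provided.
  if df is not None and path is not None:
    df.to_csv(path, sep="\t", index=False)
```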
51 changes: 48 additions & 3 deletions sourcecode/scoring/run_scoring.py
@@ -168,7 +168,7 @@ def _merge_results(
assert len(scoredNotes) == scoredNotesSize, "scoredNotes should not expand"

# Merge auxiliaryNoteInfo
if modelauxiliaryNoteInfo is not None:
if modelauxiliaryNoteInfo is not None and len(modelauxiliaryNoteInfo.columns) > 0:
assert (set(modelauxiliaryNoteInfo.columns) & set(auxiliaryNoteInfo.columns)) == {
c.noteIdKey
}, "column names must be globally unique"
@@ -1005,7 +1005,11 @@ def run_prescoring(
runParallel: bool = True,
dataLoader: Optional[CommunityNotesDataLoader] = None,
useStableInitialization: bool = True,
) -> Tuple[pd.DataFrame, pd.DataFrame, sklearn.pipeline.Pipeline, c.PrescoringMetaOutput]:
pseudoraters: bool = True,
checkFlips: bool = True,
) -> Tuple[
pd.DataFrame, pd.DataFrame, sklearn.pipeline.Pipeline, c.PrescoringMetaOutput, pd.DataFrame
]:
with c.time_block("Logging RAM usage"):
_log_df_info(notes, ratings, noteStatusHistory, userEnrollment)
with c.time_block("Note Topic Assignment"):
@@ -1061,12 +1065,41 @@
c.postSelectionValueKey,
},
)
del prescoringModelResultsFromAllScorers
del scorers

# Prescoring itself is now done. Optionally rerun final_note_scoring to check note status flips.
if checkFlips:
# Rescore all notes. TODO: in the future, consider only rescoring a subset, e.g. unlocked notes.
ratingsToRescore = ratings
notesToRescore = notes
noteStatusHistoryToRescore = noteStatusHistory

scoredNotes, _, _ = run_final_note_scoring(
notes=notesToRescore,
ratings=ratingsToRescore,
noteStatusHistory=noteStatusHistoryToRescore,
userEnrollment=userEnrollment,
seed=seed,
pseudoraters=pseudoraters,
enabledScorers=None,
runParallel=runParallel,
useStableInitialization=useStableInitialization,
prescoringNoteModelOutput=prescoringNoteModelOutput,
prescoringRaterModelOutput=prescoringRaterModelOutput,
noteTopicClassifier=noteTopicClassifierPipe,
prescoringMetaOutput=prescoringMetaOutput,
checkFlips=checkFlips,
)
else:
scoredNotes = None

return (
prescoringNoteModelOutput,
prescoringRaterModelOutput,
noteTopicClassifierPipe,
prescoringMetaOutput,
scoredNotes,
)
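Callers now unpack a five-tuple; the fifth element is None unless checkFlips=True triggered the extra final-scoring pass. A hypothetical invocation, assuming notes, ratings, noteStatusHistory, and userEnrollment frames are in scope and abbreviating the arguments to those visible in this diff:

```python
(prescoringNoteModelOutput,
 prescoringRaterModelOutput,
 noteTopicClassifierPipe,
 prescoringMetaOutput,
 scoredNotes) = run_prescoring(
  notes=notes,
  ratings=ratings,
  noteStatusHistory=noteStatusHistory,
  userEnrollment=userEnrollment,
  checkFlips=False,  # as run_scoring now passes (see below)
)
assert scoredNotes is None  # no flip-check rescore was requested
```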


@@ -1415,7 +1448,16 @@ def run_scoring(
dataLoader: Optional[CommunityNotesDataLoader] = None,
useStableInitialization: bool = True,
writePrescoringScoringOutputCallback: Optional[
Callable[[pd.DataFrame, pd.DataFrame, sklearn.pipeline.Pipeline, c.PrescoringMetaOutput], None]
Callable[
[
pd.DataFrame,
pd.DataFrame,
sklearn.pipeline.Pipeline,
c.PrescoringMetaOutput,
Optional[pd.DataFrame],
],
None,
]
] = None,
cutoffTimestampMillis: Optional[int] = None,
excludeRatingsAfterANoteGotFirstStatusPlusNHours: Optional[int] = None,
@@ -1475,6 +1517,7 @@ def run_scoring(
prescoringRaterModelOutput,
prescoringNoteTopicClassifier,
prescoringMetaOutput,
prescoringScoredNotes,
) = run_prescoring(
notes=prescoringNotesInput,
ratings=prescoringRatingsInput,
Expand All @@ -1485,6 +1528,7 @@ def run_scoring(
runParallel=runParallel,
dataLoader=dataLoader,
useStableInitialization=useStableInitialization,
checkFlips=False,
)

print("We invoked run_scoring and are now in between prescoring and scoring.")
@@ -1495,6 +1539,7 @@ def run_scoring(
prescoringRaterModelOutput,
prescoringNoteTopicClassifier,
prescoringMetaOutput,
prescoringScoredNotes,
)
print("Starting final scoring")

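The callback type gains an Optional[pd.DataFrame] for the prescoring scored notes. A no-op callback conforming to the widened signature (names here are illustrative; PrescoringMetaOutput is the repo's constants type, referenced as a string annotation so nothing extra is imported):

```python
from typing import Optional
import pandas as pd
import sklearn.pipeline

def noop_prescoring_callback(
  noteModelOutput: pd.DataFrame,
  raterModelOutput: pd.DataFrame,
  noteTopicClassifier: sklearn.pipeline.Pipeline,
  prescoringMetaOutput: "c.PrescoringMetaOutput",  # string annotation; c not imported here
  prescoringScoredNotes: Optional[pd.DataFrame],
) -> None:
  pass  # a real callback would persist all five artifacts
```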
18 changes: 8 additions & 10 deletions sourcecode/scoring/scoring_rules.py
@@ -495,9 +495,7 @@ def _get_value(row):

# Filter set of note status updates to only include actionable notes
actionableNotes = noteStats[noteStats["actionable"]][[c.noteIdKey]]
noteStatusUpdates = noteStatusUpdates.merge(
actionableNotes, on=c.noteIdKey, how="inner", unsafeAllowed=c.defaultIndexKey
)
noteStatusUpdates = noteStatusUpdates.merge(actionableNotes, on=c.noteIdKey, how="inner")

# Set note status and return
noteStatusUpdates[statusColumn] = c.currentlyRatedHelpful
@@ -699,9 +697,9 @@ def score_notes(
how="inner",
)
# Validate that all note scores were within the expected range
noteIntercepts = noteStats.merge(
noteIds, on=c.noteIdKey, how="inner", unsafeAllowed=c.defaultIndexKey
)[c.internalNoteInterceptKey]
noteIntercepts = noteStats.merge(noteIds, on=c.noteIdKey, how="inner")[
c.internalNoteInterceptKey
]

assert sum(noteIntercepts > self._expectedMax) == 0, f"""{sum(noteIntercepts > self._expectedMax)} notes (out of {len(noteIntercepts)}) had intercepts above expected maximum of {self._expectedMax}.
The highest was {max(noteIntercepts)}."""
@@ -857,7 +855,9 @@ def apply_scoring_rules(
if additionalColumns is not None:
# Merge any additional columns into current set of new columns
assert {c.noteIdKey} == (set(noteColumns.columns) & set(additionalColumns.columns))
noteColumns = noteColumns.merge(additionalColumns, on=c.noteIdKey, how="outer")
noteColumns = noteColumns.merge(
additionalColumns, on=c.noteIdKey, how="outer", unsafeAllowed=c.defaultIndexKey
)

with c.time_block("Condense noteRules after applying all scoring rules"):
# Having applied all scoring rules, condense noteRules to have one row per note representing
@@ -873,9 +873,7 @@ def apply_scoring_rules(
# Merge note labels, active rules and new columns into noteStats to form scoredNotes
scoredNotes = noteStats.merge(noteLabels, on=c.noteIdKey, how="inner")
scoredNotes = scoredNotes.merge(noteRules, on=c.noteIdKey, how="inner")
scoredNotes = scoredNotes.merge(
noteColumns, on=c.noteIdKey, how="left", unsafeAllowed=c.defaultIndexKey
)
scoredNotes = scoredNotes.merge(noteColumns, on=c.noteIdKey, how="left")
# Add all of the individual model rules to the active rules column
assert len(scoredNotes) == len(noteStats)
# Set boolean columns indicating scoring outcomes
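The unsafeAllowed whitelist moves from the inner merges (now expected to be type-stable) to the outer merge, where NaN introduced by how="outer" can legitimately widen integer columns to float, which is exactly the dtype drift the patched merge would otherwise flag. A stand-in illustration of that widening:

```python
import pandas as pd

noteColumns = pd.DataFrame({"noteId": [1, 2], "flag": [0, 1]})
additionalColumns = pd.DataFrame({"noteId": [2, 3], "extra": [10, 20]})
merged = noteColumns.merge(additionalColumns, on="noteId", how="outer")
# The unmatched rows hold NaN, so both value columns widen to float64.
print(merged.dtypes)
```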
8 changes: 7 additions & 1 deletion sourcecode/scoring/tag_filter.py
@@ -132,5 +132,11 @@ def get_tag_thresholds(ratings: pd.DataFrame, percentile: int) -> Dict[str, float]:
"""
thresholds = {}
for column in c.notHelpfulTagsAdjustedRatioColumns:
thresholds[column] = np.quantile(ratings[column], np.arange(0, 1, 0.01))[percentile]
if len(ratings[column]) == 0:
print(
f"Warning: No ratings for column {column} in get_tag_thresholds. Setting threshold to 0.0 arbitrarily."
)
thresholds[column] = 0.0
else:
thresholds[column] = np.quantile(ratings[column], np.arange(0, 1, 0.01))[percentile]
return thresholds
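Indexing 100 evenly spaced quantiles at `percentile` is equivalent to asking for that percentile directly; the new branch only guards the empty case. A stand-in check:

```python
import numpy as np

values = np.array([0.05, 0.1, 0.2, 0.4, 0.8])
percentile = 95
viaGrid = np.quantile(values, np.arange(0, 1, 0.01))[percentile]
direct = np.quantile(values, 0.95)
assert np.isclose(viaGrid, direct)  # same threshold either way
```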
