Skip to content

Commit

Permalink
Update enhanced algo (#641)
Browse files Browse the repository at this point in the history
  • Loading branch information
bamader authored Jun 6, 2023
1 parent 4f5f238 commit d6aaa9a
Showing 1 changed file with 32 additions and 0 deletions.
32 changes: 32 additions & 0 deletions phdi/linkage/algorithms.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,31 @@
# Predefined columns that map to the DIBBs MPI, keyed by positional index.
# The index of each column is its position in the tuple below (starting at 0).
IDX_TO_COL = dict(
    enumerate(
        (
            "address",
            "birthdate",
            "city",
            "first_name",
            "last_name",
            "mrn",
            "sex",
            "state",
            "zip",
        )
    )
)

# Pre-computed log-odds points values, one per DIBBs MPI supported column
# (derived from representative synthetic data). Keys are column names.
LOG_ODDS_SCORES = dict(
    birthdate=9.944142836217619,
    first_name=8.009121400325398,
    last_name=5.327681398982514,
    sex=0.6964525713514773,
    address=5.769942276960749,
    city=1.8002552875091014,
    state=0.0,
    zip=4.909466232098861,
    mrn=1.464232660081324,
)


DIBBS_BASIC = [
{
"funcs": {
Expand Down Expand Up @@ -44,6 +72,8 @@
"similarity_measure": "JaroWinkler",
"threshold": 0.7,
"true_match_threshold": 16.5,
"idx_to_col": IDX_TO_COL,
"log_odds": LOG_ODDS_SCORES,
},
},
{
Expand All @@ -61,6 +91,8 @@
"similarity_measure": "JaroWinkler",
"threshold": 0.7,
"true_match_threshold": 7.0,
"idx_to_col": IDX_TO_COL,
"log_odds": LOG_ODDS_SCORES,
},
},
]

0 comments on commit d6aaa9a

Please sign in to comment.