Skip to content

Commit

Permalink
Amend rules 8873Q, 8897Q, 8898 to use AssessmentFactorList
Browse files Browse the repository at this point in the history
  • Loading branch information
SLornieCYC committed Mar 19, 2024
1 parent 65fa7e8 commit 4aef929
Show file tree
Hide file tree
Showing 3 changed files with 141 additions and 52 deletions.
56 changes: 41 additions & 15 deletions cin_validator/rules/cin2022_23/rule_8873Q.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,13 @@

Assessments = CINTable.Assessments
LAchildID = Assessments.LAchildID
AssessmentFactors = Assessments.AssessmentFactors
CINdetailsID = Assessments.CINdetailsID
AssessmentID = Assessments.AssessmentID
AssessmentActualStartDate = Assessments.AssessmentActualStartDate
AssessmentFactors = Assessments.AssessmentFactors

AssessmentFactorsList = CINTable.AssessmentFactorsList
AssessmentFactor = AssessmentFactorsList.AssessmentFactor

CINdetails = CINTable.CINdetails
LAchildID = CINdetails.LAchildID
Expand All @@ -38,43 +42,48 @@ def validate(
# PREPARING DATA

df_ass = data_container[Assessments].copy()
df_asslist = data_container[AssessmentFactorsList].copy()
df_cin = data_container[CINdetails].copy()

# Before you begin, rename the index so that the initial row positions can be kept intact.
df_ass.index.name = "ROW_ID"
df_asslist.index.name = "ROW_ID"
df_cin.index.name = "ROW_ID"

# Resetting the index causes the ROW_IDs to become columns of their respective DataFrames
# so that they can come along when the merge is done.
df_ass.reset_index(inplace=True)
df_asslist.reset_index(inplace=True)
df_cin.reset_index(inplace=True)

# LOGIC
# Within a <CINDetails> group, if there is only one <Assessment> group present and <AssessmentFactors> (N00181) = “21”, <ReasonForClosure> (N00103) must = RC8 or RC9.

# Eliminates rows with more than 1 assessment per CINdetails group by determining if there's more than 1 AssessmentActualStartDate per CINdetailsID per child
# Eliminates rows with more than 1 assessment per CINdetails group by determining if there's more than 1 AssessmentID per CINdetailsID per child
df_ass_merged = df_ass.merge(df_ass, on=["LAchildID", "CINdetailsID"])
df_ass_merged = df_ass_merged[
(
df_ass_merged["AssessmentActualStartDate_x"]
!= df_ass_merged["AssessmentActualStartDate_y"]
)
(df_ass_merged["AssessmentID_x"] != df_ass_merged["AssessmentID_y"])
]
more_than_1_ass = df_ass_merged["ROW_ID_x"].tolist()

df_ass = df_ass[~df_ass["ROW_ID"].isin(more_than_1_ass)]

df_ass = df_ass[
(df_ass[AssessmentFactors] == "21")
| (df_ass[AssessmentFactors] == "21 No factors identified")
| (df_ass[AssessmentFactors].str.contains("21"))
df_ass_merged = df_ass.merge(
df_asslist[["LAchildID", "CINdetailsID", "AssessmentID", "AssessmentFactor"]],
on=["LAchildID", "CINdetailsID", "AssessmentID"],
)

df_ass_merged = df_ass_merged[
(df_ass_merged[AssessmentFactor] == "2B")
| (df_ass_merged[AssessmentFactor] == "21 No factors identified")
| (df_ass_merged[AssessmentFactor].str.contains("21"))
]

# left merge means that only the filtered cpp children will be considered and there is no possibility of additional children coming in from other tables.
# left merge means that only the filtered cin children will be considered and there is no possibility of additional children coming in from other tables.

# get only the CINdetails groups with AssessmentFactors including 21.
merged_df = df_ass.copy().merge(
df_cin.copy(),
merged_df = df_ass_merged.merge(
df_cin,
on=[LAchildID, "CINdetailsID"],
how="left",
suffixes=["_ass", "_cin"],
Expand All @@ -88,7 +97,12 @@ def validate(

# create an identifier for each error instance.
merged_df["ERROR_ID"] = tuple(
zip(merged_df[LAchildID], merged_df[CINdetailsID], merged_df[ReasonForClosure])
zip(
merged_df[LAchildID],
merged_df[CINdetailsID],
merged_df[AssessmentID],
merged_df[ReasonForClosure],
)
)

# The merges were done on copies of df_ass, and df_cin so that the column names in dataframes themselves aren't affected by the suffixes.
Expand Down Expand Up @@ -123,48 +137,56 @@ def test_validate():
"LAchildID": "child1",
"AssessmentFactors": "21", # 0 pass
"CINdetailsID": "cinID1",
"AssessmentID": "11",
"AssessmentActualStartDate": "5/12/1993",
},
{
"LAchildID": "child1",
"AssessmentFactors": "BOO", # 1 ignore: factor!=21
"CINdetailsID": "cinID2",
"AssessmentID": "12",
"AssessmentActualStartDate": "5/12/1993",
},
{
"LAchildID": "child2",
"AssessmentFactors": "BOO", # 2 ignore: factor!=21
"CINdetailsID": "cinID1",
"AssessmentID": "21",
"AssessmentActualStartDate": "5/12/1993",
},
{
"LAchildID": "child3",
"AssessmentFactors": "21", # 3 fail. reason!=RC8
"CINdetailsID": "cinID1",
"AssessmentID": "31",
"AssessmentActualStartDate": "5/12/1993",
},
{ # absent
"LAchildID": "child3",
"AssessmentFactors": pd.NA, # 4 ignore: factor!=21
"CINdetailsID": "cinID2",
"AssessmentID": "32",
"AssessmentActualStartDate": "5/12/1993",
},
{ # fail
"LAchildID": "child3",
"AssessmentFactors": "21", # 5 fail. reason!=RC8
"CINdetailsID": "cinID3",
"AssessmentID": "33",
"AssessmentActualStartDate": "5/12/1993",
},
{
"LAchildID": "child3",
"AssessmentFactors": "21", # ignore: more than one assessment in CIN episode
"CINdetailsID": "cinID4",
"AssessmentID": "34",
"AssessmentActualStartDate": "5/12/1993",
},
{
"LAchildID": "child3",
"AssessmentFactors": "20", # 6 ignore: factor!=21
"CINdetailsID": "cinID4",
"AssessmentID": "35",
"AssessmentActualStartDate": "5/12/1994",
},
]
Expand Down Expand Up @@ -216,6 +238,9 @@ def test_validate():
validate,
{
Assessments: sample_ass,
AssessmentFactorsList: sample_ass.rename(
columns={"AssessmentFactors": "AssessmentFactor"}
),
CINdetails: sample_cin,
},
)
Expand Down Expand Up @@ -254,7 +279,7 @@ def test_validate():
"ERROR_ID": (
"child3", # ChildID
"cinID1", # CINdetailsID
# corresponding CPPstartDate
"31", # AssessmentID
"RC10",
),
"ROW_ID": [3],
Expand All @@ -263,6 +288,7 @@ def test_validate():
"ERROR_ID": (
"child3",
"cinID3",
"33",
"RC10",
),
"ROW_ID": [5],
Expand Down
52 changes: 40 additions & 12 deletions cin_validator/rules/cin2022_23/rule_8897Q.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
AssessmentAuthorisationDate = Assessments.AssessmentAuthorisationDate
AssessmentFactors = Assessments.AssessmentFactors
LAchildID = Assessments.LAchildID
AssessmentID = Assessments.AssessmentID

AssessmentFactorsList = CINTable.AssessmentFactorsList
AssessmentFactor = AssessmentFactorsList.AssessmentFactor

Header = CINTable.Header
ReferenceDate = Header.ReferenceDate
Expand All @@ -32,14 +36,16 @@ def validate(
):
# PREPARING DATA

df = data_container[Assessments]
df_ass = data_container[Assessments]
df_asslist = data_container[AssessmentFactorsList]

header = data_container[Header]
ref_date_series = header[ReferenceDate]
collection_start, collection_end = make_census_period(ref_date_series)

# Before you begin, rename the index so that the initial row positions can be kept intact.
df.index.name = "ROW_ID"
df_ass.index.name = "ROW_ID"
df_ass.reset_index(inplace=True)

# LOGIC
# Implement rule logic as described by the Github issue.
Expand Down Expand Up @@ -95,15 +101,21 @@ def validate(
"24A",
]

condition1 = (df[AssessmentAuthorisationDate] >= collection_start) & (
df[AssessmentAuthorisationDate].notna()
df_ass_merged = df_ass.merge(
df_asslist[["LAchildID", "AssessmentID", "AssessmentFactor"]],
on=["LAchildID", "AssessmentID"],
how="left",
)

condition1 = (df_ass_merged[AssessmentAuthorisationDate] >= collection_start) & (
df_ass_merged[AssessmentAuthorisationDate].notna()
)
condition2 = (df[AssessmentFactors].notna()) & (
df[AssessmentFactors].isin(factors_list)
condition2 = (df_ass_merged[AssessmentFactors].notna()) & (
df_ass_merged[AssessmentFactor].isin(factors_list)
)

# get all the data that fits the failing condition. Reset the index so that ROW_ID now becomes a column of df
df_issues = df[condition1 & ~condition2].reset_index()
df_issues = df_ass_merged[condition1 & ~condition2].reset_index()

# SUBMIT ERRORS
# Generate a unique ID for each instance of an error. In this case,
Expand All @@ -118,7 +130,7 @@ def validate(
link_id = tuple(
zip(
df_issues[LAchildID],
df_issues[AssessmentAuthorisationDate],
df_issues[AssessmentID],
df_issues[AssessmentFactors],
)
)
Expand All @@ -142,36 +154,43 @@ def test_validate():
[
{
"LAchildID": "child1",
"AssessmentID": "11",
"AssessmentFactors": pd.NA,
"AssessmentAuthorisationDate": "26/05/2000",
}, # Fails as no assessment factor code
{
"LAchildID": "child2",
"AssessmentID": "21",
"AssessmentFactors": "99",
"AssessmentAuthorisationDate": "26/05/2000",
}, # Fails as incorrect assessment factor code
{
"LAchildID": "child3",
"AssessmentID": "31",
"AssessmentFactors": "1A",
"AssessmentAuthorisationDate": "26/05/2000",
},
{
"LAchildID": "child3",
"AssessmentID": "32",
"AssessmentAuthorisationDate": "26/05/2000",
"AssessmentFactors": pd.NA,
}, # Fails as no factor selected
{
"LAchildID": "child4",
"AssessmentID": "41",
"AssessmentFactors": "1A",
"AssessmentAuthorisationDate": "26/05/2000",
},
{
"LAchildID": "child5",
"AssessmentID": "51",
"AssessmentAuthorisationDate": pd.NA,
"AssessmentFactors": pd.NA,
},
{
"LAchildID": "child5",
"AssessmentID": "52",
"AssessmentAuthorisationDate": "26/05/1945",
"AssessmentFactors": pd.NA, # Passes as before census year
},
Expand All @@ -186,7 +205,16 @@ def test_validate():
sample_header = pd.DataFrame([{ReferenceDate: "31/03/2001"}])

# Run rule function passing in our sample data
result = run_rule(validate, {Assessments: fake_data, Header: sample_header})
result = run_rule(
validate,
{
Assessments: fake_data,
AssessmentFactorsList: fake_data.rename(
columns={"AssessmentFactors": "AssessmentFactor"}
),
Header: sample_header,
},
)

# Use .type1_issues to check for the result of .push_type1_issues() which you used above.
issues = result.type1_issues
Expand Down Expand Up @@ -215,23 +243,23 @@ def test_validate():
{
"ERROR_ID": (
"child1",
pd.to_datetime("26/05/2000", format="%d/%m/%Y", errors="coerce"),
"11",
pd.NA,
),
"ROW_ID": [0],
},
{
"ERROR_ID": (
"child2",
pd.to_datetime("26/05/2000", format="%d/%m/%Y", errors="coerce"),
"21",
"99",
),
"ROW_ID": [1],
},
{
"ERROR_ID": (
"child3",
pd.to_datetime("26/05/2000", format="%d/%m/%Y", errors="coerce"),
"32",
pd.NA,
),
"ROW_ID": [3],
Expand Down
Loading

0 comments on commit 4aef929

Please sign in to comment.