Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2 PRs #112

Merged
merged 2 commits into from
Sep 10, 2024
Merged

2 PRs #112

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 72 additions & 12 deletions google_sheets/data_processing/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def validate_input_data(
INSERT_CRITERION_TYPE = "{INSERT_CRITERION_TYPE}"
INSERT_LANGUAGE_CODE = "{INSERT_LANGUAGE_CODE}"
INSERT_CATEGORY = "{INSERT_CATEGORY}"
INSERT_TICKET_PRICE = "{INSERT_TICKET_PRICE}"


def _update_campaign_name(
Expand Down Expand Up @@ -174,6 +175,13 @@ def _process_row(
if not _use_template_row(new_campaign_row["Category"], template_row):
return final_df

# For positive keywords (Negative is "false"), skip the template row unless its Keyword Match Type equals its Match Type (Match Type is used as part of the Ad Group Name)
if target_resource == "keyword" and (
template_row["Negative"].lower() == "false"
and template_row["Keyword Match Type"] != template_row["Match Type"]
):
return final_df

stations = [
{
"Station From": new_campaign_row["Station From"],
Expand Down Expand Up @@ -203,19 +211,28 @@ def _process_row(
new_row = new_row.str.replace(INSERT_STATION_FROM, station["Station From"])
new_row = new_row.str.replace(INSERT_STATION_TO, station["Station To"])
new_row = new_row.str.replace(INSERT_CRITERION_TYPE, new_row["Match Type"])
new_row = new_row.str.replace(INSERT_CATEGORY, new_campaign_row["Category"])
new_row = new_row.str.replace(
INSERT_TICKET_PRICE, new_campaign_row["Ticket Price"]
)

if target_resource == "ad":
new_row["Final URL"] = station["Final Url"]
elif (
target_resource == "keyword"
and new_row["Negative"]
and new_row["Negative"].lower() == "true"
):
new_row["Match Type"] = new_row["Keyword Match Type"]

if "Campaign" in new_row["Level"]:
new_row["Ad Group Name"] = None
elif target_resource == "keyword":
if new_row["Negative"] and new_row["Negative"].lower() == "true":
new_row["Match Type"] = new_row["Keyword Match Type"]

if "Campaign" in new_row["Level"]:
new_row["Ad Group Name"] = None
elif (
new_row["Target Category"].lower() == "false"
and new_row["Match Type"] == "Exact"
):
new_row["Keyword"] = (
new_row["Keyword"].replace(INSERT_CATEGORY, "").strip()
)

new_row = new_row.str.replace(INSERT_CATEGORY, new_campaign_row["Category"])

final_df = pd.concat([final_df, pd.DataFrame([new_row])], ignore_index=True)

Expand Down Expand Up @@ -256,7 +273,7 @@ def process_data_f(
new_campaign_row, template_row, final_df, target_resource
)

final_df = final_df.drop(columns=["Language Code", "Category"])
final_df = final_df.drop(columns=["Language Code", "Category", "Target Category"])
if target_resource == "keyword":
final_df = final_df.drop(columns=["Keyword Match Type"])
final_df = final_df.drop_duplicates(ignore_index=True)
Expand Down Expand Up @@ -350,10 +367,53 @@ def _validate_output_data_ad(df: pd.DataFrame) -> pd.DataFrame: # noqa: C901
return df


# Length limits applied to sitelink cells; a value of exactly the limit passes.
MAX_SITELINK_TEXT_LENGTH = 25
MAX_SITELINK_DESCRIPTION_LENGTH = 35

# Column-name suffix that identifies a sitelink text column.
_SITELINK_TEXT_SUFFIX = "Text"


def _sitelink_cell_text(value: object) -> str:
    """Return a sitelink cell as text, mapping every "empty" value to ``""``.

    ``None``, pandas missing markers (``NaN``, ``pd.NA``, ``NaT``) and other
    falsy scalars all count as empty. The missing-value check runs before the
    truthiness check because ``NaN`` is truthy and ``bool(pd.NA)`` raises.
    """
    if value is None:
        return ""
    if isinstance(value, str):
        return value
    if pd.isna(value) or not value:
        return ""
    return str(value)


def _validate_output_data_campaign(df: pd.DataFrame) -> pd.DataFrame:
    """Validate the sitelink columns of generated campaign data.

    Every column whose name starts with ``"Sitelink"`` and ends with
    ``"Text"`` is checked row by row. For each non-empty sitelink text:

    * the sibling ``"... Final URL"`` column must hold a value,
    * the text must not exceed ``MAX_SITELINK_TEXT_LENGTH`` characters,
    * the sibling ``"... Description 1"`` / ``"... Description 2"`` cells, if
      present and non-empty, must not exceed
      ``MAX_SITELINK_DESCRIPTION_LENGTH`` characters.

    Args:
        df: Campaign data to validate. It is not modified.

    Returns:
        A copy of ``df``. If at least one row has a problem, the copy gets an
        ``"Issues"`` column in first position holding the newline-terminated
        messages for each row (``""`` for rows without problems); otherwise
        the copy has exactly the input's columns.
    """
    sitelink_text_columns = [
        col
        for col in df.columns
        if isinstance(col, str)
        and col.startswith("Sitelink")
        and col.endswith(_SITELINK_TEXT_SUFFIX)
    ]

    # Messages are collected positionally (one entry per row) so that
    # duplicate index labels cannot smear one row's issues onto another.
    issues = []
    for _, row in df.iterrows():
        row_issues = []
        for text_column in sitelink_text_columns:
            site_text = _sitelink_cell_text(row[text_column])
            if not site_text:
                continue

            # Strip only the trailing "Text" so a name that contains "Text"
            # elsewhere still maps to the right sibling columns.
            prefix = text_column[: -len(_SITELINK_TEXT_SUFFIX)]

            final_url_column = f"{prefix}Final URL"
            if not _sitelink_cell_text(row.get(final_url_column)):
                row_issues.append(f"{final_url_column} is missing.\n")

            # NOTE: the wording says "less than" but the limit is inclusive;
            # the text is kept as-is because existing tests pin it.
            if len(site_text) > MAX_SITELINK_TEXT_LENGTH:
                row_issues.append(
                    f"Sitelink text length should be less than {MAX_SITELINK_TEXT_LENGTH} characters, found {len(site_text)} in column {text_column}.\n"
                )

            for i in (1, 2):
                description_column = f"{prefix}Description {i}"
                site_description = _sitelink_cell_text(row.get(description_column))
                if len(site_description) > MAX_SITELINK_DESCRIPTION_LENGTH:
                    row_issues.append(
                        f"Sitelink description length should be less than {MAX_SITELINK_DESCRIPTION_LENGTH} characters, found {len(site_description)} in column {description_column}.\n"
                    )
        issues.append("".join(row_issues))

    # Work on a copy so the caller's frame never gains a stray "Issues" column.
    result = df.copy()
    if any(issues):
        result.insert(0, "Issues", issues)
    return result


def validate_output_data(
    df: pd.DataFrame, target_resource: Literal["ad", "campaign", "keyword"]
) -> pd.DataFrame:
    """Run the output validation that matches ``target_resource``.

    Args:
        df: Generated output data to validate.
        target_resource: Kind of resource ``df`` describes.

    Returns:
        The result of ``_validate_output_data_ad`` for ``"ad"``, the result of
        ``_validate_output_data_campaign`` for ``"campaign"``, and ``df``
        itself, untouched, for any other value.
    """
    if target_resource == "ad":
        return _validate_output_data_ad(df)
    if target_resource == "campaign":
        return _validate_output_data_campaign(df)
    # No validation required for keyword data currently
    return df
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ dependencies = [
"pydantic>=2.3,<3",
"fastapi>=0.110.2",
"prisma==0.13.1",
"google-api-python-client==2.143.0",
"google-api-python-client==2.144.0",
"asyncify==0.10.0",
"pandas==2.2.2"
]
Expand All @@ -67,10 +67,10 @@ lint = [
"types-Pygments",
"types-docutils",
"mypy==1.11.2",
"ruff==0.6.3",
"ruff==0.6.4",
"pyupgrade-directories==0.3.0",
"bandit==1.7.9",
"semgrep==1.85.0",
"semgrep==1.86.0",
"pytest-mypy-plugins==3.1.2",
]

Expand Down
12 changes: 9 additions & 3 deletions tests/app/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,7 @@ class TestProcessData:
"Final Url To",
"Language Code",
"Category",
"Ticket Price",
],
[
"India",
Expand All @@ -418,6 +419,7 @@ class TestProcessData:
"https://www.example.com/to",
"EN",
"Bus",
"10.5",
],
]
),
Expand Down Expand Up @@ -475,6 +477,7 @@ async def test_process_data_keywords(
"Language Code": ["EN"],
"Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"],
"Match Type": ["Exact"],
"Target Category": ["True"],
}
)
if isinstance(detail, GoogleSheetValues):
Expand Down Expand Up @@ -517,7 +520,7 @@ async def test_process_data_ads(self) -> None:
"Bus",
"https://www.example.com/from",
"H" * 31,
"Headline 2",
"Headline 2 {INSERT_TICKET_PRICE}",
"Headline 3",
"Description Line 1",
"Description Line 2",
Expand All @@ -536,6 +539,7 @@ async def test_process_data_ads(self) -> None:
"Final Url To",
"Language Code",
"Category",
"Ticket Price",
],
[
"India",
Expand All @@ -545,6 +549,7 @@ async def test_process_data_ads(self) -> None:
"https://www.example.com/to",
"EN",
"Bus",
"10.5",
],
]
)
Expand All @@ -556,6 +561,7 @@ async def test_process_data_ads(self) -> None:
"Language Code": ["EN"],
"Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"],
"Match Type": ["Exact"],
"Target Category": ["False"],
}
)
result = await process_data(
Expand Down Expand Up @@ -588,7 +594,7 @@ async def test_process_data_ads(self) -> None:
"Exact",
"https://www.example.com/from",
"HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH",
"Headline 2",
"Headline 2 10.5",
"Headline 3",
"Description Line 1",
"Description Line 2",
Expand All @@ -602,7 +608,7 @@ async def test_process_data_ads(self) -> None:
"Exact",
"https://www.example.com/to",
"HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH",
"Headline 2",
"Headline 2 10.5",
"Headline 3",
"Description Line 1",
"Description Line 2",
Expand Down
71 changes: 70 additions & 1 deletion tests/data_processing/test_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
_update_campaign_name,
_use_template_row,
_validate_language_codes,
_validate_output_data_campaign,
process_campaign_data_f,
process_data_f,
validate_input_data,
Expand Down Expand Up @@ -119,14 +120,15 @@ def test_process_row(
{
"Campaign Name": "USA - A - B - EN",
"Ad Group Name": "A - B",
"Keyword": "k1",
"Keyword": "k1 {INSERT_CATEGORY}",
"Max CPC": "",
"Language Code": "EN",
"Negative": "FALSE",
"Level": "",
"Keyword Match Type": "Exact",
"Match Type": "Exact",
"Category": "Bus",
"Target Category": "False",
}
)
new_campaign_row = pd.Series(
Expand All @@ -137,12 +139,15 @@ def test_process_row(
"Station To": "B",
"Language Code": "EN",
"Category": category,
"Ticket Price": "100",
}
)
final_df = pd.DataFrame(columns=template_row.index)
final_df = _process_row(new_campaign_row, template_row, final_df, "keyword")

assert len(final_df) == expected_length
if expected_length == 1:
assert final_df["Keyword"].values[0] == "k1"


@pytest.mark.parametrize(
Expand All @@ -157,6 +162,7 @@ def test_process_row(
"Language Code": ["EN"],
"Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"],
"Match Type": ["Exact"],
"Target Category": ["False"],
}
),
pd.DataFrame(
Expand All @@ -177,6 +183,7 @@ def test_process_row(
"Station To": ["C", "D"],
"Language Code": ["EN", "EN"],
"Category": ["Bus", "Bus"],
"Ticket Price": ["100", "200"],
}
),
pd.DataFrame(
Expand Down Expand Up @@ -236,6 +243,7 @@ def test_process_row(
"Language Code": ["EN"],
"Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"],
"Match Type": ["Exact"],
"Target Category": ["False"],
}
),
pd.DataFrame(
Expand All @@ -256,6 +264,7 @@ def test_process_row(
"Station To": ["C", "D"],
"Language Code": ["EN", "EN"],
"Category": ["Bus", "Bus"],
"Ticket Price": ["100", "200"],
}
),
pd.DataFrame(
Expand Down Expand Up @@ -319,6 +328,7 @@ def test_process_row(
"{INSERT_STATION_FROM} - {INSERT_STATION_TO}",
],
"Match Type": ["Exact", "Exact"],
"Target Category": ["False", "False"],
}
),
pd.DataFrame(
Expand All @@ -339,6 +349,7 @@ def test_process_row(
"Station To": ["C", "D"],
"Language Code": ["EN", "DE"],
"Category": ["Bus", "Bus"],
"Ticket Price": ["100", "200"],
}
),
pd.DataFrame(
Expand Down Expand Up @@ -376,6 +387,7 @@ def test_process_row(
"{INSERT_STATION_FROM} - {INSERT_STATION_TO}",
],
"Match Type": ["Exact", "Exact"],
"Target Category": ["False", "False"],
}
),
pd.DataFrame(
Expand All @@ -396,6 +408,7 @@ def test_process_row(
"Station To": ["C", "D"],
"Language Code": ["EN", "DE"],
"Category": ["Bus", "Bus"],
"Ticket Price": ["100", "200"],
}
),
pd.DataFrame(
Expand Down Expand Up @@ -697,3 +710,59 @@ def test_validate_language_codes(
_validate_language_codes(new_campaign_df, valid_language_codes, "table")
else:
_validate_language_codes(new_campaign_df, valid_language_codes, "table")


@pytest.mark.parametrize(
    ("df", "expected_issues"),
    [
        # Text with its Final URL and nothing else: valid.
        (
            pd.DataFrame({"Sitelink 1 Text": ["S1"], "Sitelink 1 Final URL": ["URL"]}),
            None,
        ),
        # Text, Final URL and two short descriptions: valid.
        (
            pd.DataFrame(
                {
                    "Sitelink 1 Text": ["S1"],
                    "Sitelink 1 Final URL": ["URL"],
                    "Sitelink 1 Description 1": ["D1"],
                    "Sitelink 1 Description 2": ["D2"],
                }
            ),
            None,
        ),
        # Text without a Final URL column.
        (
            pd.DataFrame({"Sitelink 1 Text": ["S1"]}),
            "Sitelink 1 Final URL is missing.\n",
        ),
        # Text and first description each one character over the limit.
        (
            pd.DataFrame(
                {
                    "Sitelink 1 Text": ["S" * 26],
                    "Sitelink 1 Final URL": ["URL"],
                    "Sitelink 1 Description 1": ["D" * 36],
                    "Sitelink 1 Description 2": ["D2"],
                }
            ),
            "Sitelink text length should be less than 25 characters, found 26 in column Sitelink 1 Text.\n"
            "Sitelink description length should be less than 35 characters, found 36 in column Sitelink 1 Description 1.\n",
        ),
    ],
)
def test_validate_output_data_campaign(
    df: pd.DataFrame, expected_issues: Optional[str]
) -> None:
    """Check the reported issues, or that valid data comes back unchanged."""
    untouched = df.copy()
    validated = _validate_output_data_campaign(df)

    if not expected_issues:
        # Valid input must come back without an "Issues" column.
        assert validated.equals(untouched)
        return

    assert validated["Issues"].values[0] == expected_issues
Loading