Skip to content

Commit

Permalink
48 modify templates and google sheet endpoints to use gbb structure (#49)
Browse files Browse the repository at this point in the history

* Update expected templates from Google sheets and remove process-data endpoint

* Update Final url and match type processing

* Update process-spreadsheet endpoint
  • Loading branch information
rjambrecic authored Jul 12, 2024
1 parent 36c331c commit 10a26af
Show file tree
Hide file tree
Showing 5 changed files with 268 additions and 138 deletions.
202 changes: 120 additions & 82 deletions google_sheets/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,24 +297,23 @@ async def get_all_sheet_titles(
return sheets


# Columns of the new-campaign sheet. `process_data_f` reads each of these
# from every new-campaign row (the two "Final Url" columns only when the
# target resource is "ad").
# NOTE(review): presumably enforced by the input validation step - confirm.
NEW_CAMPAIGN_MANDATORY_COLUMNS = [
    "Country",
    "Station From",
    "Station To",
    "Final Url From",
    "Final Url To",
]
MANDATORY_AD_TEMPLATE_COLUMNS = [
"Campaign",
"Ad Group",
"Headline 1",
"Headline 2",
"Headline 3",
"Description Line 1",
"Description Line 2",
"Final Url",
]

MANDATORY_KEYWORD_TEMPLATE_COLUMNS = [
"Campaign",
"Ad Group",
"Keyword",
"Criterion Type",
"Max CPC",
]


Expand All @@ -326,40 +325,12 @@ def _validate_target_resource(target_resource: Optional[str]) -> None:
)


@app.post(
"/process-data",
description="Process data to generate new ads or keywords based on the template",
)
async def process_data(
template_sheet_values: Annotated[
Optional[GoogleSheetValues],
Body(
embed=True,
description="Template values to be used for generating new ads or keywords",
),
] = None,
new_campaign_sheet_values: Annotated[
Optional[GoogleSheetValues],
Body(
embed=True,
description="New campaign values to be used for generating new ads or keywords",
),
] = None,
target_resource: Annotated[
Optional[str],
Query(
description="The target resource to be updated. This can be 'ad' or 'keyword'"
),
] = None,
template_sheet_values: GoogleSheetValues,
new_campaign_sheet_values: GoogleSheetValues,
merged_campaigns_ad_groups_df: pd.DataFrame,
target_resource: str,
) -> GoogleSheetValues:
_check_parameters_are_not_none(
{
"template_sheet_values": template_sheet_values,
"new_campaign_sheet_values": new_campaign_sheet_values,
"target_resource": target_resource,
}
)
_validate_target_resource(target_resource)
if (
len(template_sheet_values.values) < 2 # type: ignore
or len(new_campaign_sheet_values.values) < 2 # type: ignore
Expand Down Expand Up @@ -406,7 +377,12 @@ async def process_data(
status_code=status.HTTP_400_BAD_REQUEST, detail=validation_error_msg
)

processed_df = process_data_f(template_df, new_campaign_df)
processed_df = process_data_f(
merged_campaigns_ad_groups_df,
template_df,
new_campaign_df,
target_resource=target_resource,
)

validated_df = validate_output_data(
processed_df,
Expand All @@ -418,6 +394,43 @@ async def process_data(
return GoogleSheetValues(values=values)


async def process_campaigns_and_ad_groups(
    campaign_template_values: GoogleSheetValues,
    ad_group_template_values: GoogleSheetValues,
) -> pd.DataFrame:
    """Combine the campaign and ad group template tables into one DataFrame.

    Each input is read as a table whose first row is the header and whose
    remaining rows are data. The result pairs every campaign row with every
    ad group row (a cross join).

    Args:
        campaign_template_values: Sheet values of the campaign template.
        ad_group_template_values: Sheet values of the ad group template.

    Returns:
        The cross join of the two template tables.

    Raises:
        HTTPException: With status 400 when either table has fewer than two
            rows, or when either table cannot be turned into a DataFrame.
    """
    # NOTE(review): helper is defined elsewhere; presumably rejects None
    # arguments - confirm.
    _check_parameters_are_not_none(
        {
            "campaign_template_values": campaign_template_values,
            "ad_group_template_values": ad_group_template_values,
        }
    )

    campaign_rows = campaign_template_values.values  # type: ignore
    ad_group_rows = ad_group_template_values.values  # type: ignore

    # A usable table needs a header row plus at least one data row.
    has_header_and_data = len(campaign_rows) >= 2 and len(ad_group_rows) >= 2
    if not has_header_and_data:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Both template campaigns and ad groups data should have at least two rows (header and data).",
        )

    try:
        # First row supplies the column names, the rest supplies the records.
        campaigns_df, ad_groups_df = [
            pd.DataFrame(rows[1:], columns=rows[0])
            for rows in (campaign_rows, ad_group_rows)
        ]
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Invalid data format. Please provide data in the correct format: {e}",
        ) from e

    return pd.merge(campaigns_df, ad_groups_df, how="cross")


@app.post(
"/process-spreadsheet",
description="Process data to generate new ads or keywords based on the template",
Expand All @@ -430,10 +443,6 @@ async def process_spreadsheet(
Optional[str],
Query(description="ID of the Google Sheet with the template data"),
] = None,
template_sheet_title: Annotated[
Optional[str],
Query(description="The title of the sheet with the template data"),
] = None,
new_campaign_spreadsheet_id: Annotated[
Optional[str],
Query(description="ID of the Google Sheet with the new campaign data"),
Expand All @@ -442,66 +451,95 @@ async def process_spreadsheet(
Optional[str],
Query(description="The title of the sheet with the new campaign data"),
] = None,
target_resource: Annotated[
Optional[str],
Query(
description="The target resource to be updated, options: 'ad' or 'keyword'"
),
] = None,
) -> str:
_check_parameters_are_not_none(
{
"template_spreadsheet_id": template_spreadsheet_id,
"template_sheet_title": template_sheet_title,
"new_campaign_spreadsheet_id": new_campaign_spreadsheet_id,
"new_campaign_sheet_title": new_campaign_sheet_title,
"target_resource": target_resource,
}
)
_validate_target_resource(target_resource)
template_values = await get_sheet(
user_id=user_id,
spreadsheet_id=template_spreadsheet_id,
title=template_sheet_title,
)
new_campaign_values = await get_sheet(
user_id=user_id,
spreadsheet_id=new_campaign_spreadsheet_id,
title=new_campaign_sheet_title,
)
try:
ads_template_values = await get_sheet(
user_id=user_id,
spreadsheet_id=template_spreadsheet_id,
title="Ads",
)
keywords_template_values = await get_sheet(
user_id=user_id,
spreadsheet_id=template_spreadsheet_id,
title="Keywords",
)
campaign_template_values = await get_sheet(
user_id=user_id, spreadsheet_id=template_spreadsheet_id, title="Campaigns"
)
ad_group_template_values = await get_sheet(
user_id=user_id, spreadsheet_id=template_spreadsheet_id, title="Ad Groups"
)
if not isinstance(
campaign_template_values, GoogleSheetValues
) or not isinstance(ad_group_template_values, GoogleSheetValues):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"""Please provide Campaigns, Ad Groups, Ads and KEywords tables in the template spreadsheet with id '{template_spreadsheet_id}'""",
)

merged_campaigns_ad_groups_df = await process_campaigns_and_ad_groups(
campaign_template_values=campaign_template_values,
ad_group_template_values=ad_group_template_values,
)

except Exception as e:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"""Make sure tables 'Campaigns', 'Ad Groups', 'Ads' and 'Keywords' are present in the template spreadsheet with id '{template_spreadsheet_id}'.""",
) from e

if not isinstance(template_values, GoogleSheetValues) or not isinstance(
new_campaign_values, GoogleSheetValues
if (
not isinstance(ads_template_values, GoogleSheetValues)
or not isinstance(keywords_template_values, GoogleSheetValues)
or not isinstance(new_campaign_values, GoogleSheetValues)
):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"""Invalid data format.
template_values: {template_values}
ads_template_values: {ads_template_values}
keywords_template_values: {keywords_template_values}
new_campaign_values: {new_campaign_values}
Please provide data in the correct format.""",
)

processed_values = await process_data(
template_sheet_values=template_values,
new_campaign_sheet_values=new_campaign_values,
target_resource=target_resource,
)
response = ""
for template_values, target_resource in zip(
[ads_template_values, keywords_template_values], ["ad", "keyword"]
):
processed_values = await process_data(
template_sheet_values=template_values,
new_campaign_sheet_values=new_campaign_values,
merged_campaigns_ad_groups_df=merged_campaigns_ad_groups_df,
target_resource=target_resource,
)

title = (
f"Captn - {target_resource.capitalize()}s {datetime.now():%Y-%m-%d %H:%M:%S}" # type: ignore
)
await create_sheet(
user_id=user_id,
spreadsheet_id=new_campaign_spreadsheet_id,
title=title,
)
await update_sheet(
user_id=user_id,
spreadsheet_id=new_campaign_spreadsheet_id,
title=title,
sheet_values=processed_values,
)
title = f"Captn - {target_resource.capitalize()}s {datetime.now():%Y-%m-%d %H:%M:%S}" # type: ignore
await create_sheet(
user_id=user_id,
spreadsheet_id=new_campaign_spreadsheet_id,
title=title,
)
await update_sheet(
user_id=user_id,
spreadsheet_id=new_campaign_spreadsheet_id,
title=title,
sheet_values=processed_values,
)
response += f"Sheet with the name '{title}' has been created successfully.\n"

return f"Sheet with the name 'Captn - {target_resource.capitalize()}s' has been created successfully." # type: ignore
return response
41 changes: 34 additions & 7 deletions google_sheets/data_processing/processing.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Literal
from typing import List, Literal, Optional

import pandas as pd

Expand All @@ -24,15 +24,20 @@ def validate_input_data(

INSERT_STATION_FROM = "INSERT_STATION_FROM"
INSERT_STATION_TO = "INSERT_STATION_TO"
INSERT_COUNTRY = "INSERT_COUNTRY"
INSERT_CRITERION_TYPE = "INSERT_CRITERION_TYPE"


def process_data_f(
template_df: pd.DataFrame, new_campaign_df: pd.DataFrame
merged_campaigns_ad_groups_df: pd.DataFrame,
template_df: pd.DataFrame,
new_campaign_df: pd.DataFrame,
target_resource: Optional[str] = None,
) -> pd.DataFrame:
template_df = pd.merge(merged_campaigns_ad_groups_df, template_df, how="cross")
final_df = pd.DataFrame(columns=template_df.columns)
for _, template_row in template_df.iterrows():
for _, new_campaign_row in new_campaign_df.iterrows():
campaign = f"{new_campaign_row['Country']} - {new_campaign_row['Station From']} - {new_campaign_row['Station To']}"
stations = [
{
"Station From": new_campaign_row["Station From"],
Expand All @@ -44,23 +49,45 @@ def process_data_f(
"Station To": new_campaign_row["Station From"],
},
]
if target_resource == "ad":
stations[0]["Final Url"] = new_campaign_row["Final Url From"]
stations[1]["Final Url"] = new_campaign_row["Final Url To"]

for station in stations:
new_row = template_row.copy()
new_row["Campaign"] = campaign
new_row["Ad Group"] = (
f"{station['Station From']} - {station['Station To']}"
new_row["Campaign Name"] = new_row["Campaign Name"].replace(
INSERT_COUNTRY, new_campaign_row["Country"]
)
new_row["Campaign Name"] = new_row["Campaign Name"].replace(
INSERT_STATION_FROM, new_campaign_row["Station From"]
)
new_row["Campaign Name"] = new_row["Campaign Name"].replace(
INSERT_STATION_TO, new_campaign_row["Station To"]
)

new_row["Ad Group Name"] = new_row["Ad Group Name"].replace(
INSERT_CRITERION_TYPE, new_row["Match Type"]
)

# Replace the placeholders in all columns with the actual station names INSERT_STATION_FROM
new_row = new_row.str.replace(
INSERT_COUNTRY, new_campaign_row["Country"]
)
new_row = new_row.str.replace(
INSERT_STATION_FROM, station["Station From"]
)
new_row = new_row.str.replace(INSERT_STATION_TO, station["Station To"])

if target_resource == "ad":
new_row["Final URL"] = station["Final Url"]

final_df = pd.concat(
[final_df, pd.DataFrame([new_row])], ignore_index=True
)

final_df = final_df.sort_values(
by=["Campaign Name", "Ad Group Name"], ignore_index=True
)

return final_df


Expand Down
10 changes: 8 additions & 2 deletions google_sheets/google_api/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,14 @@ def get_files_f(service: Any) -> List[Dict[str, str]]:
def get_sheet_f(service: Any, spreadsheet_id: str, range: str) -> Any:
    """Fetch the cell values of one range from a Google spreadsheet.

    Args:
        service: Sheets API client object exposing ``spreadsheets()``.
        spreadsheet_id: ID of the spreadsheet to read from.
        range: Range to read, forwarded unchanged to the API. The name
            shadows the builtin but is kept so keyword callers keep working.

    Returns:
        The ``"values"`` entry of the API response, or an empty list when the
        response contains none.

    Raises:
        HTTPException: With status 404 when reading from the API fails.
    """
    # Call the Sheets API
    sheet = service.spreadsheets()
    try:
        # The request is issued exactly once and only inside the guard, so
        # any failure is reported to the caller as the 404 below.
        result = sheet.values().get(spreadsheetId=spreadsheet_id, range=range).execute()
        values = result.get("values", [])
    except Exception as e:
        raise HTTPException(
            status_code=404,
            detail=f"Unable to read from spreadsheet with id '{spreadsheet_id}', and range '{range}'",
        ) from e

    return values

Expand Down
Loading

0 comments on commit 10a26af

Please sign in to comment.