48 modify templates and google sheet endpoints to use gbb structure (#49)

* Update expected templates from Google sheets and remove process-data endpoint
* Update Final url and match type processing
* Update process-spreadsheet endpoint
airtai · Jul 12, 2024 · 10a26af · 10a26af
1 parent 36c331c
commit 10a26af
Show file tree

Hide file tree

Showing 5 changed files with 268 additions and 138 deletions.
diff --git a/google_sheets/app.py b/google_sheets/app.py
@@ -297,24 +297,23 @@ async def get_all_sheet_titles(
     return sheets
 
 
-NEW_CAMPAIGN_MANDATORY_COLUMNS = ["Country", "Station From", "Station To"]
+NEW_CAMPAIGN_MANDATORY_COLUMNS = [
+    "Country",
+    "Station From",
+    "Station To",
+    "Final Url From",
+    "Final Url To",
+]
 MANDATORY_AD_TEMPLATE_COLUMNS = [
-    "Campaign",
-    "Ad Group",
     "Headline 1",
     "Headline 2",
     "Headline 3",
     "Description Line 1",
     "Description Line 2",
-    "Final Url",
 ]
 
 MANDATORY_KEYWORD_TEMPLATE_COLUMNS = [
-    "Campaign",
-    "Ad Group",
     "Keyword",
-    "Criterion Type",
-    "Max CPC",
 ]
 
 
@@ -326,40 +325,12 @@ def _validate_target_resource(target_resource: Optional[str]) -> None:
         )
 
 
-@app.post(
-    "/process-data",
-    description="Process data to generate new ads or keywords based on the template",
-)
 async def process_data(
-    template_sheet_values: Annotated[
-        Optional[GoogleSheetValues],
-        Body(
-            embed=True,
-            description="Template values to be used for generating new ads or keywords",
-        ),
-    ] = None,
-    new_campaign_sheet_values: Annotated[
-        Optional[GoogleSheetValues],
-        Body(
-            embed=True,
-            description="New campaign values to be used for generating new ads or keywords",
-        ),
-    ] = None,
-    target_resource: Annotated[
-        Optional[str],
-        Query(
-            description="The target resource to be updated. This can be 'ad' or 'keyword'"
-        ),
-    ] = None,
+    template_sheet_values: GoogleSheetValues,
+    new_campaign_sheet_values: GoogleSheetValues,
+    merged_campaigns_ad_groups_df: pd.DataFrame,
+    target_resource: str,
 ) -> GoogleSheetValues:
-    _check_parameters_are_not_none(
-        {
-            "template_sheet_values": template_sheet_values,
-            "new_campaign_sheet_values": new_campaign_sheet_values,
-            "target_resource": target_resource,
-        }
-    )
-    _validate_target_resource(target_resource)
     if (
         len(template_sheet_values.values) < 2  # type: ignore
         or len(new_campaign_sheet_values.values) < 2  # type: ignore
@@ -406,7 +377,12 @@ async def process_data(
             status_code=status.HTTP_400_BAD_REQUEST, detail=validation_error_msg
         )
 
-    processed_df = process_data_f(template_df, new_campaign_df)
+    processed_df = process_data_f(
+        merged_campaigns_ad_groups_df,
+        template_df,
+        new_campaign_df,
+        target_resource=target_resource,
+    )
 
     validated_df = validate_output_data(
         processed_df,
@@ -418,6 +394,43 @@ async def process_data(
     return GoogleSheetValues(values=values)
 
 
+async def process_campaigns_and_ad_groups(
+    campaign_template_values: GoogleSheetValues,
+    ad_group_template_values: GoogleSheetValues,
+) -> pd.DataFrame:
+    _check_parameters_are_not_none(
+        {
+            "campaign_template_values": campaign_template_values,
+            "ad_group_template_values": ad_group_template_values,
+        }
+    )
+    if (
+        len(campaign_template_values.values) < 2  # type: ignore
+        or len(ad_group_template_values.values) < 2  # type: ignore
+    ):
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Both template campaigns and ad groups data should have at least two rows (header and data).",
+        )
+
+    try:
+        campaign_template_df = pd.DataFrame(
+            campaign_template_values.values[1:],  # type: ignore
+            columns=campaign_template_values.values[0],  # type: ignore
+        )
+        ad_group_template_df = pd.DataFrame(
+            ad_group_template_values.values[1:],  # type: ignore
+            columns=ad_group_template_values.values[0],  # type: ignore
+        )
+    except Exception as e:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=f"Invalid data format. Please provide data in the correct format: {e}",
+        ) from e
+
+    return pd.merge(campaign_template_df, ad_group_template_df, how="cross")
+
+
 @app.post(
     "/process-spreadsheet",
     description="Process data to generate new ads or keywords based on the template",
@@ -430,10 +443,6 @@ async def process_spreadsheet(
         Optional[str],
         Query(description="ID of the Google Sheet with the template data"),
     ] = None,
-    template_sheet_title: Annotated[
-        Optional[str],
-        Query(description="The title of the sheet with the template data"),
-    ] = None,
     new_campaign_spreadsheet_id: Annotated[
         Optional[str],
         Query(description="ID of the Google Sheet with the new campaign data"),
@@ -442,66 +451,95 @@ async def process_spreadsheet(
         Optional[str],
         Query(description="The title of the sheet with the new campaign data"),
     ] = None,
-    target_resource: Annotated[
-        Optional[str],
-        Query(
-            description="The target resource to be updated, options: 'ad' or 'keyword'"
-        ),
-    ] = None,
 ) -> str:
     _check_parameters_are_not_none(
         {
             "template_spreadsheet_id": template_spreadsheet_id,
-            "template_sheet_title": template_sheet_title,
             "new_campaign_spreadsheet_id": new_campaign_spreadsheet_id,
             "new_campaign_sheet_title": new_campaign_sheet_title,
-            "target_resource": target_resource,
         }
     )
-    _validate_target_resource(target_resource)
-    template_values = await get_sheet(
-        user_id=user_id,
-        spreadsheet_id=template_spreadsheet_id,
-        title=template_sheet_title,
-    )
     new_campaign_values = await get_sheet(
         user_id=user_id,
         spreadsheet_id=new_campaign_spreadsheet_id,
         title=new_campaign_sheet_title,
     )
+    try:
+        ads_template_values = await get_sheet(
+            user_id=user_id,
+            spreadsheet_id=template_spreadsheet_id,
+            title="Ads",
+        )
+        keywords_template_values = await get_sheet(
+            user_id=user_id,
+            spreadsheet_id=template_spreadsheet_id,
+            title="Keywords",
+        )
+        campaign_template_values = await get_sheet(
+            user_id=user_id, spreadsheet_id=template_spreadsheet_id, title="Campaigns"
+        )
+        ad_group_template_values = await get_sheet(
+            user_id=user_id, spreadsheet_id=template_spreadsheet_id, title="Ad Groups"
+        )
+        if not isinstance(
+            campaign_template_values, GoogleSheetValues
+        ) or not isinstance(ad_group_template_values, GoogleSheetValues):
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail=f"""Please provide Campaigns, Ad Groups, Ads and KEywords tables in the template spreadsheet with id '{template_spreadsheet_id}'""",
+            )
+
+        merged_campaigns_ad_groups_df = await process_campaigns_and_ad_groups(
+            campaign_template_values=campaign_template_values,
+            ad_group_template_values=ad_group_template_values,
+        )
+
+    except Exception as e:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=f"""Make sure tables 'Campaigns', 'Ad Groups', 'Ads' and 'Keywords' are present in the template spreadsheet with id '{template_spreadsheet_id}'.""",
+        ) from e
 
-    if not isinstance(template_values, GoogleSheetValues) or not isinstance(
-        new_campaign_values, GoogleSheetValues
+    if (
+        not isinstance(ads_template_values, GoogleSheetValues)
+        or not isinstance(keywords_template_values, GoogleSheetValues)
+        or not isinstance(new_campaign_values, GoogleSheetValues)
     ):
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail=f"""Invalid data format.
-template_values: {template_values}
+ads_template_values: {ads_template_values}
+
+keywords_template_values: {keywords_template_values}
 
 new_campaign_values: {new_campaign_values}
 
 Please provide data in the correct format.""",
         )
 
-    processed_values = await process_data(
-        template_sheet_values=template_values,
-        new_campaign_sheet_values=new_campaign_values,
-        target_resource=target_resource,
-    )
+    response = ""
+    for template_values, target_resource in zip(
+        [ads_template_values, keywords_template_values], ["ad", "keyword"]
+    ):
+        processed_values = await process_data(
+            template_sheet_values=template_values,
+            new_campaign_sheet_values=new_campaign_values,
+            merged_campaigns_ad_groups_df=merged_campaigns_ad_groups_df,
+            target_resource=target_resource,
+        )
 
-    title = (
-        f"Captn - {target_resource.capitalize()}s {datetime.now():%Y-%m-%d %H:%M:%S}"  # type: ignore
-    )
-    await create_sheet(
-        user_id=user_id,
-        spreadsheet_id=new_campaign_spreadsheet_id,
-        title=title,
-    )
-    await update_sheet(
-        user_id=user_id,
-        spreadsheet_id=new_campaign_spreadsheet_id,
-        title=title,
-        sheet_values=processed_values,
-    )
+        title = f"Captn - {target_resource.capitalize()}s {datetime.now():%Y-%m-%d %H:%M:%S}"  # type: ignore
+        await create_sheet(
+            user_id=user_id,
+            spreadsheet_id=new_campaign_spreadsheet_id,
+            title=title,
+        )
+        await update_sheet(
+            user_id=user_id,
+            spreadsheet_id=new_campaign_spreadsheet_id,
+            title=title,
+            sheet_values=processed_values,
+        )
+        response += f"Sheet with the name '{title}' has been created successfully.\n"
 
-    return f"Sheet with the name 'Captn - {target_resource.capitalize()}s' has been created successfully."  # type: ignore
+    return response
diff --git a/google_sheets/data_processing/processing.py b/google_sheets/data_processing/processing.py
@@ -1,4 +1,4 @@
-from typing import List, Literal
+from typing import List, Literal, Optional
 
 import pandas as pd
 
@@ -24,15 +24,20 @@ def validate_input_data(
 
 INSERT_STATION_FROM = "INSERT_STATION_FROM"
 INSERT_STATION_TO = "INSERT_STATION_TO"
+INSERT_COUNTRY = "INSERT_COUNTRY"
+INSERT_CRITERION_TYPE = "INSERT_CRITERION_TYPE"
 
 
 def process_data_f(
-    template_df: pd.DataFrame, new_campaign_df: pd.DataFrame
+    merged_campaigns_ad_groups_df: pd.DataFrame,
+    template_df: pd.DataFrame,
+    new_campaign_df: pd.DataFrame,
+    target_resource: Optional[str] = None,
 ) -> pd.DataFrame:
+    template_df = pd.merge(merged_campaigns_ad_groups_df, template_df, how="cross")
     final_df = pd.DataFrame(columns=template_df.columns)
     for _, template_row in template_df.iterrows():
         for _, new_campaign_row in new_campaign_df.iterrows():
-            campaign = f"{new_campaign_row['Country']} - {new_campaign_row['Station From']} - {new_campaign_row['Station To']}"
             stations = [
                 {
                     "Station From": new_campaign_row["Station From"],
@@ -44,23 +49,45 @@ def process_data_f(
                     "Station To": new_campaign_row["Station From"],
                 },
             ]
+            if target_resource == "ad":
+                stations[0]["Final Url"] = new_campaign_row["Final Url From"]
+                stations[1]["Final Url"] = new_campaign_row["Final Url To"]
+
             for station in stations:
                 new_row = template_row.copy()
-                new_row["Campaign"] = campaign
-                new_row["Ad Group"] = (
-                    f"{station['Station From']} - {station['Station To']}"
+                new_row["Campaign Name"] = new_row["Campaign Name"].replace(
+                    INSERT_COUNTRY, new_campaign_row["Country"]
+                )
+                new_row["Campaign Name"] = new_row["Campaign Name"].replace(
+                    INSERT_STATION_FROM, new_campaign_row["Station From"]
+                )
+                new_row["Campaign Name"] = new_row["Campaign Name"].replace(
+                    INSERT_STATION_TO, new_campaign_row["Station To"]
+                )
+
+                new_row["Ad Group Name"] = new_row["Ad Group Name"].replace(
+                    INSERT_CRITERION_TYPE, new_row["Match Type"]
                 )
 
-                # Replace the placeholders in all columns with the actual station names INSERT_STATION_FROM
+                new_row = new_row.str.replace(
+                    INSERT_COUNTRY, new_campaign_row["Country"]
+                )
                 new_row = new_row.str.replace(
                     INSERT_STATION_FROM, station["Station From"]
                 )
                 new_row = new_row.str.replace(INSERT_STATION_TO, station["Station To"])
 
+                if target_resource == "ad":
+                    new_row["Final URL"] = station["Final Url"]
+
                 final_df = pd.concat(
                     [final_df, pd.DataFrame([new_row])], ignore_index=True
                 )
 
+    final_df = final_df.sort_values(
+        by=["Campaign Name", "Ad Group Name"], ignore_index=True
+    )
+
     return final_df
 
 

diff --git a/google_sheets/google_api/service.py b/google_sheets/google_api/service.py
@@ -71,8 +71,14 @@ def get_files_f(service: Any) -> List[Dict[str, str]]:
 def get_sheet_f(service: Any, spreadsheet_id: str, range: str) -> Any:
     # Call the Sheets API
     sheet = service.spreadsheets()
-    result = sheet.values().get(spreadsheetId=spreadsheet_id, range=range).execute()
-    values = result.get("values", [])
+    try:
+        result = sheet.values().get(spreadsheetId=spreadsheet_id, range=range).execute()
+        values = result.get("values", [])
+    except Exception as e:
+        raise HTTPException(
+            status_code=404,
+            detail=f"Unable to read from spreadsheet with id '{spreadsheet_id}', and range '{range}'",
+        ) from e
 
     return values