diff --git a/google_sheets/app.py b/google_sheets/app.py index f646dcb..e94f2a8 100644 --- a/google_sheets/app.py +++ b/google_sheets/app.py @@ -297,24 +297,23 @@ async def get_all_sheet_titles( return sheets -NEW_CAMPAIGN_MANDATORY_COLUMNS = ["Country", "Station From", "Station To"] +NEW_CAMPAIGN_MANDATORY_COLUMNS = [ + "Country", + "Station From", + "Station To", + "Final Url From", + "Final Url To", +] MANDATORY_AD_TEMPLATE_COLUMNS = [ - "Campaign", - "Ad Group", "Headline 1", "Headline 2", "Headline 3", "Description Line 1", "Description Line 2", - "Final Url", ] MANDATORY_KEYWORD_TEMPLATE_COLUMNS = [ - "Campaign", - "Ad Group", "Keyword", - "Criterion Type", - "Max CPC", ] @@ -326,40 +325,12 @@ def _validate_target_resource(target_resource: Optional[str]) -> None: ) -@app.post( - "/process-data", - description="Process data to generate new ads or keywords based on the template", -) async def process_data( - template_sheet_values: Annotated[ - Optional[GoogleSheetValues], - Body( - embed=True, - description="Template values to be used for generating new ads or keywords", - ), - ] = None, - new_campaign_sheet_values: Annotated[ - Optional[GoogleSheetValues], - Body( - embed=True, - description="New campaign values to be used for generating new ads or keywords", - ), - ] = None, - target_resource: Annotated[ - Optional[str], - Query( - description="The target resource to be updated. This can be 'ad' or 'keyword'" - ), - ] = None, + template_sheet_values: GoogleSheetValues, + new_campaign_sheet_values: GoogleSheetValues, + merged_campaigns_ad_groups_df: pd.DataFrame, + target_resource: str, ) -> GoogleSheetValues: - _check_parameters_are_not_none( - { - "template_sheet_values": template_sheet_values, - "new_campaign_sheet_values": new_campaign_sheet_values, - "target_resource": target_resource, - } - ) - _validate_target_resource(target_resource) if ( len(template_sheet_values.values) < 2 # type: ignore or len(new_campaign_sheet_values.values) < 2 # type: ignore @@ -406,7 +377,12 @@ async def process_data( status_code=status.HTTP_400_BAD_REQUEST, detail=validation_error_msg ) - processed_df = process_data_f(template_df, new_campaign_df) + processed_df = process_data_f( + merged_campaigns_ad_groups_df, + template_df, + new_campaign_df, + target_resource=target_resource, + ) validated_df = validate_output_data( processed_df, @@ -418,6 +394,43 @@ async def process_data( return GoogleSheetValues(values=values) +async def process_campaigns_and_ad_groups( + campaign_template_values: GoogleSheetValues, + ad_group_template_values: GoogleSheetValues, +) -> pd.DataFrame: + _check_parameters_are_not_none( + { + "campaign_template_values": campaign_template_values, + "ad_group_template_values": ad_group_template_values, + } + ) + if ( + len(campaign_template_values.values) < 2 # type: ignore + or len(ad_group_template_values.values) < 2 # type: ignore + ): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Both template campaigns and ad groups data should have at least two rows (header and data).", + ) + + try: + campaign_template_df = pd.DataFrame( + campaign_template_values.values[1:], # type: ignore + columns=campaign_template_values.values[0], # type: ignore + ) + ad_group_template_df = pd.DataFrame( + ad_group_template_values.values[1:], # type: ignore + columns=ad_group_template_values.values[0], # type: ignore + ) + except Exception as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Invalid data format. Please provide data in the correct format: {e}", + ) from e + + return pd.merge(campaign_template_df, ad_group_template_df, how="cross") + + @app.post( "/process-spreadsheet", description="Process data to generate new ads or keywords based on the template", @@ -430,10 +443,6 @@ async def process_spreadsheet( Optional[str], Query(description="ID of the Google Sheet with the template data"), ] = None, - template_sheet_title: Annotated[ - Optional[str], - Query(description="The title of the sheet with the template data"), - ] = None, new_campaign_spreadsheet_id: Annotated[ Optional[str], Query(description="ID of the Google Sheet with the new campaign data"), @@ -442,66 +451,95 @@ async def process_spreadsheet( Optional[str], Query(description="The title of the sheet with the new campaign data"), ] = None, - target_resource: Annotated[ - Optional[str], - Query( - description="The target resource to be updated, options: 'ad' or 'keyword'" - ), - ] = None, ) -> str: _check_parameters_are_not_none( { "template_spreadsheet_id": template_spreadsheet_id, - "template_sheet_title": template_sheet_title, "new_campaign_spreadsheet_id": new_campaign_spreadsheet_id, "new_campaign_sheet_title": new_campaign_sheet_title, - "target_resource": target_resource, } ) - _validate_target_resource(target_resource) - template_values = await get_sheet( - user_id=user_id, - spreadsheet_id=template_spreadsheet_id, - title=template_sheet_title, - ) new_campaign_values = await get_sheet( user_id=user_id, spreadsheet_id=new_campaign_spreadsheet_id, title=new_campaign_sheet_title, ) + try: + ads_template_values = await get_sheet( + user_id=user_id, + spreadsheet_id=template_spreadsheet_id, + title="Ads", + ) + keywords_template_values = await get_sheet( + user_id=user_id, + spreadsheet_id=template_spreadsheet_id, + title="Keywords", + ) + campaign_template_values = await get_sheet( + user_id=user_id, spreadsheet_id=template_spreadsheet_id, title="Campaigns" + ) + ad_group_template_values = await get_sheet( + user_id=user_id, spreadsheet_id=template_spreadsheet_id, title="Ad Groups" + ) + if not isinstance( + campaign_template_values, GoogleSheetValues + ) or not isinstance(ad_group_template_values, GoogleSheetValues): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"""Please provide Campaigns, Ad Groups, Ads and KEywords tables in the template spreadsheet with id '{template_spreadsheet_id}'""", + ) + + merged_campaigns_ad_groups_df = await process_campaigns_and_ad_groups( + campaign_template_values=campaign_template_values, + ad_group_template_values=ad_group_template_values, + ) + + except Exception as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"""Make sure tables 'Campaigns', 'Ad Groups', 'Ads' and 'Keywords' are present in the template spreadsheet with id '{template_spreadsheet_id}'.""", + ) from e - if not isinstance(template_values, GoogleSheetValues) or not isinstance( - new_campaign_values, GoogleSheetValues + if ( + not isinstance(ads_template_values, GoogleSheetValues) + or not isinstance(keywords_template_values, GoogleSheetValues) + or not isinstance(new_campaign_values, GoogleSheetValues) ): raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=f"""Invalid data format. -template_values: {template_values} +ads_template_values: {ads_template_values} + +keywords_template_values: {keywords_template_values} new_campaign_values: {new_campaign_values} Please provide data in the correct format.""", ) - processed_values = await process_data( - template_sheet_values=template_values, - new_campaign_sheet_values=new_campaign_values, - target_resource=target_resource, - ) + response = "" + for template_values, target_resource in zip( + [ads_template_values, keywords_template_values], ["ad", "keyword"] + ): + processed_values = await process_data( + template_sheet_values=template_values, + new_campaign_sheet_values=new_campaign_values, + merged_campaigns_ad_groups_df=merged_campaigns_ad_groups_df, + target_resource=target_resource, + ) - title = ( - f"Captn - {target_resource.capitalize()}s {datetime.now():%Y-%m-%d %H:%M:%S}" # type: ignore - ) - await create_sheet( - user_id=user_id, - spreadsheet_id=new_campaign_spreadsheet_id, - title=title, - ) - await update_sheet( - user_id=user_id, - spreadsheet_id=new_campaign_spreadsheet_id, - title=title, - sheet_values=processed_values, - ) + title = f"Captn - {target_resource.capitalize()}s {datetime.now():%Y-%m-%d %H:%M:%S}" # type: ignore + await create_sheet( + user_id=user_id, + spreadsheet_id=new_campaign_spreadsheet_id, + title=title, + ) + await update_sheet( + user_id=user_id, + spreadsheet_id=new_campaign_spreadsheet_id, + title=title, + sheet_values=processed_values, + ) + response += f"Sheet with the name '{title}' has been created successfully.\n" - return f"Sheet with the name 'Captn - {target_resource.capitalize()}s' has been created successfully." # type: ignore + return response diff --git a/google_sheets/data_processing/processing.py b/google_sheets/data_processing/processing.py index fe50755..24a78e5 100644 --- a/google_sheets/data_processing/processing.py +++ b/google_sheets/data_processing/processing.py @@ -1,4 +1,4 @@ -from typing import List, Literal +from typing import List, Literal, Optional import pandas as pd @@ -24,15 +24,20 @@ def validate_input_data( INSERT_STATION_FROM = "INSERT_STATION_FROM" INSERT_STATION_TO = "INSERT_STATION_TO" +INSERT_COUNTRY = "INSERT_COUNTRY" +INSERT_CRITERION_TYPE = "INSERT_CRITERION_TYPE" def process_data_f( - template_df: pd.DataFrame, new_campaign_df: pd.DataFrame + merged_campaigns_ad_groups_df: pd.DataFrame, + template_df: pd.DataFrame, + new_campaign_df: pd.DataFrame, + target_resource: Optional[str] = None, ) -> pd.DataFrame: + template_df = pd.merge(merged_campaigns_ad_groups_df, template_df, how="cross") final_df = pd.DataFrame(columns=template_df.columns) for _, template_row in template_df.iterrows(): for _, new_campaign_row in new_campaign_df.iterrows(): - campaign = f"{new_campaign_row['Country']} - {new_campaign_row['Station From']} - {new_campaign_row['Station To']}" stations = [ { "Station From": new_campaign_row["Station From"], @@ -44,23 +49,45 @@ def process_data_f( "Station To": new_campaign_row["Station From"], }, ] + if target_resource == "ad": + stations[0]["Final Url"] = new_campaign_row["Final Url From"] + stations[1]["Final Url"] = new_campaign_row["Final Url To"] + for station in stations: new_row = template_row.copy() - new_row["Campaign"] = campaign - new_row["Ad Group"] = ( - f"{station['Station From']} - {station['Station To']}" + new_row["Campaign Name"] = new_row["Campaign Name"].replace( + INSERT_COUNTRY, new_campaign_row["Country"] + ) + new_row["Campaign Name"] = new_row["Campaign Name"].replace( + INSERT_STATION_FROM, new_campaign_row["Station From"] + ) + new_row["Campaign Name"] = new_row["Campaign Name"].replace( + INSERT_STATION_TO, new_campaign_row["Station To"] + ) + + new_row["Ad Group Name"] = new_row["Ad Group Name"].replace( + INSERT_CRITERION_TYPE, new_row["Match Type"] ) - # Replace the placeholders in all columns with the actual station names INSERT_STATION_FROM + new_row = new_row.str.replace( + INSERT_COUNTRY, new_campaign_row["Country"] + ) new_row = new_row.str.replace( INSERT_STATION_FROM, station["Station From"] ) new_row = new_row.str.replace(INSERT_STATION_TO, station["Station To"]) + if target_resource == "ad": + new_row["Final URL"] = station["Final Url"] + final_df = pd.concat( [final_df, pd.DataFrame([new_row])], ignore_index=True ) + final_df = final_df.sort_values( + by=["Campaign Name", "Ad Group Name"], ignore_index=True + ) + return final_df diff --git a/google_sheets/google_api/service.py b/google_sheets/google_api/service.py index 35ccc5e..bf045a7 100644 --- a/google_sheets/google_api/service.py +++ b/google_sheets/google_api/service.py @@ -71,8 +71,14 @@ def get_files_f(service: Any) -> List[Dict[str, str]]: def get_sheet_f(service: Any, spreadsheet_id: str, range: str) -> Any: # Call the Sheets API sheet = service.spreadsheets() - result = sheet.values().get(spreadsheetId=spreadsheet_id, range=range).execute() - values = result.get("values", []) + try: + result = sheet.values().get(spreadsheetId=spreadsheet_id, range=range).execute() + values = result.get("values", []) + except Exception as e: + raise HTTPException( + status_code=404, + detail=f"Unable to read from spreadsheet with id '{spreadsheet_id}', and range '{range}'", + ) from e return values diff --git a/tests/app/test_app.py b/tests/app/test_app.py index f60b806..4bd5c77 100644 --- a/tests/app/test_app.py +++ b/tests/app/test_app.py @@ -1,12 +1,13 @@ from typing import Any, Dict, Optional, Union from unittest.mock import MagicMock, patch +import pandas as pd import pytest from fastapi import HTTPException from fastapi.testclient import TestClient from googleapiclient.errors import HttpError -from google_sheets.app import _check_parameters_are_not_none, app +from google_sheets.app import _check_parameters_are_not_none, app, process_data from google_sheets.model import GoogleSheetValues client = TestClient(app) @@ -167,107 +168,147 @@ def test_get_all_file_names(self) -> None: class TestProcessData: @pytest.mark.parametrize( - ("template_sheet_values", "new_campaign_sheet_values", "status_code", "detail"), + ("template_sheet_values", "new_campaign_sheet_values", "detail"), [ ( GoogleSheetValues( values=[ - ["Campaign", "Ad Group", "Keyword"], + ["Keyword"], ] ), GoogleSheetValues( values=[ - ["Country", "Station From", "Station To"], - ["India", "Delhi", "Mumbai"], + [ + "Country", + "Station From", + "Station To", + "Final Url From", + "Final Url To", + ], + [ + "India", + "Delhi", + "Mumbai", + "https://www.example.com/from", + "https://www.example.com/to", + ], ] ), - 400, "Both template and new campaign data should have at least two rows", ), ( GoogleSheetValues( values=[ - ["Campaign", "Ad Group", "Keyword"], - ["Campaign A", "Ad group A", "Keyword A"], + ["Fake column"], + ["fake"], ] ), GoogleSheetValues( values=[ - ["Country", "Station From", "Station To"], - ["India", "Delhi", "Mumbai"], + [ + "Country", + "Station From", + "Station To", + "Final Url From", + "Final Url To", + ], + [ + "India", + "Delhi", + "Mumbai", + "https://www.example.com/from", + "https://www.example.com/to", + ], ] ), - 400, "Mandatory columns missing in the keyword template data.", ), ( GoogleSheetValues( values=[ [ - "Campaign", - "Ad Group", "Keyword", - "Criterion Type", - "Max CPC", ], - ["Campaign A", "Ad group A", "Keyword A", "Exact", "1"], + ["Keyword A"], ] ), GoogleSheetValues( values=[ - ["Country", "Station From", "Station To"], - ["India", "Delhi", "Mumbai"], + [ + "Country", + "Station From", + "Station To", + "Final Url From", + "Final Url To", + ], + [ + "India", + "Delhi", + "Mumbai", + "https://www.example.com/from", + "https://www.example.com/to", + ], ] ), - 200, GoogleSheetValues( values=[ [ - "Campaign", - "Ad Group", + "Campaign Name", + "Ad Group Name", + "Match Type", "Keyword", - "Criterion Type", - "Max CPC", ], [ "India - Delhi - Mumbai", "Delhi - Mumbai", - "Keyword A", "Exact", - "1", + "Keyword A", ], [ "India - Delhi - Mumbai", "Mumbai - Delhi", - "Keyword A", "Exact", - "1", + "Keyword A", ], ], ), ), ], ) - def test_process_data( + @pytest.mark.asyncio() + async def test_process_data( self, template_sheet_values: GoogleSheetValues, new_campaign_sheet_values: GoogleSheetValues, - status_code: int, detail: Union[str, GoogleSheetValues], ) -> None: - response = client.post( - "/process-data?target_resource=keyword", - json={ - "template_sheet_values": template_sheet_values.model_dump(), - "new_campaign_sheet_values": new_campaign_sheet_values.model_dump(), - }, + merged_campaigns_ad_groups_df = pd.DataFrame( + { + "Campaign Name": [ + "INSERT_COUNTRY - INSERT_STATION_FROM - INSERT_STATION_TO" + ], + "Ad Group Name": ["INSERT_STATION_FROM - INSERT_STATION_TO"], + "Match Type": ["Exact"], + } ) - - assert response.status_code == status_code if isinstance(detail, GoogleSheetValues): - assert response.json() == detail.model_dump() + processed_data = await process_data( + template_sheet_values=template_sheet_values, + new_campaign_sheet_values=new_campaign_sheet_values, + merged_campaigns_ad_groups_df=merged_campaigns_ad_groups_df, + target_resource="keyword", + ) + assert processed_data.model_dump() == detail.model_dump() + else: - assert detail in response.json()["detail"] + with pytest.raises(HTTPException) as exc: + await process_data( + template_sheet_values=template_sheet_values, + new_campaign_sheet_values=new_campaign_sheet_values, + merged_campaigns_ad_groups_df=merged_campaigns_ad_groups_df, + target_resource="keyword", + ) + assert detail in exc.value.detail class TestOpenAPIJSON: @@ -284,7 +325,6 @@ def test_openapi(self) -> None: "/create-sheet", "/get-all-file-names", "/get-all-sheet-titles", - "/process-data", "/process-spreadsheet", ] diff --git a/tests/data_processing/test_processing.py b/tests/data_processing/test_processing.py index 089a397..6bf6d7a 100644 --- a/tests/data_processing/test_processing.py +++ b/tests/data_processing/test_processing.py @@ -51,13 +51,20 @@ def test_validate_input_data(df: pd.DataFrame, expected: str) -> None: @pytest.mark.parametrize( - ("template_df", "new_campaign_df", "expected"), + ("merged_campaigns_ad_groups_df", "template_df", "new_campaign_df", "expected"), [ ( pd.DataFrame( { - "Campaign": ["", ""], - "Ad Group": ["", ""], + "Campaign Name": [ + "INSERT_COUNTRY - INSERT_STATION_FROM - INSERT_STATION_TO" + ], + "Ad Group Name": ["INSERT_STATION_FROM - INSERT_STATION_TO"], + "Match Type": ["Exact"], + } + ), + pd.DataFrame( + { "Keyword": ["k1", "k2"], "Max CPC": ["", ""], } @@ -99,8 +106,15 @@ def test_validate_input_data(df: pd.DataFrame, expected: str) -> None: ( pd.DataFrame( { - "Campaign": ["", ""], - "Ad Group": ["", ""], + "Campaign Name": [ + "INSERT_COUNTRY - INSERT_STATION_FROM - INSERT_STATION_TO" + ], + "Ad Group Name": ["INSERT_STATION_FROM - INSERT_STATION_TO"], + "Match Type": ["Exact"], + } + ), + pd.DataFrame( + { "Keyword": ["k1 INSERT_STATION_FROM", "k2"], "Max CPC": ["", ""], } @@ -142,9 +156,14 @@ def test_validate_input_data(df: pd.DataFrame, expected: str) -> None: ], ) def test_process_data_f( - template_df: pd.DataFrame, new_campaign_df: pd.DataFrame, expected: List[List[str]] + merged_campaigns_ad_groups_df: pd.DataFrame, + template_df: pd.DataFrame, + new_campaign_df: pd.DataFrame, + expected: List[List[str]], ) -> None: - process_data_f(template_df, new_campaign_df).equals(expected) + process_data_f(merged_campaigns_ad_groups_df, template_df, new_campaign_df).equals( + expected + ) @pytest.mark.parametrize(