-
Notifications
You must be signed in to change notification settings - Fork 56
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add gcp email filter uniflow application.
- Loading branch information
1 parent
0ab6f9b
commit 8a07e4f
Showing
10 changed files
with
571 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,244 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 161, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%reload_ext autoreload\n", | ||
"%autoreload 2\n", | ||
"\n", | ||
"import sys\n", | ||
"import pprint\n", | ||
"\n", | ||
"sys.path.append(\".\")\n", | ||
"sys.path.append(\"..\")\n", | ||
"sys.path.append(\"../..\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 162, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"True" | ||
] | ||
}, | ||
"execution_count": 162, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"from dotenv import load_dotenv\n", | ||
"load_dotenv()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 163, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"{'extract': ['ExtractHTMLFlow',\n", | ||
" 'ExtractImageFlow',\n", | ||
" 'ExtractIpynbFlow',\n", | ||
" 'ExtractMarkdownFlow',\n", | ||
" 'ExtractPDFFlow',\n", | ||
" 'ExtractTxtFlow',\n", | ||
" 'ExtractGmailFlow'],\n", | ||
" 'transform': ['TransformAzureOpenAIFlow',\n", | ||
" 'TransformCopyFlow',\n", | ||
" 'TransformGoogleFlow',\n", | ||
" 'TransformGoogleMultiModalModelFlow',\n", | ||
" 'TransformHuggingFaceFlow',\n", | ||
" 'TransformLMQGFlow',\n", | ||
" 'TransformOpenAIFlow'],\n", | ||
" 'rater': ['RaterFlow']}" | ||
] | ||
}, | ||
"execution_count": 163, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"from uniflow import Context\n", | ||
"from uniflow.flow.client import ExtractClient\n", | ||
"from uniflow.flow.config import ExtractGmailConfig\n", | ||
"from uniflow.viz import Viz\n", | ||
"from uniflow.flow.flow_factory import FlowFactory\n", | ||
"from uniflow.flow.client import TransformClient\n", | ||
"from uniflow.flow.config import TransformGmailSpamConfig\n", | ||
"from uniflow.op.model.model_config import GoogleModelConfig, OpenAIModelConfig\n", | ||
"\n", | ||
"FlowFactory.list()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 165, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"extract_client = ExtractClient(\n", | ||
" ExtractGmailConfig(\n", | ||
" credentials_path=\"credentials.json\",\n", | ||
" token_path=\"token.json\",\n", | ||
" )\n", | ||
" )" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 166, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"100%|██████████| 1/1 [00:04<00:00, 4.04s/it]\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"extract_data = extract_client.run([{}])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 167, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"transform_client = TransformClient(\n", | ||
" TransformGmailSpamConfig(\n", | ||
" flow_name=\"TransformOpenAIFlow\",\n", | ||
" model_config=OpenAIModelConfig(),\n", | ||
" # flow_name=\"TransformGoogleFlow\",\n", | ||
" # model_config=GoogleModelConfig()\n", | ||
" )\n", | ||
" )" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 168, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"transform_data = []\n", | ||
"for d in extract_data[0]['output'][0]:\n", | ||
" if d['body']:\n", | ||
" transform_data.append(Context(email=d['body'][:5000]))\n", | ||
" else:\n", | ||
" transform_data.append(Context(email=d['snippet'][:5000]))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 169, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
" 0%| | 0/10 [00:00<?, ?it/s]" | ||
] | ||
}, | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"100%|██████████| 10/10 [00:08<00:00, 1.12it/s]\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"transform_output = transform_client.run(transform_data)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 170, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Email 18dfc3488fc902f1 is spam: False\n", | ||
"Email 18dfc1ef230f2165 is spam: True\n", | ||
"Email 18dfc1153607218b is spam: False\n", | ||
"Email 18dfbdae16df6616 is spam: False\n", | ||
"Email 18dfb65c017999d8 is spam: False\n", | ||
"Email 18dfb383083d31c4 is spam: False\n", | ||
"Email 18dfb3609af5acc7 is spam: False\n", | ||
"Email 18dfb3282cdd9716 is spam: True\n", | ||
"Email 18dfb151d492a69f is spam: False\n", | ||
"Email 18dfafdd5ebbc628 is spam: False\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"from google.oauth2.credentials import Credentials\n", | ||
"from googleapiclient.discovery import build\n", | ||
"\n", | ||
"SPAM_LABEL = \"Spam Email (AI Email Filter)\"\n", | ||
"NON_SPAM_LABEL = \"Email (AI Email Filter)\"\n", | ||
"\n", | ||
"SCOPES = [\"https://www.googleapis.com/auth/gmail.modify\"]\n", | ||
"creds = Credentials.from_authorized_user_file(\"token.json\", SCOPES)\n", | ||
"service = build(\"gmail\", \"v1\", credentials=creds)\n", | ||
"\n", | ||
"\n", | ||
"def get_label_id(service, label_name):\n", | ||
" labels = service.users().labels().list(userId='me').execute().get('labels', [])\n", | ||
" for label in labels:\n", | ||
" if label['name'] == label_name:\n", | ||
" return label['id']\n", | ||
" return None\n", | ||
"\n", | ||
"SPAM_LABEL_ID = get_label_id(service, SPAM_LABEL)\n", | ||
"NON_SPAM_LABEL_ID = get_label_id(service, NON_SPAM_LABEL)\n", | ||
"\n", | ||
"for e, t in zip(extract_data[0]['output'][0], transform_output):\n", | ||
" # true if spam, false if not\n", | ||
" is_spam = \"yes\" in t['output'][0]['response'][0].lower()\n", | ||
" print(f\"Email {e['email_id']} is spam: {is_spam}\")\n", | ||
" email_id = e['email_id']\n", | ||
" label_id = SPAM_LABEL_ID if is_spam else NON_SPAM_LABEL_ID\n", | ||
" service.users().messages().modify(userId='me', id=e['email_id'], body={'addLabelIds': [label_id], 'removeLabelIds': []}).execute()" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "uniflow", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.13" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
"""Extract Gmail flow.""" | ||
|
||
from uniflow.constants import EXTRACT | ||
from uniflow.flow.flow import Flow | ||
from uniflow.op.extract.load.gcp.workspace.gmail_op import GmailOp | ||
|
||
|
||
class ExtractGmailFlow(Flow): | ||
"""Extract Gmail Flow Class.""" | ||
|
||
TAG = EXTRACT | ||
|
||
def __init__( | ||
self, | ||
credentials_path: str = "", | ||
token_path: str = "", | ||
): | ||
"""Extract Gmail Flow Constructor.""" | ||
super().__init__() | ||
self._gmail_op = GmailOp( | ||
name="gmail_op", credentials_path=credentials_path, token_path=token_path | ||
) | ||
|
||
def run(self, nodes): | ||
"""Run Extract Gmail Flow. | ||
Args: | ||
nodes (Sequence[Node]): Nodes to run. | ||
Returns: | ||
Sequence[Node]: Nodes after running. | ||
""" | ||
return self._gmail_op(nodes) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Empty file.
Oops, something went wrong.