Skip to content

Commit

Permalink
Add gcp email filter uniflow application.
Browse files Browse the repository at this point in the history
  • Loading branch information
goldmermaid committed Mar 1, 2024
1 parent 0ab6f9b commit 8a07e4f
Show file tree
Hide file tree
Showing 10 changed files with 571 additions and 0 deletions.
244 changes: 244 additions & 0 deletions example/application/gmail_filter.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 161,
"metadata": {},
"outputs": [],
"source": [
"%reload_ext autoreload\n",
"%autoreload 2\n",
"\n",
"import sys\n",
"import pprint\n",
"\n",
"sys.path.append(\".\")\n",
"sys.path.append(\"..\")\n",
"sys.path.append(\"../..\")"
]
},
{
"cell_type": "code",
"execution_count": 162,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 162,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from dotenv import load_dotenv\n",
"load_dotenv()"
]
},
{
"cell_type": "code",
"execution_count": 163,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'extract': ['ExtractHTMLFlow',\n",
" 'ExtractImageFlow',\n",
" 'ExtractIpynbFlow',\n",
" 'ExtractMarkdownFlow',\n",
" 'ExtractPDFFlow',\n",
" 'ExtractTxtFlow',\n",
" 'ExtractGmailFlow'],\n",
" 'transform': ['TransformAzureOpenAIFlow',\n",
" 'TransformCopyFlow',\n",
" 'TransformGoogleFlow',\n",
" 'TransformGoogleMultiModalModelFlow',\n",
" 'TransformHuggingFaceFlow',\n",
" 'TransformLMQGFlow',\n",
" 'TransformOpenAIFlow'],\n",
" 'rater': ['RaterFlow']}"
]
},
"execution_count": 163,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from uniflow import Context\n",
"from uniflow.flow.client import ExtractClient\n",
"from uniflow.flow.config import ExtractGmailConfig\n",
"from uniflow.viz import Viz\n",
"from uniflow.flow.flow_factory import FlowFactory\n",
"from uniflow.flow.client import TransformClient\n",
"from uniflow.flow.config import TransformGmailSpamConfig\n",
"from uniflow.op.model.model_config import GoogleModelConfig, OpenAIModelConfig\n",
"\n",
"FlowFactory.list()"
]
},
{
"cell_type": "code",
"execution_count": 165,
"metadata": {},
"outputs": [],
"source": [
"extract_client = ExtractClient(\n",
" ExtractGmailConfig(\n",
" credentials_path=\"credentials.json\",\n",
" token_path=\"token.json\",\n",
" )\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 166,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1/1 [00:04<00:00, 4.04s/it]\n"
]
}
],
"source": [
"extract_data = extract_client.run([{}])"
]
},
{
"cell_type": "code",
"execution_count": 167,
"metadata": {},
"outputs": [],
"source": [
"transform_client = TransformClient(\n",
" TransformGmailSpamConfig(\n",
" flow_name=\"TransformOpenAIFlow\",\n",
" model_config=OpenAIModelConfig(),\n",
" # flow_name=\"TransformGoogleFlow\",\n",
" # model_config=GoogleModelConfig()\n",
" )\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 168,
"metadata": {},
"outputs": [],
"source": [
"transform_data = []\n",
"for d in extract_data[0]['output'][0]:\n",
" if d['body']:\n",
" transform_data.append(Context(email=d['body'][:5000]))\n",
" else:\n",
" transform_data.append(Context(email=d['snippet'][:5000]))"
]
},
{
"cell_type": "code",
"execution_count": 169,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/10 [00:00<?, ?it/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 10/10 [00:08<00:00, 1.12it/s]\n"
]
}
],
"source": [
"transform_output = transform_client.run(transform_data)"
]
},
{
"cell_type": "code",
"execution_count": 170,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Email 18dfc3488fc902f1 is spam: False\n",
"Email 18dfc1ef230f2165 is spam: True\n",
"Email 18dfc1153607218b is spam: False\n",
"Email 18dfbdae16df6616 is spam: False\n",
"Email 18dfb65c017999d8 is spam: False\n",
"Email 18dfb383083d31c4 is spam: False\n",
"Email 18dfb3609af5acc7 is spam: False\n",
"Email 18dfb3282cdd9716 is spam: True\n",
"Email 18dfb151d492a69f is spam: False\n",
"Email 18dfafdd5ebbc628 is spam: False\n"
]
}
],
"source": [
"from google.oauth2.credentials import Credentials\n",
"from googleapiclient.discovery import build\n",
"\n",
"SPAM_LABEL = \"Spam Email (AI Email Filter)\"\n",
"NON_SPAM_LABEL = \"Email (AI Email Filter)\"\n",
"\n",
"SCOPES = [\"https://www.googleapis.com/auth/gmail.modify\"]\n",
"creds = Credentials.from_authorized_user_file(\"token.json\", SCOPES)\n",
"service = build(\"gmail\", \"v1\", credentials=creds)\n",
"\n",
"\n",
"def get_label_id(service, label_name):\n",
" labels = service.users().labels().list(userId='me').execute().get('labels', [])\n",
" for label in labels:\n",
" if label['name'] == label_name:\n",
" return label['id']\n",
" return None\n",
"\n",
"SPAM_LABEL_ID = get_label_id(service, SPAM_LABEL)\n",
"NON_SPAM_LABEL_ID = get_label_id(service, NON_SPAM_LABEL)\n",
"\n",
"for e, t in zip(extract_data[0]['output'][0], transform_output):\n",
" # true if spam, false if not\n",
" is_spam = \"yes\" in t['output'][0]['response'][0].lower()\n",
" print(f\"Email {e['email_id']} is spam: {is_spam}\")\n",
" email_id = e['email_id']\n",
" label_id = SPAM_LABEL_ID if is_spam else NON_SPAM_LABEL_ID\n",
" service.users().messages().modify(userId='me', id=e['email_id'], body={'addLabelIds': [label_id], 'removeLabelIds': []}).execute()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "uniflow",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
47 changes: 47 additions & 0 deletions uniflow/flow/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,17 @@ class ExtractConfig:
model_config: Optional[ModelConfig] = None
splitter: Optional[str] = None
post_extract_fn: Optional[Callable] = None
credentials_path: str = ""
token_path: str = ""


@dataclass
class ExtractGmailConfig(ExtractConfig):
"""Extract Txt Config Class."""

flow_name: str = "ExtractGmailFlow"
credentials_path: str = ""
token_path: str = ""


@dataclass
Expand Down Expand Up @@ -118,6 +129,42 @@ class TransformConfig:
)


@dataclass
class TransformGmailSpamConfig(TransformConfig):
"""Transform Google Config Class."""

flow_name: str = "TransformGoogleFlow"
model_config: ModelConfig = field(default_factory=GoogleModelConfig)
num_thread: int = 1
prompt_template: PromptTemplate = field(
default_factory=lambda: PromptTemplate(
instruction="""You are a highly intelligent AI trained to identify spam emails. Is this email a spam email?. \
Follow the format of the few shot examples below to include explain and answer in the response for the given email. \
You answer should be either Yes or No.""",
few_shot_prompt=[
Context(
email="""Subject: Meeting Rescheduled \
Hi Team, \
We need to reschedule this week's meeting to Thursday at 3 PM due to a conflict. Please update your calendars and let me know if you have any issues with this new time. \
Best, \
Alex""",
explain="This email is non-spam as it directly relates to the recipient's interests, contains no suspicious links or requests, and uses a personalized, professional tone.",
answer="no",
),
Context(
email="""Subject: Congratulations! You've Won! \
Dear Valued Customer, \
You've been selected to win a free iPhone! Click here to claim your prize now! Offer expires in 24 hours. No purchase necessary. \
Best, \
Prize Notification Team""",
explain="This email is spam due to its unsolicited offer, use of urgency to provoke immediate action, inclusion of a suspicious link, and lack of personalization, which are classic signs of spam.",
answer="yes",
),
],
)
)


@dataclass
class TransformGoogleConfig(TransformConfig):
"""Transform Google Config Class."""
Expand Down
4 changes: 4 additions & 0 deletions uniflow/flow/extract/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
from uniflow.flow.extract.extract_md_flow import ExtractMarkdownFlow # noqa: F401;
from uniflow.flow.extract.extract_pdf_flow import ExtractPDFFlow # noqa: F401;
from uniflow.flow.extract.extract_txt_flow import ExtractTxtFlow # noqa: F401, F403
from uniflow.flow.extract.gcp.workspace.extract_gmail_flow import ( # noqa: F401, F403
ExtractGmailFlow,
)

__all__ = [
"ExtractIpynbFlow",
Expand All @@ -18,4 +21,5 @@
"ExtractTxtFlow",
"ExtractImageFlow",
"ExtractHTMLFlow",
"ExtractGmailFlow",
]
Empty file.
Empty file.
33 changes: 33 additions & 0 deletions uniflow/flow/extract/gcp/workspace/extract_gmail_flow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Extract Gmail flow."""

from uniflow.constants import EXTRACT
from uniflow.flow.flow import Flow
from uniflow.op.extract.load.gcp.workspace.gmail_op import GmailOp


class ExtractGmailFlow(Flow):
"""Extract Gmail Flow Class."""

TAG = EXTRACT

def __init__(
self,
credentials_path: str = "",
token_path: str = "",
):
"""Extract Gmail Flow Constructor."""
super().__init__()
self._gmail_op = GmailOp(
name="gmail_op", credentials_path=credentials_path, token_path=token_path
)

def run(self, nodes):
"""Run Extract Gmail Flow.
Args:
nodes (Sequence[Node]): Nodes to run.
Returns:
Sequence[Node]: Nodes after running.
"""
return self._gmail_op(nodes)
4 changes: 4 additions & 0 deletions uniflow/flow/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ def __init__(self, config: Dict[str, Any]) -> None:
kwargs["splitter"] = self._config.splitter
if self._config.post_extract_fn:
kwargs["post_extract_fn"] = self._config.post_extract_fn
if self._config.credentials_path:
kwargs["credentials_path"] = self._config.credentials_path
if self._config.token_path:
kwargs["token_path"] = self._config.token_path
for i in range(self._num_thread):
with OpScope(name="thread_" + str(i)):
self._flow_queue.put(self._flow_cls(**kwargs))
Expand Down
Empty file.
Empty file.
Loading

0 comments on commit 8a07e4f

Please sign in to comment.