-
Notifications
You must be signed in to change notification settings - Fork 0
/
icanical_controller.py
475 lines (387 loc) · 17.1 KB
/
icanical_controller.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
"""
Controller for our Python code.

Gets the header, sender, and body of an email from the user and creates an
instance of the model.
"""
# import a function to add time
from datetime import timedelta
from datetime import date
# import the regex package for searching for dates
import re
import imaplib
from nntplib import decode_header
import email
# import a function to convert strings to dates
from dateparser import parse
class Controller:
    """
    Holds the pieces of the most recently read email.

    The controller stores the email's recipient, header and body, can refresh
    them from an inbox, and can look up the event time mentioned in them.
    """

    def __init__(self):
        # nothing has been read from the inbox yet
        self._recipient = self._header = self._body = None

    @property
    def header(self):
        """
        Read-only access to the header from outside the class.

        The header is the event name, taken from the subject line of the
        forwarded email.
        """
        return self._header

    @property
    def recipient(self):
        """
        Read-only access to the recipient from outside the class.

        The recipient is the intended recipient/attendee of the ical.
        """
        return self._recipient

    def datetimes(self):
        """
        Find the start and end date/time of the event in the stored email.

        The body is searched first; the header is only searched when the
        body yields nothing.

        Returns:
            Whatever get_date() returns: a list of two elements holding the
            start and end date/time, or False when no time is found in
            either the body or the header.
        """
        return get_date(self._body) or get_date(self._header)

    def check_inbox(self, username, password):
        """
        Wait for a new email via get_mail() and remember its contents.

        The subject is stored as the header, the sender as the recipient and
        the text as the body.

        Args:
            username: string that is the username of the ical creator's
                account.
            password: a string that is the password of the ical creator's
                account.

        Returns:
            a list with the email header, sender, and body, in that order.
        """
        subject, sender, text = get_mail(username, password)
        self._header, self._recipient, self._body = subject, sender, text
        return [subject, sender, text]
def set_am_pm(start_time, end_time):
    """
    Give both ends of a time range an am/pm marker.

    A time that already carries a marker is left untouched. A time without
    one gets " am" or " pm" appended, inferred from the other time's marker
    and from how the two hour numbers compare. When neither time has a
    marker, an afternoon ("pm") anchor is assumed first.

    This is a helper function for get_date().

    Args:
        start_time: a string representing the start of a time range
        end_time: a string representing the end of a time range

    Returns:
        A list of two strings, the start and end time, each with a period
        marker in it.
    """
    def leading_hour(stamp):
        # the first run of one or two digits is taken to be the hour
        return int(re.findall(r"(?:\d{1,2})", stamp, re.IGNORECASE)[0])

    def periods(stamp):
        # (contains "am", contains "pm"), ignoring case
        lowered = stamp.lower()
        return "am" in lowered, "pm" in lowered

    start_hour = leading_hour(start_time)
    end_hour = leading_hour(end_time)
    ascending = start_hour < end_hour

    start_am, start_pm = periods(start_time)
    end_am, end_pm = periods(end_time)

    # Step 1: with no marker anywhere, anchor one side in the afternoon so
    # that step 2 has something to infer the other side from.
    if not (start_am or start_pm or end_am or end_pm):
        if start_hour == 12:
            start_time += " pm"
            start_am, start_pm = periods(start_time)
        else:
            end_time += " pm"
            end_am, end_pm = periods(end_time)

    start_marked = start_am or start_pm
    end_marked = end_am or end_pm

    # Step 2: fill in whichever single side is still missing its marker.
    if end_marked and not start_marked:
        if start_hour == 12:
            # a range starting at 12 shares the period of its end
            suffix = " pm" if end_pm else " am"
        elif end_hour == 12:
            # a range ending at 12 crosses noon/midnight
            suffix = " am" if end_pm else " pm"
        elif end_am:
            suffix = " am" if ascending else " pm"
        else:
            suffix = " pm" if ascending else " am"
        start_time += suffix
    elif start_marked and not end_marked:
        if end_hour == 12:
            # a range ending at 12 crosses noon/midnight
            suffix = " am" if start_pm else " pm"
        elif start_hour == 12:
            # a range starting at 12 shares the period of its start
            suffix = " pm" if start_pm else " am"
        elif start_am:
            suffix = " am" if ascending else " pm"
        else:
            suffix = " pm" if ascending else " am"
        end_time += suffix

    return [start_time, end_time]
def get_date(text):
    """
    Takes a string and finds the start and end time of an event
    mentioned in the string. If there is a start time but no
    end time then an end time one hour later than the start time
    will be assumed.

    When the text contains several times, the one closest to the first date
    in the text is used (times carrying an am/pm marker are preferred). When
    the text contains no date, the event is placed on today's date. The year
    is always forced to the current year.

    Args:
        text: a string that should be the header or body of an email. Any
            non-string value (for example None for a missing body) is
            treated as containing no time.

    Returns:
        Either a list of 2 elements containing the start and end date
        time date elements or the logical operator False if no time
        is found or the date parser cannot interpret what was found.
    """
    # a missing body/header cannot contain a time
    if not isinstance(text, str):
        return False

    # create the regex for finding time
    time_regex = \
        r"(?:\d+:?\d*\s*(?:AM|PM)?\s*(?:-|–|to)\s*\d+:?\d*\s*(?:AM|PM)?)" +\
        r"|(?:\d+:?\d*\s*(?:AM|PM))|" +\
        r"(?<=at)\s*\d{1,2}:?\d*(?!.*(?:pm|am|-|–|to))"
    time_matches = re.findall(time_regex, text, re.IGNORECASE)
    # without a time there is nothing to schedule
    if not time_matches or time_matches[0] == "":
        return False

    # create the regex for finding the date
    date_regex = r"(?:today)|(?:tomorrow)" +\
        r"|(?:(?:apr|may|jun|jul|aug|sep|oct|nov|dec|jan|feb|mar)\w*\s*\d+)" +\
        r"|(?:\d+/\d+/?\d*)"
    date_matches = re.findall(date_regex, text, re.IGNORECASE)
    date_exist = bool(date_matches) and date_matches[0] != ""
    # a blank placeholder lets the parser pick the date when none was found
    date_text = date_matches[0] if date_exist else " "

    if date_exist:
        # using the date, pick the time by closest proximity in the text
        date_index = text.index(date_text)
        # ideally, only times with am/pm markers are considered
        marker_times = [found for found in time_matches
                        if "am" in found.lower() or "pm" in found.lower()]
        candidates = marker_times or time_matches
        # min() keeps the first candidate on ties
        time_text = min(
            candidates,
            key=lambda found: abs(date_index - text.index(found)))
    else:
        # if there is no date just choose the first time
        time_text = time_matches[0]

    # test if there is a separator in the time
    sep_check = ("-" in time_text or "–" in time_text
                 or " to " in time_text.lower())
    parse_settings = {'PREFER_DATES_FROM': 'future'}

    if sep_check:
        start_time_regex = r"(?:\d+:?\d*\s*(?:am|pm)?\s*)(?=-|–|to)"
        end_time_regex = \
            r"(?:(?<=-)|(?<=–)|(?<=to))(?:\s*\d+:?\d*\s*(?:am|pm)?)"
        # take the time before the separator
        start_time = re.findall(start_time_regex, time_text,
                                re.IGNORECASE)[0]
        # take the time after the separator
        end_time = re.findall(end_time_regex, time_text, re.IGNORECASE)[0]
        # now make sure both times have an am or pm
        start_time, end_time = set_am_pm(start_time, end_time)
        start_date = parse(date_text + " " + start_time,
                           settings=parse_settings)
        end_date = parse(date_text + " " + end_time,
                         settings=parse_settings)
    else:
        # a lone time without a marker is assumed to be in the afternoon
        start_time = time_text
        if "am" not in start_time.lower() and "pm" not in start_time.lower():
            start_time += " pm"
        start_date = parse(date_text + " " + start_time,
                           settings=parse_settings)
        end_date = None

    # the parser gives None for text it cannot interpret
    if start_date is None or (sep_check and end_date is None):
        return False

    today = date.today()
    start_date = _anchor_to_today(start_date, today, date_exist)
    if sep_check:
        end_date = _anchor_to_today(end_date, today, date_exist)
    else:
        # if there is no end time create a 1 hour time slot; this is done
        # after anchoring so a slot crossing midnight still ends after it
        # starts
        end_date = start_date + timedelta(hours=1)
    return [start_date, end_date]


def _anchor_to_today(moment, today, date_exist):
    """
    Move a parsed datetime into the current year, and onto today's date
    when the text held no date of its own.

    Args:
        moment: the datetime produced by the parser
        today: a date object for the current day
        date_exist: True when the text contained a date

    Returns:
        The adjusted datetime.
    """
    if not date_exist:
        # replace year, month and day together: with only a time to go on
        # the parser may pick a neighbouring day, which can sit in another
        # month, and replacing the day alone could then name a wrong or
        # non-existent date
        return moment.replace(year=today.year, month=today.month,
                              day=today.day)
    try:
        # make sure the year is current
        return moment.replace(year=today.year)
    except ValueError:
        # Feb 29 does not exist in the current year; keep the parser's year
        return moment
def get_mail(username, password):
    """
    Waits for a new email in the Gmail inbox of the given account and
    decodes the received email's subject, sender, and body.

    The newest message in the Inbox is read once as a reference, then the
    Inbox is polled until the newest message has a different subject or
    sender. If a poll fails, the connection is re-opened once and the poll
    retried; a failure of that retry is raised to the caller. An empty Inbox
    is treated as "nothing new yet". The IMAP connection is logged out
    before returning.

    Args:
        username: a string representing an email username
        password: a string representing an email password

    Returns:
        A list containing three strings representing the subject,
        sender, and body. If there is no plain-text body, a list containing
        subject, sender, and None will be returned.
    """
    mail = _login(username, password)
    try:
        # remember what the newest email looks like right now
        email_message = _fetch_latest(mail)
        reference_subject, reference_sender = _envelope(email_message)
        subject, sender = reference_subject, reference_sender

        # now continuously scrape the newest email until it changes
        while subject == reference_subject and sender == reference_sender:
            try:
                email_message = _fetch_latest(mail)
            except Exception:  # pylint: disable=broad-except
                # the bot may have been logged out so if scraping fails
                # retry the login once; Exception (not a bare except) lets
                # Ctrl-C and interpreter exit stop the polling loop
                _quiet_logout(mail)
                mail = None
                mail = _login(username, password)
                email_message = _fetch_latest(mail)
            if email_message is None:
                # empty inbox: nothing to compare against yet
                continue
            subject, sender = _envelope(email_message)

        return [subject, sender, _plain_text_body(email_message)]
    finally:
        _quiet_logout(mail)


def _login(username, password):
    """Open an encrypted connection to Gmail and log in to the account."""
    mail = imaplib.IMAP4_SSL("imap.gmail.com", port=993)
    mail.login(username, password)
    return mail


def _quiet_logout(mail):
    """Log out of a connection, ignoring errors from a dead connection."""
    if mail is None:
        return
    try:
        mail.logout()
    except Exception:  # pylint: disable=broad-except
        # best-effort cleanup: the connection may already be closed
        pass


def _fetch_latest(mail):
    """
    Return the newest Inbox email as a message object, or None when the
    Inbox holds no emails.
    """
    # select the folder we want to read mail from
    mail.select('Inbox')
    # search with no filter; data[0] holds the space-separated email ids
    (_, data) = mail.search(None, "ALL")
    mail_ids = data[0].split()
    if not mail_ids:
        return None
    # fetch the last id in the list, RFC822 requests the full message
    (_, data) = mail.fetch(mail_ids[-1], '(RFC822)')
    # data is a list containing a tuple; the raw email bytes are the second
    # item of that tuple
    return email.message_from_bytes(data[0][1])


def _envelope(email_message):
    """
    Return the decoded (subject, sender address) of a message, or
    (None, None) when there is no message.
    """
    if email_message is None:
        return None, None
    # NOTE(review): decode_header is imported from nntplib at the top of
    # this file; nntplib was removed from the standard library in Python
    # 3.13, so that import needs replacing before upgrading.
    subject = decode_header(email_message.get("Subject", ""))
    raw_sender = decode_header(email_message.get("From", ""))
    # use regex to extract the email address from "Name <address>"
    addresses = re.findall(r"(?:(?<=<).*(?=>))", raw_sender, re.IGNORECASE)
    # a From header without angle brackets is already a bare address
    sender = addresses[0] if addresses else raw_sender.strip()
    return subject, sender


def _plain_text_body(email_message):
    """
    Return the text of the last text/plain part of a message with the
    forwarded-email header block removed, or None if there is no such part.
    """
    body = None
    # walk iterates through the parts of the email
    for part in email_message.walk():
        # only capture the parts of the email that are plain text
        if part.get_content_type() != "text/plain":
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        body = str(email.message_from_bytes(payload))
        # Since forwarded emails have an extra date, we need regex to remove
        # them. flags must be passed by keyword: the fourth positional
        # argument of re.sub is count, not flags.
        body = re.sub(r"From:[\s\S]*Subject:", "", body,
                      flags=re.IGNORECASE)
    return body