-
Notifications
You must be signed in to change notification settings - Fork 0
/
ScheduleProcessor.py
102 lines (88 loc) · 3.14 KB
/
ScheduleProcessor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
from datetime import datetime, timedelta
import logging
import re
from BeautifulSoup import BeautifulSoup
from pacific_tzinfo import Pacific
# Schedule date headers carry a two-digit month/day/year, e.g. "12/20/11".
# Written as a raw string so the backslashes reach the regex engine as-is
# instead of relying on Python leaving unknown string escapes alone (which
# triggers invalid-escape warnings on current interpreters). The pattern
# text itself is unchanged; the escaped slash is redundant but harmless.
_date_expr = r'\d\d/\d\d\/\d\d'
# Compiled once at import time; parseDate() searches every line with it.
_date_match = re.compile(_date_expr)
def processPage(html):
    """Extract the distinct matches listed in a schedule page.

    The text of every <font> element is treated as one line. A line that
    parseDate() recognises becomes the current date; any other line is
    handed to parseMatch() together with the most recent date seen so far
    (None until the first date line appears).

    Args:
        html: markup of the schedule page.

    Returns:
        A list of Match objects in page order, without duplicates
        (duplicates are detected with ``in``, i.e. Match equality).
    """
    document = BeautifulSoup(html, convertEntities=BeautifulSoup.HTML_ENTITIES)
    texts = [tag.text for tag in document.findAll('font')]
    logging.info(str(len(texts)) + ' lines in the html found')

    current_date = None
    found = []
    for text in texts:
        parsed_date = parseDate(text)
        if parsed_date:
            # Date header: remember it for the match lines that follow.
            current_date = parsed_date
            logging.debug('updated date to {0}'.format(current_date))
            continue

        candidate = parseMatch(text, current_date)
        logging.debug('parseMatch returned {0}'.format(candidate))
        if not candidate or candidate in found:
            # Not a match line, or one already collected.
            continue
        found.append(candidate)
        logging.debug('added match: {0}'.format(candidate))
    return found
def parseDate(line):
    """Return the first mm/dd/yy date found in ``line``, or None.

    Args:
        line: text to search, e.g. 'Tuesday, 12/20/11 (week 1/10)'.

    Returns:
        A naive datetime for the matched date (time of day 00:00), or None
        when ``line`` holds no date-shaped text, the text is not a valid
        calendar date, or ``line`` is not a string.
    """
    try:
        found = _date_match.search(line)
    except TypeError:
        # Non-string input (e.g. None) simply carries no date.
        return None
    if found is None:
        return None
    try:
        return datetime.strptime(found.group(), '%m/%d/%y')
    except ValueError:
        # Looked like a date but is not one (e.g. month 13).
        return None
def parseMatch(line, date):
logging.debug('parsing "{0}"'.format(line))
try:
teams = line.split("vs")
time = datetime.strptime(teams[0].split()[0], '%I:%M%p').time()
logging.debug('extracted {0} as time'.format(time))
dark = ' '.join(teams[0].split()[1:])
logging.debug('dark = {0}'.format(dark))
white = teams[1].strip()
logging.debug('white = {0}'.format(white))
return Match(dark = dark,
white = white,
time = datetime.combine(date, time)
.replace(tzinfo=Pacific))
except:
return None
class Match(object):
    """One scheduled match between a 'dark' side and a 'white' side.

    Attributes:
        dark: name of the team listed as dark.
        white: name of the team listed as white.
        time: when the match takes place; must support ``strftime``.

    Two matches are equal when white, dark and time are all equal, and
    equal matches hash alike. The hash is derived from those attributes,
    so do not mutate a Match while it is stored in a set or used as a
    dict key.
    """

    def __init__(self, time, dark, white):
        self.dark = dark
        self.white = white
        self.time = time

    def __str__(self):
        return "{0}(dark) vs {1}(white) @ {2}".format(
            self.dark, self.white, self.formatted_time())

    def __repr__(self):
        return str(self)

    def _key(self):
        """Return the tuple of fields that define a match's identity."""
        return (self.white, self.dark, self.time)

    def __eq__(self, match):
        # Defer to the other operand for foreign types rather than failing
        # with AttributeError on e.g. ``some_match == None``.
        if not isinstance(match, Match):
            return NotImplemented
        return self._key() == match._key()

    def __ne__(self, match):
        # Python 2 does not derive != from ==, so define it explicitly.
        result = self.__eq__(match)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Kept consistent with __eq__ so matches work in sets and dicts.
        return hash(self._key())

    def color(self, my_name):
        """Return 'White' or 'Dark' for the side ``my_name`` plays, else None."""
        if self.white == my_name:
            return 'White'
        elif self.dark == my_name:
            return 'Dark'
        else:
            return None

    def opponent(self, my_name):
        """Return the other team's name, or None if ``my_name`` is not playing."""
        if self.white == my_name:
            return self.dark
        elif self.dark == my_name:
            return self.white
        else:
            return None

    def formatted_time(self):
        """Return the match time as weekday, locale date and 12-hour time."""
        return self.time.strftime("%a, %x %I:%M%p")

    def contains_team(self, team_name):
        """Return True if ``team_name`` is either side of this match."""
        return self.white == team_name or self.dark == team_name
if __name__ == '__main__':
    # Ad-hoc smoke run: exercises the parsers against a locally saved
    # schedule page and a few hand-written sample lines. Nothing is
    # printed; inspect the resulting names in a debugger or with -i.
    # Read through a context manager so the file handle is closed promptly.
    with open('sample.html') as sample:
        html = sample.read()
    soup = BeautifulSoup(html, convertEntities=BeautifulSoup.HTML_ENTITIES)
    matchstr = ' 8:45pm Mad Samba vs Tuxedos'
    datestr = 'Tuesday, 12/20/11 (week 1/10)'
    datestr2 = '--Tuesday, 02/07/12 (week 8/10)'
    d = parseDate(datestr)
    t = datetime.strptime('8:45pm', '%I:%M%p')
    matches = processPage(html)