-
Notifications
You must be signed in to change notification settings - Fork 0
/
lighten_up_calgary_2023.py
43 lines (34 loc) · 1.38 KB
/
lighten_up_calgary_2023.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from datetime import datetime
from urllib.parse import urljoin
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
from lighten_up_calgary_2022 import LightenUpCalgary2022
class LightenUpCalgary2023(LightenUpCalgary2022):
    """Scraper for the 2023 edition of the listing pages.

    Relies on ``base_url`` and ``get_geocode``, neither of which is defined
    in this class (presumably inherited from ``LightenUpCalgary2022`` --
    confirm against that module).
    """

    # Seconds to wait for each listing page. Without an explicit timeout
    # ``requests.get`` may block indefinitely on an unresponsive server.
    _REQUEST_TIMEOUT = 30

    @classmethod
    def get_addresses(cls):
        """Collect geocoded address records from every listing page.

        Each page named in ``pages`` is fetched relative to ``cls.base_url``
        and every ``div.et_pb_section`` except the last one is inspected. The
        last line of the section's ``div.et_pb_text_inner`` text is taken as
        the street address and passed to ``cls.get_geocode``.

        Returns:
            list[dict]: One dict per successfully geocoded section, with the
            keys ``address``, ``quadrant`` (the page slug), ``lat``, ``lng``
            and ``last_updated`` (ISO-8601 string taken once, before the
            scrape starts, so every record of a run shares the same value).

        Raises:
            requests.RequestException: If a page cannot be fetched, times
                out, or answers with an HTTP error status.
        """
        pages = ["calgary-nw", "calgary-ne", "calgary-sw", "calgary-se", "surroundings"]
        records = []
        last_updated = datetime.now().isoformat()

        for location in pages:
            url = urljoin(cls.base_url, location)
            response = requests.get(url, timeout=cls._REQUEST_TIMEOUT)
            # Fail loudly instead of silently parsing an error page as if it
            # were a listing.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            divs = soup.find_all("div", class_="et_pb_section")

            # The final section is skipped on every page -- presumably it is
            # not a listing entry (e.g. a footer); verify against the site.
            for div in tqdm(divs[:-1], desc=location):
                inner_text = div.find("div", class_="et_pb_text_inner")
                if inner_text is None:
                    # Section without a text block: nothing to geocode.
                    continue

                # The address is the last line of the text block; drop
                # non-breaking spaces left over from the page markup.
                address = inner_text.text.split("\n")[-1].replace("\xa0", "")
                if not address.strip():
                    # Avoid geocoding the meaningless query ", Calgary".
                    continue

                # NOTE(review): ", Calgary" is appended for the
                # "surroundings" page as well -- confirm this is intended.
                lat, lng, address = cls.get_geocode(f"{address}, Calgary")
                if not lat or not lng or not address:
                    continue

                records.append(
                    {
                        "address": address,
                        "quadrant": location,
                        "lat": lat,
                        "lng": lng,
                        "last_updated": last_updated,
                    }
                )
        return records