-
Notifications
You must be signed in to change notification settings - Fork 0
/
FDA_Drug_href.py
36 lines (31 loc) · 1.08 KB
/
FDA_Drug_href.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from urllib.request import urlopen
from bs4 import BeautifulSoup
import csv
# Scrape the FDA drug-shortage index page, follow every link found in its
# main table, collect the <tr> rows inside each detail page's "accordion"
# section, and write the text of their cells to drug_shortage_href.csv.

# Fetch and parse the index page; the response is closed as soon as it has
# been parsed.
with urlopen(
        "https://www.accessdata.fda.gov/scripts/drugshortages/default.cfm"
) as html:
    bsObj = BeautifulSoup(html, "lxml")

table = bsObj.find('table', id="cont")
if table is None:
    # Without the main table there is nothing to follow; fail with a clear
    # message instead of an AttributeError on None further down.
    raise RuntimeError('table with id="cont" not found on the index page')

# Keep only anchors that carry an href and have visible link text.
links = [a['href'] for a in table.find_all('a', href=True) if a.text]

# The hrefs are appended to the script directory URL. Spaces and the
# registered-trademark sign are percent-encoded by hand so that urlopen
# accepts the resulting URL.
new_links = [
    ("https://www.accessdata.fda.gov/scripts/drugshortages/" + link)
    .replace(" ", "%20")
    .replace("®", "%C2%AE")
    for link in links
]

href_rows = []
for link in new_links:
    with urlopen(link) as html:
        bsObj_href = BeautifulSoup(html, "lxml")
    div_href = bsObj_href.find("div", {"id": "accordion"})
    if div_href is None:
        # A page without the expected section should not abort the whole run.
        print("No accordion section found, skipping: " + link)
        continue
    # extend (not append): href_rows must be a flat list of <tr> tags so that
    # each element can be searched for cells below. Appending the whole
    # ResultSet made the CSV loop fail with AttributeError.
    href_rows.extend(div_href.find_all("tr"))
print(href_rows)

# newline='' is what the csv module expects for files it writes to; an
# explicit UTF-8 encoding avoids UnicodeEncodeError on non-ASCII cell text
# under locale encodings that cannot represent it.
with open("drug_shortage_href.csv", 'w', newline='',
          encoding='utf-8') as csvFile:
    writer = csv.writer(csvFile)
    for row in href_rows:
        # One CSV record per table row, header and data cells alike.
        writer.writerow(
            [cell.get_text() for cell in row.find_all(['td', 'th'])])