-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
57 lines (49 loc) · 2.14 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import os
from links import LINKS
from scrapers import *
from common import CSVWriter, ErrorWriter
def _make_standard_scraper(link, csvwriter, errorwriter):
    """Return a scraper for *link*, or ``None`` if no standard publisher matches.

    "Standard" publishers are those whose scraper is driven by
    ``getjournallinks()`` followed by ``geteditors()``. PLOS is handled
    separately by the caller because it skips the link-discovery step.

    Args:
        link: Publisher URL taken from ``LINKS``.
        csvwriter: Writer passed through to the scraper constructor.
        errorwriter: Error sink passed through to the scraper constructor.
    """
    # (domain substring, scraper class, optional notice printed before
    # the scraper is constructed). Order matters: the first substring found
    # in the link wins.
    publishers = (
        ('www.springer.com', SpringerScraper, None),
        ('journals.sagepub.com', SageScraper, None),
        ('www.elsevier.com', ElsevierScraper, None),
        ('www.dovepress.com', DovePressScraper,
         'DovePress: Need to query each journal link for editorial board, this might take a while'),
        ('www.cambridge.org', CambridgeScraper, None),
        ('pubs.rsc.org', RSCScraper,
         'RSoC: Need to query each journal link for editorial board, this might take a while'),
        ('direct.mit.edu', MITScraper, None),
        ('www.tandfonline.com', TaylorFrancisScraper, None),
        ('brill.com', BrillScraper, None),
        ('www.frontiersin.org', FrontiersScraper,
         'Frontiers: Need to query API, this might take a while'),
    )
    for domain, scraper_cls, notice in publishers:
        if domain in link:
            if notice:
                print(notice)
            return scraper_cls(link, csvwriter, errorwriter)
    return None


def main():
    """Scrape editor data for every link in ``LINKS``.

    For each link, a publisher-specific scraper is chosen by matching a
    domain substring, then asked to collect journal links and editors.
    Links that match no known publisher are reported on stdout and
    recorded through ``errorwriter.addsearchlink``.

    ``csvwriter.teardown()`` is always called once the writer exists, even
    if a scraper raises, and the exception then propagates to the caller.
    """
    # Create the 'out' directory up front; exist_ok avoids the
    # check-then-create race of a separate os.path.exists() test.
    # NOTE(review): presumably the writers place their files here - confirm.
    os.makedirs('out', exist_ok=True)

    csvwriter = CSVWriter()
    try:
        errorwriter = ErrorWriter()
        total = len(LINKS)
        for idx, link in enumerate(LINKS):
            print('Link %d/%d' % (idx + 1, total))

            scraper = _make_standard_scraper(link, csvwriter, errorwriter)
            if scraper is not None:
                scraper.getjournallinks()
            elif 'journals.plos.org' in link:
                # PLOS: the editorial-board URL is built directly from the
                # link, so journallinks is set by hand and the
                # getjournallinks() step is skipped.
                scraper = PLOSScraper(link, csvwriter, errorwriter)
                scraper.journallinks = [link + 's/editorial-board']
            else:
                print('Error: Invalid link')
                errorwriter.addsearchlink(link)
                continue

            scraper.geteditors()
    finally:
        # Run teardown on failure too, so a crash in one scraper does not
        # skip the writer's cleanup.
        csvwriter.teardown()

    print('Done')
# Script entry point: run the scrape only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()