-
Notifications
You must be signed in to change notification settings - Fork 0
/
doidump.py
33 lines (27 loc) · 873 Bytes
/
doidump.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import json
import urllib
# Request template for the Crossref "works" listing. The query string limits
# results to journal articles and asks for 50 rows per request; ``{cursor}``
# is filled in with ``str.format`` for every page that is fetched.
URL = (
    "http://api.crossref.org/works"
    "?filter=type:journal-article"
    "&cursor={cursor}"
    "&rows=50"
)

# Default path of the text file the collected titles are written to.
OUTFILE = "data/doi.txt"
def get_titles(items):
    """Return the first title of every item that has at least one title.

    Args:
        items: Iterable of dict-like records. Each record may carry a
            ``'title'`` entry holding a list of title strings.

    Returns:
        A list with the first title of each record, in input order. Records
        whose ``'title'`` entry is missing, ``None`` or empty are skipped
        instead of raising, so one incomplete record cannot abort a harvest.
    """
    return [item['title'][0] for item in items if item.get('title')]
def write(titles, outfile):
    """Write the unique titles to *outfile*, one per line, encoded as UTF-8.

    The file is overwritten on every call. Duplicates are dropped and the
    remaining titles are sorted so that repeated calls with the same data
    produce the same file. No trailing newline is written.

    Args:
        titles: Iterable of title strings; may contain duplicates.
        outfile: Path of the file to (re)write.
    """
    # Local import: ``io.open`` accepts ``encoding`` on both Python 2 and
    # Python 3, so text is encoded by the file object rather than by hand.
    import io

    unique_titles = sorted(set(titles))
    with io.open(outfile, 'w', encoding='utf-8') as f:
        # The ``u`` prefix keeps the joined value a text string even when
        # the list is empty, which ``io.open`` requires on Python 2.
        f.write(u'\n'.join(unique_titles))
def main(outfile=OUTFILE, pages=200):
    """Harvest journal-article titles from Crossref and save them to a file.

    Requests the listing described by ``URL`` page by page, starting from
    the ``"*"`` cursor and following the ``next-cursor`` value returned with
    each page. The titles collected so far are written to *outfile* every
    20 pages as a checkpoint, and once more when the loop ends.

    Args:
        outfile: Path of the text file that receives the unique titles.
        pages: Maximum number of pages to request. Harvesting stops earlier
            if a page comes back with no items.
    """
    # Resolved locally so the function runs on Python 3 and still falls back
    # to the Python 2 locations of the same helpers.
    try:
        from urllib.request import urlopen
        from urllib.parse import quote
    except ImportError:  # Python 2
        from urllib import urlopen, quote
    from contextlib import closing

    titles = []
    cursor = "*"
    for page in range(pages):
        # The cursor is an opaque token; percent-encode it so characters
        # such as '+' or '=' are not reinterpreted inside the query string.
        # '*' is kept literal so the first request is unchanged.
        url = URL.format(cursor=quote(cursor, safe='*'))

        # ``closing`` releases the connection even if parsing fails; the
        # Python 2 response object is not a context manager by itself.
        with closing(urlopen(url)) as response:
            payload = json.loads(response.read().decode('utf-8'))

        message = payload['message']
        items = message['items']
        if not items:
            # An empty page means there is nothing left to fetch.
            break

        titles.extend(get_titles(items))
        cursor = message['next-cursor']

        if page % 20 == 0:
            # Progress report plus checkpoint, so an interrupted run still
            # leaves the titles gathered so far on disk.
            print('%d %s' % (page, cursor))
            write(titles, outfile)

    write(titles, outfile)
# Script entry point: run the harvest with its default arguments when the
# file is executed directly; importing the module does not start it.
if __name__ == "__main__":
    main()