-
Notifications
You must be signed in to change notification settings - Fork 44
/
app.py
110 lines (94 loc) · 3.41 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import json
from collections import defaultdict
from flask import Flask, request, jsonify
from pytrie import Trie
import uuid
import requests
import time
import re
# Flask application object that the route decorators in this module attach to.
app = Flask(__name__)

# In-memory dataset and lookup tables; load_data() fills them in.
data = []
country_index = defaultdict(list)
name_index = {}
domain_index = defaultdict(list)

# Minimum number of seconds that must pass between two dataset refreshes
# requested through /update (24 hours).
UPDATE_WAIT_TIME = 24 * 60 * 60
def _unique_records(records):
    """Return ``records`` without repeated objects, keeping first-seen order.

    Comparison is by identity: load_data() stores the very same record object
    under several ``name_index`` keys (the full name plus one key per later
    word), so a single prefix lookup can yield one university several times.
    """
    seen_ids = set()
    unique = []
    for record in records:
        if id(record) not in seen_ids:
            seen_ids.add(id(record))
            unique.append(record)
    return unique


@app.route("/search")
def search():
    """Return the universities matching the query-string filters as JSON.

    Query parameters (all optional):
        name: case-insensitive prefix looked up in ``prefix_tree``.
        name_contains: literal text that must appear, case-insensitively, at
            a word boundary inside the university name.
        country: case-insensitive country name.
        domain: domain looked up verbatim in ``domain_index``.
        offset: number of leading results to skip.
        limit: maximum number of results to return.

    Only the first applicable filter combination is used, in this order:
    ``name`` + ``country``, ``name_contains`` (+ ``country`` when given),
    ``name``, ``country``, ``domain``.  Without any filter the whole dataset
    is returned.

    Returns:
        A JSON array of university records, or a JSON error object with HTTP
        status 400 when ``offset`` or ``limit`` is not a non-negative integer.
    """
    if not data_loaded:
        load_data()

    args = request.args
    country = args.get('country')
    name = args.get('name')
    name_contains = args.get('name_contains')
    domain = args.get("domain")

    # Validate paging up front: a malformed value used to raise ValueError
    # (HTTP 500) and a negative one silently sliced from the end of the list.
    paging = {}
    for key in ('offset', 'limit'):
        raw = args.get(key)
        if not raw:
            continue
        try:
            value = int(raw)
        except ValueError:
            value = -1
        if value < 0:
            message = "'{0}' must be a non-negative integer.".format(key)
            return jsonify({'status': 'error', 'message': message}), 400
        paging[key] = value

    if country:
        country = country.lower()

    if name and country:
        matches = _unique_records(prefix_tree.values(prefix=name.lower()))
        filtered = [uni for uni in matches if uni['country'].lower() == country]
    elif name_contains:
        # The value comes straight from the client, so escape it: it is
        # matched as literal text and can neither break the pattern nor
        # trigger catastrophic backtracking.
        pattern = re.compile(r'\b' + re.escape(name_contains.lower()))
        filtered = [uni for uni in data if pattern.search(uni['name'].lower())]
        if country:
            filtered = [uni for uni in filtered if uni['country'].lower() == country]
    elif name:
        filtered = _unique_records(prefix_tree.values(prefix=name.lower()))
    elif country:
        # .get() instead of [] so that unknown values do not create new empty
        # entries in the defaultdict on every request.
        filtered = country_index.get(country, [])
    elif domain:
        filtered = domain_index.get(domain, [])
    else:
        filtered = data

    # Slicing past the end already yields an empty list, so no bounds check
    # is needed for offset.
    if 'offset' in paging:
        filtered = filtered[paging['offset']:]
    if 'limit' in paging:
        filtered = filtered[:paging['limit']]

    return jsonify(filtered)
# Value of time.time() recorded by update() after its last refresh; the
# initial 0 lets the first /update request through.
last_updated = 0
# Flipped to True at the end of load_data(); the views check it to decide
# whether the dataset still has to be fetched.
data_loaded = False
def load_data():
    """Download the university dataset and rebuild all lookup structures.

    Fetches the JSON list, then builds from scratch:
      * ``country_index``: lower-cased country -> list of records,
      * ``domain_index``: domain -> list of records,
      * ``name_index``: lower-cased full name -> record, plus one extra key
        per word after the first (word + a UUID suffix to keep keys unique)
        so that prefix search also matches inner words,
      * ``prefix_tree``: a ``Trie`` over ``name_index``.

    Everything is assembled in local variables and only published to the
    module globals at the end, so a failed download or parse leaves the
    previously loaded data untouched, and repeated calls do not pile
    duplicate entries onto the existing indexes.

    Raises:
        requests.RequestException: if the download fails, times out or the
            server answers with an HTTP error status.
        ValueError: (or a subclass) if the response body is not valid JSON.
    """
    global data_loaded, prefix_tree, data, country_index, name_index, domain_index

    # Without a timeout requests may block indefinitely on a stalled connection.
    response = requests.get(
        "https://raw.githubusercontent.com/Hipo/university-domains-list/master/world_universities_and_domains.json",
        timeout=30,
    )
    response.raise_for_status()
    records = response.json()

    new_country_index = defaultdict(list)
    new_name_index = dict()
    new_domain_index = defaultdict(list)

    for record in records:
        new_country_index[record["country"].lower()].append(record)
        # NOTE: records sharing the same lower-cased full name overwrite each
        # other here; only the last one stays reachable through this key.
        new_name_index[record['name'].lower()] = record
        for domain in record["domains"]:
            new_domain_index[domain].append(record)
        # Index every word after the first; the slice is simply empty for
        # single-word names.
        for word in record['name'].split(" ")[1:]:
            new_name_index[word.lower() + str(uuid.uuid1())] = record

    new_prefix_tree = Trie(**new_name_index)

    # Publish the fully built structures in one go.
    data = records
    country_index = new_country_index
    name_index = new_name_index
    domain_index = new_domain_index
    prefix_tree = new_prefix_tree
    data_loaded = True
@app.route('/update')
def update():
    """Reload the dataset unless it was refreshed less than UPDATE_WAIT_TIME seconds ago.

    Returns:
        A JSON object with ``status`` and ``message`` keys describing whether
        the reload happened.
    """
    global last_updated

    earliest_allowed = last_updated + UPDATE_WAIT_TIME
    if time.time() < earliest_allowed:
        return jsonify({'status': 'error', 'message': 'Dataset had been updated recently. Try again later.'})

    load_data()
    # Stamp the time after the reload has finished, not when it was requested.
    last_updated = time.time()
    return jsonify({'status': 'success', 'message': 'Dataset updated!'})
@app.route('/')
def index():
    """Serve the landing document: author details, an example query and the GitHub link.

    Also triggers the initial dataset load when it has not happened yet.
    """
    if not data_loaded:
        load_data()

    # Named ``about`` so it does not shadow the module-level ``data`` list.
    about = dict(
        author=dict(name='hipo', website='http://hipolabs.com'),
        example='http://universities.hipolabs.com/search?name=middle&country=Turkey',
        github='https://github.com/Hipo/university-domains-list',
    )
    return jsonify(about)
# Run the Flask application with its default settings when this file is
# executed directly rather than imported.
if __name__ == "__main__":
    app.run()