-
Notifications
You must be signed in to change notification settings - Fork 0
/
drug_similarity.py
91 lines (58 loc) · 2.7 KB
/
drug_similarity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import json
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
from drug_interactions.reader.reader import DrugReader
from drug_interactions.datasets.dataset_builder import get_smiles_drugs, get_train_test_ids
# Prefix of the DrugBank structure-search URL. get_similar_drugs() appends a
# drug id directly after "database_id=" to build the page it fetches.
URL = "https://go.drugbank.com/structures/search/small_molecule_drugs/structure?database_id="
def load_drugs():
    """Read the two DrugBank releases this script works with.

    A ``DrugReader`` rooted at ``./data/DrugBankReleases`` is asked for
    versions ``5.1.3`` and ``5.1.6``.

    Returns:
        A 2-tuple with the two objects produced by
        ``DrugReader.get_drug_data`` (presumably the older release first,
        then the newer one -- confirm against ``DrugReader``).
    """
    releases_dir = './data/DrugBankReleases'
    first_version, second_version = '5.1.3', '5.1.6'
    older, newer = DrugReader(releases_dir).get_drug_data(first_version, second_version)
    return older, newer
def filter_drugs(old_drug_bank, new_drug_bank):
    """Pass both drug banks through ``get_smiles_drugs`` with ``atom_size=300``.

    Args:
        old_drug_bank: First drug bank; processed first.
        new_drug_bank: Second drug bank; processed second.

    Returns:
        A 2-tuple ``(filtered_old, filtered_new)`` holding whatever
        ``get_smiles_drugs`` returned for each input, in the same order.
    """
    # The generator is consumed left to right, so the old bank is still
    # filtered before the new one.
    filtered_old, filtered_new = (
        get_smiles_drugs(bank, atom_size=300)
        for bank in (old_drug_bank, new_drug_bank)
    )
    return filtered_old, filtered_new
def _parse_similar_drugs(html):
    """Extract ``(drug_id, score)`` pairs from a structure-search result page.

    Args:
        html: Text of the fetched page.

    Returns:
        ``None`` when the page has no ``table.table-striped`` element or that
        table has no ``<tbody>``; otherwise a (possibly empty) list of
        ``(drug_id, score)`` tuples, one per well-formed row.
    """
    soup = BeautifulSoup(html, 'html.parser')
    table = soup.find('table', {"class": "table table-striped"})
    body = table.find('tbody') if table is not None else None
    if body is None:
        return None

    pairs = []
    for row in body.find_all('tr'):
        cells = row.find_all('td')
        if not cells:
            # A row without cells carries no result; skip it instead of
            # raising IndexError.
            continue
        # The first cell is expected to read "<drug id>", a newline, then
        # "Score: <float>".
        drug_id, _, score = cells[0].text.partition('\nScore: ')
        try:
            # float('') raises too, so a missing separator is also skipped.
            # The id is stripped so stray whitespace around it cannot break
            # later membership tests against known drug ids.
            pairs.append((drug_id.strip(), float(score)))
        except ValueError:
            continue
    return pairs


def get_similar_drugs(new_drug_ids, timeout=30):
    """Fetch the structure-search page of every drug id and collect its results.

    For each id the page at ``URL + id + '#results'`` is downloaded and its
    result table is parsed. Failures are reported on stdout and counted;
    they never abort the loop, so results already gathered are preserved.

    Args:
        new_drug_ids: Iterable of drug ids to query.
        timeout: Seconds to wait for each HTTP request before treating the
            drug as failed. Without a timeout a stalled connection would
            block the whole run indefinitely.

    Returns:
        Dict mapping every requested id to a list of ``(similar_drug_id,
        score)`` tuples. Ids whose request failed or whose page had no
        result table map to an empty list.
    """
    similar_drugs_dict = {}
    bad_drugs = 0
    for id_ in (t := tqdm(new_drug_ids)):
        t.set_description(f'Drug: {id_}')
        similar_drugs_dict[id_] = []
        drug_path = f'{URL}{id_}#results'

        try:
            r = requests.get(drug_path, timeout=timeout)
        except requests.RequestException as err:
            # Network problems (timeouts included) only cost this one drug.
            print(f'Request failed for {id_}: {err}')
            bad_drugs += 1
            continue

        found = _parse_similar_drugs(r.text)
        if found is None:
            print(f'Failed to find similar drug for {id_}')
            bad_drugs += 1
            continue
        similar_drugs_dict[id_] = found

    print(f'Total Bad Drugs: {bad_drugs}')
    return similar_drugs_dict
def save_dict(similar_drugs, path):
    """Write ``similar_drugs`` to ``path`` as JSON.

    Missing parent directories are created first, so a long scrape is not
    lost to a ``FileNotFoundError`` at the very end.

    Args:
        similar_drugs: JSON-serializable object (tuples are written as JSON
            arrays).
        path: Destination file path; an existing file is overwritten.
    """
    # Function-scope import keeps this block usable without touching the
    # file-level import list.
    from pathlib import Path

    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open('w', encoding='utf-8') as f:
        json.dump(similar_drugs, f)
def filter_new_drugs(similar_drugs, old_drug_ids):
    """Keep only the similar drugs whose id appears in ``old_drug_ids``.

    Args:
        similar_drugs: Dict mapping a drug id to an iterable of
            ``(drug_id, score)`` pairs.
        old_drug_ids: Iterable of hashable drug ids that are allowed to
            remain.

    Returns:
        A new dict with the same keys as ``similar_drugs``; each value is a
        list of ``(drug_id, score)`` tuples, in the original order,
        restricted to ids found in ``old_drug_ids``. The input is not
        modified.
    """
    # Build the lookup once: constant-time membership instead of a linear
    # scan per candidate, and one-shot iterables are consumed exactly once.
    allowed = set(old_drug_ids)
    return {
        id_: [(drug_id, score) for drug_id, score in similar_drug_list if drug_id in allowed]
        for id_, similar_drug_list in similar_drugs.items()
    }
def main():
    """Scrape similar drugs for every drug id and write two JSON files.

    Steps:
      1. Load both drug banks and filter them with ``filter_drugs``.
      2. Obtain the old/new id collections from ``get_train_test_ids``.
      3. Scrape similar drugs for the union of both id collections and save
         the result to ``./data/jsons/similar_drugs_dict_all.json``.
      4. Restrict each result list to ids present in the old collection and
         save that to ``./data/jsons/similar_drugs_dict_only_old.json``.
    """
    all_results_path = './data/jsons/similar_drugs_dict_all.json'
    old_only_results_path = './data/jsons/similar_drugs_dict_only_old.json'

    raw_old_bank, raw_new_bank = load_drugs()
    old_bank, new_bank = filter_drugs(raw_old_bank, raw_new_bank)
    old_drug_ids, new_drug_ids = get_train_test_ids(old_bank, new_bank)

    every_id = set(old_drug_ids) | set(new_drug_ids)
    scraped = get_similar_drugs(every_id)
    save_dict(scraped, path=all_results_path)

    old_only = filter_new_drugs(scraped, old_drug_ids)
    save_dict(old_only, path=old_only_results_path)
# Run the scrape only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()