-
Notifications
You must be signed in to change notification settings - Fork 0
/
actorws.py
267 lines (228 loc) · 9.17 KB
/
actorws.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
import requests
import json
import logging
class CCTE_EPA:
    """
    New endpoints to use instead of ACTORWS.

    https://ccte-api-ccd.epa.gov.
    Only available within the EPA network, and technically
    a staging server. Public endpoints deployed at
    https://api-ccte.epa.gov/
    """

    def __init__(self):
        """Set up endpoint URLs, request/result templates and the request timeout."""
        self.base_url = "https://ccte-api-ccd.epa.gov"
        self.batch_url = self.base_url + "/batchsearch/results"
        self.single_url = self.base_url + "/ccdapp1/search/chemical"
        self.calc = "actorws"  # NOTE: Test before changing this
        self.props = ['dsstox', 'chemid']
        self.batch_result_keys = ["input", "foundBy", "dtxsid", "dtxcid", "casrn", "preferredName"]
        self.single_result_keys = ["dtxsid", "dtxcid", "searchMatch", "rank", "hasStructureImage", "searchWord"]
        # Subset of the batch result keys that get_chemical_results() returns.
        self.keys_of_interest = ["dtxsid", "casrn", "preferredName"]
        # self.dsstox_result_keys = ['casrn', 'dsstoxSubstanceId', 'preferredName', 'smiles', 'iupac']  # what we used to get back
        # Template for the batch POST body; copied (never mutated) per request.
        self.batch_request = {
            "inputType": "IDENTIFIER",
            "identifierTypes": [],
            "searchItems": ""
        }
        # Passed as the `timeout` argument of requests.post().
        self.default_timeout = 2

        ########################################
        # BATCH REQUEST EXAMPLES
        ########################################
        # Example batch requests 1:
        # {
        #     "inputType" : "IDENTIFIER",
        #     "identifierTypes" : ["CHEMICAL_NAME"],
        #     "searchItems" : "TOLMETIN SODIUM\nSODIUM SUCCINATE\nSODIUM SUCCINATE\nSELENIUM SULFIDE\nbpa"
        # }
        # Example batch requests 2:
        # {
        #     "inputType" : "IDENTIFIER",
        #     "identifierTypes" : ["CASRN"],
        #     "searchItems" : "3/1/4860\n75-O5-8\n11121-31-6\n7782-50-5;\n0000542881\n0000542881;\n75058\n75-05-8\n75-50-8\n75-08-5"
        # }
        # Example batch requests 3:
        # {
        #     "inputType" : "IDENTIFIER",
        #     "identifierTypes" : ["DTXSID"],
        #     "searchItems" : "DTXSID8021569\nDTXSID8021640\nDTXSID8021642\nDTXSID8021644\nDTXSID8021646\nDTXSID8021690\nDTXSID8023923\nDTXSID8039241\nDTXSID8044466\nDTXSID8060955\nDTXSID8075049"
        # }
        # Example batch requests 4:
        # {
        #     "inputType" : "MASS",
        #     "identifierTypes" : [""],
        #     "searchItems" : "189\n191",
        #     "massError" : 0.5
        # }
        # Example batch requests 5:
        # {
        #     "inputType" : "EXACT_FORMULA",
        #     "identifierTypes" : [""],
        #     "searchItems" : "XYX\nC6H6O"
        # }
        # Example batch requests 6:
        # {
        #     "inputType" : "MSREADY_FORMULA",
        #     "identifierTypes" : [""],
        #     "searchItems" : "XYX\nC6H6O"
        # }
        ########################################

        ########################################
        # SINGLE REQUEST EXAMPLES
        ########################################
        # Single request example 1:
        # https://ccte-api-ccd.epa.gov/ccdapp1/search/chemical/start-with/benzene
        # Single request example 2:
        # https://ccte-api-ccd.epa.gov/ccdapp1/search/chemical/equal/DTXSID0020103
        # Single request example 3:
        # https://ccte-api-ccd.epa.gov/ccdapp1/search/chemical/equal/120155-79-5
        # Single request example 4:
        # https://ccte-api-ccd.epa.gov/ccdapp1/search/chemical/contain/citrate
        ########################################

        # Template for returned results. Treat as read-only: results handed
        # to callers are built by _new_result() so they never share state.
        self.result_obj = {
            "calc": "actorws",
            "prop": "",
            "data": {},
        }

    def _make_request(self, url, data):
        """
        POST `data` as JSON to `url` and return the decoded JSON response.

        Returns None (after logging a warning) if the request times out,
        the connection fails, or any other exception occurs while sending.
        Raises Exception if the server answers with a status other than 200.
        """
        try:
            _response = requests.post(
                url,
                json=data,
                headers={'Content-Type': 'application/json'},
                timeout=self.default_timeout,
            )
        except requests.exceptions.Timeout:
            logging.warning("Request to %s timed out.. No data from actorws..", url)
            return None
        except requests.exceptions.ConnectionError:
            logging.warning("Connection error for %s.. No data from actorws..", url)
            return None
        except Exception as e:
            logging.warning("Exception occurred in chemical information module: %s", e)
            return None

        if _response.status_code != 200:
            # return {'success': False, 'error': "error connecting to actorws", 'data': None}
            logging.warning("Exception in actorws.py making request to actorws: %s", _response)
            raise Exception("ACTORWS request was not successful.")

        return json.loads(_response.content)

    def _new_result(self, prop):
        """Return a fresh result dict for `prop` with its own empty 'data' dict."""
        # dict() is only a shallow copy, so 'data' must be replaced; otherwise
        # every result would write into the template's single nested dict.
        result = dict(self.result_obj)
        result['prop'] = prop
        result['data'] = {}
        return result

    def get_chemical_results(self, chemical, chem_type):
        """
        Makes request to CCTE batch endpoint to get chemical data.

        NOTE: Worked with chemical name or CASRN as input, not so much SMILES.
        TODO: Use chemical name or CAS from chemaxon if user-entered chemical is not
        one of those two.

        Inputs: chemical - the identifier to search for
                chem_type - "name" or "casrn"
        Output: result dict with 'calc', 'prop' ("dsstox") and 'data' keys.
                'data' holds the keys_of_interest found in the first batch
                result, or {'dsstox': "N/A"} if the lookup failed.
                Returns None for any other chem_type.
        """
        # Builds request object:
        post_data = dict(self.batch_request)
        if chem_type == "name":
            post_data["identifierTypes"] = ["CHEMICAL_NAME"]
        elif chem_type == "casrn":
            post_data["identifierTypes"] = ["CASRN"]
        else:
            # TODO: Handle exception. Callers currently receive None here.
            logging.warning("Unsupported chem_type for CCTE request: %s", chem_type)
            return None
        post_data["searchItems"] = chemical

        # Makes request to CCTE and gets results of interest:
        try:
            response = self._make_request(self.batch_url, post_data)
            if response is None:
                # _make_request already logged the underlying cause.
                raise ValueError("no response from CCTE")
            first_match = response[0]
            data = {
                key: val
                for key, val in first_match.items()
                if key in self.keys_of_interest
            }
        except Exception as e:
            logging.warning("Error getting CCTE data: %s", e)
            logging.warning("Using only Jchem WS results instead, setting dsstox value to N/A")
            data = {'dsstox': "N/A"}

        _results = self._new_result("dsstox")
        _results['data'] = data
        return _results
class ACTORWS(object):
    """
    Uses actor web services to obtain curated
    CAS#, SMILES, preferred name, iupac, and DTXSID.

    Location: https://actorws.epa.gov/actorws/
    """

    def __init__(self):
        """Set up endpoint URLs, result-key lists, the result template and the request timeout."""
        self.base_url = "https://actorws.epa.gov/actorws"
        self.chemid_url = "https://actorws.epa.gov/actorws/chemIdentifier/v01/resolve.json"  # ?identifier=[chemical name or SMILES]
        self.dsstox_url = "https://actorws.epa.gov/actorws/dsstox/v02/casTable.json"  # ?identifier=[casrn or gsid]
        self.calc = "actorws"
        self.props = ['dsstox', 'chemid']
        self.chemid_result_keys = ['synGsid']  # chemidentifier result key of interest
        self.chemid_all_keys = ['origIdentifier', 'casrn', 'preferredName', 'synGsid', 'synType', 'synIdentifier', 'dtxsid', 'dtxcid', 'jChemInChIKey', 'indigoInChIKey', 'smiles', 'molFormula', 'molWeight', 'collidingGsid', 'collidingCasrn', 'collidingPreferredName', 'trimmedWhitespace', 'trimmedLeadingZeros', 'reformattedIdentifier', 'checksum', 'processedAs', 'infoMsg', 'warningMsg', 'msReadyForms', 'qsarForms', 'imageURL']
        self.chemid_keys_map = {
            'casrn': 'casrn',
            'preferredName': 'preferredName',
            'synGsid': 'gsid',
            'dtxsid': 'dsstoxSubstanceId',
            'dtxcid': 'dtxcid',
            'smiles': 'smiles',
            'molFormula': 'formula',
            'molWeight': 'mass',
        }
        # Keys copied from a dsstox record into the returned 'data' dict.
        self.dsstox_result_keys = ['casrn', 'dtxsid', 'preferredName', 'smiles', 'iupac']
        # Template for returned results. Treat as read-only: results handed
        # to callers are built by _new_result() so they never share state.
        self.result_obj = {
            'calc': "actorws",
            'prop': "",
            'data': {},
        }
        # Passed as the `timeout` argument of requests.get(). make_request()
        # reads this attribute; without it every request failed with an
        # AttributeError that the broad except clause silently swallowed.
        self.default_timeout = 2

    def make_request(self, url, payload):
        """
        GET `url` with `payload` as query parameters and return the decoded JSON.

        Returns None (after logging a warning) if the request times out,
        the connection fails, or any other exception occurs while sending.
        Raises Exception if the server answers with a status other than 200.
        """
        try:
            _response = requests.get(url, params=payload, timeout=self.default_timeout)
        except requests.exceptions.Timeout:
            logging.warning("Request to %s timed out.. No data from actorws..", url)
            return None
        except requests.exceptions.ConnectionError:
            logging.warning("Connection error for %s.. No data from actorws..", url)
            return None
        except Exception as e:
            logging.warning("Exception occurred in chemical information module: %s", e)
            return None

        if _response.status_code != 200:
            # return {'success': False, 'error': "error connecting to actorws", 'data': None}
            logging.warning("Exception in actorws.py making request to actorws: %s", _response)
            raise Exception("ACTORWS request was not successful.")

        return json.loads(_response.content)

    def _new_result(self, prop):
        """Return a fresh result dict for `prop` with its own empty 'data' dict."""
        # Copy the template and replace the nested dict so that results from
        # different calls never alias each other or the template.
        result = dict(self.result_obj)
        result['prop'] = prop
        result['data'] = {}
        return result

    ############### "PUBLIC" METHODS BELOW ########################

    def get_dsstox_results(self, chemical, id_type):
        """
        Makes request to actorws dsstox for the result keys listed in
        self.dsstox_result_keys (casrn, dtxsid, preferredName, smiles, iupac).

        Input: cas number or gsid obtained from actorws chemicalIdentifier endpoint;
               id_type is 'CAS#' or 'gsid' (any other value sends an empty query)
        Output: result dict with 'calc', 'prop' ("dsstox") and 'data' keys.
                'data' holds the above result key:vals, or {'dsstox': "N/A"}
                if the lookup failed.
        """
        _payload = {}
        if id_type == 'gsid':
            _payload = {'gsid': chemical}
        elif id_type == 'CAS#':
            _payload = {'casrn': chemical}

        try:
            _response = self.make_request(self.dsstox_url, _payload)
            if _response is None:
                # make_request already logged the underlying cause.
                raise ValueError("no response from actorws")
            _record = _response['DataList']['list'][0]
            _data = {
                _key: _val
                for _key, _val in _record.items()
                if _key in self.dsstox_result_keys
            }
        except Exception as e:
            logging.warning("Error getting dsstox results key:vals: %s", e)
            logging.warning("Using only Jchem WS results instead, setting dsstox value to N/A")
            _data = {'dsstox': "N/A"}

        _results = self._new_result("dsstox")
        _results['data'] = _data
        return _results

    def get_chemid_results(self, chemical):
        """
        Makes request to actorws chemicalIdentifier endpoint for
        'synGsid' to be used for dsstox if cas isn't provided by user.

        Inputs: chemical - either a chemical name or smiles
        Output: The 'DataRow' entry of the response, or an empty dict
                if the request failed.
        """
        try:
            _response = self.make_request(self.chemid_url, {'identifier': chemical})
            if _response is None:
                # make_request already logged the underlying cause.
                raise ValueError("no response from actorws")
            return _response['DataRow']
        except Exception as e:
            logging.warning("Exception getting chemid results from actorws: %s", e)
            return {}
##########################################################################