Skip to content

Commit

Permalink
Merge pull request #139 from CogStack/document-apis
Browse files Browse the repository at this point in the history
CU-862k43092: improve documentation around the usage of Trainer APIs …
  • Loading branch information
tomolopolis authored Jul 12, 2023
2 parents 618f91c + 40f3e62 commit 65196a3
Show file tree
Hide file tree
Showing 4 changed files with 260 additions and 14 deletions.
21 changes: 13 additions & 8 deletions notebook_docs/API_Examples.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
{
"data": {
"text/plain": [
"{'Authorization': 'Token 0330dc5c1ec5eb6512b9824fd99650b834900082'}"
"{'Authorization': 'Token cc3e60dd2cc4231f7f74d1f30d35ce31d3154f7c'}"
]
},
"execution_count": 4,
Expand Down Expand Up @@ -102,7 +102,6 @@
"data": {
"text/plain": [
"{'users': 'http://localhost:8001/api/users/',\n",
" 'concepts': 'http://localhost:8001/api/concepts/',\n",
" 'entities': 'http://localhost:8001/api/entities/',\n",
" 'project-annotate-entities': 'http://localhost:8001/api/project-annotate-entities/',\n",
" 'documents': 'http://localhost:8001/api/documents/',\n",
Expand All @@ -116,8 +115,7 @@
" 'vocabs': 'http://localhost:8001/api/vocabs/',\n",
" 'datasets': 'http://localhost:8001/api/datasets/',\n",
" 'icd-codes': 'http://localhost:8001/api/icd-codes/',\n",
" 'opcs-codes': 'http://localhost:8001/api/opcs-codes/',\n",
" 'upload-deployment': 'http://localhost:8001/api/upload-deployment/'}"
" 'opcs-codes': 'http://localhost:8001/api/opcs-codes/'}"
]
},
"execution_count": 5,
Expand Down Expand Up @@ -300,7 +298,7 @@
"\n",
"<!-- ![Admin Page](imgs/admin_page.png) -->\n",
"<div>\n",
"<img src=\"imgs/admin_page.png\" width=\"350px\"/>\n",
"<img src=\"./../docs/_static/img/admin_page.png\" width=\"350px\"/>\n",
"</div>\n",
"\n",
"Once you've created each object via the /admin/ page, return here to collect Users IDs and the MedCAT models IDs."
Expand Down Expand Up @@ -417,8 +415,15 @@
"source": [
"Newly created projects are now available for the assigned users. Using the method above, many projects for specific conditions can be created, configured and permissioned in seconds.\n",
"\n",
"![](imgs/new_projects.png)"
"![](../docs/_static/img/new_projects.png)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -437,9 +442,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
"version": "3.10.8"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
}
248 changes: 243 additions & 5 deletions notebook_docs/Processing_Annotations.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,131 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Processing MedCATTrainer Annotations\n",
"A short notebook to demonstrate the MedCATTrainer downloaded annotations schema. Both w/ and w/o text have the same format, except from the source text. "
"# MedCATtrainer Annotations\n",
"A notebook to demonstrate:\n",
"- How to download (aka export) annotations from the trainer. These downloads can also be used as an export / transfer / backup option for MCTrainer.\n",
"- The MedCATTrainer downloaded annotations schema. Downloads w/ and w/o text have the same format, except for the source text. \n",
"- How to re-upload exported annotations into 'new' projects; this could be for recovery, or for importing into a new Trainer deployment."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Downloading Annotations\n",
"This covers API-driven downloading; the same downloads are also accessible from \\<hostname\\>:\\<port\\>/admin/"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import json\n",
"import pandas as pd\n",
"from pprint import pprint"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"URL = 'http://localhost:8001' # Should be set to your running deployment, IP / PORT if not running on localhost:8001"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'Authorization': 'Token cc3e60dd2cc4231f7f74d1f30d35ce31d3154f7c'}"
]
},
"execution_count": 106,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# API access is via a username / password. Upon login the API auth endpoint provides an auth token that must be used for all following requests.\n",
"payload = {\"username\": \"admin\", \"password\": \"admin\"}\n",
"headers = {\n",
" 'Authorization': f'Token {json.loads(requests.post(\"http://localhost:8001/api/api-token-auth/\", json=payload).text)[\"token\"]}',\n",
"}\n",
"headers"
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'id': 68,\n",
" 'name': 'Top level Concept Annos (Diseases / Symptoms / Findings) (Clone)'},\n",
" {'id': 69,\n",
" 'name': 'Chevron test - UMLS (Diseases / Symptoms / Findings) (Clone)'},\n",
" {'id': 70,\n",
" 'name': 'Example Annotation Project - UMLS (Diseases / Symptoms / Findings) '\n",
" '(Clone)'},\n",
" {'id': 71, 'name': 'Example Annotation Project - SNOMED CT All (Clone)'}]\n"
]
}
],
"source": [
"# get project IDs to download\n",
"resp = json.loads(requests.get(f'{URL}/api/project-annotate-entities/', headers=headers).text)['results']\n",
"pprint([{'id': r['id'], 'name': r['name']} for r in resp])"
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {},
"outputs": [],
"source": [
"# projects to download\n",
"projects_to_download = ','.join(str(r['id']) for r in resp) "
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {},
"outputs": [],
"source": [
"# further parameters available here are:\n",
"# - with_text: Boolean: to download the annotations with the source document text. This will automatically include the doc_name. Default: False\n",
"# - with_doc_name: Boolean: if with_text is False, but you still want to include doc names. Default: False\n",
"\n",
"resp = json.loads(requests.get(f'{URL}/api/download-annos/?project_ids={projects_to_download}&with_text=True', headers=headers).text)"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {},
"outputs": [],
"source": [
"# dump the output to a file.\n",
"json.dump(resp, open('trainer_export.json', 'w')) "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Processing MedCATTrainer Annotations"
]
},
{
Expand Down Expand Up @@ -538,12 +661,127 @@
"We have 'nan's here because no other values exist in the intersection of values, so Cohen's kappa is undefined. We can report 100% IIA though!"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Uploading Annotations\n",
"This is useful if annotations have been exported and need to be systematically modified, or if a new instance of Trainer is required for further annotation.\n",
"\n",
"This will use the previously exported trainer download"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 122,
"metadata": {},
"outputs": [],
"source": [
"project_data = json.load(open('trainer_export.json'))"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [],
"source": []
"source": [
"# filter out project data that is empty"
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {},
"outputs": [],
"source": [
"project_data['projects'] = [p for p in project_data['projects'] if len(p['documents']) > 0 ] "
]
},
{
"cell_type": "code",
"execution_count": 125,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'id': 68,\n",
" 'name': 'Top level Concept Annos (Diseases / Symptoms / Findings) (Clone)'},\n",
" {'id': 69,\n",
" 'name': 'Chevron test - UMLS (Diseases / Symptoms / Findings) (Clone)'},\n",
" {'id': 70,\n",
" 'name': 'Example Annotation Project - UMLS (Diseases / Symptoms / Findings) '\n",
" '(Clone)'},\n",
" {'id': 71, 'name': 'Example Annotation Project - SNOMED CT All (Clone)'}]\n"
]
}
],
"source": [
"# current projects\n",
"resp = json.loads(requests.get(f'{URL}/api/project-annotate-entities/', headers=headers).text)['results']\n",
"pprint([{'id': r['id'], 'name': r['name']} for r in resp])"
]
},
{
"cell_type": "code",
"execution_count": 126,
"metadata": {},
"outputs": [],
"source": [
"# upload previously exported projects\n",
"resp = requests.post(f'{URL}/api/upload-deployment/', json=project_data).text"
]
},
{
"cell_type": "code",
"execution_count": 127,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\"successfully uploaded\"\n"
]
}
],
"source": [
"print(resp)"
]
},
{
"cell_type": "code",
"execution_count": 128,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'id': 68,\n",
" 'name': 'Top level Concept Annos (Diseases / Symptoms / Findings) (Clone)'},\n",
" {'id': 69,\n",
" 'name': 'Chevron test - UMLS (Diseases / Symptoms / Findings) (Clone)'},\n",
" {'id': 70,\n",
" 'name': 'Example Annotation Project - UMLS (Diseases / Symptoms / Findings) '\n",
" '(Clone)'},\n",
" {'id': 71, 'name': 'Example Annotation Project - SNOMED CT All (Clone)'},\n",
" {'id': 81,\n",
" 'name': 'Chevron test - UMLS (Diseases / Symptoms / Findings) (Clone) '\n",
" 'IMPORTED'},\n",
" {'id': 82,\n",
" 'name': 'Example Annotation Project - UMLS (Diseases / Symptoms / Findings) '\n",
" '(Clone) IMPORTED'}]\n"
]
}
],
"source": [
"# to show the newly uploaded projects\n",
"resp = json.loads(requests.get(f'{URL}/api/project-annotate-entities/', headers=headers).text)['results']\n",
"pprint([{'id': r['id'], 'name': r['name']} for r in resp])"
]
}
],
"metadata": {
Expand All @@ -562,7 +800,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.1"
"version": "3.10.8"
}
},
"nbformat": 4,
Expand Down
1 change: 1 addition & 0 deletions webapp/api/api/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ def retrieve_project_data(projects: QuerySet) -> Dict[str, List]:
out_ann['last_modified'] = ann.last_modified.strftime(_dt_fmt)
out_ann['comment'] = ann.comment
out_ann['manually_created'] = ann.manually_created
out_ann['acc'] = ann.acc
# if ann.icd_code:
# out_ann['icd_code'] = {'code': ann.icd_code.code, 'desc': ann.icd_code.desc}
# if ann.opcs_code:
Expand Down
4 changes: 3 additions & 1 deletion webapp/api/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,8 +569,10 @@ def download_annos(request):
return HttpResponseBadRequest('No projects to download annotations')

projects = ProjectAnnotateEntities.objects.filter(id__in=p_ids)

with_doc_name = request.GET.get('with_doc_name', False)
out = download_projects_with_text(projects) if with_text_flag else \
download_projects_without_text(projects)
download_projects_without_text(projects, with_doc_name)
return out


Expand Down

0 comments on commit 65196a3

Please sign in to comment.