From 40f3e623352261bbfb6619605038908770e19356 Mon Sep 17 00:00:00 2001 From: tomolopolis Date: Wed, 12 Jul 2023 15:22:37 +0100 Subject: [PATCH] CU-862k43092: improve documentation around the usage of Trainer APIs for downloading, uploading of exported projects --- notebook_docs/API_Examples.ipynb | 21 +- notebook_docs/Processing_Annotations.ipynb | 248 ++++++++++++++++++++- webapp/api/api/admin.py | 1 + webapp/api/api/views.py | 4 +- 4 files changed, 260 insertions(+), 14 deletions(-) diff --git a/notebook_docs/API_Examples.ipynb b/notebook_docs/API_Examples.ipynb index f08e6c48..ad0ed525 100644 --- a/notebook_docs/API_Examples.ipynb +++ b/notebook_docs/API_Examples.ipynb @@ -69,7 +69,7 @@ { "data": { "text/plain": [ - "{'Authorization': 'Token 0330dc5c1ec5eb6512b9824fd99650b834900082'}" + "{'Authorization': 'Token cc3e60dd2cc4231f7f74d1f30d35ce31d3154f7c'}" ] }, "execution_count": 4, @@ -102,7 +102,6 @@ "data": { "text/plain": [ "{'users': 'http://localhost:8001/api/users/',\n", - " 'concepts': 'http://localhost:8001/api/concepts/',\n", " 'entities': 'http://localhost:8001/api/entities/',\n", " 'project-annotate-entities': 'http://localhost:8001/api/project-annotate-entities/',\n", " 'documents': 'http://localhost:8001/api/documents/',\n", @@ -116,8 +115,7 @@ " 'vocabs': 'http://localhost:8001/api/vocabs/',\n", " 'datasets': 'http://localhost:8001/api/datasets/',\n", " 'icd-codes': 'http://localhost:8001/api/icd-codes/',\n", - " 'opcs-codes': 'http://localhost:8001/api/opcs-codes/',\n", - " 'upload-deployment': 'http://localhost:8001/api/upload-deployment/'}" + " 'opcs-codes': 'http://localhost:8001/api/opcs-codes/'}" ] }, "execution_count": 5, @@ -300,7 +298,7 @@ "\n", "\n", "
\n", - "\n", + "\n", "
\n", "\n", "Once you've created each object via the /admin/ page, return here to collect Users IDs and the MedCAT models IDs." @@ -417,8 +415,15 @@ "source": [ "Newly created projects are now available for the assigned users. Given this above method many projects for specific conditions can created, configured and permissioned in seconds\n", "\n", - "![](imgs/new_projects.png)" + "![](../docs/_static/img/new_projects.png)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -437,9 +442,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.0" + "version": "3.10.8" } }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/notebook_docs/Processing_Annotations.ipynb b/notebook_docs/Processing_Annotations.ipynb index 6f4397d7..cb092088 100644 --- a/notebook_docs/Processing_Annotations.ipynb +++ b/notebook_docs/Processing_Annotations.ipynb @@ -4,8 +4,131 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Processing MedCATTrainer Annotations\n", - "A short notebook to demonstrate the MedCATTrainer downloaded annotations schema. Both w/ and w/o text have the same format, except from the source text. " + "# MedCATtrainer Annotations\n", + "A notebook to demonstrate:\n", + "- How to download (aka export) annotations from the trainer. These downloads can also be used as an export / transfer / backup option for MCTrainer.\n", + "- The MedCATTrainer downloaded annotations schema. Both w/ and w/o text have the same format, except for the source text. \n", + "- How to re-upload exported annotations into 'new' projects; this could be for recovery, or importing to a new Trainer deployment."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Downloading Annotations\n", + "This covers API driven downloading, and is also accessible from \\:\\/admin/" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import json\n", + "import pandas as pd\n", + "from pprint import pprint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "URL = 'http://localhost:8001' # Should be set to your running deployment, IP / PORT if not running on localhost:8001" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Authorization': 'Token cc3e60dd2cc4231f7f74d1f30d35ce31d3154f7c'}" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# API access is via a username / password. Upon login the API auth endpoint provides an auth token that must be used for all following requests.\n", + "payload = {\"username\": \"admin\", \"password\": \"admin\"}\n", + "headers = {\n", + " 'Authorization': f'Token {json.loads(requests.post(\"http://localhost:8001/api/api-token-auth/\", json=payload).text)[\"token\"]}',\n", + "}\n", + "headers" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'id': 68,\n", + " 'name': 'Top level Concept Annos (Diseases / Symptoms / Findings) (Clone)'},\n", + " {'id': 69,\n", + " 'name': 'Chevron test - UMLS (Diseases / Symptoms / Findings) (Clone)'},\n", + " {'id': 70,\n", + " 'name': 'Example Annotation Project - UMLS (Diseases / Symptoms / Findings) '\n", + " '(Clone)'},\n", + " {'id': 71, 'name': 'Example Annotation Project - SNOMED CT All (Clone)'}]\n" + ] + } + ], + "source": [ + "# get project IDs to download\n", + "resp = 
json.loads(requests.get(f'{URL}/api/project-annotate-entities/', headers=headers).text)['results']\n", + "pprint([{'id': r['id'], 'name': r['name']} for r in resp])" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [], + "source": [ + "# projects to download\n", + "projects_to_download = ','.join(str(r['id']) for r in resp) " + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [], + "source": [ + "# further parameters available here are:\n", + "# - with_text: Boolean: to download the annotations with the source document text. This will automatically include the doc_name. Default: False\n", + "# - with_doc_name: Boolean: if with_text is False, but you still want to include doc names. Default: False\n", + "\n", + "resp = json.loads(requests.get(f'{URL}/api/download-annos/?project_ids={projects_to_download}&with_text=True', headers=headers).text)" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [], + "source": [ + "# dump the output to a file.\n", + "json.dump(resp, open('trainer_export.json', 'w')) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Processing MedCATTrainer Annotations" ] }, { @@ -538,12 +661,127 @@ "We have 'nan's here as there are no other values exist in the intersection of values so cohen's kappa is undefined. We can report 100% IIA though!" 
] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Uploading Annotations\n", + "This is useful if annotations have been exported, and need to be systematically modified or if a new instance of Trainer is required to be used for further annotations\n", + "\n", + "This will use the previously exported trainer download" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 122, + "metadata": {}, + "outputs": [], + "source": [ + "project_data = json.load(open('trainer_export.json'))" + ] + }, + { + "cell_type": "code", + "execution_count": 123, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# filter out project data that is empty" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": {}, + "outputs": [], + "source": [ + "project_data['projects'] = [p for p in project_data['projects'] if len(p['documents']) > 0 ] " + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'id': 68,\n", + " 'name': 'Top level Concept Annos (Diseases / Symptoms / Findings) (Clone)'},\n", + " {'id': 69,\n", + " 'name': 'Chevron test - UMLS (Diseases / Symptoms / Findings) (Clone)'},\n", + " {'id': 70,\n", + " 'name': 'Example Annotation Project - UMLS (Diseases / Symptoms / Findings) '\n", + " '(Clone)'},\n", + " {'id': 71, 'name': 'Example Annotation Project - SNOMED CT All (Clone)'}]\n" + ] + } + ], + "source": [ + "# current projects\n", + "resp = json.loads(requests.get(f'{URL}/api/project-annotate-entities/', headers=headers).text)['results']\n", + "pprint([{'id': r['id'], 'name': r['name']} for r in resp])" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "metadata": {}, + "outputs": [], + "source": [ + "# upload previously exported projects\n", + "resp = requests.post(f'{URL}/api/upload-deployment/', json=project_data).text" + ] + }, + { + "cell_type": "code", +
"execution_count": 127, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\"successfully uploaded\"\n" + ] + } + ], + "source": [ + "print(resp)" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'id': 68,\n", + " 'name': 'Top level Concept Annos (Diseases / Symptoms / Findings) (Clone)'},\n", + " {'id': 69,\n", + " 'name': 'Chevron test - UMLS (Diseases / Symptoms / Findings) (Clone)'},\n", + " {'id': 70,\n", + " 'name': 'Example Annotation Project - UMLS (Diseases / Symptoms / Findings) '\n", + " '(Clone)'},\n", + " {'id': 71, 'name': 'Example Annotation Project - SNOMED CT All (Clone)'},\n", + " {'id': 81,\n", + " 'name': 'Chevron test - UMLS (Diseases / Symptoms / Findings) (Clone) '\n", + " 'IMPORTED'},\n", + " {'id': 82,\n", + " 'name': 'Example Annotation Project - UMLS (Diseases / Symptoms / Findings) '\n", + " '(Clone) IMPORTED'}]\n" + ] + } + ], + "source": [ + "# to show the newly uploaded projects\n", + "resp = json.loads(requests.get(f'{URL}/api/project-annotate-entities/', headers=headers).text)['results']\n", + "pprint([{'id': r['id'], 'name': r['name']} for r in resp])" + ] } ], "metadata": { @@ -562,7 +800,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.1" + "version": "3.10.8" } }, "nbformat": 4, diff --git a/webapp/api/api/admin.py b/webapp/api/api/admin.py index 8eb7abb1..f884fa14 100644 --- a/webapp/api/api/admin.py +++ b/webapp/api/api/admin.py @@ -267,6 +267,7 @@ def retrieve_project_data(projects: QuerySet) -> Dict[str, List]: out_ann['last_modified'] = ann.last_modified.strftime(_dt_fmt) out_ann['comment'] = ann.comment out_ann['manually_created'] = ann.manually_created + out_ann['acc'] = ann.acc # if ann.icd_code: # out_ann['icd_code'] = {'code': ann.icd_code.code, 'desc': ann.icd_code.desc} # if ann.opcs_code: diff --git 
a/webapp/api/api/views.py b/webapp/api/api/views.py index 685891d2..2bcb8764 100644 --- a/webapp/api/api/views.py +++ b/webapp/api/api/views.py @@ -569,8 +569,10 @@ def download_annos(request): return HttpResponseBadRequest('No projects to download annotations') projects = ProjectAnnotateEntities.objects.filter(id__in=p_ids) + + with_doc_name = request.GET.get('with_doc_name', False) out = download_projects_with_text(projects) if with_text_flag else \ - download_projects_without_text(projects) + download_projects_without_text(projects, with_doc_name) return out