Merge pull request #139 from CogStack/document-apis

CU-862k43092: improve documentation around the usage of Trainer APIs …
CogStack · Jul 12, 2023 · 65196a3 · 65196a3
2 parents 618f91c + 40f3e62
commit 65196a3
Show file tree

Hide file tree

Showing 4 changed files with 260 additions and 14 deletions.
diff --git a/notebook_docs/API_Examples.ipynb b/notebook_docs/API_Examples.ipynb
@@ -69,7 +69,7 @@
     {
      "data": {
       "text/plain": [
-       "{'Authorization': 'Token 0330dc5c1ec5eb6512b9824fd99650b834900082'}"
+       "{'Authorization': 'Token cc3e60dd2cc4231f7f74d1f30d35ce31d3154f7c'}"
       ]
      },
      "execution_count": 4,
@@ -102,7 +102,6 @@
      "data": {
       "text/plain": [
        "{'users': 'http://localhost:8001/api/users/',\n",
-       " 'concepts': 'http://localhost:8001/api/concepts/',\n",
        " 'entities': 'http://localhost:8001/api/entities/',\n",
        " 'project-annotate-entities': 'http://localhost:8001/api/project-annotate-entities/',\n",
        " 'documents': 'http://localhost:8001/api/documents/',\n",
@@ -116,8 +115,7 @@
        " 'vocabs': 'http://localhost:8001/api/vocabs/',\n",
        " 'datasets': 'http://localhost:8001/api/datasets/',\n",
        " 'icd-codes': 'http://localhost:8001/api/icd-codes/',\n",
-       " 'opcs-codes': 'http://localhost:8001/api/opcs-codes/',\n",
-       " 'upload-deployment': 'http://localhost:8001/api/upload-deployment/'}"
+       " 'opcs-codes': 'http://localhost:8001/api/opcs-codes/'}"
       ]
      },
      "execution_count": 5,
@@ -300,7 +298,7 @@
     "\n",
     "<!-- ![Admin Page](imgs/admin_page.png) -->\n",
     "<div>\n",
-    "<img src=\"imgs/admin_page.png\" width=\"350px\"/>\n",
+    "<img src=\"./../docs/_static/img/admin_page.png\" width=\"350px\"/>\n",
     "</div>\n",
     "\n",
     "Once you've created each object via the /admin/ page, return here to collect Users IDs and the MedCAT models IDs."
@@ -417,8 +415,15 @@
    "source": [
     "Newly created projects are now available for the assigned users. Given this above method many projects for specific conditions can created, configured and permissioned in seconds\n",
     "\n",
-    "![](imgs/new_projects.png)"
+    "![](../docs/_static/img/new_projects.png)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -437,9 +442,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.0"
+   "version": "3.10.8"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
diff --git a/notebook_docs/Processing_Annotations.ipynb b/notebook_docs/Processing_Annotations.ipynb
@@ -4,8 +4,131 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Processing MedCATTrainer Annotations\n",
-    "A short notebook to demonstrate the MedCATTrainer downloaded annotations schema. Both w/ and w/o text have the same format, except from the source text. "
+    "# MedCATtrainer Annotations\n",
+    "A notebook to demonstrate:\n",
+    "- How to download (aka export) annotations from the trainer. These downloads can also be used as an export / transfer / backup option for MCTrainer.\n",
+    "- The MedCATTrainer downloaded annotations schema. Both w/ and w/o text have the same format, except for the source text. \n",
+    "- How to re-upload exported annotations into 'new' projects. This could be for recovery, or for importing into a new Trainer deployment."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Downloading Annotations\n",
+    "This covers API driven downloading, and is also accessible from \\<hostname\\>:\\<port\\>/admin/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import requests\n",
+    "import json\n",
+    "import pandas as pd\n",
+    "from pprint import pprint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "URL = 'http://localhost:8001' # Should be set to your running deployment, IP / PORT if not running on localhost:8001"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 106,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'Authorization': 'Token cc3e60dd2cc4231f7f74d1f30d35ce31d3154f7c'}"
+      ]
+     },
+     "execution_count": 106,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# API access is via a username / password. Upon login the API auth endpoint provides an auth token that must be used for all following requests.\n",
+    "payload = {\"username\": \"admin\", \"password\": \"admin\"}\n",
+    "headers = {\n",
+    "    'Authorization': f'Token {json.loads(requests.post(\"http://localhost:8001/api/api-token-auth/\", json=payload).text)[\"token\"]}',\n",
+    "}\n",
+    "headers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 118,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[{'id': 68,\n",
+      "  'name': 'Top level Concept Annos (Diseases / Symptoms / Findings) (Clone)'},\n",
+      " {'id': 69,\n",
+      "  'name': 'Chevron test - UMLS (Diseases / Symptoms / Findings) (Clone)'},\n",
+      " {'id': 70,\n",
+      "  'name': 'Example Annotation Project - UMLS (Diseases / Symptoms / Findings) '\n",
+      "          '(Clone)'},\n",
+      " {'id': 71, 'name': 'Example Annotation Project - SNOMED CT All (Clone)'}]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# get project IDs to download\n",
+    "resp = json.loads(requests.get(f'{URL}/api/project-annotate-entities/', headers=headers).text)['results']\n",
+    "pprint([{'id': r['id'], 'name': r['name']} for r in resp])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 119,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# projects to download\n",
+    "projects_to_download = ','.join(str(r['id']) for r in resp) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 120,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# further parameters available here are:\n",
+    "# - with_text: Boolean: to download the annotations with the source document text. This will automatically include the doc_name. Default: False\n",
+    "# - with_doc_name: Boolean: if with_text is False, but you still want to include doc names. Default: False\n",
+    "\n",
+    "resp = json.loads(requests.get(f'{URL}/api/download-annos/?project_ids={projects_to_download}&with_text=True', headers=headers).text)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 121,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# dump the output to a file.\n",
+    "json.dump(resp, open('trainer_export.json', 'w')) "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Processing MedCATTrainer Annotations"
    ]
   },
   {
@@ -538,12 +661,127 @@
     "We have 'nan's here as there are no other values exist in the intersection of values so cohen's kappa is undefined. We can report 100% IIA though!"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Uploading Annotations\n",
+    "This is useful if annotations have been exported, and need to be systematically modified or if a new instance of Trainer is required to be used for further annotations\n",
+    "\n",
+    "This will use the previously exported trainer download"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 122,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "project_data = json.load(open('trainer_export.json'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 123,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "# filter out project data that is empty"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 124,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "project_data['projects'] = [p for p in project_data['projects'] if len(p['documents']) > 0 ] "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 125,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[{'id': 68,\n",
+      "  'name': 'Top level Concept Annos (Diseases / Symptoms / Findings) (Clone)'},\n",
+      " {'id': 69,\n",
+      "  'name': 'Chevron test - UMLS (Diseases / Symptoms / Findings) (Clone)'},\n",
+      " {'id': 70,\n",
+      "  'name': 'Example Annotation Project - UMLS (Diseases / Symptoms / Findings) '\n",
+      "          '(Clone)'},\n",
+      " {'id': 71, 'name': 'Example Annotation Project - SNOMED CT All (Clone)'}]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# current projects\n",
+    "resp = json.loads(requests.get(f'{URL}/api/project-annotate-entities/', headers=headers).text)['results']\n",
+    "pprint([{'id': r['id'], 'name': r['name']} for r in resp])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 126,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# upload previously exported projects\n",
+    "resp = requests.post(f'{URL}/api/upload-deployment/', json=project_data).text"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 127,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\"successfully uploaded\"\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(resp)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 128,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[{'id': 68,\n",
+      "  'name': 'Top level Concept Annos (Diseases / Symptoms / Findings) (Clone)'},\n",
+      " {'id': 69,\n",
+      "  'name': 'Chevron test - UMLS (Diseases / Symptoms / Findings) (Clone)'},\n",
+      " {'id': 70,\n",
+      "  'name': 'Example Annotation Project - UMLS (Diseases / Symptoms / Findings) '\n",
+      "          '(Clone)'},\n",
+      " {'id': 71, 'name': 'Example Annotation Project - SNOMED CT All (Clone)'},\n",
+      " {'id': 81,\n",
+      "  'name': 'Chevron test - UMLS (Diseases / Symptoms / Findings) (Clone) '\n",
+      "          'IMPORTED'},\n",
+      " {'id': 82,\n",
+      "  'name': 'Example Annotation Project - UMLS (Diseases / Symptoms / Findings) '\n",
+      "          '(Clone) IMPORTED'}]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# to show the newly uploaded projects\n",
+    "resp = json.loads(requests.get(f'{URL}/api/project-annotate-entities/', headers=headers).text)['results']\n",
+    "pprint([{'id': r['id'], 'name': r['name']} for r in resp])"
+   ]
   }
  ],
  "metadata": {
@@ -562,7 +800,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.1"
+   "version": "3.10.8"
   }
  },
  "nbformat": 4,

diff --git a/webapp/api/api/admin.py b/webapp/api/api/admin.py
@@ -267,6 +267,7 @@ def retrieve_project_data(projects: QuerySet) -> Dict[str, List]:
                 out_ann['last_modified'] = ann.last_modified.strftime(_dt_fmt)
                 out_ann['comment'] = ann.comment
                 out_ann['manually_created'] = ann.manually_created
+                out_ann['acc'] = ann.acc
                 # if ann.icd_code:
                 #     out_ann['icd_code'] = {'code': ann.icd_code.code, 'desc': ann.icd_code.desc}
                 # if ann.opcs_code:

diff --git a/webapp/api/api/views.py b/webapp/api/api/views.py
@@ -569,8 +569,10 @@ def download_annos(request):
         return HttpResponseBadRequest('No projects to download annotations')
 
     projects = ProjectAnnotateEntities.objects.filter(id__in=p_ids)
+
+    with_doc_name = request.GET.get('with_doc_name', False)
     out = download_projects_with_text(projects) if with_text_flag else \
-        download_projects_without_text(projects)
+        download_projects_without_text(projects, with_doc_name)
     return out