From 1c009c2d093fdb3dc470ad2bcc607ded4dd49652 Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Mon, 10 Apr 2023 19:57:09 -0700 Subject: [PATCH] fix: Improve UI for the fine-grained NER model --- docs/tutorials/basic.ipynb | 36 ++++++++++++++++++------------------ pyproject.toml | 1 + src/dacy/__init__.py | 2 +- src/dacy/download.py | 28 ++++++++++++++++++++++++++-- src/dacy/ner/fine_grained.py | 19 ++++++++++++------- 5 files changed, 58 insertions(+), 28 deletions(-) diff --git a/docs/tutorials/basic.ipynb b/docs/tutorials/basic.ipynb index 9047d029..85e3372f 100644 --- a/docs/tutorials/basic.ipynb +++ b/docs/tutorials/basic.ipynb @@ -319,7 +319,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 7, @@ -339,7 +339,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -394,7 +394,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -466,7 +466,7 @@ { "data": { "text/html": [ - "\n", + "\n", "\n", " DaCy\n", " PROPN\n", @@ -508,57 +508,57 @@ "\n", "\n", "\n", - " \n", + " \n", " \n", - " nsubj\n", + " nsubj\n", " \n", " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " cop\n", + " cop\n", " \n", " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " det\n", + " det\n", " \n", " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " amod\n", + " amod\n", " \n", " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " case\n", + " case\n", " \n", " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " amod\n", + " amod\n", " \n", " \n", "\n", "\n", "\n", - " \n", + " \n", " \n", - " nmod\n", + " nmod\n", " \n", " \n", "\n", diff --git a/pyproject.toml b/pyproject.toml index d24e8c95..2d556740 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -184,6 +184,7 @@ exclude = [ "training/v0.0.0/**", "training/v0.1.0/**", "training/v0.1.1/**", + "training/ner_fine_grained/**", "papers/DaCy-A-Unified-Framework-for-Danish-NLP/**" ] # Allow 
unused variables when underscore-prefixed. diff --git a/src/dacy/__init__.py b/src/dacy/__init__.py index d390a690..5191c647 100644 --- a/src/dacy/__init__.py +++ b/src/dacy/__init__.py @@ -2,5 +2,5 @@ from dacy.sentiment import make_emotion_transformer # noqa from .about import __download_url__, __title__, __version__ # noqa -from .download import download_model # noqa +from .download import download_model, get_latest_version # noqa from .load import load, models, where_is_my_dacy # noqa diff --git a/src/dacy/download.py b/src/dacy/download.py index 8e907376..551c8149 100644 --- a/src/dacy/download.py +++ b/src/dacy/download.py @@ -1,11 +1,14 @@ """Functions for downloading DaCy models.""" import os +from distutils.version import StrictVersion from importlib.metadata import version from pathlib import Path from spacy.util import get_installed_models from tqdm import tqdm +versions = ["1.1.2", "1.0.0", "1.3.3", "1.0.12", "1.0.2"] +versions.sort(key=StrictVersion) DACY_DEFAULT_PATH = Path.home() / ".dacy" DEFAULT_CACHE_DIR = os.getenv( @@ -26,6 +29,26 @@ } +def get_latest_version(model: str) -> str: + """Returns the latest version of a DaCy model. + + Args: + model: string indicating the model + + Returns: + str: latest version of the model + """ + if model in {"small", "medium", "large"}: + model = f"da_dacy_{model}_trf" + versions = [mdl.split("-")[-1] for mdl in models_url if model in mdl] + versions = sorted( + versions, + key=lambda s: [int(u) for u in s.split(".")], + reverse=True, + ) + return versions[0] + + def models() -> list[str]: """Returns a list of valid DaCy models. 
@@ -82,12 +105,13 @@ def download_model( >>> download_model(model="da_dacy_medium_trf-0.1.0") """ if model in {"small", "medium", "large"}: - model = f"da_dacy_{model}_trf-0.1.0" + latest_version = get_latest_version(model) + model = f"da_dacy_{model}_trf-{latest_version}" mdl_version = model.split("-")[-1] if model not in models_url: raise ValueError( - "The model is not available in DaCy. Please use dacy.models() to see a" + f"The model '{model}' is not available in DaCy. Please use dacy.models() to see a" + " list of all models", ) diff --git a/src/dacy/ner/fine_grained.py b/src/dacy/ner/fine_grained.py index a9967db6..afd07763 100644 --- a/src/dacy/ner/fine_grained.py +++ b/src/dacy/ner/fine_grained.py @@ -1,4 +1,4 @@ -from typing import Callable, Literal +from typing import Callable, Literal, Optional from spacy.lang.da import Danish from spacy.language import Language @@ -9,14 +9,18 @@ @Danish.factory( "dacy/ner-fine-grained", - default_config={}, + default_config={ + "version": None, + "size": "medium", + "transformer_name": "ner-transformer", + }, ) def create_finegrained_ner_component( nlp: Language, name: str, - size: Literal["small", "medium", "large"] = "small", - transformer_name: str = "ner-transformer", - version: str = "0.1.0", + size: Literal["small", "medium", "large"], + transformer_name: str, + version: Optional[str], ) -> Callable[[Doc], Doc]: """Create a fine grained NER component using the dacy models. @@ -25,9 +29,10 @@ def create_finegrained_ner_component( name: The name of the component size: The size of the model to use. Can be "small", "medium" or "large" transformer_name: The name of the transformer component which the NER moel will listen to - version: The version of the model to use + version: The version of the model to use. 
If None, the latest version will be used """ - + if version is None: + version = dacy.get_latest_version(f"da_dacy_{size}_ner_fine_grained") nlp_ner = dacy.load(f"da_dacy_{size}_ner_fine_grained-{version}") nlp.add_pipe(factory_name="transformer", name=transformer_name, source=nlp_ner) name_, component = nlp_ner.components[-1]