Skip to content

Commit

Permalink
fix: Improve UI for the fine-grained NER model
Browse files Browse the repository at this point in the history
  • Loading branch information
KennethEnevoldsen committed Apr 11, 2023
1 parent 98fe95e commit 1c009c2
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 28 deletions.
36 changes: 18 additions & 18 deletions docs/tutorials/basic.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@
{
"data": {
"text/plain": [
"<spacy.pipeline.ner.EntityRecognizer at 0x2a018e730>"
"<spacy.pipeline.ner.EntityRecognizer at 0x29cc06ab0>"
]
},
"execution_count": 7,
Expand All @@ -339,7 +339,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 8,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -394,7 +394,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 9,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -466,7 +466,7 @@
{
"data": {
"text/html": [
"<span class=\"tex2jax_ignore\"><svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"da\" id=\"8b6bb5d6bad74aa099ead33a710bb4e7-0\" class=\"displacy\" width=\"1450\" height=\"487.0\" direction=\"ltr\" style=\"max-width: none; height: 487.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
"<span class=\"tex2jax_ignore\"><svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"da\" id=\"f6f16117a7124226b425ab278fbb8fb2-0\" class=\"displacy\" width=\"1450\" height=\"487.0\" direction=\"ltr\" style=\"max-width: none; height: 487.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"397.0\">\n",
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">DaCy</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">PROPN</tspan>\n",
Expand Down Expand Up @@ -508,57 +508,57 @@
"</text>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-8b6bb5d6bad74aa099ead33a710bb4e7-0-0\" stroke-width=\"2px\" d=\"M70,352.0 C70,2.0 750.0,2.0 750.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <path class=\"displacy-arc\" id=\"arrow-f6f16117a7124226b425ab278fbb8fb2-0-0\" stroke-width=\"2px\" d=\"M70,352.0 C70,2.0 750.0,2.0 750.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-8b6bb5d6bad74aa099ead33a710bb4e7-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">nsubj</textPath>\n",
" <textPath xlink:href=\"#arrow-f6f16117a7124226b425ab278fbb8fb2-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">nsubj</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M70,354.0 L62,342.0 78,342.0\" fill=\"currentColor\"/>\n",
"</g>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-8b6bb5d6bad74aa099ead33a710bb4e7-0-1\" stroke-width=\"2px\" d=\"M245,352.0 C245,89.5 745.0,89.5 745.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <path class=\"displacy-arc\" id=\"arrow-f6f16117a7124226b425ab278fbb8fb2-0-1\" stroke-width=\"2px\" d=\"M245,352.0 C245,89.5 745.0,89.5 745.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-8b6bb5d6bad74aa099ead33a710bb4e7-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">cop</textPath>\n",
" <textPath xlink:href=\"#arrow-f6f16117a7124226b425ab278fbb8fb2-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">cop</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M245,354.0 L237,342.0 253,342.0\" fill=\"currentColor\"/>\n",
"</g>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-8b6bb5d6bad74aa099ead33a710bb4e7-0-2\" stroke-width=\"2px\" d=\"M420,352.0 C420,177.0 740.0,177.0 740.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <path class=\"displacy-arc\" id=\"arrow-f6f16117a7124226b425ab278fbb8fb2-0-2\" stroke-width=\"2px\" d=\"M420,352.0 C420,177.0 740.0,177.0 740.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-8b6bb5d6bad74aa099ead33a710bb4e7-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
" <textPath xlink:href=\"#arrow-f6f16117a7124226b425ab278fbb8fb2-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M420,354.0 L412,342.0 428,342.0\" fill=\"currentColor\"/>\n",
"</g>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-8b6bb5d6bad74aa099ead33a710bb4e7-0-3\" stroke-width=\"2px\" d=\"M595,352.0 C595,264.5 735.0,264.5 735.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <path class=\"displacy-arc\" id=\"arrow-f6f16117a7124226b425ab278fbb8fb2-0-3\" stroke-width=\"2px\" d=\"M595,352.0 C595,264.5 735.0,264.5 735.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-8b6bb5d6bad74aa099ead33a710bb4e7-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">amod</textPath>\n",
" <textPath xlink:href=\"#arrow-f6f16117a7124226b425ab278fbb8fb2-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">amod</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M595,354.0 L587,342.0 603,342.0\" fill=\"currentColor\"/>\n",
"</g>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-8b6bb5d6bad74aa099ead33a710bb4e7-0-4\" stroke-width=\"2px\" d=\"M945,352.0 C945,177.0 1265.0,177.0 1265.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <path class=\"displacy-arc\" id=\"arrow-f6f16117a7124226b425ab278fbb8fb2-0-4\" stroke-width=\"2px\" d=\"M945,352.0 C945,177.0 1265.0,177.0 1265.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-8b6bb5d6bad74aa099ead33a710bb4e7-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">case</textPath>\n",
" <textPath xlink:href=\"#arrow-f6f16117a7124226b425ab278fbb8fb2-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">case</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M945,354.0 L937,342.0 953,342.0\" fill=\"currentColor\"/>\n",
"</g>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-8b6bb5d6bad74aa099ead33a710bb4e7-0-5\" stroke-width=\"2px\" d=\"M1120,352.0 C1120,264.5 1260.0,264.5 1260.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <path class=\"displacy-arc\" id=\"arrow-f6f16117a7124226b425ab278fbb8fb2-0-5\" stroke-width=\"2px\" d=\"M1120,352.0 C1120,264.5 1260.0,264.5 1260.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-8b6bb5d6bad74aa099ead33a710bb4e7-0-5\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">amod</textPath>\n",
" <textPath xlink:href=\"#arrow-f6f16117a7124226b425ab278fbb8fb2-0-5\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">amod</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M1120,354.0 L1112,342.0 1128,342.0\" fill=\"currentColor\"/>\n",
"</g>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-8b6bb5d6bad74aa099ead33a710bb4e7-0-6\" stroke-width=\"2px\" d=\"M770,352.0 C770,89.5 1270.0,89.5 1270.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <path class=\"displacy-arc\" id=\"arrow-f6f16117a7124226b425ab278fbb8fb2-0-6\" stroke-width=\"2px\" d=\"M770,352.0 C770,89.5 1270.0,89.5 1270.0,352.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-8b6bb5d6bad74aa099ead33a710bb4e7-0-6\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">nmod</textPath>\n",
" <textPath xlink:href=\"#arrow-f6f16117a7124226b425ab278fbb8fb2-0-6\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">nmod</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M1270.0,354.0 L1278.0,342.0 1262.0,342.0\" fill=\"currentColor\"/>\n",
"</g>\n",
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ exclude = [
"training/v0.0.0/**",
"training/v0.1.0/**",
"training/v0.1.1/**",
"training/ner_fine_grained/**",
"papers/DaCy-A-Unified-Framework-for-Danish-NLP/**"
]
# Allow unused variables when underscore-prefixed.
Expand Down
2 changes: 1 addition & 1 deletion src/dacy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
from dacy.sentiment import make_emotion_transformer # noqa

from .about import __download_url__, __title__, __version__ # noqa
from .download import download_model # noqa
from .download import download_model, get_latest_version # noqa
from .load import load, models, where_is_my_dacy # noqa
28 changes: 26 additions & 2 deletions src/dacy/download.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
"""Functions for downloading DaCy models."""
import os
from distutils.version import StrictVersion
from importlib.metadata import version
from pathlib import Path

from spacy.util import get_installed_models
from tqdm import tqdm

# NOTE(review): `versions` is not referenced by any code visible in this
# module and looks like leftover scratch data -- confirm and remove if unused.
versions = ["1.1.2", "1.0.0", "1.3.3", "1.0.12", "1.0.2"]
# Sort numerically per dotted component (so "1.0.12" ranks after "1.0.2").
# This replaces distutils' StrictVersion, which is deprecated (PEP 632) and
# no longer shipped with Python 3.12+; the resulting order is identical. The
# `from distutils.version import StrictVersion` import at the top of the file
# is then unused and can be dropped.
versions.sort(key=lambda s: [int(u) for u in s.split(".")])

# Default location of DaCy's data directory in the user's home folder.
DACY_DEFAULT_PATH = Path.home() / ".dacy"

DEFAULT_CACHE_DIR = os.getenv(
Expand All @@ -26,6 +29,26 @@
}


def get_latest_version(model: str) -> str:
    """Return the latest available version of a DaCy model.

    Args:
        model: Name of the model without a version suffix, e.g.
            "da_dacy_medium_ner_fine_grained". The shorthands "small",
            "medium" and "large" are expanded to "da_dacy_{model}_trf".

    Returns:
        str: The highest version string among the entries of ``models_url``
            registered for ``model``.

    Raises:
        ValueError: If no entry in ``models_url`` matches ``model``.
    """
    if model in {"small", "medium", "large"}:
        model = f"da_dacy_{model}_trf"

    # Entries of ``models_url`` are named "<model name>-<version>". Only the
    # versions published for the requested model are considered; previously
    # the filter was hard-coded to "ner_fine_grained" and ignored ``model``.
    candidates = []
    for entry in models_url:
        name, _, entry_version = entry.rpartition("-")
        if name == model:
            candidates.append(entry_version)

    if not candidates:
        raise ValueError(
            f"The model '{model}' is not available in DaCy. Please use dacy.models() to see a"
            + " list of all models",
        )

    # Compare component-wise as integers so that e.g. "0.1.10" > "0.1.2".
    return max(candidates, key=lambda s: [int(u) for u in s.split(".")])


def models() -> list[str]:
"""Returns a list of valid DaCy models.
Expand Down Expand Up @@ -82,12 +105,13 @@ def download_model(
>>> download_model(model="da_dacy_medium_trf-0.1.0")
"""
if model in {"small", "medium", "large"}:
model = f"da_dacy_{model}_trf-0.1.0"
latest_version = get_latest_version(model)
model = f"da_dacy_{model}_trf-{latest_version}"
mdl_version = model.split("-")[-1]

if model not in models_url:
raise ValueError(
"The model is not available in DaCy. Please use dacy.models() to see a"
f"The model '{model}' is not available in DaCy. Please use dacy.models() to see a"
+ " list of all models",
)

Expand Down
19 changes: 12 additions & 7 deletions src/dacy/ner/fine_grained.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Callable, Literal
from typing import Callable, Literal, Optional

from spacy.lang.da import Danish
from spacy.language import Language
Expand All @@ -9,14 +9,18 @@

@Danish.factory(
"dacy/ner-fine-grained",
default_config={},
default_config={
"version": None,
"size": "medium",
"transformer_name": "ner-transformer",
},
)
def create_finegrained_ner_component(
nlp: Language,
name: str,
size: Literal["small", "medium", "large"] = "small",
transformer_name: str = "ner-transformer",
version: str = "0.1.0",
size: Literal["small", "medium", "large"],
transformer_name: str,
version: Optional[str],
) -> Callable[[Doc], Doc]:
"""Create a fine grained NER component using the dacy models.
Expand All @@ -25,9 +29,10 @@ def create_finegrained_ner_component(
name: The name of the component
size: The size of the model to use. Can be "small", "medium" or "large"
transformer_name: The name of the transformer component which the NER model will listen to
version: The version of the model to use
version: The version of the model to use. If None, the latest version will be used
"""

if version is None:
version = dacy.get_latest_version("da_dacy_{size}_ner_fine_grained")
nlp_ner = dacy.load(f"da_dacy_{size}_ner_fine_grained-{version}")
nlp.add_pipe(factory_name="transformer", name=transformer_name, source=nlp_ner)
name_, component = nlp_ner.components[-1]
Expand Down

0 comments on commit 1c009c2

Please sign in to comment.