diff --git a/docs/evaluation/models.py b/docs/evaluation/models.py index 60dd0c8a..c4348873 100644 --- a/docs/evaluation/models.py +++ b/docs/evaluation/models.py @@ -4,10 +4,11 @@ from functools import partial -import dacy import spacy from spacy.language import Language +import dacy + def scandiner_loader() -> Language: scandiner = spacy.blank("da") @@ -22,8 +23,75 @@ def spacy_wrap_loader(mdl: str) -> Language: return daner_base +def openai_model_loader_simple_ner(model: str) -> Language: + nlp = spacy.blank("da") + nlp.add_pipe( + "llm", + config={ + "task": { + "@llm_tasks": "spacy.NER.v2", + "labels": ["PERSON", "ORGANISATION", "LOCATION"], + "label_definitions": { + "PERSON": "People, including fictional", + "ORGANISATION": "Companies, agencies, institutions, etc.", + "LOCATION": "Countries, cities, states, mountain ranges, bodies of water etc.", + }, + }, + "backend": { + "@llm_backends": "spacy.REST.v1", + "api": "OpenAI", + "config": {"model": model}, + }, + }, + ) + nlp.initialize() + return nlp + + +def openai_model_loader_fine_ner(model: str) -> Language: + nlp = spacy.blank("da") + + label_desc = { + "PERSON": "People, including fictional", + "NORP": "Nationalities or religious or political groups", + "FACILITY": "Building, airports, highways, bridges, etc.", + "ORGANIZATION": "Companies, agencies, institutions, etc.", + "GPE": "Countries, cities, states.", + "LOCATION": "Non-GPE locations, mountain ranges, bodies of water", + "PRODUCT": "Vehicles, weapons, foods, etc. (not services)", + "EVENT": "Named hurricanes, battles, wars, sports events, etc.", + "WORK OF ART": "Titles of books, songs, etc.", + "LAW": "Named documents made into laws", + "LANGUAGE": "Any named language", + "DATE": "Absolute or relative dates or periods", + "TIME": "Times smaller than a day", + "PERCENT": "Percentage", + "MONEY": "Monetary values, including unit", + "QUANTITY": "Measurements, as of weight or distance", + "ORDINAL": '"first", "second"', + "CARDINAL": "Numerals that do no fall under another type", + } + + nlp.add_pipe( + "llm", + config={ + "task": { + "@llm_tasks": "spacy.NER.v2", + "labels": list(label_desc.keys()), + "label_definitions": label_desc, + }, + "backend": { + "@llm_backends": "spacy.REST.v1", + "api": "OpenAI", + "config": {"model": model}, + }, + }, + ) + nlp.initialize() + return nlp + + MODELS = { - "saattrupdan/nbailab-base-ner-scandi": scandiner_loader, "da_dacy_large_trf-0.2.0": partial(dacy.load, "da_dacy_large_trf-0.2.0"), "da_dacy_medium_trf-0.2.0": partial(dacy.load, "da_dacy_medium_trf-0.2.0"), "da_dacy_small_trf-0.2.0": partial(dacy.load, "da_dacy_small_trf-0.2.0"), @@ -39,6 +107,7 @@ def spacy_wrap_loader(mdl: str) -> Language: dacy.load, "da_dacy_small_ner_fine_grained-0.1.0", ), + "saattrupdan/nbailab-base-ner-scandi": scandiner_loader, "alexandrainst/da-ner-base": partial( spacy_wrap_loader, "alexandrainst/da-ner-base", @@ -47,4 +116,8 @@ def spacy_wrap_loader(mdl: str) -> Language: "da_core_news_lg-3.5.0": partial(spacy.load, "da_core_news_lg"), "da_core_news_md-3.5.0": partial(spacy.load, "da_core_news_md"), "da_core_news_sm-3.5.0": partial(spacy.load, "da_core_news_sm"), + "openai/gpt-3.5-turbo (02/05/23)": partial( + openai_model_loader_simple_ner, model="gpt-3.5-turbo" + ), + "openai/gpt-4 (02/05/23)": partial(openai_model_loader_simple_ner, model="gpt-4"), } diff --git a/docs/evaluation/requirements.txt b/docs/evaluation/requirements.txt index 264a160b..d1932062 100644 --- a/docs/evaluation/requirements.txt +++ b/docs/evaluation/requirements.txt @@ -1,8 +1,8 @@ # models required for performance benchmarks -https://github.com/explosion/spacy-models/releases/download/da_core_news_sm-3.5.0/da_core_news_sm-3.5.0-py3-none-any.whl -https://github.com/explosion/spacy-models/releases/download/da_core_news_md-3.5.0/da_core_news_md-3.5.0-py3-none-any.whl -https://github.com/explosion/spacy-models/releases/download/da_core_news_lg-3.5.0/da_core_news_lg-3.5.0-py3-none-any.whl -https://github.com/explosion/spacy-models/releases/download/da_core_news_trf-3.5.0/da_core_news_trf-3.5.0-py3-none-any.whl +# https://github.com/explosion/spacy-models/releases/download/da_core_news_sm-3.5.0/da_core_news_sm-3.5.0-py3-none-any.whl +# https://github.com/explosion/spacy-models/releases/download/da_core_news_md-3.5.0/da_core_news_md-3.5.0-py3-none-any.whl +# https://github.com/explosion/spacy-models/releases/download/da_core_news_lg-3.5.0/da_core_news_lg-3.5.0-py3-none-any.whl +# https://github.com/explosion/spacy-models/releases/download/da_core_news_trf-3.5.0/da_core_news_trf-3.5.0-py3-none-any.whl # https://huggingface.co/chcaa/da_dacy_medium_ner_fine_grained/resolve/main/da_dacy_medium_ner_fine_grained-any-py3-none-any.whl # https://huggingface.co/chcaa/da_dacy_large_ner_fine_grained/resolve/main/da_dacy_large_ner_fine_grained-any-py3-none-any.whl # https://huggingface.co/chcaa/da_dacy_small_ner_fine_grained/resolve/main/da_dacy_small_ner_fine_grained-any-py3-none-any.whl @@ -10,4 +10,5 @@ https://github.com/explosion/spacy-models/releases/download/da_core_news_trf-3.5 dacy>=2.6.0 altair>=4.1.0 datasets>=1.14.0 -augmenty[all]>=1.3.7 \ No newline at end of file +augmenty[all]>=1.3.7 +spacy-llm>=0.2.0 \ No newline at end of file diff --git a/docs/performance.rst b/docs/performance.rst index 08bb147e..6de152a9 100644 --- a/docs/performance.rst +++ b/docs/performance.rst @@ -8,4 +8,4 @@ DaCy is evaulate contains performance metrics for performance.general performance.robustness - tutorials/performance_ner \ No newline at end of file + performance_ner \ No newline at end of file diff --git a/docs/performance_ner.ipynb b/docs/performance_ner.ipynb index 5bfd5f63..ccd73703 100644 --- a/docs/performance_ner.ipynb +++ b/docs/performance_ner.ipynb @@ -61,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": { "tags": [ "remove-cell" @@ -86,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 24, "metadata": { "tags": [ "remove-input" @@ -148,16 +148,13 @@ }, "outputs": [], "source": [ - "import sys\n", - "sys.path.append(\"../..\")\n", - "\n", "from evaluation.models import MODELS\n", "from evaluation.utils import apply_models" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": { "tags": [ "remove-cell" @@ -168,18 +165,20 @@ "name": "stdout", "output_type": "stream", "text": [ - "dane (test): Loading prediction for saattrupdan/nbailab-base-ner-scandi\n", "dane (test): Loading prediction for da_dacy_large_trf-0.2.0\n", "dane (test): Loading prediction for da_dacy_medium_trf-0.2.0\n", "dane (test): Loading prediction for da_dacy_small_trf-0.2.0\n", "dane (test): Loading prediction for da_dacy_large_ner_fine_grained-0.1.0\n", - "dane (test): Running da_dacy_medium_ner_fine_grained-0.1.0\n", + "dane (test): Loading prediction for da_dacy_medium_ner_fine_grained-0.1.0\n", "dane (test): Loading prediction for da_dacy_small_ner_fine_grained-0.1.0\n", + "dane (test): Loading prediction for saattrupdan/nbailab-base-ner-scandi\n", "dane (test): Loading prediction for alexandrainst/da-ner-base\n", "dane (test): Loading prediction for da_core_news_trf-3.5.0\n", "dane (test): Loading prediction for da_core_news_lg-3.5.0\n", "dane (test): Loading prediction for da_core_news_md-3.5.0\n", - "dane (test): Loading prediction for da_core_news_sm-3.5.0\n" + "dane (test): Loading prediction for da_core_news_sm-3.5.0\n", + "dane (test): Loading prediction for openai/gpt-3.5-turbo (02/05/23)\n", + "dane (test): Running openai/gpt-4 (02/05/23)\n" ] } ], @@ -192,7 +191,32 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# normalize labels to match the dataset\n", + "for mdl in dane:\n", + " if \"openai\" not in mdl:\n", + " continue\n", + " examples = dane[mdl][\"examples\"]\n", + " mapping = {\n", + " \"PERSON\": \"PER\",\n", + " \"ORGANISATION\": \"ORG\",\n", + " \"LOCATION\": \"LOC\",\n", + " }\n", + " for e in examples:\n", + " ents = e.x.ents\n", + " for ent in ents:\n", + " ent.label_ = mapping[ent.label_]\n", + "\n", + " e.x.ents = ents\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 3, "metadata": { "tags": [ "remove-cell" @@ -260,50 +284,7 @@ "remove-cell" ] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" - ] - } - ], + "outputs": [], "source": [ "from multiprocessing import Pool\n", "with Pool(8) as p:\n", @@ -326,121 +307,137 @@ "data": { "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
F1 score with 95% confidence interval calculated using bootstrapping with 500 samples.
F1F1
ModelsAverageLocationPersonOrganizationMisc.ModelsAverageLocationPersonOrganizationMisc.
saattrupdan/nbailab-base-ner-scandi86.3 (82.4, 89.7)88.6 (83.0, 93.3)95.1 (92.4, 97.8)80.3 (73.6, 85.8)78.6 (69.4, 86.0)da_dacy_large_trf-0.2.085.4 (81.2, 88.9)89.5 (84.0, 94.7)92.6 (89.0, 95.4)79.0 (72.5, 84.6)79.0 (70.8, 86.0)
da_dacy_medium_trf-0.2.084.9 (81.0, 88.5)86.8 (81.2, 92.3)92.7 (89.2, 95.6)78.7 (71.8, 85.0)78.7 (70.6, 86.1)
da_dacy_small_trf-0.2.082.7 (79.3, 85.9)84.2 (78.3, 89.8)92.2 (88.5, 95.1)75.9 (69.3, 81.7)75.7 (68.8, 81.8)
da_dacy_large_trf-0.2.085.4 (81.2, 88.9)89.5 (84.0, 94.7)92.6 (89.0, 95.4)79.0 (72.5, 84.6)79.0 (70.8, 86.0)saattrupdan/nbailab-base-ner-scandi86.3 (82.4, 89.7)88.6 (83.0, 93.3)95.1 (92.4, 97.8)80.3 (73.6, 85.8)78.6 (69.4, 86.0)
da_dacy_medium_trf-0.2.084.9 (81.0, 88.5)86.8 (81.2, 92.3)92.7 (89.2, 95.6)78.7 (71.8, 85.0)78.7 (70.6, 86.1)alexandrainst/da-ner-base70.7 (66.2, 75.2)84.8 (77.8, 91.0)90.3 (86.3, 93.9)64.7 (57.0, 71.3)
da_dacy_small_trf-0.2.082.7 (79.3, 85.9)84.2 (78.3, 89.8)92.2 (88.5, 95.1)75.9 (69.3, 81.7)75.7 (68.8, 81.8)da_core_news_trf-3.5.079.0 (75.1, 82.3)82.1 (75.5, 88.5)91.6 (88.2, 94.5)68.0 (61.0, 75.2)69.0 (61.1, 77.3)
alexandrainst/da-ner-base70.7 (66.2, 75.2)84.8 (77.8, 91.0)90.3 (86.3, 93.9)64.7 (57.0, 71.3) da_core_news_lg-3.5.074.6 (70.8, 78.1)81.6 (75.3, 88.2)85.5 (81.1, 89.9)62.7 (54.8, 70.3)64.4 (55.9, 72.8)
da_core_news_trf-3.5.079.0 (75.1, 82.3)82.1 (75.5, 88.5)91.6 (88.2, 94.5)68.0 (61.0, 75.2)69.0 (61.1, 77.3)da_core_news_md-3.5.071.2 (66.9, 75.2)76.8 (69.9, 83.6)82.6 (77.8, 87.0)58.2 (49.6, 66.7)61.8 (52.6, 70.6)
da_core_news_lg-3.5.074.6 (70.8, 78.1)81.6 (75.3, 88.2)85.5 (81.1, 89.9)62.7 (54.8, 70.3)64.4 (55.9, 72.8)da_core_news_sm-3.5.064.4 (59.7, 68.5)61.6 (52.2, 69.9)80.1 (74.9, 85.1)49.0 (39.0, 57.5)58.4 (49.8, 67.1)
da_core_news_md-3.5.071.2 (66.9, 75.2)76.8 (69.9, 83.6)82.6 (77.8, 87.0)58.2 (49.6, 66.7)61.8 (52.6, 70.6)openai/gpt-3.5-turbo (02/05/23)57.5 (52.3, 62.2)50.7 (41.9, 59.2)81.9 (76.8, 86.5)55.7 (47.1, 63.7)
da_core_news_sm-3.5.064.4 (59.7, 68.5)61.6 (52.2, 69.9)80.1 (74.9, 85.1)49.0 (39.0, 57.5)58.4 (49.8, 67.1)openai/gpt-4 (02/05/23)70.1 (66.0, 74.3)78.9 (71.5, 85.7)85.3 (80.4, 89.5)72.0 (65.4, 78.5)
\n" ], "text/plain": [ - "" + "" ] }, "execution_count": 6, @@ -470,9 +467,7 @@ ":class: note\n", "\n", "These tables are continually updated and thus we try to limit the number of models to only the most relevant Danish models. Therefore models like Polyglot with strict requirements and consistently worse performance are excluded. If you want to see a specific model, please open an issue on GitHub.\n", - "```\n", - "\n", - "\n" + "```" ] }, { @@ -525,7 +520,20 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from functools import partial\n", + "from evaluation.models import openai_model_loader_fine_ner\n", + "MODELS_ = MODELS.copy()\n", + "MODELS_[\"openai/gpt-3.5-turbo (02/05/23)\"] = partial(openai_model_loader_fine_ner, model=\"gpt-3.5-turbo\")\n", + "MODELS_[\"openai/gpt-4 (02/05/23)\"] = partial(openai_model_loader_fine_ner, model=\"gpt-4\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, "metadata": { "tags": [ "remove-cell" @@ -536,9 +544,6 @@ "name": "stdout", "output_type": "stream", "text": [ - "dansk (train): Loading prediction for saattrupdan/nbailab-base-ner-scandi\n", - "dansk (dev): Loading prediction for saattrupdan/nbailab-base-ner-scandi\n", - "dansk (test): Loading prediction for saattrupdan/nbailab-base-ner-scandi\n", "dansk (train): Loading prediction for da_dacy_large_trf-0.2.0\n", "dansk (dev): Loading prediction for da_dacy_large_trf-0.2.0\n", "dansk (test): Loading prediction for da_dacy_large_trf-0.2.0\n", @@ -551,57 +556,15 @@ "dansk (train): Loading prediction for da_dacy_large_ner_fine_grained-0.1.0\n", "dansk (dev): Loading prediction for da_dacy_large_ner_fine_grained-0.1.0\n", "dansk (test): Loading prediction for da_dacy_large_ner_fine_grained-0.1.0\n", - "dansk (train): Running da_dacy_medium_ner_fine_grained-0.1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Found cached dataset parquet (/Users/au561649/.cache/huggingface/datasets/chcaa___parquet/chcaa--DANSK-8622a47955f5c4cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n", - "Found cached dataset parquet (/Users/au561649/.cache/huggingface/datasets/chcaa___parquet/chcaa--DANSK-8622a47955f5c4cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n", - "Found cached dataset parquet (/Users/au561649/.cache/huggingface/datasets/chcaa___parquet/chcaa--DANSK-8622a47955f5c4cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "dansk (dev): Running da_dacy_medium_ner_fine_grained-0.1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Found cached dataset parquet (/Users/au561649/.cache/huggingface/datasets/chcaa___parquet/chcaa--DANSK-8622a47955f5c4cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n", - "Found cached dataset parquet (/Users/au561649/.cache/huggingface/datasets/chcaa___parquet/chcaa--DANSK-8622a47955f5c4cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n", - "Found cached dataset parquet (/Users/au561649/.cache/huggingface/datasets/chcaa___parquet/chcaa--DANSK-8622a47955f5c4cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "dansk (test): Running da_dacy_medium_ner_fine_grained-0.1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Found cached dataset parquet (/Users/au561649/.cache/huggingface/datasets/chcaa___parquet/chcaa--DANSK-8622a47955f5c4cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n", - "Found cached dataset parquet (/Users/au561649/.cache/huggingface/datasets/chcaa___parquet/chcaa--DANSK-8622a47955f5c4cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n", - "Found cached dataset parquet (/Users/au561649/.cache/huggingface/datasets/chcaa___parquet/chcaa--DANSK-8622a47955f5c4cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "dansk (train): Loading prediction for da_dacy_medium_ner_fine_grained-0.1.0\n", + "dansk (dev): Loading prediction for da_dacy_medium_ner_fine_grained-0.1.0\n", + "dansk (test): Loading prediction for da_dacy_medium_ner_fine_grained-0.1.0\n", "dansk (train): Loading prediction for da_dacy_small_ner_fine_grained-0.1.0\n", "dansk (dev): Loading prediction for da_dacy_small_ner_fine_grained-0.1.0\n", "dansk (test): Loading prediction for da_dacy_small_ner_fine_grained-0.1.0\n", + "dansk (train): Loading prediction for saattrupdan/nbailab-base-ner-scandi\n", + "dansk (dev): Loading prediction for saattrupdan/nbailab-base-ner-scandi\n", + "dansk (test): Loading prediction for saattrupdan/nbailab-base-ner-scandi\n", "dansk (train): Loading prediction for alexandrainst/da-ner-base\n", "dansk (dev): Loading prediction for alexandrainst/da-ner-base\n", "dansk (test): Loading prediction for alexandrainst/da-ner-base\n", @@ -616,22 +579,36 @@ "dansk (test): Loading prediction for da_core_news_md-3.5.0\n", "dansk (train): Loading prediction for da_core_news_sm-3.5.0\n", "dansk (dev): Loading prediction for da_core_news_sm-3.5.0\n", - "dansk (test): Loading prediction for da_core_news_sm-3.5.0\n" + "dansk (test): Loading prediction for da_core_news_sm-3.5.0\n", + "dansk (train): Running openai/gpt-3.5-turbo (02/05/23)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Found cached dataset parquet (/Users/au561649/.cache/huggingface/datasets/chcaa___parquet/chcaa--DANSK-8622a47955f5c4cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n", + "Found cached dataset parquet (/Users/au561649/.cache/huggingface/datasets/chcaa___parquet/chcaa--DANSK-8622a47955f5c4cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n", + "Found cached dataset parquet (/Users/au561649/.cache/huggingface/datasets/chcaa___parquet/chcaa--DANSK-8622a47955f5c4cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n" ] } ], "source": [ "dansk = {}\n", - "for mdl_name, model_getter in MODELS.items():\n", + "for mdl_name, model_getter in MODELS_.items():\n", + " if \"openai\" in mdl_name:\n", + " splits=[\"test\"]\n", + " else:\n", + " splits=[\"train\", \"dev\", \"test\"]\n", " mdl_results = apply_models(\n", - " mdl_name, model_getter, dataset=\"dansk\", splits=[\"train\", \"dev\", \"test\"]\n", + " mdl_name, model_getter, dataset=\"dansk\", splits=splits\n", " )\n", " dansk[mdl_name] = mdl_results" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "tags": [ "remove-cell" @@ -687,7 +664,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": { "tags": [ "remove-cell" @@ -757,7 +734,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": { "tags": [ "remove-input" @@ -850,7 +827,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": { "tags": [ "remove-cell" @@ -884,7 +861,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": { "tags": [ "remove-cell" @@ -912,7 +889,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": { "tags": [ "remove-input"