Skip to content

Commit

Permalink
docs: Added openai models
Browse files Browse the repository at this point in the history
  • Loading branch information
KennethEnevoldsen committed Jun 3, 2023
1 parent a88554c commit a2a12a6
Show file tree
Hide file tree
Showing 4 changed files with 249 additions and 198 deletions.
77 changes: 75 additions & 2 deletions docs/evaluation/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@

from functools import partial

import dacy
import spacy
from spacy.language import Language

import dacy


def scandiner_loader() -> Language:
scandiner = spacy.blank("da")
Expand All @@ -22,8 +23,75 @@ def spacy_wrap_loader(mdl: str) -> Language:
return daner_base


def openai_model_loader_simple_ner(model: str) -> Language:
    """Build a blank Danish pipeline with a single LLM-backed NER component.

    The ``llm`` component is configured with the ``spacy.NER.v2`` task, using
    three coarse labels (PERSON, ORGANISATION, LOCATION) and a short definition
    for each, and with the ``spacy.REST.v1`` backend whose ``api`` is
    ``"OpenAI"``.

    Args:
        model: Model name placed in the backend config, e.g. ``"gpt-4"``.

    Returns:
        The pipeline, after ``initialize()`` has been called on it.
    """
    # NOTE(review): depends on the "spacy.NER.v2" / "spacy.REST.v1" registry
    # entries being provided by the installed spacy-llm version -- confirm.
    definitions = {
        "PERSON": "People, including fictional",
        "ORGANISATION": "Companies, agencies, institutions, etc.",
        "LOCATION": "Countries, cities, states, mountain ranges, bodies of water etc.",
    }
    ner_task = {
        "@llm_tasks": "spacy.NER.v2",
        # Label order follows the insertion order of the definitions above.
        "labels": list(definitions),
        "label_definitions": definitions,
    }
    rest_backend = {
        "@llm_backends": "spacy.REST.v1",
        "api": "OpenAI",
        "config": {"model": model},
    }

    pipeline = spacy.blank("da")
    pipeline.add_pipe("llm", config={"task": ner_task, "backend": rest_backend})
    pipeline.initialize()
    return pipeline


def openai_model_loader_fine_ner(model: str) -> Language:
    """Build a blank Danish pipeline with a fine-grained LLM-backed NER component.

    Works like the coarse-label loader, but the ``spacy.NER.v2`` task is given
    eighteen labels (PERSON, NORP, FACILITY, ... CARDINAL), each with a short
    definition. Requests go through the ``spacy.REST.v1`` backend whose
    ``api`` is ``"OpenAI"``.

    Args:
        model: Model name placed in the backend config, e.g. ``"gpt-4"``.

    Returns:
        The pipeline, after ``initialize()`` has been called on it.
    """
    # Label name -> definition; both are passed to the task config, so every
    # string here is runtime text and must not be edited casually.
    definitions = {
        "PERSON": "People, including fictional",
        "NORP": "Nationalities or religious or political groups",
        "FACILITY": "Building, airports, highways, bridges, etc.",
        "ORGANIZATION": "Companies, agencies, institutions, etc.",
        "GPE": "Countries, cities, states.",
        "LOCATION": "Non-GPE locations, mountain ranges, bodies of water",
        "PRODUCT": "Vehicles, weapons, foods, etc. (not services)",
        "EVENT": "Named hurricanes, battles, wars, sports events, etc.",
        "WORK OF ART": "Titles of books, songs, etc.",
        "LAW": "Named documents made into laws",
        "LANGUAGE": "Any named language",
        "DATE": "Absolute or relative dates or periods",
        "TIME": "Times smaller than a day",
        "PERCENT": "Percentage",
        "MONEY": "Monetary values, including unit",
        "QUANTITY": "Measurements, as of weight or distance",
        "ORDINAL": '"first", "second"',
        # NOTE(review): "do no" looks like a typo for "do not"; kept verbatim
        # because changing it would change the text given to the model.
        "CARDINAL": "Numerals that do no fall under another type",
    }
    llm_config = {
        "task": {
            "@llm_tasks": "spacy.NER.v2",
            # Label order follows the insertion order of the definitions.
            "labels": list(definitions),
            "label_definitions": definitions,
        },
        "backend": {
            "@llm_backends": "spacy.REST.v1",
            "api": "OpenAI",
            "config": {"model": model},
        },
    }

    pipeline = spacy.blank("da")
    pipeline.add_pipe("llm", config=llm_config)
    pipeline.initialize()
    return pipeline


MODELS = {
"saattrupdan/nbailab-base-ner-scandi": scandiner_loader,
"da_dacy_large_trf-0.2.0": partial(dacy.load, "da_dacy_large_trf-0.2.0"),
"da_dacy_medium_trf-0.2.0": partial(dacy.load, "da_dacy_medium_trf-0.2.0"),
"da_dacy_small_trf-0.2.0": partial(dacy.load, "da_dacy_small_trf-0.2.0"),
Expand All @@ -39,6 +107,7 @@ def spacy_wrap_loader(mdl: str) -> Language:
dacy.load,
"da_dacy_small_ner_fine_grained-0.1.0",
),
"saattrupdan/nbailab-base-ner-scandi": scandiner_loader,
"alexandrainst/da-ner-base": partial(
spacy_wrap_loader,
"alexandrainst/da-ner-base",
Expand All @@ -47,4 +116,8 @@ def spacy_wrap_loader(mdl: str) -> Language:
"da_core_news_lg-3.5.0": partial(spacy.load, "da_core_news_lg"),
"da_core_news_md-3.5.0": partial(spacy.load, "da_core_news_md"),
"da_core_news_sm-3.5.0": partial(spacy.load, "da_core_news_sm"),
"openai/gpt-3.5-turbo (02/05/23)": partial(
openai_model_loader_simple_ner, model="gpt-3.5-turbo"
),
"openai/gpt-4 (02/05/23)": partial(openai_model_loader_simple_ner, model="gpt-4"),
}
11 changes: 6 additions & 5 deletions docs/evaluation/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# models required for performance benchmarks
https://github.com/explosion/spacy-models/releases/download/da_core_news_sm-3.5.0/da_core_news_sm-3.5.0-py3-none-any.whl
https://github.com/explosion/spacy-models/releases/download/da_core_news_md-3.5.0/da_core_news_md-3.5.0-py3-none-any.whl
https://github.com/explosion/spacy-models/releases/download/da_core_news_lg-3.5.0/da_core_news_lg-3.5.0-py3-none-any.whl
https://github.com/explosion/spacy-models/releases/download/da_core_news_trf-3.5.0/da_core_news_trf-3.5.0-py3-none-any.whl
# https://github.com/explosion/spacy-models/releases/download/da_core_news_sm-3.5.0/da_core_news_sm-3.5.0-py3-none-any.whl
# https://github.com/explosion/spacy-models/releases/download/da_core_news_md-3.5.0/da_core_news_md-3.5.0-py3-none-any.whl
# https://github.com/explosion/spacy-models/releases/download/da_core_news_lg-3.5.0/da_core_news_lg-3.5.0-py3-none-any.whl
# https://github.com/explosion/spacy-models/releases/download/da_core_news_trf-3.5.0/da_core_news_trf-3.5.0-py3-none-any.whl
# https://huggingface.co/chcaa/da_dacy_medium_ner_fine_grained/resolve/main/da_dacy_medium_ner_fine_grained-any-py3-none-any.whl
# https://huggingface.co/chcaa/da_dacy_large_ner_fine_grained/resolve/main/da_dacy_large_ner_fine_grained-any-py3-none-any.whl
# https://huggingface.co/chcaa/da_dacy_small_ner_fine_grained/resolve/main/da_dacy_small_ner_fine_grained-any-py3-none-any.whl

dacy>=2.6.0
altair>=4.1.0
datasets>=1.14.0
augmenty[all]>=1.3.7
augmenty[all]>=1.3.7
spacy-llm>=0.2.0

This comment has been minimized.

Copy link
@svlandeg

svlandeg Jun 28, 2023

Hi @KennethEnevoldsen! As a heads-up, you'll want to pin spacy-llm to the next minor version, 0.3.0 in this case, seeing how it's currently evolving quite quickly — we'll be making breaking changes in the upcoming 0.4.0, for instance.

This comment has been minimized.

Copy link
@KennethEnevoldsen

KennethEnevoldsen Jun 28, 2023

Author Collaborator

Thanks for the heads-up @svlandeg. This here was just used for a one-off benchmark so it won't be an issue.

2 changes: 1 addition & 1 deletion docs/performance.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ DaCy is evaulate contains performance metrics for

performance.general
performance.robustness
tutorials/performance_ner
performance_ner
Loading

0 comments on commit a2a12a6

Please sign in to comment.