Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix missing pitches in neume ngrams + refactorings #913

Merged
merged 7 commits into from
Oct 4, 2024
51 changes: 35 additions & 16 deletions app/public/cantusdata/admin/admin.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,31 @@
from django.contrib import admin
from django.contrib.admin import ModelAdmin
from django.db.models import Model
from django.db.models.query import QuerySet
from django.http import HttpRequest

from django_celery_results.models import TaskResult # type: ignore[import-untyped]
from django_celery_results.admin import TaskResultAdmin # type: ignore[import-untyped]

from cantusdata.models.manuscript import Manuscript
from cantusdata.models.chant import Chant
from cantusdata.models.folio import Folio
from cantusdata.models.plugin import Plugin
from cantusdata.models.neume_exemplar import NeumeExemplar
from cantusdata.tasks import chant_import_task
from django_celery_results.models import TaskResult
from django_celery_results.admin import TaskResultAdmin


def reindex_in_solr(modeladmin, request, queryset):
@admin.action(description="ReIndex in Solr")
def reindex_in_solr(
modeladmin: ModelAdmin, # type: ignore[type-arg]
request: HttpRequest,
queryset: QuerySet[Model],
) -> None:
for item in queryset:
item.save()


reindex_in_solr.short_description = "ReIndex in Solr"


class ManuscriptAdmin(admin.ModelAdmin):
class ManuscriptAdmin(ModelAdmin): # type: ignore[type-arg]
actions = [reindex_in_solr, "load_chants"]
ordering = ["-public", "name"]
list_per_page = 200
Expand Down Expand Up @@ -49,6 +57,13 @@ class ManuscriptAdmin(admin.ModelAdmin):
"chants_loaded",
"is_mapped",
"dbl_folio_img",
]
},
),
(
"Search",
{
"fields": [
"plugins",
]
},
Expand All @@ -68,39 +83,43 @@ class ManuscriptAdmin(admin.ModelAdmin):
description="Imports the chants associated \
with the selected manuscript(s)"
)
def load_chants(self, request, queryset):
def load_chants(self, request: HttpRequest, queryset: QuerySet[Manuscript]) -> None:
for ms in queryset:
chant_import_task.apply_async(kwargs={"manuscript_ids": [ms.pk]})
self.message_user(
request,
"Importing chants for the selected manuscripts. This may take a few minutes. Check status on the Task Results page.",
(
"Importing chants for the selected manuscripts. "
"This may take a few minutes. "
"Check status on the Task Results page."
),
)


class ChantAdmin(admin.ModelAdmin):
class ChantAdmin(ModelAdmin): # type: ignore[type-arg]
actions = [reindex_in_solr]


class FolioAdmin(admin.ModelAdmin):
class FolioAdmin(ModelAdmin): # type: ignore[type-arg]
actions = [reindex_in_solr]
readonly_fields = ("chant_count",)


class PluginAdmin(admin.ModelAdmin):
class PluginAdmin(ModelAdmin): # type: ignore[type-arg]
readonly_fields = ("slug",)


class NeumeExemplarAdmin(admin.ModelAdmin):
list_display = ("admin_image", "__str__")
class NeumeExemplarAdmin(ModelAdmin): # type: ignore[type-arg]
list_display = ("name", "folio")
readonly_fields = ("admin_image",)


class NewTaskResultAdmin(TaskResultAdmin):
class NewTaskResultAdmin(TaskResultAdmin): # type: ignore[misc]
list_display = ("task_name", "date_done", "status", "get_task_manuscript_ids")
list_filter = ("status", "date_done", "task_name")

@admin.display(description="Manuscript(s)")
def get_task_manuscript_ids(self, obj):
def get_task_manuscript_ids(self, obj: TaskResult) -> list[Manuscript]:
if obj.status == "RECEIVED":
obj_man_ids = eval(obj.task_kwargs)["manuscript_ids"]
else:
Expand Down
34 changes: 7 additions & 27 deletions app/public/cantusdata/helpers/mei_processing/mei_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,44 +13,24 @@
Defines associated types for the data structures used by the parser.
"""

from typing import Tuple, Dict, List, Iterator, Optional, Literal
from typing import Tuple, Dict, List, Iterator, Optional
from lxml import etree # pylint: disable=no-name-in-module
from cantusdata.helpers.neume_helpers import NEUME_GROUPS, NeumeName
from .mei_parsing_types import (
Zone,
SyllableText,
NeumeComponentElementData,
NeumeComponent,
ContourType,
NeumeName,
Neume,
Syllable,
)
from .bounding_box_utils import combine_bounding_boxes_single_system


# Mapping from pitch names to integer pitch class where C = 0
PITCH_CLASS = {"c": 0, "d": 2, "e": 4, "f": 5, "g": 7, "a": 9, "b": 11}

# Mapping from neume contours to neume names
NEUME_GROUPS: Dict[str, NeumeName] = {
"": "punctum",
"u": "pes",
"d": "clivis",
"uu": "scandicus",
"ud": "torculus",
"du": "porrectus",
"r": "distropha",
"rr": "tristopha",
"rd": "pressus",
"dd": "climacus",
"ddu": "climacus_resupinus",
"udu": "torculus_resupinus",
"dud": "porrectus_flexus",
"udd": "pes_subpunctis",
"uud": "scandicus_flexus",
"uudd": "scandicus_subpunctis",
"dudd": "porrectus_subpunctis",
}


class MEIParser:
"""
Expand Down Expand Up @@ -122,7 +102,7 @@ def _get_element_zone(self, element: etree._Element) -> Zone:
return zone
return {"coordinates": (-1, -1, -1, -1), "rotate": 0.0}

def _parse_syllable_text(self, syl_elem: Optional[etree.Element]) -> SyllableText:
def _parse_syllable_text(self, syl_elem: Optional[etree._Element]) -> SyllableText:
"""
Get the text of a syllable and its associated bounding box from
a 'syl' element.
Expand All @@ -132,8 +112,8 @@ def _parse_syllable_text(self, syl_elem: Optional[etree.Element]) -> SyllableTex
"""
# Ignoring type of next two expressions because for some reason
# mypy thinks they are unreachable, but we know they are not.
if syl_elem is not None and syl_elem.text: # type: ignore
text_dict: SyllableText = { # type: ignore
if syl_elem is not None and syl_elem.text:
text_dict: SyllableText = {
"text": syl_elem.text.strip(),
"bounding_box": self._get_element_zone(syl_elem),
}
Expand Down Expand Up @@ -305,7 +285,7 @@ def _syllable_iterator(
elem_iterator = first_syllable.itersiblings(
tag=[f"{self.MEINS}syllable", f"{self.MEINS}sb"]
)
current_elem = first_syllable
current_elem: Optional[etree._Element] = first_syllable
while current_elem is not None:
if current_elem.tag == f"{self.MEINS}syllable":
current_syl = current_elem.find(f"{self.MEINS}syl")
Expand Down
22 changes: 2 additions & 20 deletions app/public/cantusdata/helpers/mei_processing/mei_parsing_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from typing import Tuple, TypedDict, Literal, List, Optional, NotRequired
from typing_extensions import TypeAlias

from cantusdata.helpers.neume_helpers import NeumeName

# A type for coordinates of bounding boxes
CoordinatesType: TypeAlias = Tuple[int, int, int, int]
"""
Expand All @@ -31,26 +33,6 @@ class Zone(TypedDict):


ContourType = Literal["u", "d", "r"]
NeumeName = Literal[
"punctum",
"pes",
"clivis",
"scandicus",
"torculus",
"porrectus",
"distropha",
"tristopha",
"pressus",
"climacus",
"climacus_resupinus",
"torculus_resupinus",
"porrectus_flexus",
"pes_subpunctis",
"scandicus_flexus",
"scandicus_subpunctis",
"porrectus_subpunctis",
"compound",
]


class NeumeComponentElementData(TypedDict):
Expand Down
19 changes: 13 additions & 6 deletions app/public/cantusdata/helpers/mei_processing/mei_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@

import uuid
from typing import List, Tuple, Optional
from cantusdata.helpers.neume_helpers import NeumeName
from .mei_parser import MEIParser
from .mei_parsing_types import (
Neume,
NeumeComponent,
ContourType,
NeumeName,
NgramDocument,
Zone,
)
Expand Down Expand Up @@ -159,6 +159,8 @@ def create_ngram_documents(self) -> List[NgramDocument]:
# At each pitch in the file, we'll generate all the necessary
# ngrams that start with that pitch.
for start_idx in range(num_pitches):
# Start by collecting ngrams of pitches of lengths min_ngram
# to max_ngram.
largest_num_neumes = 0
for ngram_length in range(self.min_ngram, self.max_ngram + 1):
# Collect the pitches for an ngram of ngram_length
Expand All @@ -167,8 +169,9 @@ def create_ngram_documents(self) -> List[NgramDocument]:
end_idx = start_idx + ngram_length
if end_idx > num_pitches:
break
nc_ngram = pitches[start_idx:end_idx]
doc = self._create_document_from_neume_components(nc_ngram)
doc = self._create_document_from_neume_components(
pitches[start_idx:end_idx]
)
# If the pitch at start_idx is the beginning of a neume
# and the pitch following this ngram is also the beginning
# of a neume (or we've reached the end of the file),
Expand Down Expand Up @@ -204,11 +207,15 @@ def create_ngram_documents(self) -> List[NgramDocument]:
):
if (
name_at_pitch := neume_names[start_idx + ngram_num_pitches]
) is not None and len(ngram_neume_names) < wanted_ngram_length:
) is not None:
# If we've reached a new neume name, but we already
# have the desired number of neumes in our ngram,
# we've added all the required pitches for this ngram
# to ngram_num_pitches and can break the while loop.
if len(ngram_neume_names) == wanted_ngram_length:
break
ngram_neume_names.append(name_at_pitch)
ngram_num_pitches += 1
if len(ngram_neume_names) == wanted_ngram_length:
break
# We'll only add this ngram if we've actually gotten to
# the desired number of neumes (if we didn't, it means
# we reached the end of the file)
Expand Down
77 changes: 77 additions & 0 deletions app/public/cantusdata/helpers/neume_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""
Contains various neume-related constructs that are used throughout the backend,
especially for MEI parsing and OMR search.
"""

from typing import Dict, Literal, get_args

# NeumeName is the static type of every currently-supported neume name and
# the single source of truth for that set. The names are listed in the order
# used for UI (esp. as neume exemplars).
# Ordering is by:
# 1. The number of pitches in the neume (ascending)
# 2. The direction of the first interval in the neume (first ascending,
# then pitch repetition, then descending)
# 3+. The direction of following intervals in the neume (according to 2.)
# N. The all-purpose "compound" neume at the end
NeumeName = Literal[
    "punctum",
    "pes",
    "distropha",
    "clivis",
    "scandicus",
    "torculus",
    "tristopha",
    "pressus",
    "porrectus",
    "climacus",
    "scandicus-flexus",
    "torculus-resupinus",
    "pes-subpunctis",
    "porrectus-flexus",
    "climacus-resupinus",
    "scandicus-subpunctis",
    "porrectus-subpunctis",
    "compound",
]

# NEUME_NAMES is the runtime list of the supported neume names, in the same
# UI order as above. It is derived from NeumeName (get_args preserves the
# declaration order of a Literal's members) so that the runtime list and the
# static type cannot drift apart when a neume is added, removed or renamed.
NEUME_NAMES: list[str] = list(get_args(NeumeName))

# Mapping from neume contours to neume names.
# Each key is a contour string built from the letters "u", "d" and "r";
# the empty string maps to "punctum". The entries follow the same order
# as NEUME_NAMES.
# NOTE(review): the letters presumably stand for the interval directions
# named in the NEUME_NAMES ordering comment (u = ascending, r = pitch
# repetition, d = descending), one letter per interval -- confirm against
# the MEI parser.
# "compound" has no entry here: no contour key maps to it in this table.
NEUME_GROUPS: Dict[str, NeumeName] = {
"": "punctum",
"u": "pes",
"r": "distropha",
"d": "clivis",
"uu": "scandicus",
"ud": "torculus",
"rr": "tristopha",
"rd": "pressus",
"du": "porrectus",
"dd": "climacus",
"uud": "scandicus-flexus",
"udu": "torculus-resupinus",
"udd": "pes-subpunctis",
"dud": "porrectus-flexus",
"ddu": "climacus-resupinus",
"uudd": "scandicus-subpunctis",
"dudd": "porrectus-subpunctis",
}
21 changes: 2 additions & 19 deletions app/public/cantusdata/helpers/search_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,7 @@
queries.
"""

# Contains the words that are allowed
# in a neume_name query
VALID_NEUME_NAME_WORDS = {
"punctum",
"pes",
"clivis",
"scandicus",
"torculus",
"porrectus",
"distropha",
"tristopha",
"pressus",
"climacus",
"resupinus",
"flexus",
"subpunctis",
"compound",
}
from cantusdata.helpers.neume_helpers import NEUME_NAMES


def validate_intervals_query_word(word: str) -> bool:
Expand All @@ -45,7 +28,7 @@ def validate_query(q: list[str], q_type: str) -> bool:
"""
match q_type:
case "neume_names":
return all(neume in VALID_NEUME_NAME_WORDS for neume in q)
return all(neume in NEUME_NAMES for neume in q)
case "pitch_names" | "pitch_names_transposed":
return all(pitch in "abcdefg" for pitch in q)
case "contour":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def handle(self, *args: Any, **options: Any) -> None:
self.flush_manuscript_ngrams_from_index(solr_conn, manuscript_id)
return None
folio_map: Dict[str, str] = dict(
Folio.objects.filter(manuscript_id=manuscript_id).values_list(
Folio.objects.filter(manuscript_id=manuscript_id).values_list( # type: ignore[arg-type]
"number", "image_uri"
)
)
Expand Down
Loading
Loading