From c186451057f3aba326f7a458c09e9fb675ca8296 Mon Sep 17 00:00:00 2001
From: Dylan Hillerbrand <dhillerbrand@gmail.com>
Date: Tue, 27 Aug 2024 08:39:15 -0400
Subject: [PATCH 1/7] refactor(admin): add types and do minor refactoring on
 admin.py

---
 app/public/cantusdata/admin/admin.py | 51 +++++++++++++++++++---------
 1 file changed, 35 insertions(+), 16 deletions(-)

diff --git a/app/public/cantusdata/admin/admin.py b/app/public/cantusdata/admin/admin.py
index 0d5d081c..5392b15c 100644
--- a/app/public/cantusdata/admin/admin.py
+++ b/app/public/cantusdata/admin/admin.py
@@ -1,23 +1,31 @@
 from django.contrib import admin
+from django.contrib.admin import ModelAdmin
+from django.db.models import Model
+from django.db.models.query import QuerySet
+from django.http import HttpRequest
+
+from django_celery_results.models import TaskResult  # type: ignore[import-untyped]
+from django_celery_results.admin import TaskResultAdmin  # type: ignore[import-untyped]
+
 from cantusdata.models.manuscript import Manuscript
 from cantusdata.models.chant import Chant
 from cantusdata.models.folio import Folio
 from cantusdata.models.plugin import Plugin
 from cantusdata.models.neume_exemplar import NeumeExemplar
 from cantusdata.tasks import chant_import_task
-from django_celery_results.models import TaskResult
-from django_celery_results.admin import TaskResultAdmin
 
 
-def reindex_in_solr(modeladmin, request, queryset):
+@admin.action(description="ReIndex in Solr")
+def reindex_in_solr(
+    modeladmin: ModelAdmin,  # type: ignore[type-arg]
+    request: HttpRequest,
+    queryset: QuerySet[Model],
+) -> None:
     for item in queryset:
         item.save()
 
 
-reindex_in_solr.short_description = "ReIndex in Solr"
-
-
-class ManuscriptAdmin(admin.ModelAdmin):
+class ManuscriptAdmin(ModelAdmin):  # type: ignore[type-arg]
     actions = [reindex_in_solr, "load_chants"]
     ordering = ["-public", "name"]
     list_per_page = 200
@@ -49,6 +57,13 @@ class ManuscriptAdmin(admin.ModelAdmin):
                     "chants_loaded",
                     "is_mapped",
                     "dbl_folio_img",
+                ]
+            },
+        ),
+        (
+            "Search",
+            {
+                "fields": [
                     "plugins",
                 ]
             },
@@ -68,39 +83,43 @@ class ManuscriptAdmin(admin.ModelAdmin):
         description="Imports the chants associated \
         with the selected manuscript(s)"
     )
-    def load_chants(self, request, queryset):
+    def load_chants(self, request: HttpRequest, queryset: QuerySet[Manuscript]) -> None:
         for ms in queryset:
             chant_import_task.apply_async(kwargs={"manuscript_ids": [ms.pk]})
         self.message_user(
             request,
-            "Importing chants for the selected manuscripts. This may take a few minutes. Check status on the Task Results page.",
+            (
+                "Importing chants for the selected manuscripts. "
+                "This may take a few minutes. "
+                "Check status on the Task Results page."
+            ),
         )
 
 
-class ChantAdmin(admin.ModelAdmin):
+class ChantAdmin(ModelAdmin):  # type: ignore[type-arg]
     actions = [reindex_in_solr]
 
 
-class FolioAdmin(admin.ModelAdmin):
+class FolioAdmin(ModelAdmin):  # type: ignore[type-arg]
     actions = [reindex_in_solr]
     readonly_fields = ("chant_count",)
 
 
-class PluginAdmin(admin.ModelAdmin):
+class PluginAdmin(ModelAdmin):  # type: ignore[type-arg]
     readonly_fields = ("slug",)
 
 
-class NeumeExemplarAdmin(admin.ModelAdmin):
-    list_display = ("admin_image", "__str__")
+class NeumeExemplarAdmin(ModelAdmin):  # type: ignore[type-arg]
+    list_display = ("name", "folio")
     readonly_fields = ("admin_image",)
 
 
-class NewTaskResultAdmin(TaskResultAdmin):
+class NewTaskResultAdmin(TaskResultAdmin):  # type: ignore[misc]
     list_display = ("task_name", "date_done", "status", "get_task_manuscript_ids")
     list_filter = ("status", "date_done", "task_name")
 
     @admin.display(description="Manuscript(s)")
-    def get_task_manuscript_ids(self, obj):
+    def get_task_manuscript_ids(self, obj: TaskResult) -> list[Manuscript]:
         if obj.status == "RECEIVED":
             obj_man_ids = eval(obj.task_kwargs)["manuscript_ids"]
         else:

From 6e850a1b1620b6a029816fb8476f545e646862a5 Mon Sep 17 00:00:00 2001
From: Dylan Hillerbrand <dhillerbrand@gmail.com>
Date: Tue, 27 Aug 2024 11:47:20 -0400
Subject: [PATCH 2/7] fix(mei indexing): use hyphens in multiword neume names

Replaces underscores with hyphens to delimit words in multiword
neume names. This removes an ambiguity: underscores had been used
both to separate neume names from one another and to separate the
words within a multiword neume name.
---
 .../helpers/mei_processing/mei_parser.py           | 14 +++++++-------
 .../helpers/mei_processing/mei_parsing_types.py    | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/app/public/cantusdata/helpers/mei_processing/mei_parser.py b/app/public/cantusdata/helpers/mei_processing/mei_parser.py
index 15c4e1d9..9e942c48 100644
--- a/app/public/cantusdata/helpers/mei_processing/mei_parser.py
+++ b/app/public/cantusdata/helpers/mei_processing/mei_parser.py
@@ -42,13 +42,13 @@
     "rr": "tristopha",
     "rd": "pressus",
     "dd": "climacus",
-    "ddu": "climacus_resupinus",
-    "udu": "torculus_resupinus",
-    "dud": "porrectus_flexus",
-    "udd": "pes_subpunctis",
-    "uud": "scandicus_flexus",
-    "uudd": "scandicus_subpunctis",
-    "dudd": "porrectus_subpunctis",
+    "ddu": "climacus-resupinus",
+    "udu": "torculus-resupinus",
+    "dud": "porrectus-flexus",
+    "udd": "pes-subpunctis",
+    "uud": "scandicus-flexus",
+    "uudd": "scandicus-subpunctis",
+    "dudd": "porrectus-subpunctis",
 }
 
 
diff --git a/app/public/cantusdata/helpers/mei_processing/mei_parsing_types.py b/app/public/cantusdata/helpers/mei_processing/mei_parsing_types.py
index 014acab3..378d635a 100644
--- a/app/public/cantusdata/helpers/mei_processing/mei_parsing_types.py
+++ b/app/public/cantusdata/helpers/mei_processing/mei_parsing_types.py
@@ -42,13 +42,13 @@ class Zone(TypedDict):
     "tristopha",
     "pressus",
     "climacus",
-    "climacus_resupinus",
-    "torculus_resupinus",
-    "porrectus_flexus",
-    "pes_subpunctis",
-    "scandicus_flexus",
-    "scandicus_subpunctis",
-    "porrectus_subpunctis",
+    "climacus-resupinus",
+    "torculus-resupinus",
+    "porrectus-flexus",
+    "pes-subpunctis",
+    "scandicus-flexus",
+    "scandicus-subpunctis",
+    "porrectus-subpunctis",
     "compound",
 ]
 

From d9e0914526395fc2900eac742763ad9558b648a0 Mon Sep 17 00:00:00 2001
From: Dylan Hillerbrand <dhillerbrand@gmail.com>
Date: Thu, 19 Sep 2024 12:13:09 -0400
Subject: [PATCH 3/7] refactor(neume types): move supported neumes to their own
 helpers file

Neume types, mappings, and UI ordering are used across multiple modules.
Their own helper module is a better location than within the MEI parsing
helper module.

Updates some type annotations and typing-related dependencies in the process.
---
 .../helpers/mei_processing/mei_parser.py      | 34 ++------
 .../mei_processing/mei_parsing_types.py       | 22 +-----
 .../helpers/mei_processing/mei_tokenizer.py   |  2 +-
 .../cantusdata/helpers/neume_helpers.py       | 77 +++++++++++++++++++
 .../commands/index_manuscript_mei.py          |  2 +-
 5 files changed, 88 insertions(+), 49 deletions(-)
 create mode 100644 app/public/cantusdata/helpers/neume_helpers.py

diff --git a/app/public/cantusdata/helpers/mei_processing/mei_parser.py b/app/public/cantusdata/helpers/mei_processing/mei_parser.py
index 9e942c48..b0c9661c 100644
--- a/app/public/cantusdata/helpers/mei_processing/mei_parser.py
+++ b/app/public/cantusdata/helpers/mei_processing/mei_parser.py
@@ -13,44 +13,24 @@
 Defines associated types for the data structures used by the parser.
 """
 
-from typing import Tuple, Dict, List, Iterator, Optional, Literal
+from typing import Tuple, Dict, List, Iterator, Optional
 from lxml import etree  # pylint: disable=no-name-in-module
+from cantusdata.helpers.neume_helpers import NEUME_GROUPS, NeumeName
 from .mei_parsing_types import (
     Zone,
     SyllableText,
     NeumeComponentElementData,
     NeumeComponent,
     ContourType,
-    NeumeName,
     Neume,
     Syllable,
 )
 from .bounding_box_utils import combine_bounding_boxes_single_system
 
+
 # Mapping from pitch names to integer pitch class where C = 0
 PITCH_CLASS = {"c": 0, "d": 2, "e": 4, "f": 5, "g": 7, "a": 9, "b": 11}
 
-# Mapping from neume contours to neume names
-NEUME_GROUPS: Dict[str, NeumeName] = {
-    "": "punctum",
-    "u": "pes",
-    "d": "clivis",
-    "uu": "scandicus",
-    "ud": "torculus",
-    "du": "porrectus",
-    "r": "distropha",
-    "rr": "tristopha",
-    "rd": "pressus",
-    "dd": "climacus",
-    "ddu": "climacus-resupinus",
-    "udu": "torculus-resupinus",
-    "dud": "porrectus-flexus",
-    "udd": "pes-subpunctis",
-    "uud": "scandicus-flexus",
-    "uudd": "scandicus-subpunctis",
-    "dudd": "porrectus-subpunctis",
-}
-
 
 class MEIParser:
     """
@@ -122,7 +102,7 @@ def _get_element_zone(self, element: etree._Element) -> Zone:
             return zone
         return {"coordinates": (-1, -1, -1, -1), "rotate": 0.0}
 
-    def _parse_syllable_text(self, syl_elem: Optional[etree.Element]) -> SyllableText:
+    def _parse_syllable_text(self, syl_elem: Optional[etree._Element]) -> SyllableText:
         """
         Get the text of a syllable and its associated bounding box from
         a 'syl' element.
@@ -132,8 +112,8 @@ def _parse_syllable_text(self, syl_elem: Optional[etree.Element]) -> SyllableTex
         """
         # Ignoring type of next two expressions because for some reason
         # mypy thinks they are unreachable, but we know they are not.
-        if syl_elem is not None and syl_elem.text:  # type: ignore
-            text_dict: SyllableText = {  # type: ignore
+        if syl_elem is not None and syl_elem.text:
+            text_dict: SyllableText = {
                 "text": syl_elem.text.strip(),
                 "bounding_box": self._get_element_zone(syl_elem),
             }
@@ -305,7 +285,7 @@ def _syllable_iterator(
             elem_iterator = first_syllable.itersiblings(
                 tag=[f"{self.MEINS}syllable", f"{self.MEINS}sb"]
             )
-            current_elem = first_syllable
+            current_elem: Optional[etree._Element] = first_syllable
             while current_elem is not None:
                 if current_elem.tag == f"{self.MEINS}syllable":
                     current_syl = current_elem.find(f"{self.MEINS}syl")
diff --git a/app/public/cantusdata/helpers/mei_processing/mei_parsing_types.py b/app/public/cantusdata/helpers/mei_processing/mei_parsing_types.py
index 378d635a..50684564 100644
--- a/app/public/cantusdata/helpers/mei_processing/mei_parsing_types.py
+++ b/app/public/cantusdata/helpers/mei_processing/mei_parsing_types.py
@@ -5,6 +5,8 @@
 from typing import Tuple, TypedDict, Literal, List, Optional, NotRequired
 from typing_extensions import TypeAlias
 
+from cantusdata.helpers.neume_helpers import NeumeName
+
 # A type for coordinates of bounding boxes
 CoordinatesType: TypeAlias = Tuple[int, int, int, int]
 """
@@ -31,26 +33,6 @@ class Zone(TypedDict):
 
 
 ContourType = Literal["u", "d", "r"]
-NeumeName = Literal[
-    "punctum",
-    "pes",
-    "clivis",
-    "scandicus",
-    "torculus",
-    "porrectus",
-    "distropha",
-    "tristopha",
-    "pressus",
-    "climacus",
-    "climacus-resupinus",
-    "torculus-resupinus",
-    "porrectus-flexus",
-    "pes-subpunctis",
-    "scandicus-flexus",
-    "scandicus-subpunctis",
-    "porrectus-subpunctis",
-    "compound",
-]
 
 
 class NeumeComponentElementData(TypedDict):
diff --git a/app/public/cantusdata/helpers/mei_processing/mei_tokenizer.py b/app/public/cantusdata/helpers/mei_processing/mei_tokenizer.py
index a3bc5d62..7dcec625 100644
--- a/app/public/cantusdata/helpers/mei_processing/mei_tokenizer.py
+++ b/app/public/cantusdata/helpers/mei_processing/mei_tokenizer.py
@@ -6,12 +6,12 @@
 
 import uuid
 from typing import List, Tuple, Optional
+from cantusdata.helpers.neume_helpers import NeumeName
 from .mei_parser import MEIParser
 from .mei_parsing_types import (
     Neume,
     NeumeComponent,
     ContourType,
-    NeumeName,
     NgramDocument,
     Zone,
 )
diff --git a/app/public/cantusdata/helpers/neume_helpers.py b/app/public/cantusdata/helpers/neume_helpers.py
new file mode 100644
index 00000000..b0639d99
--- /dev/null
+++ b/app/public/cantusdata/helpers/neume_helpers.py
@@ -0,0 +1,77 @@
+"""
+Contains various neume-related constructs that are used throughout the backend,
+especially for MEI parsing and OMR search.
+"""
+
+from typing import Literal, Dict
+
+# NEUME_NAMES contains the currently-supported neumes. They are
+# included in the order used for UI (esp. as neume exemplars).
+# Ordering is by:
+#   1. The number of pitches in the neume (ascending)
+#   2. The direction of the first interval in the neume (first ascending,
+#      then pitch repetition, then descending)
+#   3+. The direction of following intervals in the neume (according to 2.)
+#   N. The all-purpose "compound" neume at the end
+NEUME_NAMES = [
+    "punctum",
+    "pes",
+    "distropha",
+    "clivis",
+    "scandicus",
+    "torculus",
+    "tristopha",
+    "pressus",
+    "porrectus",
+    "climacus",
+    "scandicus-flexus",
+    "torculus-resupinus",
+    "pes-subpunctis",
+    "porrectus-flexus",
+    "climacus-resupinus",
+    "scandicus-subpunctis",
+    "porrectus-subpunctis",
+    "compound",
+]
+
+NeumeName = Literal[
+    "punctum",
+    "pes",
+    "distropha",
+    "clivis",
+    "scandicus",
+    "torculus",
+    "tristopha",
+    "pressus",
+    "porrectus",
+    "climacus",
+    "scandicus-flexus",
+    "torculus-resupinus",
+    "pes-subpunctis",
+    "porrectus-flexus",
+    "climacus-resupinus",
+    "scandicus-subpunctis",
+    "porrectus-subpunctis",
+    "compound",
+]
+
+# Mapping from neume contours to neume names
+NEUME_GROUPS: Dict[str, NeumeName] = {
+    "": "punctum",
+    "u": "pes",
+    "r": "distropha",
+    "d": "clivis",
+    "uu": "scandicus",
+    "ud": "torculus",
+    "rr": "tristopha",
+    "rd": "pressus",
+    "du": "porrectus",
+    "dd": "climacus",
+    "uud": "scandicus-flexus",
+    "udu": "torculus-resupinus",
+    "udd": "pes-subpunctis",
+    "dud": "porrectus-flexus",
+    "ddu": "climacus-resupinus",
+    "uudd": "scandicus-subpunctis",
+    "dudd": "porrectus-subpunctis",
+}
diff --git a/app/public/cantusdata/management/commands/index_manuscript_mei.py b/app/public/cantusdata/management/commands/index_manuscript_mei.py
index 15857901..4fe58b4a 100644
--- a/app/public/cantusdata/management/commands/index_manuscript_mei.py
+++ b/app/public/cantusdata/management/commands/index_manuscript_mei.py
@@ -74,7 +74,7 @@ def handle(self, *args: Any, **options: Any) -> None:
             self.flush_manuscript_ngrams_from_index(solr_conn, manuscript_id)
             return None
         folio_map: Dict[str, str] = dict(
-            Folio.objects.filter(manuscript_id=manuscript_id).values_list(
+            Folio.objects.filter(manuscript_id=manuscript_id).values_list(  # type: ignore[arg-type]
                 "number", "image_uri"
             )
         )

From a04bb9d6083f58da91cf18acc7bbfce38c453dcc Mon Sep 17 00:00:00 2001
From: Dylan Hillerbrand <dhillerbrand@gmail.com>
Date: Thu, 19 Sep 2024 14:15:53 -0400
Subject: [PATCH 4/7] fix(search utils): import valid neume names from
 neume_helpers

---
 app/public/cantusdata/helpers/search_utils.py | 21 ++-----------------
 1 file changed, 2 insertions(+), 19 deletions(-)

diff --git a/app/public/cantusdata/helpers/search_utils.py b/app/public/cantusdata/helpers/search_utils.py
index 2de12732..6c74897b 100644
--- a/app/public/cantusdata/helpers/search_utils.py
+++ b/app/public/cantusdata/helpers/search_utils.py
@@ -3,24 +3,7 @@
 queries.
 """
 
-# Contains the words that are allowed
-# in a neume_name query
-VALID_NEUME_NAME_WORDS = {
-    "punctum",
-    "pes",
-    "clivis",
-    "scandicus",
-    "torculus",
-    "porrectus",
-    "distropha",
-    "tristopha",
-    "pressus",
-    "climacus",
-    "resupinus",
-    "flexus",
-    "subpunctis",
-    "compound",
-}
+from cantusdata.helpers.neume_helpers import NEUME_NAMES
 
 
 def validate_intervals_query_word(word: str) -> bool:
@@ -45,7 +28,7 @@ def validate_query(q: list[str], q_type: str) -> bool:
     """
     match q_type:
         case "neume_names":
-            return all(neume in VALID_NEUME_NAME_WORDS for neume in q)
+            return all(neume in NEUME_NAMES for neume in q)
         case "pitch_names" | "pitch_names_transposed":
             return all(pitch in "abcdefg" for pitch in q)
         case "contour":

From 9cb4009d21125eb46312dbda78a1defce0e49e5f Mon Sep 17 00:00:00 2001
From: Dylan Hillerbrand <dhillerbrand@gmail.com>
Date: Mon, 23 Sep 2024 13:07:58 -0400
Subject: [PATCH 5/7] fix(search utils test): update neume name query
 validation for neume name hyphens

refs: 9ff400c83a9feea0e78fccdde408d55c61d57199
---
 app/public/cantusdata/test/core/helpers/test_search_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/public/cantusdata/test/core/helpers/test_search_utils.py b/app/public/cantusdata/test/core/helpers/test_search_utils.py
index 28201a91..11b0718a 100644
--- a/app/public/cantusdata/test/core/helpers/test_search_utils.py
+++ b/app/public/cantusdata/test/core/helpers/test_search_utils.py
@@ -10,7 +10,7 @@
 class SearchUtilsTestCase(TestCase):
     def test_validate_query(self) -> None:
         with self.subTest("neume_names validation"):
-            valid_neume_names = ["punctum", "flexus", "porrectus"]
+            valid_neume_names = ["punctum", "scandicus-flexus", "porrectus"]
             invalid_neume_names = ["punctum", "flexus", "not_a_neume_name"]
             self.assertTrue(validate_query(valid_neume_names, "neume_names"))
             self.assertFalse(validate_query(invalid_neume_names, "neume_names"))

From ac06363623557dcf2480e74f74d5d742699f8890 Mon Sep 17 00:00:00 2001
From: Dylan Hillerbrand <dhillerbrand@gmail.com>
Date: Mon, 23 Sep 2024 14:07:18 -0400
Subject: [PATCH 6/7] fix(mei tokenizer): ensure neume ngrams have all required
 pitches

Fixes the logic governing the creation of ngrams of neumes in the
MEITokenizer class so that an ngram document containing multiple complete
neumes contains all of the pitches contained within those neumes.

Adds a new test to the MEITokenizer test class that ensures that
the number of pitches in an ngram agrees with the number of pitches
suggested by the neume names in the ngram.
---
 .../helpers/mei_processing/mei_tokenizer.py   | 17 ++++++---
 .../mei_processing/test_mei_tokenizer.py      | 35 +++++++++++++++++--
 2 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/app/public/cantusdata/helpers/mei_processing/mei_tokenizer.py b/app/public/cantusdata/helpers/mei_processing/mei_tokenizer.py
index 7dcec625..d16a06d7 100644
--- a/app/public/cantusdata/helpers/mei_processing/mei_tokenizer.py
+++ b/app/public/cantusdata/helpers/mei_processing/mei_tokenizer.py
@@ -159,6 +159,8 @@ def create_ngram_documents(self) -> List[NgramDocument]:
         # At each pitch in the file, we'll generate all the necessary
         # ngrams that start with that pitch.
         for start_idx in range(num_pitches):
+            # Start by collecting ngrams of pitches of lengths min_ngram
+            # to max_ngram.
             largest_num_neumes = 0
             for ngram_length in range(self.min_ngram, self.max_ngram + 1):
                 # Collect the pitches for an ngram of ngram_length
@@ -167,8 +169,9 @@ def create_ngram_documents(self) -> List[NgramDocument]:
                 end_idx = start_idx + ngram_length
                 if end_idx > num_pitches:
                     break
-                nc_ngram = pitches[start_idx:end_idx]
-                doc = self._create_document_from_neume_components(nc_ngram)
+                doc = self._create_document_from_neume_components(
+                    pitches[start_idx:end_idx]
+                )
                 # If the pitch at start_idx is the beginning of a neume
                 # and the pitch following this ngram is also the beginning
                 # of a neume (or we've reached the end of the file),
@@ -204,11 +207,15 @@ def create_ngram_documents(self) -> List[NgramDocument]:
                     ):
                         if (
                             name_at_pitch := neume_names[start_idx + ngram_num_pitches]
-                        ) is not None and len(ngram_neume_names) < wanted_ngram_length:
+                        ) is not None:
+                            # If we've reached a new neume name, but we already
+                            # have the desired number of neumes in our ngram,
+                            # we've added all the required pitches for this ngram
+                            # to ngram_num_pitches and can break the while loop.
+                            if len(ngram_neume_names) == wanted_ngram_length:
+                                break
                             ngram_neume_names.append(name_at_pitch)
                         ngram_num_pitches += 1
-                        if len(ngram_neume_names) == wanted_ngram_length:
-                            break
                     # We'll only add this ngram if we've actually gotten to
                     # the desired number of neumes (if we didn't, it means
                     # we reached the end of the file)
diff --git a/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_tokenizer.py b/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_tokenizer.py
index fd054bda..8a0c85f8 100644
--- a/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_tokenizer.py
+++ b/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_tokenizer.py
@@ -1,10 +1,11 @@
 from unittest import TestCase
 from os import path
 import json
-from typing import List
+from typing import List, cast
 from cantusdata.settings import BASE_DIR
 from cantusdata.helpers.mei_processing.mei_tokenizer import MEITokenizer
 from cantusdata.helpers.mei_processing.mei_parsing_types import NgramDocument
+from cantusdata.helpers.neume_helpers import NEUME_GROUPS, NeumeName
 
 TEST_MEI_FILE = path.join(
     BASE_DIR,
@@ -18,6 +19,9 @@
     "cdn-hsmu-m2149l4_001r.mei",
 )
 
+# Switch NEUME_GROUPS keys and values
+NEUME_NAME_CONTOUR_MAPPER = {v: k for k, v in NEUME_GROUPS.items()}
+
 
 def calculate_expected_total_ngrams(
     mei_file: str, min_ngram: int, max_ngram: int
@@ -78,7 +82,6 @@ def prepare_tokenizer_results(
 
 
 class MEITokenizerTestCase(TestCase):
-
     def test_mei_tokenizer(self) -> None:
         tokenizer_1_2 = MEITokenizer(
             TEST_MEI_FILE,
@@ -113,6 +116,34 @@ def test_mei_tokenizer(self) -> None:
                 TEST_MEI_FILE, 3, 5
             )
             self.assertEqual(len(ngram_docs_3_5), expected_num_ngrams_3_5)
+        with self.subTest("Test neume ngram pitch lengths"):
+            # Test that each ngram of neume names has the correct number of pitches
+            # For ngrams that don't include compound neumes, we test that the number
+            # of pitches is exactly the number expected given the neumes in the ngram.
+            # For ngrams that include compound neumes, we test that the number of pitches
+            # is at least the number we would expect, given that a compound neume will
+            # have at least three pitches.
+            for doc in ngram_docs_1_2:
+                if "neume_names" in doc:
+                    ngram_includes_compound = False
+                    pitches = doc["pitch_names"].split("_")
+                    neume_names = cast(list[NeumeName], doc["neume_names"].split("_"))
+                    num_expected_pitches = 0
+                    for neume_name in neume_names:
+                        if neume_name == "compound":
+                            num_expected_pitches += 3
+                            ngram_includes_compound = True
+                        else:
+                            # The number of expected pitches per neume are the number of
+                            # letters in the contour string plus one.
+                            num_expected_pitches += (
+                                len(NEUME_NAME_CONTOUR_MAPPER[neume_name]) + 1
+                            )
+                    with self.subTest(neume_names=neume_names):
+                        if ngram_includes_compound:
+                            self.assertGreaterEqual(len(pitches), num_expected_pitches)
+                        else:
+                            self.assertEqual(len(pitches), num_expected_pitches)
         # First three neumes in test file:
         # <neume xml:id="neume-0000001734946468">
         #     <nc xml:id="nc-0000000895518447" facs="#zone-0000001993884372" oct="3" pname="d"/>

From 92a25b098d4efe1387aebd86626d28ab0a445e81 Mon Sep 17 00:00:00 2001
From: Dylan Hillerbrand <dhillerbrand@gmail.com>
Date: Mon, 23 Sep 2024 14:36:42 -0400
Subject: [PATCH 7/7] refactor: modify data setup and test mei files setting

Moves the TEST_MEI_FILES_PATH setting to settings.py rather than
setting it separately for each test.

Makes some changes to test set-up and tear-down to remove the need to index
MEI before each test.
---
 app/public/cantusdata/settings.py                   |  2 ++
 .../helpers/mei_processing/test_mei_tokenizer.py    | 10 ++--------
 .../commands/test_index_manuscript_mei.py           | 12 +++++++++++-
 .../test/core/views/test_search_notation_view.py    | 13 ++++++++-----
 4 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/app/public/cantusdata/settings.py b/app/public/cantusdata/settings.py
index 0c646b61..8e95caa7 100644
--- a/app/public/cantusdata/settings.py
+++ b/app/public/cantusdata/settings.py
@@ -175,3 +175,5 @@
 CELERY_RESULT_EXTENDED = True
 CELERY_APP = "cantusdata"
 CELERY_TASK_TRACK_STARTED = True
+
+TEST_MEI_FILES_PATH = "cantusdata/test/core/helpers/mei_processing/test_mei_files"
diff --git a/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_tokenizer.py b/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_tokenizer.py
index 8a0c85f8..594c127a 100644
--- a/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_tokenizer.py
+++ b/app/public/cantusdata/test/core/helpers/mei_processing/test_mei_tokenizer.py
@@ -2,19 +2,13 @@
 from os import path
 import json
 from typing import List, cast
-from cantusdata.settings import BASE_DIR
+from django.conf import settings
 from cantusdata.helpers.mei_processing.mei_tokenizer import MEITokenizer
 from cantusdata.helpers.mei_processing.mei_parsing_types import NgramDocument
 from cantusdata.helpers.neume_helpers import NEUME_GROUPS, NeumeName
 
 TEST_MEI_FILE = path.join(
-    BASE_DIR,
-    "cantusdata",
-    "test",
-    "core",
-    "helpers",
-    "mei_processing",
-    "test_mei_files",
+    settings.TEST_MEI_FILES_PATH,
     "123723",
     "cdn-hsmu-m2149l4_001r.mei",
 )
diff --git a/app/public/cantusdata/test/core/management/commands/test_index_manuscript_mei.py b/app/public/cantusdata/test/core/management/commands/test_index_manuscript_mei.py
index f2f12e5f..fa7e086e 100644
--- a/app/public/cantusdata/test/core/management/commands/test_index_manuscript_mei.py
+++ b/app/public/cantusdata/test/core/management/commands/test_index_manuscript_mei.py
@@ -12,7 +12,7 @@
 from solr.core import SolrConnection  # type: ignore
 
 
-TEST_MEI_FILES_PATH = "cantusdata/test/core/helpers/mei_processing/test_mei_files"
+TEST_MEI_FILES_PATH = settings.TEST_MEI_FILES_PATH
 
 
 class IndexManuscriptMeiTestCase(TestCase):
@@ -26,6 +26,11 @@ def setUpTestData(cls) -> None:
         Folio.objects.create(number="001r", manuscript=manuscript)
         Folio.objects.create(number="001v", manuscript=manuscript)
 
+    @classmethod
+    def tearDownClass(cls) -> None:
+        call_command("index_manuscript_mei", "123723", "--flush-index")
+        super().tearDownClass()
+
     def test_index_manuscript_mei(self) -> None:
         # Assert that prior to the command run, the folio "999r" does not
         # exist in the database
@@ -123,3 +128,8 @@ def tearDown(self) -> None:
         os.rmdir("/test-mei-dir/123723")
         os.rmdir("/test-mei-dir")
         os.rmdir("/empty-mei-dir")
+
+    @classmethod
+    def tearDownClass(cls) -> None:
+        call_command("index_manuscript_mei", "123723", "--flush-index")
+        super().tearDownClass()
diff --git a/app/public/cantusdata/test/core/views/test_search_notation_view.py b/app/public/cantusdata/test/core/views/test_search_notation_view.py
index 8bf8762d..724d8b5e 100644
--- a/app/public/cantusdata/test/core/views/test_search_notation_view.py
+++ b/app/public/cantusdata/test/core/views/test_search_notation_view.py
@@ -1,12 +1,11 @@
 from rest_framework.test import APITestCase
 from django.core.management import call_command
 from django.urls import reverse
+from django.conf import settings
 
 from cantusdata.views.search_notation import SearchNotationView, NotationSearchException
 from cantusdata.models import Manuscript, Folio
 
-TEST_MEI_FILES_PATH = "cantusdata/test/core/helpers/mei_processing/test_mei_files"
-
 
 class TestSearchNotationView(APITestCase):
     search_notation_view = SearchNotationView()
@@ -33,7 +32,9 @@ def setUpTestData(cls) -> None:
             image_uri="test_001r.jpg",
         )
 
-    def setUp(self) -> None:
+    @classmethod
+    def setUpClass(cls) -> None:
+        super().setUpClass()
         call_command(
             "index_manuscript_mei",
             "123723",
@@ -42,7 +43,7 @@ def setUp(self) -> None:
             "--max-ngram",
             "5",
             "--mei-dir",
-            TEST_MEI_FILES_PATH,
+            settings.TEST_MEI_FILES_PATH,
         )
 
     def test_create_query_string(self) -> None:
@@ -163,5 +164,7 @@ def test_get(self) -> None:
             self.assertIn("results", response_data)
             self.assertIn("numFound", response_data)
 
-    def tearDown(self) -> None:
+    @classmethod
+    def tearDownClass(cls) -> None:
         call_command("index_manuscript_mei", "123723", "--flush-index")
+        super().tearDownClass()