Skip to content

Commit

Permalink
New version of CompMusic Turkish Makam Dataset (#576)
Browse files Browse the repository at this point in the history
* add new version of dataset

* black

* fix in tox

* fixing librosa (@dagett)

* black formatting

* remove old index, add note in docs

* fix formatting, add tests for makam

* formatting four way tabla dataset

* add normpath to tests and script

* formatting

---------

Co-authored-by: Harsh Palan <10105238+harshpalan@users.noreply.github.com>
Co-authored-by: Guillem Cortès <cortes.sebastia@gmail.com>
  • Loading branch information
3 people authored Nov 2, 2023
1 parent 4bc5b18 commit ef72b2c
Show file tree
Hide file tree
Showing 13 changed files with 5,053 additions and 5,013 deletions.
26 changes: 18 additions & 8 deletions mirdata/datasets/compmusic_otmm_makam.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
.. admonition:: Dataset Info
:class: dropdown
**NOTE**: From mirdata v0.3.8 on, the only available version of this dataset is dlfm2016-fix1, which is
essentially the same as dlfm2016 but with fixes to a few annotation files. The original dlfm2016 version
is still available in mirdata versions <=0.3.7. Note that from dlfm2016 to dlfm2016-fix1, no new recordings
or annotations were added; only a few annotation files were fixed.
This dataset is designed to test makam recognition methodologies on Ottoman-Turkish makam music.
It is composed of 50 recordings from each of the 20 most common makams in the CompMusic Project's Dunya Ottoman-Turkish
Makam Music collection. Currently the dataset is the largest makam recognition dataset.
Expand Down Expand Up @@ -58,16 +63,18 @@
"""

INDEXES = {
"default": "dlfm2016",
"test": "dlfm2016",
"dlfm2016": core.Index(filename="compmusic_otmm_makam_index_dlfm2016.json"),
"default": "dlfm2016-fix1",
"test": "dlfm2016-fix1",
"dlfm2016-fix1": core.Index(
filename="compmusic_otmm_makam_index_dlfm2016-fix1.json"
),
}

REMOTES = {
"all": download_utils.RemoteFileMetadata(
filename="otmm_makam_recognition_dataset-dlfm2016.zip",
url="https://zenodo.org/record/58413/files/otmm_makam_recognition_dataset-dlfm2016.zip?download=1",
checksum="c2b9c8bdcbdcf15745b245adfc793145",
filename="otmm_makam_recognition_dataset-dlfm2016-fix1.zip",
url="https://zenodo.org/record/4883680/files/MTG/otmm_makam_recognition_dataset-dlfm2016-fix1.zip?download=1",
checksum="83724c889d36f684cff3f15f20ce0d34",
)
}

Expand Down Expand Up @@ -176,7 +183,10 @@ def load_mb_tags(fhandle: TextIO) -> dict:
Dict: metadata of the track
"""
return json.load(fhandle)
mb_tags = json.load(fhandle)
if "duration" not in mb_tags.keys():
mb_tags["duration"] = 0.0 # A few tracks have no duration information
return mb_tags


@core.docstring_inherit(core.Dataset)
Expand All @@ -201,7 +211,7 @@ def __init__(self, data_home=None, version="default"):
def _metadata(self):
metadata_path = os.path.join(
os.path.normpath(self.data_home),
"MTG-otmm_makam_recognition_dataset-f14c0d0",
"MTG-otmm_makam_recognition_dataset-55ce75a",
"annotations.json",
)

Expand Down

Large diffs are not rendered by default.

20 changes: 12 additions & 8 deletions scripts/make_compmusic_otmm_makam_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
from mirdata.validate import md5


OTMM_MAKAM_INDEX_PATH = '../mirdata/datasets/indexes/otmm_makam_index.json'
OTMM_MAKAM_INDEX_PATH = os.path.normpath(
'../mirdata/datasets/indexes/compmusic_otmm_makam_index_dlfm2016-fix1.json'
)


def make_otmm_makam_index(dataset_data_path):

otmm_index = {'version': 'dlfm2016', 'tracks': {}, 'metadata': []}
otmm_index = {'version': 'dlfm2016-fix1', 'tracks': {}, 'metadata': []}

for makam in os.listdir(os.path.join(dataset_data_path, 'data')):
if '.' not in makam:
Expand All @@ -23,7 +25,7 @@ def make_otmm_makam_index(dataset_data_path):
otmm_index['tracks'][index] = {
"metadata": [
os.path.join(
'MTG-otmm_makam_recognition_dataset-f14c0d0',
'MTG-otmm_makam_recognition_dataset-55ce75a',
'data',
makam,
track,
Expand All @@ -32,7 +34,7 @@ def make_otmm_makam_index(dataset_data_path):
],
"pitch": [
os.path.join(
'MTG-otmm_makam_recognition_dataset-f14c0d0',
'MTG-otmm_makam_recognition_dataset-55ce75a',
'data',
makam,
pitch_path,
Expand All @@ -44,10 +46,12 @@ def make_otmm_makam_index(dataset_data_path):
),
],
}
otmm_index['metadata'] = [
os.path.join('MTG-otmm_makam_recognition_dataset-f14c0d0', 'annotations.json'),
md5(os.path.join(dataset_data_path, 'annotations.json')),
]
otmm_index['metadata'] = {
"annotation_metadata": [
os.path.join('MTG-otmm_makam_recognition_dataset-55ce75a', 'annotations.json'),
md5(os.path.join(dataset_data_path, 'annotations.json')),
]
}

with open(OTMM_MAKAM_INDEX_PATH, 'w') as fhandle:
json.dump(otmm_index, fhandle, indent=2)
Expand Down
16 changes: 14 additions & 2 deletions tests/datasets/test_otmm_makam.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,15 @@ def test_track():
"track_id": "cafcdeaf-e966-4ff0-84fb-f660d2b68365",
"pitch_path": os.path.join(
os.path.normpath("tests/resources/mir_datasets/compmusic_otmm_makam/"),
"MTG-otmm_makam_recognition_dataset-f14c0d0/data/Kurdilihicazkar/cafcdeaf-e966-4ff0-84fb-f660d2b68365.pitch",
os.path.normpath(
"MTG-otmm_makam_recognition_dataset-55ce75a/data/Kurdilihicazkar/cafcdeaf-e966-4ff0-84fb-f660d2b68365.pitch"
),
),
"mb_tags_path": os.path.join(
os.path.normpath("tests/resources/mir_datasets/compmusic_otmm_makam/"),
"MTG-otmm_makam_recognition_dataset-f14c0d0/data/Kurdilihicazkar/cafcdeaf-e966-4ff0-84fb-f660d2b68365.json",
os.path.normpath(
"MTG-otmm_makam_recognition_dataset-55ce75a/data/Kurdilihicazkar/cafcdeaf-e966-4ff0-84fb-f660d2b68365.json"
),
),
"form": "sarki",
"instrumentation": "Solo vocal with accompaniment",
Expand Down Expand Up @@ -707,6 +711,14 @@ def test_load_mb_tags():
}
]

# test empty duration
track_id = "2a3e1a6d-ab82-4a46-ae8b-0fb057b53de0"
dataset = compmusic_otmm_makam.Dataset(data_home)
track = dataset.track(track_id)
mb_tags_path = track.mb_tags_path
mb_tags = compmusic_otmm_makam.load_mb_tags(mb_tags_path)
assert mb_tags["duration"] == 0.0


def test_special_turkish_characters():
data_home = "tests/resources/mir_datasets/compmusic_otmm_makam"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"usul": [
{
"attribute_key": "aksak",
"mb_attribute": "Aksak",
"source": "http://musicbrainz.org/work/753ff394-dec1-422b-991f-227d8f848532"
},
{
"attribute_key": "aksak",
"mb_tag": "aksak",
"source": "http://musicbrainz.org/recording/cafcdeaf-e966-4ff0-84fb-f660d2b68365"
}
]
}

0 comments on commit ef72b2c

Please sign in to comment.