Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New version of CompMusic Turkish Makam Dataset #576

Merged
merged 20 commits into from
Nov 2, 2023
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 18 additions & 8 deletions mirdata/datasets/compmusic_otmm_makam.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
.. admonition:: Dataset Info
:class: dropdown

**NOTE**: From mirdata v0.3.8 on, the only available version of this dataset is dlfm2016-fix1, which is
basically the same as dlfm2016, but with a few fixes in some annotations. The original dlfm2016 version
is still available in mirdata versions <=0.3.7. Note that from dlfm2016 to dlfm2016-fix1, no new recordings
or annotations were added; only a few annotation files were fixed.

This dataset is designed to test makam recognition methodologies on Ottoman-Turkish makam music.
It is composed of 50 recordings from each of the 20 most common makams in CompMusic Project's Dunya Ottoman-Turkish
Makam Music collection. Currently the dataset is the largest makam recognition dataset.
Expand Down Expand Up @@ -58,16 +63,18 @@
"""

INDEXES = {
"default": "dlfm2016",
"test": "dlfm2016",
"dlfm2016": core.Index(filename="compmusic_otmm_makam_index_dlfm2016.json"),
"default": "dlfm2016-fix1",
"test": "dlfm2016-fix1",
"dlfm2016-fix1": core.Index(
filename="compmusic_otmm_makam_index_dlfm2016-fix1.json"
),
}

REMOTES = {
"all": download_utils.RemoteFileMetadata(
filename="otmm_makam_recognition_dataset-dlfm2016.zip",
url="https://zenodo.org/record/58413/files/otmm_makam_recognition_dataset-dlfm2016.zip?download=1",
checksum="c2b9c8bdcbdcf15745b245adfc793145",
filename="otmm_makam_recognition_dataset-dlfm2016-fix1.zip",
url="https://zenodo.org/record/4883680/files/MTG/otmm_makam_recognition_dataset-dlfm2016-fix1.zip?download=1",
checksum="83724c889d36f684cff3f15f20ce0d34",
)
}

Expand Down Expand Up @@ -176,7 +183,10 @@ def load_mb_tags(fhandle: TextIO) -> dict:
Dict: metadata of the track

"""
return json.load(fhandle)
mb_tags = json.load(fhandle)
if "duration" not in mb_tags.keys():
mb_tags["duration"] = 0.0 # Few tracks have no duration information
return mb_tags


@core.docstring_inherit(core.Dataset)
Expand All @@ -201,7 +211,7 @@ def __init__(self, data_home=None, version="default"):
def _metadata(self):
metadata_path = os.path.join(
os.path.normpath(self.data_home),
"MTG-otmm_makam_recognition_dataset-f14c0d0",
"MTG-otmm_makam_recognition_dataset-55ce75a",
"annotations.json",
)

Expand Down

Large diffs are not rendered by default.

18 changes: 10 additions & 8 deletions scripts/make_compmusic_otmm_makam_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
from mirdata.validate import md5


OTMM_MAKAM_INDEX_PATH = '../mirdata/datasets/indexes/otmm_makam_index.json'
OTMM_MAKAM_INDEX_PATH = '../mirdata/datasets/indexes/compmusic_otmm_makam_index_dlfm2016-fix1.json'
genisplaja marked this conversation as resolved.
Show resolved Hide resolved


def make_otmm_makam_index(dataset_data_path):

otmm_index = {'version': 'dlfm2016', 'tracks': {}, 'metadata': []}
otmm_index = {'version': 'dlfm2016-fix1', 'tracks': {}, 'metadata': []}

for makam in os.listdir(os.path.join(dataset_data_path, 'data')):
if '.' not in makam:
Expand All @@ -23,7 +23,7 @@ def make_otmm_makam_index(dataset_data_path):
otmm_index['tracks'][index] = {
"metadata": [
os.path.join(
'MTG-otmm_makam_recognition_dataset-f14c0d0',
'MTG-otmm_makam_recognition_dataset-55ce75a',
'data',
makam,
track,
Expand All @@ -32,7 +32,7 @@ def make_otmm_makam_index(dataset_data_path):
],
"pitch": [
os.path.join(
'MTG-otmm_makam_recognition_dataset-f14c0d0',
'MTG-otmm_makam_recognition_dataset-55ce75a',
'data',
makam,
pitch_path,
Expand All @@ -44,10 +44,12 @@ def make_otmm_makam_index(dataset_data_path):
),
],
}
otmm_index['metadata'] = [
os.path.join('MTG-otmm_makam_recognition_dataset-f14c0d0', 'annotations.json'),
md5(os.path.join(dataset_data_path, 'annotations.json')),
]
otmm_index['metadata'] = {
"annotation_metadata": [
os.path.join('MTG-otmm_makam_recognition_dataset-55ce75a', 'annotations.json'),
md5(os.path.join(dataset_data_path, 'annotations.json')),
]
}

with open(OTMM_MAKAM_INDEX_PATH, 'w') as fhandle:
json.dump(otmm_index, fhandle, indent=2)
Expand Down
12 changes: 10 additions & 2 deletions tests/datasets/test_otmm_makam.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ def test_track():
"track_id": "cafcdeaf-e966-4ff0-84fb-f660d2b68365",
"pitch_path": os.path.join(
os.path.normpath("tests/resources/mir_datasets/compmusic_otmm_makam/"),
"MTG-otmm_makam_recognition_dataset-f14c0d0/data/Kurdilihicazkar/cafcdeaf-e966-4ff0-84fb-f660d2b68365.pitch",
"MTG-otmm_makam_recognition_dataset-55ce75a/data/Kurdilihicazkar/cafcdeaf-e966-4ff0-84fb-f660d2b68365.pitch",
genisplaja marked this conversation as resolved.
Show resolved Hide resolved
),
"mb_tags_path": os.path.join(
os.path.normpath("tests/resources/mir_datasets/compmusic_otmm_makam/"),
"MTG-otmm_makam_recognition_dataset-f14c0d0/data/Kurdilihicazkar/cafcdeaf-e966-4ff0-84fb-f660d2b68365.json",
"MTG-otmm_makam_recognition_dataset-55ce75a/data/Kurdilihicazkar/cafcdeaf-e966-4ff0-84fb-f660d2b68365.json",
genisplaja marked this conversation as resolved.
Show resolved Hide resolved
),
"form": "sarki",
"instrumentation": "Solo vocal with accompaniment",
Expand Down Expand Up @@ -707,6 +707,14 @@ def test_load_mb_tags():
}
]

# test empty duration
track_id = "2a3e1a6d-ab82-4a46-ae8b-0fb057b53de0"
dataset = compmusic_otmm_makam.Dataset(data_home)
track = dataset.track(track_id)
mb_tags_path = track.mb_tags_path
mb_tags = compmusic_otmm_makam.load_mb_tags(mb_tags_path)
assert mb_tags["duration"] == 0.0


def test_special_turkish_characters():
data_home = "tests/resources/mir_datasets/compmusic_otmm_makam"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"usul": [
{
"attribute_key": "aksak",
"mb_attribute": "Aksak",
"source": "http://musicbrainz.org/work/753ff394-dec1-422b-991f-227d8f848532"
},
{
"attribute_key": "aksak",
"mb_tag": "aksak",
"source": "http://musicbrainz.org/recording/cafcdeaf-e966-4ff0-84fb-f660d2b68365"
}
]
}
Loading