[WIP] OpenMIC2018 (#544)
* added openmic2018 indexer

* basic openmic dataclass. tracks still need much work.

* promoted pandas to a proper dependency

* blacked openmic2018, added some features

* building tests for openmic2018

* building tests for openmic2018

* generalize openmic to multiple splits going forward

* autodocced openmic track class

* blacked

* fixing some naming inconsistencies, use proper open wrapper

* mypy is mysterious

* blacking

* added exception chaining and download message to openmic

* Trying openmic split as a property

* fixed a typo in a docstring

* simplified partial annotation filter
bmcfee committed Sep 21, 2022
1 parent c4759b4 commit 8db5795
Showing 15 changed files with 200,593 additions and 0 deletions.
200,031 changes: 200,031 additions & 0 deletions mirdata/datasets/indexes/openmic2018_index.json

Large diffs are not rendered by default.

369 changes: 369 additions & 0 deletions mirdata/datasets/openmic2018.py
@@ -0,0 +1,369 @@
"""OpenMIC-2018 Dataset Loader
.. admonition:: Dataset Info
:class: dropdown
OpenMIC-2018 is a dataset of 20,000 excerpts of polyphonic audio recordings
drawn from the Free Music Archive (FMA).
This dataset was produced by Spotify and New York University.
The collection has been partially annotated for the presence or absence of
20 instrument categories by workers on a crowd-sourcing platform.
Each excerpt is 10 seconds long, and no two excerpts come from the same
recording.
Each of the 20 instrument classes is guaranteed to have at least 500
positive examples and 1500 observations (positive or negative) in total.
The styles and genres of the recordings are mixed, and the sample is
deliberately biased relative to the FMA as a whole so as to ensure
sufficient representation of each instrument category.
Note that the excerpts are only partially annotated: for any given track,
only some of the 20 instrument labels are observed; the rest are unknown.
Annotations include a confidence rating derived from the inter-annotator
agreement on the track.
Raw (disaggregated) annotations are also provided.
In addition to raw audio, pre-computed features generated by the
VGGish model are provided.
A pre-registered partition of the data has been constructed to ensure
reproducible evaluation with approximately balanced class representation.
All excerpts are permissively licensed (Creative Commons or Public Domain).
For more details, please visit: https://zenodo.org/record/1432913
"""
import json
import re
from pathlib import Path
from typing import BinaryIO, Optional, Tuple, Dict, List

import librosa
import numpy as np
import pandas as pd
from smart_open import open

from mirdata import download_utils, jams_utils, core, io

BIBTEX = """
@inproceedings{DBLP:conf/ismir/HumphreyDM18,
author = {Eric Humphrey and
Simon Durand and
Brian McFee},
editor = {Emilia G{\'{o}}mez and
Xiao Hu and
Eric Humphrey and
Emmanouil Benetos},
title = {OpenMIC-2018: An Open Data-set for Multiple Instrument Recognition},
booktitle = {Proceedings of the 19th International Society for Music Information
Retrieval Conference, {ISMIR} 2018, Paris, France, September 23-27,
2018},
pages = {438--444},
year = {2018},
url = {http://ismir2018.ircam.fr/doc/pdfs/248\_Paper.pdf},
timestamp = {Thu, 12 Mar 2020 11:33:14 +0100},
biburl = {https://dblp.org/rec/conf/ismir/HumphreyDM18.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@dataset{humphrey_eric_j_2018_1432913,
author = {Humphrey, Eric J. and
Durand, Simon and
McFee, Brian},
title = {OpenMIC-2018},
month = sep,
year = 2018,
publisher = {Zenodo},
version = {v1.0.0},
doi = {10.5281/zenodo.1432913},
url = {https://doi.org/10.5281/zenodo.1432913}
}
"""

INDEXES = {
"default": "1.0",
"test": "1.0",
"1.0": core.Index(filename="openmic2018_index.json"),
}

REMOTES = {
"remote_data": download_utils.RemoteFileMetadata(
filename="openmic-2018-v1.0.0.tgz",
url="https://zenodo.org/record/1432913/files/openmic-2018-v1.0.0.tgz?download=1",
checksum="e4ccf187e2bb5ab2e115416e8aafe7f4",
),
}

LICENSE_INFO = "Creative Commons Attribution 4.0 International"

INSTRUMENTS = {
"accordion": 0,
"banjo": 1,
"bass": 2,
"cello": 3,
"clarinet": 4,
"cymbals": 5,
"drums": 6,
"flute": 7,
"guitar": 8,
"mallet_percussion": 9,
"mandolin": 10,
"organ": 11,
"piano": 12,
"saxophone": 13,
"synthesizer": 14,
"trombone": 15,
"trumpet": 16,
"ukulele": 17,
"violin": 18,
"voice": 19,
}
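
# -- the integer values give each instrument a stable column index (e.g. for
# -- packing per-track scores into a dense (n_tracks, 20) array); this
# -- ordering is assumed to mirror the dataset's class-map.json, which is
# -- loaded by Dataset._class_map below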


class Track(core.Track):
"""openmic2018 Track class
Args:
track_id (str): track id of the track
Attributes:
audio_path (str): path to the audio file
split (str): string identifier for train/test split
track_id (str): track id
vggish_path (str): path to pre-computed VGGish features
artist (str): name of the artist
audio (tuple): audio time series and sampling rate (mono, 44100 Hz)
genres (list): list of strings denoting genres for this track
instruments (dict): instrument name -> confidence (>0.5 indicates presence)
start_time (float): time (in seconds) where this excerpt occurs in the full recording
title (str): title of the track
url (str): web address to access the original recording on the Free Music Archive
vggish (tuple): np.ndarrays for frame times and pre-computed VGGish features
"""

def __init__(self, track_id, data_home, dataset_name, index, metadata):
super().__init__(
track_id,
data_home,
dataset_name=dataset_name,
index=index,
metadata=metadata,
)

# -- paths to the audio file and the pre-computed VGGish features
self.audio_path = self.get_path("audio")
self.vggish_path = self.get_path("vggish")

@property
def split(self):
"""Get the pre-defined split"""
return self._track_metadata.get("split")

@property
def instruments(self) -> Dict[str, float]:
"""The instruments for this track.
Each observed instrument for the track receives a score between 0 and 1,
corresponding to the fraction of annotators who believe the instrument to be
present.
A score less than 0.5 indicates that the instrument is probably not present.
Returns:
* dict : instrument name -> confidence score
"""

scores = dict()
for k in INSTRUMENTS:
inst_score = self._track_metadata.get(k, np.nan)
if not np.isnan(inst_score):
scores[k] = inst_score
return scores
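
# -- e.g. a hypothetical track might report
# -- {"drums": 1.0, "guitar": 0.8, "banjo": 0.0}: drums and guitar are
# -- likely present, banjo is likely absent, and the remaining instruments
# -- were never annotated for this track, so they are omitted entirely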

@property
def genres(self) -> Optional[List[str]]:
"""The FMA genres of the track.
Returns:
* genres: list of genre strings
"""

genres = self._track_metadata.get("track_genres")
if genres is None:
    return None
return [g["genre_title"] for g in genres]

@property
def artist(self) -> Optional[str]:
"""The artist of the track.
Returns:
* artist
"""

return self._track_metadata.get("artist_name")

@property
def title(self) -> Optional[str]:
"""The title of the track.
Returns:
* title
"""
return self._track_metadata.get("track_title")

@property
def url(self) -> Optional[str]:
"""The URL on Free Music Archive for this track
Returns:
* str - URL
"""
return self._track_metadata.get("track_url")

@property
def start_time(self) -> float:
"""The starting time (in seconds) of the selected excerpt within the track.
Returns:
* float - starting time
"""
return self._track_metadata.get("start_time")

@property
def audio(self) -> Optional[Tuple[np.ndarray, float]]:
"""The track's audio
Returns:
* np.ndarray - audio signal
* float - sample rate
"""
return load_audio(self.audio_path)

@property
def vggish(self) -> Tuple[np.ndarray, np.ndarray]:
"""The track's pre-computed VGGish features
Returns:
* np.ndarray - time indices (seconds) for each frame
* np.ndarray - VGGish features; shape=(n_frames, 128)
"""

with open(self.vggish_path, "r") as fd:
data = json.load(fd)
times = np.asarray(data["time_points"])
vgg = np.asarray(data["features"])
return times, vgg
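
# -- note: VGGish emits one 128-dimensional embedding per ~0.96 s frame, so
# -- a 10-second excerpt should yield roughly 10 frames; that is a property
# -- of the upstream feature extractor, not something this loader enforces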

# -- convert this track's metadata to a JAMS object; instrument tags are
# -- not yet attached (see the FIXME below)
def to_jams(self):
"""Jams: the track's data in jams format"""
return jams_utils.jams_converter(
audio_path=self.audio_path,
metadata=self._track_metadata,
# tag_data=None, # FIXME
)


@io.coerce_to_bytes_io
def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
"""Load an OpenMIC2018 audio file.
Audio will be resampled to 44100 Hz and downmixed to mono.
Args:
fhandle (str or file-like): path or file-like object pointing to an audio file
Returns:
* np.ndarray - the audio signal
* float - The sample rate of the audio file
"""
# -- load as 44100 mono
return librosa.load(fhandle, sr=44100, mono=True)


# -- use this decorator so the docs are complete
@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
"""The OpenMIC-2018 dataset"""

def __init__(self, data_home=None, version="default"):
super().__init__(
data_home,
version,
name="openmic2018",
track_class=Track,
bibtex=BIBTEX,
indexes=INDEXES,
remotes=REMOTES,
license_info=LICENSE_INFO,
)

@core.cached_property
def _metadata(self):
metadata_path = Path(self.data_home) / "openmic-2018-metadata.csv"

try:
with open(metadata_path, "r") as fdesc:
# index column is second to last
metadata = pd.read_csv(fdesc, index_col=-2)
except FileNotFoundError as exc:
raise FileNotFoundError(
f"Metadata file {metadata_path} not found. " "Did you run .download?"
) from exc

# genres column is a json object: expand it
# the raw CSV file is not actually valid json, so we'll fix that with a
# regexp
str_fixer = re.compile(r"(?<!\\)'")
metadata["track_genres"] = metadata["track_genres"].map(
lambda x: json.loads(str_fixer.sub('"', x)), na_action="ignore"
)
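
# -- e.g. an illustrative raw cell [{'genre_title': 'Rock'}] has its
# -- unescaped single quotes rewritten to double quotes, producing the
# -- valid JSON [{"genre_title": "Rock"}] before json.loads runs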

# Create a column for splits
metadata["split"] = pd.Series(index=metadata.index, data="")

# Populate each split
for split_file in (Path(self.data_home) / "partitions").rglob("*.csv"):
split = split_file.stem
with open(split_file, "r") as fdesc:
split_df = pd.read_csv(
fdesc,
header=None,
index_col=0,
)
split_df["split"] = split
metadata.update(split_df)
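
# -- e.g. a partition file partitions/split01_train.csv (name shown for
# -- illustration; the stem is whatever rglob finds) would set
# -- split == "split01_train" for every sample key it lists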

# Tack on labels
label_path = Path(self.data_home) / "openmic-2018-aggregated-labels.csv"
with open(label_path, "r") as fdesc:
labels = pd.read_csv(fdesc, index_col=0)
# Pivot the labels into its own dataframe
labels = labels.pivot_table(
columns="instrument", values="relevance", index=labels.index
)
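# -- the aggregated-labels CSV is long-form (one row per observed
# -- (sample_key, instrument) pair); the pivot yields one wide row per
# -- track with a column per instrument, and NaN wherever an instrument
# -- was never annotated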

# Join to metadata
metadata = metadata.join(labels)

# Tack on individual annotations?

return metadata.to_dict(orient="index")

@core.cached_property
def _class_map(self):
class_path = Path(self.data_home) / "class-map.json"

try:
with open(class_path, "r") as fd:
classes = json.load(fd)
except FileNotFoundError as exc:
raise FileNotFoundError(
f"Metadata file {class_path} not found. " "Did you run .download?"
) from exc

return classes
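

# -- a sketch of downstream use (hypothetical helper, not part of this
# -- module): assemble a dense score matrix from per-track instrument
# -- scores, with NaN marking unobserved labels.
#
#   def label_matrix(dataset):
#       tracks = dataset.load_tracks()
#       ids = sorted(tracks)
#       Y = np.full((len(ids), len(INSTRUMENTS)), np.nan)
#       for i, tid in enumerate(ids):
#           for inst, score in tracks[tid].instruments.items():
#               Y[i, INSTRUMENTS[inst]] = score
#       return ids, Y  # np.isnan(Y) marks unobserved instrument labels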
