Updating file paths and other issues with Windows environment #567

Merged
2 changes: 1 addition & 1 deletion mirdata/core.py
@@ -163,7 +163,7 @@ def __repr__(self):
@cached_property
def _index(self):
try:
with open(self.index_path) as fhandle:
with open(self.index_path, encoding="utf-8") as fhandle:
index = json.load(fhandle)
except FileNotFoundError:
if self._index_data.remote:
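Note on the change above: without an explicit encoding, open() uses the locale's preferred encoding, which on many Windows machines is cp1252 rather than UTF-8, so reading a UTF-8 index file that contains non-ASCII characters can raise UnicodeDecodeError or silently produce mojibake. A minimal sketch of the portable pattern (the file name and contents below are made up for illustration):

    import json

    # Write a small JSON file containing non-ASCII text as UTF-8.
    with open("index.json", "w", encoding="utf-8") as fhandle:
        json.dump({"title": "Çello Taksimi"}, fhandle, ensure_ascii=False)

    # Reading it back with an explicit encoding behaves the same on every platform;
    # without encoding=, Windows typically falls back to a locale codec such as cp1252.
    with open("index.json", encoding="utf-8") as fhandle:
        index = json.load(fhandle)
    print(index["title"])  # Çello Taksimi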
7 changes: 4 additions & 3 deletions mirdata/datasets/acousticbrainz_genre.py
@@ -39,6 +39,7 @@

"""
import json
import os

from deprecated.sphinx import deprecated

@@ -204,7 +205,7 @@ def __init__(
metadata,
)

self.path = self.get_path("data")
self.path = os.path.normpath(self.get_path("data"))
self.genre = [genre for genre in self.track_id.split("#")[4:] if genre != ""]
self.mbid = self.track_id.split("#")[2]
self.mbid_group = self.track_id.split("#")[3]
@@ -363,7 +364,7 @@ def rhythm(self):

@core.cached_property
def acousticbrainz_metadata(self):
return load_extractor(self.path)
return load_extractor(os.path.normpath(self.path))

def to_jams(self):
"""the track's data in jams format
@@ -374,7 +375,7 @@ def to_jams(self):
"""
return jams_utils.jams_converter(
metadata={
"features": load_extractor(self.path),
"features": load_extractor(os.path.normpath(self.path)),
"duration": self.acousticbrainz_metadata["metadata"][
"audio_properties"
]["length"],
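For context on the normpath calls: the relative paths in the dataset indexes are written with forward slashes, and os.path.normpath rewrites them to the native separator (and collapses redundant ones), so the resulting paths join and compare consistently on Windows. A small illustration with a made-up relative path:

    import os

    # Relative path roughly as it would appear in a dataset index (forward slashes).
    rel = "acousticbrainz-mediaeval-validation/be/be9e01e5.json"

    # normpath rewrites "/" to the native separator and collapses redundant ones.
    print(os.path.normpath(rel))
    # Windows: acousticbrainz-mediaeval-validation\be\be9e01e5.json
    # POSIX:   acousticbrainz-mediaeval-validation/be/be9e01e5.json (unchanged)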
6 changes: 3 additions & 3 deletions mirdata/datasets/compmusic_jingju_acappella.py
@@ -281,7 +281,7 @@ def load_phrases(fhandle: TextIO) -> annotations.LyricData:
start_times = []
end_times = []
lyrics = []

# fhandle.reconfigure(encoding="utf-8")
harshpalan marked this conversation as resolved.
reader = csv.reader(fhandle, delimiter="\t")
for line in reader:
start_times.append(float(line[0]))
@@ -308,7 +308,7 @@ def load_syllable(fhandle: TextIO) -> annotations.LyricData:
start_times = []
end_times = []
events = []

# fhandle.reconfigure(encoding="utf-8")
harshpalan marked this conversation as resolved.
reader = csv.reader(fhandle, delimiter="\t")
for line in reader:
start_times.append(float(line[0]))
@@ -351,7 +351,7 @@ def _metadata(self):

metadata = {}
try:
with open(metadata_path_laosheng, "r") as fhandle:
with open(metadata_path_laosheng, "r", encoding="utf-8") as fhandle:
reader = csv.reader(fhandle, delimiter=",")
next(reader)
for line in reader:
8 changes: 4 additions & 4 deletions mirdata/datasets/compmusic_otmm_makam.py
@@ -189,7 +189,6 @@ def load_mb_tags(fhandle: TextIO) -> dict:
Dict: metadata of the track

"""

return json.load(fhandle)


@@ -214,14 +213,15 @@ def __init__(self, data_home=None, version="default"):
@core.cached_property
def _metadata(self):
metadata_path = os.path.join(
self.data_home,
os.path.normpath(self.data_home),
"MTG-otmm_makam_recognition_dataset-f14c0d0",
"annotations.json",
)

metadata = {}
try:
with open(metadata_path) as f:
# f.reconfigure(encoding="utf-8")
meta = json.load(f)
for i in meta:
index = i["mbid"].split("/")[-1]
@@ -233,8 +233,8 @@ def _metadata(self):
except FileNotFoundError:
raise FileNotFoundError("Metadata not found. Did you run .download()?")

temp = metadata_path.split("/")[-2]
data_home = metadata_path.split(temp)[0]
temp = os.path.split(metadata_path)[-2]
data_home = os.path.split(temp)[0]
metadata["data_home"] = data_home

return metadata
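The last hunk above replaces string splitting on "/" with os.path.split, which separates the final path component using the platform's rules instead of assuming "/" as the separator, so walking two levels up from annotations.json also works with backslash paths on Windows. A small sketch with a made-up data_home:

    import os

    data_home_in = os.path.join("datasets", "otmm_makam")  # hypothetical location
    metadata_path = os.path.join(
        data_home_in, "MTG-otmm_makam_recognition_dataset-f14c0d0", "annotations.json"
    )

    head = os.path.split(metadata_path)[-2]  # split returns a 2-tuple, so [-2] is the directory part
    data_home = os.path.split(head)[0]       # parent of the dataset folder
    print(data_home == data_home_in)         # True on both Windows and POSIX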
2 changes: 1 addition & 1 deletion mirdata/datasets/rwc_jazz.py
@@ -244,7 +244,7 @@ def _metadata(self):
metadata_path = os.path.join(self.data_home, "metadata-master", "rwc-j.csv")

try:
with open(metadata_path, "r") as fhandle:
with open(metadata_path, "r", encoding="utf-8") as fhandle:
dialect = csv.Sniffer().sniff(fhandle.read(1024))
fhandle.seek(0)
reader = csv.reader(fhandle, dialect)
1 change: 0 additions & 1 deletion mirdata/datasets/saraga_carnatic.py
@@ -398,7 +398,6 @@ def load_sections(fhandle):
"""
intervals = []
section_labels = []

reader = csv.reader(fhandle, delimiter="\t")
for line in reader:
if line != "\n":
4 changes: 2 additions & 2 deletions mirdata/datasets/saraga_hindustani.py
@@ -294,7 +294,7 @@ def load_tempo(fhandle):

sections = []
try:
with open(sections_abs_path, "r") as fhandle2:
with open(sections_abs_path, "r", encoding="utf-8") as fhandle2:
reader = csv.reader(fhandle2, delimiter=",")
for line in reader:
if line != "\n":
@@ -379,7 +379,7 @@ def load_sections(fhandle):
"""
intervals = []
section_labels = []

# fhandle.reconfigure(encoding="utf-8")
harshpalan marked this conversation as resolved.
reader = csv.reader(fhandle, delimiter=",")
for line in reader:
if line:
8 changes: 6 additions & 2 deletions mirdata/datasets/slakh.py
@@ -138,7 +138,9 @@ def __init__(self, track_id, data_home, dataset_name, index, metadata):
# split (train/validation/test/omitted) is part of the relative filepath in the index
self.split = None # for baby_slakh, there are no data splits - set to None
if index["version"] == "2100-redux":
self.split = self._track_paths["metadata"][0].split(os.sep)[1]
self.split = os.path.normpath(self._track_paths["metadata"][0]).split(
os.sep
)[1]
assert (
self.split in SPLITS
), "{} not a valid split - should be one of {}.".format(self.split, SPLITS)
@@ -266,7 +268,9 @@ def __init__(
# split (train/validation/test) is determined by the relative filepath in the index
self.split = None # for baby_slakh, there are no data splits - set to None
if index["version"] == "2100-redux":
self.split = self._multitrack_paths["mix"][0].split(os.sep)[1]
self.split = os.path.normpath(self._multitrack_paths["mix"][0]).split(
os.sep
)[1]
assert self.split in SPLITS, "{} not in SPLITS".format(self.split)

self.data_split = self.split # deprecated in 0.3.6
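On the slakh changes: the split name is read from the second component of the relative path stored in the index, and those paths use forward slashes, so splitting the raw string on os.sep (which is "\" on Windows) would not separate the components at all. Normalizing first makes the lookup behave the same on both platforms. A minimal sketch with a made-up index path:

    import os

    # Relative metadata path roughly as stored in the index (forward slashes).
    rel = "slakh2100/train/Track00001/metadata.yaml"

    # normpath rewrites the separators to os.sep, so the split also works on Windows.
    split_name = os.path.normpath(rel).split(os.sep)[1]
    print(split_name)  # -> train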
2 changes: 1 addition & 1 deletion mirdata/io.py
@@ -19,7 +19,7 @@ def wrapper(file_path_or_obj: Optional[Union[str, TextIO]]) -> Optional[T]:
if not file_path_or_obj:
return None
if isinstance(file_path_or_obj, str):
with open(file_path_or_obj) as f:
with open(file_path_or_obj, encoding="utf-8") as f:
return func(f)
elif isinstance(file_path_or_obj, io.StringIO):
return func(file_path_or_obj)
26 changes: 18 additions & 8 deletions tests/datasets/test_acousticbrainz_genre.py
@@ -8,13 +8,15 @@

def test_track():
default_trackid = "tagtraum#validation#be9e01e5-8f93-494d-bbaa-ddcc5a52f629#2b6bfcfd-46a5-3f98-a58f-2c51d7c9e960#trance########"
data_home = "tests/resources/mir_datasets/acousticbrainz_genre"
data_home = os.path.normpath("tests/resources/mir_datasets/acousticbrainz_genre")

dataset = acousticbrainz_genre.Dataset(data_home, version="test")
track = dataset.track(default_trackid)

expected_attributes = {
"path": "tests/resources/mir_datasets/acousticbrainz_genre/acousticbrainz-mediaeval-validation/be/be9e01e5-8f93-494d-bbaa-ddcc5a52f629.json",
"path": os.path.normpath(
"tests/resources/mir_datasets/acousticbrainz_genre/acousticbrainz-mediaeval-validation/be/be9e01e5-8f93-494d-bbaa-ddcc5a52f629.json"
),
"track_id": "tagtraum#validation#be9e01e5-8f93-494d-bbaa-ddcc5a52f629#2b6bfcfd-46a5-3f98-a58f-2c51d7c9e960#trance########",
"genre": ["trance"],
"mbid": "be9e01e5-8f93-494d-bbaa-ddcc5a52f629",
@@ -39,14 +41,16 @@ def test_track():


def test_load_extractor():
path = "tests/resources/mir_datasets/acousticbrainz_genre/acousticbrainz-mediaeval-validation/be/be9e01e5-8f93-494d-bbaa-ddcc5a52f629.json"
path = os.path.normpath(
"tests/resources/mir_datasets/acousticbrainz_genre/acousticbrainz-mediaeval-validation/be/be9e01e5-8f93-494d-bbaa-ddcc5a52f629.json"
)
extractor_data = acousticbrainz_genre.load_extractor(path)

assert isinstance(extractor_data, dict)


def test_to_jams():
data_home = "tests/resources/mir_datasets/acousticbrainz_genre"
data_home = os.path.normpath("tests/resources/mir_datasets/acousticbrainz_genre")
trackid = "tagtraum#validation#be9e01e5-8f93-494d-bbaa-ddcc5a52f629#2b6bfcfd-46a5-3f98-a58f-2c51d7c9e960#trance########"

dataset = acousticbrainz_genre.Dataset(data_home, version="test")
@@ -57,7 +61,7 @@ def test_to_jams():

def test_filter_index():

data_home = "tests/resources/mir_datasets/acousticbrainz_genre"
data_home = os.path.normpath("tests/resources/mir_datasets/acousticbrainz_genre")
dataset = acousticbrainz_genre.Dataset(data_home, version="test")
index = dataset.load_all_train()
assert len(index) == 8
@@ -81,14 +85,18 @@ def test_filter_index():

def test_download(httpserver):

data_home = "tests/resources/mir_datasets/acousticbrainz_genre_download"
data_home = os.path.normpath(
"tests/resources/mir_datasets/acousticbrainz_genre_download"
)

if os.path.exists(data_home):
shutil.rmtree(data_home)

httpserver.serve_content(
open(
"tests/resources/download/acousticbrainz_genre_index.json.zip",
os.path.normpath(
"tests/resources/download/acousticbrainz_genre_index.json.zip"
),
"rb",
).read()
)
@@ -112,7 +120,9 @@ def test_download(httpserver):

httpserver.serve_content(
open(
"tests/resources/download/acousticbrainz-mediaeval-features-train-01.tar.bz2",
os.path.normpath(
"tests/resources/download/acousticbrainz-mediaeval-features-train-01.tar.bz2"
),
"rb",
).read()
)
43 changes: 27 additions & 16 deletions tests/datasets/test_beatles.py
@@ -1,3 +1,4 @@
import os
import numpy as np

from mirdata.datasets import beatles
@@ -7,21 +8,31 @@

def test_track():
default_trackid = "0111"
data_home = "tests/resources/mir_datasets/beatles"
data_home = os.path.normpath("tests/resources/mir_datasets/beatles")
dataset = beatles.Dataset(data_home)
track = dataset.track(default_trackid)

expected_attributes = {
"audio_path": "tests/resources/mir_datasets/beatles/"
+ "audio/01_-_Please_Please_Me/11_-_Do_You_Want_To_Know_A_Secret.wav",
"beats_path": "tests/resources/mir_datasets/beatles/"
+ "annotations/beat/The Beatles/01_-_Please_Please_Me/11_-_Do_You_Want_To_Know_A_Secret.txt",
"chords_path": "tests/resources/mir_datasets/beatles/"
+ "annotations/chordlab/The Beatles/01_-_Please_Please_Me/11_-_Do_You_Want_To_Know_A_Secret.lab",
"keys_path": "tests/resources/mir_datasets/beatles/"
+ "annotations/keylab/The Beatles/01_-_Please_Please_Me/11_-_Do_You_Want_To_Know_A_Secret.lab",
"sections_path": "tests/resources/mir_datasets/beatles/"
+ "annotations/seglab/The Beatles/01_-_Please_Please_Me/11_-_Do_You_Want_To_Know_A_Secret.lab",
"audio_path": os.path.join(
os.path.normpath("tests/resources/mir_datasets/beatles/"),
"audio/01_-_Please_Please_Me/11_-_Do_You_Want_To_Know_A_Secret.wav",
),
"beats_path": os.path.join(
os.path.normpath("tests/resources/mir_datasets/beatles/"),
"annotations/beat/The Beatles/01_-_Please_Please_Me/11_-_Do_You_Want_To_Know_A_Secret.txt",
),
"chords_path": os.path.join(
os.path.normpath("tests/resources/mir_datasets/beatles/"),
"annotations/chordlab/The Beatles/01_-_Please_Please_Me/11_-_Do_You_Want_To_Know_A_Secret.lab",
),
"keys_path": os.path.join(
os.path.normpath("tests/resources/mir_datasets/beatles/"),
"annotations/keylab/The Beatles/01_-_Please_Please_Me/11_-_Do_You_Want_To_Know_A_Secret.lab",
),
"sections_path": os.path.join(
os.path.normpath("tests/resources/mir_datasets/beatles/"),
"annotations/seglab/The Beatles/01_-_Please_Please_Me/11_-_Do_You_Want_To_Know_A_Secret.lab",
),
"title": "11_-_Do_You_Want_To_Know_A_Secret",
"track_id": "0111",
}
@@ -51,7 +62,7 @@ def test_track():

def test_to_jams():

data_home = "tests/resources/mir_datasets/beatles"
data_home = os.path.normpath("tests/resources/mir_datasets/beatles")
dataset = beatles.Dataset(data_home)
track = dataset.track("0111")
jam = track.to_jams()
@@ -153,7 +164,7 @@ def test_to_jams():


def test_load_beats():
beats_path = (
beats_path = os.path.normpath(
"tests/resources/mir_datasets/beatles/annotations/beat/"
+ "The Beatles/01_-_Please_Please_Me/11_-_Do_You_Want_To_Know_A_Secret.txt"
)
@@ -179,7 +190,7 @@ def test_load_beats():


def test_load_chords():
chords_path = (
chords_path = os.path.normpath(
"tests/resources/mir_datasets/beatles/annotations/chordlab/"
+ "The Beatles/01_-_Please_Please_Me/11_-_Do_You_Want_To_Know_A_Secret.lab"
)
@@ -201,7 +212,7 @@ def test_load_chords():


def test_load_key():
key_path = (
key_path = os.path.normpath(
"tests/resources/mir_datasets/beatles/annotations/keylab/"
+ "The Beatles/01_-_Please_Please_Me/11_-_Do_You_Want_To_Know_A_Secret.lab"
)
@@ -218,7 +229,7 @@ def test_load_key():


def test_load_sections():
sections_path = (
sections_path = os.path.normpath(
"tests/resources/mir_datasets/beatles/annotations/seglab/"
+ "The Beatles/01_-_Please_Please_Me/11_-_Do_You_Want_To_Know_A_Secret.lab"
)
18 changes: 14 additions & 4 deletions tests/datasets/test_beatport_key.py
@@ -1,3 +1,4 @@
import os
import numpy as np

from mirdata.datasets import beatport_key
@@ -6,14 +7,23 @@

def test_track():
default_trackid = "1"
data_home = "tests/resources/mir_datasets/beatport_key"
data_home = os.path.normpath("tests/resources/mir_datasets/beatport_key")
dataset = beatport_key.Dataset(data_home)
track = dataset.track(default_trackid)

expected_attributes = {
"audio_path": "tests/resources/mir_datasets/beatport_key/audio/100066 Lindstrom - Monsteer (Original Mix).mp3",
"keys_path": "tests/resources/mir_datasets/beatport_key/keys/100066 Lindstrom - Monsteer (Original Mix).txt",
"metadata_path": "tests/resources/mir_datasets/beatport_key/meta/100066 Lindstrom - Monsteer (Original Mix).json",
"audio_path": os.path.join(
os.path.normpath("tests/resources/mir_datasets/beatport_key/"),
"audio/100066 Lindstrom - Monsteer (Original Mix).mp3",
),
"keys_path": os.path.join(
os.path.normpath("tests/resources/mir_datasets/beatport_key/"),
"keys/100066 Lindstrom - Monsteer (Original Mix).txt",
),
"metadata_path": os.path.join(
os.path.normpath("tests/resources/mir_datasets/beatport_key/"),
"meta/100066 Lindstrom - Monsteer (Original Mix).json",
),
"title": "100066 Lindstrom - Monsteer (Original Mix)",
"track_id": "1",
}