diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst index c27918548..71a523f46 100644 --- a/docs/source/contributing.rst +++ b/docs/source/contributing.rst @@ -35,6 +35,7 @@ To install ``mirdata`` for development purposes: pip install .[tests] pip install .[docs] pip install .[dali] + pip install .[haydn_op20] We recommend to install `pyenv `_ to manage your Python versions diff --git a/mirdata/download_utils.py b/mirdata/download_utils.py index ef87c9828..547d0c61e 100644 --- a/mirdata/download_utils.py +++ b/mirdata/download_utils.py @@ -320,8 +320,13 @@ def extractall_unicode(zfile, out_dir): "cp437" ).decode(errors="ignore") != filename: filename_bytes = filename.encode("cp437") - guessed_encoding = chardet.detect(filename_bytes)["encoding"] or "utf8" - filename = filename_bytes.decode(guessed_encoding, "replace") + if filename_bytes.decode("utf-8", "replace") != filename_bytes.decode( + errors="ignore" + ): + guessed_encoding = chardet.detect(filename_bytes)["encoding"] or "utf8" + filename = filename_bytes.decode(guessed_encoding, "replace") + else: + filename = filename_bytes.decode("utf-8", "replace") disk_file_name = os.path.join(out_dir, filename)