Skip to content

Commit

Permalink
Merge pull request #136 from simleo/fix_missing_file
Browse files Browse the repository at this point in the history
Fix missing file
  • Loading branch information
simleo authored Sep 20, 2022
2 parents 7380019 + 15f5d80 commit 1a45d86
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 22 deletions.
15 changes: 12 additions & 3 deletions rocrate/model/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import errno
import os
import shutil
from pathlib import Path
from urllib.request import urlopen
Expand Down Expand Up @@ -48,9 +50,16 @@ def write(self, base_path):
if self.fetch_remote:
self.__get_parts(out_path)
else:
out_path.mkdir(parents=True, exist_ok=True)
if not self.crate.source and self.source and Path(self.source).exists():
self.crate._copy_unlisted(self.source, out_path)
if self.source is None:
out_path.mkdir(parents=True, exist_ok=True)
else:
if not Path(self.source).exists():
raise FileNotFoundError(
errno.ENOENT, os.strerror(errno.ENOENT), str(self.source)
)
out_path.mkdir(parents=True, exist_ok=True)
if not self.crate.source:
self.crate._copy_unlisted(self.source, out_path)

def __get_parts(self, out_path):
out_path.mkdir(parents=True, exist_ok=True)
Expand Down
7 changes: 5 additions & 2 deletions rocrate/model/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from pathlib import Path
import shutil
import urllib.request
import warnings
from http.client import HTTPResponse
from io import BytesIO, StringIO

Expand Down Expand Up @@ -59,7 +59,10 @@ def write(self, base_path):
if self.fetch_remote:
out_file_path.parent.mkdir(parents=True, exist_ok=True)
urllib.request.urlretrieve(response.url, out_file_path)
elif os.path.isfile(self.source):
elif self.source is None:
# Allows to record a File entity whose @id does not exist, see #73
warnings.warn(f"No source for {self.id}")
else:
out_file_path.parent.mkdir(parents=True, exist_ok=True)
if not out_file_path.exists() or not out_file_path.samefile(self.source):
shutil.copy(self.source, out_file_path)
36 changes: 35 additions & 1 deletion test/test_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,10 +320,27 @@ def test_missing_dir(test_data_dir, tmpdir):
assert examples_dataset.id == f'{name}/'

out_path = tmpdir / 'crate_read_out'
with pytest.raises(OSError):
crate.write(out_path)

# Two options to get a writable crate

# 1. Set the source to None (will create an empty dir)
examples_dataset.source = None
crate.write(out_path)
assert (out_path / name).is_dir()

shutil.rmtree(out_path)

# 2. Provide an existing source
source = tmpdir / "source"
source.mkdir()
examples_dataset.source = source
crate.write(out_path)
assert not (out_path / 'README.txt').exists()
assert (out_path / name).is_dir()


@pytest.mark.filterwarnings("ignore")
def test_missing_file(test_data_dir, tmpdir):
crate_dir = test_data_dir / 'read_crate'
name = 'test_file_galaxy.txt'
Expand All @@ -335,9 +352,26 @@ def test_missing_file(test_data_dir, tmpdir):
assert test_file.id == name

out_path = tmpdir / 'crate_read_out'
with pytest.raises(OSError):
crate.write(out_path)

# Two options to get a writable crate

# 1. Set the source to None (file will still be missing in the copy)
test_file.source = None
crate.write(out_path)
assert not (out_path / name).exists()

shutil.rmtree(out_path)

# 2. Provide an existing source
source = tmpdir / "source.txt"
text = "foo\nbar\n"
source.write_text(text)
test_file.source = source
crate.write(out_path)
assert (out_path / name).read_text() == text


def test_generic_data_entity(tmpdir):
rc_id = "#collection"
Expand Down
48 changes: 32 additions & 16 deletions test/test_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,23 +301,36 @@ def test_remote_uri_exceptions(tmpdir):
# no error on Windows, or on Linux as root, so we don't use pytest.raises


@pytest.mark.parametrize("fetch_remote,validate_url", [(False, False), (False, True), (True, False), (True, True)])
def test_missing_source(test_data_dir, tmpdir, fetch_remote, validate_url):
@pytest.mark.filterwarnings("ignore")
@pytest.mark.parametrize("what", ["file", "dataset"])
def test_missing_source(test_data_dir, tmpdir, what):
path = test_data_dir / uuid.uuid4().hex
args = {"fetch_remote": fetch_remote, "validate_url": validate_url}

crate = ROCrate()
file_ = crate.add_file(path, **args)
assert file_ is crate.dereference(path.name)
out_path = tmpdir / 'ro_crate_out_1'
crate.write(out_path)
assert not (out_path / path.name).exists()
entity = getattr(crate, f"add_{what}")(path)
assert entity is crate.dereference(path.name)
crate_dir = tmpdir / 'ro_crate_out_1'
with pytest.raises(OSError):
crate.write(crate_dir)

crate = ROCrate()
file_ = crate.add_file(path, path.name, **args)
assert file_ is crate.dereference(path.name)
out_path = tmpdir / 'ro_crate_out_2'
assert not (out_path / path.name).exists()
entity = getattr(crate, f"add_{what}")(path, path.name)
assert entity is crate.dereference(path.name)
crate_dir = tmpdir / 'ro_crate_out_2'
with pytest.raises(OSError):
crate.write(crate_dir)

crate = ROCrate()
entity = getattr(crate, f"add_{what}")(None, path.name)
assert entity is crate.dereference(path.name)
crate_dir = tmpdir / 'ro_crate_out_3'
crate.write(crate_dir)
out_path = crate_dir / path.name
if what == "file":
assert not out_path.exists()
else:
assert out_path.is_dir()
assert not any(out_path.iterdir())


@pytest.mark.parametrize("fetch_remote,validate_url", [(False, False), (False, True), (True, False), (True, True)])
Expand All @@ -336,12 +349,15 @@ def test_no_source_no_dest(test_data_dir, fetch_remote, validate_url):

def test_dataset(test_data_dir, tmpdir):
crate = ROCrate()
path = test_data_dir / "a" / "b"
d1 = crate.add_dataset(path)
path_a_b = test_data_dir / "a" / "b"
path_c = test_data_dir / "c"
for p in path_a_b, path_c:
p.mkdir(parents=True)
d1 = crate.add_dataset(path_a_b)
assert crate.dereference("b") is d1
d2 = crate.add_dataset(path, "a/b")
d2 = crate.add_dataset(path_a_b, "a/b")
assert crate.dereference("a/b") is d2
d_from_str = crate.add_dataset(str(test_data_dir / "c"))
d_from_str = crate.add_dataset(str(path_c))
assert crate.dereference("c") is d_from_str

out_path = tmpdir / 'ro_crate_out'
Expand Down

0 comments on commit 1a45d86

Please sign in to comment.