Skip to content

Commit

Permalink
Merge pull request #739 from UNFmontreal/fix/git-annex-sensitive
Browse files Browse the repository at this point in the history
datalad sensitive marking fixes
  • Loading branch information
yarikoptic authored Feb 24, 2024
2 parents d3385d7 + 78db99e commit bf79098
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 10 deletions.
28 changes: 18 additions & 10 deletions heudiconv/external/dlad.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,21 +146,25 @@ def add_to_datalad(
message="Added gitattributes to place all .heudiconv content"
" under annex",
)
ds.save(
save_res = ds.save(
".",
recursive=True
# not in effect! ?
# annex_add_opts=['--include-dotfiles']
)
annexed_files = [sr["path"] for sr in save_res if sr.get("key", None)]

# TODO: filter for only changed files?
# Provide metadata for sensitive information
mark_sensitive(ds, "sourcedata")
mark_sensitive(ds, "*_scans.tsv") # top level
mark_sensitive(ds, "*/*_scans.tsv") # within subj
mark_sensitive(ds, "*/*/*_scans.tsv") # within sess/subj
mark_sensitive(ds, "*/anat") # within subj
mark_sensitive(ds, "*/*/anat") # within ses/subj
sensitive_patterns = [
"sourcedata",
"*_scans.tsv", # top level
"*/*_scans.tsv", # within subj
"*/*/*_scans.tsv", # within sess/subj
"*/anat", # within subj
"*/*/anat", # within ses/subj
]
for sp in sensitive_patterns:
mark_sensitive(ds, sp, annexed_files)
if dsh_path:
mark_sensitive(ds, ".heudiconv") # entire .heudiconv!
superds.save(path=ds.path, message=msg, recursive=True)
Expand All @@ -178,26 +182,30 @@ def add_to_datalad(
"""


def mark_sensitive(
    ds: Dataset, path_glob: str, files: list[str] | None = None
) -> None:
    """Mark paths matching a glob with ``distribution-restrictions=sensitive``.

    Parameters
    ----------
    ds : Dataset to operate on
    path_glob : str
      glob of the paths within dataset to work on
    files : list[str], optional
      subset of files to mark.  Glob matches are kept only if they are
      listed here (compared as the strings produced by joining ``ds.path``
      with the glob).  ``None`` (default) disables the filtering; an empty
      list means that there is nothing to mark.

    Returns
    -------
    None
    """
    paths = glob(op.join(ds.path, path_glob))
    if files is not None:
        # An explicitly given subset must be honored even when it is empty:
        # treating [] as "no filter" would mark every glob match.
        allowed = set(files)  # build once for O(1) membership tests
        paths = [p for p in paths if p in allowed]
    if not paths:
        return
    lgr.debug("Marking %d files with distribution-restrictions field", len(paths))
    # set_metadata can be a bloody generator
    res = ds.repo.set_metadata(
        paths, add={"distribution-restrictions": "sensitive"}, recursive=True
    )
    if inspect.isgenerator(res):
        # a generator does no work until iterated, so drain it
        list(res)
21 changes: 21 additions & 0 deletions heudiconv/external/tests/test_dlad.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,24 @@ def test_mark_sensitive(tmp_path: Path) -> None:
# g2 since the same content
assert not all_meta.pop("g1", None) # nothing or empty record
assert all_meta == {"f1": target_rec, "f2": target_rec, "g2": target_rec}


def test_mark_sensitive_subset(tmp_path: Path) -> None:
    """Check that ``mark_sensitive`` only marks glob matches listed in ``files``."""
    ds = dl.Dataset(tmp_path).create(force=True)
    # file name -> content; f1 and g2 deliberately share the same content
    tree = {"f1": "d1", "f2": "d2", "g1": "d3", "g2": "d1"}
    create_tree(str(tmp_path), tree)
    ds.save(".")

    # "f*" matches both f1 and f2, but only f1 is in the requested subset
    only_f1 = [str(tmp_path / "f1")]
    mark_sensitive(ds, "f*", only_f1)

    all_meta = dict(ds.repo.get_metadata("."))
    target_rec = {"distribution-restrictions": ["sensitive"]}
    for unmarked in ("g1", "f2"):
        assert not all_meta.pop(unmarked, None)  # nothing or empty record
    # g2 since the same content
    expected = {name: target_rec for name in ("f1", "g2")}
    assert all_meta == expected

0 comments on commit bf79098

Please sign in to comment.