Add example to convert file formats and changing keypoints (#304)

* first draft of example to convert file formats and changing keypoints * second draft to convert and modify pose track files. I split the function into three separate operations. * formatted the code and gotrid of spelling mistakes for the docs * changed outline of code, new functions now handle ds not fpaths and added additional example function to run on fpath * Apply suggestions from code review added Niko's suggestions, implementing comments Co-authored-by: Niko Sirmpilatze <niko.sirbiladze@gmail.com> * pre-commit changes * corrected import error * pre-commit changes again * get rid of extra underlines * finishing touches * added default thumbnail for examples without plots --------- Co-authored-by: Niko Sirmpilatze <niko.sirbiladze@gmail.com>
neuroinformatics-unit · Oct 22, 2024 · 6c32608 · 6c32608
1 parent b399ce0
commit 6c32608
Show file tree

Hide file tree

Showing 3 changed files with 232 additions and 0 deletions.
diff --git a/docs/source/_static/data_icon.png b/docs/source/_static/data_icon.png
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -109,6 +109,7 @@
         "dependencies": ["environment.yml"],
     },
     "reference_url": {"movement": None},
+    "default_thumb_file": "source/_static/data_icon.png",  # default thumbnail image  
     "remove_config_comments": True,
     # do not render config params set as # sphinx_gallery_config [= value]
 }

diff --git a/examples/convert_file_formats.py b/examples/convert_file_formats.py
@@ -0,0 +1,231 @@
+"""Convert pose tracks between file formats
+===========================================
+
+Load pose tracks from one file format, modify them,
+and save them to another file format.
+"""
+
+# %%
+# Motivation
+# ----------
+# When working with pose estimation data, it's often useful to convert
+# between file formats. For example, you may need to
+# use some downstream analysis tool that requires a specific file
+# format, or expects the keypoints to be named in a certain way.
+#
+# In the following example, we will load a dataset from a
+# SLEAP file, modify the keypoints (rename, delete, reorder),
+# and save the modified dataset as a DeepLabCut file.
+#
+# We'll first walk through each step separately, and then
+# combine them into a single function that can be applied
+# to multiple files at once.
+
+# %%
+# Imports
+# -------
+import tempfile
+from pathlib import Path
+
+from movement import sample_data
+from movement.io import load_poses, save_poses
+
+# %%
+# Load the dataset
+# ----------------
+# We'll start with the path to a file output by one of
+# our :ref:`supported pose estimation frameworks<target-formats>`.
+# For example, the path could be something like:
+
+# uncomment and edit the following line to point to your own local file
+# file_path = "/path/to/my/data.h5"
+
+# %%
+# For the sake of this example, we will use the path to one of
+# the sample datasets provided with ``movement``.
+
+file_path = sample_data.fetch_dataset_paths(
+    "SLEAP_single-mouse_EPM.analysis.h5"
+)["poses"]
+print(file_path)
+
+# %%
+# Now let's load this file into a
+# :ref:`movement poses dataset<target-poses-and-bboxes-dataset>`,
+# which we can then modify to our liking.
+
+ds = load_poses.from_sleap_file(file_path, fps=30)
+print(ds, "\n")
+print("Individuals:", ds.coords["individuals"].values)
+print("Keypoints:", ds.coords["keypoints"].values)
+
+
+# %%
+# .. note::
+#     If you're running this code in a Jupyter notebook,
+#     you can just type ``ds`` (instead of printing it)
+#     to explore the dataset interactively.
+
+# %%
+# Rename keypoints
+# ----------------
+# We start with a dictionary that maps old keypoint names to new ones.
+# Next, we define a function that takes that dictionary and a dataset
+# as inputs, and returns a modified dataset. Notice that under the hood
+# this function calls :meth:`xarray.Dataset.assign_coords`.
+
+rename_dict = {
+    "snout": "nose",
+    "left_ear": "earL",
+    "right_ear": "earR",
+    "centre": "middle",
+    "tail_base": "tailbase",
+    "tail_end": "tailend",
+}
+
+
+def rename_keypoints(ds, rename_dict):
+    # get the current names of the keypoints
+    keypoint_names = ds.coords["keypoints"].values
+
+    # rename the keypoints
+    if not rename_dict:
+        print("No keypoints to rename. Skipping renaming step.")
+    else:
+        new_keypoints = [rename_dict.get(kp, str(kp)) for kp in keypoint_names]
+        # Assign the modified values back to the Dataset
+        ds = ds.assign_coords(keypoints=new_keypoints)
+    return ds
+
+
+# %%
+# Let's apply the function to our dataset and see the results.
+ds_renamed = rename_keypoints(ds, rename_dict)
+print("Keypoints in modified dataset:", ds_renamed.coords["keypoints"].values)
+
+
+# %%
+# Delete keypoints
+# -----------------
+# Let's create a list of keypoints to delete.
+# In this case, we choose to get rid of the ``tailend`` keypoint,
+# which is often hard to reliably track.
+# We delete it using :meth:`xarray.Dataset.drop_sel`,
+# wrapped in an appropriately named function.
+
+keypoints_to_delete = ["tailend"]
+
+
+def delete_keypoints(ds, delete_keypoints):
+    if not delete_keypoints:
+        print("No keypoints to delete. Skipping deleting step.")
+    else:
+        # Delete the specified keypoints and their corresponding data
+        ds = ds.drop_sel(keypoints=delete_keypoints)
+    return ds
+
+
+ds_deleted = delete_keypoints(ds_renamed, keypoints_to_delete)
+print("Keypoints in modified dataset:", ds_deleted.coords["keypoints"].values)
+
+
+# %%
+# Reorder keypoints
+# ------------------
+# We start with a list of keypoints in the desired order
+# (in this case, we'll just swap the order of the left and right ears).
+# We then use :meth:`xarray.Dataset.reindex`, wrapped in yet another function.
+
+ordered_keypoints = ["nose", "earR", "earL", "middle", "tailbase"]
+
+
+def reorder_keypoints(ds, ordered_keypoints):
+    if not ordered_keypoints:
+        print("No keypoints to reorder. Skipping reordering step.")
+    else:
+        ds = ds.reindex(keypoints=ordered_keypoints)
+    return ds
+
+
+ds_reordered = reorder_keypoints(ds_deleted, ordered_keypoints)
+print(
+    "Keypoints in modified dataset:", ds_reordered.coords["keypoints"].values
+)
+
+# %%
+# Save the modified dataset
+# ---------------------------
+# Now that we have modified the dataset to our liking,
+# let's save it to a .csv file in the DeepLabCut format.
+# In this case, we save the file to a temporary
+# directory, and we use the same file name
+# as the original, but ending in ``_dlc.csv``.
+# You will need to specify a different ``target_dir`` and edit
+# the ``dest_path`` variable to your liking.
+
+target_dir = tempfile.mkdtemp()
+dest_path = Path(target_dir) / f"{file_path.stem}_dlc.csv"
+
+save_poses.to_dlc_file(ds_reordered, dest_path, split_individuals=False)
+print(f"Saved modified dataset to {dest_path}.")
+
+# %%
+# .. note::
+#     The ``split_individuals`` argument allows you to save
+#     a dataset with multiple individuals as separate files,
+#     with the individual ID appended to each file name.
+#     In this case, we set it to ``False`` because we only have
+#     one individual in the dataset, and we don't need its name
+#     appended to the file name.
+
+
+# %%
+# One function to rule them all
+# -----------------------------
+# Since we know how to rename, delete, and reorder keypoints,
+# let's put it all together in a single function
+# and see how we could apply it to multiple files at once,
+# as we might do in a real-world scenario.
+#
+# The following function will convert all files in a folder
+# (that end with a specified suffix) from SLEAP to DeepLabCut format.
+# Each file will be loaded, modified according to the
+# ``rename_dict``, ``keypoints_to_delete``, and ``ordered_keypoints``
+# we've defined above, and saved to the target directory.
+
+
+data_dir = "/path/to/your/data/"
+target_dir = "/path/to/your/target/data/"
+
+
+def convert_all(data_dir, target_dir, suffix=".slp"):
+    source_folder = Path(data_dir)
+    file_paths = list(source_folder.rglob(f"*{suffix}"))
+
+    for file_path in file_paths:
+        file_path = Path(file_path)
+
+        # this determines the file names for the modified files
+        dest_path = Path(target_dir) / f"{file_path.stem}_dlc.csv"
+
+        if dest_path.exists():
+            print(f"Skipping {file_path} as {dest_path} already exists.")
+            continue
+
+        if file_path.exists():
+            print(f"Processing: {file_path}")
+            # load the data from SLEAP file
+            ds = load_poses.from_sleap_file(file_path)
+            # modify the data
+            ds_renamed = rename_keypoints(ds, rename_dict)
+            ds_deleted = delete_keypoints(ds_renamed, keypoints_to_delete)
+            ds_reordered = reorder_keypoints(ds_deleted, ordered_keypoints)
+            # save modified data to a DeepLabCut file
+            save_poses.to_dlc_file(
+                ds_reordered, dest_path, split_individuals=False
+            )
+        else:
+            raise ValueError(
+                f"File '{file_path}' does not exist. "
+                f"Please check the file path and try again."
+            )