Skip to content

Commit

Permalink
Metadata filtering config is a dictionary #110
Browse files Browse the repository at this point in the history
  • Loading branch information
mwouts committed Oct 25, 2018
1 parent 733675a commit d110433
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 40 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -282,14 +282,14 @@ The text representation of the notebook focuses on the part of the notebook that
To that end, the cell metadata `autoscroll`, `collapsed`, `scrolled`, `trusted` and `ExecuteTime` are not included in the text representation, and only the required notebook metadata (`kernelspec`, `language_info` and `jupytext`) are saved when a notebook is exported as text.

When a paired notebook is loaded, Jupytext reconstructs the filtered metadata using the `.ipynb` file. Please keep in mind that the `.ipynb` file is typically not distributed across contributors, and that the cell metadata may be lost when an input cell changes (cells are matched according to their contents). Thus, if some cell or notebook metadata are important to your notebook, you should preserve them in the text version. Change the default metadata filtering as follows:
- If you want to preserve all the notebook metadata but `widgets` and `varInspector` in the YAML header, set a notebook metadata `"jupytext": {"metadata_filter": {"notebook": "all-widgets,varInspector"}}`
- If you want to preserve all the notebook metadata but `widgets` and `varInspector` in the YAML header, set a notebook metadata `"jupytext": {"metadata_filter": {"notebook": "all,-widgets,-varInspector"}}`
- If you want to preserve the `toc` section (in addition to the default YAML header), use `"jupytext": {"metadata_filter": {"notebook": "toc"}}`
- At last, if you want to modify the default cell filter and allow `ExecuteTime` and `autoscroll`, but not `hide_ouput`, use `"jupytext": {"metadata_filter": {"cells": "ExecuteTime,autoscroll-hide_ouput"}}`
- Finally, if you want to modify the default cell filter and allow `ExecuteTime` and `autoscroll`, but not `hide_output`, use `"jupytext": {"metadata_filter": {"cells": "ExecuteTime,autoscroll,-hide_output"}}`

A default value for these filters can be set on Jupytext's content manager using, for instance
```
c.default_notebook_metadata_filter = "all-widgets,varInspector"
c.default_cell_metadata_filter = "ExecuteTime,autoscroll-hide_ouput"
c.default_notebook_metadata_filter = "all,-widgets,-varInspector"
c.default_cell_metadata_filter = "ExecuteTime,autoscroll,-hide_output"
```
Help us improve the default configuration: if you are aware of notebook metadata that should not be filtered, or of cell metadata that should always be filtered, please open an issue and let us know.

Expand Down
2 changes: 1 addition & 1 deletion jupytext/cell_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

_BOOLEAN_OPTIONS_DICTIONARY = [('hide_input', 'echo', True),
('hide_output', 'include', True)]
_IGNORE_CELL_METADATA = '-' + ','.join([
_IGNORE_CELL_METADATA = ','.join('-{}'.format(name) for name in [
# Frequent cell metadata that should not enter the text representation
# (these metadata are preserved in the paired Jupyter notebook).
'autoscroll', 'collapsed', 'scrolled', 'trusted', 'ExecuteTime',
Expand Down
13 changes: 9 additions & 4 deletions jupytext/contentsmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from .combine import combine_inputs_with_outputs
from .formats import check_file_version, NOTEBOOK_EXTENSIONS, \
format_name_for_ext, parse_one_format, parse_formats, transition_to_jupytext_section_in_metadata
from .metadata_filter import metadata_filter_as_dict


def _jupytext_writes(ext, format_name):
Expand Down Expand Up @@ -341,15 +342,19 @@ def get(self, path, content=True, type=None, format=None,
except OverflowError:
pass

jupytext_metadata = model['content']['metadata'].setdefault('jupytext', {})
if self.default_notebook_metadata_filter:
(model['content'].metadata.setdefault('jupytext', {})
.setdefault('metadata_filter', {})
(jupytext_metadata.setdefault('metadata_filter', {})
.setdefault('notebook', self.default_notebook_metadata_filter))
if self.default_cell_metadata_filter:
(model['content'].metadata.setdefault('jupytext', {})
.setdefault('metadata_filter', {})
(jupytext_metadata.setdefault('metadata_filter', {})
.setdefault('cells', self.default_cell_metadata_filter))

for filter_level in ['notebook', 'cells']:
filter = jupytext_metadata.get('metadata_filter', {}).get(filter_level)
if filter is not None:
jupytext_metadata['metadata_filter'][filter_level] = metadata_filter_as_dict(filter)

if model_outputs:
combine_inputs_with_outputs(model['content'], model_outputs['content'])
elif not fmt.endswith('.ipynb'):
Expand Down
78 changes: 54 additions & 24 deletions jupytext/metadata_filter.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,70 @@
"""Notebook and cell metadata filtering"""


def parse_metadata_config(metadata_config, actual_keys, filtered_keys=None):
"""Return additional and excluded sets that correspond to a config of the form
'entry_one,entry_two-negative_entry_one,negative_entry_two"""
def metadata_filter_as_dict(metadata_config):
"""Return the metadata filter represented as either None (no filter),
or a dictionary with at most two keys: 'additional' and 'excluded',
which contain either a list of metadata names, or the string 'all'"""

if metadata_config is None:
return {}

if metadata_config is True:
metadata_config = 'all'
elif metadata_config is False:
metadata_config = '-all'
elif metadata_config is None:
metadata_config = ''

if '-' in metadata_config:
additional, excluded = metadata_config.split('-', 1)
excluded = set(excluded.split(',')).difference({''})
if not additional and 'all' not in excluded:
additional = set(filtered_keys or actual_keys)
return {'additional': 'all'}

if metadata_config is False:
return {'excluded': 'all'}

if isinstance(metadata_config, dict):
assert set(metadata_config) <= set(['additional', 'excluded'])
return metadata_config

metadata_keys = metadata_config.split(',')

metadata_config = {}

for key in metadata_keys:
key = key.strip()
if key.startswith('-'):
metadata_config.setdefault('excluded', []).append(key[1:].strip())
elif key.startswith('+'):
metadata_config.setdefault('additional', []).append(key[1:].strip())
else:
additional = set(additional.split(',')).difference({''})
else:
additional = set(metadata_config.split(',')).difference({''})
excluded = set()
metadata_config.setdefault('additional', []).append(key)

if 'all' in additional:
for section in metadata_config:
if 'all' in metadata_config[section]:
metadata_config[section] = 'all'
else:
metadata_config[section] = [key for key in metadata_config[section] if key]

return metadata_config


def apply_metadata_filter(metadata_config, actual_keys, filtered_keys=None):
"""Apply the filter and replace 'all' with the actual or filtered keys"""

metadata_config = metadata_filter_as_dict(metadata_config)
additional = metadata_config.get('additional') or set()
excluded = metadata_config.get('excluded') or set()

if not additional and excluded and excluded != 'all':
additional = set(filtered_keys or actual_keys).difference(excluded)

if additional == 'all':
additional = actual_keys
if 'all' in excluded:
excluded = actual_keys.difference(additional)

return additional, excluded
if excluded == 'all':
excluded = set(actual_keys).difference(additional)

return set(additional), set(excluded)


def filter_metadata(metadata, user_metadata_config, default_metadata_config):
"""Filter the cell or notebook metadata, according to the user preference"""
actual_keys = set(metadata.keys())
default_positive, default_negative = parse_metadata_config(default_metadata_config, actual_keys)
user_positive, user_negative = parse_metadata_config(
default_positive, default_negative = apply_metadata_filter(default_metadata_config, actual_keys)
user_positive, user_negative = apply_metadata_filter(
user_metadata_config, actual_keys,
actual_keys.intersection(default_positive).difference(default_negative))

Expand Down
2 changes: 1 addition & 1 deletion tests/test_contentsmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,7 @@ def test_metadata_filter_is_effective(nb_file, tmpdir):

# set config
cm.default_jupytext_formats = 'ipynb, py'
cm.default_notebook_metadata_filter = 'jupytext-all'
cm.default_notebook_metadata_filter = 'jupytext,-all'
cm.default_cell_metadata_filter = '-all'

# load notebook
Expand Down
25 changes: 19 additions & 6 deletions tests/test_metadata_filter.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,41 @@
from jupytext.metadata_filter import filter_metadata
import pytest
from jupytext.metadata_filter import filter_metadata, metadata_filter_as_dict


def to_dict(keys):
return {key: None for key in keys}


@pytest.mark.parametrize('metadata_filter_string,metadata_filter_dict',
[('all, -widgets,-varInspector',
{'additional': 'all', 'excluded': ['widgets', 'varInspector']}),
('toc', {'additional': ['toc']}),
('+ toc', {'additional': ['toc']}),
('preserve,-all', {'additional': ['preserve'], 'excluded': 'all'}),
('ExecuteTime, autoscroll, -hide_output',
{'additional': ['ExecuteTime', 'autoscroll'], 'excluded': ['hide_output']})])
def test_string_to_dict_conversion(metadata_filter_string, metadata_filter_dict):
assert metadata_filter_as_dict(metadata_filter_string) == metadata_filter_dict


def test_metadata_filter_default():
assert filter_metadata(to_dict(['technical', 'user', 'preserve']), None, '-technical'
) == to_dict(['user', 'preserve'])
assert filter_metadata(to_dict(['technical', 'user', 'preserve']), None, 'preserve-all'
assert filter_metadata(to_dict(['technical', 'user', 'preserve']), None, 'preserve,-all'
) == to_dict(['preserve'])


def test_metadata_filter_user_plus_default():
assert filter_metadata(to_dict(['technical', 'user', 'preserve']), '-user', '-technical'
) == to_dict(['preserve'])
assert filter_metadata(to_dict(['technical', 'user', 'preserve']), 'all-user', '-technical'
assert filter_metadata(to_dict(['technical', 'user', 'preserve']), 'all,-user', '-technical'
) == to_dict(['preserve', 'technical'])
assert filter_metadata(to_dict(['technical', 'user', 'preserve']), 'user', 'preserve-all'
assert filter_metadata(to_dict(['technical', 'user', 'preserve']), 'user', 'preserve,-all'
) == to_dict(['user', 'preserve'])


def test_metadata_filter_user_overrides_default():
assert filter_metadata(to_dict(['technical', 'user', 'preserve']), 'all-user', '-technical'
assert filter_metadata(to_dict(['technical', 'user', 'preserve']), 'all,-user', '-technical'
) == to_dict(['technical', 'preserve'])
assert filter_metadata(to_dict(['technical', 'user', 'preserve']), 'user-all', 'preserve'
assert filter_metadata(to_dict(['technical', 'user', 'preserve']), 'user,-all', 'preserve'
) == to_dict(['user'])

1 comment on commit d110433

@mwouts
Copy link
Owner Author

@mwouts mwouts commented on d110433 Oct 25, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.