Allow multiline comments for text cells in py:percent formats

Closes #305
mwouts · Oct 12, 2019 · 1accbce · 1accbce
1 parent 4a76ffe
commit 1accbce
Show file tree

Hide file tree

Showing 6 changed files with 239 additions and 6 deletions.
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -9,7 +9,9 @@ Release History
 **Improvements**
 
 - Raw cells are now encoded using HTML comments (``<!-- #raw -->`` and ``<!-- #endraw -->``) in Markdown files. And code blocks from Markdown files, when they don't have an explicit language, are displayed as Markdown cells in Jupyter (#321)
+- Markdown and raw cells can use multiline comments in the ``py:percent`` format (#305)
 - ``jupytext notebook.py --to ipynb`` updates the timestamp of ``notebook.py`` so that the paired notebook still works in Jupyter (#335, #254)
+
 **BugFixes**
 
 

diff --git a/docs/formats.md b/docs/formats.md
@@ -75,7 +75,16 @@ Our implementation of the `percent` format is compatible with the original speci
 ```python
 # %% Optional text [cell type] {optional JSON metadata}
 ```
-where cell type is either omitted (code cells), or `[markdown]` or  `[raw]`. The content of markdown and raw cells is commented out in the resulting script.
+where cell type is either omitted (code cells), or `[markdown]` or  `[raw]`. The content of markdown and raw cells is commented out in the resulting script, using line comments by default. Multiline comments can also be used for Python scripts. If you want to use multiline comments for all text cells, add a `{"jupytext": {"cell_markers": "\"\"\""}}` metadata to your notebook, either with the notebook metadata editor in Jupyter, or at the command line:
+```bash
+jupytext --update-metadata '{"jupytext": {"cell_markers": "\"\"\""}}' notebook.ipynb --to py:percent
+```
+
+If you want to use multiline comments for all your paired notebooks, you could also add
+```python
+c.ContentsManager.default_cell_markers = '"""'
+```
+to your `.jupyter/jupyter_notebook_config.py` file.
 
 Percent scripts created by Jupytext have a header with an explicit format information. The format of scripts with no header is inferred automatically: scripts with at least one `# %%` cell are identified as `percent` scripts. Scripts with at least one double percent cell, and an uncommented Jupyter magic command, are identified as `hydrogen` scripts.
 

diff --git a/jupytext/cell_reader.py b/jupytext/cell_reader.py
@@ -619,7 +619,22 @@ def find_cell_content(self, lines):
 
         if self.cell_type != 'code' or (self.metadata and not is_active('py', self.metadata)) \
                 or (self.language is not None and self.language != self.default_language):
-            source = uncomment(source, self.comment)
+            if self.ext == '.py' and self.cell_type != 'code' and self.org_content \
+                    and self.org_content[0].lstrip().startswith(('"""', "'''")):
+                content = '\n'.join(self.org_content).strip()
+                for triple_quote in ['"""', "'''"]:
+                    if content.startswith(triple_quote) and content.endswith(triple_quote):
+                        content = content[3:-3]
+                        self.metadata['cell_marker'] = triple_quote
+                        # Trim first/last line return
+                        if content.startswith('\n'):
+                            content = content[1:]
+                        if content.endswith('\n'):
+                            content = content[:-1]
+                        source = content.splitlines()
+                        break
+            else:
+                source = uncomment(source, self.comment)
         elif self.metadata is not None and self.comment_magics:
             source = self.uncomment_code_and_magics(source)
 
@@ -643,7 +658,13 @@ def find_cell_end(self, lines):
             self.cell_type = 'code'
 
         next_cell = len(lines)
+        parser = StringParser(self.language or self.default_language)
         for i, line in enumerate(lines):
+            if parser.is_quoted():
+                parser.read_line(line)
+                continue
+
+            parser.read_line(line)
             if i > 0 and (self.start_code_re.match(line) or self.alternative_start_code_re.match(line)):
                 next_cell = i
                 break

diff --git a/jupytext/cell_to_text.py b/jupytext/cell_to_text.py
@@ -2,6 +2,7 @@
 
 import re
 import json
+import warnings
 from copy import copy
 from .languages import cell_language, comment_lines
 from .cell_metadata import is_active, _IGNORE_CELL_METADATA
@@ -207,7 +208,10 @@ class LightScriptCellExporter(BaseCellExporter):
     def __init__(self, *args, **kwargs):
         BaseCellExporter.__init__(self, *args, **kwargs)
         if 'cell_markers' in self.fmt:
-            if self.fmt['cell_markers'] != '+,-':
+            if ',' not in self.fmt['cell_markers']:
+                warnings.warn("Ignored cell markers '{}' as it does not match the expected 'start,end' pattern"
+                              .format(self.fmt.pop('cell_markers')))
+            elif self.fmt['cell_markers'] != '+,-':
                 self.cell_marker_start, self.cell_marker_end = self.fmt['cell_markers'].split(',', 1)
         for key in ['endofcell']:
             if key in self.unfiltered_metadata:
@@ -357,6 +361,10 @@ class DoublePercentCellExporter(BaseCellExporter):  # pylint: disable=W0223
     default_comment_magics = True
     parse_cell_language = True
 
+    def __init__(self, *args, **kwargs):
+        BaseCellExporter.__init__(self, *args, **kwargs)
+        self.cell_markers = self.fmt.get('cell_markers')
+
     def cell_to_text(self):
         """Return the text representation for the cell"""
         if self.cell_type != 'code':
@@ -381,6 +389,10 @@ def cell_to_text(self):
                 return lines
             return lines + source
 
+        cell_marker = self.unfiltered_metadata.get('cell_marker', self.cell_markers)
+        if self.cell_type != 'code' and cell_marker:
+            return lines + [cell_marker] + self.source + [cell_marker]
+
         return lines + comment_lines(self.source, self.comment)
 
 

diff --git a/jupytext/formats.py b/jupytext/formats.py
@@ -110,11 +110,12 @@ def __init__(self,
             header_prefix=_SCRIPT_EXTENSIONS[ext]['comment'],
             cell_reader_class=DoublePercentScriptCellReader,
             cell_exporter_class=DoublePercentCellExporter,
+            # Version 1.3 on 2019-09-21 - jupytext v1.3.0: Markdown cells can be quoted using triple quotes #305
             # Version 1.2 on 2018-11-18 - jupytext v0.8.6: Jupyter magics are commented by default #126, #132
             # Version 1.1 on 2018-09-23 - jupytext v0.7.0rc1 : [markdown] and
             # [raw] for markdown and raw cells.
             # Version 1.0 on 2018-09-22 - jupytext v0.7.0rc0 : Initial version
-            current_version_number='1.2',
+            current_version_number='1.3',
             min_readable_version_number='1.1') for ext in _SCRIPT_EXTENSIONS] + \
     [
         NotebookFormatDescription(

diff --git a/tests/test_read_simple_percent.py b/tests/test_read_simple_percent.py
@@ -1,7 +1,8 @@
 # -*- coding: utf-8 -*-
 
-from nbformat.v4.nbbase import new_notebook, new_code_cell
-from jupytext.compare import compare
+import os
+from nbformat.v4.nbbase import new_notebook, new_code_cell, new_markdown_cell
+from jupytext.compare import compare, compare_notebooks
 import jupytext
 
 
@@ -206,3 +207,190 @@ def test_first_cell_markdown_191():
     assert nb.cells[0].cell_type == 'markdown'
     assert nb.cells[1].cell_type == 'code'
     assert nb.cells[2].cell_type == 'markdown'
+
+
+def test_multiline_comments_in_markdown_1():
+    text = """# %% [markdown]
+'''
+a
+long
+cell
+'''
+"""
+    nb = jupytext.reads(text, 'py')
+    assert len(nb.cells) == 1
+    assert nb.cells[0].cell_type == 'markdown'
+    assert nb.cells[0].source == "a\nlong\ncell"
+    py = jupytext.writes(nb, 'py')
+    compare(py, text)
+
+
+def test_multiline_comments_in_markdown_2():
+    text = '''# %% [markdown]
+"""
+a
+long
+cell
+"""
+'''
+    nb = jupytext.reads(text, 'py')
+    assert len(nb.cells) == 1
+    assert nb.cells[0].cell_type == 'markdown'
+    assert nb.cells[0].source == "a\nlong\ncell"
+    py = jupytext.writes(nb, 'py')
+    compare(py, text)
+
+
+def test_multiline_comments_format_option():
+    text = '''# %% [markdown]
+"""
+a
+long
+cell
+"""
+'''
+    nb = new_notebook(cells=[new_markdown_cell("a\nlong\ncell")],
+                      metadata={'jupytext': {'cell_markers': '"""',
+                                             'notebook_metadata_filter': '-all'}})
+    py = jupytext.writes(nb, 'py:percent')
+    compare(py, text)
+
+
+def test_multiline_comments_in_raw_cell():
+    text = '''# %% [raw]
+"""
+some
+text
+"""
+'''
+    nb = jupytext.reads(text, 'py')
+    assert len(nb.cells) == 1
+    assert nb.cells[0].cell_type == 'raw'
+    assert nb.cells[0].source == "some\ntext"
+    py = jupytext.writes(nb, 'py')
+    compare(py, text)
+
+
+def test_multiline_comments_in_markdown_cell_no_line_return():
+    text = '''# %% [markdown]
+"""a
+long
+cell"""
+'''
+    nb = jupytext.reads(text, 'py')
+    assert len(nb.cells) == 1
+    assert nb.cells[0].cell_type == 'markdown'
+    assert nb.cells[0].source == "a\nlong\ncell"
+
+
+def test_multiline_comments_in_markdown_cell_is_robust_to_additional_cell_marker():
+    text = '''# %% [markdown]
+"""
+some text, and a fake cell marker
+# %% [raw]
+"""
+'''
+    nb = jupytext.reads(text, 'py')
+    assert len(nb.cells) == 1
+    assert nb.cells[0].cell_type == 'markdown'
+    assert nb.cells[0].source == "some text, and a fake cell marker\n# %% [raw]"
+    py = jupytext.writes(nb, 'py')
+    compare(py, text)
+
+
+def test_cell_markers_option_in_contents_manager(tmpdir):
+    tmp_ipynb = str(tmpdir.join('notebook.ipynb'))
+    tmp_py = str(tmpdir.join('notebook.py'))
+
+    cm = jupytext.TextFileContentsManager()
+    cm.root_dir = str(tmpdir)
+
+    nb = new_notebook(cells=[new_code_cell('1 + 1'), new_markdown_cell('a\nlong\ncell')],
+                      metadata={'jupytext': {'formats': 'ipynb,py:percent',
+                                             'notebook_metadata_filter': '-all',
+                                             'cell_markers': "'''"}})
+    cm.save(model=dict(type='notebook', content=nb), path='notebook.ipynb')
+
+    assert os.path.isfile(tmp_ipynb)
+    assert os.path.isfile(tmp_py)
+
+    with open(tmp_py) as fp:
+        text = fp.read()
+
+    compare(text, """# %%
+1 + 1
+
+# %% [markdown]
+'''
+a
+long
+cell
+'''
+""")
+
+    nb2 = jupytext.read(tmp_py)
+    compare_notebooks(nb2, nb)
+
+
+def test_default_cell_markers_in_contents_manager(tmpdir):
+    tmp_ipynb = str(tmpdir.join('notebook.ipynb'))
+    tmp_py = str(tmpdir.join('notebook.py'))
+
+    cm = jupytext.TextFileContentsManager()
+    cm.root_dir = str(tmpdir)
+    cm.default_cell_markers = "'''"
+
+    nb = new_notebook(cells=[new_code_cell('1 + 1'), new_markdown_cell('a\nlong\ncell')],
+                      metadata={'jupytext': {'formats': 'ipynb,py:percent',
+                                             'notebook_metadata_filter': '-all'}})
+    cm.save(model=dict(type='notebook', content=nb), path='notebook.ipynb')
+
+    assert os.path.isfile(tmp_ipynb)
+    assert os.path.isfile(tmp_py)
+
+    with open(tmp_py) as fp:
+        text = fp.read()
+
+    compare(text, """# %%
+1 + 1
+
+# %% [markdown]
+'''
+a
+long
+cell
+'''
+""")
+
+    nb2 = jupytext.read(tmp_py)
+    compare_notebooks(nb2, nb)
+
+
+def test_default_cell_markers_in_contents_manager_does_not_impact_light_format(tmpdir):
+    tmp_ipynb = str(tmpdir.join('notebook.ipynb'))
+    tmp_py = str(tmpdir.join('notebook.py'))
+
+    cm = jupytext.TextFileContentsManager()
+    cm.root_dir = str(tmpdir)
+    cm.default_cell_markers = "'''"
+
+    nb = new_notebook(cells=[new_code_cell('1 + 1'), new_markdown_cell('a\nlong\ncell')],
+                      metadata={'jupytext': {'formats': 'ipynb,py',
+                                             'notebook_metadata_filter': '-all'}})
+    cm.save(model=dict(type='notebook', content=nb), path='notebook.ipynb')
+
+    assert os.path.isfile(tmp_ipynb)
+    assert os.path.isfile(tmp_py)
+
+    with open(tmp_py) as fp:
+        text = fp.read()
+
+    compare(text, """1 + 1
+
+# a
+# long
+# cell
+""")
+
+    nb2 = jupytext.read(tmp_py)
+    compare_notebooks(nb2, nb)