Read code cells from scripts created by nbconvert #89

Author: mwouts · Date: Oct 2, 2018
Commit: 954e730
Parent: 1a5e7f5 (1 parent)
Show file tree

Hide file tree

Showing 3 changed files with 50 additions and 7 deletions.
diff --git a/jupytext/cell_reader.py b/jupytext/cell_reader.py
@@ -426,6 +426,7 @@ def __init__(self, ext):
         self.default_language = script['language']
         self.comment = script['comment']
         self.start_code_re = re.compile(r"^{}\s+%%(.*)$".format(self.comment))
+        self.nbconvert_start_code_re = re.compile(r"^{} (<codecell>|In\[[0-9 ]*\]:?)$".format(self.comment))
 
     def options_to_metadata(self, options):
         return None, double_percent_options_to_metadata(options)
@@ -437,7 +438,7 @@ def find_cell_content(self, lines):
             self.find_cell_end(lines)
 
         # Metadata to dict
-        if self.start_code_re.match(lines[0]):
+        if self.start_code_re.match(lines[0]) or self.nbconvert_start_code_re.match(lines[0]):
             cell_start = 1
         else:
             cell_start = 0
@@ -468,7 +469,7 @@ def find_cell_end(self, lines):
             self.cell_type = 'code'
 
         for i, line in enumerate(lines):
-            if i > 0 and self.start_code_re.match(line):
+            if i > 0 and (self.start_code_re.match(line) or self.nbconvert_start_code_re.match(line)):
                 if _BLANK_LINE.match(lines[i - 1]):
                     return i - 1, i, False
                 return i, i, False

diff --git a/jupytext/formats.py b/jupytext/formats.py
@@ -4,6 +4,7 @@
 """
 
 import os
+import re
 from .header import header_to_metadata_and_cell, insert_or_test_version_number
 from .cell_reader import MarkdownCellReader, RMarkdownCellReader, \
     LightScriptCellReader, RScriptCellReader, DoublePercentScriptCellReader, \
@@ -169,10 +170,12 @@ def guess_format(text, ext):
 
     # Is this a Hydrogen-like script?
     # Or a Sphinx-gallery script?
-    if ext in ['.jl', '.py', '.R']:
+    if ext in _SCRIPT_EXTENSIONS:
+        comment = _SCRIPT_EXTENSIONS[ext]['comment']
         twenty_hash = ''.join(['#'] * 20)
-        double_percent = '# %%'
+        double_percent = comment + ' %%'
         double_percent_and_space = double_percent + ' '
+        nbconvert_script_re = re.compile(r'^{}( <codecell>| In\[[0-9 ]*\]:?)'.format(comment))
         twenty_hash_count = 0
         double_percent_count = 0
 
@@ -184,11 +187,10 @@ def guess_format(text, ext):
 
             # Don't count escaped Jupyter magics (no space between
             # %% and command) as cells
-            if line == double_percent or \
-                    line.startswith(double_percent_and_space):
+            if line == double_percent or line.startswith(double_percent_and_space) or nbconvert_script_re.match(line):
                 double_percent_count += 1
 
-            if line.startswith(twenty_hash):
+            if line.startswith(twenty_hash) and ext == '.py':
                 twenty_hash_count += 1
 
         if double_percent_count >= 2 or twenty_hash_count >= 2:

diff --git a/tests/test_read_simple_percent.py b/tests/test_read_simple_percent.py
@@ -62,3 +62,43 @@ def test_read_cell_with_metadata(
 
     script2 = jupytext.writes(nb, ext='.py', format_name='percent')
     compare(script, script2)
+
+
+def test_read_nbconvert_script(script="""
+# coding: utf-8
+
+# A markdown cell
+
+# In[1]:
+
+
+import pandas as pd
+
+pd.options.display.max_rows = 6
+pd.options.display.max_columns = 20
+
+
+# Another markdown cell
+
+# In[2]:
+
+
+1 + 1
+
+
+# Again, a markdown cell
+
+# In[33]:
+
+
+2 + 2
+
+
+# <codecell>
+
+
+3 + 3
+"""):
+    assert jupytext.formats.guess_format(script, '.py') == 'percent'
+    nb = jupytext.reads(script, '.py')
+    assert len(nb.cells) == 5