fix RST parsing when no indent after enum.item (fix #17249) (#17257)

nim-lang · Mar 12, 2021 · d97bf4f · d97bf4f
1 parent 9782580
commit d97bf4f
Show file tree

Hide file tree

Showing 7 changed files with 82 additions and 28 deletions.
diff --git a/compiler/docgen.nim b/compiler/docgen.nim
@@ -1366,8 +1366,9 @@ proc commandRstAux(cache: IdentCache, conf: ConfigRef;
   var d = newDocumentor(filen, cache, conf, outExt)
 
   d.isPureRst = true
-  var rst = parseRst(readFile(filen.string), filen.string, 0, 1, d.hasToc,
-                     {roSupportRawDirective, roSupportMarkdown}, conf)
+  var rst = parseRst(readFile(filen.string), filen.string,
+                     line=LineRstInit, column=ColRstInit,
+                     d.hasToc, {roSupportRawDirective, roSupportMarkdown}, conf)
   var modDesc = newStringOfCap(30_000)
   renderRstToOut(d[], rst, modDesc)
   d.modDesc = rope(modDesc)

diff --git a/doc/contributing.rst b/doc/contributing.rst
@@ -599,14 +599,16 @@ to existing modules is acceptable. For two reasons:
 
 Conventions
 -----------
-1. New stdlib modules should go under `Nim/lib/std/`. The rationale is to require
-users to import via `import std/foo` instead of `import foo`, which would cause
-potential conflicts with nimble packages. Note that this still applies for new modules
-in existing logical directories, e.g.:
-use `lib/std/collections/foo.nim`, not `lib/pure/collections/foo.nim`.
+1. New stdlib modules should go under `Nim/lib/std/`. The rationale is to
+   require users to import via `import std/foo` instead of `import foo`,
+   which would cause potential conflicts with nimble packages.
+   Note that this still applies for new modules in existing logical
+   directories, e.g.: use `lib/std/collections/foo.nim`,
+   not `lib/pure/collections/foo.nim`.
 
 2. New module names should prefer plural form whenever possible, e.g.:
-`std/sums.nim` instead of `std/sum.nim`. In particular, this reduces chances of conflicts
-between module name and the symbols it defines. Furthermore, module names should
-use `snake_case` and not use capital letters, which cause issues when going
-from an OS without case sensitivity to an OS with it.
+   `std/sums.nim` instead of `std/sum.nim`. In particular, this reduces
+   chances of conflicts between module name and the symbols it defines.
+   Furthermore, module names should use `snake_case` and not use capital
+   letters, which cause issues when going from an OS without case
+   sensitivity to an OS with it.
diff --git a/doc/manual.rst b/doc/manual.rst
@@ -6073,13 +6073,14 @@ avoid ambiguity when there are multiple modules with the same path.
 
 There are two pseudo directories:
 
-1. ``std``: The ``std`` pseudo directory is the abstract location of Nim's standard
-library. For example, the syntax ``import std / strutils`` is used to unambiguously
-refer to the standard library's ``strutils`` module.
-2. ``pkg``: The ``pkg`` pseudo directory is used to unambiguously refer to a Nimble
-package. However, for technical details that lie outside the scope of this document,
-its semantics are: *Use the search path to look for module name but ignore the standard
-library locations*. In other words, it is the opposite of ``std``.
+1. ``std``: The ``std`` pseudo directory is the abstract location of
+   Nim's standard library. For example, the syntax ``import std / strutils``
+   is used to unambiguously refer to the standard library's ``strutils`` module.
+2. ``pkg``: The ``pkg`` pseudo directory is used to unambiguously refer to
+   a Nimble package. However, for technical details that lie outside the
+   scope of this document, its semantics are: *Use the search path to look for
+   module name but ignore the standard library locations*.
+   In other words, it is the opposite of ``std``.
 
 
 From import statement

diff --git a/lib/impure/db_sqlite.nim b/lib/impure/db_sqlite.nim
@@ -154,7 +154,7 @@
 ## The reasoning is as follows:
 ## 1. it's close to what many DBs offer natively (char**)
 ## 2. it hides the number of types that the DB supports
-## (int? int64? decimal up to 10 places? geo coords?)
+##    (int? int64? decimal up to 10 places? geo coords?)
 ## 3. it's convenient when all you do is to forward the data to somewhere else (echo, log, put the data into a new query)
 ##
 ## See also

diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim
@@ -467,12 +467,20 @@ type
     s*: PSharedState
     indentStack*: seq[int]
     filename*: string
-    line*, col*: int
+    line*, col*: int            ## initial line/column of whole text or
+                                ## documentation fragment that will be added
+                                ## in case of error/warning reporting to
+                                ## (relative) line/column of the token.
     hasToc*: bool
     curAnchor*: string          # variable to track latest anchor in s.anchors
 
   EParseError* = object of ValueError
 
+const
+  LineRstInit* = 1 ## Initial line number for standalone RST text
+  ColRstInit* = 0 ## Initial column number for standalone RST text
+                  ## (Nim global reporting adds ColOffset=1)
+
 template currentTok(p: RstParser): Token = p.tok[p.idx]
 template prevTok(p: RstParser): Token = p.tok[p.idx - 1]
 template nextTok(p: RstParser): Token = p.tok[p.idx + 1]
@@ -542,8 +550,8 @@ proc initParser(p: var RstParser, sharedState: PSharedState) =
   p.idx = 0
   p.filename = ""
   p.hasToc = false
-  p.col = 0
-  p.line = 1
+  p.col = ColRstInit
+  p.line = LineRstInit
   p.s = sharedState
 
 proc addNodesAux(n: PRstNode, result: var string) =
@@ -1439,8 +1447,8 @@ proc countTitles(p: var RstParser, n: PRstNode) =
         if p.s.hTitleCnt >= 2:
           break
 
-proc tokenAfterNewline(p: RstParser): int =
-  result = p.idx
+proc tokenAfterNewline(p: RstParser, start: int): int =
+  result = start
   while true:
     case p.tok[result].kind
     of tkEof:
@@ -1450,6 +1458,9 @@ proc tokenAfterNewline(p: RstParser): int =
       break
     else: inc result
 
+proc tokenAfterNewline(p: RstParser): int {.inline.} =
+  result = tokenAfterNewline(p, p.idx)
+
 proc isAdornmentHeadline(p: RstParser, adornmentIdx: int): bool =
   ## check that underline/overline length is enough for the heading.
   ## No support for Unicode.
@@ -1937,13 +1948,34 @@ proc parseEnumList(p: var RstParser): PRstNode =
     wildToken: array[0..5, int] = [4, 3, 3, 4, 3, 3]  # number of tokens
     wildIndex: array[0..5, int] = [1, 0, 0, 1, 0, 0]
       # position of enumeration sequence (number/letter) in enumerator
-  result = newRstNodeA(p, rnEnumList)
   let col = currentTok(p).col
   var w = 0
   while w < wildcards.len:
     if match(p, p.idx, wildcards[w]): break
     inc w
   assert w < wildcards.len
+  proc checkAfterNewline(p: RstParser, report: bool): bool =
+    let j = tokenAfterNewline(p, start=p.idx+1)
+    if p.tok[j].kind notin {tkIndent, tkEof} and
+        p.tok[j].col < p.tok[p.idx+wildToken[w]].col and
+        (p.tok[j].col > col or
+          (p.tok[j].col == col and not match(p, j, wildcards[w]))):
+      if report:
+        let n = p.line + p.tok[j].line
+        let msg = "\n" & """
+          not enough indentation on line $2
+              (if it's continuation of enumeration list),
+          or no blank line after line $1 (if it should be the next paragraph),
+          or no escaping \ at the beginning of line $1
+              (if lines $1..$2 are a normal paragraph, not enum. list)""".
+          unindent(8)
+        rstMessage(p, mwRstStyle, msg % [$(n-1), $n])
+      result = false
+    else:
+      result = true
+  if not checkAfterNewline(p, report = true):
+    return nil
+  result = newRstNodeA(p, rnEnumList)
   let autoEnums = if roSupportMarkdown in p.s.options: @["#", "1"] else: @["#"]
   var prevAE = ""  # so as not allow mixing auto-enumerators `1` and `#`
   var curEnum = 1
@@ -1963,6 +1995,10 @@ proc parseEnumList(p: var RstParser): PRstNode =
     result.add(item)
     if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
         match(p, p.idx+1, wildcards[w]):
+      # don't report to avoid duplication of warning since for
+      # subsequent enum. items parseEnumList will be called a second time:
+      if not checkAfterNewline(p, report = false):
+        break
       let enumerator = p.tok[p.idx + 1 + wildIndex[w]].symbol
       # check that it's in sequence: enumerator == next(prevEnum)
       if "n" in wildcards[w]:  # arabic numeral
@@ -2336,7 +2372,8 @@ proc selectDir(p: var RstParser, d: string): PRstNode =
   of "warning": result = dirAdmonition(p, d)
   of "default-role": result = dirDefaultRole(p)
   else:
-    rstMessage(p, meInvalidDirective, d)
+    let tok = p.tok[p.idx-2]  # report on directive in ".. directive::"
+    rstMessage(p, meInvalidDirective, d, tok.line, tok.col)
 
 proc prefix(ftnType: FootnoteType): string =
   case ftnType

diff --git a/lib/packages/docutils/rstgen.nim b/lib/packages/docutils/rstgen.nim
@@ -1506,7 +1506,8 @@ proc rstToHtml*(s: string, options: RstParseOptions,
   initRstGenerator(d, outHtml, config, filen, options, myFindFile,
                    rst.defaultMsgHandler)
   var dummyHasToc = false
-  var rst = rstParse(s, filen, 0, 1, dummyHasToc, options)
+  var rst = rstParse(s, filen, line=LineRstInit, column=ColRstInit,
+                     dummyHasToc, options)
   result = ""
   renderRstToOut(d, rst, result)
 
@@ -1518,4 +1519,7 @@ proc rstToLatex*(rstSource: string; options: RstParseOptions): string {.inline,
   var option: bool
   var rstGenera: RstGenerator
   rstGenera.initRstGenerator(outLatex, defaultConfig(), "input", options)
-  rstGenera.renderRstToOut(rstParse(rstSource, "", 1, 1, option, options), result)
+  rstGenera.renderRstToOut(
+      rstParse(rstSource, "", line=LineRstInit, column=ColRstInit,
+               option, options),
+      result)
diff --git a/tests/stdlib/trstgen.nim b/tests/stdlib/trstgen.nim
@@ -708,6 +708,15 @@ Test1
     doAssert count(output7, "<li>") == 3
     doAssert "start=\"3\"" in output7 and "class=\"upperalpha simple\"" in output7
 
+    # check that it's not recognized as enum.list without indentation on 2nd line
+    let input8 = dedent """
+      A. string1
+      string2
+      """
+    # TODO: find out how to catch warning here instead of throwing a defect
+    expect(AssertionDefect):
+      let output8 = input8.toHtml
+
   test "Markdown enumerated lists":
     let input1 = dedent """
       Below are 2 enumerated lists: Markdown-style (5 items) and RST (1 item)