From d97bf4f1c80ef41789ecfc3430e04293d2b20043 Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Fri, 12 Mar 2021 10:33:21 +0300 Subject: [PATCH] fix RST parsing when no indent after enum.item (fix #17249) (#17257) --- compiler/docgen.nim | 5 ++-- doc/contributing.rst | 20 +++++++------ doc/manual.rst | 15 +++++----- lib/impure/db_sqlite.nim | 2 +- lib/packages/docutils/rst.nim | 51 +++++++++++++++++++++++++++----- lib/packages/docutils/rstgen.nim | 8 +++-- tests/stdlib/trstgen.nim | 9 ++++++ 7 files changed, 82 insertions(+), 28 deletions(-) diff --git a/compiler/docgen.nim b/compiler/docgen.nim index e902705fcb35..fd5c8b47acec 100644 --- a/compiler/docgen.nim +++ b/compiler/docgen.nim @@ -1366,8 +1366,9 @@ proc commandRstAux(cache: IdentCache, conf: ConfigRef; var d = newDocumentor(filen, cache, conf, outExt) d.isPureRst = true - var rst = parseRst(readFile(filen.string), filen.string, 0, 1, d.hasToc, - {roSupportRawDirective, roSupportMarkdown}, conf) + var rst = parseRst(readFile(filen.string), filen.string, + line=LineRstInit, column=ColRstInit, + d.hasToc, {roSupportRawDirective, roSupportMarkdown}, conf) var modDesc = newStringOfCap(30_000) renderRstToOut(d[], rst, modDesc) d.modDesc = rope(modDesc) diff --git a/doc/contributing.rst b/doc/contributing.rst index e6301faae43f..ba9c1caf38cd 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -599,14 +599,16 @@ to existing modules is acceptable. For two reasons: Conventions ----------- -1. New stdlib modules should go under `Nim/lib/std/`. The rationale is to require -users to import via `import std/foo` instead of `import foo`, which would cause -potential conflicts with nimble packages. Note that this still applies for new modules -in existing logical directories, e.g.: -use `lib/std/collections/foo.nim`, not `lib/pure/collections/foo.nim`. +1. New stdlib modules should go under `Nim/lib/std/`. 
The rationale is to + require users to import via `import std/foo` instead of `import foo`, + which would cause potential conflicts with nimble packages. + Note that this still applies for new modules in existing logical + directories, e.g.: use `lib/std/collections/foo.nim`, + not `lib/pure/collections/foo.nim`. 2. New module names should prefer plural form whenever possible, e.g.: -`std/sums.nim` instead of `std/sum.nim`. In particular, this reduces chances of conflicts -between module name and the symbols it defines. Furthermore, module names should -use `snake_case` and not use capital letters, which cause issues when going -from an OS without case sensitivity to an OS with it. + `std/sums.nim` instead of `std/sum.nim`. In particular, this reduces + chances of conflicts between module name and the symbols it defines. + Furthermore, module names should use `snake_case` and not use capital + letters, which cause issues when going from an OS without case + sensitivity to an OS with it. diff --git a/doc/manual.rst b/doc/manual.rst index ba7ef5059e6d..4686247d2177 100644 --- a/doc/manual.rst +++ b/doc/manual.rst @@ -6073,13 +6073,14 @@ avoid ambiguity when there are multiple modules with the same path. There are two pseudo directories: -1. ``std``: The ``std`` pseudo directory is the abstract location of Nim's standard -library. For example, the syntax ``import std / strutils`` is used to unambiguously -refer to the standard library's ``strutils`` module. -2. ``pkg``: The ``pkg`` pseudo directory is used to unambiguously refer to a Nimble -package. However, for technical details that lie outside the scope of this document, -its semantics are: *Use the search path to look for module name but ignore the standard -library locations*. In other words, it is the opposite of ``std``. +1. ``std``: The ``std`` pseudo directory is the abstract location of + Nim's standard library. 
For example, the syntax ``import std / strutils`` + is used to unambiguously refer to the standard library's ``strutils`` module. +2. ``pkg``: The ``pkg`` pseudo directory is used to unambiguously refer to + a Nimble package. However, for technical details that lie outside the + scope of this document, its semantics are: *Use the search path to look for + module name but ignore the standard library locations*. + In other words, it is the opposite of ``std``. From import statement diff --git a/lib/impure/db_sqlite.nim b/lib/impure/db_sqlite.nim index 6ca81db9e9d2..8324079602f1 100644 --- a/lib/impure/db_sqlite.nim +++ b/lib/impure/db_sqlite.nim @@ -154,7 +154,7 @@ ## The reasoning is as follows: ## 1. it's close to what many DBs offer natively (char**) ## 2. it hides the number of types that the DB supports -## (int? int64? decimal up to 10 places? geo coords?) +## (int? int64? decimal up to 10 places? geo coords?) ## 3. it's convenient when all you do is to forward the data to somewhere else (echo, log, put the data into a new query) ## ## See also diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim index 61b17bed18f6..ea0c079daa93 100644 --- a/lib/packages/docutils/rst.nim +++ b/lib/packages/docutils/rst.nim @@ -467,12 +467,20 @@ type s*: PSharedState indentStack*: seq[int] filename*: string - line*, col*: int + line*, col*: int ## initial line/column of whole text or + ## documentation fragment that will be added + ## in case of error/warning reporting to + ## (relative) line/column of the token. 
hasToc*: bool curAnchor*: string # variable to track latest anchor in s.anchors EParseError* = object of ValueError +const + LineRstInit* = 1 ## Initial line number for standalone RST text + ColRstInit* = 0 ## Initial column number for standalone RST text + ## (Nim global reporting adds ColOffset=1) + template currentTok(p: RstParser): Token = p.tok[p.idx] template prevTok(p: RstParser): Token = p.tok[p.idx - 1] template nextTok(p: RstParser): Token = p.tok[p.idx + 1] @@ -542,8 +550,8 @@ proc initParser(p: var RstParser, sharedState: PSharedState) = p.idx = 0 p.filename = "" p.hasToc = false - p.col = 0 - p.line = 1 + p.col = ColRstInit + p.line = LineRstInit p.s = sharedState proc addNodesAux(n: PRstNode, result: var string) = @@ -1439,8 +1447,8 @@ proc countTitles(p: var RstParser, n: PRstNode) = if p.s.hTitleCnt >= 2: break -proc tokenAfterNewline(p: RstParser): int = - result = p.idx +proc tokenAfterNewline(p: RstParser, start: int): int = + result = start while true: case p.tok[result].kind of tkEof: @@ -1450,6 +1458,9 @@ proc tokenAfterNewline(p: RstParser): int = break else: inc result +proc tokenAfterNewline(p: RstParser): int {.inline.} = + result = tokenAfterNewline(p, p.idx) + proc isAdornmentHeadline(p: RstParser, adornmentIdx: int): bool = ## check that underline/overline length is enough for the heading. ## No support for Unicode. 
@@ -1937,13 +1948,34 @@ proc parseEnumList(p: var RstParser): PRstNode = wildToken: array[0..5, int] = [4, 3, 3, 4, 3, 3] # number of tokens wildIndex: array[0..5, int] = [1, 0, 0, 1, 0, 0] # position of enumeration sequence (number/letter) in enumerator - result = newRstNodeA(p, rnEnumList) let col = currentTok(p).col var w = 0 while w < wildcards.len: if match(p, p.idx, wildcards[w]): break inc w assert w < wildcards.len + proc checkAfterNewline(p: RstParser, report: bool): bool = + let j = tokenAfterNewline(p, start=p.idx+1) + if p.tok[j].kind notin {tkIndent, tkEof} and + p.tok[j].col < p.tok[p.idx+wildToken[w]].col and + (p.tok[j].col > col or + (p.tok[j].col == col and not match(p, j, wildcards[w]))): + if report: + let n = p.line + p.tok[j].line + let msg = "\n" & """ + not enough indentation on line $2 + (if it's continuation of enumeration list), + or no blank line after line $1 (if it should be the next paragraph), + or no escaping \ at the beginning of line $1 + (if lines $1..$2 are a normal paragraph, not enum. list)""". + unindent(8) + rstMessage(p, mwRstStyle, msg % [$(n-1), $n]) + result = false + else: + result = true + if not checkAfterNewline(p, report = true): + return nil + result = newRstNodeA(p, rnEnumList) let autoEnums = if roSupportMarkdown in p.s.options: @["#", "1"] else: @["#"] var prevAE = "" # so as not allow mixing auto-enumerators `1` and `#` var curEnum = 1 @@ -1963,6 +1995,10 @@ proc parseEnumList(p: var RstParser): PRstNode = result.add(item) if currentTok(p).kind == tkIndent and currentTok(p).ival == col and match(p, p.idx+1, wildcards[w]): + # don't report to avoid duplication of warning since for + # subsequent enum. 
items parseEnumList will be called second time: + if not checkAfterNewline(p, report = false): + break let enumerator = p.tok[p.idx + 1 + wildIndex[w]].symbol # check that it's in sequence: enumerator == next(prevEnum) if "n" in wildcards[w]: # arabic numeral @@ -2336,7 +2372,8 @@ proc selectDir(p: var RstParser, d: string): PRstNode = of "warning": result = dirAdmonition(p, d) of "default-role": result = dirDefaultRole(p) else: - rstMessage(p, meInvalidDirective, d) + let tok = p.tok[p.idx-2] # report on directive in ".. directive::" + rstMessage(p, meInvalidDirective, d, tok.line, tok.col) proc prefix(ftnType: FootnoteType): string = case ftnType diff --git a/lib/packages/docutils/rstgen.nim b/lib/packages/docutils/rstgen.nim index 59a9ba09ab6c..f72ff9e8f0ba 100644 --- a/lib/packages/docutils/rstgen.nim +++ b/lib/packages/docutils/rstgen.nim @@ -1506,7 +1506,8 @@ proc rstToHtml*(s: string, options: RstParseOptions, initRstGenerator(d, outHtml, config, filen, options, myFindFile, rst.defaultMsgHandler) var dummyHasToc = false - var rst = rstParse(s, filen, 0, 1, dummyHasToc, options) + var rst = rstParse(s, filen, line=LineRstInit, column=ColRstInit, + dummyHasToc, options) result = "" renderRstToOut(d, rst, result) @@ -1518,4 +1519,7 @@ proc rstToLatex*(rstSource: string; options: RstParseOptions): string {.inline, var option: bool var rstGenera: RstGenerator rstGenera.initRstGenerator(outLatex, defaultConfig(), "input", options) - rstGenera.renderRstToOut(rstParse(rstSource, "", 1, 1, option, options), result) + rstGenera.renderRstToOut( + rstParse(rstSource, "", line=LineRstInit, column=ColRstInit, + option, options), + result) diff --git a/tests/stdlib/trstgen.nim b/tests/stdlib/trstgen.nim index 3c27054aa556..7d5c0e28f2c2 100644 --- a/tests/stdlib/trstgen.nim +++ b/tests/stdlib/trstgen.nim @@ -708,6 +708,15 @@ Test1 doAssert count(output7, "
  • ") == 3 doAssert "start=\"3\"" in output7 and "class=\"upperalpha simple\"" in output7 + # check that it's not recognized as enum.list without indentation on 2nd line + let input8 = dedent """ + A. string1 + string2 + """ + # TODO: find out how to catch warning here instead of throwing a defect + expect(AssertionDefect): + let output8 = input8.toHtml + test "Markdown enumerated lists": let input1 = dedent """ Below are 2 enumerated lists: Markdown-style (5 items) and RST (1 item)