diff --git a/changelog.md b/changelog.md
index c0cd970247f5..922b8470a65f 100644
--- a/changelog.md
+++ b/changelog.md
@@ -266,6 +266,8 @@
- The unary minus in `-1` is now part of the integer literal, it is now parsed as a single token.
This implies that edge cases like `-128'i8` finally work correctly.
+- Custom numeric literals are now supported.
+
## Compiler changes
diff --git a/compiler/docgen.nim b/compiler/docgen.nim
index dded231d773f..b24a24a2b1bd 100644
--- a/compiler/docgen.nim
+++ b/compiler/docgen.nim
@@ -416,7 +416,7 @@ proc nodeToHighlightedHtml(d: PDoc; n: PNode; result: var Rope; renderFlags: TRe
of tkOpr:
dispA(d.conf, result, "$1", "\\spanOperator{$1}",
[escLit])
- of tkStrLit..tkTripleStrLit:
+ of tkStrLit..tkTripleStrLit, tkCustomLit:
dispA(d.conf, result, "$1",
"\\spanStringLit{$1}", [escLit])
of tkCharLit:
diff --git a/compiler/lexer.nim b/compiler/lexer.nim
index bcd3f0076780..b34b010c24ba 100644
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -60,6 +60,7 @@ type
tkFloat64Lit = "tkFloat64Lit", tkFloat128Lit = "tkFloat128Lit",
tkStrLit = "tkStrLit", tkRStrLit = "tkRStrLit", tkTripleStrLit = "tkTripleStrLit",
tkGStrLit = "tkGStrLit", tkGTripleStrLit = "tkGTripleStrLit", tkCharLit = "tkCharLit",
+ tkCustomLit = "tkCustomLit",
tkParLe = "(", tkParRi = ")", tkBracketLe = "[",
tkBracketRi = "]", tkCurlyLe = "{", tkCurlyRi = "}",
@@ -313,8 +314,7 @@ proc getNumber(L: var Lexer, result: var Token) =
proc lexMessageLitNum(L: var Lexer, msg: string, startpos: int, msgKind = errGenerated) =
# Used to get slightly human friendlier err messages.
- const literalishChars = {'A'..'F', 'a'..'f', '0'..'9', 'X', 'x', 'o', 'O',
- 'c', 'C', 'b', 'B', '_', '.', '\'', 'd', 'i', 'u'}
+ const literalishChars = {'A'..'Z', 'a'..'z', '0'..'9', '_', '.', '\''}
var msgPos = L.bufpos
var t: Token
t.literal = ""
@@ -326,15 +326,14 @@ proc getNumber(L: var Lexer, result: var Token) =
t.literal.add(L.buf[L.bufpos])
inc(L.bufpos)
matchChars(L, t, literalishChars)
- if L.buf[L.bufpos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}:
- inc(L.bufpos)
+ if L.buf[L.bufpos] in literalishChars:
t.literal.add(L.buf[L.bufpos])
+ inc(L.bufpos)
matchChars(L, t, {'0'..'9'})
L.bufpos = msgPos
lexMessage(L, msgKind, msg % t.literal)
var
- startpos, endpos: int
xi: BiggestInt
isBase10 = true
numDigits = 0
@@ -346,7 +345,7 @@ proc getNumber(L: var Lexer, result: var Token) =
result.tokType = tkIntLit # int literal until we know better
result.literal = ""
result.base = base10
- startpos = L.bufpos
+ let startpos = L.bufpos
tokenBegin(result, startpos)
var isPositive = true
@@ -395,201 +394,187 @@ proc getNumber(L: var Lexer, result: var Token) =
discard matchUnderscoreChars(L, result, {'0'..'9'})
if L.buf[L.bufpos] in {'e', 'E'}:
result.tokType = tkFloatLit
- eatChar(L, result, 'e')
+ eatChar(L, result)
if L.buf[L.bufpos] in {'+', '-'}:
eatChar(L, result)
discard matchUnderscoreChars(L, result, {'0'..'9'})
- endpos = L.bufpos
+ let endpos = L.bufpos
# Second stage, find out if there's a datatype suffix and handle it
var postPos = endpos
+
if L.buf[postPos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}:
+ let errPos = postPos
+ var customLitPossible = false
if L.buf[postPos] == '\'':
inc(postPos)
+ customLitPossible = true
- case L.buf[postPos]
- of 'f', 'F':
- inc(postPos)
- if (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'):
- result.tokType = tkFloat32Lit
- inc(postPos, 2)
- elif (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
- result.tokType = tkFloat64Lit
- inc(postPos, 2)
- elif (L.buf[postPos] == '1') and
- (L.buf[postPos + 1] == '2') and
- (L.buf[postPos + 2] == '8'):
- result.tokType = tkFloat128Lit
- inc(postPos, 3)
- else: # "f" alone defaults to float32
- result.tokType = tkFloat32Lit
- of 'd', 'D': # ad hoc convenience shortcut for f64
- inc(postPos)
- result.tokType = tkFloat64Lit
- of 'i', 'I':
- inc(postPos)
- if (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
- result.tokType = tkInt64Lit
- inc(postPos, 2)
- elif (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'):
- result.tokType = tkInt32Lit
- inc(postPos, 2)
- elif (L.buf[postPos] == '1') and (L.buf[postPos + 1] == '6'):
- result.tokType = tkInt16Lit
- inc(postPos, 2)
- elif (L.buf[postPos] == '8'):
- result.tokType = tkInt8Lit
- inc(postPos)
- else:
- lexMessageLitNum(L, "invalid number: '$1'", startpos)
- of 'u', 'U':
- inc(postPos)
- if (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
- result.tokType = tkUInt64Lit
- inc(postPos, 2)
- elif (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'):
- result.tokType = tkUInt32Lit
- inc(postPos, 2)
- elif (L.buf[postPos] == '1') and (L.buf[postPos + 1] == '6'):
- result.tokType = tkUInt16Lit
- inc(postPos, 2)
- elif (L.buf[postPos] == '8'):
- result.tokType = tkUInt8Lit
- inc(postPos)
+ if L.buf[postPos] in SymChars:
+   # Keep the suffix as written: a custom literal's callee name uses it verbatim.
+   var suffix = newStringOfCap(10)
+   while true:
+     suffix.add L.buf[postPos]
+     inc postPos
+     if L.buf[postPos] notin SymChars+{'_'}: break
+   # Built-in suffixes stay case-insensitive (`1F32`, `1U8`), as before this change.
+   let suffixAsLower = suffix.toLowerAscii
+   case suffixAsLower
+   of "f", "f32": result.tokType = tkFloat32Lit
+   of "d", "f64": result.tokType = tkFloat64Lit
+   of "f128": result.tokType = tkFloat128Lit
+   of "i8": result.tokType = tkInt8Lit
+   of "i16": result.tokType = tkInt16Lit
+   of "i32": result.tokType = tkInt32Lit
+   of "i64": result.tokType = tkInt64Lit
+   of "u": result.tokType = tkUIntLit
+   of "u8": result.tokType = tkUInt8Lit
+   of "u16": result.tokType = tkUInt16Lit
+   of "u32": result.tokType = tkUInt32Lit
+   of "u64": result.tokType = tkUInt64Lit
else:
- result.tokType = tkUIntLit
+ if customLitPossible:
+ # remember the position of the ``'`` so that the parser doesn't
+ # have to reparse the custom literal:
+ result.iNumber = len(result.literal)
+ result.literal.add '\''
+ result.literal.add suffix
+ result.tokType = tkCustomLit
+ else:
+ lexMessageLitNum(L, "invalid number suffix: '$1'", errPos)
else:
- lexMessageLitNum(L, "invalid number: '$1'", startpos)
+ lexMessageLitNum(L, "invalid number suffix: '$1'", errPos)
# Is there still a literalish char awaiting? Then it's an error!
if L.buf[postPos] in literalishChars or
(L.buf[postPos] == '.' and L.buf[postPos + 1] in {'0'..'9'}):
lexMessageLitNum(L, "invalid number: '$1'", startpos)
- # Third stage, extract actual number
- L.bufpos = startpos # restore position
- var pos = startpos
- try:
- if (L.buf[pos] == '0') and (L.buf[pos + 1] in baseCodeChars):
- inc(pos, 2)
- xi = 0 # it is a base prefix
-
- case L.buf[pos - 1]
- of 'b', 'B':
- result.base = base2
- while pos < endpos:
- if L.buf[pos] != '_':
- xi = `shl`(xi, 1) or (ord(L.buf[pos]) - ord('0'))
- inc(pos)
- # 'c', 'C' is deprecated
- of 'o', 'c', 'C':
- result.base = base8
- while pos < endpos:
- if L.buf[pos] != '_':
- xi = `shl`(xi, 3) or (ord(L.buf[pos]) - ord('0'))
- inc(pos)
- of 'x', 'X':
- result.base = base16
- while pos < endpos:
- case L.buf[pos]
- of '_':
- inc(pos)
- of '0'..'9':
- xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('0'))
- inc(pos)
- of 'a'..'f':
- xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('a') + 10)
+ if result.tokType != tkCustomLit:
+ # Third stage, extract actual number
+ L.bufpos = startpos # restore position
+ var pos = startpos
+ try:
+ if (L.buf[pos] == '0') and (L.buf[pos + 1] in baseCodeChars):
+ inc(pos, 2)
+ xi = 0 # it is a base prefix
+
+ case L.buf[pos - 1]
+ of 'b', 'B':
+ result.base = base2
+ while pos < endpos:
+ if L.buf[pos] != '_':
+ xi = `shl`(xi, 1) or (ord(L.buf[pos]) - ord('0'))
inc(pos)
- of 'A'..'F':
- xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10)
+ # 'c', 'C' is deprecated
+ of 'o', 'c', 'C':
+ result.base = base8
+ while pos < endpos:
+ if L.buf[pos] != '_':
+ xi = `shl`(xi, 3) or (ord(L.buf[pos]) - ord('0'))
inc(pos)
- else:
- break
+ of 'x', 'X':
+ result.base = base16
+ while pos < endpos:
+ case L.buf[pos]
+ of '_':
+ inc(pos)
+ of '0'..'9':
+ xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('0'))
+ inc(pos)
+ of 'a'..'f':
+ xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('a') + 10)
+ inc(pos)
+ of 'A'..'F':
+ xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10)
+ inc(pos)
+ else:
+ break
+ else:
+ internalError(L.config, getLineInfo(L), "getNumber")
+
+ case result.tokType
+ of tkIntLit, tkInt64Lit: setNumber result.iNumber, xi
+ of tkInt8Lit: setNumber result.iNumber, ashr(xi shl 56, 56)
+ of tkInt16Lit: setNumber result.iNumber, ashr(xi shl 48, 48)
+ of tkInt32Lit: setNumber result.iNumber, ashr(xi shl 32, 32)
+ of tkUIntLit, tkUInt64Lit: setNumber result.iNumber, xi
+ of tkUInt8Lit: setNumber result.iNumber, xi and 0xff
+ of tkUInt16Lit: setNumber result.iNumber, xi and 0xffff
+ of tkUInt32Lit: setNumber result.iNumber, xi and 0xffffffff
+ of tkFloat32Lit:
+ setNumber result.fNumber, (cast[PFloat32](addr(xi)))[]
+ # note: this code is endian neutral!
+ # XXX: Test this on big endian machine!
+ of tkFloat64Lit, tkFloatLit:
+ setNumber result.fNumber, (cast[PFloat64](addr(xi)))[]
+ else: internalError(L.config, getLineInfo(L), "getNumber")
+
+ # Bounds checks. Non decimal literals are allowed to overflow the range of
+ # the datatype as long as their pattern don't overflow _bitwise_, hence
+ # below checks of signed sizes against uint*.high is deliberate:
+ # (0x80'u8 = 128, 0x80'i8 = -128, etc == OK)
+ if result.tokType notin floatTypes:
+ let outOfRange =
+ case result.tokType
+ of tkUInt8Lit, tkUInt16Lit, tkUInt32Lit: result.iNumber != xi
+ of tkInt8Lit: (xi > BiggestInt(uint8.high))
+ of tkInt16Lit: (xi > BiggestInt(uint16.high))
+ of tkInt32Lit: (xi > BiggestInt(uint32.high))
+ else: false
+
+ if outOfRange:
+ #echo "out of range num: ", result.iNumber, " vs ", xi
+ lexMessageLitNum(L, "number out of range: '$1'", startpos)
+
else:
- internalError(L.config, getLineInfo(L), "getNumber")
-
- case result.tokType
- of tkIntLit, tkInt64Lit: setNumber result.iNumber, xi
- of tkInt8Lit: setNumber result.iNumber, ashr(xi shl 56, 56)
- of tkInt16Lit: setNumber result.iNumber, ashr(xi shl 48, 48)
- of tkInt32Lit: setNumber result.iNumber, ashr(xi shl 32, 32)
- of tkUIntLit, tkUInt64Lit: setNumber result.iNumber, xi
- of tkUInt8Lit: setNumber result.iNumber, xi and 0xff
- of tkUInt16Lit: setNumber result.iNumber, xi and 0xffff
- of tkUInt32Lit: setNumber result.iNumber, xi and 0xffffffff
- of tkFloat32Lit:
- setNumber result.fNumber, (cast[PFloat32](addr(xi)))[]
- # note: this code is endian neutral!
- # XXX: Test this on big endian machine!
- of tkFloat64Lit, tkFloatLit:
- setNumber result.fNumber, (cast[PFloat64](addr(xi)))[]
- else: internalError(L.config, getLineInfo(L), "getNumber")
-
- # Bounds checks. Non decimal literals are allowed to overflow the range of
- # the datatype as long as their pattern don't overflow _bitwise_, hence
- # below checks of signed sizes against uint*.high is deliberate:
- # (0x80'u8 = 128, 0x80'i8 = -128, etc == OK)
- if result.tokType notin floatTypes:
+ case result.tokType
+ of floatTypes:
+ result.fNumber = parseFloat(result.literal)
+ of tkUInt64Lit, tkUIntLit:
+ var iNumber: uint64
+ var len: int
+ try:
+ len = parseBiggestUInt(result.literal, iNumber)
+ except ValueError:
+ raise newException(OverflowDefect, "number out of range: " & $result.literal)
+ if len != result.literal.len:
+ raise newException(ValueError, "invalid integer: " & $result.literal)
+ result.iNumber = cast[int64](iNumber)
+ else:
+ var iNumber: int64
+ var len: int
+ try:
+ len = parseBiggestInt(result.literal, iNumber)
+ except ValueError:
+ raise newException(OverflowDefect, "number out of range: " & $result.literal)
+ if len != result.literal.len:
+ raise newException(ValueError, "invalid integer: " & $result.literal)
+ result.iNumber = iNumber
+
+ # Explicit bounds checks.
let outOfRange =
case result.tokType
- of tkUInt8Lit, tkUInt16Lit, tkUInt32Lit: result.iNumber != xi
- of tkInt8Lit: (xi > BiggestInt(uint8.high))
- of tkInt16Lit: (xi > BiggestInt(uint16.high))
- of tkInt32Lit: (xi > BiggestInt(uint32.high))
+ of tkInt8Lit: result.iNumber > int8.high or result.iNumber < int8.low
+ of tkUInt8Lit: result.iNumber > BiggestInt(uint8.high) or result.iNumber < 0
+ of tkInt16Lit: result.iNumber > int16.high or result.iNumber < int16.low
+ of tkUInt16Lit: result.iNumber > BiggestInt(uint16.high) or result.iNumber < 0
+ of tkInt32Lit: result.iNumber > int32.high or result.iNumber < int32.low
+ of tkUInt32Lit: result.iNumber > BiggestInt(uint32.high) or result.iNumber < 0
else: false
if outOfRange:
- #echo "out of range num: ", result.iNumber, " vs ", xi
lexMessageLitNum(L, "number out of range: '$1'", startpos)
- else:
- case result.tokType
- of floatTypes:
- result.fNumber = parseFloat(result.literal)
- of tkUInt64Lit, tkUIntLit:
- var iNumber: uint64
- var len: int
- try:
- len = parseBiggestUInt(result.literal, iNumber)
- except ValueError:
- raise newException(OverflowDefect, "number out of range: " & $result.literal)
- if len != result.literal.len:
- raise newException(ValueError, "invalid integer: " & $result.literal)
- result.iNumber = cast[int64](iNumber)
- else:
- var iNumber: int64
- var len: int
- try:
- len = parseBiggestInt(result.literal, iNumber)
- except ValueError:
- raise newException(OverflowDefect, "number out of range: " & $result.literal)
- if len != result.literal.len:
- raise newException(ValueError, "invalid integer: " & $result.literal)
- result.iNumber = iNumber
-
- # Explicit bounds checks.
- let outOfRange =
- case result.tokType
- of tkInt8Lit: result.iNumber > int8.high or result.iNumber < int8.low
- of tkUInt8Lit: result.iNumber > BiggestInt(uint8.high) or result.iNumber < 0
- of tkInt16Lit: result.iNumber > int16.high or result.iNumber < int16.low
- of tkUInt16Lit: result.iNumber > BiggestInt(uint16.high) or result.iNumber < 0
- of tkInt32Lit: result.iNumber > int32.high or result.iNumber < int32.low
- of tkUInt32Lit: result.iNumber > BiggestInt(uint32.high) or result.iNumber < 0
- else: false
-
- if outOfRange:
- lexMessageLitNum(L, "number out of range: '$1'", startpos)
-
- # Promote int literal to int64? Not always necessary, but more consistent
- if result.tokType == tkIntLit:
- if result.iNumber > high(int32) or result.iNumber < low(int32):
- result.tokType = tkInt64Lit
-
- except ValueError:
- lexMessageLitNum(L, "invalid number: '$1'", startpos)
- except OverflowDefect, RangeDefect:
- lexMessageLitNum(L, "number out of range: '$1'", startpos)
+ # Promote int literal to int64? Not always necessary, but more consistent
+ if result.tokType == tkIntLit:
+ if result.iNumber > high(int32) or result.iNumber < low(int32):
+ result.tokType = tkInt64Lit
+
+ except ValueError:
+ lexMessageLitNum(L, "invalid number: '$1'", startpos)
+ except OverflowDefect, RangeDefect:
+ lexMessageLitNum(L, "number out of range: '$1'", startpos)
tokenEnd(result, postPos-1)
L.bufpos = postPos
@@ -830,8 +815,9 @@ proc getString(L: var Lexer, tok: var Token, mode: StringMode) =
inc(pos)
L.bufpos = pos
-proc getCharacter(L: var Lexer, tok: var Token) =
+proc getCharacter(L: var Lexer; tok: var Token) =
tokenBegin(tok, L.bufpos)
+ let startPos = L.bufpos
inc(L.bufpos) # skip '
var c = L.buf[L.bufpos]
case c
@@ -842,10 +828,16 @@ proc getCharacter(L: var Lexer, tok: var Token) =
else:
tok.literal = $c
inc(L.bufpos)
- if L.buf[L.bufpos] != '\'':
- lexMessage(L, errGenerated, "missing closing ' for character literal")
- tokenEndIgnore(tok, L.bufpos)
- inc(L.bufpos) # skip '
+ if L.buf[L.bufpos] == '\'':
+ tokenEndIgnore(tok, L.bufpos)
+ inc(L.bufpos) # skip '
+ else:
+ if startPos > 0 and L.buf[startPos-1] == '`':
+ tok.literal = "'"
+ L.bufpos = startPos+1
+ else:
+ lexMessage(L, errGenerated, "missing closing ' for character literal")
+ tokenEndIgnore(tok, L.bufpos)
proc getSymbol(L: var Lexer, tok: var Token) =
var h: Hash = 0
diff --git a/compiler/parser.nim b/compiler/parser.nim
index fe857c81bb47..b9a6ffb8ccdb 100644
--- a/compiler/parser.nim
+++ b/compiler/parser.nim
@@ -355,7 +355,7 @@ proc parseSymbol(p: var Parser, mode = smNormal): PNode =
let node = newNodeI(nkIdent, lineinfo)
node.ident = p.lex.cache.getIdent(accm)
result.add(node)
- of tokKeywordLow..tokKeywordHigh, tkSymbol, tkIntLit..tkCharLit:
+ of tokKeywordLow..tokKeywordHigh, tkSymbol, tkIntLit..tkCustomLit:
result.add(newIdentNodeP(p.lex.cache.getIdent($p.tok), p))
getTok(p)
else:
@@ -627,7 +627,7 @@ proc identOrLiteral(p: var Parser, mode: PrimaryMode): PNode =
#| | UINT_LIT | UINT8_LIT | UINT16_LIT | UINT32_LIT | UINT64_LIT
#| | FLOAT_LIT | FLOAT32_LIT | FLOAT64_LIT
#| | STR_LIT | RSTR_LIT | TRIPLESTR_LIT
- #| | CHAR_LIT
+ #| | CHAR_LIT | CUSTOM_NUMERIC_LIT
#| | NIL
#| generalizedLit = GENERALIZED_STR_LIT | GENERALIZED_TRIPLESTR_LIT
#| identOrLiteral = generalizedLit | symbol | literal
@@ -710,6 +710,14 @@ proc identOrLiteral(p: var Parser, mode: PrimaryMode): PNode =
of tkCharLit:
result = newIntNodeP(nkCharLit, ord(p.tok.literal[0]), p)
getTok(p)
+ of tkCustomLit:
+ let splitPos = p.tok.iNumber.int
+ let str = newStrNodeP(nkRStrLit, p.tok.literal.substr(0, splitPos-1), p)
+ let callee = newIdentNodeP(getIdent(p.lex.cache, p.tok.literal.substr(splitPos)), p)
+ result = newNodeP(nkDotExpr, p)
+ result.add str
+ result.add callee
+ getTok(p)
of tkNil:
result = newNodeP(nkNilLit, p)
getTok(p)
@@ -807,7 +815,7 @@ proc primarySuffix(p: var Parser, r: PNode,
result = commandExpr(p, result, mode)
break
result = namedParams(p, result, nkCurlyExpr, tkCurlyRi)
- of tkSymbol, tkAccent, tkIntLit..tkCharLit, tkNil, tkCast,
+ of tkSymbol, tkAccent, tkIntLit..tkCustomLit, tkNil, tkCast,
tkOpr, tkDotDot, tkVar, tkOut, tkStatic, tkType, tkEnum, tkTuple,
tkObject, tkProc:
# XXX: In type sections we allow the free application of the
@@ -1097,7 +1105,7 @@ proc isExprStart(p: Parser): bool =
case p.tok.tokType
of tkSymbol, tkAccent, tkOpr, tkNot, tkNil, tkCast, tkIf, tkFor,
tkProc, tkFunc, tkIterator, tkBind, tkBuiltInMagics,
- tkParLe, tkBracketLe, tkCurlyLe, tkIntLit..tkCharLit, tkVar, tkRef, tkPtr,
+ tkParLe, tkBracketLe, tkCurlyLe, tkIntLit..tkCustomLit, tkVar, tkRef, tkPtr,
tkTuple, tkObject, tkWhen, tkCase, tkOut:
result = true
else: result = false
@@ -1498,7 +1506,7 @@ proc parseReturnOrRaise(p: var Parser, kind: TNodeKind): PNode =
#| yieldStmt = 'yield' optInd expr?
#| discardStmt = 'discard' optInd expr?
#| breakStmt = 'break' optInd expr?
- #| continueStmt = 'break' optInd expr?
+ #| continueStmt = 'continue' optInd expr?
result = newNodeP(kind, p)
getTok(p)
if p.tok.tokType == tkComment:
diff --git a/compiler/renderer.nim b/compiler/renderer.nim
index c36eaaf1186f..9ca485f6e523 100644
--- a/compiler/renderer.nim
+++ b/compiler/renderer.nim
@@ -942,7 +942,7 @@ proc skipHiddenNodes(n: PNode): PNode =
else: break
proc accentedName(g: var TSrcGen, n: PNode) =
- const backticksNeeded = OpChars + {'[', '{'}
+ const backticksNeeded = OpChars + {'[', '{', '\''}
if n == nil: return
let isOperator =
if n.kind == nkIdent and n.ident.s.len > 0 and n.ident.s[0] in backticksNeeded: true
@@ -976,6 +976,11 @@ proc infixArgument(g: var TSrcGen, n: PNode, i: int) =
if needsParenthesis:
put(g, tkParRi, ")")
+proc isCustomLit(n: PNode): bool =
+  ## Whether `n` is `RStrLit.'suffix`, the shape the parser builds for a custom numeric literal.
+  n.len == 2 and n[0].kind == nkRStrLit and n[1].kind in {nkIdent, nkSym} and
+    (if n[1].kind == nkIdent: n[1].ident.s else: n[1].sym.name.s).startsWith('\'')
+
proc gsub(g: var TSrcGen, n: PNode, c: TContext) =
if isNil(n): return
var
@@ -1195,9 +1200,14 @@ proc gsub(g: var TSrcGen, n: PNode, c: TContext) =
gcomma(g, n, c)
put(g, tkBracketRi, "]")
of nkDotExpr:
- gsub(g, n, 0)
- put(g, tkDot, ".")
- gsub(g, n, 1)
+ if isCustomLit(n):
+ put(g, tkCustomLit, n[0].strVal)
+ gsub(g, n, 1)
+ else:
+ gsub(g, n, 0)
+ put(g, tkDot, ".")
+ if n.len > 1:
+ accentedName(g, n[1])
of nkBind:
putWithSpace(g, tkBind, "bind")
gsub(g, n, 0)
diff --git a/compiler/semstmts.nim b/compiler/semstmts.nim
index ff8f68ed03ab..fee43162e78c 100644
--- a/compiler/semstmts.nim
+++ b/compiler/semstmts.nim
@@ -1524,7 +1524,7 @@ proc semProcAnnotation(c: PContext, prc: PNode;
return
proc semInferredLambda(c: PContext, pt: TIdTable, n: PNode): PNode {.nosinks.} =
- ## used for resolving 'auto' in lambdas based on their callsite
+ ## used for resolving 'auto' in lambdas based on their callsite
var n = n
let original = n[namePos].sym
let s = original #copySym(original, false)
diff --git a/doc/grammar.txt b/doc/grammar.txt
index 0d5eef179eb1..d4f4a051587d 100644
--- a/doc/grammar.txt
+++ b/doc/grammar.txt
@@ -46,7 +46,7 @@ literal = | INT_LIT | INT8_LIT | INT16_LIT | INT32_LIT | INT64_LIT
| UINT_LIT | UINT8_LIT | UINT16_LIT | UINT32_LIT | UINT64_LIT
| FLOAT_LIT | FLOAT32_LIT | FLOAT64_LIT
| STR_LIT | RSTR_LIT | TRIPLESTR_LIT
- | CHAR_LIT
+ | CHAR_LIT | CUSTOM_NUMERIC_LIT
| NIL
generalizedLit = GENERALIZED_STR_LIT | GENERALIZED_TRIPLESTR_LIT
identOrLiteral = generalizedLit | symbol | literal
@@ -100,6 +100,7 @@ postExprBlocks = ':' stmt? ( IND{=} doBlock
| IND{=} 'of' exprList ':' stmt
| IND{=} 'elif' expr ':' stmt
| IND{=} 'except' exprList ':' stmt
+ | IND{=} 'finally' ':' stmt
| IND{=} 'else' ':' stmt )*
exprStmt = simpleExpr
(( '=' optInd expr colonBody? )
diff --git a/doc/manual.rst b/doc/manual.rst
index db125163043f..a882eb945e4f 100644
--- a/doc/manual.rst
+++ b/doc/manual.rst
@@ -490,11 +490,17 @@ this. Another reason is that Nim can thus support `array[char, int]` or
type is used for Unicode characters, it can represent any Unicode character.
`Rune` is declared in the `unicode module `_.
+A character literal that does not end in ``'`` is interpreted as ``'`` if there
+is a preceding backtick token. There must be no whitespace between the preceding
+backtick token and the character literal. This special rule ensures that a declaration
+like ``proc `'customLiteral`(s: string)`` is valid. See also
+`Custom Numeric Literals <#custom-numeric-literals>`_.
-Numerical constants
--------------------
-Numerical constants are of a single type and have the form::
+Numeric Literals
+----------------
+
+Numeric literals have the form::
hexdigit = digit | 'A'..'F' | 'a'..'f'
octdigit = '0'..'7'
@@ -530,8 +536,10 @@ Numerical constants are of a single type and have the form::
FLOAT64_LIT = HEX_LIT '\'' FLOAT64_SUFFIX
| (FLOAT_LIT | DEC_LIT | OCT_LIT | BIN_LIT) ['\''] FLOAT64_SUFFIX
+ CUSTOM_NUMERIC_LIT = (FLOAT_LIT | HEX_LIT | DEC_LIT | OCT_LIT | BIN_LIT) '\'' CUSTOM_NUMERIC_SUFFIX
-As can be seen in the productions, numerical constants can contain underscores
+
+As can be seen in the productions, numeric literals can contain underscores
for readability. Integer and floating-point literals may be given in decimal (no
prefix), binary (prefix `0b`), octal (prefix `0o`), and hexadecimal
(prefix `0x`) notation.
@@ -579,7 +587,7 @@ is optional if it is not ambiguous (only hexadecimal floating-point literals
with a type suffix can be ambiguous).
-The type suffixes are:
+The pre-defined type suffixes are:
================= =========================
Type Suffix Resulting type of literal
@@ -611,6 +619,42 @@ the bit width of the datatype, it is accepted.
Hence: 0b10000000'u8 == 0x80'u8 == 128, but, 0b10000000'i8 == 0x80'i8 == -1
instead of causing an overflow error.
+
+Custom Numeric Literals
+~~~~~~~~~~~~~~~~~~~~~~~
+
+If the suffix is not predefined, then the suffix is assumed to be a call
+to a proc, template, macro or other callable identifier that is passed the
+string containing the literal. The callable identifier needs to be declared
+with a special ``'`` prefix:
+
+.. code-block:: nim
+
+ import strutils
+ type u4 = distinct uint8 # a 4-bit unsigned integer aka "nibble"
+ proc `'u4`(n: string): u4 =
+ # The leading ' is required.
+ result = (parseInt(n) and 0x0F).u4
+
+ var x = 5'u4
+
+More formally, a custom numeric literal `123'custom` is transformed
+to r"123".`'custom` in the parsing step. There is no AST node kind that
+corresponds to this transformation. The transformation naturally handles
+the case that additional parameters are passed to the callee:
+
+.. code-block:: nim
+
+ import strutils
+ type u4 = distinct uint8 # a 4-bit unsigned integer aka "nibble"
+ proc `'u4`(n: string; moreData: int): u4 =
+ result = (parseInt(n) and 0x0F).u4
+
+ var x = 5'u4(123)
+
+Custom numeric literals are covered by the grammar rule named `CUSTOM_NUMERIC_LIT`.
+
+
Operators
---------
diff --git a/tests/lexer/mlexerutils.nim b/tests/lexer/mlexerutils.nim
new file mode 100644
index 000000000000..eae7a0006938
--- /dev/null
+++ b/tests/lexer/mlexerutils.nim
@@ -0,0 +1,9 @@
+import macros
+
+macro lispReprStr*(a: untyped): untyped = newLit(a.lispRepr)
+
+macro assertAST*(expected: string, struct: untyped): untyped =
+ var ast = newLit(struct.treeRepr)
+ result = quote do:
+ if `ast` != `expected`:
+ doAssert false, "\nGot:\n" & `ast`.indent(2) & "\nExpected:\n" & `expected`.indent(2)
\ No newline at end of file
diff --git a/tests/lexer/tcustom_numeric_literals.nim b/tests/lexer/tcustom_numeric_literals.nim
new file mode 100644
index 000000000000..a2f355b4dea0
--- /dev/null
+++ b/tests/lexer/tcustom_numeric_literals.nim
@@ -0,0 +1,150 @@
+discard """
+ targets: "c cpp js"
+"""
+
+# Test tkCustomLit
+
+import std/[macros, strutils]
+import mlexerutils
+
+# AST checks
+
+assertAST dedent """
+ StmtList
+ ProcDef
+ AccQuoted
+ Ident "\'"
+ Ident "wrap"
+ Empty
+ Empty
+ FormalParams
+ Ident "string"
+ IdentDefs
+ Ident "number"
+ Ident "string"
+ Empty
+ Empty
+ Empty
+ StmtList
+ Asgn
+ Ident "result"
+ Infix
+ Ident "&"
+ Infix
+ Ident "&"
+ StrLit "[["
+ Ident "number"
+ StrLit "]]"""":
+ proc `'wrap`(number: string): string =
+ result = "[[" & number & "]]"
+
+assertAST dedent """
+ StmtList
+ DotExpr
+ RStrLit "-38383839292839283928392839283928392839283.928493849385935898243e-50000"
+ Ident "\'wrap"""":
+ -38383839292839283928392839283928392839283.928493849385935898243e-50000'wrap
+
+proc `'wrap`(number: string): string = "[[" & number & "]]"
+doAssert lispReprStr(-1'wrap) == """(DotExpr (RStrLit "-1") (Ident "\'wrap"))"""
+
+template main =
+ block: # basic suffix usage
+ template `'twrap`(number: string): untyped =
+ number.`'wrap`
+ proc extraContext(): string =
+ 22.40'wrap
+ proc `*`(left, right: string): string =
+ result = left & "times" & right
+ proc `+`(left, right: string): string =
+ result = left & "plus" & right
+
+ doAssert 1'wrap == "[[1]]"
+ doAssert -1'wrap == "[[-1]]":
+ "unable to resolve a negative integer-suffix pattern"
+ doAssert 12345.67890'wrap == "[[12345.67890]]"
+ doAssert 1'wrap*1'wrap == "[[1]]times[[1]]":
+ "unable to resolve an operator between two suffixed numeric literals"
+ doAssert 1'wrap+ -1'wrap == "[[1]]plus[[-1]]": # will generate a compiler warning about inconsistent spacing
+ "unable to resolve a negative suffixed numeric literal following an operator"
+ doAssert 1'wrap + -1'wrap == "[[1]]plus[[-1]]"
+ doAssert 1'twrap == "[[1]]"
+ doAssert extraContext() == "[[22.40]]":
+ "unable to return a suffixed numeric literal by an implicit return"
+ doAssert 0x5a3a'wrap == "[[0x5a3a]]"
+ doAssert 0o5732'wrap == "[[0o5732]]"
+ doAssert 0b0101111010101'wrap == "[[0b0101111010101]]"
+ doAssert -38383839292839283928392839283928392839283.928493849385935898243e-50000'wrap == "[[-38383839292839283928392839283928392839283.928493849385935898243e-50000]]"
+ doAssert 1234.56'wrap == "[[1234.56]]":
+ "unable to properly account for context with suffixed numeric literals"
+
+ block: # verify that the i64, f32, etc builtin suffixes still parse correctly
+ const expectedF32: float32 = 123.125
+ proc `'f9`(number: string): string = # proc starts with 'f' just like 'f32'
+ "[[" & number & "]]"
+ proc `'f32a`(number: string): string = # looks even more like 'f32'
+ "[[" & number & "]]"
+ proc `'d9`(number: string): string = # proc starts with 'd' just like the d suffix
+ "[[" & number & "]]"
+ proc `'i9`(number: string): string = # proc starts with 'i' just like 'i64'
+ "[[" & number & "]]"
+ proc `'u9`(number: string): string = # proc starts with 'u' just like 'u8'
+ "[[" & number & "]]"
+
+ doAssert 123.125f32 == expectedF32:
+ "failing to support non-quoted legacy f32 floating point suffix"
+ doAssert 123.125'f32 == expectedF32
+ doAssert 123.125e0'f32 == expectedF32
+ doAssert 1234.56'wrap == 1234.56'f9
+ doAssert 1234.56'wrap == 1234.56'f32a
+ doAssert 1234.56'wrap == 1234.56'd9
+ doAssert 1234.56'wrap == 1234.56'i9
+ doAssert 1234.56'wrap == 1234.56'u9
+ doAssert lispReprStr(1234.56'u9) == """(DotExpr (RStrLit "1234.56") (Ident "\'u9"))""":
+ "failed to properly build AST for suffix that starts with u"
+ doAssert -128'i8 == (-128).int8
+
+ block: # case checks
+ doAssert 1E2 == 100:
+ "lexer not handling upper-case exponent"
+ doAssert 1.0E2 == 100.0
+ doAssert 1e2 == 100
+ doAssert 0xdeadBEEF'wrap == "[[0xdeadBEEF]]":
+ "lexer not maintaining original case"
+ doAssert 0.1E12'wrap == "[[0.1E12]]"
+ doAssert 0.0e12'wrap == "[[0.0e12]]"
+ doAssert 0.0e+12'wrap == "[[0.0e+12]]"
+ doAssert 0.0e-12'wrap == "[[0.0e-12]]"
+ doAssert 0e-12'wrap == "[[0e-12]]"
+
+ block: # macro and template usage
+ template `'foo`(a: string): untyped = (a, 2)
+ doAssert -12'foo == ("-12", 2)
+ template `'fooplus`(a: string, b: int): untyped = (a, b)
+ doAssert -12'fooplus(2) == ("-12", 2)
+ template `'fooplusopt`(a: string, b: int = 99): untyped = (a, b)
+ doAssert -12'fooplusopt(2) == ("-12", 2)
+ doAssert -12'fooplusopt() == ("-12", 99)
+ doAssert -12'fooplusopt == ("-12", 99)
+ macro `'bar`(a: static string): untyped =
+ var infix = newNimNode(nnkInfix)
+ infix.add newIdentNode("&")
+ infix.add newLit("got ")
+ infix.add newLit(a.repr)
+ result = newNimNode(nnkStmtList)
+ result.add infix
+ doAssert -12'bar == "got \"-12\""
+ macro deb(a): untyped = newLit(a.repr)
+ doAssert deb(-12'bar) == "-12'bar"
+ # macro metawrap(): untyped =
+ # func wrap1(a: string): string = "{" & a & "}"
+ # func `'wrap2`(a: string): string = "{" & a & "}"
+ # result = quote do:
+ # let a1 = wrap1"-128"
+ # let a2 = -128'wrap2
+ # metawrap()
+ # doAssert a1 == "{-128}"
+ # doAssert a2 == "{-128}"
+
+static: main()
+main()
diff --git a/tests/lexer/tstrlits.nim b/tests/lexer/tstrlits.nim
deleted file mode 100644
index 8e8250a5bc2b..000000000000
--- a/tests/lexer/tstrlits.nim
+++ /dev/null
@@ -1,19 +0,0 @@
-discard """
- output: "a\"\"long string\"\"\"\"\"abc\"def_'2'●𝌆𝌆A"
-"""
-# Test the new different string literals
-
-const
- tripleEmpty = """"long string"""""""" # "long string """""
-
- rawQuote = r"a"""
-
- raw = r"abc""def"
-
- escaped = "\x5f'\50'\u25cf\u{1D306}\u{1d306}\u{41}"
-
-
-stdout.write(rawQuote)
-stdout.write(tripleEmpty)
-stdout.write(raw)
-stdout.writeLine(escaped)
diff --git a/tests/lexer/tunary_minus.nim b/tests/lexer/tunary_minus.nim
index 89f1b79ef72f..0aa861d53721 100644
--- a/tests/lexer/tunary_minus.nim
+++ b/tests/lexer/tunary_minus.nim
@@ -6,13 +6,7 @@ discard """
import std/[macros, strutils]
-macro lispReprStr*(a: untyped): untyped = newLit(a.lispRepr)
-
-macro assertAST*(expected: string, struct: untyped): untyped =
- var ast = newLit(struct.treeRepr)
- result = quote do:
- if `ast` != `expected`:
- doAssert false, "\nGot:\n" & `ast`.indent(2) & "\nExpected:\n" & `expected`.indent(2)
+import mlexerutils
const one = 1
const minusOne = `-`(one)