Skip to content

Commit

Permalink
cmd/compile: accept new Go2 number literals
Browse files Browse the repository at this point in the history
This CL introduces compiler support for the new binary and octal integer
literals, hexadecimal floats, and digit separators for all number literals.

The new Go 2 number literal scanner accepts the following liberal format:

number   = [ prefix ] digits [ "." digits ] [ exponent ] [ "i" ] .
prefix   = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] .
digits   = { digit | "_" } .
exponent = ( "e" | "E" | "p" | "P" ) [ "+" | "-" ] digits .

If the number starts with "0x" or "0X", digit is any hexadecimal digit;
otherwise, digit is any decimal digit. If the accepted number is not valid,
errors are reported accordingly.

See the new test cases in scanner_test.go for a selection of valid and
invalid numbers and the respective error messages.

R=Go1.13

Updates golang#12711.
Updates golang#19308.
Updates golang#28493.
Updates golang#29008.

Change-Id: Ic8febc7bd4dc5186b16a8c8897691e81125cf0ca
Reviewed-on: https://go-review.googlesource.com/c/157677
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
  • Loading branch information
griesemer authored and nebulabox committed Feb 20, 2019
1 parent a069b1c commit 8f699f8
Show file tree
Hide file tree
Showing 7 changed files with 487 additions and 88 deletions.
6 changes: 5 additions & 1 deletion src/cmd/compile/internal/gc/mpfloat.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"fmt"
"math"
"math/big"
"strings"
)

// implements float arithmetic
Expand Down Expand Up @@ -177,11 +178,14 @@ func (a *Mpflt) Neg() {
}

func (a *Mpflt) SetString(as string) {
// TODO(gri) remove this code once math/big.Float.Parse can handle separators
as = strings.Replace(as, "_", "", -1) // strip separators

for len(as) > 0 && (as[0] == ' ' || as[0] == '\t') {
as = as[1:]
}

f, _, err := a.Val.Parse(as, 10)
f, _, err := a.Val.Parse(as, 0)
if err != nil {
yyerror("malformed constant: %s (%v)", as, err)
a.Val.SetFloat64(0)
Expand Down
7 changes: 7 additions & 0 deletions src/cmd/compile/internal/gc/mpint.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package gc
import (
"fmt"
"math/big"
"strings"
)

// implements integer arithmetic
Expand Down Expand Up @@ -281,6 +282,12 @@ func (a *Mpint) SetInt64(c int64) {
}

func (a *Mpint) SetString(as string) {
// TODO(gri) remove this code once math/big.Int.SetString can handle 0o-octals and separators
as = strings.Replace(as, "_", "", -1) // strip separators
if len(as) >= 2 && as[0] == '0' && (as[1] == 'o' || as[1] == 'O') {
as = "0" + as[2:]
}

_, ok := a.Val.SetString(as, 0)
if !ok {
// required syntax is [+-][0[x]]d*
Expand Down
233 changes: 167 additions & 66 deletions src/cmd/compile/internal/syntax/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ func (s *scanner) init(src io.Reader, errh func(line, col uint, msg string), mod
s.nlsemi = false
}

// errorf formats a message in the manner of fmt.Sprintf and reports it
// through s.error.
func (s *scanner) errorf(format string, args ...interface{}) {
	msg := fmt.Sprintf(format, args...)
	s.error(msg)
}

// next advances the scanner by reading the next token.
//
// If a read, source encoding, or lexical error occurs, next calls
Expand Down Expand Up @@ -149,8 +153,9 @@ redo:

case '.':
c = s.getr()
if isDigit(c) {
s.unread(1)
if isDecimal(c) {
s.ungetr()
s.unread(1) // correct position of '.' (needed by startLit in number)
s.number('.')
break
}
Expand Down Expand Up @@ -304,7 +309,7 @@ redo:

default:
s.tok = 0
s.error(fmt.Sprintf("invalid character %#U", c))
s.errorf("invalid character %#U", c)
goto redo
}

Expand All @@ -320,19 +325,15 @@ assignop:
}

func isLetter(c rune) bool {
return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_'
}

func isDigit(c rune) bool {
return '0' <= c && c <= '9'
return 'a' <= lower(c) && lower(c) <= 'z' || c == '_'
}

func (s *scanner) ident() {
s.startLit()

// accelerate common case (7bit ASCII)
c := s.getr()
for isLetter(c) || isDigit(c) {
for isLetter(c) || isDecimal(c) {
c = s.getr()
}

Expand Down Expand Up @@ -372,10 +373,10 @@ func (s *scanner) isIdentRune(c rune, first bool) bool {
// ok
case unicode.IsDigit(c):
if first {
s.error(fmt.Sprintf("identifier cannot begin with digit %#U", c))
s.errorf("identifier cannot begin with digit %#U", c)
}
case c >= utf8.RuneSelf:
s.error(fmt.Sprintf("invalid identifier character %#U", c))
s.errorf("invalid identifier character %#U", c)
default:
return false
}
Expand All @@ -401,86 +402,188 @@ func init() {
}
}

// lower sets the ASCII case bit ('a' - 'A') in c. For an ASCII letter the
// result is its lower-case form; for any other rune the bit is simply set,
// so the result is only meaningful when c is an ASCII letter.
func lower(c rune) rune {
	return c | ('a' - 'A')
}

// isDecimal reports whether c is one of the ASCII decimal digits '0'..'9'.
func isDecimal(c rune) bool {
	return c >= '0' && c <= '9'
}

// isHex reports whether c is an ASCII hexadecimal digit, in either case.
func isHex(c rune) bool {
	if isDecimal(c) {
		return true
	}
	lc := lower(c)
	return lc >= 'a' && lc <= 'f'
}

// digits consumes the sequence { digit | '_' } that begins with c0,
// fetching each following rune with s.getr.
//
// For base <= 10 every decimal digit is accepted. The first time a digit
// with value >= base is seen while *invalid is still negative, its index
// (computed as s.col0 - s.col, i.e. relative to the literal start) is
// stored in *invalid. invalid is dereferenced only in that situation, which
// cannot arise for base 10. For base > 10 hexadecimal digits are accepted
// and invalid is not consulted.
//
// digits returns the first rune that is no longer part of the sequence and
// a bitset: bit 0 is set if at least one digit was seen, bit 1 is set if at
// least one separator '_' was seen.
func (s *scanner) digits(c0 rune, base int, invalid *int) (c rune, digsep int) {
	const (
		sawDigit = 1 << iota // bit 0
		sawSep               // bit 1
	)

	c = c0

	if base > 10 {
		for ; isHex(c) || c == '_'; c = s.getr() {
			if c == '_' {
				digsep |= sawSep
			} else {
				digsep |= sawDigit
			}
		}
		return
	}

	limit := rune('0' + base) // first digit character that is out of range
	for ; isDecimal(c) || c == '_'; c = s.getr() {
		if c == '_' {
			digsep |= sawSep
			continue
		}
		if c >= limit && *invalid < 0 {
			*invalid = int(s.col0 - s.col) // remember only the first offender
		}
		digsep |= sawDigit
	}
	return
}

func (s *scanner) number(c rune) {
s.startLit()

base := 10 // number base
prefix := rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
digsep := 0 // bit 0: digit present, bit 1: '_' present
invalid := -1 // index of invalid digit in literal, or < 0

// integer part
var ds int
if c != '.' {
s.kind = IntLit // until proven otherwise
s.kind = IntLit
if c == '0' {
c = s.getr()
if c == 'x' || c == 'X' {
// hex
switch lower(c) {
case 'x':
c = s.getr()
hasDigit := false
for isDigit(c) || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
c = s.getr()
hasDigit = true
}
if !hasDigit {
s.error("malformed hex constant")
}
goto done
}

// decimal 0, octal, or float
has8or9 := false
for isDigit(c) {
if c > '7' {
has8or9 = true
}
base, prefix = 16, 'x'
case 'o':
c = s.getr()
}
if c != '.' && c != 'e' && c != 'E' && c != 'i' {
// octal
if has8or9 {
s.error("malformed octal constant")
}
goto done
}

} else {
// decimal or float
for isDigit(c) {
base, prefix = 8, 'o'
case 'b':
c = s.getr()
base, prefix = 2, 'b'
default:
base, prefix = 8, '0'
digsep = 1 // leading 0
}
}
c, ds = s.digits(c, base, &invalid)
digsep |= ds
}

// float
// fractional part
if c == '.' {
s.kind = FloatLit
c = s.getr()
for isDigit(c) {
c = s.getr()
if prefix == 'o' || prefix == 'b' {
s.error("invalid radix point in " + litname(prefix))
}
c, ds = s.digits(s.getr(), base, &invalid)
digsep |= ds
}

if digsep&1 == 0 {
s.error(litname(prefix) + " has no digits")
}

// exponent
if c == 'e' || c == 'E' {
s.kind = FloatLit
if e := lower(c); e == 'e' || e == 'p' {
switch {
case e == 'e' && prefix != 0 && prefix != '0':
s.errorf("%q exponent requires decimal mantissa", c)
case e == 'p' && prefix != 'x':
s.errorf("%q exponent requires hexadecimal mantissa", c)
}
c = s.getr()
if c == '-' || c == '+' {
s.kind = FloatLit
if c == '+' || c == '-' {
c = s.getr()
}
if !isDigit(c) {
s.error("malformed floating-point constant exponent")
}
for isDigit(c) {
c = s.getr()
c, ds = s.digits(c, 10, nil)
digsep |= ds
if ds&1 == 0 {
s.error("exponent has no digits")
}
} else if prefix == 'x' && s.kind == FloatLit {
s.error("hexadecimal mantissa requires a 'p' exponent")
}

// complex
// suffix 'i'
if c == 'i' {
s.kind = ImagLit
s.getr()
if prefix != 0 && prefix != '0' {
s.error("invalid suffix 'i' on " + litname(prefix))
}
c = s.getr()
}

done:
s.ungetr()

s.nlsemi = true
s.lit = string(s.stopLit())
s.tok = _Literal

if s.kind == IntLit && invalid >= 0 {
s.errh(s.line, s.col+uint(invalid), fmt.Sprintf("invalid digit %q in %s", s.lit[invalid], litname(prefix)))
}

if digsep&2 != 0 {
if i := invalidSep(s.lit); i >= 0 {
s.errh(s.line, s.col+uint(i), "'_' must separate successive digits")
}
}
}

// litname returns the human-readable literal kind for a number prefix as
// used in error messages: 'x' is hexadecimal, 'o' and '0' are octal, 'b' is
// binary, and anything else is reported as decimal.
func litname(prefix rune) string {
	name := "decimal literal"
	switch prefix {
	case 'b':
		name = "binary literal"
	case '0', 'o':
		name = "octal literal"
	case 'x':
		name = "hexadecimal literal"
	}
	return name
}

// invalidSep returns the index of the first invalid separator in x, or -1
// if every '_' sits directly after a digit (or a 0x/0o/0b prefix) and
// directly before a digit.
func invalidSep(x string) int {
	// Classification of the previously seen character.
	const (
		digit = '0' // a digit, or a base prefix (which counts as a digit)
		sep   = '_' // a separator
		other = '.' // anything else
	)

	hex := false // hexadecimal digits are only recognized after 0x/0X
	prev := rune(other)
	start := 0

	// A base prefix is skipped and treated as if it were a digit.
	if len(x) >= 2 && x[0] == '0' {
		switch lower(rune(x[1])) {
		case 'x':
			hex = true
			fallthrough
		case 'o', 'b':
			prev, start = digit, 2
		}
	}

	// Mantissa and exponent.
	for i := start; i < len(x); i++ {
		ch := rune(x[i])
		var cur rune
		switch {
		case ch == sep:
			if prev != digit {
				return i // separator not preceded by a digit
			}
			cur = sep
		case isDecimal(ch) || hex && isHex(ch):
			cur = digit
		default:
			if prev == sep {
				return i - 1 // separator not followed by a digit
			}
			cur = other
		}
		prev = cur
	}

	// A trailing separator has no digit after it.
	if prev == sep {
		return len(x) - 1
	}
	return -1
}

func (s *scanner) rune() {
Expand Down Expand Up @@ -713,12 +816,10 @@ func (s *scanner) escape(quote rune) bool {
for i := n; i > 0; i-- {
d := base
switch {
case isDigit(c):
case isDecimal(c):
d = uint32(c) - '0'
case 'a' <= c && c <= 'f':
d = uint32(c) - ('a' - 10)
case 'A' <= c && c <= 'F':
d = uint32(c) - ('A' - 10)
case 'a' <= lower(c) && lower(c) <= 'f':
d = uint32(lower(c)) - ('a' - 10)
}
if d >= base {
if c < 0 {
Expand All @@ -728,7 +829,7 @@ func (s *scanner) escape(quote rune) bool {
if base == 8 {
kind = "octal"
}
s.error(fmt.Sprintf("non-%s character in escape sequence: %c", kind, c))
s.errorf("non-%s character in escape sequence: %c", kind, c)
s.ungetr()
return false
}
Expand All @@ -739,7 +840,7 @@ func (s *scanner) escape(quote rune) bool {
s.ungetr()

if x > max && base == 8 {
s.error(fmt.Sprintf("octal escape value > 255: %d", x))
s.errorf("octal escape value > 255: %d", x)
return false
}

Expand Down
Loading

0 comments on commit 8f699f8

Please sign in to comment.