From 0063922f5c8a07b78603b2da7f6a3c2094711c3d Mon Sep 17 00:00:00 2001 From: Marc Vertes Date: Tue, 23 Apr 2024 14:36:57 +0200 Subject: [PATCH] feat: initial and partial support of composite expressions (#9) A new `Composite` token is created. Literal composite expressions are recognized and partially handled by the parser but not yet by the code generator. Other cosmetic changes are present. --- lang/token.go | 1 + lang/token_string.go | 21 +++++++++++---------- parser/compiler.go | 3 +++ parser/decl.go | 3 ++- parser/expr.go | 25 ++++++++++++++++++++++--- parser/interpreter_test.go | 8 ++++++++ parser/parse.go | 4 ++-- parser/type.go | 24 ++++++++++++++++++------ vm/type.go | 18 ++++++++++++++++-- vm/vm.go | 11 +++++++++++ 10 files changed, 94 insertions(+), 24 deletions(-) diff --git a/lang/token.go b/lang/token.go index 7ad7bf1..41c8439 100644 --- a/lang/token.go +++ b/lang/token.go @@ -111,6 +111,7 @@ const ( // Internal virtual machine tokens (no corresponding keyword). Call CallX + Composite EqualSet Grow Index diff --git a/lang/token_string.go b/lang/token_string.go index 6b19fca..97961e3 100644 --- a/lang/token_string.go +++ b/lang/token_string.go @@ -93,19 +93,20 @@ func _() { _ = x[Var-82] _ = x[Call-83] _ = x[CallX-84] - _ = x[EqualSet-85] - _ = x[Grow-86] - _ = x[Index-87] - _ = x[JumpFalse-88] - _ = x[JumpSetFalse-89] - _ = x[JumpSetTrue-90] - _ = x[Label-91] - _ = x[New-92] + _ = x[Composite-85] + _ = x[EqualSet-86] + _ = x[Grow-87] + _ = x[Index-88] + _ = x[JumpFalse-89] + _ = x[JumpSetFalse-90] + _ = x[JumpSetTrue-91] + _ = x[Label-92] + _ = x[New-93] } -const _Token_name = 
"IllegalCommentIdentCharFloatImagIntStringAddSubMulQuoRemAndOrXorShlShrAndNotPeriodEqualGreaterGreaterEqualLandLessLessEqualLorNotEqualDefineAssignAddAssignSubAssignMulAssignQuoAssignRemAssignAndAssignOrAssignXorAssignShlAssignShrAssignAndNotAssignIncDecPlusMinusAddrDerefBitCompArrowEllipsisNotTildeCommaSemicolonColonParenBlockBracketBlockBraceBlockBreakCaseChanConstContinueDefaultDeferElseFallthroughForFuncGoGotoIfImportInterfaceMapPackageRangeReturnSelectStructSwitchTypeVarCallCallXEqualSetGrowIndexJumpFalseJumpSetFalseJumpSetTrueLabelNew" +const _Token_name = "IllegalCommentIdentCharFloatImagIntStringAddSubMulQuoRemAndOrXorShlShrAndNotPeriodEqualGreaterGreaterEqualLandLessLessEqualLorNotEqualDefineAssignAddAssignSubAssignMulAssignQuoAssignRemAssignAndAssignOrAssignXorAssignShlAssignShrAssignAndNotAssignIncDecPlusMinusAddrDerefBitCompArrowEllipsisNotTildeCommaSemicolonColonParenBlockBracketBlockBraceBlockBreakCaseChanConstContinueDefaultDeferElseFallthroughForFuncGoGotoIfImportInterfaceMapPackageRangeReturnSelectStructSwitchTypeVarCallCallXCompositeEqualSetGrowIndexJumpFalseJumpSetFalseJumpSetTrueLabelNew" -var _Token_index = [...]uint16{0, 7, 14, 19, 23, 28, 32, 35, 41, 44, 47, 50, 53, 56, 59, 61, 64, 67, 70, 76, 82, 87, 94, 106, 110, 114, 123, 126, 134, 140, 146, 155, 164, 173, 182, 191, 200, 208, 217, 226, 235, 247, 250, 253, 257, 262, 266, 271, 278, 283, 291, 294, 299, 304, 313, 318, 328, 340, 350, 355, 359, 363, 368, 376, 383, 388, 392, 403, 406, 410, 412, 416, 418, 424, 433, 436, 443, 448, 454, 460, 466, 472, 476, 479, 483, 488, 496, 500, 505, 514, 526, 537, 542, 545} +var _Token_index = [...]uint16{0, 7, 14, 19, 23, 28, 32, 35, 41, 44, 47, 50, 53, 56, 59, 61, 64, 67, 70, 76, 82, 87, 94, 106, 110, 114, 123, 126, 134, 140, 146, 155, 164, 173, 182, 191, 200, 208, 217, 226, 235, 247, 250, 253, 257, 262, 266, 271, 278, 283, 291, 294, 299, 304, 313, 318, 328, 340, 350, 355, 359, 363, 368, 376, 383, 388, 392, 403, 406, 410, 412, 416, 418, 424, 433, 436, 443, 448, 
454, 460, 466, 472, 476, 479, 483, 488, 497, 505, 509, 514, 523, 535, 546, 551, 554} func (i Token) String() string { if i < 0 || i >= Token(len(_Token_index)-1) { diff --git a/parser/compiler.go b/parser/compiler.go index 7a90597..3b9a32d 100644 --- a/parser/compiler.go +++ b/parser/compiler.go @@ -134,6 +134,9 @@ func (c *Compiler) Codegen(tokens Tokens) (err error) { } emit(int64(t.Pos), vm.CallX, int64(t.Beg)) + case lang.Composite: + log.Println("COMPOSITE") + case lang.Grow: emit(int64(t.Pos), vm.Grow, int64(t.Beg)) diff --git a/parser/decl.go b/parser/decl.go index b1cd13b..23b5ed6 100644 --- a/parser/decl.go +++ b/parser/decl.go @@ -280,10 +280,11 @@ func (p *Parser) parseTypeLine(in Tokens) (out Tokens, err error) { if isAlias { toks = toks[1:] } - typ, err := p.ParseTypeExpr(toks) + typ, err := p.parseTypeExpr(toks) if err != nil { return out, err } + typ.Name = in[0].Str p.addSym(unsetAddr, in[0].Str, vm.NewValue(typ), symType, typ, p.funcScope != "") return out, err } diff --git a/parser/expr.go b/parser/expr.go index 9e96e42..cf6ee74 100644 --- a/parser/expr.go +++ b/parser/expr.go @@ -7,10 +7,11 @@ import ( "github.com/mvertes/parscan/lang" "github.com/mvertes/parscan/scanner" + "github.com/mvertes/parscan/vm" ) func (p *Parser) parseExpr(in Tokens) (out Tokens, err error) { - log.Println("ParseExpr in:", in) + log.Println("parseExpr in:", in) var ops, selectors Tokens var vl int var selectorIndex string @@ -84,6 +85,24 @@ func (p *Parser) parseExpr(in Tokens) (out Tokens, err error) { out = append(out, t) vl++ ops = append(ops, scanner.Token{Tok: lang.Call, Pos: t.Pos, Beg: p.numItems(t.Block(), lang.Comma)}) + case lang.BraceBlock: + // the block can be a func body or a composite type content. + // In both cases it is preceded by a type definition. We must determine the starting token of type def, + // parse the type def, and substitute the type def by a single ident. + // TODO: handle implicit type in composite expression. 
+ ti := p.typeStartIndex(in[:len(in)-1]) + if ti == -1 { + return out, ErrInvalidType + } + typ, err := p.parseTypeExpr(in[ti : len(in)-1]) + if err != nil { + return out, ErrInvalidType + } + p.addSym(unsetAddr, typ.String(), vm.NewValue(typ), symType, typ, p.funcScope != "") + out = append(out, t, scanner.Token{Tok: lang.Ident, Pos: t.Pos, Str: typ.String()}) + i = ti + vl += 2 + ops = append(ops, scanner.Token{Tok: lang.Composite, Pos: t.Pos}) case lang.BracketBlock: out = append(out, t) vl++ @@ -113,7 +132,7 @@ func (p *Parser) parseExpr(in Tokens) (out Tokens, err error) { } out = append(out, ops...) - log.Println("ParseExpr out:", out, "vl:", vl, "ops:", ops) + log.Println("parseExpr out:", out, "vl:", vl, "ops:", ops) // A logical operator (&&, ||) involves additional control flow operations. if out, err = p.parseLogical(out); err != nil { return out, err @@ -133,7 +152,7 @@ func (p *Parser) parseExpr(in Tokens) (out Tokens, err error) { t := out[i] var toks Tokens switch t.Tok { - case lang.ParenBlock, lang.BracketBlock: + case lang.ParenBlock, lang.BracketBlock, lang.BraceBlock: if toks, err = p.parseExprStr(t.Block()); err != nil { return out, err } diff --git a/parser/interpreter_test.go b/parser/interpreter_test.go index a459359..a6ad246 100644 --- a/parser/interpreter_test.go +++ b/parser/interpreter_test.go @@ -255,3 +255,11 @@ func TestImport(t *testing.T) { {src: `import . 
"fmt"; Println(4)`, res: ""}, }) } + +func TestComposite(t *testing.T) { + run(t, []etest{ + {src: "type T struct{}; t := T{}; t", res: "{}"}, + {src: "t := struct{}{}; t", res: "{}"}, + // {src: "type T struct{N int}; t := T{2}; t", res: "{2}"}, + }) +} diff --git a/parser/parse.go b/parser/parse.go index bd19d81..add1600 100644 --- a/parser/parse.go +++ b/parser/parse.go @@ -74,7 +74,7 @@ func (p *Parser) parseStmt(in Tokens) (out Tokens, err error) { if len(in) == 0 { return nil, nil } - log.Println("ParseStmt in:", in) + log.Println("parseStmt in:", in) switch t := in[0]; t.Tok { case lang.Break: return p.parseBreak(in) @@ -249,7 +249,7 @@ func (p *Parser) parseFunc(in Tokens) (out Tokens, err error) { if bi < 0 { return out, fmt.Errorf("no function body") } - typ, err := p.ParseTypeExpr(in[:bi]) + typ, err := p.parseTypeExpr(in[:bi]) if err != nil { return out, err } diff --git a/parser/type.go b/parser/type.go index b9ab0f9..ba60e8f 100644 --- a/parser/type.go +++ b/parser/type.go @@ -26,12 +26,10 @@ var ( ErrTypeNotImplemented = errors.New("not implemented") ) -// ParseTypeExpr parses a list of tokens defining a type expresssion and returns -// the corresponding runtime type or an error. 
-func (p *Parser) ParseTypeExpr(in Tokens) (typ *vm.Type, err error) { +func (p *Parser) parseTypeExpr(in Tokens) (typ *vm.Type, err error) { switch in[0].Tok { case lang.BracketBlock: - typ, err := p.ParseTypeExpr(in[1:]) + typ, err := p.parseTypeExpr(in[1:]) if err != nil { return nil, err } @@ -53,7 +51,7 @@ func (p *Parser) ParseTypeExpr(in Tokens) (typ *vm.Type, err error) { return vm.SliceOf(typ), nil case lang.Mul: - typ, err := p.ParseTypeExpr(in[1:]) + typ, err := p.parseTypeExpr(in[1:]) if err != nil { return nil, err } @@ -157,7 +155,7 @@ func (p *Parser) parseParamTypes(in Tokens, flag typeFlag) (types []*vm.Type, va continue } } - typ, err := p.ParseTypeExpr(t) + typ, err := p.parseTypeExpr(t) if err != nil { return nil, nil, err } @@ -196,3 +194,17 @@ func (p *Parser) hasFirstParam(in Tokens) bool { s, _, ok := p.getSym(in[0].Str, p.scope) return !ok || s.kind != symType } + +// typeStartIndex returns the index of the start of type expression in tokens, or -1. +func (p *Parser) typeStartIndex(in Tokens) int { + index := len(in) - 1 + for i := index; i >= 0; i-- { + switch in[i].Tok { + case lang.Ident, lang.Struct, lang.Map, lang.Func, lang.Interface, lang.Mul, lang.BraceBlock, lang.BracketBlock, lang.ParenBlock: + index = i + default: + return index + } + } + return -1 +} diff --git a/vm/type.go b/vm/type.go index 16e3733..49215db 100644 --- a/vm/type.go +++ b/vm/type.go @@ -10,10 +10,19 @@ type Type struct { Rtype reflect.Type } +func (t *Type) String() string { + if t.Name != "" { + return t.Name + } + return t.Rtype.String() +} + +// Elem returns a type's element type. func (t *Type) Elem() *Type { return &Type{Rtype: t.Rtype.Elem()} } +// Out returns the type's i'th output parameter. func (t *Type) Out(i int) *Type { return &Type{Rtype: t.Rtype.Out(i)} } @@ -43,18 +52,22 @@ func ValueOf(v any) Value { return Value{Data: reflect.ValueOf(v)} } +// PointerTo returns the pointer type with element t. 
func PointerTo(t *Type) *Type { return &Type{Rtype: reflect.PointerTo(t.Rtype)} } -func ArrayOf(size int, t *Type) *Type { - return &Type{Rtype: reflect.ArrayOf(size, t.Rtype)} +// ArrayOf returns the array type with the given length and element type. +func ArrayOf(length int, t *Type) *Type { + return &Type{Rtype: reflect.ArrayOf(length, t.Rtype)} } +// SliceOf returns the slice type with the given element type. func SliceOf(t *Type) *Type { return &Type{Rtype: reflect.SliceOf(t.Rtype)} } +// FuncOf returns the function type with the given argument and result types. func FuncOf(arg, ret []*Type, variadic bool) *Type { a := make([]reflect.Type, len(arg)) for i, e := range arg { @@ -67,6 +80,7 @@ func FuncOf(arg, ret []*Type, variadic bool) *Type { return &Type{Rtype: reflect.FuncOf(a, r, variadic)} } +// StructOf returns the struct type with the given field types. func StructOf(fields []*Type) *Type { rf := make([]reflect.StructField, len(fields)) for i, f := range fields { diff --git a/vm/vm.go b/vm/vm.go index 0b2a1ad..408be6e 100644 --- a/vm/vm.go +++ b/vm/vm.go @@ -1,3 +1,4 @@ +// Package vm implements a stack-based virtual machine. package vm import ( @@ -85,6 +86,7 @@ var strop = [...]string{ // for VM tracing. Vassign: "Vassign", } +// Code represents the virtual machine byte code. type Code [][]int64 // Machine represents a virtual machine. @@ -256,19 +258,24 @@ func (m *Machine) Run() (err error) { } } +// PushCode adds instructions to the machine code. func (m *Machine) PushCode(code ...[]int64) (p int) { p = len(m.code) m.code = append(m.code, code...) return p } +// SetIP sets the value of machine instruction pointer to given index. func (m *Machine) SetIP(ip int) { m.ip = ip } + +// Push pushes data values on top of machine memory stack. func (m *Machine) Push(v ...Value) (l int) { l = len(m.mem) m.mem = append(m.mem, v...) return l } +// Pop removes and returns the value on the top of machine stack.
func (m *Machine) Pop() (v Value) { l := len(m.mem) - 1 v = m.mem[l] @@ -276,6 +283,7 @@ func (m *Machine) Pop() (v Value) { return v } +// Top returns (but does not remove) the value on the top of machine stack. func (m *Machine) Top() (v Value) { if l := len(m.mem); l > 0 { v = m.mem[l-1] @@ -283,12 +291,14 @@ func (m *Machine) Top() (v Value) { return v } +// PopExit removes the last machine code instruction if it is Exit. func (m *Machine) PopExit() { if l := len(m.code); l > 0 && m.code[l-1][1] == Exit { m.code = m.code[:l-1] } } +// CodeString returns the string representation of a machine code instruction. func CodeString(op []int64) string { switch len(op) { case 2: @@ -317,6 +327,7 @@ func slint(a []int64) []int { return r } +// Vstring returns the string representation of a list of values. func Vstring(lv []Value) string { s := "[" for _, v := range lv {