From 0277cd3c64d8e12701c7fdec41179c72f24c8a08 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Fri, 16 Feb 2024 05:56:48 +0100 Subject: [PATCH 001/169] don't emit header includes for unused imported globals (#1191) ## Summary Imported globals are now only included in the build if they're used anywhere in alive code, fixing C headers being unnecessarily pulled in for them. Closes https://github.com/nim-works/nimskull/issues/1190. Closes https://github.com/nim-works/nimskull/issues/1125. ## Details An imported 'let' or 'var' was so far added to the (logical) struct of the module the global is part of, but this is wrong! The definition of an imported global doesn't imply the definition of a location owned by the NimSkull program, rather it makes an external name available with the given name and type. **Changes:** - don't add globals to the module struct if they're marked with `sfImportc` - introduce `symbolToPmir` for querying the PMIR/MIR kind of a symbol without adding it to the MIR environment (`nameNode` does this) - change `mirgen.genLocDef` to only use `nameNode` (which registers the symbol with the environment) if the statement constitutes an assignment - add a specification test for the new behaviour Imported globals are now only registered with the MIR environment (and subsequently have their definition/include emitted) when they're part of the alive program. 
--- compiler/mir/mirgen.nim | 8 ++-- compiler/mir/proto_mir.nim | 38 ++++++++++--------- compiler/sem/modulelowering.nim | 4 +- .../s01_interop/t05_unused_importc.nim | 18 +++++++++ 4 files changed, 44 insertions(+), 24 deletions(-) create mode 100644 tests/lang/s05_pragmas/s01_interop/t05_unused_importc.nim diff --git a/compiler/mir/mirgen.nim b/compiler/mir/mirgen.nim index c5cda9354e6..e7cfd342fdd 100644 --- a/compiler/mir/mirgen.nim +++ b/compiler/mir/mirgen.nim @@ -1243,10 +1243,10 @@ proc genLocDef(c: var TCtx, n: PNode, val: PNode) = s = n.sym hasInitializer = val.kind != nkEmpty sink = sfCursor notin s.flags - node = nameNode(c, s) + kind = symbolToPmir(s) c.builder.useSource(c.sp, n) - if node.kind == mnkGlobal and c.scopeDepth == 1: + if kind == pirGlobal and c.scopeDepth == 1: # no 'def' statement is emitted for top-level globals if hasInitializer: genAsgn(c, true, sink, n, val) @@ -1257,7 +1257,7 @@ proc genLocDef(c: var TCtx, n: PNode, val: PNode) = # the location doesn't have an explicit starting value. Initialize # it to the type's default value. c.buildStmt mnkInit: - c.add node + c.add nameNode(c, s) c.buildMagicCall mDefault, s.typ: discard else: @@ -1265,7 +1265,7 @@ proc genLocDef(c: var TCtx, n: PNode, val: PNode) = discard else: c.buildStmt (if sfCursor in s.flags: mnkDefCursor else: mnkDef): - c.add node + c.add nameNode(c, s) if hasInitializer: genAsgnSource(c, val, sink) else: diff --git a/compiler/mir/proto_mir.nim b/compiler/mir/proto_mir.nim index 2c6f30f23e2..620287bc0ef 100644 --- a/compiler/mir/proto_mir.nim +++ b/compiler/mir/proto_mir.nim @@ -438,6 +438,25 @@ proc wantArray(e: var seq[ProtoItem]) = # without them, so we do prefer lvalue captures e[^1].keep = kLvalue +func symbolToPmir*(s: PSym): range[pirProc..pirConst] = + ## Returns the proto-MIR item kind corresponding to `s`. 
+ case s.kind + of skVar, skLet, skForVar: + if sfGlobal in s.flags: + pirGlobal + else: + pirLocal + of skTemp, skResult: + pirLocal + of skParam: + pirParam + of skConst: + pirConst + of skProc, skFunc, skConverter, skMethod, skIterator: + pirProc + else: + unreachable(s.kind) + proc exprToPmir(c: TranslateCtx, result: var seq[ProtoItem], n: PNode, sink: bool) = ## Translates the single node `n` and recurses if it's a non-terminal. This ## procedure makes up the core of the AST-to-proto-MIR translation. @@ -482,24 +501,7 @@ proc exprToPmir(c: TranslateCtx, result: var seq[ProtoItem], n: PNode, sink: boo of nkLambdaKinds: node pirProc, sym, n[namePos].sym of nkSym: - let kind: range[pirProc..pirConst] = - case n.sym.kind - of skVar, skLet, skForVar: - if sfGlobal in n.sym.flags: - pirGlobal - else: - pirLocal - of skTemp, skResult: - pirLocal - of skParam: - pirParam - of skConst: - pirConst - of skProc, skFunc, skConverter, skMethod, skIterator: - pirProc - else: - unreachable(n.sym.kind) - + let kind = symbolToPmir(n.sym) result.add ProtoItem(orig: n, typ: n.sym.typ, kind: kind, sym: n.sym) of nkDerefExpr: wantPure(n[0]) diff --git a/compiler/sem/modulelowering.nim b/compiler/sem/modulelowering.nim index 025c7fef8bc..694d1955586 100644 --- a/compiler/sem/modulelowering.nim +++ b/compiler/sem/modulelowering.nim @@ -190,8 +190,8 @@ proc registerGlobals(stmts: seq[PNode], structs: var ModuleStructs) = ## the module level (within the module imperative body `stmts`). 
proc register(structs: var ModuleStructs, s: PSym, isTopLevel: bool) {.nimcall.} = - if sfCompileTime in s.flags: - # don't register compile-time globals with the module struct + if {sfCompileTime, sfImportc} * s.flags != {}: + # don't register compile-time or imported globals with the module struct discard elif s.kind == skTemp: # HACK: semantic analysis sometimes produces temporaries (it does so for diff --git a/tests/lang/s05_pragmas/s01_interop/t05_unused_importc.nim b/tests/lang/s05_pragmas/s01_interop/t05_unused_importc.nim new file mode 100644 index 00000000000..3a830d0ba74 --- /dev/null +++ b/tests/lang/s05_pragmas/s01_interop/t05_unused_importc.nim @@ -0,0 +1,18 @@ +discard """ + description: ''' + Let and var bindings for imported entities are only included in the + build if they're assigned to, read from, or have their address taken, + within code part of the final build + ''' +""" + +var a {.importc, header: "".}: int +let b {.importc, header: "".}: int + +# an initial assignment (e.g.: `` = 0``) would count as a usage and result in +# a compiler error, since the headers are non-existent + +proc f() = + # while this is a usage of `a`, `f` is not part of the final build, so + # `a` is not pulled in + a = 1 From 6a40423ffd9a8a47790b4a897382b92d0498bfe7 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Sat, 17 Feb 2024 23:43:08 +0100 Subject: [PATCH 002/169] cgen: fix undefined behaviour of `openArray` conversions (#1197) ## Summary When in their default empty state, both implicit and explicit conversions from `string` and `seq` to `openArray` invoked undefined behaviour at the C level. This is fixed now. Fixes https://github.com/nim-works/nimskull/issues/1194. ## Details Access of the payload pointer, which is null when the sequence is in its default state, is now guarded by a not-null check. 
If the payload pointer is null, the payload pointer is not accessed and null is used as the data pointer value for the `openArray` tuple. Benchmarking this change with the compiler itself showed it to have little to no effect on execution time. --- compiler/backend/ccgcalls.nim | 6 ++++-- compiler/backend/ccgexprs.nim | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/compiler/backend/ccgcalls.nim b/compiler/backend/ccgcalls.nim index 4a9159c9f80..1b164a61efb 100644 --- a/compiler/backend/ccgcalls.nim +++ b/compiler/backend/ccgcalls.nim @@ -148,10 +148,12 @@ proc genOpenArraySlice(p: BProc; q: CgNode; formalType, destType: PType): (Rope, if formalType.skipTypes(abstractInst).kind in {tyVar} and atyp.kind == tyString: linefmt(p, cpsStmts, "#nimPrepareStrMutationV2($1);$n", [byRefLoc(p, a)]) if atyp.kind in {tyVar}: - result = ("($4*)(*$1)$3+($2)" % [rdLoc(a), rdLoc(b), dataField(p), dest], + result = ("((*$1).p != NIM_NIL ? ($4*)(*$1)$3+$2 : NIM_NIL)" % + [rdLoc(a), rdLoc(b), dataField(p), dest], lengthExpr) else: - result = ("($4*)$1$3+($2)" % [rdLoc(a), rdLoc(b), dataField(p), dest], + result = ("($1.p != NIM_NIL ? ($4*)$1$3+$2 : NIM_NIL)" % + [rdLoc(a), rdLoc(b), dataField(p), dest], lengthExpr) else: internalError(p.config, "openArrayLoc: " & typeToString(a.t)) diff --git a/compiler/backend/ccgexprs.nim b/compiler/backend/ccgexprs.nim index 694fba74505..f33c570ccd1 100644 --- a/compiler/backend/ccgexprs.nim +++ b/compiler/backend/ccgexprs.nim @@ -129,7 +129,7 @@ proc genOpenArrayConv(p: BProc; d: TLoc; a: TLoc) = linefmt(p, cpsStmts, "$1.Field0 = $2; $1.Field1 = $2Len_0;$n", [rdLoc(d), a.rdLoc]) of tySequence: - linefmt(p, cpsStmts, "$1.Field0 = $2$3; $1.Field1 = $4;$n", + linefmt(p, cpsStmts, "$1.Field0 = ($2.p != NIM_NIL ? 
$2$3 : NIM_NIL); $1.Field1 = $4;$n", [rdLoc(d), a.rdLoc, dataField(p), lenExpr(p, a)]) of tyArray: linefmt(p, cpsStmts, "$1.Field0 = $2; $1.Field1 = $3;$n", @@ -138,7 +138,7 @@ proc genOpenArrayConv(p: BProc; d: TLoc; a: TLoc) = if skipTypes(d.t, abstractInst).kind in {tyVar}: linefmt(p, cpsStmts, "#nimPrepareStrMutationV2($1);$n", [byRefLoc(p, a)]) - linefmt(p, cpsStmts, "$1.Field0 = $2$3; $1.Field1 = $4;$n", + linefmt(p, cpsStmts, "$1.Field0 = ($2.p != NIM_NIL ? $2$3 : NIM_NIL); $1.Field1 = $4;$n", [rdLoc(d), a.rdLoc, dataField(p), lenExpr(p, a)]) else: internalError(p.config, a.lode.info, "cannot handle " & $a.t.kind) From ceab1aa75241576a52d67683be6ecc535ad374c1 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Sat, 17 Feb 2024 23:43:44 +0100 Subject: [PATCH 003/169] cgen: fix undefined behaviour with object conversion (#1196) ## Summary Fix up-conversions of `nil` `ref object` or `ptr object` values invoking undefined behaviour at the C level. Fixes https://github.com/nim-works/nimskull/issues/1193. ## Details Up conversions of pointers or `ref`s were translated into a dereference + address-of sequence (e.g., `&a->Sup`), which invokes UB if the pointer is null. Instead, the pointer is now directly cast to the super type. According to the C89 standard (6.5.2.1 "Structure and union specifiers"), "A pointer to a structure object, suitably converted, points to its initial member", so this cast is legal. 
--- compiler/backend/ccgexprs.nim | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/compiler/backend/ccgexprs.nim b/compiler/backend/ccgexprs.nim index f33c570ccd1..628bc3b1262 100644 --- a/compiler/backend/ccgexprs.nim +++ b/compiler/backend/ccgexprs.nim @@ -1943,10 +1943,15 @@ proc upConv(p: BProc, n: CgNode, d: var TLoc) = a.storage) # an indirection is used: d.flags.incl lfIndirect + elif isRef: + # using ``&(x->Sup)`` is undefined behaviour when x is null, so the + # pointer has to be cast instead + putIntoDest(p, d, n, + "(($1) ($2))" % [getTypeDesc(p.module, n.typ), rdLoc(a)]) else: - var r = rdLoc(a) & (if isRef: "->Sup" else: ".Sup") + var r = rdLoc(a) & ".Sup" for i in 2..inheritanceDiff(src, dest): r.add(".Sup") - putIntoDest(p, d, n, if isRef: "&" & r else: r, a.storage) + putIntoDest(p, d, n, r, a.storage) proc useConst*(m: BModule; id: ConstId) = let sym = m.g.env[id] From ee40a2f816400a3aea164f6e57cf3ca1f9fffbc7 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Sun, 18 Feb 2024 18:28:11 +0100 Subject: [PATCH 004/169] fix constant expressions sometimes having the wrong value (#1198) ## Summary Fix a critical bug with the constant-data table, which led to, in rare cases, constant aggregate construction expressions evaluating to the wrong value at run-time. All backends were affected. ## Details The `MirNode` comparison procedure of `DataTable` didn't take all relevant fields into account, meaning that unequal `mnkObjConstr`, `mnkConstr`, and `mnkField` were treated as equal. For example, the comparison would result in 'true' for `Obj(a: 1)` and `Obj(b: 1)`. This problem was hidden by the hashing procedure properly considering all relevant `MirNode` fields; only when there were hash or bucket collisions did the equality problem surface. All fields are taken into account by `datatables.==` now, and the used case statement is made exhaustive in order to prevent similar issues in the future. 
Since a language-level test would be rather contrived and brittle (a hash/bucket collision is required), a unit test for the comparison used by `datatables` is added. --- compiler/mir/datatables.nim | 11 +++-- tests/compiler/tdatatables.nim | 87 ++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 tests/compiler/tdatatables.nim diff --git a/compiler/mir/datatables.nim b/compiler/mir/datatables.nim index dc03151d1ab..e2ac06d6857 100644 --- a/compiler/mir/datatables.nim +++ b/compiler/mir/datatables.nim @@ -85,9 +85,14 @@ proc cmp(a, b: ConstrTree): bool = exprStructuralEquivalent(a.lit, b.lit) of mnkProc: a.prc == b.prc - else: - # all other nodes are equal when their kind is the same - true + of mnkConstr, mnkObjConstr: + a.len == b.len + of mnkField: + a.field.id == b.field.id + of mnkArg, mnkEnd: + true # same node kind -> equal nodes + of AllNodeKinds - ConstrTreeNodes: + unreachable(a.kind) if not a[0].typ.sameBackendType(b[0].typ) or a.len != b.len: # the (backend-)type is different -> not the same constant expressions diff --git a/tests/compiler/tdatatables.nim b/tests/compiler/tdatatables.nim new file mode 100644 index 00000000000..d0129b49977 --- /dev/null +++ b/tests/compiler/tdatatables.nim @@ -0,0 +1,87 @@ +discard """ + description: "Tests for the compiler/mir/datatables module" + targets: native +""" + +import compiler/ast/ast +include compiler/mir/datatables + +# some placeholder types to assing to the nodes. 
For object types, a different +# ID means that it's a different type +let + t1 = PType(itemId: ItemId(item: 1), kind: tyObject, sons: @[PType nil]) + t2 = PType(itemId: ItemId(item: 2), kind: tyObject, sons: @[PType nil]) + t3 = PType(itemId: ItemId(item: 3), kind: tyObject, sons: @[PType nil]) + field1 = PSym(itemId: ItemId(item: 1)) + field2 = PSym(itemId: ItemId(item: 2)) + +# node constructor +template node(k: MirNodeKind, t: PType, field, val: untyped): MirNode = + MirNode(kind: k, typ: t, field: val) +template node(k: MirNodeKind, field, val: untyped): MirNode = + MirNode(kind: k, field: val) +template node(k: MirNodeKind): MirNode = + MirNode(kind: k) +template literal(val: PNode): MirNode = + MirNode(kind: mnkLiteral, lit: val) + +block tree_equality: + # the type is only relevant for the head of the tree (the first node) + + # setup a list of structurally valid and unique (in terms of equality) trees + let trees = @[ + # --- literals + @[node(mnkLiteral, t1, lit, newIntNode(nkIntLit, 0))], + @[node(mnkLiteral, t2, lit, newIntNode(nkIntLit, 0))], + @[node(mnkLiteral, t1, lit, newStrNode(nkStrLit, ""))], + @[node(mnkLiteral, t1, lit, newStrNode(nkStrLit, "a"))], + @[node(mnkLiteral, t1, lit, newFloatNode(nkFloatLit, 0.0))], + # 0.0 and -0.0 are different float values + # FIXME: doesn't work yet + #@[node(mnkLiteral, t1, lit, newFloatNode(nkFloatLit, -0.0))], + + # --- ordered aggregates + @[node(mnkConstr, t1, len, 0), node(mnkEnd)], + @[node(mnkConstr, t2, len, 0), node(mnkEnd)], + @[node(mnkConstr, t1, len, 1), + node(mnkArg), literal(newIntNode(nkIntLit, 0)), + node(mnkEnd)], + @[node(mnkConstr, t1, len, 2), + node(mnkArg), literal(newIntNode(nkIntLit, 0)), node(mnkEnd), + node(mnkArg), literal(newIntNode(nkIntLit, 0)), node(mnkEnd), + node(mnkEnd)], + + # --- aggregates with fields + @[node(mnkObjConstr, t1, len, 0), node(mnkEnd)], + @[node(mnkObjConstr, t2, len, 0), node(mnkEnd)], + @[node(mnkObjConstr, t1, len, 1), + node(mnkField, field, field1), + 
node(mnkArg), literal(newIntNode(nkIntLit, 0)), node(mnkEnd), + node(mnkEnd)], + # same field value, different field: + @[node(mnkObjConstr, t1, len, 1), + node(mnkField, field, field2), + node(mnkArg), literal(newIntNode(nkIntLit, 0)), node(mnkEnd), + node(mnkEnd)], + @[node(mnkObjConstr, t1, len, 1), + node(mnkField, field, field1), + node(mnkArg), literal(newIntNode(nkIntLit, 0)), node(mnkEnd), + node(mnkField, field, field2), + node(mnkArg), literal(newIntNode(nkIntLit, 0)), node(mnkEnd), + node(mnkEnd)], + # swapped fields + @[node(mnkObjConstr, t1, len, 1), + node(mnkField, field, field2), + node(mnkArg), literal(newIntNode(nkIntLit, 0)), node(mnkEnd), + node(mnkField, field, field1), + node(mnkArg), literal(newIntNode(nkIntLit, 0)), node(mnkEnd), + node(mnkEnd)] + ] + + # compare all trees with each other + for i in 0.. Date: Mon, 19 Feb 2024 00:01:49 +0100 Subject: [PATCH 005/169] rework the MIR (part 2) (#1195) ## Summary This introduces the `copy`, `move`, and `sink` *assignment modifiers*, which make it explicit where and how ownership is transferred with assignments. An assignment with an lvalue expression as the source operand and no modifier is a *shallow copy*, replacing the need for `fastAsgn`. Previously, the exact semantics of an assignment were an implicit property of every assignment. Besides slightly simplifying the `injectdestructors` pass and data-flow graph construction, the semantics of assignments being properly encoded in the MIR is a pre-requisite for many MIR-based optimization passes. More generally, it's a significant step towards decoupling value ownership semantics from lifetime hooks. Semantics of existing code don't (intentionally) change; this is an internal-only rework. ## Details ### MIR * the `mnkCopy`, `mnkMove`, and `mnkSink` nodes are added, representing the modifiers. 
Syntax-wise, they may only appear directly in assignment source slots * the `mnkFastAsgn` node is obsolete and removed * `copy` and `move` assignments are *final*. That is, a `copy` assignment cannot become a `move` assignment, and the inverse is also not possible * only `sink` assignments are non-final. Collapsing them into either a `copy` or `move` assignment is the focus of the move analyzer ### AST-to-MIR Translation `mirgen` performs the initial placement of assignment modifiers, using a proto-MIR-based analysis. The decision-making works as follows: 1. when the destination is owning (i.e., all non-`.cursor` locals/globals/fields): 1. if a non-destructive move is definitely possible, `move` is used 2. if a move is never possible (e.g., because the source is a cursor), `copy` is used 3. if whether a move is possible depends on a data-flow analysis, `sink` is used 4. if a destructive move is possible, `sink` is used 5. if the source expression is an rvalue expression returning an owning value (e.g., a call, a construction, etc.), and the assignment is an initial assignment, an in-place assignment is used 6. for types with custom copy/sink/destroy behaviour, and non-initial assignments, the rvalue is first assigned to a temporary and then `move`d into the destination 2. when the destination is non-owning, no modifier is used Except for rule 1.6, placement of the modifiers is independent of whether lifetime hooks are involved. In addition, all rvalue expressions (except for `cast`) are now treated as returning *owning* values. What this means, in effect, is that temporaries now properly use `def` instead of `def_cursor`. At the moment, whether `def` or `def_cursor` is used for non-lifetime-hook-using types has no practical effect, but the new behaviour is technically correct, while the previous one was not. 
### Move Analyzer Operation of the move analyzer changes slightly: instead of looking for all `opConsume` data-flow instructions, it only looks for those corresponding to `sink` assignments. Since whether a move or copy is used is only relevant for lifetime-hook-using types, the `isLastRead` analysis continues to only be performed for assignments of those types. Compared to before, the DFG is now updated with the move analyzer results, removing the need to pass them to each of the following analysis routines. ### Assignment Rewriting Rewriting of assignments now only looks for assignments with modifiers; all assignments without a modifier are ignored. `move`s are exclusively turned into `=sink` calls (never destructive moves), `copy`s into `=copy`. `sink` is either turned into a copy, move, or destructive move. Only the implementation is different; the effective program behaviour doesn't change. ### MIR-to-CGIR Translation * `move` assignments and *shallow copy* assignments are translated to `cnkFastAsgn` * all other assignments translate to `cnkAsgn` * the small optimization preventing definitions being moved to the start of their scope is expanded to also apply to `def`s of temporaries without lifetime hooks, now that `def` is used for temporaries more often --------- Co-authored-by: Saem Ghani --- compiler/backend/backends.nim | 16 +- compiler/backend/cgirgen.nim | 36 ++- compiler/mir/analysis.nim | 44 ++-- compiler/mir/mirconstr.nim | 18 +- compiler/mir/mirgen.nim | 123 +++++++--- compiler/mir/mirpasses.nim | 12 +- compiler/mir/mirtrees.nim | 25 +- compiler/mir/proto_mir.nim | 154 ++++++++++--- compiler/mir/utils.nim | 15 +- compiler/sem/injectdestructors.nim | 253 ++++++++++----------- compiler/sem/mirexec.nim | 48 ++-- doc/mir.rst | 23 +- tests/arc/topt_cursor.nim | 8 +- tests/arc/topt_no_cursor.nim | 80 +++---- tests/arc/topt_refcursors.nim | 16 +- tests/arc/topt_wasmoved_destroy_pairs.nim | 25 +- tests/lang_objects/destructor/tv2_cast.nim | 20 +- 
tests/misc/tdont_fold_procedure_cast.nim | 7 +- 18 files changed, 559 insertions(+), 364 deletions(-) diff --git a/compiler/backend/backends.nim b/compiler/backend/backends.nim index 58e79f8143e..a2e48ec4b28 100644 --- a/compiler/backend/backends.nim +++ b/compiler/backend/backends.nim @@ -401,21 +401,7 @@ proc produceFragmentsForGlobals( let global = env.globals.add(s) # generate the MIR code for an initializing assignment: prepare(init, result.init.source, graph.emptyNode) - init.setSource(result.init.source.add(it)) - init.buildStmt mnkInit: - init.setSource(result.init.source.add(it[0])) - init.use toValue(global, s.typ) - init.setSource(result.init.source.add(it[2])) - if it[2].kind == nkEmpty: - # no explicit initializer expression means that the default value - # should be used - # XXX: ^^ it'd make sense to instead let semantic analysis ensure - # this (i.e. by placing a ``default(T)`` in the initializer - # slot) - init.buildMagicCall mDefault, s.typ: - discard - else: - generateCode(graph, env, config, it[2], init, result.init.source) + generateAssignment(graph, env, config, it, init, result.init.source) # if the global requires one, emit a destructor call into the deinit # fragment: diff --git a/compiler/backend/cgirgen.nim b/compiler/backend/cgirgen.nim index ac01da15cae..96b31615263 100644 --- a/compiler/backend/cgirgen.nim +++ b/compiler/backend/cgirgen.nim @@ -473,6 +473,29 @@ proc callToIr(tree: MirBody, cl: var TranslateCl, n: MirNode, proc exprToIr(tree: MirBody, cl: var TranslateCl, cr: var TreeCursor): CgNode +proc sourceExprToIr(tree: MirBody, cl: var TranslateCl, + cr: var TreeCursor): tuple[n: CgNode, useFast: bool] = + ## Translates the MIR expression appearing in an assignment's source + ## slot. Assignment modifiers are dropped, and whether a fast assignment or + ## normal assignment should be used is computed and returned. 
+ case tree[cr].kind + of mnkCopy, mnkSink: + # requires a full assignment + discard enter(tree, cr) + result = (valueToIr(tree, cl, cr), false) + leave(tree, cr) + of mnkMove: + # an ``x = move y`` assignment can be turned into a fast assignment + discard enter(tree, cr) + result = (valueToIr(tree, cl, cr), true) + leave(tree, cr) + of LvalueExprKinds: + # a fast assignment is correct for all raw lvalues + result = (lvalueToIr(tree, cl, cr), true) + else: + # rvalue expressions require a full assignment + result = (exprToIr(tree, cl, cr), false) + proc defToIr(tree: MirBody, env: MirEnv, cl: var TranslateCl, n: MirNode, cr: var TreeCursor): CgNode = ## Translates a 'def'-like construct @@ -531,13 +554,14 @@ proc defToIr(tree: MirBody, env: MirEnv, cl: var TranslateCl, # don't use the field interperation for variant access lvalueToIr(tree, cl, cr, preferField=false) else: - exprToIr(tree, cl, cr) + sourceExprToIr(tree, cl, cr)[0] leave(tree, cr) if n.kind in {mnkBind, mnkBindMut} and arg.typ.kind notin {tyVar, tyLent}: # wrap the operand in an address-of operation arg = newOp(cnkHiddenAddr, info, def.typ, arg) let isLet = (entity.kind == mnkTemp and n.kind == mnkDefCursor) or + (entity.kind == mnkTemp and not hasDestructor(def.typ)) or (entity.kind == mnkAlias) # to reduce the pressure on the code generator, locals that never cross # structured control-flow boundaries are not lifted. 
As a temporary @@ -634,9 +658,10 @@ proc stmtToIr(tree: MirBody, env: MirEnv, cl: var TranslateCl, of DefNodes: defToIr(tree, env, cl, n, cr) of mnkAsgn, mnkInit, mnkSwitch: - to cnkAsgn, lvalueToIr(tree, cl, cr), exprToIr(tree, cl, cr) - of mnkFastAsgn: - to cnkFastAsgn, lvalueToIr(tree, cl, cr), exprToIr(tree, cl, cr) + let + dst = lvalueToIr(tree, cl, cr) + (src, useFast) = sourceExprToIr(tree, cl, cr) + to (if useFast: cnkFastAsgn else: cnkAsgn), dst, src of mnkRepeat: to cnkRepeatStmt, body() of mnkBlock: @@ -810,6 +835,9 @@ proc exprToIr(tree: MirBody, cl: var TranslateCl, mnkMul: cnkMul, mnkDiv: cnkDiv, mnkModI: cnkModI] treeOp Map[n.kind]: res.kids = @[valueToIr(tree, cl, cr), valueToIr(tree, cl, cr)] + of mnkCopy, mnkMove, mnkSink: + # translation of assignments needs to handle all modifiers + unreachable("loose assignment modifier") of AllNodeKinds - ExprKinds - {mnkNone}: unreachable(n.kind) diff --git a/compiler/mir/analysis.nim b/compiler/mir/analysis.nim index 4b32ff61d4d..eec14495a53 100644 --- a/compiler/mir/analysis.nim +++ b/compiler/mir/analysis.nim @@ -9,10 +9,6 @@ ## not. This means that run-time aliasing (e.g., through pointers) is **not** ## considered. ## -## Analysis routine related to liveness take an additional ``Values`` -## instance as input, for knowing about what operation to collapse an -## ``opConsume`` to. 
-## ## When a "before" or "after" relationship is mentioned in the context of ## operations, it doesn't refer to the relative memory location of the ## nodes representing the operations, but rather to the operations' @@ -53,7 +49,7 @@ type alive ComputeAliveProc[T] = - proc(tree: MirTree, values: Values, loc: T, op: Opcode, + proc(tree: MirTree, loc: T, op: Opcode, n: OpValue): AliveState {.nimcall, noSideEffect.} func skipConversions*(tree: MirTree, val: OpValue): OpValue = @@ -68,7 +64,7 @@ func isOwned*(v: Values, val: OpValue): bool {.inline.} = func markOwned*(v: var Values, val: OpValue) {.inline.} = v.owned.incl val -func isAlive*(tree: MirTree, cfg: DataFlowGraph, v: Values, +func isAlive*(tree: MirTree, cfg: DataFlowGraph, span: Subgraph, loc: Path, start: InstrPos): bool = ## Computes whether the location named by `loc` does contain a value (i.e., ## is alive) when the data-flow operation at `start` is reached (but not @@ -99,10 +95,16 @@ func isAlive*(tree: MirTree, cfg: DataFlowGraph, v: Values, # return already return true - of opKill: + of opKill, opConsume: if isPartOf(tree, loc, path n) == yes: + # the location's value is consumed or the location is killed. No + # operation coming before the current one can change that, so we can + # stop traversing the current path exit = true + # partially consuming the value, or killing the location, does *not* + # change the alive state + of opInvalidate: discard @@ -112,16 +114,6 @@ func isAlive*(tree: MirTree, cfg: DataFlowGraph, v: Values, # derived from a global -> assume the analysed global is mutated return true - of opConsume: - if v.isOwned(n): - if isPartOf(tree, loc, path n) == yes: - # the location's value is consumed and it becomes empty. 
No operation - # coming before the current one can change that, so we can stop - # traversing the current path - exit = true - - # partially consuming the location does *not* change the alive state - of opUse: discard "not relevant" @@ -226,7 +218,7 @@ func isLastWrite*(tree: MirTree, cfg: DataFlowGraph, span: Subgraph, loc: Path, result = (true, state.exit, state.escapes) func computeAliveOp*[T: PSym | GlobalId | TempId]( - tree: MirTree, values: Values, loc: T, op: Opcode, n: OpValue): AliveState = + tree: MirTree, loc: T, op: Opcode, n: OpValue): AliveState = ## Computes the state of `loc` at the *end* of the given operation. The ## operands are expected to *not* alias with each other. The analysis ## result will be wrong if they do @@ -253,9 +245,10 @@ func computeAliveOp*[T: PSym | GlobalId | TempId]( # the analysed location or one derived from it is mutated return alive - of opKill: + of opKill, opConsume: if sameLocation(n): - # the location is killed + # the location is killed or its value is consumed (i.e., moved somewhere + # else) return dead of opInvalidate: @@ -266,15 +259,10 @@ func computeAliveOp*[T: PSym | GlobalId | TempId]( # the operation mutates global state and we're analysing a global result = alive - of opConsume: - if values.isOwned(n) and sameLocation(n): - # the location's value is consumed - result = dead - else: discard -func computeAlive*[T](tree: MirTree, cfg: DataFlowGraph, values: Values, +func computeAlive*[T](tree: MirTree, cfg: DataFlowGraph, span: Subgraph, loc: T, op: static ComputeAliveProc[T] ): tuple[alive, escapes: bool] = ## Computes whether the location is alive when `span` is exited via either @@ -287,7 +275,7 @@ func computeAlive*[T](tree: MirTree, cfg: DataFlowGraph, values: Values, var exit = false for opc, n in traverseFromExits(cfg, span, exit): - case op(tree, values, loc, opc, n) + case op(tree, loc, opc, n) of dead: exit = true of alive: @@ -299,7 +287,7 @@ func computeAlive*[T](tree: MirTree, cfg: DataFlowGraph, 
values: Values, # check if the location is alive at the structured exit of the span for opc, n in traverseReverse(cfg, span, span.b + 1, exit): - case op(tree, values, loc, opc, n) + case op(tree, loc, opc, n) of dead: exit = true of alive: diff --git a/compiler/mir/mirconstr.nim b/compiler/mir/mirconstr.nim index 5b00531336d..e7db1ec85fa 100644 --- a/compiler/mir/mirconstr.nim +++ b/compiler/mir/mirconstr.nim @@ -357,12 +357,23 @@ func emitByName*(bu: var MirBuilder, val: Value, e: EffectKind) = bu.subTree MirNode(kind: mnkTag, effect: e): bu.use val +func move*(bu: var MirBuilder, val: Value) = + ## Emits ``move val``. + bu.subTree MirNode(kind: mnkMove, typ: val.typ): + bu.use val + func asgn*(buf: var MirBuilder, a, b: Value) = - ## Emits an assignment of `b` to `a`. + ## Emits a shallow assignment: ``a = b``. buf.subTree MirNode(kind: mnkAsgn): buf.use a buf.use b +func asgnMove*(bu: var MirBuilder, a, b: Value) = + ## Emits a move assignment: ``a = move b``. + bu.subTree mnkAsgn: + bu.use a + bu.move b + func inline*(bu: var MirBuilder, tree: MirTree, fr: NodePosition): Value = ## Inlines the operand for non-mutating use. This is meant to be used for ## materialzing immutable arguments when inlining calls / expanding @@ -415,6 +426,11 @@ func materialize*(bu: var MirBuilder, loc: Value): Value = bu.use result bu.use loc +func materializeMove*(bu: var MirBuilder, loc: Value): Value = + ## Emits a new owning temporary that's initialized with the moved-from `loc`. + bu.wrapTemp loc.typ: + bu.move loc + func finish*(bu: sink MirBuilder): MirTree = ## Consumes `bu` and returns the finished tree. 
if bu.swapped: diff --git a/compiler/mir/mirgen.nim b/compiler/mir/mirgen.nim index e7cfd342fdd..3a1666de2e4 100644 --- a/compiler/mir/mirgen.nim +++ b/compiler/mir/mirgen.nim @@ -833,7 +833,7 @@ proc genMagic(c: var TCtx, n: PNode; m: TMagic) = of mAnd, mOr: let tmp = getTemp(c, n.typ) withFront c.builder: - genAndOr(c, n, Destination(isSome: true, val: tmp)) + genAndOr(c, n, Destination(isSome: true, val: tmp, flags: {dfOwns})) c.use tmp of mDefault: # use the canonical form: @@ -1134,8 +1134,11 @@ proc genObjConstr(c: var TCtx, n: PNode, isConsume: bool) = proc genRaise(c: var TCtx, n: PNode) = assert n.kind == nkRaiseStmt if n[0].kind != nkEmpty: - let tmp = c.wrapTemp n[0].typ: - genx(c, n[0], consume=true) + # the raise operand slot is a sink context, and it behaves much like a + # ``sink`` parameter + var e = exprToPmir(c, n[0], true, false) + wantConsumeable(e) + let tmp = toValue(c, e, e.high) # emit the preparation code: let @@ -1166,26 +1169,35 @@ proc genReturn(c: var TCtx, n: PNode) = c.add MirNode(kind: mnkReturn) -proc genAsgnSource(c: var TCtx, e: PNode, sink: bool) = +proc genAsgnSource(c: var TCtx, e: PNode, status: set[DestFlag]) = ## Generates the MIR code for the right-hand side of an assignment. - ## The value is captured in a temporary if necessary for proper - ## destruction. - var e = exprToPmir(c, e, sink, false) - if not sink: + ## `status` provides the information necessary to decide what assignment + ## modifiers to use and whether a temporary is required. + ## + ## If not an initial assignment, and lifetime hooks are present, a temporary + ## is introduced for rvalue expressions that return owning values: + ## + ## def _1 = get() + ## dest = move _1 + ## + ## This is necessary for the later hook injection, which triggers on + ## assignment modifiers, to work. 
+ var e = exprToPmir(c, e, dfOwns in status, false) + if dfOwns in status: + wantOwning(e, dfEmpty notin status and hasDestructor(e.typ)) + else: wantShallow(e) + genx(c, e, e.high) proc genAsgn(c: var TCtx, dest: Destination, rhs: PNode) = assert dest.isSome - let owns = dfOwns in dest.flags let kind = - if owns: - if dfEmpty in dest.flags: mnkInit - else: mnkAsgn - else: mnkFastAsgn + if dfEmpty in dest.flags: mnkInit + else: mnkAsgn c.buildStmt kind: c.use dest.val - c.genAsgnSource(rhs, sink = owns) + c.genAsgnSource(rhs, dest.flags) proc unwrap(c: var TCtx, n: PNode): PNode = ## If `n` is a statement-list expression, generates the code for all @@ -1215,7 +1227,7 @@ proc genAsgn(c: var TCtx, isFirst, sink: bool, lhs, rhs: PNode) = sink = sink and not isCursor(lhs) case rhs.kind - of ComplexExprs, nkStmtListExpr: + of ComplexExprs: # optimization: forward the destination. For example: # x = if cond: a else: b # becomes: @@ -1225,16 +1237,20 @@ proc genAsgn(c: var TCtx, isFirst, sink: bool, lhs, rhs: PNode) = genWithDest(c, rhs, initDestination(dest, isFirst, sink)) else: let kind = - if sink: - if isFirst: mnkInit - else: mnkAsgn - else: mnkFastAsgn + if isFirst: mnkInit + else: mnkAsgn + + var status: set[DestFlag] + if sink: + status.incl dfOwns + if isFirst: + status.incl dfEmpty c.buildStmt kind: # ``genLvalueOperand`` ensures that unstable lvalue # expressions are captured genLvalueOperand(c, lhs, true) - genAsgnSource(c, rhs, sink) + genAsgnSource(c, rhs, status) proc genLocDef(c: var TCtx, n: PNode, val: PNode) = ## Generates the 'def' construct for the entity provided by the symbol node @@ -1267,7 +1283,9 @@ proc genLocDef(c: var TCtx, n: PNode, val: PNode) = c.buildStmt (if sfCursor in s.flags: mnkDefCursor else: mnkDef): c.add nameNode(c, s) if hasInitializer: - genAsgnSource(c, val, sink) + genAsgnSource(c, val): + if sink: {dfEmpty, dfOwns} + else: {dfEmpty} else: c.add MirNode(kind: mnkNone) @@ -1319,7 +1337,8 @@ proc genVarTuple(c: var TCtx, n: 
PNode) = let val = c.allocTemp(initExpr.typ) c.buildStmt mnkDefUnpack: c.use val - genx(c, initExpr, consume = true) + # ensure that the temporary owns the tuple value: + genAsgnSource(c, initExpr, {dfEmpty, dfOwns}) # generate the unpack logic: for i in 0.. assign the default value - c.buildStmt (if isInit: mnkInit else: mnkAsgn): + elif isInit or not hasDestructor(a[0].typ): + # the default value can be assigned in-place + c.buildStmt mnkInit: genOperand(c, a[0]) c.buildMagicCall mDefault, a[0].typ: discard + else: + # a 'move' modifier is required for the assignment to later be + # rewritten + c.buildStmt mnkAsgn: + genOperand(c, a[0]) + c.buildTree mnkMove, a[0].typ: + c.wrapAndUse a[0].typ: + c.buildMagicCall mDefault, a[0].typ: + discard else: unreachable() @@ -1778,6 +1811,18 @@ proc genx(c: var TCtx, e: PMirExpr, i: int) = Destination(isSome: true, val: tmp, flags: {dfOwns, dfEmpty}) c.use tmp + of pirCopy: + c.buildOp mnkCopy, n.typ: + recurse() + of pirMove: + c.buildOp mnkMove, n.typ: + recurse() + of pirSink, pirDestructiveMove: + # a destructive move is currently not translated into a move + wasMoved, + # but rather into a sink, which is then, if necessary, later turned into + # a destructive move + c.buildOp mnkSink, n.typ: + recurse() of pirMat, pirMatCursor: let f = c.builder.push: recurse() # only materialize a temporary if the expression is not already a @@ -1858,7 +1903,7 @@ proc gen(c: var TCtx, n: PNode) = field: dest[^1].field): genx(c, dest, dest.len - 2) - genAsgnSource(c, n[1], false) # the source operand + genAsgnSource(c, n[1], {dfOwns}) # the source operand else: # a normal assignment genAsgn(c, false, true, n[0], n[1]) @@ -1958,6 +2003,26 @@ proc genWithDest(c: var TCtx, n: PNode; dest: Destination) = else: gen(c, n) +proc generateAssignment*(graph: ModuleGraph, env: var MirEnv, + config: TranslationConfig, n: PNode, + builder: var MirBuilder, source: var SourceMap) = + ## Translates an `nkIdentDefs` AST into MIR and emits the result 
into + ## `builder`'s currently selected buffer. + assert n.kind == nkIdentDefs and n.len == 3 + var c = TCtx(context: skUnknown, graph: graph, config: config) + # treat the code as top-level code so that no 'def' is generated for + # assignments to globals + c.scopeDepth = 1 + + template swapState() = + swap(c.sp.map, source) + swap(c.builder, builder) + swap(c.env, env) + + swapState() + genLocInit(c, n[0], n[2]) + swapState() + proc generateCode*(graph: ModuleGraph, env: var MirEnv, config: TranslationConfig, n: PNode, builder: var MirBuilder, source: var SourceMap) = diff --git a/compiler/mir/mirpasses.nim b/compiler/mir/mirpasses.nim index 95caec3a114..02c01e6e805 100644 --- a/compiler/mir/mirpasses.nim +++ b/compiler/mir/mirpasses.nim @@ -110,7 +110,7 @@ proc preventRvo(tree: MirTree, changes: var Changeset) = # we don't need to consider defs or initializing assignments (``mnkInit``) # here, because there it is guaranteed that the destination does not appear # anywhere in the source expression - for i in search(tree, {mnkFastAsgn, mnkAsgn}): + for i in search(tree, {mnkAsgn}): let source = tree.operand(i, 1) if tree[source].kind notin CallKinds or tree[source, 0].kind == mnkMagic or not eligibleForRvo(tree[source].typ): @@ -159,9 +159,9 @@ proc preventRvo(tree: MirTree, changes: var Changeset) = tmp = bu.allocTemp(tree[source].typ) bu.use tmp changes.insert(tree, tree.sibling(i), i, bu): - bu.subTree tree[i].kind: + bu.subTree mnkAsgn: bu.emitFrom(tree, pos) - bu.use tmp + bu.move tmp proc lowerSwap(tree: MirTree, changes: var Changeset) = ## Lowers a ``swap(a, b)`` call into: @@ -180,10 +180,10 @@ proc lowerSwap(tree: MirTree, changes: var Changeset) = let a = bu.bindMut(tree, NodePosition tree.argument(i, 0)) b = bu.bindMut(tree, NodePosition tree.argument(i, 1)) - temp = bu.materialize(a) + temp = bu.materializeMove(a) # we're just swapping the values, no full copy is needed - bu.asgn a, b - bu.asgn b, temp + bu.asgnMove a, b + bu.asgnMove b, temp proc 
eliminateTemporaries(tree: MirTree, changes: var Changeset) = ## Where safe (i.e., observable program behaviour does not change), elides diff --git a/compiler/mir/mirtrees.nim b/compiler/mir/mirtrees.nim index dafa8dd0078..d29892ae298 100644 --- a/compiler/mir/mirtrees.nim +++ b/compiler/mir/mirtrees.nim @@ -97,9 +97,6 @@ type mnkBindMut ## introduces an alias that may be used for read/write access ## and assignments. The source expression must not be empty - mnkFastAsgn ## assignment that cannot be rewritten into copy, move, or - ## hook call - # future direction: same as with DefCursor, remove FastAsgn mnkAsgn ## normal assignment; the destination might store a value ## already. Whether the source is copied or moved depends ## on the expression @@ -185,6 +182,17 @@ type ## ``ref`` to it, or or constructs a new aggregate value ## with named fields + mnkCopy ## denotes the assignment as copying the source value + mnkMove ## denotes the assignment as moving the value. This does + ## not imply a physical change to the source location + mnkSink ## collapses into one of the following: + ## - a copy (`mnkCopy`) + ## - a non-destructive move (`mnkMove`) + ## - a destructive move + ## + ## Collapsing ``mnkSink`` is the responsibility of the move + ## analyzer. + mnkArg ## when used in a call: denotes an argument that may either be ## passed by value or by name. Evaluation order is unspecified ## when used in a construction: denotes a value that is copied @@ -330,12 +338,17 @@ const SingleOperandNodes* = {mnkPathNamed, mnkPathPos, mnkPathVariant, mnkPathConv, mnkAddr, mnkDeref, mnkView, mnkDerefView, mnkStdConv, mnkConv, mnkCast, mnkRaise, mnkTag, mnkArg, - mnkName, mnkConsume, mnkVoid} + mnkName, mnkConsume, mnkVoid, mnkCopy, mnkMove, + mnkSink} ## Nodes that start sub-trees but that always have a single sub node. ArgumentNodes* = {mnkArg, mnkName, mnkConsume} ## Nodes only allowed in argument contexts.
+ ModifierNodes* = {mnkCopy, mnkMove, mnkSink} + ## Assignment modifiers. Nodes that can only appear directly in the source + ## slot of assignments. + SymbolLike* = {mnkParam, mnkLocal} ## Nodes for which the `sym` field is available @@ -350,7 +363,7 @@ const StmtNodes* = {mnkScope, mnkStmtList, mnkIf, mnkCase, mnkRepeat, mnkTry, mnkBlock, mnkBreak, mnkReturn, mnkRaise, mnkPNode, mnkInit, - mnkAsgn, mnkSwitch, mnkFastAsgn, mnkVoid, mnkRaise, mnkEmit, + mnkAsgn, mnkSwitch, mnkVoid, mnkRaise, mnkEmit, mnkAsm} + DefNodes UnaryOps* = {mnkNeg} @@ -365,7 +378,7 @@ const mnkCast, mnkAddr, mnkView, mnkToSlice} + UnaryOps + BinaryOps ExprKinds* = {mnkCall, mnkCheckedCall, mnkConstr, mnkObjConstr} + - LvalueExprKinds + RvalueExprKinds + LvalueExprKinds + RvalueExprKinds + ModifierNodes CallKinds* = {mnkCall, mnkCheckedCall} diff --git a/compiler/mir/proto_mir.nim b/compiler/mir/proto_mir.nim index 620287bc0ef..43337a79b23 100644 --- a/compiler/mir/proto_mir.nim +++ b/compiler/mir/proto_mir.nim @@ -70,6 +70,13 @@ type pirStmtList # usually skipped + # --- ownership operations + + pirCopy + pirMove + pirDestructiveMove # move + wasMoved + pirSink + # --- constructors pirClosureConstr @@ -180,9 +187,6 @@ func typ*(n: seq[ProtoItem]): PType {.inline.} = func classify*(e: seq[ProtoItem], i: int): ExprKind = ## Returns the kind of the given proto-MIR expression. - # XXX: ownership is unrelated to whether a type has custom copy/sink/ - # destruction logic. 
Taking the latter into consideration is an - # optimization that needs to eventually be removed case e[i].kind of pirLiteral, pirProc: Literal @@ -191,22 +195,20 @@ func classify*(e: seq[ProtoItem], i: int): ExprKind = # constant expression are later turned into anonymous constants, so # they're lvalues too Lvalue - of pirCall, pirComplex, pirStringToCString, pirCStringToString: - if hasDestructor(e[i].typ): - OwnedRvalue - else: - Rvalue + of pirCall, pirComplex, pirSetConstr, pirAddr, pirView, pirToSlice, + pirToSubSlice, pirStringToCString, pirCStringToString, + pirConv, pirStdConv, pirChckRange: + OwnedRvalue of pirObjConstr, pirTupleConstr, pirClosureConstr, pirArrayConstr: - if e[i].owning and hasDestructor(e[i].typ): + if e[i].owning: OwnedRvalue else: Rvalue of pirRefConstr: OwnedRvalue - of pirSetConstr, pirAddr, pirView, pirCast, pirConv, pirStdConv, - pirChckRange, pirToSlice, pirToSubSlice: + of pirCast: Rvalue - of pirMat: + of pirMat, pirCopy, pirMove, pirDestructiveMove, pirSink: OwnedRvalue of pirMatCursor: Rvalue @@ -247,6 +249,9 @@ func isPure(e: seq[ProtoItem], n: int): bool = of pirMat, pirMatCursor: # the materialized-into temporary is never assigned to true + of pirCopy, pirMove, pirDestructiveMove, pirSink: + # always produce an owning value + true of pirDeref, pirViewDeref: # the pointer destination could change (unless it's an immutable view) false @@ -286,15 +291,107 @@ func isStable(e: seq[ProtoItem], n: int): bool = else: unreachable(e[n].kind) +func ownershipOp(e: seq[ProtoItem], i: int): ProtoItemKind = + ## Infers and returns the best fitting operation to retrieve an owning + ## value from the given *lvalue*. 
+ func decayMove(kind: ProtoItemKind): ProtoItemKind {.inline.} = + # moving from a projection requires a destructive move, since the source + # location needs to be destroyed after (in order to free the non-moved + # parts) + case kind + of pirMove: pirDestructiveMove + else: kind + + case e[i].kind + of pirParam: + if e[i].typ.kind == tySink: + pirSink + else: + pirCopy + of pirLocal, pirGlobal: + if sfCursor in e[i].sym.flags: + pirCopy # cursors can only be copied from + else: + pirSink # moveability depends on data flow + of pirConst, pirConstExpr, pirLiteral: + pirCopy + of pirFieldAccess: + if sfCursor in e[i].field.flags: + pirCopy # non-owning fields can only be copied from + else: + decayMove ownershipOp(e, i - 1) + of pirTupleAccess, pirArrayAccess, pirVariantAccess, pirSeqAccess: + decayMove ownershipOp(e, i - 1) + of pirLvalueConv: + # it's still the whole location that would be consumed, so no destructive + # move is required + ownershipOp(e, i - 1) + of pirCheckedArrayAccess, pirCheckedSeqAccess, pirCheckedVariantAccess, + pirCheckedObjConv: + decayMove ownershipOp(e, i - 1) + of pirDeref, pirViewDeref: + # pointers and views are currently not tracked, so their targets can only + # be copied from + pirCopy + of pirMat: + pirMove + of pirMatCursor: + pirCopy + of pirStmtList, pirMatLvalue: + ownershipOp(e, i - 1) + else: + # cannot be part of an lvalue expression sequence + unreachable(e[i].kind) + +func wantOwning*(e: var seq[ProtoItem], forceTemp: bool) = + ## Makes sure `e` produces an owning value. If `forceTemp` is true, a + ## temporary is materialized even if the expression would already produce + ## an owning value. + case classify(e, e.high) + of Rvalue: + # rvalue expressions cannot be copied from directly + if e[^1].kind != pirMatCursor: + e.add pirMatCursor + e.add pirCopy + of OwnedRvalue: + var i = e.high + while e[i].kind == pirStmtList: + dec i + case e[i].kind + of pirMat: + e.add pirMove + of pirComplex: + # watch out!
try-finally expressions can have exceptional control-flow + # that forces the destination temporary to have to be destroyed in a + # finalizer. A destructive move is required + e.add pirMat + e.add pirDestructiveMove + elif forceTemp: + e.add pirMat + e.add pirMove + of Lvalue: + e.add ownershipOp(e, e.high) + of Literal: + if forceTemp: + e.add pirMat + e.add pirMove + func wantConsumeable*(e: var seq[ProtoItem]) = ## Makes sure `e` is an expression that can be used in a context requiring a - ## certainly-consumeable value. + ## certainly-consumeable value (either a materialized temporary or a literal + ## value). case classify(e, e.high) - of Rvalue, OwnedRvalue: + of Rvalue: + if e[^1].kind != pirMatCursor: + e.add pirMatCursor + e.add pirCopy + e.add pirMat + of OwnedRvalue: if e[^1].kind != pirMat: # requires an owning temporary e.add pirMat of Lvalue: + e.add ownershipOp(e, e.high) e.add pirMat of Literal: discard "okay, can be used as is" @@ -351,13 +448,14 @@ func selectWhenBranch*(n: PNode, isNimvm: bool): PNode = else: n[1][0] func handleConstExpr(result: var seq[ProtoItem], n: PNode, kind: ProtoItemKind, - sink: bool) = - ## If eligible, translates `n` to a constant expression. To a construction of - ## kind `kind` otherwise. + sink, lift: bool) = + ## If `lift` is true and the expression is eligible, translates `n` to a + ## constant expression. To a construction of kind `kind` otherwise. ## ## Only fully constant, non-empty aggregate or set constructions are ## treated as constant expressions. 
- if not sink and n.len > ord(n.kind == nkObjConstr) and isDeepConstExpr(n): + if lift and n.len > ord(n.kind == nkObjConstr) and + isDeepConstExpr(n): result.add ProtoItem(orig: n, typ: n.typ, kind: pirConstExpr) elif kind == pirSetConstr: result.add ProtoItem(orig: n, typ: n.typ, kind: kind) @@ -671,24 +769,24 @@ proc exprToPmir(c: TranslateCtx, result: var seq[ProtoItem], n: PNode, sink: boo of nkBracket: # if the construction is of seq type, then it's a constant seq value, - # which we prefer to lift into a constant (again) - let consume = - n.typ.skipTypes(IrrelevantTypes).kind != tySequence and sink - handleConstExpr(result, n, pirArrayConstr, consume) + # which we prefer to lift into a constant (again), even in sink contexts + let lift = + n.typ.skipTypes(IrrelevantTypes).kind == tySequence or not(sink) + handleConstExpr(result, n, pirArrayConstr, sink, lift) of nkCurly: - # never treat set constructions as appearing in a sink context, so that - # they're always turned into constants, if possible - handleConstExpr(result, n, pirSetConstr, false) + # always attempt to turn set constructions into constants, regardless of + # whether they're used in a sink context + handleConstExpr(result, n, pirSetConstr, false, true) of nkObjConstr: if n.typ.skipTypes(IrrelevantTypes).kind == tyRef: # ref constructions are never constant result.add n, pirRefConstr else: - handleConstExpr(result, n, pirObjConstr, sink) + handleConstExpr(result, n, pirObjConstr, sink, not sink) of nkTupleConstr: - handleConstExpr(result, n, pirTupleConstr, sink) + handleConstExpr(result, n, pirTupleConstr, sink, not sink) of nkClosure: - handleConstExpr(result, n, pirClosureConstr, sink) + handleConstExpr(result, n, pirClosureConstr, sink, not sink) of nkWhenStmt: # a ``when nimvm`` expression diff --git a/compiler/mir/utils.nim b/compiler/mir/utils.nim index a2fcb55ab79..4bbf962cc2f 100644 --- a/compiler/mir/utils.nim +++ b/compiler/mir/utils.nim @@ -332,6 +332,15 @@ proc exprToStr(nodes: 
MirTree, i: var int, result: var string, env: EnvPtr) = mnkMul: " * ", mnkDiv: " div ", mnkModI: " mod "] result.add Map[kind] valueToStr() # second operand + of mnkCopy: + tree "copy ": + valueToStr() + of mnkMove: + tree "move ": + valueToStr() + of mnkSink: + tree "sink ": + valueToStr() else: # TODO: make this branch exhaustive result.add "" @@ -413,12 +422,6 @@ proc stmtToStr(nodes: MirTree, i: var int, indent: int, result: var string, result.add " := " exprToStr() result.add "\n" - of mnkFastAsgn: - tree "": - valueToStr() - result.add " =fast " - exprToStr() - result.add "\n" of mnkStmtList: renderList(indent) of mnkTry: diff --git a/compiler/sem/injectdestructors.nim b/compiler/sem/injectdestructors.nim index 049d340db1d..fb8471c3f07 100644 --- a/compiler/sem/injectdestructors.nim +++ b/compiler/sem/injectdestructors.nim @@ -28,20 +28,19 @@ ## Except for thread-local variables, the others are destroyed at the ## end of the program. ## -## ``solveOwnership`` then computes for all lvalue expression appearing in -## consume (e.g., argument to ``sink`` parameter) or sink contexts (source -## lvalue in an assignment). +## ``collapseSink`` then computes for all lvalue expressions appearing as +## source operands to sink assignments whether it's the last use of the +## value currently stored in the location identified by the lvalue. All sinks +## where this is the case are remembered, and their corresponding data-flow +## operation is turned from a 'use' into a 'consume'. ## -## Using the now resolved ownership status of all expressions, the next -## analysis step computes which locations need to be destroyed via a destructor -## call (see ``computeDestructors``). +## With all sink assignments either collapsed into copy or move assignments, +## the next analysis step computes which locations need to be destroyed via a +## destructor call (see ``computeDestructors``).
## ## As the last step, the assignment rewriting and destructor injection is ## performed, using the previously gathered data. ## -## For the assignment rewriting, if the source operand of an assignment is -## owned, a move is used instead of a copy. -## ## Ownership analysis ## ================== ## @@ -339,13 +338,6 @@ func computeOwnership(tree: MirTree, cfg: DataFlowGraph, entities: EntityDict, ## Computes for `lval` whether it can be moved from (i.e., ownership of the ## value transferred) at the program position `start`. case tree[lval.root].kind - of mnkDeref, mnkDerefView, mnkConst: - # * derefs reaching here means that they couldn't be resolved - # * handles to constant locations are never owning - false - of mnkLiteral: - # literals can be moved (although not destructively) - true of mnkLocal, mnkParam, mnkGlobal, mnkTemp: # only entities that are relevant for destructor injection have an entry in # `entities`. Those that don't also can't be consumed (because we either @@ -364,19 +356,34 @@ func computeOwnership(tree: MirTree, cfg: DataFlowGraph, entities: EntityDict, else: unreachable() -func solveOwnership(tree: MirTree, cfg: DataFlowGraph, - entities: EntityDict): Values = - ## Computes for all lvalues used in consume context whether they're owning - ## or not. Returns a ``Values`` instance with the results. - # search for 'consume' instructions and compute for their operands whether - # it's a handle that owns the location's value +func collapseSink(tree: MirTree, cfg: var DataFlowGraph, + entities: EntityDict): Values = + ## Computes for every ``mnkSink`` node what operation (copy or move) it has + ## to collapse to, returning the result(s) as a ``Values`` instance. + ## + ## In addition, the DFG instructions in `cfg` for sinks-turned-into-moves + ## are updated to ``opConsume`` instructions. 
+ var update: seq[InstrPos] + ## tracks the DFG instructions that need to be updated + + # search for all 'use' instructions representing sinks, and compute whether + # they have to be turned into a move or copy for i, op, opr in cfg.instructions: - if op == opConsume and hasDestructor(tree[opr].typ) and - computeOwnership(tree, cfg, entities, - computePath(tree, NodePosition opr), i + 1): - result.markOwned(opr) - else: - discard "nothing to do" + if op == opUse and tree[tree.parent(NodePosition opr)].kind == mnkSink: + # it's the DFG instruction for a sink + if hasDestructor(tree[opr].typ) and + computeOwnership(tree, cfg, entities, + computePath(tree, NodePosition opr), i + 1): + update.add i + result.markOwned(opr) + + # for the moment, sinks are always turned into copies for values without + # custom destroy/copy/sink behaviour + + # change all 'use' instructions corresponding to sinks to 'consume' + # instructions. This is more efficient than changing the node kinds and then + # recomputing the graph + cfg.change(update, opConsume) type DestructionMode = enum demNone ## location doesn't need to be destroyed because it contains no @@ -386,11 +393,11 @@ type DestructionMode = enum demFinally ## the location contains a value when the scope is exited via ## unstructured control-flow -func requiresDestruction(tree: MirTree, cfg: DataFlowGraph, values: Values, +func requiresDestruction(tree: MirTree, cfg: DataFlowGraph, span: Subgraph, def: NodePosition, entity: MirNode ): DestructionMode = template computeAlive(loc, op: untyped): untyped = - computeAlive(tree, cfg, values, span, loc, op) + computeAlive(tree, cfg, span, loc, op) let r = case entity.kind @@ -413,7 +420,7 @@ func requiresDestruction(tree: MirTree, cfg: DataFlowGraph, values: Values, elif r.alive: demNormal else: demNone -func computeDestructors(tree: MirTree, cfg: DataFlowGraph, values: Values, +func computeDestructors(tree: MirTree, cfg: DataFlowGraph, entities: EntityDict): seq[DestroyEntry] = ## 
Computes and collects which locations present in `entities` need to be ## destroyed at the exit of their enclosing scope in order to prevent the @@ -441,7 +448,7 @@ func computeDestructors(tree: MirTree, cfg: DataFlowGraph, values: Values, # defer destruction of the global to the end of the program discard - case requiresDestruction(tree, cfg, values, info.scope, def, entity) + case requiresDestruction(tree, cfg, info.scope, def, entity) of demNormal: result.add (scopeStart, def, false) of demFinally: @@ -458,7 +465,7 @@ func computeDestructors(tree: MirTree, cfg: DataFlowGraph, values: Values, # --------- analysis routines -------------- -func isAlive(tree: MirTree, cfg: DataFlowGraph, v: Values, +func isAlive(tree: MirTree, cfg: DataFlowGraph, entities: EntityDict, val: Path, at: InstrPos): bool = ## Computes if `val` refers to a location that contains a value when ## `at` in the DFG is reached. @@ -482,7 +489,7 @@ func isAlive(tree: MirTree, cfg: DataFlowGraph, v: Values, if at <= scope.a: false # the location cannot be alive else: - isAlive(tree, cfg, v, scope, val, at) + isAlive(tree, cfg, scope, val, at) else: # something that we can't analyse (e.g. a dereferenced pointer). We have # to be conservative and assume that the location the lvalue names already @@ -546,23 +553,14 @@ func needsReset(tree: MirTree, cfg: DataFlowGraph, ar: AnalysisResults, # the presence of the value is observed -> a reset is required result = true -func isMoveable(tree: MirTree, v: Values, n: NodePosition): bool = - ## Returns whether the value of the expression `n` can be moved. +func isMove(tree: MirTree, v: Values, n: NodePosition): bool = + ## Returns whether the assignment modifier at `n` is a move modifier (after + ## collapsing sink). 
case tree[n].kind: - of LvalueExprKinds - {mnkDeref, mnkDerefView}: - v.isOwned(OpValue n) - of mnkDeref, mnkDerefView: - false - of mnkLiteral, mnkProc, mnkType: - true - of mnkConv, mnkStdConv, mnkCast, mnkAddr, mnkView, mnkToSlice, UnaryOps, - BinaryOps: - # the result of these operations is not an owned value - false - of mnkCall, mnkCheckedCall, mnkObjConstr, mnkConstr: - true - of AllNodeKinds - ExprKinds: - unreachable(tree[n].kind) + of mnkCopy: false + of mnkMove: true + of mnkSink: v.isOwned(tree.operand(n)) + else: unreachable(tree[n].kind) # ------- code generation routines -------- @@ -605,13 +603,13 @@ proc genInjectedSink(bu: var MirBuilder, graph: ModuleGraph, env: var MirEnv, else: # without a sink hook, a ``=destroy`` + blit-copy is used genDestroy(bu, graph, env, dest) - bu.asgn dest, source + bu.asgnMove dest, source proc genSinkFromTemporary(bu: var MirBuilder, graph: ModuleGraph, env: var MirEnv, dest, source: Value) = ## Similar to ``genInjectedSink`` but generates code for destructively ## moving the source operand into a temporary first. - let tmp = bu.materialize(source) + let tmp = bu.materializeMove(source) genWasMoved(bu, graph, source) genInjectedSink(bu, graph, env, dest, tmp) @@ -650,12 +648,13 @@ func destructiveMoveOperands(bu: var MirBuilder, tree: MirTree, proc expandAsgn(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, env: var MirEnv, stmt: NodePosition, pos: InstrPos, c: var Changeset) = - ## Expands an assignment into either a copy, move, or destructive move. - ## `stmt` is the assignment statement node and `pos` is the 'def' data-flow - ## instruction corresponding to it. + ## Rewrites the assignment at `stmt` into either a ``=copy`` hook call, + ## ``=sink`` hook call, move, or destructive move. + ## `pos` is the 'def' data-flow instruction corresponding to the assignment. 
let dest = tree.child(stmt, 0) - source = tree.child(stmt, 1) + operator = tree.child(stmt, 1) + source = tree.child(operator, 0) sourcePath = computePath(tree, source) destPath = computePath(tree, dest) relation = compare(tree, sourcePath, destPath) @@ -663,77 +662,60 @@ proc expandAsgn(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, if relation.isSame: # a self-assignment -> elide c.remove(tree, stmt) - elif isMoveable(tree, ar.v[], source): + elif isMove(tree, ar.v[], operator): # a move is possible -> sink if true: - let fromLvalue = isNamed(tree, OpValue source) + template needsReset(): bool = + # only a ``sink`` modifier allows for the injection of resets + (tree[operator].kind == mnkSink and + needsReset(tree, ctx.cfg, ar, sourcePath, pos)) if tree[stmt].kind != mnkInit and - isAlive(tree, ctx.cfg, ar.v[], ar.entities[], destPath, pos): + isAlive(tree, ctx.cfg, ar.entities[], destPath, pos): # there already exists a value in the destination location -> use the # sink operation - if fromLvalue: + if true: c.replaceMulti(tree, stmt, bu): let a = bu.bindMut(tree, dest) if isAPartOfB(relation) != no: - # this is a potential part-to-whole assignment, e.g.: ``x = x.y``. - # We need to move the source value into a temporary first, as - # ``=sink`` would otherwise destroy ``x`` first, also destroying - # ``x.y`` in the process - let b = bu.bindImmutable(tree, source) + # this is a potential part-to-whole assignment, e.g.: + # ``x = move x.y``. 
We need to move the source value into a + # temporary first, as ``=sink`` would otherwise destroy ``x`` + # first, also destroying ``x.y`` in the process + let b = bu.bindMut(tree, source) genSinkFromTemporary(bu, ctx.graph, env, a, b) - elif needsReset(tree, ctx.cfg, ar, sourcePath, pos): + elif needsReset(): # a sink from a location that needs to be reset after the move # (i.e., a destructive move) let (b, clear) = bu.destructiveMoveOperands(tree, source) genInjectedSink(bu, ctx.graph, env, a, b) genWasMoved(bu, ctx.graph, clear) else: - # a sink from a location that doesn't need to be cleared after + # a sink from a location that doesn't need to be reset afterwards let b = bu.bindImmutable(tree, source) genInjectedSink(bu, ctx.graph, env, a, b) - else: - # this is a bit hack-y, but in order to support changes within the - # second operand's tree, the assignment is not replaced as a whole - # but rather turned into a def statement. ``a.x = f(arg 1)`` becomes: - # def _1 = f(arg 1) - # bind_mut _2 = a.x - # =sink(name _2, arg _1) - # XXX: this is going to become cleaner once `mirgen` handles most of - # the sink-related transformations - var tmp: Value - c.changeTree(tree, stmt, MirNode(kind: mnkDef)) - c.replaceMulti(tree, dest, bu): - # replace the destination operand with the name of a newly - # allocated temporary - tmp = bu.allocTemp(tree[source].typ) - bu.use tmp - - c.insert(tree, tree.sibling(stmt), stmt, bu): - # the value is only accessible through the source expression, a - # destructive move is not required - let a = bu.bindMut(tree, dest) - genInjectedSink(bu, ctx.graph, env, a, tmp) - - else: + elif needsReset(): # the destination location doesn't contain a value yet (which would # need to be destroyed first otherwise) -> a bitwise copy can be used - if fromLvalue and needsReset(tree, ctx.cfg, ar, sourcePath, pos): - # we don't need to check for part-to-whole assignments here, because - # if the destination location has no value, so don't locations 
derived - # from it, in which case it doesn't matter when the reset happens - # XXX: the reset could be omitted for part-to-whole assignments - c.replaceMulti(tree, stmt, bu): - let - a = bu.bindMut(tree, dest) - (b, clear) = bu.destructiveMoveOperands(tree, source) - bu.asgn a, b - genWasMoved(bu, ctx.graph, clear) - - else: - # no hook call nor destructive move is required - discard + # we don't need to check for part-to-whole assignments here, because + # if the destination location has no value, so don't locations derived + # from it, in which case it doesn't matter when the reset happens + # XXX: the reset could be omitted for part-to-whole assignments + c.replaceMulti(tree, stmt, bu): + let + a = bu.bindMut(tree, dest) + (b, clear) = bu.destructiveMoveOperands(tree, source) + bu.asgnMove a, b + genWasMoved(bu, ctx.graph, clear) + + elif tree[operator].kind == mnkSink: + # no reset and/or hook call needs to be injected, simply replace the + # sink modifier with a move + c.changeTree(tree, operator): MirNode(kind: mnkMove) + else: + # no hook call nor destructive move is required + discard "nothing to do" else: # a move is not possible -> copy @@ -753,18 +735,18 @@ proc expandDef(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, c: var Changeset) = ## Depending on whether the source can be moved out of, either rewrites the ## 'def' at `at` into a call to the ``=copy`` hook call or into a - ## destructive move. If the source can be moved out of non-destructively, - ## nothing is changed. `pos` is the data-flow instruction + ## destructive or non-destructive move. `pos` is the data-flow instruction. let - dest = tree.child(at, 0) - source = tree.child(at, 1) - case isMoveable(tree, ar.v[], source) + dest = tree.child(at, 0) + operator = tree.child(at, 1) + source = tree.child(operator, 0) + case isMove(tree, ar.v[], operator) of false: - # a copy is required. Transform ``def x = a.b`` into: + # a copy is required. 
Transform ``def x = copy a.b`` into: # def x # bind _1 = a.b # =copy(name x, arg _1) - c.replace(tree, source): MirNode(kind: mnkNone) + c.replace(tree, operator): MirNode(kind: mnkNone) c.insert(tree, tree.sibling(at), source, bu): let a = bu.bindMut(tree, dest) @@ -773,20 +755,25 @@ proc expandDef(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, # etc.), no cycle can possibly be introduced genCopy(bu, ctx.graph, env, a, b, false) of true: - if isNamed(tree, OpValue source) and - needsReset(tree, ctx.cfg, ar, computePath(tree, source), pos): + assert tree[operator].kind == mnkSink + if needsReset(tree, ctx.cfg, ar, computePath(tree, source), pos): # the value can be moved, but the location needs to be reset. Transform - # ``def x = a.b`` into: + # ``def x = sink a.b`` into: # bind_mut _1 = a.b - # def x = _1 + # def x = move _1 # wasMoved(name x) var tmp, clear: Value c.insert(tree, at, source, bu): (tmp, clear) = bu.destructiveMoveOperands(tree, source) - c.replaceMulti(tree, source, bu): - bu.use tmp + c.replaceMulti(tree, operator, bu): + bu.move tmp c.insert(tree, tree.sibling(at), source, bu): genWasMoved(bu, ctx.graph, clear) + else: + # turn into a ``Move`` operation + c.changeTree(tree, operator): + MirNode(kind: mnkMove, typ: tree[operator].typ) + proc consumeArg(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, expr: NodePosition, src: OpValue, pos: InstrPos, @@ -812,7 +799,7 @@ proc consumeArg(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, var tmp: Value c.insert(tree, stmt, NodePosition src, bu): let v = bu.bindMut(tree, NodePosition src) - tmp = bu.materialize(v) + tmp = bu.materializeMove(v) genWasMoved(bu, ctx.graph, v) # replace the argument with the injected temporary: @@ -876,8 +863,7 @@ proc rewriteAssignments(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, env: var MirEnv, diags: var seq[LocalDiag], c: var Changeset) = ## Rewrites assignments to locations into calls to either the ``=copy`` - ## or ``=sink`` hook (see 
``expandAsgn`` for more details), using the - ## previously computed ownership information to decide. + ## or ``=sink`` hook (see ``expandAsgn`` for more details). ## ## Also injects the necessary location reset logic for lvalues passed to ## 'consume' argument sinks. @@ -893,7 +879,7 @@ proc rewriteAssignments(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, consumeArg(tree, ctx, ar, tree.parent(parent), val, i, c) of mnkRaise: consumeArg(tree, ctx, ar, NodePosition val, val, i, c) - of mnkAsgn, mnkInit, mnkDef, mnkDefUnpack: + of mnkMove, mnkSink: # assignments are handled separately discard else: @@ -906,9 +892,10 @@ proc rewriteAssignments(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, case tree[stmt].kind of mnkDef, mnkDefUnpack: let src = tree.child(stmt, 1) - # ignore definitions with no initializer - if tree[src].kind != mnkNone: - if not isMoveable(tree, ar.v[], src): + # only rewrite definitions with modifiers. The ``move`` modifier + # is ignored since there's nothing to be rewritten for it + if tree[src].kind in ModifierNodes - {mnkMove}: + if not isMove(tree, ar.v[], src): checkCopy(ctx.graph, tree, src, diags) # emit a warning for copies-to-sink if isUsedForSink(tree, stmt): @@ -917,9 +904,11 @@ proc rewriteAssignments(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, expandDef(tree, ctx, ar, env, stmt, i, c) of mnkAsgn, mnkInit: let src = tree.child(stmt, 1) - if not isMoveable(tree, ar.v[], src): - checkCopy(ctx.graph, tree, src, diags) - expandAsgn(tree, ctx, ar, env, stmt, i, c) + # only rewrite assignments with modifiers + if tree[src].kind in ModifierNodes: + if not isMove(tree, ar.v[], src): + checkCopy(ctx.graph, tree, src, diags) + expandAsgn(tree, ctx, ar, env, stmt, i, c) else: # e.g., output arguments to procedures discard "ignore" @@ -1164,13 +1153,13 @@ proc injectDestructorCalls*(g: ModuleGraph, idgen: IdGenerator, var changes = initChangeset(body.code) diags: seq[LocalDiag] + actx = AnalyseCtx(graph: g, cfg: 
computeDfg(body.code)) let - actx = AnalyseCtx(graph: g, cfg: computeDfg(body.code)) entities = initEntityDict(body.code, actx.cfg) - values = solveOwnership(body.code, actx.cfg, entities) + values = collapseSink(body.code, actx.cfg, entities) - let destructors = computeDestructors(body.code, actx.cfg, values, entities) + let destructors = computeDestructors(body.code, actx.cfg, entities) rewriteAssignments( body.code, actx, diff --git a/compiler/sem/mirexec.nim b/compiler/sem/mirexec.nim index 02309eda105..c9b51c3fbeb 100644 --- a/compiler/sem/mirexec.nim +++ b/compiler/sem/mirexec.nim @@ -48,11 +48,7 @@ type opInvalidate ## all information gathered about a value becomes invalid opMutate ## mutation of a value. Can be viewed as a combined 'use' + ## 'def' - opConsume ## a value is consumed. Counts as either a 'use' or a 'use' - ## + 'kill', depending on the context - # future direction: change ``opConsume`` to always mean 'use' + 'kill'. - # The callsite should be fully responsible for handling sinks, rather - # than this being partially pushed into the DFG + opConsume ## a value is consumed. This is effectively a 'use' + 'kill' opMutateGlobal ## an unspecified global is mutated @@ -233,8 +229,7 @@ func emitForArgs(env: var ClosureEnv, tree: MirTree, at, source: NodePosition) = else: emitLvalueOp(env, opUse, tree, at, OpValue it) -func emitForExpr(env: var ClosureEnv, tree: MirTree, at, source: NodePosition, - consume: bool) = +func emitForExpr(env: var ClosureEnv, tree: MirTree, at, source: NodePosition) = ## Emits the data- and control-flow instructions corresponding to the ## expression at `source`. 
template op(o: Opcode, v: OpValue) = @@ -273,17 +268,20 @@ func emitForExpr(env: var ClosureEnv, tree: MirTree, at, source: NodePosition, if tree[source].typ.kind == tyVar: opMutate else: opUse emitLvalueOp(env, opc, tree, at, tree.operand(source, 0)) + of mnkCopy, mnkSink: + # until it's collapsed, a sink is conservatively treated as only a + # usage (not a consumption) + emitLvalueOp(env, opUse, tree, at, tree.operand(source)) + of mnkMove: + emitLvalueOp(env, opConsume, tree, at, tree.operand(source)) of UnaryOps: emitLvalueOp(env, opUse, tree, at, tree.operand(source, 0)) of BinaryOps: emitLvalueOp(env, opUse, tree, at, tree.operand(source, 0)) emitLvalueOp(env, opUse, tree, at, tree.operand(source, 1)) of LvalueExprKinds: - # a read or consume is performed on an lvalue - let opc = - if consume: opConsume - else: opUse - emitLvalueOp(env, opc, tree, at, OpValue source) + # raw usage of an lvalue + emitLvalueOp(env, opUse, tree, at, OpValue source) of mnkNone, mnkLiteral, mnkProc: discard "okay, ignore" of AllNodeKinds - ExprKinds - {mnkNone} + {mnkType}: @@ -316,12 +314,12 @@ func emitForExpr(env: var ClosureEnv, tree: MirTree, at, source: NodePosition, else: discard -func emitForDef(env: var ClosureEnv, tree: MirTree, n: NodePosition, consume: bool) = +func emitForDef(env: var ClosureEnv, tree: MirTree, n: NodePosition) = let dest = tree.operand(n, 0) source = tree.operand(n, 1) emitForValue(env, tree, n, dest) - emitForExpr(env, tree, n, NodePosition source, consume) + emitForExpr(env, tree, n, NodePosition source) # defs with an empty initializer have no data- or control-flow properties. # Parameter definitions are an exception. 
if tree[dest].kind == mnkParam or tree[source].kind != mnkNone: @@ -548,10 +546,8 @@ func computeDfg*(tree: MirTree): DataFlowGraph = # no control-flow or other effects discard - of mnkAsgn, mnkInit: - emitForDef(env, tree, i, true) - of mnkFastAsgn: - emitForDef(env, tree, i, false) + of mnkDef, mnkDefCursor, mnkDefUnpack, mnkAsgn, mnkInit: + emitForDef(env, tree, i) of mnkSwitch: # the switch statement invalidates the destination rather than # reassigning it (i.e., ``opDef``) @@ -559,16 +555,12 @@ func computeDfg*(tree: MirTree): DataFlowGraph = dest = tree.operand(i, 0) source = tree.operand(i, 1) emitForValue(env, tree, i, dest) - emitForExpr(env, tree, i, NodePosition source, false) + emitForExpr(env, tree, i, NodePosition source) dfaOp env, opInvalidate, i, dest - of mnkDef, mnkDefUnpack: - emitForDef(env, tree, i, true) - of mnkDefCursor: - emitForDef(env, tree, i, false) of mnkBindMut, mnkBind: emitForValue(env, tree, i, tree.operand(i, 1)) of mnkVoid: - emitForExpr(env, tree, i, NodePosition tree.operand(i), false) + emitForExpr(env, tree, i, NodePosition tree.operand(i)) of mnkEmit, mnkAsm: emitForArgs(env, tree, i, i) @@ -606,6 +598,14 @@ func find*(dfg: DataFlowGraph, n: NodePosition): InstrPos = ## attached-to node position) operation is returned. lowerBound(dfg, n) +func change*(dfg: var DataFlowGraph, instrs: openArray[InstrPos], + to: Opcode) = + ## Changes all data-flow instructions identified by `instrs` to use the + ## `to` opcode. + for it in instrs.items: + assert dfg.instructions[it].op in DataFlowOps + dfg.instructions[it].op = to + iterator instructions*(dfg: DataFlowGraph): (InstrPos, Opcode, OpValue) = ## Returns all data-flow operations in order of appearance together with ## their position. 
diff --git a/doc/mir.rst b/doc/mir.rst index c090f877aca..1a6c0514193 100644 --- a/doc/mir.rst +++ b/doc/mir.rst @@ -110,7 +110,14 @@ Semantics # at the upper bound (inclusive, third # parameter) - FULL_VALUE = RVALUE | VALUE + ASGN_SRC = RVALUE + | VALUE + | Copy VALUE + | Move LVALUE + | Sink LVALUE + + SHALLOW_SRC = RVALUE + | VALUE STATEMENT = | StmtList STATEMENT ... # a list of statements @@ -118,10 +125,10 @@ Semantics # delimits the lifetime of all # definitions within | Def NAME none # definition - | Def NAME FULL_VALUE # definition + initial value assignment + | Def NAME ASGN_SRC # definition + initial value assignment | DefCursor NAME # definition of non-owning location - | DefCursor NAME FULL_VALUE # same as above, but with initial - # assignment + | DefCursor NAME SHALLOW_SRC# definition of non-owning location + + # initial (shallow copy) assignment | Bind LVALUE # bind the lvalue to the given alias. # May be used for mutation, but must # not be used as an assignment's @@ -136,13 +143,11 @@ Semantics | Void CALL_EXPR # represents a void call. The called # procedure or magic *must* have a # `void`` return type - | Asgn LVALUE FULL_VALUE # normal assignment of the right value + | Asgn LVALUE ASGN_SRC # normal assignment of the right value # to the left location - | Init LVALUE FULL_VALUE # initial assignment (the destination + | Init LVALUE ASGN_SRC # initial assignment (the destination # is empty) - | FastAsgn LVALUE FULL_VALUE# fast assignment (cannot be rewritten - # into a full copy) - | Switch LVALUE FULL_VALUE # changes the active branch of a + | Switch LVALUE ASGN_SRC # changes the active branch of a # variant. Unclear semantics. 
| If VALUE STATEMENT # if the value evaluates to true # execute the statement diff --git a/tests/arc/topt_cursor.nim b/tests/arc/topt_cursor.nim index 76f2b9f0ddd..2085b3c17d9 100644 --- a/tests/arc/topt_cursor.nim +++ b/tests/arc/topt_cursor.nim @@ -9,10 +9,10 @@ scope: block L0: if cond: scope: - x =fast + x = break L0 scope: - x =fast + x = def_cursor _0: (string, int) = x def _1: string = $(arg _0) (raises) echo(arg type(array[0..0, string]), arg _1) (raises) @@ -35,8 +35,8 @@ scope: while true: scope: def_cursor _1: File = f - def_cursor _2: bool = readLine(arg _1, name res) (raises) - def_cursor _3: bool = not(arg _2) + def _2: bool = readLine(arg _1, name res) (raises) + def _3: bool = not(arg _2) if _3: scope: break L0 diff --git a/tests/arc/topt_no_cursor.nim b/tests/arc/topt_no_cursor.nim index 9499d20a989..3546a896f86 100644 --- a/tests/arc/topt_no_cursor.nim +++ b/tests/arc/topt_no_cursor.nim @@ -14,16 +14,17 @@ doing shady stuff... scope: def splat: tuple[dir: string, name: string, ext: string] = splitFile(arg path) (raises) - bind_mut _3: string = splat.0 - def _0: string = _3 - wasMoved(name _3) - bind_mut _4: string = splat.1 - def _1: string = _4 + bind_mut _4: string = splat.0 + def _0: string = move _4 wasMoved(name _4) - bind_mut _5: string = splat.2 - def _2: string = _5 + bind_mut _5: string = splat.1 + def _1: string = move _5 wasMoved(name _5) - result = construct (consume _0, consume _1, consume _2) + bind_mut _6: string = splat.2 + def _2: string = move _6 + wasMoved(name _6) + def _3: Target = construct (consume _0, consume _1, consume _2) + result = move _3 =destroy(name splat) -- end of expandArc ------------------------ --expandArc: delete @@ -55,15 +56,16 @@ scope: def lresult: seq[int] = arrToSeq(consume _0) def lvalue: seq[int] def lnext: string - def _1: seq[int] = lresult + def _1: seq[int] = move lresult def _: (seq[int], string) = construct (consume _1, consume ";") - bind_mut _2: seq[int] = _.0 - lvalue = _2 - wasMoved(name _2) 
- bind_mut _3: string = _.1 - lnext = _3 + bind_mut _3: seq[int] = _.0 + lvalue = move _3 wasMoved(name _3) - result.value = move(name lvalue) + bind_mut _4: string = _.1 + lnext = move _4 + wasMoved(name _4) + def _2: seq[int] = move(name lvalue) + result.value = move _2 =destroy(name _) =destroy(name lnext) =destroy(name lvalue) @@ -102,8 +104,8 @@ scope: while true: scope: def_cursor _1: int = i - def_cursor _2: bool = ltI(arg _1, arg L) - def_cursor _3: bool = not(arg _2) + def _2: bool = ltI(arg _1, arg L) + def _3: bool = not(arg _2) if _3: scope: break L0 @@ -115,7 +117,7 @@ scope: def_cursor _5: string = line[] def splitted: seq[string] = split(arg _5, arg " ", arg -1) (raises) def_cursor _6: string = splitted[0] - def_cursor _7: bool = eqStr(arg _6, arg "opt") + def _7: bool = eqStr(arg _6, arg "opt") if _7: scope: def _10: string = splitted[1] @@ -148,8 +150,8 @@ scope: while true: scope: def_cursor _2: int = i - def_cursor _3: bool = ltI(arg _2, arg L) - def_cursor _4: bool = not(arg _3) + def _3: bool = ltI(arg _2, arg L) + def _4: bool = not(arg _3) if _4: scope: break L0 @@ -171,15 +173,15 @@ scope: try: def x: sink string def_cursor _0: sink string = x - def_cursor _1: int = lengthStr(arg _0) - def_cursor _2: bool = eqI(arg _1, arg 2) + def _1: int = lengthStr(arg _0) + def _2: bool = eqI(arg _1, arg 2) if _2: scope: - result = x + result = move x wasMoved(name x) return def_cursor _3: sink string = x - def_cursor _4: int = lengthStr(arg _3) + def _4: int = lengthStr(arg _3) def _5: string = $(arg _4) (raises) echo(arg type(array[0..0, string]), arg _5) (raises) finally: @@ -196,12 +198,12 @@ scope: def _1: tuple[dir: string, front: string] block L0: def_cursor _2: string = this[].value - def_cursor _3: bool = dirExists(arg _2) (raises) + def _3: bool = dirExists(arg _2) (raises) if _3: scope: def _4: string - def _14: string = this[].value - =copy(name _4, arg _14) + def _16: string = this[].value + =copy(name _4, arg _16) _1 := construct (consume 
_4, consume "") break L0 scope: @@ -209,33 +211,33 @@ scope: def_cursor _5: string = this[].value def _6: string = parentDir(arg _5) (raises) def _7: string - def _15: string = this[].value - =copy(name _7, arg _15) + def _17: string = this[].value + =copy(name _7, arg _17) def _8: tuple[head: string, tail: string] = splitPath(consume _7) (raises) - bind_mut _16: string = _8.1 - def _9: string = _16 - wasMoved(name _16) + bind_mut _18: string = _8.1 + def _9: string = move _18 + wasMoved(name _18) _1 := construct (consume _6, consume _9) wasMoved(name _6) finally: =destroy(name _8) =destroy(name _6) - def par: tuple[dir: string, front: string] = _1 + def par: tuple[dir: string, front: string] = move _1 block L1: def_cursor _10: string = par.0 - def_cursor _11: bool = dirExists(arg _10) (raises) + def _11: bool = dirExists(arg _10) (raises) if _11: scope: def_cursor _12: string = par.0 def_cursor _13: string = par.1 - def _17: seq[string] = getSubDirs(arg _12, arg _13) (raises) - bind_mut _18: seq[string] = this[].matchDirs - =sink(name _18, arg _17) + def _14: seq[string] = getSubDirs(arg _12, arg _13) (raises) + bind_mut _19: seq[string] = this[].matchDirs + =sink(name _19, arg _14) break L1 scope: - def _19: seq[string] = construct () + def _15: seq[string] = construct () bind_mut _20: seq[string] = this[].matchDirs - =sink(name _20, arg _19) + =sink(name _20, arg _15) finally: =destroy(name par) -- end of expandArc ------------------------''' diff --git a/tests/arc/topt_refcursors.nim b/tests/arc/topt_refcursors.nim index f9b62b5685d..20b5823ee66 100644 --- a/tests/arc/topt_refcursors.nim +++ b/tests/arc/topt_refcursors.nim @@ -10,9 +10,9 @@ scope: while true: scope: def_cursor _0: Node = it - def_cursor _1: bool = eqRef(arg _0, arg nil) - def_cursor _2: bool = not(arg _1) - def_cursor _3: bool = not(arg _2) + def _1: bool = eqRef(arg _0, arg nil) + def _2: bool = not(arg _1) + def _3: bool = not(arg _2) if _3: scope: break L0 @@ -21,16 +21,16 @@ scope: 
def_cursor _5: string = _4[].s echo(arg type(array[0..0, string]), arg _5) (raises) def_cursor _6: Node = it - it =fast _6[].ri + it = _6[].ri def_cursor jt: Node = root block L1: scope: while true: scope: def_cursor _7: Node = jt - def_cursor _8: bool = eqRef(arg _7, arg nil) - def_cursor _9: bool = not(arg _8) - def_cursor _10: bool = not(arg _9) + def _8: bool = eqRef(arg _7, arg nil) + def _9: bool = not(arg _8) + def _10: bool = not(arg _9) if _10: scope: break L1 @@ -40,7 +40,7 @@ scope: def_cursor _12: Node = jt def_cursor _13: string = _12[].s echo(arg type(array[0..0, string]), arg _13) (raises) - jt =fast ri + jt = ri -- end of expandArc ------------------------''' """ diff --git a/tests/arc/topt_wasmoved_destroy_pairs.nim b/tests/arc/topt_wasmoved_destroy_pairs.nim index c549515e9b8..6ee175c65ac 100644 --- a/tests/arc/topt_wasmoved_destroy_pairs.nim +++ b/tests/arc/topt_wasmoved_destroy_pairs.nim @@ -10,11 +10,11 @@ scope: block L0: if cond: scope: - def _0: seq[int] = x + def _0: seq[int] = move x add(name a, consume _0) break L0 scope: - def _1: seq[int] = x + def _1: seq[int] = move x add(name b, consume _1) =destroy(name b) =destroy(name a) @@ -29,21 +29,21 @@ scope: scope: def a: int = 0 def b: int = 4 - def i: int = a + def i: int = sink a block L0: scope: while true: scope: def_cursor _0: int = i - def_cursor _1: bool = ltI(arg _0, arg b) - def_cursor _2: bool = not(arg _1) + def _1: bool = ltI(arg _0, arg b) + def _2: bool = not(arg _1) if _2: scope: break L0 scope: scope: def_cursor i: int = i - def_cursor _3: bool = eqI(arg i, arg 2) + def _3: bool = eqI(arg i, arg 2) if _3: scope: return @@ -54,12 +54,12 @@ scope: block L1: if cond: scope: - def _5: seq[int] = x + def _5: seq[int] = move x wasMoved(name x) add(name a, consume _5) break L1 scope: - def _6: seq[int] = x + def _6: seq[int] = move x wasMoved(name x) add(name b, consume _6) finally: @@ -75,11 +75,12 @@ scope: if cond: scope: return - str = boolToStr(arg cond) - def_cursor _0: bool 
= not(arg cond) - if _0: + def _0: string = boolToStr(arg cond) + str = move _0 + def _1: bool = not(arg cond) + if _1: scope: - result = str + result = move str wasMoved(name str) return finally: diff --git a/tests/lang_objects/destructor/tv2_cast.nim b/tests/lang_objects/destructor/tv2_cast.nim index dbb16c26ada..e0a0f43f00a 100644 --- a/tests/lang_objects/destructor/tv2_cast.nim +++ b/tests/lang_objects/destructor/tv2_cast.nim @@ -9,10 +9,10 @@ scope: try: def _0: string = newString(arg 100) def_cursor _1: seq[byte] = cast _0 - def_cursor _2: openArray[byte] = toOpenArray _1 + def _2: openArray[byte] = toOpenArray _1 def _3: seq[byte] = encode(arg _2) (raises) + def_cursor _4: string = cast _3 def data: string - def _4: string = cast _3 =copy(name data, arg _4) finally: =destroy(name data) @@ -24,13 +24,13 @@ scope: try: def s: string = newString(arg 100) def_cursor _0: string = s - def_cursor _1: int = lengthStr(arg _0) - def_cursor _2: int = subI(arg _1, arg 1) (raises) + def _1: int = lengthStr(arg _0) + def _2: int = subI(arg _1, arg 1) (raises) chckBounds(arg s, arg 0, arg _2) (raises) - def_cursor _3: openArray[byte] = toOpenArray s, 0, _2 + def _3: openArray[byte] = toOpenArray s, 0, _2 def _4: seq[byte] = encode(arg _3) (raises) + def_cursor _5: string = cast _4 def data: string - def _5: string = cast _4 =copy(name data, arg _5) finally: =destroy(name data) @@ -41,10 +41,10 @@ scope: scope: try: def s: seq[byte] = newSeq(arg 100) (raises) - def_cursor _0: openArray[byte] = toOpenArray s + def _0: openArray[byte] = toOpenArray s def _1: seq[byte] = encode(arg _0) (raises) + def_cursor _2: string = cast _1 def data: string - def _2: string = cast _1 =copy(name data, arg _2) finally: =destroy(name data) @@ -55,10 +55,10 @@ scope: scope: try: def _0: seq[byte] = newSeq(arg 100) (raises) - def_cursor _1: openArray[byte] = toOpenArray _0 + def _1: openArray[byte] = toOpenArray _0 def _2: seq[byte] = encode(arg _1) (raises) + def_cursor _3: string = cast _2 
def data: string - def _3: string = cast _2 =copy(name data, arg _3) finally: =destroy(name data) diff --git a/tests/misc/tdont_fold_procedure_cast.nim b/tests/misc/tdont_fold_procedure_cast.nim index 56a58dc4a92..81e6ce7d4c2 100644 --- a/tests/misc/tdont_fold_procedure_cast.nim +++ b/tests/misc/tdont_fold_procedure_cast.nim @@ -8,9 +8,10 @@ discard """ nimout: ''' --expandArc: test scope: - def p: proc (x: float){.nimcall.} = cast other - def_cursor _0: proc (x: int){.nimcall.} = cast p - _0(arg 1) (raises) + def_cursor _0: proc (x: float){.nimcall.} = cast other + def p: proc (x: float){.nimcall.} = copy _0 + def_cursor _1: proc (x: int){.nimcall.} = cast p + _1(arg 1) (raises) -- end of expandArc ------------------------ ''' output: "1" From 4948ae809f7d84ef6d765111a7cd0c7cf2ae77d2 Mon Sep 17 00:00:00 2001 From: Saem Ghani Date: Sun, 18 Feb 2024 15:59:14 -0800 Subject: [PATCH 006/169] sem: template `typed` param only in using pos (#1199) ## Summary Disallow `typed` parameter template substitution in defining positions, restricting them to using position substitution only. ## Details Updated `semtempl` to only allow `untyped` parameters to be inserted into definition positions during template parameter substitution. This is based on lessons learned following the initial changes to template parameter substitution rules in: 483726f957da5660268d0f72120ec234509a7b0c via the PR https://github.com/nim-works/nimskull/pull/605 Along with this change the substitution test has been expanded and manual updated. 
--------- Co-authored-by: zerbina <100542850+zerbina@users.noreply.github.com> --- compiler/sem/semtempl.nim | 19 ++++++----- doc/manual.rst | 19 ++++++----- ...usage_substitution_nonast_typed_params.nim | 25 --------------- ...mplate_usage_substitution_typed_params.nim | 32 +++++++++++++++++++ 4 files changed, 50 insertions(+), 45 deletions(-) delete mode 100644 tests/lang_callable/template/template_usage_substitution_nonast_typed_params.nim create mode 100644 tests/lang_callable/template/template_usage_substitution_typed_params.nim diff --git a/compiler/sem/semtempl.nim b/compiler/sem/semtempl.nim index 8ddbc3061fa..be499f5cb9d 100644 --- a/compiler/sem/semtempl.nim +++ b/compiler/sem/semtempl.nim @@ -40,18 +40,18 @@ ## Substitution Positions ## ---------------------- ## Templates are ultimately AST level constructs regardless of output type, -## even they follow the grammar. There are two types of positions in a template -## body, one is `definition` and the other is `usage`. A `definition` are any +## they must be valid syntax. There are two types of positions in a template +## body, one is `definition` and the other is `usage`. A `definition` is any ## position where the grammar construct is intended to introduce a new symbol, ## i.e.: the name of a routine, including its parameters; names of variables ## (`const`, `let`, `var`), and so on. All other sites are `usage` sites, where -## a symbol of "chunk" of AST might be used. +## a symbol referring to a "chunk" of AST might be used. 
## ## This is a draft of subsitution rules: -## - `untyped` template bodies accept `typed` and `untyped` params in -## definition or usage positions; and all other params are usage only -## - `typed` template bodies accept `typed` and `untyped` params in definition -## or usage positions; and all other params are usage only +## - `untyped` template bodies accept `untyped` params in definition or usage +## positions; and all other params are usage only +## - `typed` template bodies accept `untyped` params in definition or usage +## positions; and all other params are usage only ## - non-ast template bodies only allow subsitutions within usage positions discard """ @@ -257,9 +257,8 @@ func isTemplParam(c: TemplCtx, n: PNode): bool {.inline.} = sfTemplateParam in n.sym.flags func definitionTemplParam(c: TemplCtx, n: PNode): bool {.inline.} = - ## True if `n` is an AST typed (`typed`/`untyped`) parameter symbol of the - ## current template - isTemplParam(c, n) and n.sym.typ.kind in {tyUntyped, tyTyped} + ## True if `n` is an `untyped` parameter symbol of the current template. + isTemplParam(c, n) and n.sym.typ.kind in {tyUntyped} proc semTemplBody(c: var TemplCtx, n: PNode): PNode diff --git a/doc/manual.rst b/doc/manual.rst index cc37bc7a9e6..53af501e4e9 100644 --- a/doc/manual.rst +++ b/doc/manual.rst @@ -5421,7 +5421,7 @@ Templates A template is a form of metaprogramming: a template call evaluates to a |Nimskull| abstract syntax tree that is substituted in place of the call. The -evaluation and substitution is done during semantic pass of the compiler. +evaluation and substitution is done during the semantic pass of the compiler. The syntax to *invoke* a template is the same as calling a procedure. @@ -5441,8 +5441,8 @@ templates: | `a in b` is transformed into `contains(b, a)`. | `notin` and `isnot` have the obvious meanings. -The "types" of templates can be the symbols `untyped`, `typed` or `typedesc`. 
-These are "meta types", they can only be used in certain contexts. Regular +The "types" of templates can be the symbols `untyped`, `typed` or `typedesc`, +these are "meta types" and can only be used in certain contexts. Regular types can be used too; this implies that `typed` expressions are expected. **Future directions**: the output type of a template is the output type of the @@ -5450,10 +5450,10 @@ template body, which itself can be thought of as an out parameter. Templates will be classified into two major categories AST output (`untyped` and `typed`) and expression based (other types). Along with substitution positions (see below) template evaluation will be revised as follows: -- `untyped` template: allow `typed` and `untyped` params in defining or - using positions; and all other params only in using positions -- `typed` template: allow `typed` and `untyped` params in defining or using - positions; and all other params only in using positions +- `untyped` template: allow `untyped` parameters in defining or using + positions; and all other parameters only in using positions +- `typed` template: allow `untyped` parameters in defining or using positions; + and all other parameters only in using positions - non-ast template: only allow substitution in the using positions The above direction describes the nuance that will be incorporated into a broader redesign of how templates work in |Nimskull|. @@ -5495,9 +5495,8 @@ performed before the expression is passed to the template. This allows declareInt(x) # invalid, because x has not been declared and so it has no type -`typed` and `untyped` parameters may appear in defining or using symbol -positions, while all other parameters are only substituted for using symbol -positions. +`untyped` parameters may appear in defining or using symbol positions, while +all other parameters are only substituted for using symbol positions. A template where every parameter is `untyped` is called an `immediate`:idx: template. 
For historical reasons, templates can be explicitly annotated with diff --git a/tests/lang_callable/template/template_usage_substitution_nonast_typed_params.nim b/tests/lang_callable/template/template_usage_substitution_nonast_typed_params.nim deleted file mode 100644 index 302fbb4b5b7..00000000000 --- a/tests/lang_callable/template/template_usage_substitution_nonast_typed_params.nim +++ /dev/null @@ -1,25 +0,0 @@ -discard """ - description: ''' -Templates parameters of non-AST type do not replace identifiers in new symbol -definition positions. Meaning a template parameter that is not `untyped` or -`typed` will not substitute for a matching identifier if defining things like -variables, routines, parameters, types, fields, etc. -''' -""" - -block originally_this_did_not_work_now_it_does: - # this was kept for historical reasons and can be replaced, when this was an - # error it originated from https://github.com/nim-lang/nim/issues/3158 - type - MyData = object - x: int - - template newDataWindow(data: ref MyData): untyped = - proc testProc(data: ref MyData): string = - "Hello, " & $data.x - testProc(data) - - var d: ref MyData - new(d) - d.x = 10 - doAssert newDataWindow(d) == "Hello, 10" \ No newline at end of file diff --git a/tests/lang_callable/template/template_usage_substitution_typed_params.nim b/tests/lang_callable/template/template_usage_substitution_typed_params.nim new file mode 100644 index 00000000000..9b000e071f6 --- /dev/null +++ b/tests/lang_callable/template/template_usage_substitution_typed_params.nim @@ -0,0 +1,32 @@ +discard """ + description: ''' +Except for `untyped`, template parameters do not replace identifiers in new +symbol definition positions. Meaning a template parameter that is not `untyped` +will not substitute for a matching identifier if defining things like +variables, routines, parameters, types, fields, etc. 
+''' +""" + +block originally_this_did_not_work_now_it_does: + # this was kept for historical reasons and can be replaced, when this was an + # error, example originated from https://github.com/nim-lang/nim/issues/3158 + type + MyData = object + x: int + + template newDataWindow(data: ref MyData): untyped = + proc testProc(data: ref MyData): string = + "Hello, " & $data.x + testProc(data) + + var d: ref MyData + new(d) + d.x = 10 + doAssert newDataWindow(d) == "Hello, 10" + +block also_true_for_typed_parameters: + template foo(a: typed): untyped = + let a = a + 10 + a + let x = foo(10) + doAssert x == 20 \ No newline at end of file From 8e2d0faf5fa2ba0cc4c50f1ba44dbeefafd57786 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Mon, 19 Feb 2024 21:03:42 +0100 Subject: [PATCH 007/169] refactor: remove `Values` type from `analysis` (#1201) ## Summary Replace the `Values` usages in `injectdestructors` with directly using a `PackedSet`, and remove the `Values` type. --- compiler/mir/analysis.nim | 11 ----------- compiler/sem/injectdestructors.nim | 29 +++++++++++++++++------------ 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/compiler/mir/analysis.nim b/compiler/mir/analysis.nim index eec14495a53..f815d493586 100644 --- a/compiler/mir/analysis.nim +++ b/compiler/mir/analysis.nim @@ -38,11 +38,6 @@ import ] type - Values* = object - ## Stores information about MIR expressions. 
- owned: PackedSet[OpValue] - ## all lvalue expressions that can be moved from - AliveState = enum unchanged dead @@ -58,12 +53,6 @@ func skipConversions*(tree: MirTree, val: OpValue): OpValue = while tree[result].kind == mnkPathConv: result = tree.operand(result) -func isOwned*(v: Values, val: OpValue): bool {.inline.} = - val in v.owned - -func markOwned*(v: var Values, val: OpValue) {.inline.} = - v.owned.incl val - func isAlive*(tree: MirTree, cfg: DataFlowGraph, span: Subgraph, loc: Path, start: InstrPos): bool = ## Computes whether the location named by `loc` does contain a value (i.e., diff --git a/compiler/sem/injectdestructors.nim b/compiler/sem/injectdestructors.nim index fb8471c3f07..07d961727c2 100644 --- a/compiler/sem/injectdestructors.nim +++ b/compiler/sem/injectdestructors.nim @@ -175,11 +175,15 @@ type needsFinally: bool ## whether the destructor needs to be placed in a ## 'finally' clause + Moves = PackedSet[OpValue] + ## A set storing the operands of all sinks that were collapsed into + ## moves. + AnalysisResults = object ## Bundled-up immutable state needed for assignment rewriting. Since ## they're immutable, ``Cursor``s are used in order to not copy # XXX: ideally, view types (i.e. ``lent``) would be used here - v: Cursor[Values] + moves: Cursor[Moves] entities: Cursor[EntityDict] destroy: Cursor[seq[DestroyEntry]] @@ -357,9 +361,10 @@ func computeOwnership(tree: MirTree, cfg: DataFlowGraph, entities: EntityDict, unreachable() func collapseSink(tree: MirTree, cfg: var DataFlowGraph, - entities: EntityDict): Values = + entities: EntityDict): Moves = ## Computes for every ``mnkSink`` node what operation (copy or move) it has - ## to collapse to, returning the result(s) as a ``Values`` instance. + ## to collapse to, returning a set with the operands of all sinks that are + ## collapsed into moves. ## ## In addition, the DFG instructions in `cfg` for sinks-turned-into-moves ## are updated to ``opConsume`` instructions. 
@@ -375,7 +380,7 @@ func collapseSink(tree: MirTree, cfg: var DataFlowGraph, computeOwnership(tree, cfg, entities, computePath(tree, NodePosition opr), i + 1): update.add i - result.markOwned(opr) + result.incl opr # for the moment, sinks are always turned into copies for values without # custom destroy/copy/sink behaviour @@ -553,13 +558,13 @@ func needsReset(tree: MirTree, cfg: DataFlowGraph, ar: AnalysisResults, # the presence of the value is observed -> a reset is required result = true -func isMove(tree: MirTree, v: Values, n: NodePosition): bool = +func isMove(tree: MirTree, moves: Moves, n: NodePosition): bool = ## Returns whether the assignment modifier at `n` is a move modifier (after ## collapsing sink). case tree[n].kind: of mnkCopy: false of mnkMove: true - of mnkSink: v.isOwned(tree.operand(n)) + of mnkSink: tree.operand(n) in moves else: unreachable(tree[n].kind) # ------- code generation routines -------- @@ -662,7 +667,7 @@ proc expandAsgn(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, if relation.isSame: # a self-assignment -> elide c.remove(tree, stmt) - elif isMove(tree, ar.v[], operator): + elif isMove(tree, ar.moves[], operator): # a move is possible -> sink if true: template needsReset(): bool = @@ -740,7 +745,7 @@ proc expandDef(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, dest = tree.child(at, 0) operator = tree.child(at, 1) source = tree.child(operator, 0) - case isMove(tree, ar.v[], operator) + case isMove(tree, ar.moves[], operator) of false: # a copy is required. Transform ``def x = copy a.b`` into: # def x @@ -895,7 +900,7 @@ proc rewriteAssignments(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, # only rewrite definitions with modifiers. 
The ``move`` modifier # is ignored since there's nothing to be rewritten for it if tree[src].kind in ModifierNodes - {mnkMove}: - if not isMove(tree, ar.v[], src): + if not isMove(tree, ar.moves[], src): checkCopy(ctx.graph, tree, src, diags) # emit a warning for copies-to-sink if isUsedForSink(tree, stmt): @@ -906,7 +911,7 @@ proc rewriteAssignments(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, let src = tree.child(stmt, 1) # only rewrite assignments with modifiers if tree[src].kind in ModifierNodes: - if not isMove(tree, ar.v[], src): + if not isMove(tree, ar.moves[], src): checkCopy(ctx.graph, tree, src, diags) expandAsgn(tree, ctx, ar, env, stmt, i, c) else: @@ -1157,13 +1162,13 @@ proc injectDestructorCalls*(g: ModuleGraph, idgen: IdGenerator, let entities = initEntityDict(body.code, actx.cfg) - values = collapseSink(body.code, actx.cfg, entities) + moves = collapseSink(body.code, actx.cfg, entities) let destructors = computeDestructors(body.code, actx.cfg, entities) rewriteAssignments( body.code, actx, - AnalysisResults(v: cursor(values), + AnalysisResults(moves: cursor(moves), entities: cursor(entities), destroy: cursor(destructors)), env, diags, changes) From 8969bf2ef5996b51f3417c466865baf1f2b71a1b Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Mon, 19 Feb 2024 22:19:56 +0100 Subject: [PATCH 008/169] fix wrong signed-ness for floats in constants (#1202) ## Summary Fix the sign of `0.0` float values being inverted in implicit or explicit constant expressions. All backends were affected, but the issue only surfaced seemingly at random. ## Details `datatables` used `exprStructuralEquivalent`, which treats `0.0` as being equal to `-0.0`, for comparing nodes storing literal values. Since the hashing procedure for nodes hashed the float values' bit-representation, the problem only surfaced in case of a hash or bucket collision.
A dedicated comparison procedure that compares the bit-representations of float values is now used, fixing the issue. --- compiler/mir/datatables.nim | 23 +++++++++++++++++++++-- tests/compiler/tdatatables.nim | 3 +-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/compiler/mir/datatables.nim b/compiler/mir/datatables.nim index e2ac06d6857..cefb94ba452 100644 --- a/compiler/mir/datatables.nim +++ b/compiler/mir/datatables.nim @@ -8,7 +8,6 @@ import compiler/ast/[ ast_query, ast_types, - trees, types ], compiler/mir/[ @@ -74,6 +73,26 @@ func hashTree(tree: ConstrTree): Hash = result = result !& hash(tree[0].typ.kind) result = !$(result) +func cmp(a, b: PNode): bool = + ## Compares for equality two nodes storing literal values. For float values, + ## their bit-representation is compared. + if a.kind != b.kind: + return false + + case a.kind + of nkIntLiterals: + a.intVal == b.intVal + of nkFloatLiterals: + cast[BiggestUInt](a.floatVal) == cast[BiggestUInt](b.floatVal) + of nkStrLiterals: + a.strVal == b.strVal + of nkNilLit: + true + of nkRange: + cmp(a[0], b[0]) and cmp(a[1], b[1]) + else: + unreachable(a.kind) + proc cmp(a, b: ConstrTree): bool = ## Compares two MIR constant expressions for structural equality. 
proc `==`(a, b: MirNode): bool {.nimcall.} = @@ -82,7 +101,7 @@ proc cmp(a, b: ConstrTree): bool = case a.kind of mnkLiteral: - exprStructuralEquivalent(a.lit, b.lit) + cmp(a.lit, b.lit) of mnkProc: a.prc == b.prc of mnkConstr, mnkObjConstr: diff --git a/tests/compiler/tdatatables.nim b/tests/compiler/tdatatables.nim index d0129b49977..9485607bdb8 100644 --- a/tests/compiler/tdatatables.nim +++ b/tests/compiler/tdatatables.nim @@ -37,8 +37,7 @@ block tree_equality: @[node(mnkLiteral, t1, lit, newStrNode(nkStrLit, "a"))], @[node(mnkLiteral, t1, lit, newFloatNode(nkFloatLit, 0.0))], # 0.0 and -0.0 are different float values - # FIXME: doesn't work yet - #@[node(mnkLiteral, t1, lit, newFloatNode(nkFloatLit, -0.0))], + @[node(mnkLiteral, t1, lit, newFloatNode(nkFloatLit, -0.0))], # --- ordered aggregates @[node(mnkConstr, t1, len, 0), node(mnkEnd)], From 4f591b377fbbcaff7571387feb4680175fcac0b5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 20 Feb 2024 03:20:37 +0000 Subject: [PATCH 009/169] build(deps): Bump dawidd6/action-download-artifact from 3.0.0 to 3.1.1 (#1203) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [dawidd6/action-download-artifact](https://github.com/dawidd6/action-download-artifact) from 3.0.0 to 3.1.1.
Release notes

Sourced from dawidd6/action-download-artifact's releases.

v3.1.1

What's Changed

Full Changelog: https://github.com/dawidd6/action-download-artifact/compare/v3...v3.1.1

v3.1.0

What's Changed

New Contributors

Full Changelog: https://github.com/dawidd6/action-download-artifact/compare/v3...v3.1.0

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=dawidd6/action-download-artifact&package-manager=github_actions&previous-version=3.0.0&new-version=3.1.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase` . [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/publisher.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/publisher.yml b/.github/workflows/publisher.yml index 694f51c37ae..9bc6c6939ed 100644 --- a/.github/workflows/publisher.yml +++ b/.github/workflows/publisher.yml @@ -30,7 +30,7 @@ jobs: # Download the latest instance of artifacts from a build done previously - name: Download generated docs - uses: dawidd6/action-download-artifact@v3.0.0 + uses: dawidd6/action-download-artifact@v3.1.1 with: workflow: ci.yml workflow_conclusion: success @@ -40,7 +40,7 @@ jobs: path: doc/html - name: Download generated source archive - uses: dawidd6/action-download-artifact@v3.0.0 + uses: dawidd6/action-download-artifact@v3.1.1 with: workflow: ci.yml workflow_conclusion: success @@ -50,7 +50,7 @@ jobs: path: release-staging - name: Download generated release binaries - uses: dawidd6/action-download-artifact@v3.0.0 + uses: dawidd6/action-download-artifact@v3.1.1 with: workflow: ci.yml workflow_conclusion: success @@ -61,7 +61,7 @@ jobs: path: release-staging - name: Download release manifest tool - uses: dawidd6/action-download-artifact@v3.0.0 + uses: dawidd6/action-download-artifact@v3.1.1 with: workflow: ci.yml workflow_conclusion: success From 
4c2ff3c4f9f8e1b16e7f0504a16d14acf8cc4b28 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Tue, 20 Feb 2024 21:36:54 +0100 Subject: [PATCH 010/169] mir: separate hook injection from move analyzer (#1204) ## Summary Make turning copy and move assignments into `=copy` and `=sink` calls a standalone MIR pass; semantics don't change, but higher-quality MIR code is produced. This is progress towards fully decoupling lifetime hooks from the move analyzer / destructor injection. ## Details ### General architecture * the `injectdestructors` pass/module only collapses `sink` assignments and updates the MIR tree accordingly * turning copy assignments into `=copy` and sink assignments into `=sink` calls is done by the new `injecthooks` pass/module * reporting errors and warnings regarding hooks is also made part of the `injecthooks` pass * the `injecthooks` pass is run after the `injectdestructors` pass * handling of `--expandArc` is moved to `backends.process`, so that it can happen after hook injection ### `injectdestructors` Pass * assignments are not replaced with `=copy` or `=sink` calls * `expandAsgn` is renamed to `specializeAsgn` * `sink` assignments are turned into either `copy`, `move`, or destructive `move` assignments (same as before) * assignments to locations that don't yet store a value are turned into *initializing* assignments (this is necessary for the later `=sink` injection to work) * compared to before, *all* assignments are processed by `specializeAsgn`, not only those for locations with lifetime hooks * `=destroy` hooks are still injected ### `injecthooks` Pass * the pass looks for `move` and `copy` assignment modifiers * if the involved type has lifetime hooks, the assignment is replaced with a call to the appropriate hook * the same injection rules as used previously apply * the error detection and reporting logic is moved over - without change - from `injectdestructors` * an adapter procedure to the legacy pass
management is provided (needed by `backends`) With less surrounding complexity, more effort is spent on using fewer temporaries for the hook call injection: * no temporary is used for the destination operand * no temporary is used for the source operand when it's guaranteed to not alias the destination. This is the case for: * `move` assignments * define with `copy` (e.g., `def x = copy y`) * initial assignments with `copy` (e.g., `x := copy y`) Fewer temporaries mean less work for the pass eliminating unnecessary temporaries. ### Technical Correctness * `mirconstr.inline` is only usable with lvalue expressions and always creates a non-owning temporary (instead of an owning temporary) * the remaining usage of `mirconstr.inline` in `injectdestructors.lowerBranchSwitch`, where an owning temporary is required, is replaced with using `wrapTemp` --------- Co-authored-by: Saem Ghani --- compiler/backend/backends.nim | 12 +- compiler/mir/injecthooks.nim | 259 +++++++++++++++ compiler/mir/mirconstr.nim | 18 +- compiler/sem/injectdestructors.nim | 369 +++++----------------- tests/arc/topt_no_cursor.nim | 56 ++-- tests/arc/topt_wasmoved_destroy_pairs.nim | 6 +- 6 files changed, 378 insertions(+), 342 deletions(-) create mode 100644 compiler/mir/injecthooks.nim diff --git a/compiler/backend/backends.nim b/compiler/backend/backends.nim index a2e48ec4b28..5fe5bd5fe03 100644 --- a/compiler/backend/backends.nim +++ b/compiler/backend/backends.nim @@ -4,6 +4,7 @@ import std/[ deques, dynlib, # for computing possible candidate names + strtabs, tables ], compiler/ast/[ @@ -17,10 +18,12 @@ import cgirgen ], compiler/front/[ + msgs, options ], compiler/mir/[ datatables, + injecthooks, mirbodies, mirbridge, mirconstr, @@ -28,7 +31,8 @@ import mirgen, mirpasses, mirtrees, - sourcemaps + sourcemaps, + utils ], compiler/modules/[ modulegraphs, @@ -324,6 +328,12 @@ proc process(body: var MirBody, prc: PSym, graph: ModuleGraph, ## procedure.
if shouldInjectDestructorCalls(prc): injectDestructorCalls(graph, idgen, env, prc, body) + injectHooks(body, graph, env, prc) + + if graph.config.arcToExpand.hasKey(prc.name.s): + graph.config.msgWrite("--expandArc: " & prc.name.s & "\n") + graph.config.msgWrite(render(body.code, addr env)) + graph.config.msgWrite("\n-- end of expandArc ------------------------\n") let target = case graph.config.backend diff --git a/compiler/mir/injecthooks.nim b/compiler/mir/injecthooks.nim new file mode 100644 index 00000000000..d7d4cad67a3 --- /dev/null +++ b/compiler/mir/injecthooks.nim @@ -0,0 +1,259 @@ +## Implements the MIR pass for replacing copy and move assignments with the +## ``=copy`` or ``=sink`` hook (if available for the type). +## +## Future direction: injection of ``=destroy`` hooks also needs to happen +## here. + +import + compiler/ast/[ + ast_query, + ast_types, + lineinfos + ], + compiler/front/[ + msgs, + options + ], + compiler/mir/[ + mirbodies, + mirchangesets, + mirconstr, + mirenv, + mirtrees + ], + compiler/modules/[ + modulegraphs + ], + compiler/utils/[ + idioms + ] + +# XXX: reports are a code smell meaning data types are misplaced +from compiler/ast/reports_sem import SemReport +from compiler/ast/report_enums import ReportKind + +# XXX: temporary dependency until destroy hooks are injected here +from compiler/sem/injectdestructors import getOp, genDestroy, buildVoidCall + +from compiler/sem/liftdestructors import boolLit, cyclicType + +type + LocalDiagKind = enum + ldkPassCopyToSink ## a copy is introduced in a consume context + ldkUnavailableTypeBound ## a type-bound operator is requested but not + ## available + + LocalDiag = object + ## A temporary diagnostic representation that is later turned into a + ## ``SemReport`` + pos: NodePosition ## the location of the report + case kind: LocalDiagKind + of ldkUnavailableTypeBound: + op: TTypeAttachedOp + of ldkPassCopyToSink: + discard + +const + skipAliases = {tyGenericInst, tyAlias, tySink} + +proc 
isUsedForSink(tree: MirTree, stmt: NodePosition): bool = + ## Computes whether the definition statement is something produced for + ## sink parameter handling. + assert tree[stmt].kind in {mnkDef, mnkDefUnpack} + let def = tree.operand(stmt, 0) + if tree[def].kind != mnkTemp: + # only temporaries are used for sink handling + return + + # look for whether the temporary is used as a 'consume' node's operand, + # but do reduce the amount of work by not searching beyond the + # temporary's lifetime + # HACK: this detection relies on the code shapes ``mirgen`` currently + # emits for sink parameters and is thus very brittle. The proper + # solution is to mark through a side channel the statement as being + # generated for a sink parameter + var + n = tree.sibling(stmt) + depth = 0 + while n < NodePosition tree.len: + case tree[n].kind + of mnkConsume: + let x = tree.operand(n) + if tree[x].kind == mnkTemp and tree[x].temp == tree[def].temp: + # the temporary is used for sink parameter passing + result = true + break + of mnkScope: + inc depth + of mnkEnd: + if tree[n].kind == mnkScope: + dec depth + if depth < 0: + # the end of the temporary's surrounding scope is reached + break + else: + discard + + inc n + +proc reportDiagnostics(g: ModuleGraph, body: MirBody, + owner: PSym, diags: var seq[LocalDiag]) = + ## Reports all diagnostics in `diags` as ``SemReport``s and clear the list + for diag in diags.items: + let ast = body.sourceFor(diag.pos) + let rep = + case diag.kind + of ldkUnavailableTypeBound: + SemReport(kind: rsemUnavailableTypeBound, + typ: body[diag.pos].typ, + str: AttachedOpToStr[diag.op], + ast: ast, + sym: owner) + of ldkPassCopyToSink: + SemReport(kind: rsemCopiesToSink, ast: ast) + + localReport(g.config, ast.info, rep) + +func couldIntroduceCycle(tree: MirTree, dest: NodePosition): bool = + # copies to locals or globals can't introduce cyclic structures, as + # both are standlone and not part of any other structure + tree[dest].kind notin {mnkLocal, 
mnkTemp, mnkParam, mnkGlobal} + +template genCopy(bu: var MirBuilder, graph: ModuleGraph, env: var MirEnv, + op: PSym, tree: MirTree, dst: NodePosition, + maybeCyclic: bool, src: untyped) = + ## Emits a ``=copy`` hook call with `dst` and `src` as the arguments. If ORC + ## is enabled, an additional bool value is passed to the hook, informing + ## whether a reference cycle might be created at run-time. + bu.buildVoidCall(env, op): + bu.emitByName ekMutate: + bu.emitFrom(tree, dst) + bu.subTree mnkArg: + src + + if graph.config.selectedGC == gcOrc and + cyclicType(tree[dst].typ.skipTypes(skipAliases + {tyDistinct}), graph): + # pass whether the copy can potentially introduce cycles as the third + # parameter: + let c = maybeCyclic and couldIntroduceCycle(tree, dest) + bu.emitByVal literal(boolLit(graph, unknownLineInfo, c)) + +proc injectHooks*(body: MirBody, graph: ModuleGraph, env: var MirEnv, + owner: PSym, changes: var Changeset) = + ## Replaces all copy and move assignments for locations with lifetime hooks + ## to the types' respective hook. 
+ var diags: seq[LocalDiag] + template tree: MirTree = body.code + + for i, n in tree.pairs: + case n.kind + of mnkCopy: + let + stmt = tree.parent(i) + typ = tree[stmt, 0].typ + + if not hasDestructor(typ): + # nothing to insert + continue + + let + dest = tree.child(stmt, 0) + src = tree.child(i, 0) + op = getOp(graph, typ, attachedAsgn) + + if sfError in op.flags: + # emit an error if the hook is not available, but still continue + diags.add LocalDiag(pos: src, kind: ldkUnavailableTypeBound, + op: attachedAsgn) + + if tree[stmt].kind == mnkDef and isUsedForSink(tree, stmt): + # emit a warning for copies-to-sink: + diags.add LocalDiag(pos: src, kind: ldkPassCopyToSink) + + case tree[stmt].kind + of mnkDef, mnkDefUnpack: + # turn a ``def x = copy a.b`` into: + # def x + # =copy(name x, arg a.b) + changes.replace(tree, i): MirNode(kind: mnkNone) + changes.insert(tree, tree.sibling(stmt), i, bu): + # the destination is a local; the assignment thus cannot introduce a + # cycle + genCopy(bu, graph, env, op, tree, dest, false): + bu.emitFrom(tree, src) + of mnkInit: + # we know the destination cannot overlap with the source. Replace + # ``x := copy a.b`` with: + # =copy(name x, arg a.b) + changes.replaceMulti(tree, stmt, bu): + genCopy(bu, graph, env, op, tree, dest, true): + bu.emitFrom(tree, src) + of mnkAsgn: + # the source and destination could overlap. 
Replace ``x = copy a.b`` + # with: + # def_cursor _1 = a.b + # =copy(name x, arg _1) + # XXX: the temporary could be omitted in more cases by using proper + # alias analysis + changes.replaceMulti(tree, stmt, bu): + let tmp = bu.inline(tree, src) + genCopy(bu, graph, env, op, tree, dest, true): + bu.use(tmp) + else: + unreachable(tree[stmt].kind) + + of mnkMove: + let + stmt = tree.parent(i) + typ = tree[stmt, 0].typ + + if not hasDestructor(typ) or + tree[stmt].kind in {mnkDef, mnkDefUnpack, mnkInit}: + # nothing to do if: + # * the type has no hooks + # * it's guaranteed that there's no value in the destination + continue + + let + dest = tree.child(stmt, 0) + src = tree.child(i, 0) + op = getOp(graph, typ, attachedSink) + + # note: the move analyzer has to make sure that the source operand + # doesn't overlap with the destination, so no temporary for the source is + # needed + if op != nil: + # replace ``x = move a.b`` with: + # =sink(name x, arg a.b) + changes.replaceMulti(tree, stmt, bu): + bu.buildVoidCall(env, op): + bu.subTree mnkName: + bu.subTree MirNode(kind: mnkTag, effect: ekMutate): + bu.emitFrom(tree, dest) + bu.subTree mnkArg: + bu.emitFrom(tree, src) + else: + # no sink hook exists, rewrite ``x.y = move a.b`` into: + # bind_mut _1 = x.y + # =destroy(name _1) + # _1 = move a.b + var loc: Value + changes.insert(tree, stmt, dest, bu): + loc = bu.bindMut(tree, dest) + genDestroy(bu, graph, env, loc) + changes.replaceMulti(tree, dest, bu): + bu.use loc + + else: + discard "nothing to do" + + # turn the collected diagnostics into reports and report them: + reportDiagnostics(graph, body, owner, diags) + +proc injectHooks*(body: var MirBody, graph: ModuleGraph, env: var MirEnv, + owner: PSym) = + ## Adapter for the legacy pass-application pipeline. Once possible, the pass + ## needs to be treated as just another MIR pass. 
+ var c = initChangeset(body.code) + injectHooks(body, graph, env, owner, c) + apply(body.code, prepare(c)) diff --git a/compiler/mir/mirconstr.nim b/compiler/mir/mirconstr.nim index e7db1ec85fa..32500e74733 100644 --- a/compiler/mir/mirconstr.nim +++ b/compiler/mir/mirconstr.nim @@ -352,10 +352,14 @@ func emitByVal*(bu: var MirBuilder, y: Value) = bu.subTree mnkArg: bu.use y -func emitByName*(bu: var MirBuilder, val: Value, e: EffectKind) = +template emitByName*(bu: var MirBuilder, e: EffectKind, body: untyped) = bu.subTree mnkName: bu.subTree MirNode(kind: mnkTag, effect: e): - bu.use val + body + +func emitByName*(bu: var MirBuilder, val: Value, e: EffectKind) = + bu.emitByName e: + bu.use val func move*(bu: var MirBuilder, val: Value) = ## Emits ``move val``. @@ -375,17 +379,19 @@ func asgnMove*(bu: var MirBuilder, a, b: Value) = bu.move b func inline*(bu: var MirBuilder, tree: MirTree, fr: NodePosition): Value = - ## Inlines the operand for non-mutating use. This is meant to be used for - ## materialzing immutable arguments when inlining calls / expanding + ## Inlines the lvalue operand for non-mutating use. This is meant to be used + ## for materializing immutable arguments when inlining calls / expanding ## assignments.
case tree[fr].kind of Atoms: result = Value(node: tree[fr]) - else: + of LvalueExprKinds - Atoms: result = allocTemp(bu, tree[fr].typ) - bu.subTree mnkDef: + bu.subTree mnkDefCursor: bu.use result bu.emitFrom(tree, fr) + else: + unreachable("can only inline lvalue-expression arguments") func bindImmutable*(bu: var MirBuilder, tree: MirTree, lval: NodePosition): Value = diff --git a/compiler/sem/injectdestructors.nim b/compiler/sem/injectdestructors.nim index 07d961727c2..48e4335fc04 100644 --- a/compiler/sem/injectdestructors.nim +++ b/compiler/sem/injectdestructors.nim @@ -9,8 +9,8 @@ ## This module implements the following MIR passes: ## - the 'switch' operation lowering (``lowerBranchSwitch``) -## - the pass for rewriting assignments into calls to the respective -## lifetime-tracking hooks +## - the pass for collapsing sink assignments into copies, moves, and +## destrutive moves ## - the pass for injected ``wasMoved`` calls for consumed lvalues ## - the destructor (i.e. ``=destroy`` hook) injection ## @@ -102,7 +102,6 @@ import algorithm, hashes, packedsets, - strtabs, tables ], compiler/ast/[ @@ -117,16 +116,14 @@ import mirconstr, mirenv, mirtrees, - sourcemaps, - utils + sourcemaps ], compiler/modules/[ magicsys, modulegraphs ], compiler/front/[ - options, - msgs + options ], compiler/sem/[ aliasanalysis, @@ -139,10 +136,6 @@ import idioms ] -# xxx: reports are a code smell meaning data types are misplaced -from compiler/ast/reports_sem import SemReport -from compiler/ast/report_enums import ReportKind - type AnalyseCtx = object cfg: DataFlowGraph @@ -187,25 +180,6 @@ type entities: Cursor[EntityDict] destroy: Cursor[seq[DestroyEntry]] - LocalDiagKind = enum - ldkPassCopyToSink ## a copy is introduced in a consume context - ldkUnavailableTypeBound ## a type-bound operator is requested but not - ## available - - LocalDiag = object - ## A temporary diagnostic representation that is later turned into a - ## ``SemReport`` - pos: NodePosition ## the location of 
the report - case kind: LocalDiagKind - of ldkUnavailableTypeBound: - op: TTypeAttachedOp - of ldkPassCopyToSink: - discard - -const - skipAliases = {tyGenericInst, tyAlias, tySink} - ## the set of types to not consider when looking up a type-bound operator - iterator ritems[T](x: openArray[T]): lent T = ## Iterates and yields the items from the container `x` in reverse var i = x.high @@ -558,18 +532,9 @@ func needsReset(tree: MirTree, cfg: DataFlowGraph, ar: AnalysisResults, # the presence of the value is observed -> a reset is required result = true -func isMove(tree: MirTree, moves: Moves, n: NodePosition): bool = - ## Returns whether the assignment modifier at `n` is a move modifier (after - ## collapsing sink). - case tree[n].kind: - of mnkCopy: false - of mnkMove: true - of mnkSink: tree.operand(n) in moves - else: unreachable(tree[n].kind) - # ------- code generation routines -------- -template buildVoidCall(bu: var MirBuilder, env: var MirEnv, p: PSym, +template buildVoidCall*(bu: var MirBuilder, env: var MirEnv, p: PSym, body: untyped) = let prc = p # prevent multi evaluation bu.subTree mnkVoid: @@ -596,46 +561,6 @@ proc genDestroy*(bu: var MirBuilder, graph: ModuleGraph, env: var MirEnv, bu.buildVoidCall(env, destr): bu.emitByName(target, ekMutate) -proc genInjectedSink(bu: var MirBuilder, graph: ModuleGraph, env: var MirEnv, - dest, source: Value) = - ## Generates and emits either a call to the ``=sink`` hook, or (if none - ## exists), a sink emulated via a destructor-call + bitwise-copy. 
- let op = getOp(graph, dest.typ, attachedSink) - if op != nil: - bu.buildVoidCall(env, op): - bu.emitByName(dest, ekMutate) - bu.emitByVal source - else: - # without a sink hook, a ``=destroy`` + blit-copy is used - genDestroy(bu, graph, env, dest) - bu.asgnMove dest, source - -proc genSinkFromTemporary(bu: var MirBuilder, graph: ModuleGraph, - env: var MirEnv, dest, source: Value) = - ## Similar to ``genInjectedSink`` but generates code for destructively - ## moving the source operand into a temporary first. - let tmp = bu.materializeMove(source) - genWasMoved(bu, graph, source) - genInjectedSink(bu, graph, env, dest, tmp) - -proc genCopy(bu: var MirBuilder, graph: ModuleGraph, env: var MirEnv, - dst, src: Value, maybeCyclic: bool) = - ## Emits a ``=copy`` hook call with `dst`, `src`, and (if necessary) - ## `maybeCyclic` as the arguments. - let - t = dst.typ - op = getOp(graph, t, attachedAsgn) - - bu.buildVoidCall(env, op): - bu.emitByName(dst, ekMutate) - bu.emitByVal src - - if graph.config.selectedGC == gcOrc and - cyclicType(t.skipTypes(skipAliases + {tyDistinct}), graph): - # pass whether the copy can potentially introduce cycles as the third - # parameter: - bu.emitByVal literal(boolLit(graph, unknownLineInfo, maybeCyclic)) - func destructiveMoveOperands(bu: var MirBuilder, tree: MirTree, src: NodePosition ): tuple[src, clear: Value] = @@ -650,11 +575,17 @@ func destructiveMoveOperands(bu: var MirBuilder, tree: MirTree, # the assignment source (bu.bindImmutable(tree, src), bu.bindMut(tree, x)) -proc expandAsgn(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, - env: var MirEnv, stmt: NodePosition, pos: InstrPos, - c: var Changeset) = - ## Rewrites the assignment at `stmt` into either a ``=copy`` hook call, - ## ``=sink`` hook call, move, or destructive move. 
+proc specializeAsgn(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, + stmt: NodePosition, pos: InstrPos, c: var Changeset) = + ## Specializes the modifier-using assignment at `stmt` using the analysis + ## results: + ## * guaranteed self-assignments are eliminated (i.e., the assignment is + ## removed) + ## * sink assignments are turned into copy, move, or destructive move + ## assignments + ## * normal assignments are turned into initializing assignments (if + ## possible) + ## ## `pos` is the 'def' data-flow instruction corresponding to the assignment. let dest = tree.child(stmt, 0) @@ -667,118 +598,57 @@ proc expandAsgn(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, if relation.isSame: # a self-assignment -> elide c.remove(tree, stmt) - elif isMove(tree, ar.moves[], operator): - # a move is possible -> sink - if true: - template needsReset(): bool = - # only a ``sink`` modifier allows for the injection of resets - (tree[operator].kind == mnkSink and - needsReset(tree, ctx.cfg, ar, sourcePath, pos)) - - if tree[stmt].kind != mnkInit and - isAlive(tree, ctx.cfg, ar.entities[], destPath, pos): - # there already exists a value in the destination location -> use the - # sink operation - if true: - c.replaceMulti(tree, stmt, bu): - let a = bu.bindMut(tree, dest) - if isAPartOfB(relation) != no: - # this is a potential part-to-whole assignment, e.g.: - # ``x = move x.y``. 
We need to move the source value into a - # temporary first, as ``=sink`` would otherwise destroy ``x`` - # first, also destroying ``x.y`` in the process - let b = bu.bindMut(tree, source) - genSinkFromTemporary(bu, ctx.graph, env, a, b) - elif needsReset(): - # a sink from a location that needs to be reset after the move - # (i.e., a destructive move) - let (b, clear) = bu.destructiveMoveOperands(tree, source) - genInjectedSink(bu, ctx.graph, env, a, b) - genWasMoved(bu, ctx.graph, clear) - else: - # a sink from a location that doesn't need to be reset afterwards - let b = bu.bindImmutable(tree, source) - genInjectedSink(bu, ctx.graph, env, a, b) - - elif needsReset(): - # the destination location doesn't contain a value yet (which would - # need to be destroyed first otherwise) -> a bitwise copy can be used - # we don't need to check for part-to-whole assignments here, because - # if the destination location has no value, so don't locations derived - # from it, in which case it doesn't matter when the reset happens - # XXX: the reset could be omitted for part-to-whole assignments - c.replaceMulti(tree, stmt, bu): - let - a = bu.bindMut(tree, dest) - (b, clear) = bu.destructiveMoveOperands(tree, source) - bu.asgnMove a, b + elif tree[operator].kind == mnkSink: + let isAlive = tree[stmt].kind == mnkAsgn and + isAlive(tree, ctx.cfg, ar.entities[], destPath, pos) + if tree.operand(operator) in ar.moves[]: + # turn the sink into a move + if isAlive and isAPartOfB(relation) != no: + # it's potentially a part-to-whole assignment, e.g.: ``x = move x.y``, + # and the destination contains a value. 
The value must first be moved + # into a temporary, since a move destination must not overlap with + # the source + var tmp: Value + c.insert(tree, stmt, source, bu): + let b = bu.bindMut(tree, source) + tmp = bu.wrapTemp b.typ: + bu.move b + genWasMoved(bu, ctx.graph, b) + + c.replaceMulti(tree, operator, bu): + bu.move tmp + elif needsReset(tree, ctx.cfg, ar, sourcePath, pos): + # the value can be moved, but the source location needs to be cleared + # afterwards. Turn ``a = sink x.y`` into: + # bind_mut _1 = x.y + # a = move _1 + # wasMoved(name _1) + var b, clear: Value + c.insert(tree, stmt, source, bu): + (b, clear) = bu.destructiveMoveOperands(tree, source) + c.replaceMulti(tree, operator, bu): + bu.move b + c.insert(tree, tree.sibling(stmt), source, bu): genWasMoved(bu, ctx.graph, clear) - - elif tree[operator].kind == mnkSink: - # no reset and/or hook call needs to be injected, simply replace the - # sink modifier with a move - c.changeTree(tree, operator): MirNode(kind: mnkMove) else: - # no hook call nor destructive move is required - discard "nothing to do" - - else: - # a move is not possible -> copy - c.replaceMulti(tree, stmt, bu): - # copies to locals or globals can't introduce cyclic structures, as - # those are standlone and not part of any other structure - let maybeCyclic = - tree[dest].kind notin {mnkLocal, mnkTemp, mnkParam, mnkGlobal} - let - a = bu.bindMut(tree, dest) - b = bu.inline(tree, source) - - genCopy(bu, ctx.graph, env, a, b, maybeCyclic) - -proc expandDef(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, - env: var MirEnv, at: NodePosition, pos: InstrPos, - c: var Changeset) = - ## Depending on whether the source can be moved out of, either rewrites the - ## 'def' at `at` into a call to the ``=copy`` hook call or into a - ## destructive or non-destructive move. `pos` is the data-flow instruction. 
- let - dest = tree.child(at, 0) - operator = tree.child(at, 1) - source = tree.child(operator, 0) - case isMove(tree, ar.moves[], operator) - of false: - # a copy is required. Transform ``def x = copy a.b`` into: - # def x - # bind _1 = a.b - # =copy(name x, arg _1) - c.replace(tree, operator): MirNode(kind: mnkNone) - c.insert(tree, tree.sibling(at), source, bu): - let - a = bu.bindMut(tree, dest) - b = bu.inline(tree, source) - # the destination can only be a cell-like location (local, global, - # etc.), no cycle can possibly be introduced - genCopy(bu, ctx.graph, env, a, b, false) - of true: - assert tree[operator].kind == mnkSink - if needsReset(tree, ctx.cfg, ar, computePath(tree, source), pos): - # the value can be moved, but the location needs to be reset. Transform - # ``def x = sink a.b`` into: - # bind_mut _1 = a.b - # def x = move _1 - # wasMoved(name x) - var tmp, clear: Value - c.insert(tree, at, source, bu): - (tmp, clear) = bu.destructiveMoveOperands(tree, source) - c.replaceMulti(tree, operator, bu): - bu.move tmp - c.insert(tree, tree.sibling(at), source, bu): - genWasMoved(bu, ctx.graph, clear) + # the value can be moved without the source location having to be + # cleared + c.changeTree(tree, operator): + MirNode(kind: mnkMove, typ: tree[operator].typ) else: - # turn into a ``Move`` operation + # the value cannot be moved, turn the sink into a copy c.changeTree(tree, operator): - MirNode(kind: mnkMove, typ: tree[operator].typ) + MirNode(kind: mnkCopy, typ: tree[operator].typ) + if tree[stmt].kind == mnkAsgn and not isAlive: + # the assignment initializes the location + c.changeTree(tree, stmt): MirNode(kind: mnkInit) + else: + # it's a move or copy already, so nothing to change there + if tree[stmt].kind == mnkAsgn and + not isAlive(tree, ctx.cfg, ar.entities[], destPath, pos): + # the assignment initializes the location + c.changeTree(tree, stmt): MirNode(kind: mnkInit) proc consumeArg(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, expr: 
NodePosition, src: OpValue, pos: InstrPos, @@ -816,56 +686,7 @@ proc consumeArg(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, let v = bu.bindMut(tree, NodePosition src) genWasMoved(bu, ctx.graph, v) -proc isUsedForSink(tree: MirTree, stmt: NodePosition): bool = - ## Computes whether the definition statement is something produced for - ## sink parameter handling. - assert tree[stmt].kind in {mnkDef, mnkDefUnpack} - let def = tree.operand(stmt, 0) - if tree[def].kind != mnkTemp: - # only temporaries are used for sink handling - return - - # look for whether the temporary is used as a 'consume' node's operand, - # but do reduce the amount of work by not searching beyond the - # temporary's lifetime - # HACK: this detection relies on the code shapes ``mirgen`` currently - # emits for sink parameters and is thus very brittle. The proper - # solution is to mark through a side channel the statement as being - # generated for a sink parameter - var - n = tree.sibling(stmt) - depth = 0 - while n < NodePosition tree.len: - case tree[n].kind - of mnkConsume: - let x = tree.operand(n) - if tree[x].kind == mnkTemp and tree[x].temp == tree[def].temp: - # the temporary is used for sink parameter passing - result = true - break - of mnkScope: - inc depth - of mnkEnd: - if tree[n].kind == mnkScope: - dec depth - if depth < 0: - # the end of the temporary's surrounding scope is reached - break - else: - discard - - inc n - -proc checkCopy(graph: ModuleGraph, tree: MirTree, expr: NodePosition, - diags: var seq[LocalDiag]) = - let op = getOp(graph, tree[expr].typ, attachedAsgn) - if sfError in op.flags: - diags.add LocalDiag(pos: expr, - kind: ldkUnavailableTypeBound, - op: attachedAsgn) - proc rewriteAssignments(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, - env: var MirEnv, diags: var seq[LocalDiag], c: var Changeset) = ## Rewrites assignments to locations into calls to either the ``=copy`` ## or ``=sink`` hook (see ``expandAsgn`` for more details). 
@@ -889,34 +710,11 @@ proc rewriteAssignments(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, discard else: unreachable(tree[parent].kind) - elif opc == opDef and hasDestructor(tree[val].typ): - # where necessary, rewrite assignments into moves, destructive moves, - # and copies - let stmt = tree.parent(NodePosition val) - - case tree[stmt].kind - of mnkDef, mnkDefUnpack: - let src = tree.child(stmt, 1) - # only rewrite definitions with modifiers. The ``move`` modifier - # is ignored since there's nothing to be rewritten for it - if tree[src].kind in ModifierNodes - {mnkMove}: - if not isMove(tree, ar.moves[], src): - checkCopy(ctx.graph, tree, src, diags) - # emit a warning for copies-to-sink - if isUsedForSink(tree, stmt): - diags.add LocalDiag(kind: ldkPassCopyToSink, - pos: src) - expandDef(tree, ctx, ar, env, stmt, i, c) - of mnkAsgn, mnkInit: - let src = tree.child(stmt, 1) - # only rewrite assignments with modifiers - if tree[src].kind in ModifierNodes: - if not isMove(tree, ar.moves[], src): - checkCopy(ctx.graph, tree, src, diags) - expandAsgn(tree, ctx, ar, env, stmt, i, c) - else: - # e.g., output arguments to procedures - discard "ignore" + elif opc == opDef and (let stmt = tree.parent(NodePosition val); + tree[stmt, 1].kind in {mnkCopy, mnkMove, mnkSink}): + # specialize the modifier-using assignment + assert tree[stmt].kind in {mnkDef, mnkDefUnpack, mnkAsgn, mnkInit} + specializeAsgn(tree, ctx, ar, stmt, i, c) # --------- destructor injection ------------- @@ -1026,13 +824,15 @@ proc lowerBranchSwitch(bu: var MirBuilder, body: MirTree, graph: ModuleGraph, typ = body[target].field.typ assert body[target].kind == mnkPathVariant + assert body[stmt, 1].kind in ModifierNodes let a = bu.wrapMutAlias(typ): # bind the discriminator lvalue, not the variant lvalue bu.subTree MirNode(kind: mnkPathNamed, typ: typ, field: body[target].field): bu.emitFrom(body, NodePosition body.operand(target)) - b = bu.inline(body, body.child(stmt, 1)) + b = bu.wrapTemp typ: 
+ bu.emitFrom(body, body.child(stmt, 1)) # check if the object contains fields requiring destruction: if hasDestructor(objType): @@ -1093,24 +893,6 @@ proc lowerBranchSwitch(bu: var MirBuilder, body: MirTree, graph: ModuleGraph, # generate the ``discriminant = newValue`` assignment: bu.asgn(a, b) -proc reportDiagnostics(g: ModuleGraph, body: MirBody, - owner: PSym, diags: var seq[LocalDiag]) = - ## Reports all diagnostics in `diags` as ``SemReport``s and clear the list - for diag in diags.items: - let ast = body.sourceFor(diag.pos) - let rep = - case diag.kind - of ldkUnavailableTypeBound: - SemReport(kind: rsemUnavailableTypeBound, - typ: body[diag.pos].typ, - str: AttachedOpToStr[diag.op], - ast: ast, - sym: owner) - of ldkPassCopyToSink: - SemReport(kind: rsemCopiesToSink, ast: ast) - - localReport(g.config, ast.info, rep) - func shouldInjectDestructorCalls*(owner: PSym): bool = # only inject destructor calls if the owner is not a generated OP (e.g. a # generated ``=destroy``) and also not an ``.inline`` iterator @@ -1157,7 +939,6 @@ proc injectDestructorCalls*(g: ModuleGraph, idgen: IdGenerator, block: var changes = initChangeset(body.code) - diags: seq[LocalDiag] actx = AnalyseCtx(graph: g, cfg: computeDfg(body.code)) let @@ -1171,16 +952,8 @@ proc injectDestructorCalls*(g: ModuleGraph, idgen: IdGenerator, AnalysisResults(moves: cursor(moves), entities: cursor(entities), destroy: cursor(destructors)), - env, diags, changes) - - # turn the collected diagnostics into reports and report them: - reportDiagnostics(g, body, owner, diags) + changes) injectDestructors(body.code, g, destructors, env, changes) apply(changes) - - if g.config.arcToExpand.hasKey(owner.name.s): - g.config.msgWrite("--expandArc: " & owner.name.s & "\n") - g.config.msgWrite(render(body.code, addr env)) - g.config.msgWrite("\n-- end of expandArc ------------------------\n") \ No newline at end of file diff --git a/tests/arc/topt_no_cursor.nim b/tests/arc/topt_no_cursor.nim index 
3546a896f86..398a0fe593f 100644 --- a/tests/arc/topt_no_cursor.nim +++ b/tests/arc/topt_no_cursor.nim @@ -24,7 +24,7 @@ scope: def _2: string = move _6 wasMoved(name _6) def _3: Target = construct (consume _0, consume _1, consume _2) - result = move _3 + result := move _3 =destroy(name splat) -- end of expandArc ------------------------ --expandArc: delete @@ -33,20 +33,16 @@ scope: def_cursor _0: Node = target[] def_cursor _1: Node = _0[].parent def sibling: Node - def _6: Node = _1[].left - =copy(name sibling, arg _6) (raises) + =copy(name sibling, arg _1[].left) (raises) def_cursor _2: Node = sibling def saved: Node - def _7: Node = _2[].right - =copy(name saved, arg _7) (raises) + =copy(name saved, arg _2[].right) (raises) def_cursor _3: Node = sibling def_cursor _4: Node = saved - bind_mut _8: Node = _3[].right - def _9: Node = _4[].left - =copy(name _8, arg _9) (raises) + def_cursor _6: Node = _4[].left + =copy(name _3[].right, arg _6) (raises) def_cursor _5: Node = sibling - bind_mut _10: Node = _5[].parent - =sink(name _10, arg saved) (raises) + =sink(name _5[].parent, arg saved) (raises) =destroy(name sibling) (raises) -- end of expandArc ------------------------ --expandArc: p1 @@ -59,13 +55,13 @@ scope: def _1: seq[int] = move lresult def _: (seq[int], string) = construct (consume _1, consume ";") bind_mut _3: seq[int] = _.0 - lvalue = move _3 + lvalue := move _3 wasMoved(name _3) bind_mut _4: string = _.1 - lnext = move _4 + lnext := move _4 wasMoved(name _4) def _2: seq[int] = move(name lvalue) - result.value = move _2 + result.value := move _2 =destroy(name _) =destroy(name lnext) =destroy(name lvalue) @@ -76,11 +72,9 @@ scope: try: def_cursor it: KeyValue = x def _0: seq[int] - def _4: seq[int] = it.0 - =copy(name _0, arg _4) + =copy(name _0, arg it.0) def _1: seq[int] - def _5: seq[int] = it.1 - =copy(name _1, arg _5) + =copy(name _1, arg it.1) def a: (seq[int], seq[int]) = construct (consume _0, consume _1) def_cursor _2: (seq[int], seq[int]) = a 
def _3: string = $(arg _2) (raises) @@ -120,7 +114,7 @@ scope: def _7: bool = eqStr(arg _6, arg "opt") if _7: scope: - def _10: string = splitted[1] + def_cursor _10: string = splitted[1] =copy(name lan_ip, arg _10) def_cursor _8: string = lan_ip echo(arg type(array[0..0, string]), arg _8) (raises) @@ -136,8 +130,7 @@ scope: scope: try: def shadowScope: Scope - def _7: Scope = c[].currentScope - =copy(name shadowScope, arg _7) (raises) + =copy(name shadowScope, arg c[].currentScope) (raises) rawCloseScope(arg c) (raises) scope: def_cursor _0: Scope = shadowScope @@ -160,8 +153,7 @@ scope: def_cursor _5: int = i def sym: lent Symbol = borrow a[_5] def _6: Symbol - def _8: Symbol = sym[] - =copy(name _6, arg _8) + =copy(name _6, arg sym[]) addInterfaceDecl(arg c, consume _6) (raises) i = addI(arg i, arg 1) (raises) finally: @@ -177,7 +169,7 @@ scope: def _2: bool = eqI(arg _1, arg 2) if _2: scope: - result = move x + result := move x wasMoved(name x) return def_cursor _3: sink string = x @@ -202,8 +194,7 @@ scope: if _3: scope: def _4: string - def _16: string = this[].value - =copy(name _4, arg _16) + =copy(name _4, arg this[].value) _1 := construct (consume _4, consume "") break L0 scope: @@ -211,12 +202,11 @@ scope: def_cursor _5: string = this[].value def _6: string = parentDir(arg _5) (raises) def _7: string - def _17: string = this[].value - =copy(name _7, arg _17) + =copy(name _7, arg this[].value) def _8: tuple[head: string, tail: string] = splitPath(consume _7) (raises) - bind_mut _18: string = _8.1 - def _9: string = move _18 - wasMoved(name _18) + bind_mut _16: string = _8.1 + def _9: string = move _16 + wasMoved(name _16) _1 := construct (consume _6, consume _9) wasMoved(name _6) finally: @@ -231,13 +221,11 @@ scope: def_cursor _12: string = par.0 def_cursor _13: string = par.1 def _14: seq[string] = getSubDirs(arg _12, arg _13) (raises) - bind_mut _19: seq[string] = this[].matchDirs - =sink(name _19, arg _14) + =sink(name this[].matchDirs, arg _14) break 
L1 scope: def _15: seq[string] = construct () - bind_mut _20: seq[string] = this[].matchDirs - =sink(name _20, arg _15) + =sink(name this[].matchDirs, arg _15) finally: =destroy(name par) -- end of expandArc ------------------------''' diff --git a/tests/arc/topt_wasmoved_destroy_pairs.nim b/tests/arc/topt_wasmoved_destroy_pairs.nim index 6ee175c65ac..3e23ce3ee25 100644 --- a/tests/arc/topt_wasmoved_destroy_pairs.nim +++ b/tests/arc/topt_wasmoved_destroy_pairs.nim @@ -29,7 +29,7 @@ scope: scope: def a: int = 0 def b: int = 4 - def i: int = sink a + def i: int = copy a block L0: scope: while true: @@ -76,11 +76,11 @@ scope: scope: return def _0: string = boolToStr(arg cond) - str = move _0 + str := move _0 def _1: bool = not(arg cond) if _1: scope: - result = move str + result := move str wasMoved(name str) return finally: From f46227d7e16c444282f3ea51cc807a51c9122653 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Fri, 23 Feb 2024 02:54:13 +0100 Subject: [PATCH 011/169] fix(sem): crash with manual `=destroy` hook call (#1205) ## Summary Fix a bug where manually calling the `=destroy` (or `=trace`) hook for a type where the hook is compiler-generated crashed the compiler when the call is within a typed procedure definition that is returned by a macro or template. Fixes https://github.com/nim-works/nimskull/issues/1161. ## Details All compiler-synthesized hook procedures are marked as `sfFromGeneric`. The `owner` of such procedures is expected to be the generic procedure the procedure is an instantiation of (`skipGenericOwner` the immediate owner), but this wasn't the case for the synthesized hooks, where the owner was set to the owner of the type the hook is synthesized for. When a compiler-synthesized hook is used in overload resolution, the search for the owning module in `initCallCandidate` skips over the `skModule` symbol (via `skipGenericOwner`), thus resulting in an NPE. 
To fix the issue, the owner of synthesized hook procedures is now set to the hook kind's corresponding generic magic procedure from the `system` module, making the produced symbols well-formed. --------- Co-authored-by: Saem Ghani --- compiler/ast/ast_query.nim | 2 + compiler/sem/liftdestructors.nim | 8 ++-- .../tsynthesized_destroy_hook_roundtrip.nim | 43 +++++++++++++++++++ 3 files changed, 50 insertions(+), 3 deletions(-) create mode 100644 tests/lang_callable/macros/tsynthesized_destroy_hook_roundtrip.nim diff --git a/compiler/ast/ast_query.nim b/compiler/ast/ast_query.nim index 19c9fa1426f..57533b37bbe 100644 --- a/compiler/ast/ast_query.nim +++ b/compiler/ast/ast_query.nim @@ -209,6 +209,8 @@ const UnknownLockLevel* = TLockLevel(1001'i16) AttachedOpToStr*: array[TTypeAttachedOp, string] = [ "=destroy", "=copy", "=sink", "=trace", "=deepcopy"] + AttachedOpToMagic*: array[TTypeAttachedOp, TMagic] = [ + mDestroy, mAsgn, mAsgn, mTrace, mDeepCopy] proc `$`*(x: TLockLevel): string = diff --git a/compiler/sem/liftdestructors.nim b/compiler/sem/liftdestructors.nim index b909adf6a69..7493f01316d 100644 --- a/compiler/sem/liftdestructors.nim +++ b/compiler/sem/liftdestructors.nim @@ -854,9 +854,11 @@ proc produceSymDistinctType(g: ModuleGraph; c: PContext; typ: PType; proc symPrototype(g: ModuleGraph; typ: PType; owner: PSym; kind: TTypeAttachedOp; info: TLineInfo; idgen: IdGenerator): PSym = - + # a synthesized hook is treated as an instantiation of the respective generic + # magic procedure from the system module let procname = getIdent(g.cache, AttachedOpToStr[kind]) - result = newSym(skProc, procname, nextSymId(idgen), owner, info) + let base = getSysMagic(g, info, AttachedOpToStr[kind], AttachedOpToMagic[kind]) + result = newSym(skProc, procname, nextSymId(idgen), base, info) let dest = newSym(skParam, getIdent(g.cache, "dest"), nextSymId(idgen), result, info) let src = newSym(skParam, getIdent(g.cache, if kind == attachedTrace: "env" else: "src"), 
nextSymId(idgen), result, info) @@ -866,7 +868,7 @@ proc symPrototype(g: ModuleGraph; typ: PType; owner: PSym; kind: TTypeAttachedOp else: src.typ = typ - result.typ = newProcType(info, nextTypeId(idgen), owner) + result.typ = newProcType(info, nextTypeId(idgen), result) result.typ.addParam dest if kind != attachedDestructor: result.typ.addParam src diff --git a/tests/lang_callable/macros/tsynthesized_destroy_hook_roundtrip.nim b/tests/lang_callable/macros/tsynthesized_destroy_hook_roundtrip.nim new file mode 100644 index 00000000000..dd3ae20c77f --- /dev/null +++ b/tests/lang_callable/macros/tsynthesized_destroy_hook_roundtrip.nim @@ -0,0 +1,43 @@ +discard """ + description: ''' + Regression test for a bug where manually invoking a type's `=destroy` hook + within a procedure defined as part of a typed macro/template argument + crashed the compiler + ''' +""" + +type Destroy = object + ## A type that has a user-defined destroy hook. + +proc `=destroy`(x: var Destroy) = + discard + +type + NoHookObject = object + ## Object type where no destroy hook needs to be synthesized. + HookObject = object + ## Object type that needs a compiler-synthesized destroy hook. + field: Destroy + +macro m(x: typed): untyped = x + +# the compiler crashed when processing the macro output +m: + proc f() = + # the bug only surfaced when the hook call is part of a procedure. Types + # that need a compiler-synthesized as well as those that don't were + # affected. 
+ var v = NoHookObject() + `=destroy`(v) + var v2 = HookObject() + `=destroy`(v2) + +# the same bug happened when using a template: +template t(x: typed): untyped = x + +t: + proc f2() = + var v = NoHookObject() + `=destroy`(v) + var v2 = HookObject() + `=destroy`(v2) \ No newline at end of file From 55deaaa31f54f81b4c21db86c93230cb629a7822 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Sat, 24 Feb 2024 17:50:38 +0100 Subject: [PATCH 012/169] mir: separate destructor from `=destroy` injection (#1206) ## Summary Introduce the new `destroy` MIR operation and use it to decouple destructor injection from the injection of `=destroy` hooks. Injection of lifetime hooks is now fully decoupled from the move-analyzer / destructor-injection pass. This is an internal-only change. ## Details * introduce the `mnkDestroy` node kind / operator * instead of `=destroy` hook calls, the `injectdestructors` pass only injects `mnkDestroy` operations * the `injecthooks` pass then replaces `mnkDestroy` operations with `=destroy` calls * the `getOp` and `genDestroy` procedure are moved to `injecthooks` without change * injection of `=destroy` calls for variant objects (part of the `mnkSwitch` lowering) is still part of the `injectdestructors` pass * since `mnkDestroy` operations are replaced right away, both `mirexec` and `cgirgen` (MIR-to-CGIR translation) don't handle them yet --- compiler/backend/cgirgen.nim | 2 ++ compiler/mir/injecthooks.nim | 52 ++++++++++++++++++++++++++---- compiler/mir/mirtrees.nim | 7 ++-- compiler/mir/utils.nim | 4 +++ compiler/sem/injectdestructors.nim | 46 +++++--------------------- compiler/sem/mirexec.nim | 2 ++ compiler/sem/modulelowering.nim | 2 +- doc/mir.rst | 1 + 8 files changed, 68 insertions(+), 48 deletions(-) diff --git a/compiler/backend/cgirgen.nim b/compiler/backend/cgirgen.nim index 96b31615263..18c3f8ad58b 100644 --- a/compiler/backend/cgirgen.nim +++ b/compiler/backend/cgirgen.nim @@ -741,6 +741,8 @@ 
proc stmtToIr(tree: MirBody, env: MirEnv, cl: var TranslateCl, res.kids.addIfNotEmpty stmtToIr(tree, env, cl, cr) of mnkScope: toSingleNode scopeToIr(tree, env, cl, cr) + of mnkDestroy: + unreachable("a 'destroy' that wasn't lowered") of AllNodeKinds - StmtNodes: unreachable(n.kind) diff --git a/compiler/mir/injecthooks.nim b/compiler/mir/injecthooks.nim index d7d4cad67a3..6f7d3b08b44 100644 --- a/compiler/mir/injecthooks.nim +++ b/compiler/mir/injecthooks.nim @@ -1,10 +1,13 @@ -## Implements the MIR pass for replacing copy and move assignments with the -## ``=copy`` or ``=sink`` hook (if available for the type). -## -## Future direction: injection of ``=destroy`` hooks also needs to happen -## here. +## Implements the MIR pass for: +## * replacing copy and move assignments with the ``=copy`` or ``=sink`` +## hook (if available for the type). +## * replacing destroy operations with calls to the ``=destroy`` hook (if +## available for the type) import + std/[ + tables + ], compiler/ast/[ ast_query, ast_types, @@ -24,6 +27,9 @@ import compiler/modules/[ modulegraphs ], + compiler/sem/[ + sighashes + ], compiler/utils/[ idioms ] @@ -32,8 +38,8 @@ import from compiler/ast/reports_sem import SemReport from compiler/ast/report_enums import ReportKind -# XXX: temporary dependency until destroy hooks are injected here -from compiler/sem/injectdestructors import getOp, genDestroy, buildVoidCall +# XXX: temporary dependency until switch assignments are lowered differently +from compiler/sem/injectdestructors import buildVoidCall from compiler/sem/liftdestructors import boolLit, cyclicType @@ -56,6 +62,18 @@ type const skipAliases = {tyGenericInst, tyAlias, tySink} +proc getOp*(g: ModuleGraph, t: PType, kind: TTypeAttachedOp): PSym = + ## Returns the symbol for the `kind` type-bound hook for `t` (or nil, if + ## there's none). 
+ let t = t.skipTypes(skipForHooks) + result = getAttachedOp(g, t, kind) + if result == nil or result.ast.isGenericRoutine: + # give up and find the canonical type instead: + let h = sighashes.hashType(t, {CoType, CoDistinct}) + let canon = g.canonTypes.getOrDefault(h) + if canon != nil: + result = getAttachedOp(g, canon, kind) + proc isUsedForSink(tree: MirTree, stmt: NodePosition): bool = ## Computes whether the definition statement is something produced for ## sink parameter handling. @@ -138,6 +156,13 @@ template genCopy(bu: var MirBuilder, graph: ModuleGraph, env: var MirEnv, let c = maybeCyclic and couldIntroduceCycle(tree, dest) bu.emitByVal literal(boolLit(graph, unknownLineInfo, c)) +proc genDestroy*(bu: var MirBuilder, graph: ModuleGraph, env: var MirEnv, + target: Value) = + ## Emits a destructor call with `target` as the argument. + let destr = getOp(graph, target.typ, attachedDestructor) + bu.buildVoidCall(env, destr): + bu.emitByName(target, ekMutate) + proc injectHooks*(body: MirBody, graph: ModuleGraph, env: var MirEnv, owner: PSym, changes: var Changeset) = ## Replaces all copy and move assignments for locations with lifetime hooks @@ -244,6 +269,19 @@ proc injectHooks*(body: MirBody, graph: ModuleGraph, env: var MirEnv, changes.replaceMulti(tree, dest, bu): bu.use loc + of mnkDestroy: + let destr = getOp(graph, tree[tree.operand(i)].typ, attachedDestructor) + changes.replaceMulti(tree, i, bu): + bu.buildVoidCall(env, destr): + # XXX: the by-name passing and usage of ``ekMutate`` is not really + # correct. For all intents and purposes, a destructor + # *consumes* the value (and then effectively voids it), meaning + # that ``mnkConsume`` should actually be used. 
However, this + # would require changing the signature of ``=destroy`` to use + # ``sink`` + bu.emitByName ekMutate: + bu.emitFrom(tree, tree.child(i, 0)) + else: discard "nothing to do" diff --git a/compiler/mir/mirtrees.nim b/compiler/mir/mirtrees.nim index d29892ae298..85578bf341f 100644 --- a/compiler/mir/mirtrees.nim +++ b/compiler/mir/mirtrees.nim @@ -239,6 +239,9 @@ type mnkBranch ## defines a branch of an ``mnkExcept`` or ``mnkCase`` + mnkDestroy## destroys the value stored in the given location, leaving the + ## location in an undefined state + mnkAsm ## embeds backend-dependent code directly into the output mnkEmit ## embeds backend-dependent code directly into the output @@ -339,7 +342,7 @@ const mnkAddr, mnkDeref, mnkView, mnkDerefView, mnkStdConv, mnkConv, mnkCast, mnkRaise, mnkTag, mnkArg, mnkName, mnkConsume, mnkVoid, mnkCopy, mnkMove, - mnkSink} + mnkSink, mnkDestroy} ## Nodes that start sub-trees but that always have a single sub node. ArgumentNodes* = {mnkArg, mnkName, mnkConsume} @@ -363,7 +366,7 @@ const StmtNodes* = {mnkScope, mnkStmtList, mnkIf, mnkCase, mnkRepeat, mnkTry, mnkBlock, mnkBreak, mnkReturn, mnkRaise, mnkPNode, mnkInit, - mnkAsgn, mnkSwitch, mnkVoid, mnkRaise, mnkEmit, + mnkAsgn, mnkSwitch, mnkVoid, mnkRaise, mnkDestroy, mnkEmit, mnkAsm} + DefNodes UnaryOps* = {mnkNeg} diff --git a/compiler/mir/utils.nim b/compiler/mir/utils.nim index 4bbf962cc2f..735978b6b1a 100644 --- a/compiler/mir/utils.nim +++ b/compiler/mir/utils.nim @@ -492,6 +492,10 @@ proc stmtToStr(nodes: MirTree, i: var int, indent: int, result: var string, tree "raise ": valueToStr() result.add "\n" + of mnkDestroy: + tree "destroy ": + valueToStr() + result.add "\n" of mnkPNode: result.add repeat(" ", indent) result.add "PNode " & $n.node & "\n" diff --git a/compiler/sem/injectdestructors.nim b/compiler/sem/injectdestructors.nim index 48e4335fc04..fa48d3d6c6f 100644 --- a/compiler/sem/injectdestructors.nim +++ b/compiler/sem/injectdestructors.nim @@ -12,7 +12,7 @@ ## - 
the pass for collapsing sink assignments into copies, moves, and ## destrutive moves ## - the pass for injected ``wasMoved`` calls for consumed lvalues -## - the destructor (i.e. ``=destroy`` hook) injection +## - the pass for injecting destructors ## ## Overview ## ======== @@ -129,7 +129,6 @@ import aliasanalysis, liftdestructors, mirexec, - sighashes ], compiler/utils/[ cursors, @@ -229,16 +228,6 @@ func findScope(entities: EntityDict, name: EntityName, at: InstrPos, proc getVoidType(g: ModuleGraph): PType {.inline.} = g.getSysType(unknownLineInfo, tyVoid) -proc getOp*(g: ModuleGraph, t: PType, kind: TTypeAttachedOp): PSym = - let t = t.skipTypes(skipForHooks) - result = getAttachedOp(g, t, kind) - if result == nil or result.ast.isGenericRoutine: - # give up and find the canonical type instead: - let h = sighashes.hashType(t, {CoType, CoDistinct}) - let canon = g.canonTypes.getOrDefault(h) - if canon != nil: - result = getAttachedOp(g, canon, kind) - func isNamed(tree: MirTree, val: OpValue): bool = ## Returns whether `val` is the projection of a named location (or refers to ## the named location itself). 
@@ -554,13 +543,6 @@ proc genWasMoved(bu: var MirBuilder, graph: ModuleGraph, target: Value) = bu.buildMagicCall mWasMoved, getVoidType(graph): bu.emitByName(target, ekKill) -proc genDestroy*(bu: var MirBuilder, graph: ModuleGraph, env: var MirEnv, - target: Value) = - let destr = getOp(graph, target.typ, attachedDestructor) - - bu.buildVoidCall(env, destr): - bu.emitByName(target, ekMutate) - func destructiveMoveOperands(bu: var MirBuilder, tree: MirTree, src: NodePosition ): tuple[src, clear: Value] = @@ -718,22 +700,12 @@ proc rewriteAssignments(tree: MirTree, ctx: AnalyseCtx, ar: AnalysisResults, # --------- destructor injection ------------- -proc injectDestructorsInner(bu: var MirBuilder, orig: MirTree, - graph: ModuleGraph, env: var MirEnv, - entries: openArray[DestroyEntry]) = - ## Generates a destructor call for each item in `entries`, using `buf` as the - ## output. +proc injectDestroysAux(bu: var MirBuilder, orig: MirTree, + entries: openArray[DestroyEntry]) = + ## Emits a destroy operation for each item in `entries`. 
for it in ritems(entries): - let def = getDefEntity(orig, it.pos) - let t = - case orig[def].kind - of SymbolLike: orig[def].sym.typ - of mnkGlobal: orig[def].typ - of mnkTemp: orig[def].typ - else: unreachable() - - bu.buildVoidCall(env, getOp(graph, t, attachedDestructor)): - bu.emitByName(Value(node: orig[def]), ekMutate) + bu.subTree mnkDestroy: + bu.emitFrom(orig, getDefEntity(orig, it.pos)) proc injectDestructors(tree: MirTree, graph: ModuleGraph, destroy: seq[DestroyEntry], env: var MirEnv, @@ -803,13 +775,11 @@ proc injectDestructors(tree: MirTree, graph: ModuleGraph, # there's no need for opening a new scope -- we use a statement-list # instead buf.subTree MirNode(kind: mnkStmtList): - injectDestructorsInner(buf, tree, graph, env, - toOpenArray(entries, s.a, s.b)) + injectDestroysAux(buf, tree, toOpenArray(entries, s.a, s.b)) buf.add endNode(mnkTry) else: - injectDestructorsInner(buf, tree, graph, env, - toOpenArray(entries, s.a, s.b)) + injectDestroysAux(buf, tree, toOpenArray(entries, s.a, s.b)) proc lowerBranchSwitch(bu: var MirBuilder, body: MirTree, graph: ModuleGraph, idgen: IdGenerator, env: var MirEnv, diff --git a/compiler/sem/mirexec.nim b/compiler/sem/mirexec.nim index c9b51c3fbeb..bb0d6bccf45 100644 --- a/compiler/sem/mirexec.nim +++ b/compiler/sem/mirexec.nim @@ -561,6 +561,8 @@ func computeDfg*(tree: MirTree): DataFlowGraph = emitForValue(env, tree, i, tree.operand(i, 1)) of mnkVoid: emitForExpr(env, tree, i, NodePosition tree.operand(i)) + of mnkDestroy: + unreachable("not implemented yet") of mnkEmit, mnkAsm: emitForArgs(env, tree, i, i) diff --git a/compiler/sem/modulelowering.nim b/compiler/sem/modulelowering.nim index 694d1955586..cce620e81f9 100644 --- a/compiler/sem/modulelowering.nim +++ b/compiler/sem/modulelowering.nim @@ -36,7 +36,7 @@ import idioms ] -from compiler/sem/injectdestructors import getOp +from compiler/mir/injecthooks import getOp type ModuleStructs* = object diff --git a/doc/mir.rst b/doc/mir.rst index 
1a6c0514193..ac7fff3ac45 100644 --- a/doc/mir.rst +++ b/doc/mir.rst @@ -173,6 +173,7 @@ Semantics | Return # exit the procedure, but execute all # enclosing finalizers first (from # innermost to outermost) + | Destroy LVALUE | Emit VALUE ... | Asm VALUE ... From 9be395f752aa23c01e7214987e0b2386609ae821 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Mon, 26 Feb 2024 22:24:54 +0100 Subject: [PATCH 013/169] cgen: better default initialization for `result` (#1209) ## Summary Use a MIR pass for handling the default-initialization of `result` variables for the C backend. This allows using the MIR's more precise data-flow analysis, allowing for broader omission of `result` variable initialization. ## Details * introduce the `injectResultInit` MIR pass * it's only used with the C backend for now * if a data-flow analysis deems it required, a `result := default()` assignment is placed at the start of the body * remove the `CgNode`-based `result`-assignment analysis from `cgen` ### Data-flow Analysis Bug A small bug with the `mirexec.traversal` (forward traversal) routine was discovered and fixed: the `exit` flag was erroneously set to true when the main path was aborted on a data-flow instruction that's the last instruction in the subgraph. The `exit` flag must only be true when the end of the subgraph is reached, so this is wrong. A regression test for the `traverse` routine is added. This bug only affected the `injectdestructors.needsReset` optimization (unnecessary but otherwise harmless `wasMoved` calls were injected because of it) and `injectResultInit` (unnecessary initialization was emitted). 
--- compiler/backend/cgen.nim | 124 ---------------------------------- compiler/mir/mirconstr.nim | 3 + compiler/mir/mirpasses.nim | 60 ++++++++++++++++ compiler/sem/mirexec.nim | 9 ++- tests/compiler/tmir_exec2.nim | 40 +++++++++++ 5 files changed, 109 insertions(+), 127 deletions(-) create mode 100644 tests/compiler/tmir_exec2.nim diff --git a/compiler/backend/cgen.nim b/compiler/backend/cgen.nim index 0d72dafaa81..9556a1f08ef 100644 --- a/compiler/backend/cgen.nim +++ b/compiler/backend/cgen.nim @@ -730,117 +730,6 @@ proc closureSetup(p: BProc, prc: PSym) = linefmt(p, cpsStmts, "$1 = ($2) ClE_0;$n", [rdLoc(p.locals[n.local]), getTypeDesc(p.module, ls.typ)]) -func containsResult(n: CgNode): bool = - result = false - case n.kind - of cnkAtoms - {cnkLocal}: - discard "ignore" - of cnkLocal: - if n.local == resultId: - result = true - of cnkWithOperand: - result = containsResult(n.operand) - of cnkWithItems: - for i in 0.. quit the + # path + s.exit = true + + of opUse, opConsume, opMutate, opInvalidate: + if isResult(tree, getRoot(tree, n)): + # the result variable is read from or modified before it was + # initialized + return true + + of opMutateGlobal: + discard "not relevant" + + # the exit flag indicates that traversal reached the end of the body + # (without ``result`` being an initialized). 
The a > b check makes sure + # an empty procedure body also requires initialization of the result + # var + result = s.exit or all.a > all.b + + if requiresInit(tree): + assert tree[0].kind == mnkScope + let at = tree.child(NodePosition 0, 0) + changes.insert(tree, at, at, bu): + bu.subTree mnkInit: + bu.use toValue(mnkLocal, resultVar) + bu.buildMagicCall mDefault, resultVar.typ: + discard + proc applyPasses*(body: var MirBody, prc: PSym, env: var MirEnv, config: ConfigRef, target: TargetBackend) = ## Applies all applicable MIR passes to the body (`tree` and `source`) of @@ -389,6 +443,12 @@ proc applyPasses*(body: var MirBody, prc: PSym, env: var MirEnv, preventRvo(body.code, c) batch: + if target == targetC and (prc.kind in routineKinds) and + (sfNoInit notin prc.flags) and not prc.typ[0].isEmptyType(): + # the procedure has a result variable and initialization of it is + # allowed + injectResultInit(body.code, prc.ast[resultPos].sym, c) + lowerSwap(body.code, c) if target == targetVm: # only the C and VM targets need the extraction, and only the VM diff --git a/compiler/sem/mirexec.nim b/compiler/sem/mirexec.nim index bb0d6bccf45..a1b82a55203 100644 --- a/compiler/sem/mirexec.nim +++ b/compiler/sem/mirexec.nim @@ -686,14 +686,17 @@ iterator traverse*(c: DataFlowGraph, span: Subgraph, start: InstrPos, of DataFlowOps: yield (DataFlowOpcode(instr.op), instr.val) - inc pc - - if state.exit or pc == start: + if state.exit or pc + 1 == start: # abort the current path if we either reached the instruction we # started at or the path was manually killed state.exit = false abort() + # increment *after* the abort handling, otherwise it wouldn't be + # possible to detect that the end wasn't reached when an abort is + # triggered by the very last instruction + inc pc + assert queue.len <= 1 # don't set `exit` to true if nothing was traversed diff --git a/tests/compiler/tmir_exec2.nim b/tests/compiler/tmir_exec2.nim new file mode 100644 index 00000000000..39ecf5d7f9b --- 
/dev/null +++ b/tests/compiler/tmir_exec2.nim @@ -0,0 +1,40 @@ +discard """ + description: ''' + Tests for the behaviour of the traversal routines from ``mirexec.nim`` + ''' + target: native +""" + +include compiler/sem/mirexec + +# setup a very basic graph for testing: +var graph = DataFlowGraph(instructions: + @[Instr(node: NodePosition 0, op: opDef), + Instr(node: NodePosition 1, op: opUse)]) + +block forward_traverse_empty_slice: + # ensure that the exit flag is not set to true when no instructions are + # traversed + let empty = graph.subgraphFor(NodePosition(4)..NodePosition(5)) + var s = TraverseState() + for _ in traverse(graph, empty, empty.a, s): + discard + doAssert s.exit == false + + # the flag is also set to false if it was set to true externally + s.exit = true + for _ in traverse(graph, empty, empty.a, s): + discard + doAssert s.exit == false + +block forward_traverse_abort_path_on_last: + # aborting the main path on the very last operation of the traversed + # subgraph must not lead to the exit flag being set + let all = graph.subgraphFor(NodePosition(0)..NodePosition(1)) + var s = TraverseState() + for op, _ in traverse(graph, all, all.a, s): + if op == opUse: + # abort the path on the last operation of the traversed subgraph + s.exit = true + + doAssert s.exit == false From 8a6a476d89ecdd0d64394728910267a31c5a8dad Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 27 Feb 2024 20:42:02 +0000 Subject: [PATCH 014/169] build(deps): Bump dawidd6/action-download-artifact from 3.1.1 to 3.1.2 (#1210) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [dawidd6/action-download-artifact](https://github.com/dawidd6/action-download-artifact) from 3.1.1 to 3.1.2.
Release notes

Sourced from dawidd6/action-download-artifact's releases.

v3.1.2

What's Changed

New Contributors

Full Changelog: https://github.com/dawidd6/action-download-artifact/compare/v3.1.1...v3.1.2

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=dawidd6/action-download-artifact&package-manager=github_actions&previous-version=3.1.1&new-version=3.1.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase` . [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/publisher.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/publisher.yml b/.github/workflows/publisher.yml index 9bc6c6939ed..28b483edd74 100644 --- a/.github/workflows/publisher.yml +++ b/.github/workflows/publisher.yml @@ -30,7 +30,7 @@ jobs: # Download the latest instance of artifacts from a build done previously - name: Download generated docs - uses: dawidd6/action-download-artifact@v3.1.1 + uses: dawidd6/action-download-artifact@v3.1.2 with: workflow: ci.yml workflow_conclusion: success @@ -40,7 +40,7 @@ jobs: path: doc/html - name: Download generated source archive - uses: dawidd6/action-download-artifact@v3.1.1 + uses: dawidd6/action-download-artifact@v3.1.2 with: workflow: ci.yml workflow_conclusion: success @@ -50,7 +50,7 @@ jobs: path: release-staging - name: Download generated release binaries - uses: dawidd6/action-download-artifact@v3.1.1 + uses: dawidd6/action-download-artifact@v3.1.2 with: workflow: ci.yml workflow_conclusion: success @@ -61,7 +61,7 @@ jobs: path: release-staging - name: Download release manifest tool - uses: dawidd6/action-download-artifact@v3.1.1 + uses: dawidd6/action-download-artifact@v3.1.2 with: workflow: ci.yml workflow_conclusion: success From 
8b68b0d610b28ea47204ed0ca4fe9d1fcc19dad8 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 28 Feb 2024 22:08:17 +0100 Subject: [PATCH 015/169] make `.computedGoto` pragma a no-op (#1212) ## Summary The `.computedGoto` pragma is now a no-op pragma usable in pragma statements everywhere. It's seldom useful, and since the current implementation complicates the rework of the C code generator, dedicated C code generator support is removed for now. ## Details * remove the `.computedGoto` implementation from `cgen` * remove `compat.flattenStmts`, `ccgutils.stmtsContainsPragma`, and `ccgutils.getPragmaStmt`; they were only used by the `.computedGoto` implementation * as a consequence, `cnkPragmaStmt` is no longer used; the node kind is removed too * without `cnkPragmaStmt`, `mnkPNode` is also obsolete; it's removed too * remove the now-unused report kinds associated with the `.computedGoto` pragma --- compiler/ast/report_enums.nim | 5 - compiler/backend/ccgexprs.nim | 1 - compiler/backend/ccgstmts.nim | 96 +------------------ compiler/backend/ccgutils.nim | 15 --- compiler/backend/cgir.nim | 7 +- compiler/backend/cgirgen.nim | 16 ---- compiler/backend/cgirutils.nim | 3 - compiler/backend/compat.nim | 15 --- compiler/backend/jsgen.nim | 1 - compiler/front/cli_reporter.nim | 15 --- compiler/mir/mirgen.nim | 9 -- compiler/mir/mirtrees.nim | 12 +-- compiler/mir/utils.nim | 6 -- compiler/vm/vmgen.nim | 2 +- doc/manual.rst | 43 +-------- .../tcase_computedgoto_holed_enum.nim | 23 ----- 16 files changed, 8 insertions(+), 261 deletions(-) delete mode 100644 tests/lang_stmts/casestmt/tcase_computedgoto_holed_enum.nim diff --git a/compiler/ast/report_enums.nim b/compiler/ast/report_enums.nim index 2e893630aec..78a92bcdfd0 100644 --- a/compiler/ast/report_enums.nim +++ b/compiler/ast/report_enums.nim @@ -628,11 +628,6 @@ type # Codegen rsemRttiRequestForIncompleteObject rsemExpectedNimcallProc - rsemExpectedExhaustiveCaseForComputedGoto
- rsemExpectedUnholyEnumForComputedGoto - rsemTooManyEntriesForComputedGoto - rsemExpectedLow0ForComputedGoto - rsemExpectedCaseForComputedGoto rsemDisallowedRangeForComputedGoto rsemExpectedParameterForJsPattern rsemExpectedLiteralForGoto diff --git a/compiler/backend/ccgexprs.nim b/compiler/backend/ccgexprs.nim index 628bc3b1262..24acfe516de 100644 --- a/compiler/backend/ccgexprs.nim +++ b/compiler/backend/ccgexprs.nim @@ -2129,7 +2129,6 @@ proc expr(p: BProc, n: CgNode, d: var TLoc) = assert p.config.exc == excGoto genTryGoto(p, n) of cnkRaiseStmt: genRaiseStmt(p, n) - of cnkPragmaStmt: discard of cnkInvalid, cnkType, cnkAstLit, cnkMagic, cnkRange, cnkBinding, cnkExcept, cnkFinally, cnkBranch, cnkLabel, cnkStmtListExpr, cnkField: internalError(p.config, n.info, "expr(" & $n.kind & "); unknown node kind") diff --git a/compiler/backend/ccgstmts.nim b/compiler/backend/ccgstmts.nim index 97ad08400ee..c418c427ce7 100644 --- a/compiler/backend/ccgstmts.nim +++ b/compiler/backend/ccgstmts.nim @@ -189,97 +189,6 @@ proc genGotoForCase(p: BProc; caseStmt: CgNode) = genStmts(p, it.lastSon) endBlock(p) -proc genAsgn(p: BProc, e: CgNode) - -proc genComputedGoto(p: BProc; n: CgNode) = - # first pass: Generate array of computed labels: - - # flatten the loop body because otherwise let and var sections - # wrapped inside stmt lists by inject destructors won't be recognised - # XXX: ^^ this doesn't work as intended (see the comment in - # ``flattenStmts``) - let n = n.flattenStmts() - var casePos = -1 - var arraySize: int - for i in 0.. 
10_000: - localReport(p.config, it.info, reportSem rsemTooManyEntriesForComputedGoto) - return - - arraySize = toInt(aSize) - if firstOrd(p.config, it[0].typ) != 0: - localReport(p.config, it.info, reportSem rsemExpectedLow0ForComputedGoto) - return - - if casePos < 0: - localReport(p.config, n.info, reportSem rsemExpectedCaseForComputedGoto) - return - - var id = p.labels+1 - inc p.labels, arraySize+1 - let tmp = "TMP$1_" % [id.rope] - var gotoArray = "static void* $#[$#] = {" % [tmp, arraySize.rope] - for i in 1..arraySize-1: - gotoArray.addf("&&TMP$#_, ", [rope(id+i)]) - gotoArray.addf("&&TMP$#_};$n", [rope(id+arraySize)]) - line(p, cpsLocals, gotoArray) - - for j in 0..`_ - proc unnestStmts(n: CgNode, result: var CgNode) = - case n.kind - of cnkStmtList: - for it in n.items: - unnestStmts(it, result) - else: - result.kids.add n - - result = CgNode(kind: cnkStmtList) - unnestStmts(n, result) - if result.len == 1: - result = result[0] - proc newSymNode*(env: MirEnv, s: PSym): CgNode {.inline.} = case s.kind of skConst: diff --git a/compiler/backend/jsgen.nim b/compiler/backend/jsgen.nim index cfd859f9567..58e66953a1a 100644 --- a/compiler/backend/jsgen.nim +++ b/compiler/backend/jsgen.nim @@ -2446,7 +2446,6 @@ proc gen(p: PProc, n: CgNode, r: var TCompRes) = of cnkAsmStmt, cnkEmitStmt: genAsmOrEmitStmt(p, n) of cnkTryStmt: genTry(p, n) of cnkRaiseStmt: genRaiseStmt(p, n) - of cnkPragmaStmt: discard of cnkInvalid, cnkMagic, cnkRange, cnkBinding, cnkExcept, cnkFinally, cnkBranch, cnkAstLit, cnkLabel, cnkStmtListExpr, cnkField: internalError(p.config, n.info, "gen: unknown node type: " & $n.kind) diff --git a/compiler/front/cli_reporter.nim b/compiler/front/cli_reporter.nim index 1ca21286a25..cf89556fe88 100644 --- a/compiler/front/cli_reporter.nim +++ b/compiler/front/cli_reporter.nim @@ -1993,21 +1993,6 @@ proc reportBody*(conf: ConfigRef, r: SemReport): string = of rsemDisallowedRangeForComputedGoto: result = "range notation not available for computed goto" - of 
rsemExpectedCaseForComputedGoto: - result = "no case statement found for computed goto" - - of rsemExpectedLow0ForComputedGoto: - result = "case statement has to start at 0 for computed goto" - - of rsemTooManyEntriesForComputedGoto: - result = "case statement has too many cases for computed goto" - - of rsemExpectedUnholyEnumForComputedGoto: - result = "case statement cannot work on enums with holes for computed goto" - - of rsemExpectedExhaustiveCaseForComputedGoto: - result = "case statement must be exhaustive for computed goto" - of rsemExpectedNimcallProc: result = r.symstr & " needs to have the 'nimcall' calling convention" diff --git a/compiler/mir/mirgen.nim b/compiler/mir/mirgen.nim index 3a1666de2e4..b599a46b798 100644 --- a/compiler/mir/mirgen.nim +++ b/compiler/mir/mirgen.nim @@ -1963,15 +1963,6 @@ proc gen(c: var TCtx, n: PNode) = of wEmit: c.builder.useSource(c.sp, it) genAsmOrEmitStmt(c, mnkEmit, it[1]) - of wComputedGoto: - # the MIR doesn't handle this directive, but the code generators - # might. As such, we need to keep it via a ``mnkPNode``. Since the - # directive might be combined with some other directive in a - # single statement, we split it out into a standalone pragma statement - # first - # XXX: ideally, sem or transf would split pragma statement up - c.builder.useSource(c.sp, it) - c.add MirNode(kind: mnkPNode, node: newTree(nkPragma, [it])) else: discard of nkAsmStmt: diff --git a/compiler/mir/mirtrees.nim b/compiler/mir/mirtrees.nim index 85578bf341f..5db9af07b8a 100644 --- a/compiler/mir/mirtrees.nim +++ b/compiler/mir/mirtrees.nim @@ -252,12 +252,6 @@ type # structural changes, as not all node kinds are able to use the length # field at the moment - mnkPNode ## depending on the context, either statement or something else. 
- ## If it appears as a statement, it is expected to not have any - ## obsersvable effects - ## XXX: eventually, everything that currently requires - ## ``mnkPNode`` should be expressable directly in the IR - EffectKind* = enum ekMutate ## the value in the location is mutated ekReassign ## a new value is assigned to the location @@ -301,8 +295,6 @@ type ## for a break, the label of the block to break out of of mnkEnd: start*: MirNodeKind ## the kind of the corresponding start node - of mnkPNode: - node*: PNode of mnkTag: effect*: EffectKind ## the effect that happens when the operator the ## tagged value is passed to is executed @@ -331,7 +323,7 @@ const ## Node kinds that represent definition statements (i.e. something that ## introduces a named entity) - AtomNodes* = {mnkNone..mnkType, mnkMagic, mnkBreak, mnkReturn, mnkPNode} + AtomNodes* = {mnkNone..mnkType, mnkMagic, mnkBreak, mnkReturn} ## Nodes that don't support sub nodes. SubTreeNodes* = AllNodeKinds - AtomNodes - {mnkEnd} @@ -365,7 +357,7 @@ const ## Nodes that may be appear in atom-expecting slots. 
StmtNodes* = {mnkScope, mnkStmtList, mnkIf, mnkCase, mnkRepeat, mnkTry, - mnkBlock, mnkBreak, mnkReturn, mnkRaise, mnkPNode, mnkInit, + mnkBlock, mnkBreak, mnkReturn, mnkRaise, mnkInit, mnkAsgn, mnkSwitch, mnkVoid, mnkRaise, mnkDestroy, mnkEmit, mnkAsm} + DefNodes diff --git a/compiler/mir/utils.nim b/compiler/mir/utils.nim index 735978b6b1a..b408c47c9b3 100644 --- a/compiler/mir/utils.nim +++ b/compiler/mir/utils.nim @@ -59,9 +59,6 @@ func `$`(n: MirNode): string = of mnkEnd: result.add " start: " result.add $n.start - of mnkPNode: - result.add " node: " - result.add $n.node.kind of mnkTag: result.add " effect: " result.add $n.effect @@ -496,9 +493,6 @@ proc stmtToStr(nodes: MirTree, i: var int, indent: int, result: var string, tree "destroy ": valueToStr() result.add "\n" - of mnkPNode: - result.add repeat(" ", indent) - result.add "PNode " & $n.node & "\n" of mnkBreak: result.add repeat(" ", indent) result.add "break L" & $n.label.int & "\n" diff --git a/compiler/vm/vmgen.nim b/compiler/vm/vmgen.nim index 822c863cd5e..7d99e1cd1f1 100644 --- a/compiler/vm/vmgen.nim +++ b/compiler/vm/vmgen.nim @@ -3197,7 +3197,7 @@ proc gen(c: var TCtx; n: CgNode; dest: var TDest) = genCastIntFloat(c, n, dest) of cnkType: genTypeLit(c, n, n.typ, dest) - of cnkPragmaStmt, cnkAsmStmt, cnkEmitStmt: + of cnkAsmStmt, cnkEmitStmt: unused(c, n, dest) of cnkInvalid, cnkMagic, cnkRange, cnkExcept, cnkFinally, cnkBranch, cnkBinding, cnkLabel, cnkStmtListExpr, cnkField, cnkToSlice: diff --git a/doc/manual.rst b/doc/manual.rst index 53af501e4e9..2c0efbada36 100644 --- a/doc/manual.rst +++ b/doc/manual.rst @@ -6594,47 +6594,8 @@ If the `line` pragma is used with a parameter, the parameter needs be a computedGoto pragma ------------------- -The `computedGoto` pragma can be used to tell the compiler how to -compile a Nim `case`:idx: in a `while true` statement. -Syntactically it has to be used as a statement inside the loop: - -.. 
code-block:: nim - - type - MyEnum = enum - enumA, enumB, enumC, enumD, enumE - - proc vm() = - var instructions: array[0..100, MyEnum] - instructions[2] = enumC - instructions[3] = enumD - instructions[4] = enumA - instructions[5] = enumD - instructions[6] = enumC - instructions[7] = enumA - instructions[8] = enumB - - instructions[12] = enumE - var pc = 0 - while true: - {.computedGoto.} - let instr = instructions[pc] - case instr - of enumA: - echo "yeah A" - of enumC, enumD: - echo "yeah CD" - of enumB: - echo "yeah B" - of enumE: - break - inc(pc) - - vm() - -As the example shows, `computedGoto` is mostly useful for interpreters. If -the underlying backend (C compiler) does not support the computed goto -extension the pragma is simply ignored. +The `computedGoto` pragma is kept for backwards compatibility. It can be used +in pragma statements, but has no effect. immediate pragma diff --git a/tests/lang_stmts/casestmt/tcase_computedgoto_holed_enum.nim b/tests/lang_stmts/casestmt/tcase_computedgoto_holed_enum.nim deleted file mode 100644 index ee6105b069d..00000000000 --- a/tests/lang_stmts/casestmt/tcase_computedgoto_holed_enum.nim +++ /dev/null @@ -1,23 +0,0 @@ -discard """ - errormsg: "case statement cannot work on enums with holes for computed goto" - line: 21 - description: ''' - . From ComputedGoto: bad codegen with enum with holes - . There are two possible solutions: - Raise an error if the enum has holes - Complicate the codegen a bit by generating a set[uint16] - of the enum coverage and then use it to generate the jump tables. - . IMHO, error is the way to go. If you want speed, design your enums properly. 
- ''' -""" - -type - X = enum - A = 0, B = 100 - -var z = A -while true: - {.computedGoto.} - case z - of A: discard - of B: discard \ No newline at end of file From dff5aea072c823e27756f8f4bb27e14c27ed2023 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Thu, 29 Feb 2024 02:59:20 +0100 Subject: [PATCH 016/169] fix(mirgen): respect `--panics:on` for overflow checks (#1213) ## Summary Fix overflow checks for binary integer arithmetic being treated as potentially raising exceptions when panics are enabled (`--panics:on`). This didn't affect correctness, but some optimizations were inhibited. ## Details Use `buildDefectMagicCall` for emitting the binary arithmetic operation in `mirgen`. If panics are enabled, this makes sure a `mnkCall` is emitted rather than a `mnkCheckedCall`. Checked calls (i.e., calls that can raise an exception) introduce unstructured control-flow, which, depending on where the call is located, can result in the compiler having to place additional `wasMoved` and/or destructor calls. An `--expandArc`-based test for making sure that all run-time checks are correctly treated as not raising an exception is added. 
--- compiler/mir/mirgen.nim | 2 +- tests/exception/truntime_check_panics.nim | 58 +++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 tests/exception/truntime_check_panics.nim diff --git a/compiler/mir/mirgen.nim b/compiler/mir/mirgen.nim index b599a46b798..b1498f11023 100644 --- a/compiler/mir/mirgen.nim +++ b/compiler/mir/mirgen.nim @@ -894,7 +894,7 @@ proc genMagic(c: var TCtx, n: PNode; m: TMagic) = if optOverflowCheck in c.userOptions: const Map = [mAddI: mAddI, mSubI, mMulI, mDivI, mModI, mSucc: mAddI, mPred: mSubI] - c.buildCheckedMagicCall Map[m], n.typ: + c.buildDefectMagicCall Map[m], n.typ: arg n[1] arg n[2] else: diff --git a/tests/exception/truntime_check_panics.nim b/tests/exception/truntime_check_panics.nim new file mode 100644 index 00000000000..d770f2a4c5a --- /dev/null +++ b/tests/exception/truntime_check_panics.nim @@ -0,0 +1,58 @@ +discard """ + description: ''' + Ensure that the compiler-inserted run-time checks don't have exceptional + exits when panics are enabled. 
+ ''' + targets: native + matrix: "--panics:on --hints:off --expandArc:test" + action: compile + nimout: ''' +--expandArc: test +scope: + def a: array[0..0, int] + chckIndex(arg a, arg i) + discard a[i] + chckBounds(arg a, arg 0, arg i) + def _0: openArray[int] = toOpenArray a, 0, i + def _1: int = addI(arg i, arg i) + def _2: int = unaryMinusI(arg i) + def _3: range 0..1(int) = chckRange(arg i, arg 0, arg 1) + chckField(arg , arg o.kind, arg false, arg "field \'x\' is not accessible for type \'Object\' using \'kind = ") + discard o.kind.x + def _5: bool = isNil(arg r) + def _4: bool = not(arg _5) + if _4: + chckObj(arg r, arg type(Sub:ObjectType)) + discard r.(Sub) + def _6: float = mulF64(arg f, arg f) + chckNaN(arg _6) + +-- end of expandArc ------------------------''' +""" + +# make sure all run-time checks are enabled +{.push boundChecks: on, overflowChecks: on, rangeChecks: on, objChecks: on, + fieldChecks: on, infChecks: on, nanChecks: on.} + +type + Sub = ref object of RootObj + Object = object + case kind: bool + of true: + x: int + else: + discard + +# export the procedure so that it's not omitted +proc test(i: int, f: float, o: Object, r: ref RootObj) {.exportc.} = + var a: array[1, int] + discard a[i] # index check + discard toOpenArray(a, 0, i) # bound check + discard i + i # overflow check for binary arithmetic + discard -i # overflow check for unary arithmetic + discard range[0..1](i) # range check + discard o.x # field check + discard Sub(r) # object check + discard f * f # infinity and nan check + +{.pop.} \ No newline at end of file From 12f0aa45abab91601f53cc79854a27376c403d0f Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Thu, 29 Feb 2024 23:17:08 +0100 Subject: [PATCH 017/169] internal: implement `--profiler:on` support with MIR pass (#1214) ## Summary Make routine instrumentation with `nimProfile` calls a MIR pass, moving more logic out of the C code generator and working towards `--profiler:on` 
support with all backends. `nimProfile` calls are also no longer inserted into loops within `.asmNoStackFrame` routines. ## Details * require a `ModuleGraph` instance for `mirpasses.applyPasses`, so that compilerprocs can be looked up * add the `injectProfilerCalls` MIR pass. It injects `nimProfile` in the same way that `cgen` does * remove injection of `nimProfile` calls from `cgen` * don't instrument loops within `.asmNoStackFrame` procedures; injection of `nimProfile` at procedure entry was already skipped for these procedures * temporarily remove the `optProfiler` option from symbols when in JIT mode, so that code generated for compile-time execution is not instrumented * add a test for the `--profiler:on` feature, to make sure it works and keeps working --- compiler/backend/backends.nim | 2 +- compiler/backend/ccgstmts.nim | 4 -- compiler/backend/cgen.nim | 7 ---- compiler/mir/mirpasses.nim | 41 +++++++++++++++++- compiler/vm/vmjit.nim | 18 ++++++-- tests/compilerfeatures/tprofiler.nim | 63 ++++++++++++++++++++++++++++ 6 files changed, 119 insertions(+), 16 deletions(-) create mode 100644 tests/compilerfeatures/tprofiler.nim diff --git a/compiler/backend/backends.nim b/compiler/backend/backends.nim index 5fe5bd5fe03..b2cbf3fdf9a 100644 --- a/compiler/backend/backends.nim +++ b/compiler/backend/backends.nim @@ -342,7 +342,7 @@ proc process(body: var MirBody, prc: PSym, graph: ModuleGraph, of backendNimVm: targetVm of backendInvalid: unreachable() - applyPasses(body, prc, env, graph.config, target) + applyPasses(body, prc, env, graph, target) proc translate*(id: ProcedureId, body: PNode, graph: ModuleGraph, config: BackendConfig, idgen: IdGenerator, diff --git a/compiler/backend/ccgstmts.nim b/compiler/backend/ccgstmts.nim index c418c427ce7..811116a43c1 100644 --- a/compiler/backend/ccgstmts.nim +++ b/compiler/backend/ccgstmts.nim @@ -200,10 +200,6 @@ proc genRepeatStmt(p: BProc, t: CgNode) = if true: startBlock(p, "while (1) {$n") genStmts(p, loopBody) - - if
optProfiler in p.options: - # invoke at loop body exit: - linefmt(p, cpsStmts, "#nimProfile();$n", []) endBlock(p) dec(p.withinLoop) diff --git a/compiler/backend/cgen.nim b/compiler/backend/cgen.nim index 9556a1f08ef..86e358e5375 100644 --- a/compiler/backend/cgen.nim +++ b/compiler/backend/cgen.nim @@ -258,10 +258,6 @@ template appcg(m: BModule, sec: TCFileSection, frmt: FormatStr, args: untyped) = m.s[sec].add(ropecg(m, frmt, args)) -template appcg(p: BProc, sec: TCProcSection, frmt: FormatStr, - args: untyped) = - p.s(sec).add(ropecg(p.module, frmt, args)) - template line(p: BProc, sec: TCProcSection, r: Rope) = p.s(sec).add(indentLine(p, r)) @@ -824,9 +820,6 @@ proc finishProc*(p: BProc, id: ProcedureId): string = generatedProc.add(initFrame(p, procname, quotedFilename(p.config, prc.info))) else: generatedProc.add(p.s(cpsLocals)) - if optProfiler in prc.options: - # invoke at proc entry for recursion: - appcg(p, cpsInit, "\t#nimProfile();$n", []) # this pair of {} was added because C++ is stricter with its control flow # integrity checks, leaving them in if beforeRetNeeded in p.flags: generatedProc.add("{") diff --git a/compiler/mir/mirpasses.nim b/compiler/mir/mirpasses.nim index c8ba345245a..a561fda2b4c 100644 --- a/compiler/mir/mirpasses.nim +++ b/compiler/mir/mirpasses.nim @@ -8,8 +8,12 @@ import compiler/ast/[ ast_query, ast_types, + lineinfos, types ], + compiler/front/[ + in_options + ], compiler/mir/[ analysis, datatables, @@ -20,6 +24,10 @@ import mirtrees, sourcemaps ], + compiler/modules/[ + modulegraphs, + magicsys + ], compiler/sem/[ aliasanalysis, mirexec @@ -425,8 +433,33 @@ proc injectResultInit(tree: MirTree, resultVar: PSym, changes: var Changeset) = bu.buildMagicCall mDefault, resultVar.typ: discard +proc injectProfilerCalls(tree: MirTree, graph: ModuleGraph, env: var MirEnv, + changes: var Changeset) = + ## Instruments the body with calls to the ``nimProfile`` compiler runtime + ## procedure. 
Profiler calls are placed: + ## * at the beginning of a procedure's body + ## * at the end of a loop's body + let + voidType = graph.getSysType(unknownLineInfo, tyVoid) + prc = graph.getCompilerProc("nimProfile") + prcId = env.procedures.add(prc) + + # insert the entry call within the outermost scope: + changes.insert(tree, tree.child(NodePosition 0, 0), NodePosition 0, bu): + bu.subTree mnkVoid: + bu.buildCall prcId, prc.typ, voidType: + discard "no arguments" + + for i in search(tree, {mnkEnd}): + if tree[i].start == mnkRepeat: + # insert the call before the end node: + changes.insert(tree, i - 1, i, bu): + bu.subTree mnkVoid: + bu.buildCall prcId, prc.typ, voidType: + discard "no arguments" + proc applyPasses*(body: var MirBody, prc: PSym, env: var MirEnv, - config: ConfigRef, target: TargetBackend) = + graph: ModuleGraph, target: TargetBackend) = ## Applies all applicable MIR passes to the body (`tree` and `source`) of ## `prc`. `target` is the targeted backend and is used to enable/disable ## certain passes. Passes may register new entities with `env`. 
@@ -455,6 +488,12 @@ proc applyPasses*(body: var MirBody, prc: PSym, env: var MirEnv, # requires the extraction for cstring literals extractStringLiterals(body.code, env, c) + # instrument the body with profiler calls after all lowerings, but before + # optimization + if (sfPure notin prc.flags) and (optProfiler in prc.options): + batch: + injectProfilerCalls(body.code, graph, env, c) + # eliminate temporaries after all other passes batch: eliminateTemporaries(body.code, c) diff --git a/compiler/vm/vmjit.nim b/compiler/vm/vmjit.nim index 7f87eea38f1..1b9a96a6e11 100644 --- a/compiler/vm/vmjit.nim +++ b/compiler/vm/vmjit.nim @@ -25,6 +25,9 @@ import backends, cgir ], + compiler/front/[ + in_options, + ], compiler/mir/[ datatables, mirbodies, @@ -175,6 +178,15 @@ template runCodeGen(c: var TCtx, cg: var CodeGenCtx, b: Body, swapState(c, cg) r +proc applyPasses(c: var TCtx, env: var MirEnv, prc: PSym, body: var MirBody) = + let restore = optProfiler in prc.options + # don't instrument procedures when using the JIT + if restore: + prc.options.excl optProfiler + applyPasses(body, prc, env, c.graph, targetVm) + if restore: + prc.options.incl optProfiler + proc genStmt*(jit: var JitState, c: var TCtx; n: PNode): VmGenResult = ## Generates and emits code for the standalone top-level statement `n`. 
preCheck(jit.gen.env, n) @@ -184,7 +196,7 @@ proc genStmt*(jit: var JitState, c: var TCtx; n: PNode): VmGenResult = # `n` is expected to have been put through ``transf`` already var mirBody = generateMirCode(c, jit.gen.env, n, isStmt = true) - applyPasses(mirBody, c.module, jit.gen.env, c.config, targetVm) + applyPasses(c, jit.gen.env, c.module, mirBody) for _ in discover(jit.gen.env, cp): discard "nothing to register" @@ -216,7 +228,7 @@ proc genExpr*(jit: var JitState, c: var TCtx, n: PNode): VmGenResult = let cp = checkpoint(jit.gen.env) var mirBody = generateMirCode(c, jit.gen.env, n) - applyPasses(mirBody, c.module, jit.gen.env, c.config, targetVm) + applyPasses(c, jit.gen.env, c.module, mirBody) for _ in discover(jit.gen.env, cp): discard "nothing to register" @@ -255,7 +267,7 @@ proc genProc(jit: var JitState, c: var TCtx, s: PSym): VmGenResult = echoInput(c.config, s, body) var mirBody = generateCode(c.graph, jit.gen.env, s, selectOptions(c), body) echoMir(c.config, s, mirBody) - applyPasses(mirBody, s, jit.gen.env, c.config, targetVm) + applyPasses(c, jit.gen.env, s, mirBody) for _ in discover(jit.gen.env, cp): discard "nothing to register" diff --git a/tests/compilerfeatures/tprofiler.nim b/tests/compilerfeatures/tprofiler.nim new file mode 100644 index 00000000000..88e71fe8f4d --- /dev/null +++ b/tests/compilerfeatures/tprofiler.nim @@ -0,0 +1,63 @@ +discard """ + description: ''' + Ensure that the built-in instrumentation with profiler callback calls + works + ''' + targets: "c js vm" + matrix: "--profiler:on" + knownIssue.js vm: ''' + The `system/profile.nim` module is not available for the targets + ''' +""" + +var + traces: array[3, StackTrace] + enabled = true + numTraces = 0 + +# instrumentation needs to be disabled for the callbacks, otherwise there'd be +# an infinite recursion +{.push profiler: off.} + +proc enabledCallback(): bool = + result = enabled + # XXX: an issue with the profiler runtime requires disabling the callback + # until the 
`profileCallback` is done + enabled = false + +proc profileCallback(st: StackTrace) = + traces[numTraces] = st + inc numTraces + enabled = true # re-enable + +{.pop.} + +# nothing will happen before the hook is set +profilerHook = profileCallback +# the "profiling requested" callback guards whether to invoke the profiler +# callback +profilingRequestedHook = enabledCallback + +proc test() = + # the callback is invoked when a procedure is entered + var i = 0 + while i < 2: + inc i + # the callback is also invoked at the end of while loop's body + +test() # run once + +proc testPure() {.asmNoStackFrame.} = + # pure routines aren't instrumented + var i = 0 + while i < 2: + inc i + +# disable the callback so that the traces can be inspected +enabled = false + +# validate the traces: +doAssert numTraces == 3 +# the end of the list is signaled by a nil cstring +doAssert traces[0].lines[0..2] == [cstring"test", "tprofiler", nil] +doAssert traces[0].files[0..2] == [cstring"tprofiler.nim", "tprofiler.nim", nil] \ No newline at end of file From fff9caeaeec26a1ae419b0979bd0f7a23147303b Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Sun, 3 Mar 2024 20:59:57 +0100 Subject: [PATCH 018/169] make the CGIR goto-based and fix exception-handling bugs (#1215) ## Summary * extend the MIR's design with goto-based control-flow primitives * implement the new control-flow primitives for the CGIR * make the C code generator use the new control-flow primitives * (C backend) fix multiple bugs with `getCurrentException` returning the wrong exception * (C backend) fix exceptions being leaked when aborting a `finally` section ### Details The main goals of replacing the higher-level control-flow primitives in the MIR with lower-level, goto-based ones are to: * be able to express more complex control-flow * make control-flow-related transformations easier and/or possible * reduce the cost of computing the data-flow graph * make evolving the language and 
MIR easier A guiding principle was that the design should allow for enough freedom to generate efficient code with all three code generators (which differ significantly in control-flow-related capabilities). To be able to implement this incrementally, as a first step, the new control-flow primitives are only added to the CGIR, with only `cgen` actually supporting them. So that `jsgen` and `vmgen` continue to work without change, the legacy primitives are kept, with `cgirgen_legacy` (a full copy of the original `cgirgen`) continuing to produce the legacy CGIR. ### MIR An in-depth description of syntax and behaviour is added to `doc/mir.rst`. The gist of the design is that: * terminators of basic-blocks become explicit (an exception being `If`) * jump targets are directly specified on jump/fork-like operations (e.g., `Goto`, `CheckedCall`, etc.) * intercepted control-flow (via `finally` sections) is also specified on jump/fork-like operations * `finally` sections are *not* required to be duplicated for each unique control-flow passing through them * jump targets and join points are identified by *labels*, which are integer IDs unique within a body As a consequence of this design, there's no more statement-in-statement nesting. ### CGIR & Translation The CGIR implements all the new control-flow primitives from the MIR, with the only difference being that the CGIR's `ContinueStmt` doesn't track the possible exits of a `Finally` section (it's information irrelevant to the code generators). `cgirgen` handles translation of the old-style primitives still used by the MIR to the new-style ones. While not strictly needed at the moment, translation of intra-procedure unreachable code is disabled, meaning that unreachable code is effectively dropped. This is so that `Finally` sections that never exit normally (e.g., `finally: return`) can be ensured to work. 
### C code generation The CGIR is first translated into a small, low-level, C-specific IR (all implemented in `ccgflow`). Here, the focus is on figuring out what C code to produce for `Finally` sections; unnecessary gotos are also eliminated. #### Finally Handling The previous strategy for `finally` was to: * emit an error-state considering version of the `finally` at the end of the `try` * duplicate the body of all enclosing `finally`s at `break`s and `return`s Now, only a single version of the `finally` section is ever emitted. If the `finally` section has more than one possible exit, a run-time dispatcher is used, like so: ```c { NI32 Target0_; L1_1_: Target0_ = 0; goto L1_; L1_2_: Target0_ = 1; L1_: // never jumped to from the outside NI32 oldNimErr0_ = *nimErr_; *nimErr_ = NIM_FALSE; // temporarily disable error mode // ... body of the finally ... *nimErr_ = oldNimErr0_; switch (Target0_) { case 0: goto L2_; case 1: goto L3_; } } ``` Depending on the context and complexity of the code in-between, an optimizing C compiler is able to eliminate the dispatcher and `Target0_` assignment by directly inlining the body at a `goto L1_1_;`. For the common case of a `Finally` section having two exits - one for exceptional control-flow cases, and one for the normal control-flow cases -, an `if (NIM_UNLIKELY(*nimErr_)) goto error_exit; goto normal_exit` is used instead of a full dispatcher. This mirrors what the exit of a `finally` section previously looked like. The idea behind not duplicating `Finally` sections is that it reduces the amount of C code the compiler has to output. Nonetheless, the IR from `ccgflow` is flexible enough to use the duplication strategy, should this be needed again. #### Exception Handling Whether control-flow crosses the boundary of a `Finally` or `Except` section (i.e., leaves it) is encoded in the MIR/CGIR via the `Leave` item in target lists. 
`ccgflow` uses this information to make sure that leaving a `Finally` via unstructured control-flow properly aborts the active exception (if any) and that leaving an `Except` properly pops the handler. #### Other The IR from `ccgflow` then drives the rest of C code generation. Since there's no more statement-in-statement nesting, recursion reduces significantly. A side-effect of the flatter representation is that fewer C scopes are used (only `if`, loops, `finally`, and `except` use C scopes), which seems to harm C compiler optimization. Support for the legacy control-flow primitives is fully removed. ### Exception Handling Runtime The runtime part of exception handling for the C target is partially overhauled. For compatibility with the `csources` compiler, the new version is guarded behind the `nimskullNewExceptionRt` condsym. A distinction is now made between an *in-flight* exception and the *current* exception. Previously, both were treated as being one and the same. An exception becomes *in-flight* once it is raised (`raiseException2`), and stops being in-flight when it is caught (`nimCatchException`). While in-flight, an exception can be aborted (`nimAbortException`), by breaking out of an intercepting `finally`. On being caught by a handler, the exception is associated with the handler and the handler is pushed to the handler stack. When control-flow leaves the handler (`nimLeaveExcept`), the handler is popped from the stack. The new runtime fixes the `currentException` not being updated when breaking out of a `finally` or raising from within an `except` handler. In addition, an exception caught by a handler can now be raised again within the handler (e.g., `except CatchableError as e: raise e`) without reference cycles being introduced. 
--- compiler/backend/backends.nim | 10 +- compiler/backend/ccgcalls.nim | 70 +- compiler/backend/ccgexprs.nim | 81 +- compiler/backend/ccgflow.nim | 543 +++++++++++ compiler/backend/ccgstmts.nim | 428 ++++---- compiler/backend/cgen.nim | 35 +- compiler/backend/cgendata.nim | 21 +- compiler/backend/cgir.nim | 68 +- compiler/backend/cgirgen.nim | 448 +++++++-- compiler/backend/cgirgen_legacy.nim | 920 ++++++++++++++++++ compiler/backend/cgirutils.nim | 3 +- compiler/backend/compat.nim | 5 + compiler/backend/jsbackend.nim | 7 +- compiler/backend/jsgen.nim | 2 +- compiler/front/condsyms.nim | 1 + compiler/mir/mirbridge.nim | 9 +- compiler/vm/vmbackend.nim | 4 +- compiler/vm/vmgen.nim | 3 +- compiler/vm/vmjit.nim | 4 +- doc/mir.rst | 153 ++- lib/system/excpt.nim | 104 +- tests/exception/tfinally6.nim | 2 +- tests/exception/tleave_except2.nim | 87 ++ tests/exception/treraise2.nim | 58 ++ .../tdestruction_in_unreachable.nim | 35 + 25 files changed, 2620 insertions(+), 481 deletions(-) create mode 100644 compiler/backend/ccgflow.nim create mode 100644 compiler/backend/cgirgen_legacy.nim create mode 100644 tests/exception/tleave_except2.nim create mode 100644 tests/exception/treraise2.nim create mode 100644 tests/lang_objects/destructor/tdestruction_in_unreachable.nim diff --git a/compiler/backend/backends.nim b/compiler/backend/backends.nim index b2cbf3fdf9a..80b9999af09 100644 --- a/compiler/backend/backends.nim +++ b/compiler/backend/backends.nim @@ -15,7 +15,8 @@ import compiler/backend/[ cgmeth, cgir, - cgirgen + cgirgen, + cgirgen_legacy ], compiler/front/[ msgs, @@ -370,6 +371,13 @@ proc generateIR*(graph: ModuleGraph, idgen: IdGenerator, env: MirEnv, result = cgirgen.generateIR(graph, idgen, env, owner, body) echoOutput(graph.config, owner, result) +proc generateIRLegacy*(graph: ModuleGraph, idgen: IdGenerator, env: MirEnv, + owner: PSym, body: sink MirBody): Body = + ## Translates the MIR code provided by `code` into legacy ``CgNode`` IR and, + ## if enabled, 
echoes the result. + result = cgirgen_legacy.generateIR(graph, idgen, env, owner, body) + echoOutput(graph.config, owner, result) + # ------- handling of lifted globals --------- proc produceFragmentsForGlobals( diff --git a/compiler/backend/ccgcalls.nim b/compiler/backend/ccgcalls.nim index 1b164a61efb..a9cdb221b2d 100644 --- a/compiler/backend/ccgcalls.nim +++ b/compiler/backend/ccgcalls.nim @@ -35,33 +35,59 @@ proc reportObservableStore(p: BProc; le, ri: CgNode) = # cannot analyse the location; assume the worst return true - if le != nil and locationEscapes(p, le, p.nestedTryStmts.len > 0): + # XXX: this whole procedure needs to be removed; RVO calls must only be used + # if safe + var inTryStmt = false + # analyse the target to check whether a local exception handler or finally + # is reached + case ri[^1].kind + of cnkLabel: + inTryStmt = true + of cnkTargetList: + for it in ri[^1].items: + if it.kind == cnkLabel: + inTryStmt = true + break + else: + discard "no local exception handler or finally is reached" + + if le != nil and locationEscapes(p, le, inTryStmt): localReport(p.config, le.info, reportSem rsemObservableStores) -proc isHarmlessStore(p: BProc; canRaise: bool; d: TLoc): bool = - if d.k in {locTemp, locNone} or not canRaise: +proc observableInExcept(n: CgNode): bool = + ## Computes whether the call expression `n` has an exceptional exit + ## that leads to an exception handler within the current procedure. 
+ let target = n[^1] + case target.kind + of cnkLabel: true # can only be an exception handler (of finally) + of cnkTargetList: target[^1].kind == cnkLabel + else: + unreachable() + +proc isHarmlessStore(p: BProc; ri: CgNode, d: TLoc): bool = + if d.k in {locTemp, locNone} or ri.kind != cnkCheckedCall: result = true - elif d.k == locLocalVar and p.withinTryWithExcept == 0: + elif d.k == locLocalVar and not observableInExcept(ri): # we cannot observe a store to a local variable if the current proc # has no error handler: result = true else: result = false -proc exitCall(p: BProc, callee: CgNode, canRaise: bool) = +proc exitCall(p: BProc, call: CgNode) = ## Emits the exceptional control-flow related post-call logic. - if p.config.exc == excGoto: + if call.kind == cnkCheckedCall: if nimErrorFlagDisabled in p.flags: - if callee.kind == cnkProc and sfNoReturn in p.env[callee.prc].flags and - canRaiseConservative(p.env, callee): + if call[0].kind == cnkProc and sfNoReturn in p.env[call[0].prc].flags and + canRaiseConservative(p.env, call[0]): # when using goto-exceptions, noreturn doesn't map to "doesn't return" # at the C-level. 
In order to still support dispatching to wrapper # procedures around ``raise`` from inside ``.compilerprocs``, we emit # an exit after the call p.flags.incl beforeRetNeeded lineF(p, cpsStmts, "goto BeforeRet_;$n", []) - elif canRaise: - raiseExit(p) + else: + raiseExit(p, call[^1]) proc fixupCall(p: BProc, le, ri: CgNode, d: var TLoc, callee, params: Rope) = @@ -86,17 +112,17 @@ proc fixupCall(p: BProc, le, ri: CgNode, d: var TLoc, pl.add(addrLoc(p.config, d)) pl.add(~");$n") line(p, cpsStmts, pl) - exitCall(p, ri[0], canRaise) + exitCall(p, ri) else: pl.add(~")") - if isHarmlessStore(p, canRaise, d): + if isHarmlessStore(p, ri, d): if d.k == locNone: getTemp(p, typ[0], d) assert(d.t != nil) # generate an assignment to d: var list: TLoc initLoc(list, locCall, d.lode, OnUnknown) list.r = pl genAssignment(p, d, list) - exitCall(p, ri[0], canRaise) + exitCall(p, ri) else: var tmp: TLoc getTemp(p, typ[0], tmp) @@ -104,12 +130,12 @@ proc fixupCall(p: BProc, le, ri: CgNode, d: var TLoc, initLoc(list, locCall, d.lode, OnUnknown) list.r = pl genAssignment(p, tmp, list) - exitCall(p, ri[0], canRaise) + exitCall(p, ri) genAssignment(p, d, tmp) else: pl.add(~");$n") line(p, cpsStmts, pl) - exitCall(p, ri[0], canRaise) + exitCall(p, ri) proc reifiedOpenArray(p: BProc, n: CgNode): bool {.inline.} = # all non-parameter openArrays are reified @@ -197,7 +223,7 @@ proc genArgNoParam(p: BProc, n: CgNode, needsTmp = false): Rope = result = rdLoc(a) proc genParams(p: BProc, ri: CgNode, typ: PType): Rope = - for i in 1.. 
1: pl.add(~", ") + if numArgs(ri) > 0: pl.add(~", ") # the destination is guaranteed to be either a temporary or an lvalue # that can be modified in-place if true: @@ -264,8 +290,8 @@ proc genClosureCall(p: BProc, le, ri: CgNode, d: var TLoc) = getTemp(p, typ[0], d) pl.add(addrLoc(p.config, d)) genCallPattern() - exitCall(p, ri[0], canRaise) - elif isHarmlessStore(p, canRaise, d): + exitCall(p, ri) + elif isHarmlessStore(p, ri, d): if d.k == locNone: getTemp(p, typ[0], d) assert(d.t != nil) # generate an assignment to d: var list: TLoc @@ -275,7 +301,7 @@ proc genClosureCall(p: BProc, le, ri: CgNode, d: var TLoc) = else: list.r = PatProc % [rdLoc(op), pl, pl.addComma, rawProc] genAssignment(p, d, list) - exitCall(p, ri[0], canRaise) + exitCall(p, ri) else: var tmp: TLoc getTemp(p, typ[0], tmp) @@ -287,11 +313,11 @@ proc genClosureCall(p: BProc, le, ri: CgNode, d: var TLoc) = else: list.r = PatProc % [rdLoc(op), pl, pl.addComma, rawProc] genAssignment(p, tmp, list) - exitCall(p, ri[0], canRaise) + exitCall(p, ri) genAssignment(p, d, tmp) else: genCallPattern() - exitCall(p, ri[0], canRaise) + exitCall(p, ri) proc genAsgnCall(p: BProc, le, ri: CgNode, d: var TLoc) = if ri[0].typ.skipTypes({tyGenericInst, tyAlias, tySink}).callConv == ccClosure: diff --git a/compiler/backend/ccgexprs.nim b/compiler/backend/ccgexprs.nim index 24acfe516de..1a1aa504c38 100644 --- a/compiler/backend/ccgexprs.nim +++ b/compiler/backend/ccgexprs.nim @@ -294,11 +294,11 @@ template binaryArithOverflowRaw(p: BProc, t: PType, a, b: TLoc; var result = getTempName(p.module) linefmt(p, cpsLocals, "$1 $2;$n", [storage, result]) lineCg(p, cpsStmts, "if (#$2($3, $4, &$1)) { #raiseOverflow(); $5};$n", - [result, cpname, rdCharLoc(a), rdCharLoc(b), raiseInstr(p)]) + [result, cpname, rdCharLoc(a), rdCharLoc(b), raiseInstr(p, e.exit)]) if size < p.config.target.intSize or t.kind in {tyRange, tyEnum}: linefmt(p, cpsStmts, "if ($1 < $2 || $1 > $3){ #raiseOverflow(); $4}$n", [result, 
intLiteral(firstOrd(p.config, t)), intLiteral(lastOrd(p.config, t)), - raiseInstr(p)]) + raiseInstr(p, e.exit)]) result proc binaryArithOverflow(p: BProc, e: CgNode, d: var TLoc, m: TMagic) = @@ -324,7 +324,7 @@ proc binaryArithOverflow(p: BProc, e: CgNode, d: var TLoc, m: TMagic) = # result is only for overflows. if m in {mDivI, mModI}: linefmt(p, cpsStmts, "if ($1 == 0){ #raiseDivByZero(); $2}$n", - [rdLoc(b), raiseInstr(p)]) + [rdLoc(b), raiseInstr(p, e.exit)]) let res = binaryArithOverflowRaw(p, t, a, b, if t.kind == tyInt64: prc64[m] else: prc[m]) @@ -338,7 +338,7 @@ proc unaryArithOverflow(p: BProc, e: CgNode, d: var TLoc, m: TMagic) = initLocExpr(p, e[1], a) t = skipTypes(e.typ, abstractRange) linefmt(p, cpsStmts, "if ($1 == $2){ #raiseOverflow(); $3}$n", - [rdLoc(a), intLiteral(firstOrd(p.config, t)), raiseInstr(p)]) + [rdLoc(a), intLiteral(firstOrd(p.config, t)), raiseInstr(p, e.exit)]) case m of mUnaryMinusI: putIntoDest(p, d, e, "((NI$2)-($1))" % [rdLoc(a), rope(getSize(p.config, t) * 8)]) @@ -631,7 +631,7 @@ proc genFieldCheck(p: BProc, e: CgNode) = discard cgsym(p.module, raiseProc) # make sure the compilerproc is generated linefmt(p, cpsStmts, "{ $1($3, $4); $2} $n", - [raiseProc, raiseInstr(p), strLit, toStr]) + [raiseProc, raiseInstr(p, e.exit), strLit, toStr]) proc genUncheckedArrayElem(p: BProc, n, x, y: CgNode, d: var TLoc) = var a, b: TLoc @@ -660,7 +660,7 @@ proc genCStringElem(p: BProc, n, x, y: CgNode, d: var TLoc) = putIntoDest(p, d, n, ropecg(p.module, "$1[$2]", [rdLoc(a), rdCharLoc(b)]), a.storage) -proc genBoundsCheck(p: BProc; arr, a, b: TLoc) = +proc genBoundsCheck(p: BProc; arr, a, b: TLoc, exit: CgNode) = # types that map to C pointers need to be skipped here too, since no # dereference is generated for ``ptr array`` and the like let ty = skipTypes(arr.t, abstractVarRange + {tyPtr, tyRef, tyLent}) @@ -670,29 +670,29 @@ proc genBoundsCheck(p: BProc; arr, a, b: TLoc) = linefmt(p, cpsStmts, "if ($2-$1 != -1 && " & "((NU)($1) >= 
(NU)($3.Field1) || (NU)($2) >= (NU)($3.Field1))){ #raiseIndexError(); $4}$n", - [rdLoc(a), rdLoc(b), rdLoc(arr), raiseInstr(p)]) + [rdLoc(a), rdLoc(b), rdLoc(arr), raiseInstr(p, exit)]) else: linefmt(p, cpsStmts, "if ($2-$1 != -1 && " & "((NU)($1) >= (NU)($3Len_0) || (NU)($2) >= (NU)($3Len_0))){ #raiseIndexError(); $4}$n", - [rdLoc(a), rdLoc(b), rdLoc(arr), raiseInstr(p)]) + [rdLoc(a), rdLoc(b), rdLoc(arr), raiseInstr(p, exit)]) of tyArray: let first = intLiteral(firstOrd(p.config, ty)) linefmt(p, cpsStmts, "if ($2-$1 != -1 && " & "($2-$1 < -1 || $1 < $3 || $1 > $4 || $2 < $3 || $2 > $4)){ #raiseIndexError(); $5}$n", - [rdCharLoc(a), rdCharLoc(b), first, intLiteral(lastOrd(p.config, ty)), raiseInstr(p)]) + [rdCharLoc(a), rdCharLoc(b), first, intLiteral(lastOrd(p.config, ty)), raiseInstr(p, exit)]) of tySequence, tyString: linefmt(p, cpsStmts, "if ($2-$1 != -1 && " & "((NU)($1) >= (NU)$3 || (NU)($2) >= (NU)$3)){ #raiseIndexError(); $4}$n", - [rdLoc(a), rdLoc(b), lenExpr(p, arr), raiseInstr(p)]) + [rdLoc(a), rdLoc(b), lenExpr(p, arr), raiseInstr(p, exit)]) of tyUncheckedArray, tyCstring: discard "no checks are used" else: unreachable(ty.kind) -proc genIndexCheck(p: BProc; x: CgNode, arr, idx: TLoc) = +proc genIndexCheck(p: BProc; x: CgNode, arr, idx: TLoc, exit: CgNode) = ## Emits the index check logic + subsequent raise operation. `x` is ## the array expression the `arr` loc resulted from from. 
let ty = arr.t.skipTypes(abstractVar + tyUserTypeClasses + @@ -703,22 +703,22 @@ proc genIndexCheck(p: BProc; x: CgNode, arr, idx: TLoc) = if firstOrd(p.config, ty) == 0 and lastOrd(p.config, ty) >= 0: linefmt(p, cpsStmts, "if ((NU)($1) > (NU)($2)){ #raiseIndexError2($1, $2); $3}$n", [rdCharLoc(idx), intLiteral(lastOrd(p.config, ty)), - raiseInstr(p)]) + raiseInstr(p, exit)]) else: linefmt(p, cpsStmts, "if ($1 < $2 || $1 > $3){ #raiseIndexError3($1, $2, $3); $4}$n", [rdCharLoc(idx), first, intLiteral(lastOrd(p.config, ty)), - raiseInstr(p)]) + raiseInstr(p, exit)]) of tySequence, tyString: linefmt(p, cpsStmts, "if ((NU)($1) >= (NU)$2){ #raiseIndexError2($1,$2-1); $3}$n", - [rdCharLoc(idx), lenExpr(p, arr), raiseInstr(p)]) + [rdCharLoc(idx), lenExpr(p, arr), raiseInstr(p, exit)]) of tyOpenArray, tyVarargs: if reifiedOpenArray(p, x): linefmt(p, cpsStmts, "if ((NU)($1) >= (NU)($2.Field1)){ #raiseIndexError2($1,$2.Field1-1); $3}$n", - [rdCharLoc(idx), rdLoc(arr), raiseInstr(p)]) + [rdCharLoc(idx), rdLoc(arr), raiseInstr(p, exit)]) else: linefmt(p, cpsStmts, "if ((NU)($1) >= (NU)($2Len_0)){ #raiseIndexError2($1,$2Len_0-1); $3}$n", - [rdCharLoc(idx), rdLoc(arr), raiseInstr(p)]) + [rdCharLoc(idx), rdLoc(arr), raiseInstr(p, exit)]) of tyCstring: discard "no bound checks" else: @@ -766,18 +766,19 @@ proc genArrayLikeElem(p: BProc; n: CgNode; d: var TLoc) = proc genEcho(p: BProc, n: CgNode) = ## Generates and emits the code for the magic echo call. - if n.len == 1: + let argCount = numArgs(n) + if argCount == 0: linefmt(p, cpsStmts, "#echoBinSafe(NIM_NIL, 0);$n", []) else: # allocate a temporary array and fill it with the arguments: var tmp: TLoc getTemp(p, n[1].typ, tmp) # the first argument stores the type to use - for i in 2.. 
($6)($3)){ #raiseRangeErrorNoArgs(); $5}$n", [rdCharLoc(a), genLiteral(p, n[2], dest), genLiteral(p, n[3], dest), - raiser, raiseInstr(p), getTypeDesc(p.module, n0t)]) + raiser, raiseInstr(p, n.exit), getTypeDesc(p.module, n0t)]) else: let raiser = case skipTypes(n.typ, abstractVarRange).kind @@ -1531,7 +1532,7 @@ proc genRangeChck(p: BProc, n: CgNode, d: var TLoc) = "" linefmt(p, cpsStmts, "if ($6($1) < $2 || $6($1) > $3){ $4($1, $2, $3); $5}$n", [rdCharLoc(a), genLiteral(p, n[2], dest), genLiteral(p, n[3], dest), - raiser, raiseInstr(p), boundaryCast]) + raiser, raiseInstr(p, n.exit), boundaryCast]) putIntoDest(p, d, n, "(($1) ($2))" % [getTypeDesc(p.module, dest), rdCharLoc(a)], a.storage) @@ -1568,7 +1569,8 @@ proc binaryFloatArith(p: BProc, e: CgNode, d: var TLoc, m: TMagic) = putIntoDest(p, d, e, ropecg(p.module, "(($4)($2) $1 ($4)($3))", [opr[m], rdLoc(a), rdLoc(b), getSimpleTypeDesc(p.module, e[1].typ)])) - linefmt(p, cpsStmts, "if ($1 != 0.0 && $1*0.5 == $1) { #raiseFloatOverflow($1); $2}$n", [rdLoc(d), raiseInstr(p)]) + linefmt(p, cpsStmts, "if ($1 != 0.0 && $1*0.5 == $1) { #raiseFloatOverflow($1); $2}$n", + [rdLoc(d), raiseInstr(p, e.exit)]) proc skipAddr(n: CgNode): CgNode = if n.kind == cnkHiddenAddr: n.operand @@ -1758,18 +1760,18 @@ proc genMagicExpr(p: BProc, e: CgNode, d: var TLoc, op: TMagic) = # NOTE: if the value is a signaling NaN, the comparison itself results in # a float-point exception (which might result in a trap) linefmt(p, cpsStmts, "if ($1 != $1){ #raiseFloatInvalidOp(); $2}$n", - [rdLoc(a), raiseInstr(p)]) + [rdLoc(a), raiseInstr(p, e.exit)]) of mChckIndex: var arr, a: TLoc initLocExpr(p, e[1], arr) initLocExpr(p, e[2], a) - genIndexCheck(p, e[1], arr, a) + genIndexCheck(p, e[1], arr, a, e.exit) of mChckBounds: var arr, a, b: TLoc initLocExpr(p, e[1], arr) initLocExpr(p, e[2], a) initLocExpr(p, e[3], b) - genBoundsCheck(p, arr, a, b) + genBoundsCheck(p, arr, a, b, e.exit) of mChckField: genFieldCheck(p, e) of mChckObj: @@ -1781,7 
+1783,7 @@ proc genMagicExpr(p: BProc, e: CgNode, d: var TLoc, op: TMagic) = # the nil-check is expected to have taken place already linefmt(p, cpsStmts, "if (!#isObj($2, $3)){ #raiseObjectConversionError(); $4}$n", [nilCheck, r, genTypeInfo2Name(p.module, e[2].typ), - raiseInstr(p)]) + raiseInstr(p, e.exit)]) of mSamePayload: var a, b: TLoc initLocExpr(p, e[1], a) @@ -1899,10 +1901,6 @@ proc genArrayConstr(p: BProc, n: CgNode, d: var TLoc) = arr.r = "$1[$2]" % [rdLoc(d), intLiteral(i)] expr(p, n[i], arr) -proc genStmtList(p: BProc, n: CgNode) = - for i in 0..D->C->B->A and G->F->C->B->A. If + ## both are added to the storage, the content would look like this: + ## + ## (0: A) -> (1: B) -> (2: C) -> (3: D) -> (4: E) + ## \ (5: F) -> (6: G) + ## + ## The numbers represent the items' index in the sequence. The `sibling` + ## item of 3 is 5 (all other items have no siblings); the `next` pointer + ## of 5 points to 2. As can be seen, common trailing paths are merged into + ## one. + + Context = object + ## Local state used during the translation bundled into an object for + ## convenience. + paths: Paths + stmtToPath: Table[int, int] + finallys: Table[CLabelId, FinallyInfo] + cleanups: Table[CLabelId, FinallyInfo] + ## cleanup here refers to the exception-related cleanup when + ## exiting a finally or except section + +const + ExitLabel* = CLabelId(0) + ## The label of the procedure exit. + ResumeLabel* = ExitLabel + ## The C label that a ``cnkResume`` targets. + +func `==`*(a, b: CLabelId): bool {.borrow.} + +func toCLabel*(n: CgNode): CLabelId = + ## Returns the ID of the C label the label-like node `n` represents. 
+ case n.kind + of cnkResume: + ResumeLabel + of cnkLabel: + CLabelId(ord(n.label) + 2) + of cnkLeave: + toCLabel(n[0]) + else: + unreachable(n.kind) + +func toCLabel*(n: CgNode, specifier: Option[CLabelSpecifier] + ): CLabel {.inline.} = + (toCLabel(n), specifier) + +func toBlockId*(id: CLabelId): BlockId = + ## If `id` was converted to from a valid CGIR label, converts it back to + ## the CGIR label. + BlockId(ord(id) - 2) + +func rawAdd(p: var Paths, x: openArray[PathItemTarget]): PathIndex = + ## Appends the chain `x` to `p` without any deduplication or + ## linking with the existing items. Returns the index of the + ## tail item. + result = p.len.PathIndex + for i in countdown(x.high, 0): + let pos = p.len.PathIndex + p.add PathItem(prev: (if i > 0: pos + 1 else: pos), + next: (if i < x.high: pos - 1 else: pos), + sibling: pos, + target: x[i]) + +func add(p: var Paths, path: openArray[PathItemTarget]): PathIndex = + ## Adds `path` to the `p`. Only the sub-path of `path` not yet present in + ## `p` is added. The index of the *head* item of the added (or existing) + ## path is returned. + if p.len == 0: + discard rawAdd(p, path) + p[0].next = 0'u32 + return p.high.PathIndex + + var pos = 0'u32 ## the current search position + for i in countdown(path.len-1, 0): + # search the sibling list for a matching item: + while p[pos].target != path[i] and pos != p[pos].sibling: + pos = p[pos].sibling + + if p[pos].target != path[i]: + # no item was found, meaning that this is the end of the common paths. + # Add the remaining items to the storage. + let next = rawAdd(p, path.toOpenArray(0, i)) + p[pos].sibling = next + # only set the next pointer if there was a common sub-path (otherwise + # there's no next item): + if i != path.high: + p[next].next = p[pos].next + return p.high.PathIndex + + # it's a match! 
continue down the chain + if i > 0: + if p[pos].prev == pos: + # there's no next item, append the remaining new targets to the + # pre-existing path + let next = rawAdd(p, path.toOpenArray(0, i-1)) + p[pos].prev = next + p[next].next = pos + return p.high.PathIndex + else: + pos = p[pos].prev + + # the chain `path` already exists in `p` + result = pos + +func incl(p: var Paths, at: PathIndex, kind: PathKind) = + ## Marks all items following and including `at` with `kind`. + var i = at + while p[i].next != i: + p[i].kinds.incl kind + i = p[i].next + p[i].kinds.incl kind + +func needsDispatcher(f: FinallyInfo): bool = + # a dispatcher is required if re are more than one exits. An exception is + # the case where one exit is only taken when in error mode and the other is + # not. If a dispatcher is required, the finally has sub-labels. + f.numExits > 1 and + not(f.routes.len == 2 and f.numErr == 1 and f.numNormal == 1) + +func needsSpecifier(c: Context, target: PathItemTarget): bool = + # cleanup sections don't have a unique label themselves, so using a + # specifier is required + target.isCleanup or + ((target.label in c.finallys) and + needsDispatcher(c.finallys[target.label])) + +proc append(targets: var seq[PathItemTarget], + redirects: Table[BlockId, CgNode], + exits: PackedSet[BlockId], n: CgNode) = + ## Appends all jump targets `n` represents to `targets`, following + ## `redirects` and turning all labels part of `exits` into the + ## "before return" label. + template addTarget(t: CLabelId; cleanup = false) = + targets.add PathItemTarget(label: t, isCleanup: cleanup) + + case n.kind + of cnkLabel: + if n.label in redirects: + append(targets, redirects, exits, redirects[n.label]) + elif n.label in exits: + addTarget ExitLabel + else: + addTarget toCLabel(n) + of cnkTargetList: + # only the final target could possibly be redirected + let hasRedir = n[^1].kind == cnkLabel and n[^1].label in redirects + for i in 0.. 
1 or targets[0].label in c.finallys: + let id = c.paths.add(targets) + if isErr: incl(c.paths, id, pkError) + else: incl(c.paths, id, pkNormal) + + # remember the path associated with the statement for later: + c.stmtToPath[i] = id.int + + case it.kind + of cnkDef, cnkAsgn, cnkFastAsgn: + if it[1].kind == cnkCheckedCall: + exit(it[1][^1], true) + of cnkRaiseStmt, cnkCheckedCall: + exit(it[^1], true) + of cnkGotoStmt: + exit(it[0]) + of cnkCaseStmt: + for j in 1.. 1: + exit(it[^1], true) + else: + discard + + # register every path item with the finally section it targets, and compute + # some statistics that are used during the later code generation: + for i, it in c.paths.pairs: + func setup(f: var FinallyInfo, it: PathItem) = + f.routes.add i.PathIndex + f.numExits += ord(it.next.int != i) + f.numErr += ord(pkError in it.kinds) + f.numNormal += ord(pkNormal in it.kinds) + + if it.target.isCleanup: + setup(c.cleanups.mgetOrPut(it.target.label, FinallyInfo()), it) + elif it.target.label in c.finallys: + setup(c.finallys[it.target.label], it) + + # construction of the instruction list follows + + proc label(code: var seq[CInstr], id: CLabelId; + spec = none(CLabelSpecifier)) {.nimcall.} = + # a label must always be preceded by some code, so no length guard is + # required + if code[^1].op in {opJump, opErrJump} and code[^1].label.id == id and + code[^1].label.specifier == spec: + # optimization: remove the preceding jump if it targets the label + code.setLen(code.len - 1) + code.add CInstr(op: opLabel, label: (id, spec)) + + proc jump(code: var seq[CInstr], target: CLabelId) {.nimcall.} = + code.add CInstr(op: opJump, label: (target, none CLabelSpecifier)) + + proc jump(code: var seq[CInstr], op: JumpOp, c: Context, + path: PathIndex) {.nimcall.} = + let target = c.paths[path].target + if needsSpecifier(c, target): + code.add CInstr(op: op, label: (target.label, some path)) + else: + code.add CInstr(op: op, label: (target.label, none CLabelSpecifier)) + + proc 
stmt(code: var seq[CInstr], c: Context, pos: int) {.nimcall.} = + if (let path = c.stmtToPath.getOrDefault(pos, -1); path != -1 and + needsSpecifier(c, c.paths[path].target)): + # a label specifier, and thus a separate instruction, is needed + code.add CInstr(op: opStmt, stmt: pos, specifier: CLabelSpecifier path) + elif code.len > 0 and code[^1].op == opStmts and + code[^1].stmts.b == pos + 1: + # append to the sequence + inc code[^1].stmts.b + else: + # start a new sequence + code.add CInstr(op: opStmts, stmts: pos..pos) + + var + code: seq[CInstr] + nextDispId = 0'u32 + nextRecoverID = 0'u32 + + for i, it in stmts.pairs: + case it.kind + of cnkFinally: + stmt code, c, i + let + clabel = toCLabel(it[0]) + f = addr c.finallys[clabel] + + # allocate and set the ID for the discriminator variable: + f.discriminator = nextDispId + inc nextDispId + + # emit the entry-point(s); one for each route + if needsDispatcher(f[]): + # an entry point looks like this: + # L1_1_: + # Target = ... + # goto L1_ + for i, entry in f.routes.pairs: + label code, clabel, some(entry) + code.add CInstr(op: opSetTarget, discr: f.discriminator, value: i) + # jump to the main code: + jump code, clabel + + # the body follows: + label code, clabel + if f.numErr > 0: + # backing up the error state is only needed when the finally is + # entered by exceptional control-flow + f.errBackupId = nextRecoverID + code.add CInstr(op: opBackup, local: nextRecoverID) + inc nextRecoverID + + of cnkContinueStmt: + let + clabel = toCLabel(it[0]) + f {.cursor.} = c.finallys[clabel] + + # no need to restore the error state if control-flow never reaches the + # end of the finally anyway + if f.numErr > 0 and f.numExits > 0: + code.add CInstr(op: opRestore, local: f.errBackupId) + + if f.numExits == 0: + discard "the end is never reached; nothing to do" + elif not needsDispatcher(f) and f.routes.len == 2: + # optimization: if two paths go through a finally, with one of them + # an exceptional jump path and the 
other one not, instead of using a + # full dispatcher we emit: + # if err: goto error_exit + # goto normal_exit + let exit = if c.paths[f.routes[0]].kinds == {pkError}: 0 else: 1 + jump code, opErrJump, c, c.paths[f.routes[exit]].next + jump code, opJump, c, c.paths[f.routes[1 - exit]].next + else: + assert f.routes.len == f.numExits + # a dispatcher is only required if there is more than one exit + let op = if f.numExits > 1: opDispJump + else: opJump + + if op == opDispJump: + code.add CInstr(op: opDispatcher, discr: f.discriminator, + value: f.numExits) + + for it in f.routes.items: + jump code, op, c, c.paths[it].next + + # emit the exception-related cleanup after the dispatcher: + if clabel in c.cleanups: + let cleanup {.cursor.} = c.cleanups[clabel] + # a dispatcher is not worth the overhead, emit an abort instruction + # for each route + for entry in cleanup.routes.items: + label code, clabel, some(entry) + # TODO: omit the cleanup logic as a whole, if the finally section is + # never entered via an exception + if f.numErr > 0: + code.add CInstr(op: opAbort, local: f.errBackupId) + jump code, opJump, c, PathIndex c.paths[entry].next + + stmt code, c, i + + of cnkJoinStmt: + # XXX: labels that were redirected cannot be eliminated yet, as case + # statements (which are handled outside of ccgflow) might still + # target them + label code, toCLabel(it[0]) + of cnkExcept: + # an except section is a label followed by the filter logic + label code, toCLabel(it[0]) + stmt code, c, i + of cnkEnd: + let clabel = toCLabel(it[0]) + # emit the cleanup for except sections: + if clabel in c.cleanups: + let cleanup {.cursor.} = c.cleanups[clabel] + # a dispatcher is not worth the overhead, emit a pop instruction + # for each route + for entry in cleanup.routes.items: + label code, clabel, some(entry) + code.add CInstr(op: opPopHandler) + jump code, opJump, c, PathIndex c.paths[entry].next + + stmt code, c, i + + of cnkGotoStmt: + let target = it[0] + if (let path = 
c.stmtToPath.getOrDefault(i, -1); path != -1): + jump code, opJump, c, PathIndex path + elif target.kind == cnkLabel: + jump code, toCLabel(target) + else: + jump code, toCLabel(target[^1]) + of cnkRaiseStmt: + stmt code, c, i # the statement handles the exception setup part + # the goto part is the same as for a normal goto + let target = it[^1] + if target.kind == cnkLabel: + jump code, toCLabel(target) + elif (let path = c.stmtToPath.getOrDefault(i, -1); path != -1): + jump code, opJump, c, PathIndex path + else: + jump code, toCLabel(target[^1]) + + else: + stmt code, c, i + + result = code diff --git a/compiler/backend/ccgstmts.nim b/compiler/backend/ccgstmts.nim index 811116a43c1..feb064bd623 100644 --- a/compiler/backend/ccgstmts.nim +++ b/compiler/backend/ccgstmts.nim @@ -15,29 +15,14 @@ const stringCaseThreshold = 8 # above X strings a hash-switch for strings is generated -proc inExceptBlockLen(p: BProc): int = - for x in p.nestedTryStmts: - if x.inExcept: result.inc - -proc startBlockInternal(p: BProc, blk: int) = - inc(p.labels) +proc startBlockInternal(p: BProc) = let result = p.blocks.len setLen(p.blocks, result + 1) - p.blocks[result].id = p.labels - p.blocks[result].blk = blk - p.blocks[result].nestedTryStmts = p.nestedTryStmts.len.int16 - p.blocks[result].nestedExceptStmts = p.inExceptBlockLen.int16 template startBlock(p: BProc, start: FormatStr = "{$n", - args: varargs[Rope]) = + args: varargs[Rope]) {.used.} = lineCg(p, cpsStmts, start, args) - startBlockInternal(p, 0) - -template startBlock(p: BProc, id: BlockId) = - lineCg(p, cpsStmts, "{$n", []) - startBlockInternal(p, id.int + 1) - -proc endBlock(p: BProc) + startBlockInternal(p) proc loadInto(p: BProc, le, ri: CgNode, a: var TLoc) {.inline.} = if ri.kind in {cnkCall, cnkCheckedCall} and @@ -54,10 +39,6 @@ proc loadInto(p: BProc, le, ri: CgNode, a: var TLoc) {.inline.} = a.flags.incl(lfEnforceDeref) expr(p, ri, a) -proc assignLabel(b: var TBlock): Rope {.inline.} = - b.label = "LA" & b.id.rope 
- result = b.label - proc blockBody(b: var TBlock): Rope = result = b.sections[cpsLocals] if b.frameLen > 0: @@ -80,48 +61,9 @@ proc endBlock(p: BProc) = var blockEnd: Rope if frameLen > 0: blockEnd.addf("FR_.len-=$1;$n", [frameLen.rope]) - if p.blocks[topBlock].label != "": - blockEnd.addf("} $1: ;$n", [p.blocks[topBlock].label]) - else: - blockEnd.addf("}$n", []) + blockEnd.addf("}$n", []) endBlock(p, blockEnd) -proc stmtBlock(p: BProc, n: CgNode) = - startBlock(p) - genStmts(p, n) - endBlock(p) - -proc blockLeaveActions(p: BProc, howManyTrys, howManyExcepts: int) = - # Called by return and break stmts. - # Deals with issues faced when jumping out of try/except/finally stmts. - - var stack = newSeq[typeof(p.nestedTryStmts[0])](0) - - inc p.withinBlockLeaveActions - for i in 1..howManyTrys: - let tryStmt = p.nestedTryStmts.pop - # Pop this try-stmt of the list of nested trys - # so we don't infinite recurse on it in the next step. - stack.add(tryStmt) - - # Find finally-stmt for this try-stmt - # and generate a copy of its sons - var finallyStmt = tryStmt.fin - if finallyStmt != nil: - genStmts(p, finallyStmt[0]) - - dec p.withinBlockLeaveActions - - # push old elements again: - for i in countdown(howManyTrys-1, 0): - p.nestedTryStmts.add(stack[i]) - - # Pop exceptions that was handled by the - # except-blocks we are in - block: - for i in countdown(howManyExcepts-1, 0): - linefmt(p, cpsStmts, "#popCurrentException();$n", []) - proc genGotoVar(p: BProc; value: CgNode) = case value.kind of cnkIntLit, cnkUIntLit: @@ -166,15 +108,7 @@ proc genIf(p: BProc, n: CgNode) = initLocExprSingleUse(p, n[0], a) lineF(p, cpsStmts, "if ($1)$n", [rdLoc(a)]) - stmtBlock(p, n[1]) - -proc genReturnStmt(p: BProc, t: CgNode) = - p.flags.incl beforeRetNeeded - genLineDir(p, t) - blockLeaveActions(p, - howManyTrys = p.nestedTryStmts.len, - howManyExcepts = p.inExceptBlockLen) - lineF(p, cpsStmts, "goto BeforeRet_;$n", []) + startBlock(p) proc genGotoForCase(p: BProc; caseStmt: CgNode) = 
for i in 1..= 0 and p.blocks[idx].blk != (t[0].label.int + 1): - dec idx - - let label = assignLabel(p.blocks[idx]) - blockLeaveActions(p, - p.nestedTryStmts.len - p.blocks[idx].nestedTryStmts, - p.inExceptBlockLen - p.blocks[idx].nestedExceptStmts) - genLineDir(p, t) - lineF(p, cpsStmts, "goto $1;$n", [label]) +proc exit(n: CgNode): CgNode = + # XXX: exists as a convenience for overflow check, index check, etc. + # code gen. Should be removed once those are fully lowered prior + # to code generation + case n.kind + of cnkCheckedCall: n[^1] + else: nil + +proc useLabel(p: BProc, label: CLabel) {.inline.} = + if label.id == ExitLabel: + p.flags.incl beforeRetNeeded + +proc raiseInstr(p: BProc, n: CgNode): Rope = + if n != nil: + case n.kind + of cnkLabel: + # easy case, simply goto the target: + result = ropecg(p.module, "goto $1;", [n.label]) + of cnkTargetList: + # the first non-leave operand is the initial jump target + let label = toCLabel(n[0], p.specifier) + useLabel(p, label) + result = ropecg(p.module, "goto $1;", [label]) + else: + unreachable(n.kind) + else: + # absence of an node storing the target means "never exits" + if hasAssume in CC[p.config.cCompiler].props: + result = "__asume(0);" + else: + # don't just fall-through; doing so would inhibit C compiler + # optimizations + p.flags.incl beforeRetNeeded + result = "goto BeforeRet_;" -proc raiseExit(p: BProc) = +proc raiseExit(p: BProc, n: CgNode) = assert p.config.exc == excGoto if nimErrorFlagDisabled notin p.flags: p.flags.incl nimErrorFlagAccessed - if p.nestedTryStmts.len == 0: - p.flags.incl beforeRetNeeded - # easy case, simply goto 'ret': - lineCg(p, cpsStmts, "if (NIM_UNLIKELY(*nimErr_)) goto BeforeRet_;$n", []) - else: - lineCg(p, cpsStmts, "if (NIM_UNLIKELY(*nimErr_)) goto LA$1_;$n", - [p.nestedTryStmts[^1].label]) - -proc raiseInstr(p: BProc): Rope = - if p.config.exc == excGoto: - let L = p.nestedTryStmts.len - if L == 0: - p.flags.incl beforeRetNeeded - # easy case, simply goto 'ret': - 
result = ropecg(p.module, "goto BeforeRet_;$n", []) - else: - # raise inside an 'except' must go to the finally block, - # raise outside an 'except' block must go to the 'except' list. - result = ropecg(p.module, "goto LA$1_;$n", - [p.nestedTryStmts[L-1].label]) - # + ord(p.nestedTryStmts[L-1].inExcept)]) - else: - result = "" + lineCg(p, cpsStmts, "if (NIM_UNLIKELY(*nimErr_)) $1$n", + [raiseInstr(p, n)]) proc genRaiseStmt(p: BProc, t: CgNode) = if t[0].kind != cnkEmpty: @@ -272,12 +186,11 @@ proc genRaiseStmt(p: BProc, t: CgNode) = genLineDir(p, t) # reraise the last exception: linefmt(p, cpsStmts, "#reraiseException();$n", []) - let gotoInstr = raiseInstr(p) - if gotoInstr != "": - line(p, cpsStmts, gotoInstr) + + # the goto is emitted separately template genCaseGenericBranch(p: BProc, b: CgNode, e: TLoc, - rangeFormat, eqFormat: FormatStr, labl: TLabel) = + rangeFormat, eqFormat: FormatStr, labl: BlockId) = var x, y: TLoc for i in 0.. 0: genIfForCaseUntil(p, n, + if splitPoint > 0: + genIfForCaseUntil(p, n, rangeFormat = "if ($1 >= $2 && $1 <= $3) goto $4;$n", eqFormat = "if ($1 == $2) goto $3;$n", - splitPoint, a) else: "" + splitPoint, a) # generate switch part (might be empty): if splitPoint+1 < n.len: @@ -441,12 +324,11 @@ proc genOrdinalCase(p: BProc, n: CgNode) = # else part of case statement: lineF(p, cpsStmts, "default:$n", []) hasDefault = true - stmtBlock(p, branch.lastSon) - lineF(p, cpsStmts, "break;$n", []) + + linefmt(p, cpsStmts, "goto $1;$n", [branch[^1].label]) if (hasAssume in CC[p.config.cCompiler].props) and not hasDefault: lineF(p, cpsStmts, "default: __assume(0);$n", []) lineF(p, cpsStmts, "}$n", []) - if lend != "": fixLabel(p, lend) proc genCase(p: BProc, t: CgNode) = genLineDir(p, t) @@ -477,98 +359,38 @@ proc bodyCanRaise(p: BProc; n: CgNode): bool = if bodyCanRaise(p, it): return true result = false -proc genTryGoto(p: BProc; t: CgNode) = - let fin = if t[^1].kind == cnkFinally: t[^1] else: nil - inc p.labels - let lab = p.labels - let 
hasExcept = t[1].kind == cnkExcept - if hasExcept: inc p.withinTryWithExcept - p.nestedTryStmts.add((fin, false, Natural lab)) +proc genExcept(p: BProc, n: CgNode) = + ## Generates and emits the C code for an ``Except`` join point. - p.flags.incl nimErrorFlagAccessed - - var errorFlagSet = false ## tracks whether the error flag is set to 'true' - ## on a control-flow path connected to the finally section + if n.len > 1: + # it's a handler with a filter/matcher + var condExpr = "" + for j in 1..Sup.m_type, $1)", + [genTypeInfo2Name(p.module, n[j].typ)]) - if 1 < t.len and t[1].kind == cnkExcept: - startBlock(p, "if (NIM_UNLIKELY(*nimErr_)) {$n") + # jump to the next handler in the chain if the filter doesn't apply + linefmt(p, cpsStmts, "if (!($1)) {$2}$n", + [condExpr, raiseInstr(p, n[^1])]) else: - startBlock(p) - linefmt(p, cpsStmts, "LA$1_:;$n", [lab]) - - p.nestedTryStmts[^1].inExcept = true - var i = 1 - while (i < t.len) and (t[i].kind == cnkExcept): - - inc p.labels - let nextExcept = p.labels - p.nestedTryStmts[^1].label = nextExcept - - if t[i].len == 1: - # general except section: - if i > 1: lineF(p, cpsStmts, "else", []) - startBlock(p) - # we handled the exception, remember this: - linefmt(p, cpsStmts, "*nimErr_ = NIM_FALSE;$n", []) - genStmts(p, t[i][0]) - checkSetsErrorFlag(t[i][0]) - else: - var orExpr = "" - for j in 0..$1, $2)", [memberName, checkFor]) - - if i > 1: line(p, cpsStmts, "else ") - startBlock(p, "if ($1) {$n", [orExpr]) - # we handled the exception, remember this: - linefmt(p, cpsStmts, "*nimErr_ = NIM_FALSE;$n", []) - genStmts(p, t[i][^1]) - checkSetsErrorFlag(t[i][^1]) - - linefmt(p, cpsStmts, "#popCurrentException();$n", []) - linefmt(p, cpsStmts, "LA$1_:;$n", [nextExcept]) - endBlock(p) - - inc(i) - discard pop(p.nestedTryStmts) - endBlock(p) + discard "catch-all handler, nothing to check" - if i < t.len and t[i].kind == cnkFinally: - startBlock(p) - # future direction: the code generator should track for each procedure - # 
whether it observes the error flag. If the finally clause's body - # doesn't observes it itself, and also doesn't call any procedure that - # does, we can also omit the save/restore pair - if not errorFlagSet: - # this is an optimization; if the error flag is proven to never be - # 'true' when the finally section is reached, we don't need to erase - # nor restore it: - genStmts(p, t[i][0]) - else: - # pretend we did handle the error for the safe execution of the 'finally' section: - p.procSec(cpsLocals).add(ropecg(p.module, "NIM_BOOL oldNimErrFin$1_;$n", [lab])) - linefmt(p, cpsStmts, "oldNimErrFin$1_ = *nimErr_; *nimErr_ = NIM_FALSE;$n", [lab]) - genStmts(p, t[i][0]) - # this is correct for all these cases: - # 1. finally is run during ordinary control flow - # 2. finally is run after 'except' block handling: these however set the - # error back to nil. - # 3. finally is run for exception handling code without any 'except' - # handler present or only handlers that did not match. - linefmt(p, cpsStmts, "*nimErr_ = oldNimErrFin$1_;$n", [lab]) - endBlock(p) - raiseExit(p) - if hasExcept: inc p.withinTryWithExcept + startBlock(p) + p.flags.incl nimErrorFlagAccessed + # exit error mode: + lineCg(p, cpsStmts, "*nimErr_ = NIM_FALSE;$n", []) + # setup the handler frame: + var tmp: TLoc + getTemp(p, p.module.g.graph.getCompilerProc("ExceptionFrame").typ, tmp) + lineCg(p, cpsStmts, "#nimCatchException($1);$n", [addrLoc(p.config, tmp)]) proc genAsmOrEmitStmt(p: BProc, t: CgNode, isAsmStmt=false): Rope = var res = "" @@ -677,7 +499,7 @@ proc genAsgn(p: BProc, e: CgNode) = genLineDir(p, ri) loadInto(p, le, ri, a) -proc genStmts(p: BProc, t: CgNode) = +proc genStmt(p: BProc, t: CgNode) = var a: TLoc let isPush = p.config.hasHint(rsemExtendedContext) @@ -685,3 +507,79 @@ proc genStmts(p: BProc, t: CgNode) = expr(p, t, a) if isPush: popInfoContext(p.config) internalAssert p.config, a.k in {locNone, locTemp, locLocalVar, locExpr} + +proc gen(p: BProc, code: openArray[CInstr], 
stmts: CgNode) = + ## Generates and emits the C code for `code` and `stmts`. This is the main + ## driver of C code generation. + var pos = 0 + while pos < code.len: + let it = code[pos] + case it.op + of opLabel: + lineCg(p, cpsStmts, "$1:;$n", [it.label]) + of opJump: + useLabel(p, it.label) + lineCg(p, cpsStmts, "goto $1;$n", [it.label]) + of opDispJump: + # must only be part of a dispatcher + unreachable() + of opSetTarget: + lineCg(p, cpsStmts, "Target$1_ = $2;$n", [$it.discr, it.value]) + of opDispatcher: + lineF(p, cpsLocals, "NU8 Target$1_;$N", [$it.discr]) + lineF(p, cpsStmts, "switch (Target$1_) {$n", [$it.discr]) + for i in 0.. 0 if we are within a loop - withinTryWithExcept*: int ## required for goto based exception handling - withinBlockLeaveActions*: int ## complex to explain sigConflicts*: CountTable[string] + specifier*: Option[uint32] + # XXX: `specifier` is a hack. Some parts of the code generator manually + # emit gotos, and thus need a label specifier, but they shouldn't + body*: Body ## the procedure's full body locals*: OrdinalSeq[LocalId, TLoc] ## the locs for all locals of the procedure @@ -319,7 +309,6 @@ proc newProc*(prc: PSym, module: BModule): BProc = result.options = if prc != nil: prc.options else: module.config.options newSeq(result.blocks, 1) - result.nestedTryStmts = @[] result.sigConflicts = initCountTable[string]() proc newModuleList*(g: ModuleGraph): BModuleList = diff --git a/compiler/backend/cgir.nim b/compiler/backend/cgir.nim index 912a3ae19f4..2b489202d4f 100644 --- a/compiler/backend/cgir.nim +++ b/compiler/backend/cgir.nim @@ -43,6 +43,9 @@ type cnkMagic ## name of a magic procedure. Only valid in the callee ## slot of ``cnkCall`` and ``cnkCheckedCall`` nodes + cnkResume ## leave the current procedure as part of exceptional + ## control-flow + cnkCall ## a procedure call. 
The first operand is the procedure, ## the following operands the arguments cnkCheckedCall ## like ``cnkCall``, but the call might raise an exception @@ -103,9 +106,8 @@ type cnkStmtList cnkStmtListExpr - # future direction: remove ``cnkStmtListExpr``. The code generators know - # based on the context a statement list appears in whether its an - # expression or not + # XXX: both stmtlist and stmtlistexpr are obsolete. They're only kept for + # grouping the top-level statements under a single node cnkVoidStmt ## discard the operand value (i.e., do nothing with it) cnkEmitStmt ## an ``emit`` statement @@ -115,23 +117,29 @@ type ## evaluates to 'true' cnkRepeatStmt ## execute the body indefinitely cnkCaseStmt ## a ``case`` statement + cnkBranch ## the branch of a ``case`` statement cnkBlockStmt ## an (optionally) labeled block + cnkTryStmt + cnkGotoStmt + cnkLoopStmt ## jump back to a loop join point cnkBreakStmt ## break out of labeled block, or, if no label is provided, ## the closest ``repeat`` loop cnkRaiseStmt ## raise(x) -- set the `x` as the current exception and start ## exceptional control-flow. 
`x` can be ``cnkEmpty`` in which ## case "set current exception" part is skipped - # future direction: lower the high-level raise statements (which means - # "set the current exception" + "start exceptional control-flow") into - # just "start exceptional control-flow" cnkReturnStmt + cnkContinueStmt## jump to the next target in the active jump list - cnkTryStmt - cnkExcept + cnkJoinStmt ## join point for gotos + cnkLoopJoinStmt## join point for loops + cnkEnd ## marks the end of a structured control-flow block + ## (identified by the label) + cnkExcept ## special join point, representing an exception handler cnkFinally - cnkBranch ## the branch of a ``case`` statement + cnkTargetList ## an ordered list of jump target/actions + cnkLeave cnkDef ## starts the lifetime of a local and optionally assigns an ## initial value @@ -147,12 +155,19 @@ const cnkWithOperand* = {cnkConv, cnkHiddenConv, cnkDeref, cnkAddr, cnkHiddenAddr, cnkDerefView, cnkObjDownConv, cnkObjUpConv, cnkCast, cnkLvalueConv} - cnkAtoms* = {cnkInvalid..cnkMagic, cnkReturnStmt} + cnkAtoms* = {cnkInvalid..cnkResume, cnkReturnStmt} ## node kinds that denote leafs cnkWithItems* = AllKinds - cnkWithOperand - cnkAtoms ## node kinds for which the ``items`` iterator is available cnkLiterals* = {cnkIntLit, cnkUIntLit, cnkFloatLit, cnkStrLit} + cnkLegacyNodes* = {cnkBlockStmt, cnkTryStmt, cnkReturnStmt, cnkBreakStmt, + cnkRepeatStmt} + ## node kinds that belong to the legacy control-flow representation + cnkNewCfNodes* = {cnkGotoStmt, cnkJoinStmt, cnkLeave, cnkResume, + cnkContinueStmt, cnkLoopStmt, cnkLoopJoinStmt, + cnkEnd, cnkTargetList} + ## node kinds that belong to the new-style control-flow representation type Local* = object @@ -184,7 +199,8 @@ type info*: TLineInfo typ*: PType case kind*: CgNodeKind - of cnkInvalid, cnkEmpty, cnkType, cnkNilLit, cnkReturnStmt: discard + of cnkInvalid, cnkEmpty, cnkType, cnkNilLit, cnkReturnStmt, cnkResume: + discard of cnkIntLit, cnkUIntLit: # future direction: use a 
``BiggestUint`` for uint values intVal*: BiggestInt @@ -281,18 +297,34 @@ proc merge*(dest: var Body, source: Body): CgNode = # merge the locals: let offset = dest.locals.merge(source.locals) - proc update(n: CgNode, offset: uint32) {.nimcall.} = + proc update(n: CgNode, offset, labelOffset: uint32) {.nimcall.} = ## Offsets the ID of all references-to-``Local`` in `n` by `offset`. case n.kind of cnkLocal: n.local.uint32 += offset - of cnkAtoms - {cnkLocal}: + of cnkLabel: + n.label.uint32 += labelOffset + of cnkAtoms - {cnkLocal, cnkLabel}: + discard "nothing to do" + of cnkWithOperand: + update(n.operand, offset, labelOffset) + of cnkWithItems: + for it in n.items: + update(it, offset, labelOffset) + + proc computeNextLabel(n: CgNode, highest: var uint32) = + ## Computes the highest ID value used by labels within `n` and writes it + ## to `highest`. + case n.kind + of cnkLabel: + highest = max(n.label.uint32, highest) + of cnkAtoms - {cnkLabel}: discard "nothing to do" of cnkWithOperand: - update(n.operand, offset) + computeNextLabel(n.operand, highest) of cnkWithItems: for it in n.items: - update(it, offset) + computeNextLabel(it, highest) result = source.code @@ -300,8 +332,10 @@ proc merge*(dest: var Body, source: Body): CgNode = # make things easier by supporting `dest` being uninitialized dest.code = source.code elif source.code.kind != cnkEmpty: - # update references to locals in source's code: - update(source.code, offset.get(LocalId(0)).uint32) + var labelOffset = 0'u32 + computeNextLabel(dest.code, labelOffset) + # update references to locals and labels in source's code: + update(source.code, offset.get(LocalId(0)).uint32, labelOffset + 1) # merge the code fragments: case dest.code.kind diff --git a/compiler/backend/cgirgen.nim b/compiler/backend/cgirgen.nim index baa1a3b7de5..72963ea341c 100644 --- a/compiler/backend/cgirgen.nim +++ b/compiler/backend/cgirgen.nim @@ -41,10 +41,17 @@ import int128 ] +import std/options as std_options +from std/sequtils 
import delete + from compiler/ast/ast import newSym, newType, rawAddSon from compiler/sem/semdata import makeVarType type + NodeLabelPair = tuple + node: CgNode + target: LabelId + TranslateCl = object graph: ModuleGraph idgen: IdGenerator @@ -56,9 +63,25 @@ type localsMap: Table[int, LocalId] ## maps a sybmol ID to the corresponding local. Needed because normal ## local variables reach here as ``PSym``s - blocks: seq[LabelId] + blocks: seq[tuple[input, actual: LabelId]] ## the stack of enclosing blocks for the currently processed node + numLabels: int + ## incremented when a new label ID is allocated + exits: seq[NodeLabelPair] + ## non-exception goto-like statements that need patching when crossing + ## ``try``, ``finally``, or ``except`` boundaries + raiseExits: seq[NodeLabelPair] + ## similar to `exits`, but for exceptional control-flow statements/ + ## nodes. The label doesn't matter, it's only there so that `raiseExits` + ## can be passed to the same procedures as `exits` + returnLabel: Option[LabelId] + ## the label to be placed after all other statements. A label is only + ## allocated if an ``mnkReturn`` appears somewhere in the MIR code + isActive: bool + ## whether translation of statements is enabled. 
Used to eliminate + ## unreachable code + locals: Store[LocalId, Local] ## the in-progress list of all locals in the translated body @@ -85,6 +108,12 @@ template isFilled(x: LocalId): bool = # temporaries, which can never map to the result variable x.int != 0 +func delete[T](s: var seq[T], a, b: int) = + # XXX: this procedure is a workaround for ``sequtils.delete`` not handling + # empty slices properly (an IndexDefect is erroneously raised) + if b > a: + sequtils.delete(s, a..(b-1)) + func newMagicNode(magic: TMagic, info: TLineInfo): CgNode = CgNode(kind: cnkMagic, info: info, magic: magic) @@ -94,6 +123,12 @@ func get(t: MirBody, cr: var TreeCursor): lent MirNode {.inline.} = inc cr.pos +func skip(body: MirBody, cr: var TreeCursor) = + ## Skips over the node or sub-tree at the cursor. + let next = uint32 body.code.sibling(NodePosition cr.pos) + assert next > cr.pos + cr.pos = next + func enter(t: MirBody, cr: var TreeCursor): lent MirNode {.inline.} = assert t.code[cr.pos].kind in SubTreeNodes, "not a sub-tree" result = get(t, cr) @@ -212,17 +247,6 @@ func addIfNotEmpty(stmts: var seq[CgNode], n: sink CgNode) = if n.kind != cnkEmpty: stmts.add n -func toSingleNode(stmts: sink seq[CgNode]): CgNode = - ## Creates a single ``CgNode`` from a list of *statements* - case stmts.len - of 0: - result = newEmpty() - of 1: - result = move stmts[0] - else: - result = newNode(cnkStmtList) - result.kids = stmts - proc newDefaultCall(info: TLineInfo, typ: PType): CgNode = ## Produces the tree for a ``default`` magic call. newExpr(cnkCall, info, typ, [newMagicNode(mDefault, info)]) @@ -259,11 +283,28 @@ proc genObjConv(n: CgNode, a, b, t: PType): CgNode = if diff < 0: cnkObjUpConv else: cnkObjDownConv, n.info, t): n +proc disable(cl: var TranslateCl) = + # consider the following MIR: + # try: + # return + # def _1 = ... + # finally: + # =destroy(name _1) + # + # Although nonesense, this is currently both legal and possible MIR. 
If + # translation would be disabled beyond the ``return``, then the temporary + # wouldn't be registered. Therefore, disable is a no-op when in an unscoped + # contexts (such as the above) + # XXX: eliminating unreachable code needs to happen much earlier, either in + # ``mirgen`` or ``transf`` + if not cl.inUnscoped: + cl.isActive = false + # forward declarations: proc stmtToIr(tree: MirBody, env: MirEnv, cl: var TranslateCl, - cr: var TreeCursor): CgNode + cr: var TreeCursor, stmts: var seq[CgNode]) proc scopeToIr(tree: MirBody, env: MirEnv, cl: var TranslateCl, - cr: var TreeCursor, allowExpr=false): seq[CgNode] + cr: var TreeCursor, stmts: var seq[CgNode], allowExpr=false) proc handleSpecialConv(c: ConfigRef, n: CgNode, info: TLineInfo, dest: PType): CgNode = @@ -608,93 +649,279 @@ proc defToIr(tree: MirBody, env: MirEnv, cl: var TranslateCl, unreachable() proc bodyToIr(tree: MirBody, env: MirEnv, cl: var TranslateCl, - cr: var TreeCursor): CgNode = + cr: var TreeCursor, stmts: var seq[CgNode]) = ## Generates the ``CgNode`` tree for the body of a construct that implies ## some form of control-flow. let prev = cl.inUnscoped # assume the body is unscoped until stated otherwise cl.inUnscoped = true - result = stmtToIr(tree, env, cl, cr) + stmtToIr(tree, env, cl, cr, stmts) cl.inUnscoped = prev proc caseToIr(tree: MirBody, env: MirEnv, cl: var TranslateCl, n: MirNode, - cr: var TreeCursor): CgNode + cr: var TreeCursor, stmts: var seq[CgNode]) + +func newLabel(cl: var TranslateCl): LabelId = + ## Allocates a new label ID and returns it. + result = LabelId(cl.numLabels) + inc cl.numLabels + +func getReturnLabel(cl: var TranslateCl): LabelId = + ## Returns the label that points to the end of the current procedure. 
+ if cl.returnLabel.isSome: + result = cl.returnLabel.unsafeGet() + else: + # allocate a new label first + result = newLabel(cl) + cl.returnLabel = some result + +func node(lbl: LabelId): CgNode = + newLabelNode(BlockId(lbl)) + +proc patch(stmt: CgNode, target: sink CgNode) = + ## Appends `target` to the goto-like statement `stmt`, always wrapping + ## `target` in a ``cnkTargetList`` if there's none yet. + if stmt[^1] == nil: + stmt[^1] = newTree(cnkTargetList, unknownLineInfo, target) + else: + # a target list already exists + stmt[^1].kids.add target + +proc patchSingle(stmt: CgNode, target: sink CgNode) = + ## Appends `target` to the goto-like statement `stmt`. + if stmt[^1] == nil: + stmt[^1] = target + else: + stmt[^1].kids.add target + +proc patch(x: seq[NodeLabelPair], start: int, exit: LabelId) = + for i in start.. 0 and stmts[^1].kind == cnkJoinStmt: + label = stmts[^1][0].label.LabelId + + var + i = 0 + found = false + # search for exits targetting `target`, update them with the correct label, + # and then remove them from the list + while i < cl.exits.len: + if cl.exits[i][1] == target: + patchSingle(cl.exits[i][0], node(label)) + cl.exits.del(i) + # remember that at least one exit was found: + found = true + else: + inc i + + # emit the join, but only if no coalescing took place and the label is + # actually targeted: + if label == target and (found or required): + stmts.add newTree(cnkJoinStmt, info, node(label)) + + if found or true: + # code is alive if following a join that is targeted by an alive goto + # XXX: translation has to be forcefully enabled at a join, even if not + # within a scoped context: the surrounding scope might itself be + # part of an unscoped context. 
This is a temporary workaround, see + # `disable <#disable,TranslateCl>`_ + cl.isActive = true + +template join(info: TLineInfo, lbl: LabelId; required = false) = + join(stmts, cl, info, lbl, required) + +template goto(kind: CgNodeKind, info: TLineInfo, target: LabelId) = + ## Emits a fixed goto-like statement targeting `target`. + stmts.add newStmt(kind, info, node(target)) + +template exit(lbl: LabelId) = + ## Emits a goto statement and registers it with `lbl` as the target. + if cl.isActive: + let n = newStmt(cnkGotoStmt, unknownLineInfo, nil) + stmts.add n + cl.exits.add((n, lbl)) + cl.disable() + +template guarded(lbl: LabelId, body: untyped) = + ## Updates all exits emitted as part of `body` with a leave instruction + ## targetting `lbl`. + let + raiseStart = cl.raiseExits.len + exitStart = cl.exits.len + body + patchLeave(cl.raiseExits, raiseStart, lbl) + patchLeave(cl.exits, exitStart, lbl) proc stmtToIr(tree: MirBody, env: MirEnv, cl: var TranslateCl, - cr: var TreeCursor): CgNode = + cr: var TreeCursor, stmts: var seq[CgNode]) = + + # skip the statement if translation is disabled + if not cl.isActive: + tree.skip(cr) + return + let n {.cursor.} = tree.get(cr) let info = cr.info ## the source information of `n` - template body(): CgNode = - bodyToIr(tree, env, cl, cr) + template body() = + bodyToIr(tree, env, cl, cr, stmts) - template to(kind: CgNodeKind, args: varargs[untyped]): CgNode = + template to(kind: CgNodeKind, args: varargs[untyped]) = let r = newStmt(kind, info, args) leave(tree, cr) - r + stmts.add r - template toList(k: CgNodeKind, body: untyped): CgNode = + template toList(k: CgNodeKind, body: untyped) = let res {.inject.} = newStmt(k, info) while tree[cr].kind != mnkEnd: body leave(tree, cr) - res + stmts.add res case n.kind of DefNodes: - defToIr(tree, env, cl, n, cr) + stmts.addIfNotEmpty defToIr(tree, env, cl, n, cr) of mnkAsgn, mnkInit, mnkSwitch: let dst = lvalueToIr(tree, cl, cr) (src, useFast) = sourceExprToIr(tree, cl, cr) to (if 
useFast: cnkFastAsgn else: cnkAsgn), dst, src of mnkRepeat: - to cnkRepeatStmt, body() + let label = newLabel(cl) + stmts.add newTree(cnkLoopJoinStmt, info, node(label)) + body() + stmts.add newStmt(cnkLoopStmt, info, node(label)) + leave(tree, cr) of mnkBlock: - cl.blocks.add n.label # push the label to the stack - let body = body() - cl.blocks.setLen(cl.blocks.len - 1) # pop block from the stack - to cnkBlockStmt, newLabelNode(cl.blocks.len.BlockId, info), body + cl.blocks.add (n.label, newLabel(cl)) + body() + join info, cl.blocks.pop().actual + leave(tree, cr) of mnkTry: - let res = newStmt(cnkTryStmt, info, [body()]) assert n.len <= 2 + let + raiseExitStart = cl.raiseExits.len + exitStart = cl.exits.len + + body() # body of the try block + let target = newLabel(cl) + exit target # jump past the except and/or finally sections for _ in 0.. 0: + if bIdx == it.len-1: + # last branch in the handler block + excpt.add nil + cl.raiseExits.add (excpt, LabelId(0)) + else: + # setup the label for the follow-up handler + next = newLabel(cl) + excpt.add node(next) + + stmts.add excpt + guarded this: + cl.isActive = true # each branch starts as active + body() # body of the handler + exit target # jump to the after the try statement + stmts.add newStmt(cnkEnd, excpt.info, [node(this)]) + + leave(tree, cr) + + else: + # skip all branches + for _ in 0..= 0 and cl.blocks[idx] != n.label: + while idx >= 0 and cl.blocks[idx].input != n.label: dec idx - newStmt(cnkBreakStmt, info, [newLabelNode(BlockId idx, info)]) + exit cl.blocks[idx].actual of mnkReturn: - newNode(cnkReturnStmt, info) + exit getReturnLabel(cl) of mnkVoid: var res = exprToIr(tree, cl, cr) if res.typ.isEmptyType(): @@ -703,17 +930,32 @@ proc stmtToIr(tree: MirBody, env: MirEnv, cl: var TranslateCl, else: res = newStmt(cnkVoidStmt, info, [res]) leave(tree, cr) - res + stmts.add res of mnkIf: - to cnkIfStmt, valueToIr(tree, cl, cr), body() + let label = newLabel(cl) + stmts.add newStmt(cnkIfStmt, info, 
[valueToIr(tree, cl, cr), node(label)]) + body() + stmts.add newStmt(cnkEnd, info, [node(label)]) + # if control-flow reaches the ``if`` itself, it also reaches the code + # following the ``if`` + cl.isActive = true + leave(tree, cr) of mnkRaise: # the operand can either be empty or an lvalue expression - to cnkRaiseStmt: - case tree[cr].kind - of mnkNone: atomToIr(tree, cl, cr) - else: lvalueToIr(tree, cl, cr) + let + arg {.cursor.} = tree.get(cr) + res = newStmt(cnkRaiseStmt, info): + case arg.kind + of mnkNone: newEmpty() + else: lvalueToIr(tree, cl, arg, cr) + + res.add nil # reserve a slot for the label + cl.raiseExits.add (res, LabelId(0)) + stmts.add res + cl.disable() + leave(tree, cr) of mnkCase: - caseToIr(tree, env, cl, n, cr) + caseToIr(tree, env, cl, n, cr, stmts) of mnkAsm: toList cnkAsmStmt: res.add valueToIr(tree, cl, cr) @@ -721,31 +963,55 @@ proc stmtToIr(tree: MirBody, env: MirEnv, cl: var TranslateCl, toList cnkEmitStmt: res.add valueToIr(tree, cl, cr) of mnkStmtList: - toList cnkStmtList: - res.kids.addIfNotEmpty stmtToIr(tree, env, cl, cr) + while tree[cr].kind != mnkEnd: + stmtToIr(tree, env, cl, cr, stmts) + leave(tree, cr) of mnkScope: - toSingleNode scopeToIr(tree, env, cl, cr) + scopeToIr(tree, env, cl, cr, stmts) of mnkDestroy: unreachable("a 'destroy' that wasn't lowered") of AllNodeKinds - StmtNodes: unreachable(n.kind) proc caseToIr(tree: MirBody, env: MirEnv, cl: var TranslateCl, n: MirNode, - cr: var TreeCursor): CgNode = + cr: var TreeCursor, stmts: var seq[CgNode]) = assert n.kind == mnkCase - result = newStmt(cnkCaseStmt, cr.info, [valueToIr(tree, cl, cr)]) + let + exit = newLabel(cl) + result = newStmt(cnkCaseStmt, cr.info, [valueToIr(tree, cl, cr)]) + # whether the statement has a structured exit is computed manually + var doesExit = false + + stmts.add result # add the case statement already for j in 0.. 0: - for x in 0.. 
prev: - # insert all the lifted defs at the start + # insert all the lifted defs at the start of the scope for i in countdown(cl.defs.high, prev): - stmts.insert genDefFor(move cl.defs[i]) + stmts.insert genDefFor(move cl.defs[i]), start # "pop" the elements that were added as part of this scope: cl.defs.setLen(prev) cl.inUnscoped = prevInUnscoped - result = stmts - proc tb(tree: MirBody, env: MirEnv, cl: var TranslateCl, start: NodePosition): CgNode = ## Translate `tree` to the corresponding ``CgNode`` representation. var cr = TreeCursor(pos: start.uint32) - var nodes = scopeToIr(tree, env, cl, cr, allowExpr=true) + var stmts: seq[CgNode] + scopeToIr(tree, env, cl, cr, stmts, allowExpr=true) + if cl.raiseExits.len > 0: + # there's unhandled exceptional control-flow + patchResume(cl.raiseExits, 0) + + # emit the join for the return label, if used + if cl.returnLabel.isSome: + join unknownLineInfo, cl.returnLabel.get() + if cr.hasNext(tree): # the tree must be an expression; the last node is required to be an atom let x = atomToIr(tree, cl, cr) - if nodes.len == 0: - x - else: - nodes.add x - newExpr(cnkStmtListExpr, unknownLineInfo, nodes[^1].typ, nodes) - else: - # it's a statement list - toSingleNode nodes + stmts.add x + + # XXX: the list of statements is still wrapped in a node for now, but + # this needs to change once all code generators use the new CGIR + result = newStmt(cnkStmtList, unknownLineInfo) + result.kids = move stmts proc generateIR*(graph: ModuleGraph, idgen: IdGenerator, env: MirEnv, owner: PSym, @@ -920,6 +1197,9 @@ proc generateIR*(graph: ModuleGraph, idgen: IdGenerator, env: MirEnv, # environment parameter add(owner.ast[paramsPos][^1].sym) + # enable translation: + cl.isActive = true + result = Body() result.code = tb(body, env, cl, NodePosition 0) result.locals = cl.locals diff --git a/compiler/backend/cgirgen_legacy.nim b/compiler/backend/cgirgen_legacy.nim new file mode 100644 index 00000000000..cb60ba0eae0 --- /dev/null +++ 
b/compiler/backend/cgirgen_legacy.nim @@ -0,0 +1,920 @@ +## Implements the translation from the MIR to the ``CgNode`` IR. All code +## reaching the code generation phase passes through here. +## +## .. note:: +## The `tb` prefix that's still used in some places is an abbreviation of +## "translate back" +## +## .. note:: +## The ``CgNode`` IR is slated for removal, with the MIR intended to take +## its place as the code-generator input. + +import + std/[ + tables + ], + compiler/ast/[ + ast_types, + ast_idgen, + ast_query, + lineinfos, + types + ], + compiler/backend/[ + cgir + ], + compiler/front/[ + options + ], + compiler/mir/[ + mirbodies, + mirenv, + mirtrees, + sourcemaps + ], + compiler/modules/[ + modulegraphs + ], + compiler/utils/[ + containers, + idioms, + int128 + ] + +from compiler/ast/ast import newSym, newType, rawAddSon +from compiler/sem/semdata import makeVarType + +type + TranslateCl = object + graph: ModuleGraph + idgen: IdGenerator + + owner: PSym + + tempMap: SeqMap[TempId, LocalId] + ## maps a ``TempId`` to the ID of the local created for it + localsMap: Table[int, LocalId] + ## maps a symbol ID to the corresponding local. Needed because normal + ## local variables reach here as ``PSym``s + blocks: seq[LabelId] + ## the stack of enclosing blocks for the currently processed node + + locals: Store[LocalId, Local] + ## the in-progress list of all locals in the translated body + + # a 'def' in the MIR means that the local starts to exist and that it + # is accessible in all connected basic blocks part of the enclosing + # ``mnkScope``. The ``CgNode`` IR doesn't use the same notion of scope, + # so for now, all 'def's (without the initial values) within nested + # control-flow-related trees are moved to the start of the enclosing + # ``mnkScope``. 
+ inUnscoped: bool + ## whether the currently processed statement/expression is part of an + ## unscoped control-flow context + defs: seq[CgNode] + ## the stack of locals/globals for which the ``cnkDef``/assignment needs + ## to be inserted later + + TreeCursor = object + ## A cursor into a ``MirBody``. + pos: uint32 ## the index of the currently pointed to node + origin {.cursor.}: PNode ## the source node + +func newMagicNode(magic: TMagic, info: TLineInfo): CgNode = + CgNode(kind: cnkMagic, info: info, magic: magic) + +func get(t: MirBody, cr: var TreeCursor): lent MirNode {.inline.} = + cr.origin = t.sourceFor(cr.pos.NodePosition) + result = t.code[cr.pos] + + inc cr.pos + +func enter(t: MirBody, cr: var TreeCursor): lent MirNode {.inline.} = + assert t.code[cr.pos].kind in SubTreeNodes, "not a sub-tree" + result = get(t, cr) + +func leave(t: MirBody, cr: var TreeCursor) = + assert t.code[cr.pos].kind == mnkEnd, "not at the end of sub-tree" + inc cr.pos + +template info(cr: TreeCursor): TLineInfo = + cr.origin.info + +template `[]`(t: MirBody, cr: TreeCursor): untyped = + t.code[cr.pos] + +template hasNext(cr: TreeCursor, t: MirBody): bool = + cr.pos.int < t.code.len + +template `[]=`(x: CgNode, i: Natural, n: CgNode) = + x.kids[i] = n + +template `[]=`(x: CgNode, i: BackwardsIndex, n: CgNode) = + x.kids[i] = n + +template add(x: CgNode, y: CgNode) = + x.kids.add y + +proc copyTree(n: CgNode): CgNode = + case n.kind + of cnkAtoms: + new(result) + result[] = n[] + of cnkWithOperand: + result = CgNode(kind: n.kind, info: n.info, typ: n.typ) + result.operand = copyTree(n.operand) + of cnkWithItems: + result = CgNode(kind: n.kind, info: n.info, typ: n.typ) + result.kids.setLen(n.kids.len) + for i, it in n.pairs: + result[i] = copyTree(it) + +proc newEmpty(info = unknownLineInfo): CgNode = + CgNode(kind: cnkEmpty, info: info) + +proc newTree(kind: CgNodeKind, info: TLineInfo, kids: varargs[CgNode]): CgNode = + ## For node kinds that don't represent standalone 
statements. + result = CgNode(kind: kind, info: info) + result.kids = @kids + +func newTypeNode(info: TLineInfo, typ: PType): CgNode = + CgNode(kind: cnkType, info: info, typ: typ) + +func newFieldNode(s: PSym; info = unknownLineInfo): CgNode = + CgNode(kind: cnkField, info: info, typ: s.typ, field: s) + +func newLabelNode(blk: BlockId; info = unknownLineInfo): CgNode = + CgNode(kind: cnkLabel, info: info, label: blk) + +proc newExpr(kind: CgNodeKind, info: TLineInfo, typ: PType, + kids: sink seq[CgNode]): CgNode = + ## Variant of ``newExpr`` optimized for passing a pre-existing child + ## node sequence. + result = CgNode(kind: kind, info: info, typ: typ) + result.kids = kids + +proc translateLit*(val: PNode): CgNode = + ## Translates an ``mnkLiteral`` node to a ``CgNode``. + ## Note that the MIR not only uses ``mnkLiteral`` for "real" literals, but + ## also for pushing other raw ``PNode``s through the MIR phase. + template node(k: CgNodeKind, field, value: untyped): CgNode = + CgNode(kind: k, info: val.info, typ: val.typ, field: value) + + case val.kind + of nkIntLiterals: + # use the type for deciding what whether it's a signed or unsigned value + case val.typ.skipTypes(abstractRange + {tyEnum}).kind + of tyInt..tyInt64, tyBool: + node(cnkIntLit, intVal, val.intVal) + of tyUInt..tyUInt64, tyChar: + node(cnkUIntLit, intVal, val.intVal) + of tyPtr, tyPointer, tyProc: + # XXX: consider adding a dedicated node for pointer-like-literals + # to both ``PNode`` and ``CgNode`` + node(cnkUIntLit, intVal, val.intVal) + else: + unreachable(val.typ.skipTypes(abstractRange).kind) + of nkFloatLiterals: + case val.typ.skipTypes(abstractRange).kind + of tyFloat, tyFloat64: + node(cnkFloatLit, floatVal, val.floatVal) + of tyFloat32: + # all code-generators need to do this at one point, so we help them out + # by narrowing the value to a float32 value + node(cnkFloatLit, floatVal, val.floatVal.float32.float64) + else: + unreachable() + of nkStrKinds: + node(cnkStrLit, strVal, 
val.strVal) + of nkNilLit: + newNode(cnkNilLit, val.info, val.typ) + of nkNimNodeLit: + node(cnkAstLit, astLit, val[0]) + of nkRange: + node(cnkRange, kids, @[translateLit(val[0]), translateLit(val[1])]) + of nkSym: + # special case for raw symbols used with emit and asm statements + assert val.sym.kind == skField + node(cnkField, field, val.sym) + else: + unreachable("implement: " & $val.kind) + +func addIfNotEmpty(stmts: var seq[CgNode], n: sink CgNode) = + ## Only adds the node to the list if it's not an empty node. Used to prevent + ## the creation of statement-list expression that only consist of empty + ## nodes + the result-expression (a statement-list expression is unnecessary + ## in that case) + if n.kind != cnkEmpty: + stmts.add n + +func toSingleNode(stmts: sink seq[CgNode]): CgNode = + ## Creates a single ``CgNode`` from a list of *statements* + case stmts.len + of 0: + result = newEmpty() + of 1: + result = move stmts[0] + else: + result = newNode(cnkStmtList) + result.kids = stmts + +proc newDefaultCall(info: TLineInfo, typ: PType): CgNode = + ## Produces the tree for a ``default`` magic call. + newExpr(cnkCall, info, typ, [newMagicNode(mDefault, info)]) + +proc initLocal(s: PSym): Local = + ## Inits a ``Local`` with the data from `s`. + result = Local(typ: s.typ, flags: s.flags, isImmutable: (s.kind == skLet), + name: s.name) + if s.kind in {skVar, skLet, skForVar}: + result.alignment = s.alignment.uint32 + +proc wrapInHiddenAddr(cl: TranslateCl, n: CgNode): CgNode = + ## Restores the ``cnkHiddenAddr`` around lvalue expressions passed to ``var`` + ## parameters. The code-generators operating on ``CgNode``-IR depend on the + ## hidden addr to be present + if n.typ.skipTypes(abstractInst).kind != tyVar: + newOp(cnkHiddenAddr, n.info, makeVarType(cl.owner, n.typ, cl.idgen), n) + else: + # XXX: is this case ever reached? It should not be. 
Raw ``var`` values + # must never be passed directly to ``var`` parameters at the MIR + # level + n + +proc genObjConv(n: CgNode, a, b, t: PType): CgNode = + ## Depending on the relationship between `a` and `b`, wraps `n` in either an + ## up- or down-conversion. `t` is the type to use for the resulting + ## expression + let diff = inheritanceDiff(b, a) + #echo "a: ", a.sym.name.s, "; b: ", b.sym.name.s + #assert diff != 0 and diff != high(int), "redundant or illegal conversion" + if diff == 0: + return nil + result = newOp( + if diff < 0: cnkObjUpConv else: cnkObjDownConv, + n.info, t): n + +# forward declarations: +proc stmtToIr(tree: MirBody, env: MirEnv, cl: var TranslateCl, + cr: var TreeCursor): CgNode +proc scopeToIr(tree: MirBody, env: MirEnv, cl: var TranslateCl, + cr: var TreeCursor, allowExpr=false): seq[CgNode] + +proc handleSpecialConv(c: ConfigRef, n: CgNode, info: TLineInfo, + dest: PType): CgNode = + ## Checks if a special conversion operator is required for a conversion + ## between the source type (i.e. that of `n`) and the destination type. + ## If it is, generates the conversion operation IR and returns it -- nil + ## otherwise + let + orig = dest + source = n.typ.skipTypes(abstractVarRange) + dest = dest.skipTypes(abstractVarRange) + + case dest.kind + of tyObject: + assert source.kind == tyObject + genObjConv(n, source, dest, orig) + of tyRef, tyPtr, tyVar, tyLent: + assert source.kind == dest.kind + if source.base.kind == tyObject: + genObjConv(n, source.base, dest.base, orig) + else: + nil + else: + nil + +proc convToIr(cl: TranslateCl, n: CgNode, info: TLineInfo, dest: PType): CgNode = + ## Generates the ``CgNode`` IR for an ``mnkPathConv`` operation (handle + ## conversion). 
+ result = handleSpecialConv(cl.graph.config, n, info, dest) + if result == nil: + # no special conversion is used + result = newOp(cnkLvalueConv, info, dest, n) + +proc atomToIr(n: MirNode, cl: TranslateCl, info: TLineInfo): CgNode = + case n.kind + of mnkProc: + CgNode(kind: cnkProc, info: info, typ: n.typ, prc: n.prc) + of mnkGlobal: + CgNode(kind: cnkGlobal, info: info, typ: n.typ, global: n.global) + of mnkConst: + CgNode(kind: cnkConst, info: info, typ: n.typ, cnst: n.cnst) + of mnkLocal, mnkParam: + # parameters are treated like locals in the code generators + assert n.sym.id in cl.localsMap + newLocalRef(cl.localsMap[n.sym.id], info, n.sym.typ) + of mnkTemp: + newLocalRef(cl.tempMap[n.temp], info, n.typ) + of mnkAlias: + # the type of the node doesn't match the real one + let + id = cl.tempMap[n.temp] + typ = cl.locals[id].typ + # the view is auto-dereferenced here for convenience + newOp(cnkDerefView, info, typ.base, newLocalRef(id, info, typ)) + of mnkLiteral: + translateLit(n.lit) + of mnkType: + newTypeNode(info, n.typ) + of mnkNone: + # type arguments do use `mnkNone` in some situations, so keep + # the type + CgNode(kind: cnkEmpty, info: info, typ: n.typ) + else: + unreachable("not an atom: " & $n.kind) + +proc atomToIr(tree: MirBody, cl: var TranslateCl, + cr: var TreeCursor): CgNode {.inline.} = + atomToIr(get(tree, cr), cl, cr.info) + +proc tbExceptItem(tree: MirBody, cl: var TranslateCl, cr: var TreeCursor + ): CgNode = + let n {.cursor.} = get(tree, cr) + case n.kind + of mnkLocal: + # the 'except' branch acts as a definition for the local + let id = cl.locals.add initLocal(n.sym) + cl.localsMap[n.sym.id] = id + newLocalRef(id, cr.info, n.typ) + of mnkType: newTypeNode(cr.info, n.typ) + else: unreachable() + + +proc lvalueToIr(tree: MirBody, cl: var TranslateCl, n: MirNode, + cr: var TreeCursor; preferField = true): CgNode = + ## Translates a MIR lvalue expression to the corresponding CG IR. 
+ ## Due to tagged unions (currently) not being addressable at the type- + ## representation level, the exact meaning of ``mnkPathVariant`` is + ## context-dependent -- `preferField` disambiguates whether it should be + ## turned into a field access rather than a (pseudo) access of the tagged + ## union. + let info = cr.info + + template recurse(): CgNode = + lvalueToIr(tree, cl, tree.get(cr), cr, false) + + case n.kind + of mnkLocal, mnkGlobal, mnkParam, mnkTemp, mnkAlias, mnkConst, mnkProc: + return atomToIr(n, cl, info) + of mnkPathNamed: + result = newExpr(cnkFieldAccess, info, n.typ, + [recurse(), newFieldNode(n.field)]) + of mnkPathVariant: + if preferField: + result = newExpr(cnkFieldAccess, cr.info, n.field.typ, + [recurse(), newFieldNode(n.field)]) + else: + # variant access itself has no ``CgNode`` counterpart at the moment + result = recurse() + of mnkPathPos: + result = newExpr(cnkTupleAccess, info, n.typ, + [recurse(), + CgNode(kind: cnkIntLit, intVal: n.position.BiggestInt)]) + of mnkPathArray: + # special case in order to support string literal access + # XXX: this needs to be removed once there is a dedicated run-time- + # sequence access operator + let arg = + if tree[cr].kind == mnkLiteral: + atomToIr(tree, cl, cr) + else: + recurse() + + result = newExpr(cnkArrayAccess, info, n.typ, [arg, atomToIr(tree, cl, cr)]) + of mnkPathConv: + result = convToIr(cl, recurse(), info, n.typ) + # dereferences are allowed at the end of a path tree + of mnkDeref: + result = newOp(cnkDeref, info, n.typ, atomToIr(tree, cl, cr)) + of mnkDerefView: + result = newOp(cnkDerefView, info, n.typ, atomToIr(tree, cl, cr)) + of AllNodeKinds - LvalueExprKinds - {mnkProc}: + unreachable(n.kind) + + leave(tree, cr) + +proc lvalueToIr(tree: MirBody, cl: var TranslateCl, + cr: var TreeCursor; preferField=true): CgNode {.inline.} = + lvalueToIr(tree, cl, tree.get(cr), cr, preferField) + +proc valueToIr(tree: MirBody, cl: var TranslateCl, + cr: var TreeCursor): CgNode = + case 
tree[cr].kind + of mnkProc, mnkConst, mnkGlobal, mnkParam, mnkLocal, mnkTemp, mnkAlias, + mnkLiteral, mnkType: + atomToIr(tree, cl, cr) + of mnkPathPos, mnkPathNamed, mnkPathArray, mnkPathConv, mnkPathVariant, + mnkDeref, mnkDerefView: + lvalueToIr(tree, cl, cr) + else: + unreachable("not a value: " & $tree[cr].kind) + +proc argToIr(tree: MirBody, cl: var TranslateCl, + cr: var TreeCursor): (bool, CgNode) = + ## Translates a MIR argument tree to the corresponding CG IR tree. + ## Returns both the tree and whether the argument was wrapped in a tag + ## operator (which indicates that the parameter is a ``var`` parameter). + var n {.cursor.} = tree.get(cr) + assert n.kind in ArgumentNodes, "argument node expected: " & $n.kind + # the inner node may be a tag node + n = tree.get(cr) + case n.kind + of mnkTag: + # it is one, the expression must be an lvalue + result = (true, lvalueToIr(tree, cl, cr)) + leave(tree, cr) + of mnkLiteral, mnkType, mnkProc, mnkNone: + # not a tag but an atom + result = (false, atomToIr(n, cl, cr.info)) + of LvalueExprKinds: + result = (false, lvalueToIr(tree, cl, n, cr)) + else: + unreachable("not a valid argument expression") + + leave(tree, cr) + +proc callToIr(tree: MirBody, cl: var TranslateCl, n: MirNode, + cr: var TreeCursor): CgNode = + ## Translate a valid call-like tree to the CG IR. 
+ let info = cr.info + result = newExpr((if n.kind == mnkCall: cnkCall else: cnkCheckedCall), + info, n.typ) + result.add: # the callee + case tree[cr].kind + of mnkMagic: newMagicNode(tree.get(cr).magic, info) + else: valueToIr(tree, cl, cr) + + # the code generators currently require some magics to not have any + # arguments wrapped in ``cnkHiddenAddr`` nodes + let noAddr = result[0].kind == cnkMagic and + result[0].magic in FakeVarParams + + # translate the arguments: + while tree[cr].kind != mnkEnd: + var (mutable, arg) = argToIr(tree, cl, cr) + if noAddr: + if arg.typ.kind == tyVar: + # auto-dereference the view + # XXX: prevent this case from happening + arg = newOp(cnkDerefView, arg.info, arg.typ.base, arg) + elif mutable: + arg = wrapInHiddenAddr(cl, arg) + + result.add arg + + leave(tree, cr) + +proc exprToIr(tree: MirBody, cl: var TranslateCl, cr: var TreeCursor): CgNode + +proc sourceExprToIr(tree: MirBody, cl: var TranslateCl, + cr: var TreeCursor): tuple[n: CgNode, useFast: bool] = + ## Translates the MIR expression appearing in an assignment's source + ## slot. Assignment modifiers are dropped, and whether a fast assignment or + ## normal assignment should be used is computed and returned. 
+ case tree[cr].kind + of mnkCopy, mnkSink: + # requires a full assignment + discard enter(tree, cr) + result = (valueToIr(tree, cl, cr), false) + leave(tree, cr) + of mnkMove: + # an ``x = move y`` assignment can be turned into a fast assignment + discard enter(tree, cr) + result = (valueToIr(tree, cl, cr), true) + leave(tree, cr) + of LvalueExprKinds: + # a fast assignment is correct for all raw lvalues + result = (lvalueToIr(tree, cl, cr), true) + else: + # rvalue expressions require a full assignment + result = (exprToIr(tree, cl, cr), false) + +proc defToIr(tree: MirBody, env: MirEnv, cl: var TranslateCl, + n: MirNode, cr: var TreeCursor): CgNode = + ## Translates a 'def'-like construct + assert n.kind in DefNodes + let + entity {.cursor.} = get(tree, cr) # the name of the defined entity + info = cr.info + + var def: CgNode + + case entity.kind + of mnkLocal: + # translate the ``PSym`` to a ``Local`` and establish a mapping + let + sym = entity.sym + id = cl.locals.add initLocal(sym) + + assert sym.id notin cl.localsMap, "re-definition of local" + cl.localsMap[sym.id] = id + + def = newLocalRef(id, info, entity.typ) + of mnkParam: + # ignore 'def's for parameters + def = newEmpty() + of mnkGlobal: + def = CgNode(kind: cnkGlobal, info: info, typ: entity.typ, + global: entity.global) + of mnkTemp: + # MIR temporaries are like normal locals, with the difference that they + # are created ad-hoc and don't have any extra information attached + assert entity.typ != nil + let tmp = cl.locals.add Local(typ: entity.typ) + + assert entity.temp notin cl.tempMap, "re-definition of temporary" + cl.tempMap[entity.temp] = tmp + + def = newLocalRef(tmp, info, entity.typ) + of mnkAlias: + # MIR aliases are translated to var/lent views + assert n.kind in {mnkBind, mnkBindMut}, "alias can only be defined by binds" + assert entity.typ != nil + let + typ = makeVarType(cl.owner, entity.typ, cl.idgen, + if n.kind == mnkBind: tyLent else: tyVar) + tmp = cl.locals.add Local(typ: typ) + 
+ assert entity.temp notin cl.tempMap, "re-definition of temporary" + cl.tempMap[entity.temp] = tmp + + def = newLocalRef(tmp, info, typ) + else: + unreachable() + + var arg = + if n.kind in {mnkBind, mnkBindMut} and tree[cr].kind in LvalueExprKinds: + # don't use the field interperation for variant access + lvalueToIr(tree, cl, cr, preferField=false) + else: + sourceExprToIr(tree, cl, cr)[0] + leave(tree, cr) + if n.kind in {mnkBind, mnkBindMut} and arg.typ.kind notin {tyVar, tyLent}: + # wrap the operand in an address-of operation + arg = newOp(cnkHiddenAddr, info, def.typ, arg) + + let isLet = (entity.kind == mnkTemp and n.kind == mnkDefCursor) or + (entity.kind == mnkTemp and not hasDestructor(def.typ)) or + (entity.kind == mnkAlias) + # to reduce the pressure on the code generator, locals that never cross + # structured control-flow boundaries are not lifted. As a temporary + # measure, cursor temporaries and aliases are treated as such, but + # do note that this is not guaranteed and relies on how `mirgen` + # produces MIR code + + case def.kind + of cnkLocal: + if cl.inUnscoped and not isLet: + # add the local to the list of moved definitions and only emit + # an assignment + cl.defs.add copyTree(def) + result = + case arg.kind + of cnkEmpty: arg + else: newStmt(cnkAsgn, info, [def, arg]) + else: + result = newStmt(cnkDef, info, [def, arg]) + of cnkGlobal: + # there are no defs for globals in the ``CgNode`` IR, so we + # emit an assignment that has the equivalent behaviour (in + # terms of initialization) + case arg.kind + of cnkEmpty: + if sfImportc in env.globals[def.global].flags: + # for imported globals, the 'def' only means that the symbol becomes + # known to us, not that it starts its lifetime here -> don't + # initialize or move it + result = arg + elif cl.inUnscoped: + # move the default initialization to the start of the scope + cl.defs.add def + result = arg + else: + result = newStmt(cnkAsgn, info, [def, newDefaultCall(info, def.typ)]) + else: + 
if sfImportc notin env.globals[def.global].flags and cl.inUnscoped: + # default intialization is required at the start of the scope + cl.defs.add def + result = newStmt(cnkAsgn, info, [def, arg]) + of cnkEmpty: + result = def + else: + unreachable() + +proc bodyToIr(tree: MirBody, env: MirEnv, cl: var TranslateCl, + cr: var TreeCursor): CgNode = + ## Generates the ``CgNode`` tree for the body of a construct that implies + ## some form of control-flow. + let prev = cl.inUnscoped + # assume the body is unscoped until stated otherwise + cl.inUnscoped = true + result = stmtToIr(tree, env, cl, cr) + cl.inUnscoped = prev + +proc caseToIr(tree: MirBody, env: MirEnv, cl: var TranslateCl, n: MirNode, + cr: var TreeCursor): CgNode + +proc stmtToIr(tree: MirBody, env: MirEnv, cl: var TranslateCl, + cr: var TreeCursor): CgNode = + let n {.cursor.} = tree.get(cr) + let info = cr.info ## the source information of `n` + + template body(): CgNode = + bodyToIr(tree, env, cl, cr) + + template to(kind: CgNodeKind, args: varargs[untyped]): CgNode = + let r = newStmt(kind, info, args) + leave(tree, cr) + r + + template toList(k: CgNodeKind, body: untyped): CgNode = + let res {.inject.} = newStmt(k, info) + while tree[cr].kind != mnkEnd: + body + leave(tree, cr) + res + + case n.kind + of DefNodes: + defToIr(tree, env, cl, n, cr) + of mnkAsgn, mnkInit, mnkSwitch: + let + dst = lvalueToIr(tree, cl, cr) + (src, useFast) = sourceExprToIr(tree, cl, cr) + to (if useFast: cnkFastAsgn else: cnkAsgn), dst, src + of mnkRepeat: + to cnkRepeatStmt, body() + of mnkBlock: + cl.blocks.add n.label # push the label to the stack + let body = body() + cl.blocks.setLen(cl.blocks.len - 1) # pop block from the stack + to cnkBlockStmt, newLabelNode(cl.blocks.len.BlockId, info), body + of mnkTry: + let res = newStmt(cnkTryStmt, info, [body()]) + assert n.len <= 2 + + for _ in 0..= 0 and cl.blocks[idx] != n.label: + dec idx + newStmt(cnkBreakStmt, info, [newLabelNode(BlockId idx, info)]) + of mnkReturn: + 
newNode(cnkReturnStmt, info) + of mnkVoid: + var res = exprToIr(tree, cl, cr) + if res.typ.isEmptyType(): + # a void expression doesn't need to be discarded + discard + else: + res = newStmt(cnkVoidStmt, info, [res]) + leave(tree, cr) + res + of mnkIf: + to cnkIfStmt, valueToIr(tree, cl, cr), body() + of mnkRaise: + # the operand can either be empty or an lvalue expression + to cnkRaiseStmt: + case tree[cr].kind + of mnkNone: atomToIr(tree, cl, cr) + else: lvalueToIr(tree, cl, cr) + of mnkCase: + caseToIr(tree, env, cl, n, cr) + of mnkAsm: + toList cnkAsmStmt: + res.add valueToIr(tree, cl, cr) + of mnkEmit: + toList cnkEmitStmt: + res.add valueToIr(tree, cl, cr) + of mnkStmtList: + toList cnkStmtList: + res.kids.addIfNotEmpty stmtToIr(tree, env, cl, cr) + of mnkScope: + toSingleNode scopeToIr(tree, env, cl, cr) + of mnkDestroy: + unreachable("a 'destroy' that wasn't lowered") + of AllNodeKinds - StmtNodes: + unreachable(n.kind) + +proc caseToIr(tree: MirBody, env: MirEnv, cl: var TranslateCl, n: MirNode, + cr: var TreeCursor): CgNode = + assert n.kind == mnkCase + result = newStmt(cnkCaseStmt, cr.info, [valueToIr(tree, cl, cr)]) + for j in 0.. 0: + for x in 0.. prev: + # insert all the lifted defs at the start + for i in countdown(cl.defs.high, prev): + stmts.insert genDefFor(move cl.defs[i]) + + # "pop" the elements that were added as part of this scope: + cl.defs.setLen(prev) + + cl.inUnscoped = prevInUnscoped + + result = stmts + +proc tb(tree: MirBody, env: MirEnv, cl: var TranslateCl, + start: NodePosition): CgNode = + ## Translate `tree` to the corresponding ``CgNode`` representation. 
+ var cr = TreeCursor(pos: start.uint32) + var nodes = scopeToIr(tree, env, cl, cr, allowExpr=true) + if cr.hasNext(tree): + # the tree must be an expression; the last node is required to be an atom + let x = atomToIr(tree, cl, cr) + if nodes.len == 0: + x + else: + nodes.add x + newExpr(cnkStmtListExpr, unknownLineInfo, nodes[^1].typ, nodes) + else: + # it's a statement list + toSingleNode nodes + +proc generateIR*(graph: ModuleGraph, idgen: IdGenerator, env: MirEnv, + owner: PSym, + body: sink MirBody): Body = + ## Generates the ``CgNode`` IR corresponding to the input MIR `body`, + ## using `idgen` to provide new IDs when creating symbols. + var cl = TranslateCl(graph: graph, idgen: idgen, owner: owner) + if owner.kind in routineKinds: + # setup the locals and associated mappings for the parameters + template add(v: PSym) = + let s = v + cl.localsMap[s.id] = cl.locals.add initLocal(s) + + let sig = + if owner.kind == skMacro: owner.internal + else: owner.typ + + # result variable: + if sig[0].isEmptyType(): + # always reserve a slot for the result variable, even if the latter is + # not present + discard cl.locals.add(Local()) + else: + add(owner.ast[resultPos].sym) + + # normal parameters: + for i in 1.. | LVALUE + INTERMEDIATE_TARGET =