Skip to content

Commit

Permalink
cmd/compile,runtime: implement stack objects
Browse files Browse the repository at this point in the history
Rework how the compiler+runtime handles stack-allocated variables
whose address is taken.

Direct references to such variables work as before. References through
pointers, however, use a new mechanism. The new mechanism is more
precise than the old "ambiguously live" mechanism. It computes liveness
at runtime based on the actual references among objects on the stack.

Each function records all of its address-taken objects in a FUNCDATA.
These are called "stack objects". While scanning a goroutine's stack,
the runtime uses that information to find all of the stack objects on
that stack. It then does a mark phase on the stack objects, using all the
pointers found on the stack (and ancillary structures, like defer
records) as the root set. Only stack objects which are found to be
live during this mark phase will be scanned and thus retain any heap
objects they point to.

A subsequent CL will remove all the "ambiguously live" logic from
the compiler, so that the stack object tracing will be required.
For this CL, the stack tracing is all redundant with the current
ambiguously live logic.

Update #22350

Change-Id: Ide19f1f71a5b6ec8c4d54f8f66f0e9a98344772f
Reviewed-on: https://go-review.googlesource.com/c/134155
Reviewed-by: Austin Clements <austin@google.com>
  • Loading branch information
randall77 committed Oct 3, 2018
1 parent 4334966 commit cbafcc5
Show file tree
Hide file tree
Showing 13 changed files with 607 additions and 27 deletions.
5 changes: 4 additions & 1 deletion src/cmd/compile/internal/gc/obj.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ func dumpglobls() {
funcsyms = nil
}

// addGCLocals adds gcargs and gclocals symbols to Ctxt.Data.
// addGCLocals adds gcargs, gclocals, gcregs, and stack object symbols to Ctxt.Data.
// It takes care not to add any duplicates.
// Though the object file format handles duplicates efficiently,
// storing only a single copy of the data,
Expand All @@ -299,6 +299,9 @@ func addGCLocals() {
Ctxt.Data = append(Ctxt.Data, gcsym)
seen[gcsym.Name] = true
}
if x := s.Func.StackObjects; x != nil {
ggloblsym(x, int32(len(x.P)), obj.RODATA|obj.LOCAL)
}
}
}

Expand Down
20 changes: 20 additions & 0 deletions src/cmd/compile/internal/gc/pgen.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,26 @@ func compile(fn *Node) {
// Set up the function's LSym early to avoid data races with the assemblers.
fn.Func.initLSym()

// Make sure type syms are declared for all types that might
// be types of stack objects. We need to do this here
// because symbols must be allocated before the parallel
// phase of the compiler.
if fn.Func.lsym != nil { // not func _(){}
for _, n := range fn.Func.Dcl {
switch n.Class() {
case PPARAM, PPARAMOUT, PAUTO:
if livenessShouldTrack(n) && n.Addrtaken() {
dtypesym(n.Type)
// Also make sure we allocate a linker symbol
// for the stack object data, for the same reason.
if fn.Func.lsym.Func.StackObjects == nil {
fn.Func.lsym.Func.StackObjects = lookup(fmt.Sprintf("%s.stkobj", fn.funcname())).Linksym()
}
}
}
}
}

if compilenow() {
compileSSA(fn, 0)
} else {
Expand Down
48 changes: 47 additions & 1 deletion src/cmd/compile/internal/gc/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"cmd/compile/internal/ssa"
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/objabi"
"cmd/internal/src"
"cmd/internal/sys"
)
Expand Down Expand Up @@ -4933,13 +4934,59 @@ func (s *SSAGenState) DebugFriendlySetPosFrom(v *ssa.Value) {
}
}

// byXoffset is a []*Node that sorts in ascending Xoffset order.
// It implements sort.Interface.
type byXoffset []*Node

// Len returns the number of nodes in the slice.
func (s byXoffset) Len() int {
	return len(s)
}

// Less reports whether the node at index i has a strictly smaller
// Xoffset than the node at index j.
func (s byXoffset) Less(i, j int) bool {
	a, b := s[i], s[j]
	return a.Xoffset < b.Xoffset
}

// Swap exchanges the nodes at indices i and j.
func (s byXoffset) Swap(i, j int) {
	tmp := s[i]
	s[i] = s[j]
	s[j] = tmp
}

// emitStackObjects fills in the stack object data symbol for the function
// being compiled and emits a FUNCDATA instruction that refers to it.
//
// A declared variable is recorded as a stack object when
// livenessShouldTrack reports true for it and its address is taken.
// If the function has no such variables, nothing is written or emitted.
func emitStackObjects(e *ssafn, pp *Progs) {
	// Collect the address-taken, tracked variables of the current function.
	var objs []*Node
	for _, decl := range e.curfn.Func.Dcl {
		if !livenessShouldTrack(decl) || !decl.Addrtaken() {
			continue
		}
		objs = append(objs, decl)
	}
	if len(objs) == 0 {
		return
	}

	// Records are written from lowest to highest address.
	sort.Sort(byXoffset(objs))

	// Write the stack object data: first the number of records, then one
	// (offset, type symbol) pair per object.
	// Format must match runtime/stack.go:stackObjectRecord.
	sym := e.curfn.Func.lsym.Func.StackObjects
	off := duintptr(sym, 0, uint64(len(objs)))
	for _, n := range objs {
		// Note: arguments and return values have non-negative Xoffset,
		// in which case the offset is relative to argp.
		// Locals have a negative Xoffset, in which case the offset is relative to varp.
		off = duintptr(sym, off, uint64(n.Xoffset))

		// The type symbol must already have been generated; it is a
		// fatal error if it was not.
		if !typesym(n.Type).Siggen() {
			Fatalf("stack object's type symbol not generated for type %s", n.Type)
		}
		off = dsymptr(sym, off, dtypesym(n.Type), 0)
	}

	// Emit a funcdata pointing at the stack object data.
	fd := pp.Prog(obj.AFUNCDATA)
	Addrconst(&fd.From, objabi.FUNCDATA_StackObjects)
	fd.To.Type = obj.TYPE_MEM
	fd.To.Name = obj.NAME_EXTERN
	fd.To.Sym = sym
}

// genssa appends entries to pp for each instruction in f.
func genssa(f *ssa.Func, pp *Progs) {
var s SSAGenState

e := f.Frontend().(*ssafn)

s.livenessMap = liveness(e, f)
emitStackObjects(e, pp)

// Remember where each block starts.
s.bstart = make([]*obj.Prog, f.NumBlocks())
Expand Down Expand Up @@ -5054,7 +5101,6 @@ func genssa(f *ssa.Func, pp *Progs) {
}
}
}

// Emit control flow instructions for block
var next *ssa.Block
if i < len(f.Blocks)-1 && Debug['N'] == 0 {
Expand Down
7 changes: 4 additions & 3 deletions src/cmd/internal/obj/link.go
Original file line number Diff line number Diff line change
Expand Up @@ -402,9 +402,10 @@ type FuncInfo struct {
dwarfAbsFnSym *LSym
dwarfIsStmtSym *LSym

GCArgs LSym
GCLocals LSym
GCRegs LSym
GCArgs LSym
GCLocals LSym
GCRegs LSym
StackObjects *LSym
}

// Attribute is a set of symbol attributes.
Expand Down
1 change: 1 addition & 0 deletions src/cmd/internal/objabi/funcdata.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const (
FUNCDATA_LocalsPointerMaps = 1
FUNCDATA_InlTree = 2
FUNCDATA_RegPointerMaps = 3
FUNCDATA_StackObjects = 4

// ArgsSizeUnknown is set in Func.argsize to mark all functions
// whose argument size is unknown (C vararg functions, and
Expand Down
3 changes: 2 additions & 1 deletion src/reflect/all_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5988,7 +5988,8 @@ func TestFuncLayout(t *testing.T) {
func verifyGCBits(t *testing.T, typ Type, bits []byte) {
heapBits := GCBits(New(typ).Interface())
if !bytes.Equal(heapBits, bits) {
t.Errorf("heapBits incorrect for %v\nhave %v\nwant %v", typ, heapBits, bits)
_, _, line, _ := runtime.Caller(1)
t.Errorf("line %d: heapBits incorrect for %v\nhave %v\nwant %v", line, typ, heapBits, bits)
}
}

Expand Down
1 change: 1 addition & 0 deletions src/runtime/funcdata.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#define FUNCDATA_LocalsPointerMaps 1
#define FUNCDATA_InlTree 2
#define FUNCDATA_RegPointerMaps 3
#define FUNCDATA_StackObjects 4

// Pseudo-assembly statements.

Expand Down
21 changes: 20 additions & 1 deletion src/runtime/mbitmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -1911,6 +1911,20 @@ Run:
return totalBits
}

// materializeGCProg allocates space for the (1-bit) pointer bitmask
// for an object of size ptrdata. Then it fills that space with the
// pointer bitmask specified by the program prog.
// The bitmask starts at s.startAddr.
// The result must be deallocated with dematerializeGCProg.
func materializeGCProg(ptrdata uintptr, prog *byte) *mspan {
	// Each pointer-sized word of ptrdata needs one bit in the bitmask, so
	// one bitmask byte covers 8*sys.PtrSize bytes of the object. Round the
	// byte count UP: rounding down here (and only rounding up to pages
	// afterwards) under-allocates when the bitmask spills just past a page
	// boundary, and yields zero pages for very small ptrdata.
	bitmapBytes := (ptrdata + 8*sys.PtrSize - 1) / (8 * sys.PtrSize)
	// Number of whole pages needed to hold bitmapBytes.
	pages := (bitmapBytes + pageSize - 1) / pageSize
	s := mheap_.allocManual(pages, &memstats.gc_sys)
	// NOTE(review): the program proper is taken to start 4 bytes into prog
	// (presumably after a length header) and the final argument 1 is taken
	// to select the 1-bit output format — confirm against runGCProg.
	runGCProg(addb(prog, 4), nil, (*byte)(unsafe.Pointer(s.startAddr)), 1)
	return s
}

// dematerializeGCProg releases the span s that was returned by
// materializeGCProg, handing it back via mheap_.freeManual.
func dematerializeGCProg(s *mspan) {
	mheap_.freeManual(s, &memstats.gc_sys)
}

func dumpGCProg(p *byte) {
nptr := 0
for {
Expand Down Expand Up @@ -2037,7 +2051,12 @@ func getgcmask(ep interface{}) (mask []byte) {
_g_ := getg()
gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
if frame.fn.valid() {
locals, _ := getStackMap(&frame, nil, false)
// TODO: once stack objects are enabled (and their pointers
// are no longer described by the stack pointermap directly),
// tests using this will probably need fixing. We might need
// to loop through the stackobjects and if we're inside one,
// use the pointermap from that object.
locals, _, _ := getStackMap(&frame, nil, false)
if locals.n == 0 {
return
}
Expand Down
127 changes: 112 additions & 15 deletions src/runtime/mgcmark.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ func markroot(gcw *gcWork, i uint32) {
case i == fixedRootFinalizers:
for fb := allfin; fb != nil; fb = fb.alllink {
cnt := uintptr(atomic.Load(&fb.cnt))
scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), cnt*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], gcw)
scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), cnt*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], gcw, nil)
}

case i == fixedRootFreeGStacks:
Expand Down Expand Up @@ -248,7 +248,7 @@ func markrootBlock(b0, n0 uintptr, ptrmask0 *uint8, gcw *gcWork, shard int) {
}

// Scan this shard.
scanblock(b, n, ptrmask, gcw)
scanblock(b, n, ptrmask, gcw, nil)
}

// markrootFreeGStacks frees stacks of dead Gs.
Expand Down Expand Up @@ -349,7 +349,7 @@ func markrootSpans(gcw *gcWork, shard int) {
scanobject(p, gcw)

// The special itself is a root.
scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw)
scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw, nil)
}

unlock(&s.speciallock)
Expand Down Expand Up @@ -689,42 +689,136 @@ func scanstack(gp *g, gcw *gcWork) {
// Shrink the stack if not much of it is being used.
shrinkstack(gp)

var state stackScanState
state.stack = gp.stack

if stackTraceDebug {
println("stack trace goroutine", gp.goid)
}

// Scan the saved context register. This is effectively a live
// register that gets moved back and forth between the
// register and sched.ctxt without a write barrier.
if gp.sched.ctxt != nil {
scanblock(uintptr(unsafe.Pointer(&gp.sched.ctxt)), sys.PtrSize, &oneptrmask[0], gcw)
scanblock(uintptr(unsafe.Pointer(&gp.sched.ctxt)), sys.PtrSize, &oneptrmask[0], gcw, &state)
}

// Scan the stack.
var cache pcvalueCache
// Scan the stack. Accumulate a list of stack objects.
scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
scanframeworker(frame, &cache, gcw)
scanframeworker(frame, &state, gcw)
return true
}
gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
tracebackdefers(gp, scanframe, nil)

// Find and scan all reachable stack objects.
state.buildIndex()
for {
p := state.getPtr()
if p == 0 {
break
}
obj := state.findObject(p)
if obj == nil {
continue
}
t := obj.typ
if t == nil {
// We've already scanned this object.
continue
}
obj.setType(nil) // Don't scan it again.
if stackTraceDebug {
println(" live stkobj at", hex(state.stack.lo+uintptr(obj.off)), "of type", t.string())
}
gcdata := t.gcdata
var s *mspan
if t.kind&kindGCProg != 0 {
// This path is pretty unlikely, an object large enough
// to have a GC program allocated on the stack.
// We need some space to unpack the program into a straight
// bitmask, which we allocate/free here.
// TODO: it would be nice if there were a way to run a GC
// program without having to store all its bits. We'd have
// to change from a Lempel-Ziv style program to something else.
// Or we can forbid putting objects on stacks if they require
// a gc program (see issue 27447).
s = materializeGCProg(t.ptrdata, gcdata)
gcdata = (*byte)(unsafe.Pointer(s.startAddr))
}

scanblock(state.stack.lo+uintptr(obj.off), t.ptrdata, gcdata, gcw, &state)

if s != nil {
dematerializeGCProg(s)
}
}

// Deallocate object buffers.
// (Pointer buffers were all deallocated in the loop above.)
for state.head != nil {
x := state.head
state.head = x.next
if stackTraceDebug {
for _, obj := range x.obj[:x.nobj] {
if obj.typ == nil { // reachable
continue
}
println(" dead stkobj at", hex(gp.stack.lo+uintptr(obj.off)), "of type", obj.typ.string())
// Note: not necessarily really dead - only reachable-from-ptr dead.
}
}
x.nobj = 0
putempty((*workbuf)(unsafe.Pointer(x)))
}
if state.buf != nil || state.freeBuf != nil {
throw("remaining pointer buffers")
}

gp.gcscanvalid = true
}

// Scan a stack frame: local variables and function arguments/results.
//go:nowritebarrier
func scanframeworker(frame *stkframe, cache *pcvalueCache, gcw *gcWork) {
func scanframeworker(frame *stkframe, state *stackScanState, gcw *gcWork) {
if _DebugGC > 1 && frame.continpc != 0 {
print("scanframe ", funcname(frame.fn), "\n")
}

locals, args := getStackMap(frame, cache, false)
locals, args, objs := getStackMap(frame, &state.cache, false)

// Scan local variables if stack frame has been allocated.
if locals.n > 0 {
size := uintptr(locals.n) * sys.PtrSize
scanblock(frame.varp-size, size, locals.bytedata, gcw)
scanblock(frame.varp-size, size, locals.bytedata, gcw, state)
}

// Scan arguments.
if args.n > 0 {
scanblock(frame.argp, uintptr(args.n)*sys.PtrSize, args.bytedata, gcw)
scanblock(frame.argp, uintptr(args.n)*sys.PtrSize, args.bytedata, gcw, state)
}

// Add all stack objects to the stack object list.
if frame.varp != 0 {
// varp is 0 for defers, where there are no locals.
// In that case, there can't be a pointer to its args, either.
// (And all args would be scanned above anyway.)
for _, obj := range objs {
off := obj.off
base := frame.varp // locals base pointer
if off >= 0 {
base = frame.argp // arguments and return values base pointer
}
ptr := base + uintptr(off)
if ptr < frame.sp {
// object hasn't been allocated in the frame yet.
continue
}
if stackTraceDebug {
println("stkobj at", hex(ptr), "of type", obj.typ.string())
}
state.addObject(ptr, obj.typ)
}
}
}

Expand Down Expand Up @@ -939,8 +1033,9 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 {
// This is used to scan non-heap roots, so it does not update
// gcw.bytesMarked or gcw.scanWork.
//
// If stk != nil, possible stack pointers are also reported to stk.putPtr.
//go:nowritebarrier
func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork, stk *stackScanState) {
// Use local copies of original parameters, so that a stack trace
// due to one of the throws below shows the original block
// base and extent.
Expand All @@ -957,10 +1052,12 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
for j := 0; j < 8 && i < n; j++ {
if bits&1 != 0 {
// Same work as in scanobject; see comments there.
obj := *(*uintptr)(unsafe.Pointer(b + i))
if obj != 0 {
if obj, span, objIndex := findObject(obj, b, i); obj != 0 {
p := *(*uintptr)(unsafe.Pointer(b + i))
if p != 0 {
if obj, span, objIndex := findObject(p, b, i); obj != 0 {
greyobject(obj, b, i, span, gcw, objIndex)
} else if stk != nil && p >= stk.stack.lo && p < stk.stack.hi {
stk.putPtr(p)
}
}
}
Expand Down
Loading

0 comments on commit cbafcc5

Please sign in to comment.