From 08ecdf7c2e9e9ecc4e2d7c6d9438faeed2338140 Mon Sep 17 00:00:00 2001
From: Austin Clements <austin@google.com>
Date: Wed, 1 Dec 2021 08:56:19 -0500
Subject: [PATCH] runtime: fix racy allgs access on weak memory architectures

Currently, markroot is very clever about accessing the allgs slice to
find stack roots. Unfortunately, on weak memory architectures, it's a
little too clever and can sometimes read a nil g, causing a fatal
panic.

Specifically, gcMarkRootPrepare snapshots the length of allgs during
STW and then markroot accesses allgs up to this length during
concurrent marking. During concurrent marking, allgadd can append to
allgs *without synchronizing with markroot*, but the argument is that
the markroot access should be safe because allgs only grows
monotonically and existing entries in allgs never change.

This reasoning is insufficient on weak memory architectures. Suppose
thread 1 calls allgadd during concurrent marking and that allgs is
already at capacity. On thread 1, append will allocate a new slice
that initially consists of all nils, then copy the old backing store
to the new slice (write A), then allgadd will publish the new slice to
the allgs global (write B). Meanwhile, on thread 2, markroot reads the
allgs slice base pointer (read A), computes an offset from that base
pointer, and reads the value at that offset (read B). On a weak memory
machine, thread 2 can observe write B *before* write A. If the order
of events from thread 2's perspective is write B, read A, read B,
write A, then markroot on thread 2 will read a nil g and then panic.

Fix this by taking a snapshot of the allgs slice header in
gcMarkRootPrepare while the world is stopped and using that snapshot
as the list of stack roots in markroot. This eliminates all read/write
concurrency around the access in markroot.

Alternatively, we could make markroot use the atomicAllGs API to
atomically access the allgs list, but in my opinion it's much less
subtle to just eliminate all of the interesting concurrency around the
allgs access.

Fixes #49686.
Fixes #48845.
Fixes #43824.
(These are all just different paths to the same ultimate issue.)

Change-Id: I472b4934a637bbe88c8a080a280aa30212acf984
Reviewed-on: https://go-review.googlesource.com/c/go/+/368134
Trust: Austin Clements <austin@google.com>
Trust: Bryan C. Mills <bcmills@google.com>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
---
 src/runtime/mgc.go     | 14 ++++++++++++++
 src/runtime/mgcmark.go | 14 ++++++--------
 src/runtime/proc.go    | 14 ++++++++++++++
 3 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index d75893dc43530..8c8f7d936b800 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -320,11 +320,20 @@ var work struct {
 	nwait  uint32
 
 	// Number of roots of various root types. Set by gcMarkRootPrepare.
+	//
+	// nStackRoots == len(stackRoots), but we have nStackRoots for
+	// consistency.
 	nDataRoots, nBSSRoots, nSpanRoots, nStackRoots int
 
 	// Base indexes of each root type. Set by gcMarkRootPrepare.
 	baseData, baseBSS, baseSpans, baseStacks, baseEnd uint32
 
+	// stackRoots is a snapshot of all of the Gs that existed
+	// before the beginning of concurrent marking. The backing
+	// store of this must not be modified because it might be
+	// shared with allgs.
+	stackRoots []*g
+
 	// Each type of GC state transition is protected by a lock.
 	// Since multiple threads can simultaneously detect the state
 	// transition condition, any thread that detects a transition
@@ -1368,6 +1377,11 @@ func gcMark(startTime int64) {
 		throw("work.full != 0")
 	}
 
+	// Drop allg snapshot. allgs may have grown, in which case
+	// this is the only reference to the old backing store and
+	// there's no need to keep it around.
+	work.stackRoots = nil
+
 	// Clear out buffers and double-check that all gcWork caches
 	// are empty. This should be ensured by gcMarkDone before we
 	// enter mark termination.
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index a5129bd1ee489..a15c62cc491ee 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -102,7 +102,8 @@ func gcMarkRootPrepare() {
 	// ignore them because they begin life without any roots, so
 	// there's nothing to scan, and any roots they create during
 	// the concurrent phase will be caught by the write barrier.
-	work.nStackRoots = int(atomic.Loaduintptr(&allglen))
+	work.stackRoots = allGsSnapshot()
+	work.nStackRoots = len(work.stackRoots)
 
 	work.markrootNext = 0
 	work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots)
@@ -194,15 +195,12 @@ func markroot(gcw *gcWork, i uint32, flushBgCredit bool) int64 {
 	default:
 		// the rest is scanning goroutine stacks
 		workCounter = &gcController.stackScanWork
-		var gp *g
-		if work.baseStacks <= i && i < work.baseEnd {
-			// N.B. Atomic read of allglen in gcMarkRootPrepare
-			// acts as a barrier to ensure that allgs must be large
-			// enough to contain all relevant Gs.
-			gp = allgs[i-work.baseStacks]
-		} else {
+		if i < work.baseStacks || work.baseEnd <= i {
+			printlock()
+			print("runtime: markroot index ", i, " not in stack roots range [", work.baseStacks, ", ", work.baseEnd, ")\n")
 			throw("markroot: bad index")
 		}
+		gp := work.stackRoots[i-work.baseStacks]
 
 		// remember when we've first observed the G blocked
 		// needed only to output in traceback
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index a238ea77f3702..f375b6798173d 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -547,6 +547,20 @@ func allgadd(gp *g) {
 	unlock(&allglock)
 }
 
+// allGsSnapshot returns a snapshot of the slice of all Gs.
+//
+// The world must be stopped or allglock must be held.
+func allGsSnapshot() []*g {
+	assertWorldStoppedOrLockHeld(&allglock)
+
+	// Because the world is stopped or allglock is held, allgadd
+	// cannot happen concurrently with this. allgs grows
+	// monotonically and existing entries never change, so we can
+	// simply return a copy of the slice header. For added safety,
+	// we trim everything past len because that can still change.
+	return allgs[:len(allgs):len(allgs)]
+}
+
 // atomicAllG returns &allgs[0] and len(allgs) for use with atomicAllGIndex.
 func atomicAllG() (**g, uintptr) {
 	length := atomic.Loaduintptr(&allglen)