From 249501aa51b9d8bb38abf3230471c29d68fa20bd Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 15 Dec 2021 23:09:02 -0600 Subject: [PATCH] cache: don't let RowByModel() recursively lock the RowCache RWLocks are not supposed to be used recursively, and if something else Locks the row cache then we deadlock. In this example, goroutine 59 RLocks the RowCache, and then goroutine 174 attempts to Lock the cache but blocks on #59's RLock. Since there is a writer waiting to acquire the lock, any further RLocks are effectively Locks, and thus when goroutine #59 attempts to acquire the second RLock in Row() it deadlocks against goroutine #174. goroutine 59 [semacquire, 12 minutes]: sync.runtime_SemacquireMutex(0xc000bf2e2c, 0xc0025a3800, 0x0) /usr/lib/golang/src/runtime/sema.go:71 +0x47 sync.(*RWMutex).RLock(...) /usr/lib/golang/src/sync/rwmutex.go:63 github.com/ovn-org/libovsdb/cache.(*RowCache).Row(0xc000bf2d80, 0xc00342de60, 0x24, 0x0, 0x0) /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/github.com/ovn-org/libovsdb/cache/cache.go:101 +0x139 github.com/ovn-org/libovsdb/cache.(*RowCache).RowByModel(0xc000bf2d80, 0x1a8e8e0, 0xc0046c0000, 0x0, 0x0) /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/github.com/ovn-org/libovsdb/cache/cache.go:130 +0x417 github.com/ovn-org/libovsdb/client.api.Get(0xc000bf3680, 0x0, 0x0, 0xc000183f98, 0x21135a0, 0xc003394600, 0x1a8e8e0, 0xc0046c0000, 0xc00416e300, 0x0) /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/github.com/ovn-org/libovsdb/client/api.go:221 +0xd0 github.com/ovn-org/libovsdb/client.(*ovsdbClient).Get(0xc0001c48f0, 0x21135a0, 0xc003394600, 0x1a8e8e0, 0xc0046c0000, 0x0, 0x0) /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/github.com/ovn-org/libovsdb/client/client.go:1189 +0x139 github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn.ovnNBLSPDel(0x2138198, 0xc00002d640, 0xc0038731b8, 0x17, 0xc002501b60, 0x29, 0x0, 0x0) /go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/ovn/pods.go:649 +0x265 github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn.(*Controller).deleteLogicalPort(0xc00000e1e0, 0xc002f40400) /go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/ovn/pods.go:145 +0x46a goroutine 174 [semacquire, 12 minutes]: sync.runtime_SemacquireMutex(0xc000bf2e28, 0x4f1600, 0x0) /usr/lib/golang/src/runtime/sema.go:71 +0x47 sync.(*RWMutex).Lock(0xc000bf2e20) /usr/lib/golang/src/sync/rwmutex.go:116 +0x85 github.com/ovn-org/libovsdb/cache.(*RowCache).Update(0xc000bf2d80, 0xc002280570, 0x24, 0x1a8e8e0, 0xc0046c0600, 0xc003319200, 0x0, 0x0) /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/github.com/ovn-org/libovsdb/cache/cache.go:176 +0x6f github.com/ovn-org/libovsdb/cache.(*TableCache).Populate2(0xc000bf3680, 0xc003766210, 0x0, 0x0) /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/github.com/ovn-org/libovsdb/cache/cache.go:659 +0x7d6 github.com/ovn-org/libovsdb/cache.(*TableCache).Update2(0xc000bf3680, 0x1c71a40, 0xc003331980, 0xc003766210, 0x2e, 0x0) /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/github.com/ovn-org/libovsdb/cache/cache.go:536 +0x6b github.com/ovn-org/libovsdb/client.(*ovsdbClient).update3(0xc0001c48f0, 0xc00451a660, 0x3, 0x4, 0xc003e32738, 0x7f225dc865b8, 0x41b69b) /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/github.com/ovn-org/libovsdb/client/client.go:637 +0x55a github.com/ovn-org/libovsdb/client.(*ovsdbClient).createRPC2Client.func4(0xc000289500, 0xc00451a660, 0x3, 0x4, 0xc003e32738, 0x0, 0x0) /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/github.com/ovn-org/libovsdb/client/client.go:395 +0x52 Signed-off-by: Dan Williams --- cache/cache.go | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/cache/cache.go b/cache/cache.go index 1f52eb2e..8a3c86e8 100644 --- a/cache/cache.go +++ b/cache/cache.go @@ -96,16 +96,22 @@ type RowCache struct { mutex sync.RWMutex } -// Row returns one model from the cache by UUID -func (r *RowCache) Row(uuid string) model.Model { - r.mutex.RLock() - defer r.mutex.RUnlock() +// rowByUUID returns one model from the cache by UUID. Caller must hold the row +// cache lock. +func (r *RowCache) rowByUUID(uuid string) model.Model { if row, ok := r.cache[uuid]; ok { return model.Clone(row) } return nil } +// Row returns one model from the cache by UUID +func (r *RowCache) Row(uuid string) model.Model { + r.mutex.RLock() + defer r.mutex.RUnlock() + return r.rowByUUID(uuid) +} + // RowByModel searches the cache using a the indexes for a provided model func (r *RowCache) RowByModel(m model.Model) model.Model { r.mutex.RLock() @@ -119,7 +125,7 @@ func (r *RowCache) RowByModel(m model.Model) model.Model { return nil } if uuid.(string) != "" { - return r.Row(uuid.(string)) + return r.rowByUUID(uuid.(string)) } for index := range r.indexes { val, err := valueFromIndex(info, index) @@ -127,7 +133,7 @@ func (r *RowCache) RowByModel(m model.Model) model.Model { continue } if uuid, ok := r.indexes[index][val]; ok { - return r.Row(uuid) + return r.rowByUUID(uuid) } } return nil