Skip to content

Commit

Permalink
cache: don't let RowByModel() recursively lock the RowCache
Browse files Browse the repository at this point in the history
RWLocks are not supposed to be used recursively, and if something
else Locks the row cache then we deadlock.

In this example, goroutine 59 RLocks the RowCache, and then
goroutine 174 attempts to Lock the cache but blocks on #59's
RLock. Since there is a writer waiting to acquire the lock,
any further RLocks are effectively Locks, and thus when
goroutine #59 attempts to acquire the second RLock in Row()
it deadlocks against goroutine #174.

goroutine 59 [semacquire, 12 minutes]:
sync.runtime_SemacquireMutex(0xc000bf2e2c, 0xc0025a3800, 0x0)
	/usr/lib/golang/src/runtime/sema.go:71 +0x47
sync.(*RWMutex).RLock(...)
	/usr/lib/golang/src/sync/rwmutex.go:63
github.com/ovn-org/libovsdb/cache.(*RowCache).Row(0xc000bf2d80, 0xc00342de60, 0x24, 0x0, 0x0)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/github.com/ovn-org/libovsdb/cache/cache.go:101 +0x139
github.com/ovn-org/libovsdb/cache.(*RowCache).RowByModel(0xc000bf2d80, 0x1a8e8e0, 0xc0046c0000, 0x0, 0x0)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/github.com/ovn-org/libovsdb/cache/cache.go:130 +0x417
github.com/ovn-org/libovsdb/client.api.Get(0xc000bf3680, 0x0, 0x0, 0xc000183f98, 0x21135a0, 0xc003394600, 0x1a8e8e0, 0xc0046c0000, 0xc00416e300, 0x0)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/github.com/ovn-org/libovsdb/client/api.go:221 +0xd0
github.com/ovn-org/libovsdb/client.(*ovsdbClient).Get(0xc0001c48f0, 0x21135a0, 0xc003394600, 0x1a8e8e0, 0xc0046c0000, 0x0, 0x0)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/github.com/ovn-org/libovsdb/client/client.go:1189 +0x139
github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn.ovnNBLSPDel(0x2138198, 0xc00002d640, 0xc0038731b8, 0x17, 0xc002501b60, 0x29, 0x0, 0x0)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/ovn/pods.go:649 +0x265
github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn.(*Controller).deleteLogicalPort(0xc00000e1e0, 0xc002f40400)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/ovn/pods.go:145 +0x46a

goroutine 174 [semacquire, 12 minutes]:
sync.runtime_SemacquireMutex(0xc000bf2e28, 0x4f1600, 0x0)
	/usr/lib/golang/src/runtime/sema.go:71 +0x47
sync.(*RWMutex).Lock(0xc000bf2e20)
	/usr/lib/golang/src/sync/rwmutex.go:116 +0x85
github.com/ovn-org/libovsdb/cache.(*RowCache).Update(0xc000bf2d80, 0xc002280570, 0x24, 0x1a8e8e0, 0xc0046c0600, 0xc003319200, 0x0, 0x0)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/github.com/ovn-org/libovsdb/cache/cache.go:176 +0x6f
github.com/ovn-org/libovsdb/cache.(*TableCache).Populate2(0xc000bf3680, 0xc003766210, 0x0, 0x0)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/github.com/ovn-org/libovsdb/cache/cache.go:659 +0x7d6
github.com/ovn-org/libovsdb/cache.(*TableCache).Update2(0xc000bf3680, 0x1c71a40, 0xc003331980, 0xc003766210, 0x2e, 0x0)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/github.com/ovn-org/libovsdb/cache/cache.go:536 +0x6b
github.com/ovn-org/libovsdb/client.(*ovsdbClient).update3(0xc0001c48f0, 0xc00451a660, 0x3, 0x4, 0xc003e32738, 0x7f225dc865b8, 0x41b69b)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/github.com/ovn-org/libovsdb/client/client.go:637 +0x55a
github.com/ovn-org/libovsdb/client.(*ovsdbClient).createRPC2Client.func4(0xc000289500, 0xc00451a660, 0x3, 0x4, 0xc003e32738, 0x0, 0x0)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/github.com/ovn-org/libovsdb/client/client.go:395 +0x52

Signed-off-by: Dan Williams <dcbw@redhat.com>
  • Loading branch information
dcbw committed Dec 16, 2021
1 parent 4303261 commit a41f75f
Showing 1 changed file with 12 additions and 6 deletions.
18 changes: 12 additions & 6 deletions cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,16 +96,22 @@ type RowCache struct {
mutex sync.RWMutex
}

// Row returns one model from the cache by UUID
func (r *RowCache) Row(uuid string) model.Model {
r.mutex.RLock()
defer r.mutex.RUnlock()
// getRow returns one model from the cache by UUID. Caller must hold the row
// cache lock.
func (r *RowCache) getRow(uuid string) model.Model {
if row, ok := r.cache[uuid]; ok {
return model.Clone(row)
}
return nil
}

// Row returns one model from the cache by UUID
func (r *RowCache) Row(uuid string) model.Model {
r.mutex.RLock()
defer r.mutex.RUnlock()
return r.getRow(uuid)
}

// RowByModel searches the cache using a the indexes for a provided model
func (r *RowCache) RowByModel(m model.Model) model.Model {
r.mutex.RLock()
Expand All @@ -119,15 +125,15 @@ func (r *RowCache) RowByModel(m model.Model) model.Model {
return nil
}
if uuid.(string) != "" {
return r.Row(uuid.(string))
return r.getRow(uuid.(string))
}
for index := range r.indexes {
val, err := valueFromIndex(info, index)
if err != nil {
continue
}
if uuid, ok := r.indexes[index][val]; ok {
return r.Row(uuid)
return r.getRow(uuid)
}
}
return nil
Expand Down

0 comments on commit a41f75f

Please sign in to comment.