Skip to content

Commit

Permalink
Re-acquire locks via iterative instead of recursive execution #447
Browse files Browse the repository at this point in the history
When multiple OrderedLocks are acquired by different threads, the
deadlock recovery mechanism suspending and reacquiring the locks
requires an indefinite number of tries until one thread holds all
required locks. Since reacquisition is performed by recursive method
invocation, the stack can become infinitely large with the change of
resulting in a StackOverflowError.

These change replace the recursive lock acqusition by an iterative one,
such that still an indefinite number of tries for acquiring a set of
locks is required but the chance of resulting in an error is eliminated.

The added test case does not deterministically reproduce the erroneous
behavior, but since a proper regression test is very hard to define (as
specific lock order across a magnitude of retries has to be ensured and
coordinated between different threads), it at least executes more
sophisticated locking scenarios to ensure proper lock retrieval and
deadlock management.
  • Loading branch information
HeikoKlare committed May 13, 2023
1 parent 1f999bf commit d052003
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,19 +49,16 @@ protected static LockState suspend(OrderedLock lock) {
/**
* Re-acquires a suspended lock and reverts to the correct lock depth.
*/
public void resume() {
//spin until the lock is successfully acquired
//NOTE: spinning here allows the UI thread to service pending syncExecs
//if the UI thread is waiting to acquire a lock.
while (true) {
try {
if (lock.acquire(Long.MAX_VALUE))
break;
} catch (InterruptedException e) {
//ignore and loop
public boolean resume() {
try {
if (lock.acquire(Integer.MAX_VALUE, false)) {
lock.setDepth(depth);
return true;
}
} catch (InterruptedException e) {
// ignore and retry
}
lock.setDepth(depth);
return false;
}
}

Expand Down Expand Up @@ -303,17 +300,26 @@ void removeLockWaitThread(Thread thread, ISchedulingRule lock) {
* Resumes all the locks that were suspended while this thread was waiting to acquire another lock.
*/
void resumeSuspendedLocks(Thread owner) {
LockState[] toResume;
synchronized (suspendedLocks) {
Deque<LockState[]> prevLocks = suspendedLocks.get(owner);
if (prevLocks == null)
return;
toResume = prevLocks.pop();
if (prevLocks.isEmpty())
suspendedLocks.remove(owner);
// spin until the locks are successfully acquired
// NOTE: spinning here allows the UI thread to service pending syncExecs
// if the UI thread is waiting to acquire a lock.
while (true) {
LockState[] toResume;
synchronized (suspendedLocks) {
Deque<LockState[]> prevLocks = suspendedLocks.get(owner);
if (prevLocks == null)
return;
toResume = prevLocks.pop();
if (prevLocks.isEmpty())
suspendedLocks.remove(owner);
}
for (LockState element : toResume) {
if (!element.resume()) {
// Resuming a lock failed, so start with the first one again.
break;
}
}
}
for (LockState element : toResume)
element.resume();
}

public void setLockListener(LockListener listener) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,21 @@ public void acquire() {

@Override
public boolean acquire(long delay) throws InterruptedException {
return acquire(delay, true);
}

/**
* Acquires a lock as defined by {@link #acquire(long)} but only resumes
* suspended locks if {@code resumeSuspendedLocks} is {@code true}.
*
* @param delay the number of milliseconds to delay
* @param resumeSuspendedLocks whether locks that were suspended because a lock
* could not be acquired shall be re-acquired or not
* @return {@code true} if the lock was successfully acquired, and {@code false}
* otherwise.
* @exception InterruptedException if the thread was interrupted
*/
boolean acquire(long delay, boolean resumeSuspendedLocks) throws InterruptedException {
if (Thread.interrupted())
throw new InterruptedException();

Expand All @@ -108,7 +123,9 @@ public boolean acquire(long delay) throws InterruptedException {
if (DEBUG)
System.out.println("[" + Thread.currentThread() + "] Operation waiting to be executed... " + this); //$NON-NLS-1$ //$NON-NLS-2$
boolean success = doAcquire(semaphore, delay);
manager.resumeSuspendedLocks(Thread.currentThread());
if (resumeSuspendedLocks) {
manager.resumeSuspendedLocks(Thread.currentThread());
}
if (DEBUG)
System.out.println("[" + Thread.currentThread() + //$NON-NLS-1$
(success ? "] Operation started... " : "] Operation timed out... ") + this); //$NON-NLS-1$ //$NON-NLS-2$ //}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,37 @@ public void testComplex() {
});
}

@Test
public void testManyLocksAndThreads() {
int numberOfLocks = 30;
int numberOfThreads = 15;
DeadlockDetector.runSilent(() -> {
ArrayList<LockAcquiringRunnable> allRunnables = new ArrayList<>();
LockManager manager = new LockManager();
manager.setLockListener(new LockListener() {
@Override
public boolean aboutToWait(Thread lockOwner) {
// Yield upon waiting for a lock to give other threads the change for
// conflicting lock acquisitions
Thread.yield();
return false;
}
});
List<OrderedLock> locks = new ArrayList<>();
for (int i = 0; i < numberOfLocks; i++) {
locks.add(manager.newLock());
}
for (int i = 0; i < numberOfThreads / 5; i++) {
Collections.shuffle(locks);
// Assign random half of the locks to every thread
createRunnables(locks.stream().limit(numberOfLocks / 2).toArray(OrderedLock[]::new), 5, allRunnables);
}
execute(allRunnables);
// the underlying array has to be empty
assertTrue("Locks not removed from graph.", manager.isEmpty());
});
}

@Test
public void testSimple() {
DeadlockDetector.runSilent(() -> {
Expand Down

0 comments on commit d052003

Please sign in to comment.