Skip to content

Commit

Permalink
Re-acquire locks via iterative instead of recursive execution #447
Browse files Browse the repository at this point in the history
When multiple OrderedLocks are acquired by different threads, the
deadlock recovery mechanism suspending and reacquiring the locks
requires an indefinite number of tries until one thread holds all
required locks. Since reacquisition is performed by recursive method
invocation, the stack can become infinitely large with the change of
resulting in a StackOverflowError.

These changes replace the recursive lock acqusition by an iterative one,
such that still an indefinite number of tries for acquiring a set of
locks is required but the chance of resulting in an error is eliminated.
The OrderedLockTest.testComplex, which was randomly failing on Windows
systems due to the recursive implementation, is re-enabled.

The added test case does not deterministically reproduce the erroneous
behavior, but since a proper regression test is very hard to define (as
specific lock order across a magnitude of retries has to be ensured and
coordinated between different threads), it at least executes more
sophisticated locking scenarios to ensure proper lock retrieval and
deadlock management.
  • Loading branch information
HeikoKlare committed Jun 21, 2023
1 parent 0d516c9 commit 79e36d3
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,19 +49,16 @@ protected static LockState suspend(OrderedLock lock) {
/**
* Re-acquires a suspended lock and reverts to the correct lock depth.
*/
public void resume() {
//spin until the lock is successfully acquired
//NOTE: spinning here allows the UI thread to service pending syncExecs
//if the UI thread is waiting to acquire a lock.
while (true) {
try {
if (lock.acquire(Long.MAX_VALUE))
break;
} catch (InterruptedException e) {
//ignore and loop
public boolean resume() {
try {
if (lock.acquire(Integer.MAX_VALUE, false)) {
lock.setDepth(depth);
return true;
}
} catch (InterruptedException e) {
// ignore and retry
}
lock.setDepth(depth);
return false;
}
}

Expand Down Expand Up @@ -303,17 +300,26 @@ void removeLockWaitThread(Thread thread, ISchedulingRule lock) {
* Resumes all the locks that were suspended while this thread was waiting to acquire another lock.
*/
void resumeSuspendedLocks(Thread owner) {
LockState[] toResume;
synchronized (suspendedLocks) {
Deque<LockState[]> prevLocks = suspendedLocks.get(owner);
if (prevLocks == null)
return;
toResume = prevLocks.pop();
if (prevLocks.isEmpty())
suspendedLocks.remove(owner);
// spin until the locks are successfully acquired
// NOTE: spinning here allows the UI thread to service pending syncExecs
// if the UI thread is waiting to acquire a lock.
while (true) {
LockState[] toResume;
synchronized (suspendedLocks) {
Deque<LockState[]> prevLocks = suspendedLocks.get(owner);
if (prevLocks == null)
return;
toResume = prevLocks.pop();
if (prevLocks.isEmpty())
suspendedLocks.remove(owner);
}
for (LockState element : toResume) {
if (!element.resume()) {
// Try again if acquiring a lock failed
continue;
}
}
}
for (LockState element : toResume)
element.resume();
}

public void setLockListener(LockListener listener) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,21 @@ public void acquire() {

@Override
public boolean acquire(long delay) throws InterruptedException {
return acquire(delay, true);
}

/**
* Acquires a lock as defined by {@link #acquire(long)} but only resumes
* suspended locks if {@code resumeSuspendedLocks} is {@code true}.
*
* @param delay the number of milliseconds to delay
* @param resumeSuspendedLocks whether locks that were suspended because a lock
* could not be acquired shall be re-acquired or not
* @return {@code true} if the lock was successfully acquired, and {@code false}
* otherwise.
* @exception InterruptedException if the thread was interrupted
*/
boolean acquire(long delay, boolean resumeSuspendedLocks) throws InterruptedException {
if (Thread.interrupted())
throw new InterruptedException();

Expand All @@ -108,7 +123,9 @@ public boolean acquire(long delay) throws InterruptedException {
if (DEBUG)
System.out.println("[" + Thread.currentThread() + "] Operation waiting to be executed... " + this); //$NON-NLS-1$ //$NON-NLS-2$
boolean success = doAcquire(semaphore, delay);
manager.resumeSuspendedLocks(Thread.currentThread());
if (resumeSuspendedLocks) {
manager.resumeSuspendedLocks(Thread.currentThread());
}
if (DEBUG)
System.out.println("[" + Thread.currentThread() + //$NON-NLS-1$
(success ? "] Operation started... " : "] Operation timed out... ") + this); //$NON-NLS-1$ //$NON-NLS-2$ //}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,28 @@
package org.eclipse.core.tests.runtime.jobs;

import static org.junit.Assert.assertTrue;
import static org.junit.Assume.assumeFalse;

import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;
import java.util.*;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.stream.Collectors;
import org.eclipse.core.internal.jobs.*;
import org.eclipse.core.runtime.Platform;
import org.eclipse.core.internal.jobs.DeadlockDetector;
import org.eclipse.core.internal.jobs.LockManager;
import org.eclipse.core.internal.jobs.OrderedLock;
import org.eclipse.core.runtime.jobs.ILock;
import org.eclipse.core.runtime.jobs.LockListener;
import org.eclipse.core.tests.harness.TestBarrier2;
import org.eclipse.core.tests.runtime.jobs.LockAcquiringRunnable.RandomOrder;
import org.junit.Rule;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/**
* Tests implementation of ILock objects
*/
@RunWith(JUnit4.class)
@SuppressWarnings("restriction")
public class OrderedLockTest {
@Rule
Expand All @@ -52,9 +52,6 @@ private void createRunnables(ILock[] locks, int n, ArrayList<LockAcquiringRunnab

@Test
public void testComplex() {
// FIXME Disabled on Windows due to #477; must be re-enabled when merging fix
// for the issue
assumeFalse(Platform.getOS().equals(Platform.OS_WIN32));
DeadlockDetector.runSilent(() -> {
ArrayList<LockAcquiringRunnable> allRunnables = new ArrayList<>();
LockManager manager = new LockManager();
Expand All @@ -71,6 +68,36 @@ public void testComplex() {
});
}

@Test
public void testManyLocksAndThreads() {
int numberOfLocks = 10;
int numberOfThreads = 10;
DeadlockDetector.runSilent(() -> {
ArrayList<LockAcquiringRunnable> allRunnables = new ArrayList<>();
LockManager manager = new LockManager();
manager.setLockListener(new LockListener() {
@Override
public boolean aboutToWait(Thread lockOwner) {
// Yield upon waiting for a lock to give other threads the change for
// conflicting lock acquisitions
Thread.yield();
return false;
}
});
List<OrderedLock> locks = new ArrayList<>();
for (int i = 0; i < numberOfLocks; i++) {
locks.add(manager.newLock());
}
for (int i = 0; i < numberOfThreads / 5; i++) {
Collections.shuffle(locks);
createRunnables(locks.toArray(OrderedLock[]::new), 5, allRunnables);
}
execute(allRunnables);
// the underlying array has to be empty
assertTrue("Locks not removed from graph.", manager.isEmpty());
});
}

@Test
public void testSimple() {
DeadlockDetector.runSilent(() -> {
Expand Down

0 comments on commit 79e36d3

Please sign in to comment.