-
Notifications
You must be signed in to change notification settings - Fork 24.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Protect scheduler engine against throwing listeners (#32998)
There are two problems with the scheduler engine today. Both relate to listeners that throw. The first problem is that any triggered listener that throws a plain old exception will cause no additional listeners to be triggered for the event, and will also cause the scheduler to never be invoked again. This leads to lost events and is bad. The second problem is that any triggered listener that throws an error of the fatal kind will not lead to that error being caught by the uncaught exception handler. This is because the triggered listener is executed as a future task under a scheduled thread pool executor. A throwable there gets caught by the JDK framework and set as the outcome on the future task. Since we never inspect these tasks for their outcomes, nor is there a good place to do this, we have to handle these errors ourselves. To do this, we catch them and dispatch them to the uncaught exception handler via a forked thread. This is similar to our handling in Netty.
- Loading branch information
1 parent
cc8161a
commit 21f6405
Showing
5 changed files
with
320 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
159 changes: 159 additions & 0 deletions
159
...lugin/core/src/test/java/org/elasticsearch/xpack/core/scheduler/SchedulerEngineTests.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License; | ||
* you may not use this file except in compliance with the Elastic License. | ||
*/ | ||
|
||
package org.elasticsearch.xpack.core.scheduler; | ||
|
||
import org.apache.logging.log4j.Logger; | ||
import org.apache.logging.log4j.message.ParameterizedMessage; | ||
import org.elasticsearch.common.collect.Tuple; | ||
import org.elasticsearch.common.settings.Settings; | ||
import org.elasticsearch.test.ESTestCase; | ||
import org.mockito.ArgumentCaptor; | ||
|
||
import java.time.Clock; | ||
import java.util.ArrayList; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.concurrent.CountDownLatch; | ||
import java.util.concurrent.atomic.AtomicBoolean; | ||
import java.util.concurrent.atomic.AtomicInteger; | ||
|
||
import static org.hamcrest.Matchers.arrayWithSize; | ||
import static org.hamcrest.Matchers.equalTo; | ||
import static org.hamcrest.Matchers.instanceOf; | ||
import static org.mockito.Mockito.mock; | ||
import static org.mockito.Mockito.times; | ||
import static org.mockito.Mockito.verify; | ||
import static org.mockito.Mockito.verifyNoMoreInteractions; | ||
|
||
// Tests that a SchedulerEngine keeps triggering listeners and keeps scheduling a job when listeners throw exceptions. | ||
// Each test passes a mock Logger to the engine so that the warnings logged for failing listeners can be verified. | ||
public class SchedulerEngineTests extends ESTestCase { | ||
|
||
// Registers a random mix of well-behaved and throwing listeners, lets the job trigger once, and checks that every | ||
// listener was invoked and that one warning was logged per throwing listener. | ||
public void testListenersThrowingExceptionsDoNotCauseOtherListenersToBeSkipped() throws InterruptedException { | ||
final Logger mockLogger = mock(Logger.class); | ||
final SchedulerEngine engine = new SchedulerEngine(Settings.EMPTY, Clock.systemUTC(), mockLogger); | ||
try { | ||
// each listener is paired with the flag that records whether it has been invoked | ||
final List<Tuple<SchedulerEngine.Listener, AtomicBoolean>> listeners = new ArrayList<>(); | ||
final int numberOfListeners = randomIntBetween(1, 32); | ||
int numberOfFailingListeners = 0; | ||
// one count per listener; released once every listener has been invoked | ||
final CountDownLatch latch = new CountDownLatch(numberOfListeners); | ||
for (int i = 0; i < numberOfListeners; i++) { | ||
final AtomicBoolean trigger = new AtomicBoolean(); | ||
final SchedulerEngine.Listener listener; | ||
if (randomBoolean()) { | ||
// well-behaved listener: records the invocation and returns normally | ||
listener = event -> { | ||
if (trigger.compareAndSet(false, true)) { | ||
latch.countDown(); | ||
} else { | ||
fail("listener invoked twice"); | ||
} | ||
}; | ||
} else { | ||
// failing listener: records the invocation and then throws | ||
numberOfFailingListeners++; | ||
listener = event -> { | ||
if (trigger.compareAndSet(false, true)) { | ||
// count down before throwing so the waiting test thread is still released | ||
latch.countDown(); | ||
throw new RuntimeException(getTestName()); | ||
} else { | ||
fail("listener invoked twice"); | ||
} | ||
}; | ||
} | ||
listeners.add(Tuple.tuple(listener, trigger)); | ||
} | ||
|
||
// randomize the order and register the listeners | ||
Collections.shuffle(listeners, random()); | ||
listeners.stream().map(Tuple::v1).forEach(engine::register); | ||
|
||
final AtomicBoolean scheduled = new AtomicBoolean(); | ||
engine.add(new SchedulerEngine.Job( | ||
getTestName(), | ||
(startTime, now) -> { | ||
// only allow one triggering of the listeners | ||
// presumably 0 schedules an immediate run and -1 stops further scheduling -- confirm against SchedulerEngine | ||
if (scheduled.compareAndSet(false, true)) { | ||
return 0; | ||
} else { | ||
return -1; | ||
} | ||
})); | ||
|
||
latch.await(); | ||
|
||
// now check that every listener was invoked | ||
assertTrue(listeners.stream().map(Tuple::v2).allMatch(AtomicBoolean::get)); | ||
// NOTE(review): failing listeners count down the latch before they throw, so the last warning may not have been | ||
// logged yet when await() returns -- confirm that this verification cannot race with the engine thread | ||
if (numberOfFailingListeners > 0) { | ||
assertFailedListenerLogMessage(mockLogger, numberOfFailingListeners); | ||
} | ||
verifyNoMoreInteractions(mockLogger); | ||
} finally { | ||
engine.stop(); | ||
} | ||
} | ||
|
||
// Registers listeners that throw on every invocation and lets the job be scheduled several times, checking that each | ||
// listener was invoked once per scheduled run and that one warning was logged per listener per run. | ||
public void testListenersThrowingExceptionsDoNotCauseNextScheduledTaskToBeSkipped() throws InterruptedException { | ||
final Logger mockLogger = mock(Logger.class); | ||
final SchedulerEngine engine = new SchedulerEngine(Settings.EMPTY, Clock.systemUTC(), mockLogger); | ||
try { | ||
// each listener is paired with the counter that records how often it has been invoked | ||
final List<Tuple<SchedulerEngine.Listener, AtomicInteger>> listeners = new ArrayList<>(); | ||
final int numberOfListeners = randomIntBetween(1, 32); | ||
final int numberOfSchedules = randomIntBetween(1, 32); | ||
// one count for every expected listener invocation across all scheduled runs | ||
final CountDownLatch listenersLatch = new CountDownLatch(numberOfSchedules * numberOfListeners); | ||
for (int i = 0; i < numberOfListeners; i++) { | ||
final AtomicInteger triggerCount = new AtomicInteger(); | ||
final SchedulerEngine.Listener listener = event -> { | ||
if (triggerCount.incrementAndGet() <= numberOfSchedules) { | ||
// count down before throwing so the waiting test thread is still released | ||
listenersLatch.countDown(); | ||
throw new RuntimeException(getTestName()); | ||
} else { | ||
fail("listener invoked more than [" + numberOfSchedules + "] times"); | ||
} | ||
}; | ||
listeners.add(Tuple.tuple(listener, triggerCount)); | ||
engine.register(listener); | ||
} | ||
|
||
// latch for each invocation of nextScheduledTimeAfter, once for each scheduled run, and then a final time when we disable | ||
final CountDownLatch latch = new CountDownLatch(1 + numberOfSchedules); | ||
engine.add(new SchedulerEngine.Job( | ||
getTestName(), | ||
(startTime, now) -> { | ||
// the remaining latch count doubles as the number of schedule computations still expected | ||
if (latch.getCount() >= 2) { | ||
latch.countDown(); | ||
return 0; | ||
} else if (latch.getCount() == 1) { | ||
latch.countDown(); | ||
return -1; | ||
} else { | ||
throw new AssertionError("nextScheduledTimeAfter invoked more than the expected number of times"); | ||
} | ||
})); | ||
|
||
listenersLatch.await(); | ||
assertTrue(listeners.stream().map(Tuple::v2).allMatch(count -> count.get() == numberOfSchedules)); | ||
latch.await(); | ||
assertFailedListenerLogMessage(mockLogger, numberOfListeners * numberOfSchedules); | ||
verifyNoMoreInteractions(mockLogger); | ||
} finally { | ||
engine.stop(); | ||
} | ||
} | ||
|
||
// Verifies that warn(message, throwable) was called on mockLogger exactly `times` times, that every captured message has | ||
// the expected format with the test name as its single parameter, and that every captured throwable is a | ||
// RuntimeException whose message is the test name. | ||
private void assertFailedListenerLogMessage(Logger mockLogger, int times) { | ||
final ArgumentCaptor<ParameterizedMessage> messageCaptor = ArgumentCaptor.forClass(ParameterizedMessage.class); | ||
final ArgumentCaptor<Throwable> throwableCaptor = ArgumentCaptor.forClass(Throwable.class); | ||
verify(mockLogger, times(times)).warn(messageCaptor.capture(), throwableCaptor.capture()); | ||
for (final ParameterizedMessage message : messageCaptor.getAllValues()) { | ||
assertThat(message.getFormat(), equalTo("listener failed while handling triggered event [{}]")); | ||
assertThat(message.getParameters(), arrayWithSize(1)); | ||
assertThat(message.getParameters()[0], equalTo(getTestName())); | ||
} | ||
for (final Throwable throwable : throwableCaptor.getAllValues()) { | ||
assertThat(throwable, instanceOf(RuntimeException.class)); | ||
assertThat(throwable.getMessage(), equalTo(getTestName())); | ||
} | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
// Build script for a standalone test project that depends on the x-pack core module. | ||
apply plugin: 'elasticsearch.standalone-test' | ||
|
||
dependencies { | ||
// tests compile against the 'shadow' configuration of the x-pack core module | ||
testCompile project(path: xpackModule('core'), configuration: 'shadow') | ||
} | ||
|
||
test { | ||
// NOTE(review): presumably the security manager is disabled so tests may replace the default uncaught exception handler -- confirm | ||
systemProperty 'tests.security.manager', 'false' | ||
} |
84 changes: 84 additions & 0 deletions
84
...-tests/src/test/java/org/elasticsearch/xpack/core/scheduler/EvilSchedulerEngineTests.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License; | ||
* you may not use this file except in compliance with the Elastic License. | ||
*/ | ||
|
||
package org.elasticsearch.xpack.core.scheduler; | ||
|
||
import org.apache.logging.log4j.Logger; | ||
import org.elasticsearch.common.settings.Settings; | ||
import org.elasticsearch.test.ESTestCase; | ||
|
||
import java.time.Clock; | ||
import java.util.concurrent.CountDownLatch; | ||
import java.util.concurrent.atomic.AtomicBoolean; | ||
import java.util.concurrent.atomic.AtomicReference; | ||
|
||
import static org.hamcrest.Matchers.containsString; | ||
import static org.hamcrest.Matchers.equalTo; | ||
import static org.hamcrest.Matchers.hasToString; | ||
import static org.hamcrest.Matchers.instanceOf; | ||
import static org.hamcrest.Matchers.not; | ||
import static org.mockito.Mockito.mock; | ||
import static org.mockito.Mockito.verifyNoMoreInteractions; | ||
|
||
// Tests how SchedulerEngine behaves when a listener throws a fatal error. The test temporarily replaces the JVM-wide | ||
// default uncaught exception handler and restores it afterwards. | ||
public class EvilSchedulerEngineTests extends ESTestCase { | ||
|
||
// Registers a listener that throws an OutOfMemoryError when triggered and checks that the error reaches the default | ||
// uncaught exception handler on a thread other than the test thread, without anything being logged. | ||
public void testOutOfMemoryErrorWhileTriggeredIsRethrownAndIsUncaught() throws InterruptedException { | ||
final AtomicReference<Throwable> maybeFatal = new AtomicReference<>(); | ||
// released by the replacement handler once it has observed an uncaught throwable | ||
final CountDownLatch uncaughtLatuch = new CountDownLatch(1); | ||
// remember the current default handler so that the outer finally block can restore it | ||
final Thread.UncaughtExceptionHandler uncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler(); | ||
try { | ||
/* | ||
* We want to test that the out of memory error thrown from the scheduler engine goes uncaught on another thread; this gives us | ||
* confidence that an error thrown during a triggered event will lead to the node being torn down. | ||
*/ | ||
final AtomicReference<Thread> maybeThread = new AtomicReference<>(); | ||
// record the throwable and the thread the handler runs on, then release the waiting test thread | ||
Thread.setDefaultUncaughtExceptionHandler((t, e) -> { | ||
maybeFatal.set(e); | ||
maybeThread.set(Thread.currentThread()); | ||
uncaughtLatuch.countDown(); | ||
}); | ||
final Logger mockLogger = mock(Logger.class); | ||
final SchedulerEngine engine = new SchedulerEngine(Settings.EMPTY, Clock.systemUTC(), mockLogger); | ||
try { | ||
final AtomicBoolean trigger = new AtomicBoolean(); | ||
// the listener throws a fatal error on its first invocation and fails the test on any later one | ||
engine.register(event -> { | ||
if (trigger.compareAndSet(false, true)) { | ||
throw new OutOfMemoryError("640K ought to be enough for anybody"); | ||
} else { | ||
fail("listener invoked twice"); | ||
} | ||
}); | ||
final CountDownLatch schedulerLatch = new CountDownLatch(1); | ||
engine.add(new SchedulerEngine.Job( | ||
getTestName(), | ||
(startTime, now) -> { | ||
// the schedule is expected to be computed exactly once; any further call is a test failure | ||
if (schedulerLatch.getCount() == 1) { | ||
schedulerLatch.countDown(); | ||
return 0; | ||
} else { | ||
throw new AssertionError("nextScheduledTimeAfter invoked more than the expected number of times"); | ||
} | ||
})); | ||
|
||
// block until the replacement handler has seen the error | ||
uncaughtLatuch.await(); | ||
assertTrue(trigger.get()); | ||
assertNotNull(maybeFatal.get()); | ||
assertThat(maybeFatal.get(), instanceOf(OutOfMemoryError.class)); | ||
assertThat(maybeFatal.get(), hasToString(containsString("640K ought to be enough for anybody"))); | ||
assertNotNull(maybeThread.get()); | ||
assertThat(maybeThread.get(), not(equalTo(Thread.currentThread()))); // the error should be rethrown on another thread | ||
schedulerLatch.await(); | ||
verifyNoMoreInteractions(mockLogger); // we never logged anything | ||
} finally { | ||
engine.stop(); | ||
} | ||
} finally { | ||
// restore the uncaught exception handler | ||
Thread.setDefaultUncaughtExceptionHandler(uncaughtExceptionHandler); | ||
} | ||
} | ||
|
||
} |