From ab7598411650639ef8a0a7eab9f653a36a7ffa5f Mon Sep 17 00:00:00 2001 From: Alexander Reelsen Date: Sat, 5 May 2018 00:40:01 +0200 Subject: [PATCH 1/3] Watcher: Mark watcher as started only after loading watches Starting watcher should wait for the watches to be started before marking the status as started. Also, reloading watcher could set the execution service to paused. This could lead to watches not being executed, when run in tests. This fix does not change the paused flag in the execution service, just clears out the current queue and executions. Some tests were affected by this because after putting a watch, the execution service was still marked as paused, so that the manual execution with the ScheduleEngineTriggerMock did not work. Closes #30381 --- .../watcher/WatcherLifeCycleService.java | 6 ++--- .../xpack/watcher/WatcherService.java | 27 ++++++++++++++++--- .../watcher/execution/ExecutionService.java | 10 ++++++- .../watcher/WatcherLifeCycleServiceTests.java | 4 +-- .../xpack/watcher/WatcherServiceTests.java | 24 ++++++++++++++++- .../ExecutionVarsIntegrationTests.java | 17 +++++++----- 6 files changed, 69 insertions(+), 19 deletions(-) diff --git a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/WatcherLifeCycleService.java b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/WatcherLifeCycleService.java index eef9e019b7a7e..24cf23245d762 100644 --- a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/WatcherLifeCycleService.java +++ b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/WatcherLifeCycleService.java @@ -110,8 +110,7 @@ public void clusterChanged(ClusterChangedEvent event) { // if this is not a data node, we need to start it ourselves possibly if (event.state().nodes().getLocalNode().isDataNode() == false && isWatcherStoppedManually == false && this.state.get() == WatcherState.STOPPED) { - watcherService.start(event.state()); - this.state.set(WatcherState.STARTED); + watcherService.start(event.state(), () -> this.state.set(WatcherState.STARTED)); return; } @@ -157,8 +156,7 @@ public void clusterChanged(ClusterChangedEvent event) { if (state.get() == WatcherState.STARTED) { watcherService.reload(event.state(), "new local watcher shard allocation ids"); } else if (state.get() == WatcherState.STOPPED) { - watcherService.start(event.state()); - this.state.set(WatcherState.STARTED); + watcherService.start(event.state(), () -> this.state.set(WatcherState.STARTED)); } } else { clearAllocationIds(); diff --git a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/WatcherService.java b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/WatcherService.java index dcfb713a66580..8f02232cb23f6 100644 --- a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/WatcherService.java +++ b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/WatcherService.java @@ -183,23 +183,40 @@ void reload(ClusterState state, String reason) { // by checking the cluster state version before and after loading the watches we can potentially just exit without applying the // changes processedClusterStateVersion.set(state.getVersion()); - pauseExecution(reason); triggerService.pauseExecution(); + int cancelledTaskCount = executionService.reload(); + logger.info("reloading watcher, reason [{}], cancelled [{}] queued tasks", reason, cancelledTaskCount); executor.execute(wrapWatcherService(() -> reloadInner(state, reason, false), e -> logger.error("error reloading watcher", e))); } - public void start(ClusterState state) { + /** + * start the watcher service, load watches in the background + * + * @param state the current cluster state + * @param callback the callback to be triggered, when watches where loaded successfully + */ + public void start(ClusterState state, Runnable callback) { + executionService.unPause(); processedClusterStateVersion.set(state.getVersion()); - executor.execute(wrapWatcherService(() -> reloadInner(state, "starting", true), + executor.execute(wrapWatcherService(() -> { + if (reloadInner(state, "starting", true)) { + callback.run(); + } + }, e -> logger.error("error starting watcher", e))); } /** * reload the watches and start scheduling them + * + * @param state the current cluster state + * @param reason the reason for reloading, will be logged + * @param loadTriggeredWatches should triggered watches be loaded in this run, not needed for reloading, only for starting + * @return true if no other loading of a newer cluster state happened in parallel, false otherwise */ - private synchronized void reloadInner(ClusterState state, String reason, boolean loadTriggeredWatches) { + private synchronized boolean reloadInner(ClusterState state, String reason, boolean loadTriggeredWatches) { // exit early if another thread has come in between if (processedClusterStateVersion.get() != state.getVersion()) { logger.debug("watch service has not been reloaded for state [{}], another reload for state [{}] in progress", @@ -221,9 +238,11 @@ private synchronized void reloadInner(ClusterState state, String reason, boolean executionService.executeTriggeredWatches(triggeredWatches); } logger.debug("watch service has been reloaded, reason [{}]", reason); + return true; } else { logger.debug("watch service has not been reloaded for state [{}], another reload for state [{}] in progress", state.getVersion(), processedClusterStateVersion.get()); + return false; } } diff --git a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/execution/ExecutionService.java b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/execution/ExecutionService.java index 6901adb0a6937..b405ec4767623 100644 --- a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/execution/ExecutionService.java +++ b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/execution/ExecutionService.java @@ -121,11 +121,19 @@ public void unPause() { } /** - * Pause the execution of the watcher executor + * Pause the execution of the watcher executor, and empty the state * @return the number of tasks that have been removed */ public int pause() { paused.set(true); + return reload(); + } + + /** + * Empty the currently queued tasks and wait for current executions to finish + * @return the number of tasks that have been removed + */ + public int reload() { int cancelledTaskCount = executor.queue().drainTo(new ArrayList<>()); this.clearExecutions(); return cancelledTaskCount; diff --git a/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/WatcherLifeCycleServiceTests.java b/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/WatcherLifeCycleServiceTests.java index 316cb722f2f1e..700901753d4a1 100644 --- a/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/WatcherLifeCycleServiceTests.java +++ b/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/WatcherLifeCycleServiceTests.java @@ -180,7 +180,7 @@ public void testManualStartStop() { reset(watcherService); when(watcherService.validate(clusterState)).thenReturn(true); lifeCycleService.clusterChanged(new ClusterChangedEvent("any", clusterState, stoppedClusterState)); - verify(watcherService, times(1)).start(eq(clusterState)); + verify(watcherService, times(1)).start(eq(clusterState), anyObject()); // no change, keep going reset(watcherService); @@ -423,7 +423,7 @@ public void testWatcherServiceDoesNotStartIfIndexTemplatesAreMissing() throws Ex when(watcherService.validate(eq(state))).thenReturn(true); lifeCycleService.clusterChanged(new ClusterChangedEvent("any", state, state)); - verify(watcherService, times(0)).start(any(ClusterState.class)); + verify(watcherService, times(0)).start(any(ClusterState.class), anyObject()); } public void testWatcherStopsWhenMasterNodeIsMissing() { diff --git a/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/WatcherServiceTests.java b/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/WatcherServiceTests.java index 5f815170215d3..83c7a26412362 100644 --- a/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/WatcherServiceTests.java +++ b/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/WatcherServiceTests.java @@ -68,6 +68,7 @@ import static org.mockito.Matchers.any; import static org.mockito.Matchers.eq; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -199,7 +200,7 @@ void stopExecutor() { when(client.clearScroll(any(ClearScrollRequest.class))).thenReturn(clearScrollFuture); clearScrollFuture.onResponse(new ClearScrollResponse(true, 1)); - service.start(clusterState); + service.start(clusterState, () -> {}); ArgumentCaptor captor = ArgumentCaptor.forClass(List.class); verify(triggerService).start(captor.capture()); @@ -238,6 +239,27 @@ void stopExecutor() { verify(triggerEngine).pauseExecution(); } + // if we have to reload the watcher service, the execution service should not be paused, as this might + // result in missing executions + public void testReloadingWatcherDoesNotPauseExecutionService() { + ExecutionService executionService = mock(ExecutionService.class); + TriggerService triggerService = mock(TriggerService.class); + WatcherService service = new WatcherService(Settings.EMPTY, triggerService, mock(TriggeredWatchStore.class), + executionService, mock(WatchParser.class), mock(Client.class), executorService) { + @Override + void stopExecutor() { + } + }; + + ClusterState.Builder csBuilder = new ClusterState.Builder(new ClusterName("_name")); + csBuilder.metaData(MetaData.builder()); + + service.reload(csBuilder.build(), "whatever"); + verify(executionService).reload(); + verify(executionService, never()).pause(); + verify(triggerService).pauseExecution(); + } + private static DiscoveryNode newNode() { return new DiscoveryNode("node", ESTestCase.buildNewFakeTransportAddress(), Collections.emptyMap(), new HashSet<>(asList(DiscoveryNode.Role.values())), Version.CURRENT); diff --git a/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/test/integration/ExecutionVarsIntegrationTests.java b/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/test/integration/ExecutionVarsIntegrationTests.java index 85b0280588a6e..2f69cc95a50ef 100644 --- a/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/test/integration/ExecutionVarsIntegrationTests.java +++ b/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/test/integration/ExecutionVarsIntegrationTests.java @@ -23,6 +23,7 @@ import java.util.Map; import java.util.function.Function; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; import static org.elasticsearch.xpack.watcher.actions.ActionBuilders.loggingAction; import static org.elasticsearch.xpack.watcher.client.WatchSourceBuilders.watchBuilder; import static org.elasticsearch.xpack.watcher.input.InputBuilders.simpleInput; @@ -36,6 +37,8 @@ public class ExecutionVarsIntegrationTests extends AbstractWatcherIntegrationTestCase { + private String watchId = randomAlphaOfLength(20); + @Override protected List> pluginTypes() { List> types = super.pluginTypes(); @@ -107,7 +110,7 @@ protected Map, Object>> pluginScripts() { public void testVars() throws Exception { WatcherClient watcherClient = watcherClient(); - PutWatchResponse putWatchResponse = watcherClient.preparePutWatch("_id").setSource(watchBuilder() + PutWatchResponse putWatchResponse = watcherClient.preparePutWatch(watchId).setSource(watchBuilder() .trigger(schedule(cron("0/1 * * * * ?"))) .input(simpleInput("value", 5)) .condition(new ScriptCondition( @@ -126,7 +129,7 @@ public void testVars() throws Exception { assertThat(putWatchResponse.isCreated(), is(true)); - timeWarp().trigger("_id"); + timeWarp().trigger(watchId); flush(); refresh(); @@ -135,11 +138,11 @@ public void testVars() throws Exception { // defaults to match all; }); - assertThat(searchResponse.getHits().getTotalHits(), is(1L)); + assertHitCount(searchResponse, 1L); Map source = searchResponse.getHits().getAt(0).getSourceAsMap(); - assertValue(source, "watch_id", is("_id")); + assertValue(source, "watch_id", is(watchId)); assertValue(source, "state", is("executed")); // we don't store the computed vars in history @@ -171,7 +174,7 @@ public void testVars() throws Exception { public void testVarsManual() throws Exception { WatcherClient watcherClient = watcherClient(); - PutWatchResponse putWatchResponse = watcherClient.preparePutWatch("_id").setSource(watchBuilder() + PutWatchResponse putWatchResponse = watcherClient.preparePutWatch(watchId).setSource(watchBuilder() .trigger(schedule(cron("0/1 * * * * ? 2020"))) .input(simpleInput("value", 5)) .condition(new ScriptCondition( @@ -193,13 +196,13 @@ public void testVarsManual() throws Exception { boolean debug = randomBoolean(); ExecuteWatchResponse executeWatchResponse = watcherClient - .prepareExecuteWatch("_id") + .prepareExecuteWatch(watchId) .setDebug(debug) .get(); assertThat(executeWatchResponse.getRecordId(), notNullValue()); XContentSource source = executeWatchResponse.getRecordSource(); - assertValue(source, "watch_id", is("_id")); + assertValue(source, "watch_id", is(watchId)); assertValue(source, "state", is("executed")); if (debug) { From 1035fb2b8666f2b44f0f4cb3c1113a5c245741af Mon Sep 17 00:00:00 2001 From: Alexander Reelsen Date: Mon, 7 May 2018 09:13:18 +0200 Subject: [PATCH 2/3] add starting state when starting up to share this info in watcher status --- .../org/elasticsearch/xpack/watcher/WatcherLifeCycleService.java | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/WatcherLifeCycleService.java b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/WatcherLifeCycleService.java index 24cf23245d762..620d575fc802c 100644 --- a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/WatcherLifeCycleService.java +++ b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/WatcherLifeCycleService.java @@ -156,6 +156,7 @@ public void clusterChanged(ClusterChangedEvent event) { if (state.get() == WatcherState.STARTED) { watcherService.reload(event.state(), "new local watcher shard allocation ids"); } else if (state.get() == WatcherState.STOPPED) { + this.state.set(WatcherState.STARTING); watcherService.start(event.state(), () -> this.state.set(WatcherState.STARTED)); } } else { From 211bec730cdecfe5327c7a93bc42c897abf47bf4 Mon Sep 17 00:00:00 2001 From: Alexander Reelsen Date: Mon, 7 May 2018 11:00:32 +0200 Subject: [PATCH 3/3] review comments --- .../xpack/watcher/WatcherService.java | 12 ++++++------ .../xpack/watcher/execution/ExecutionService.java | 14 ++++++++++---- .../xpack/watcher/WatcherServiceTests.java | 2 +- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/WatcherService.java b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/WatcherService.java index 8f02232cb23f6..49915674fe9e2 100644 --- a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/WatcherService.java +++ b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/WatcherService.java @@ -184,7 +184,7 @@ void reload(ClusterState state, String reason) { // changes processedClusterStateVersion.set(state.getVersion()); triggerService.pauseExecution(); - int cancelledTaskCount = executionService.reload(); + int cancelledTaskCount = executionService.clearExecutionsAndQueue(); logger.info("reloading watcher, reason [{}], cancelled [{}] queued tasks", reason, cancelledTaskCount); executor.execute(wrapWatcherService(() -> reloadInner(state, reason, false), @@ -194,22 +194,22 @@ void reload(ClusterState state, String reason) { /** * start the watcher service, load watches in the background * - * @param state the current cluster state - * @param callback the callback to be triggered, when watches where loaded successfully + * @param state the current cluster state + * @param postWatchesLoadedCallback the callback to be triggered, when watches where loaded successfully */ - public void start(ClusterState state, Runnable callback) { + public void start(ClusterState state, Runnable postWatchesLoadedCallback) { executionService.unPause(); processedClusterStateVersion.set(state.getVersion()); executor.execute(wrapWatcherService(() -> { if (reloadInner(state, "starting", true)) { - callback.run(); + postWatchesLoadedCallback.run(); } }, e -> logger.error("error starting watcher", e))); } /** - * reload the watches and start scheduling them + * reload watches and start scheduling them * * @param state the current cluster state * @param reason the reason for reloading, will be logged diff --git a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/execution/ExecutionService.java b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/execution/ExecutionService.java index b405ec4767623..7b77afb225e4b 100644 --- a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/execution/ExecutionService.java +++ b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/execution/ExecutionService.java @@ -121,19 +121,25 @@ public void unPause() { } /** - * Pause the execution of the watcher executor, and empty the state + * Pause the execution of the watcher executor, and empty the state. + * Pausing means, that no new watch executions will be done unless this pausing is explicitely unset. + * This is important when watcher is stopped, so that scheduled watches do not accidentally get executed. + * This should not be used when we need to reload watcher based on some cluster state changes, then just calling + * {@link #clearExecutionsAndQueue()} is the way to go + * * @return the number of tasks that have been removed */ public int pause() { paused.set(true); - return reload(); + return clearExecutionsAndQueue(); } /** - * Empty the currently queued tasks and wait for current executions to finish + * Empty the currently queued tasks and wait for current executions to finish. + * * @return the number of tasks that have been removed */ - public int reload() { + public int clearExecutionsAndQueue() { int cancelledTaskCount = executor.queue().drainTo(new ArrayList<>()); this.clearExecutions(); return cancelledTaskCount; diff --git a/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/WatcherServiceTests.java b/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/WatcherServiceTests.java index 83c7a26412362..73f9271e3efda 100644 --- a/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/WatcherServiceTests.java +++ b/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/WatcherServiceTests.java @@ -255,7 +255,7 @@ void stopExecutor() { csBuilder.metaData(MetaData.builder()); service.reload(csBuilder.build(), "whatever"); - verify(executionService).reload(); + verify(executionService).clearExecutionsAndQueue(); verify(executionService, never()).pause(); verify(triggerService).pauseExecution(); }