From 049f29710e609818d3a4558988e91a8e0bafbbc3 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 23 Jan 2018 12:14:23 -0500 Subject: [PATCH] Reindex: log more on rare test failure The test failure tracked by #26758 occurs when we cancel a running reindex request that has been sliced into many children. The main reindex response *looks* canceled but none of the children look canceled. This is super strange because for the main request to look canceled for any length of time one of the children has to be canceled. This change adds additional logging to the test so we have more to go on to debug this the next time it fails. --- .../index/reindex/CancelTests.java | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/modules/reindex/src/test/java/org/elasticsearch/index/reindex/CancelTests.java b/modules/reindex/src/test/java/org/elasticsearch/index/reindex/CancelTests.java index 3ad48d803a437..f21fb45ed7a64 100644 --- a/modules/reindex/src/test/java/org/elasticsearch/index/reindex/CancelTests.java +++ b/modules/reindex/src/test/java/org/elasticsearch/index/reindex/CancelTests.java @@ -91,6 +91,7 @@ private void testCancel(String action, AbstractBulkByScrollRequestBuilder int numDocs = getNumShards(INDEX).numPrimaries * 10 * builder.request().getSlices(); ALLOWED_OPERATIONS.release(numDocs); + logger.debug("setting up [{}] docs", numDocs); indexRandom(true, false, true, IntStream.range(0, numDocs) .mapToObj(i -> client().prepareIndex(INDEX, TYPE, String.valueOf(i)).setSource("n", i)) .collect(Collectors.toList())); @@ -102,16 +103,21 @@ private void testCancel(String action, AbstractBulkByScrollRequestBuilder // Scroll by 1 so that cancellation is easier to control builder.source().setSize(1); - /* Allow a random number of the documents less the number of workers to be modified by the reindex action. That way at least one - * worker is blocked. */ + /* Allow a random number of the documents less the number of workers + * to be modified by the reindex action. That way at least one worker + * is blocked. */ int numModifiedDocs = randomIntBetween(builder.request().getSlices() * 2, numDocs); + logger.debug("chose to modify [{}] docs", numModifiedDocs); ALLOWED_OPERATIONS.release(numModifiedDocs - builder.request().getSlices()); // Now execute the reindex action... ActionFuture future = builder.execute(); - /* ... and waits for the indexing operation listeners to block. It is important to realize that some of the workers might have - * exhausted their slice while others might have quite a bit left to work on. We can't control that. */ + /* ... and wait for the indexing operation listeners to block. It + * is important to realize that some of the workers might have + * exhausted their slice while others might have quite a bit left + * to work on. We can't control that. */ + logger.debug("waiting for updates to be blocked"); awaitBusy(() -> ALLOWED_OPERATIONS.hasQueuedThreads() && ALLOWED_OPERATIONS.availablePermits() == 0); // Status should show the task running @@ -128,15 +134,19 @@ private void testCancel(String action, AbstractBulkByScrollRequestBuilder cancelTasksResponse.rethrowFailures("Cancel"); assertThat(cancelTasksResponse.getTasks(), hasSize(1)); - // The status should now show canceled. The request will still be in the list because it is (or its children are) still blocked. + /* The status should now show canceled. The request will still be in the + * list because it is (or its children are) still blocked. */ mainTask = client().admin().cluster().prepareGetTask(mainTask.getTaskId()).get().getTask().getTask(); status = (BulkByScrollTask.Status) mainTask.getStatus(); + logger.debug("asserting that parent is marked canceled {}", status); assertEquals(CancelTasksRequest.DEFAULT_REASON, status.getReasonCancelled()); + if (builder.request().getSlices() > 1) { boolean foundCancelled = false; ListTasksResponse sliceList = client().admin().cluster().prepareListTasks().setParentTaskId(mainTask.getTaskId()) .setDetailed(true).get(); sliceList.rethrowFailures("Fetch slice tasks"); + logger.debug("finding at least one canceled child among {}", sliceList.getTasks()); for (TaskInfo slice: sliceList.getTasks()) { BulkByScrollTask.Status sliceStatus = (BulkByScrollTask.Status) slice.getStatus(); if (sliceStatus.getReasonCancelled() == null) continue; @@ -146,7 +156,7 @@ private void testCancel(String action, AbstractBulkByScrollRequestBuilder assertTrue("Didn't find at least one sub task that was cancelled", foundCancelled); } - // Unblock the last operations + logger.debug("unblocking the blocked update"); ALLOWED_OPERATIONS.release(builder.request().getSlices()); // Checks that no more operations are executed