Skip to content

Commit

Permalink
Fix flaky DecommissionControllerTests.testTimesOut
Browse files Browse the repository at this point in the history
This test fails pretty reliably if I run it on repeat. I believe the
problem is that the test assumes the function will take longer than 2ms,
which is likely not a valid assumption in all cases. Fortunately, I can
pass in a zero duration which is guaranteed to timeout even if the
system clock does not advance at all.

Also moved the assertions out of the callback into the main test method,
otherwise the assertion error messages would get buried and the test
report would just show a timeout error.
  • Loading branch information
andrross committed Oct 5, 2022
1 parent 05d11b2 commit f01d548
Showing 1 changed file with 11 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

package org.opensearch.cluster.decommission;

import org.hamcrest.MatcherAssert;
import org.junit.After;
import org.junit.Before;
import org.opensearch.OpenSearchTimeoutException;
Expand Down Expand Up @@ -45,6 +46,7 @@
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

Expand All @@ -53,6 +55,7 @@
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.notNullValue;
import static org.hamcrest.Matchers.sameInstance;
import static org.opensearch.cluster.ClusterState.builder;
import static org.opensearch.cluster.OpenSearchAllocationTestCase.createAllocationService;
Expand Down Expand Up @@ -213,24 +216,25 @@ public void testTimesOut() throws InterruptedException {
nodesToBeRemoved.add(clusterService.state().nodes().get("node13"));
nodesToBeRemoved.add(clusterService.state().nodes().get("node14"));
nodesToBeRemoved.add(clusterService.state().nodes().get("node15"));
final AtomicReference<Exception> exceptionReference = new AtomicReference<>();
decommissionController.removeDecommissionedNodes(
nodesToBeRemoved,
"unit-test-timeout",
TimeValue.timeValueMillis(2),
new ActionListener<Void>() {
TimeValue.timeValueMillis(0),
new ActionListener<>() {
@Override
public void onResponse(Void unused) {
fail("response shouldn't have been called");
}
public void onResponse(Void unused) {}

@Override
public void onFailure(Exception e) {
assertThat(e, instanceOf(OpenSearchTimeoutException.class));
assertThat(e.getMessage(), containsString("waiting for removal of decommissioned nodes"));
exceptionReference.set(e);
countDownLatch.countDown();
}
}
);
MatcherAssert.assertThat("Expected onFailure to be called", exceptionReference.get(), notNullValue());
MatcherAssert.assertThat(exceptionReference.get(), instanceOf(OpenSearchTimeoutException.class));
MatcherAssert.assertThat(exceptionReference.get().getMessage(), containsString("waiting for removal of decommissioned nodes"));
assertTrue(countDownLatch.await(30, TimeUnit.SECONDS));
}

Expand Down

0 comments on commit f01d548

Please sign in to comment.