-
Notifications
You must be signed in to change notification settings - Fork 1.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Segment Replication] Update RefreshPolicy.WAIT_UNTIL for replica shards with segment replication enabled to wait for replica refresh #6464
Changes from 1 commit
055f225
36f3851
c1a4c87
c03fa8f
666dd0b
3798512
52ad227
423a0c8
413b3d4
ed8b4a0
6f2b174
1260905
adc9bb0
17fd7a1
672a67f
f3ee8d6
64410d6
b33aca2
4b47039
6bd0b3d
d62aaca
0a79735
f285ba3
83a1cac
c0721b0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,8 @@ | |
package org.opensearch.indices.replication; | ||
|
||
import com.carrotsearch.randomizedtesting.RandomizedTest; | ||
import org.opensearch.action.ActionFuture; | ||
import org.opensearch.action.index.IndexResponse; | ||
import org.opensearch.action.support.WriteRequest; | ||
import org.opensearch.action.update.UpdateResponse; | ||
import org.opensearch.client.Requests; | ||
|
@@ -20,15 +22,18 @@ | |
import org.opensearch.index.shard.IndexShard; | ||
import org.opensearch.indices.recovery.FileChunkRequest; | ||
import org.opensearch.indices.replication.common.ReplicationType; | ||
import org.opensearch.rest.RestStatus; | ||
import org.opensearch.test.BackgroundIndexer; | ||
import org.opensearch.test.InternalTestCluster; | ||
import org.opensearch.test.OpenSearchIntegTestCase; | ||
import org.opensearch.test.transport.MockTransportService; | ||
import org.opensearch.transport.TransportService; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.Set; | ||
import java.util.concurrent.CountDownLatch; | ||
import java.util.concurrent.TimeUnit; | ||
|
||
import static java.util.Arrays.asList; | ||
import static org.opensearch.index.query.QueryBuilders.matchQuery; | ||
|
@@ -515,4 +520,46 @@ public void testDropPrimaryDuringReplication() throws Exception { | |
verifyStoreContent(); | ||
} | ||
} | ||
|
||
public void testWaitUntil() throws Exception { | ||
final String primaryNode = internalCluster().startNode(featureFlagSettings()); | ||
prepareCreate( | ||
INDEX_NAME, | ||
Settings.builder() | ||
.put("index.number_of_shards", 1) | ||
.put("index.number_of_replicas", 1) | ||
// we want to control refreshes | ||
.put("index.refresh_interval", "40ms") | ||
.put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) | ||
).get(); | ||
ensureYellowAndNoInitializingShards(INDEX_NAME); | ||
final String replicaNode = internalCluster().startNode(featureFlagSettings()); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You shouldn't need to pass feature flag settings to startNode() anymore; they will automatically get picked up. |
||
ensureGreen(INDEX_NAME); | ||
final int initialDocCount = scaledRandomIntBetween(4000, 5000); | ||
final List<ActionFuture<IndexResponse>> pendingIndexResponses = new ArrayList<>(); | ||
IndexShard primaryShard = getIndexShard(primaryNode, INDEX_NAME); | ||
IndexShard replicaShard = getIndexShard(replicaNode, INDEX_NAME); | ||
|
||
for (int i = 0; i < initialDocCount; i++) { | ||
pendingIndexResponses.add( | ||
client().prepareIndex(INDEX_NAME) | ||
.setId(Integer.toString(i)) | ||
.setRefreshPolicy(WriteRequest.RefreshPolicy.WAIT_UNTIL) | ||
.setSource("field", "value" + i) | ||
.execute() | ||
); | ||
} | ||
assertBusy( | ||
() -> { | ||
assertTrue(pendingIndexResponses.stream().allMatch(response -> response.actionGet().status().equals(RestStatus.CREATED))); | ||
}, | ||
1, | ||
TimeUnit.MINUTES | ||
); | ||
|
||
assertEquals(primaryShard.getLatestReplicationCheckpoint().getSeqNo(), replicaShard.getLatestReplicationCheckpoint().getSeqNo()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to assert this with every There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can do that. Do you think some assertion like this would be good:
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it will be tricky with async operations here. There is one option of applying this assertion (equal seq no on primary & replica) if we perform a single indexing operation at a time; not sure if this is what we want from this test. Please feel free to ignore my previous comment here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, it will be tricky if we want to assert after every request. The purpose of this test is to make sure that we wait until all requests are finished and the replica has all indexed docs ready to be searched |
||
|
||
assertHitCount(client(primaryNode).prepareSearch(INDEX_NAME).setPreference("_only_local").setSize(0).get(), initialDocCount); | ||
assertHitCount(client(replicaNode).prepareSearch(INDEX_NAME).setPreference("_only_local").setSize(0).get(), initialDocCount); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -780,8 +780,8 @@ static BulkItemResponse processUpdateResponse( | |
@Override | ||
protected void dispatchedShardOperationOnReplica(BulkShardRequest request, IndexShard replica, ActionListener<ReplicaResult> listener) { | ||
ActionListener.completeWith(listener, () -> { | ||
final Translog.Location location = performOnReplica(request, replica); | ||
return new WriteReplicaResult<>(request, location, null, replica, logger); | ||
final Tuple<Translog.Location, Long> tuple = performOnReplica(request, replica); | ||
return new WriteReplicaResult<>(request, tuple, replica, logger); | ||
}); | ||
} | ||
|
||
|
@@ -790,8 +790,9 @@ protected long replicaOperationSize(BulkShardRequest request) { | |
return request.ramBytesUsed(); | ||
} | ||
|
||
public static Translog.Location performOnReplica(BulkShardRequest request, IndexShard replica) throws Exception { | ||
public static Tuple<Translog.Location, Long> performOnReplica(BulkShardRequest request, IndexShard replica) throws Exception { | ||
Translog.Location location = null; | ||
long maxSeqNo = SequenceNumbers.NO_OPS_PERFORMED; | ||
for (int i = 0; i < request.items().length; i++) { | ||
final BulkItemRequest item = request.items()[i]; | ||
final BulkItemResponse response = item.getPrimaryResponse(); | ||
|
@@ -822,8 +823,9 @@ public static Translog.Location performOnReplica(BulkShardRequest request, Index | |
} | ||
assert operationResult != null : "operation result must never be null when primary response has no failure"; | ||
location = syncOperationResultOrThrow(operationResult, location); | ||
maxSeqNo = response.getResponse().getSeqNo(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we need to keep track of ongoing max here as we iterate all the items, this will overwrite the value with each iteration. The last item in the list is not guaranteed to have the highest seqNo. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. makes sense. I will keep track of maxSeqNo here |
||
} | ||
return location; | ||
return new Tuple<Translog.Location, Long>(location, maxSeqNo); | ||
} | ||
|
||
private static Engine.Result performOpOnReplica( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
these base settings are already defined in
SegmentReplicationBaseIT
, also why do we need to change refresh_interval here?