Skip to content

Commit

Permalink
[Segment Replication] Bump segment infos counter before commit during…
Browse files Browse the repository at this point in the history
… replica promotion (opensearch-project#4365)

* [Segment Replication] Bump segment infos counter before commit during replica promotion

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Add changelog entry

Signed-off-by: Suraj Singh <surajrider@gmail.com>

Signed-off-by: Suraj Singh <surajrider@gmail.com>
  • Loading branch information
dreamer-89 authored and pranikum committed Sep 25, 2022
1 parent 8e15ea9 commit 9084345
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
- Add timeout on Mockito.verify to reduce flakiness in testReplicationOnDone test([#4314](https://github.com/opensearch-project/OpenSearch/pull/4314))
- Commit workflow for dependabot changelog helper ([#4331](https://github.com/opensearch-project/OpenSearch/pull/4331))
- Fixed cancellation of segment replication events ([#4225](https://github.com/opensearch-project/OpenSearch/pull/4225))
- [Segment Replication] Bump segment infos counter before commit during replica promotion ([#4365](https://github.com/opensearch-project/OpenSearch/pull/4365))
- Bugs for dependabot changelog verifier workflow ([#4364](https://github.com/opensearch-project/OpenSearch/pull/4364))
- Fix flaky random test `NRTReplicationEngineTests.testUpdateSegments` ([#4352](https://github.com/opensearch-project/OpenSearch/pull/4352))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ public class NRTReplicationEngine extends Engine {
private final LocalCheckpointTracker localCheckpointTracker;
private final WriteOnlyTranslogManager translogManager;

private static final int SI_COUNTER_INCREMENT = 10;

public NRTReplicationEngine(EngineConfig engineConfig) {
super(engineConfig);
store.incRef();
Expand Down Expand Up @@ -142,6 +144,13 @@ public synchronized void updateSegments(final SegmentInfos infos, long seqNo) th
public void commitSegmentInfos() throws IOException {
    // TODO: This method should wait for replication events to finalize.
    final SegmentInfos infosToCommit = getLatestSegmentInfos();

    // Workaround: during failover the same file may be copied to a replica from two
    // different primaries, which can conflict. The counter is used to generate new
    // segment file names, so advancing it before committing lowers the chance of such
    // a conflict. The ideal solution is to identify the counter from the previous primary.
    infosToCommit.counter += SI_COUNTER_INCREMENT;
    // Signal that the infos were modified after the counter bump.
    infosToCommit.changed();

    // Commit the bumped infos together with the tracker's current max sequence number
    // and processed checkpoint, then sync the translog.
    store.commitSegmentInfos(
        infosToCommit,
        localCheckpointTracker.getMaxSeqNo(),
        localCheckpointTracker.getProcessedCheckpoint()
    );
    translogManager.syncTranslog();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,8 @@ public void testCommitSegmentInfos() throws Exception {
// ensure getLatestSegmentInfos returns an updated infos ref with correct userdata.
final SegmentInfos latestSegmentInfos = nrtEngine.getLatestSegmentInfos();
assertEquals(previousInfos.getGeneration(), latestSegmentInfos.getLastGeneration());
assertEquals(previousInfos.getVersion(), latestSegmentInfos.getVersion());
assertEquals(previousInfos.counter, latestSegmentInfos.counter);
Map<String, String> userData = latestSegmentInfos.getUserData();
assertEquals(processedCheckpoint, localCheckpointTracker.getProcessedCheckpoint());
assertEquals(maxSeqNo, Long.parseLong(userData.get(MAX_SEQ_NO)));
Expand Down

0 comments on commit 9084345

Please sign in to comment.