Skip to content

Commit

Permalink
add more logs in create-index flow
Browse files Browse the repository at this point in the history
  • Loading branch information
sumitasr committed Jul 23, 2024
1 parent 295a70d commit 387e73a
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
import org.opensearch.common.unit.TimeValue;
import org.opensearch.core.action.ActionListener;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.rest.RestController;
import org.opensearch.threadpool.ThreadPool;
import org.opensearch.transport.TransportService;

Expand Down Expand Up @@ -135,8 +134,10 @@ protected void clusterManagerOperation(
response -> new CreateIndexResponse(response.isAcknowledged(), response.isShardsAcknowledged(), indexName)
)
);
logger.info("[Custom Log] TransportCreateIndexAction, clusterManagerOperation latency: {} ms",
TimeValue.nsecToMSec(System.nanoTime() - latencyStartTimeInNs));
logger.info(
"[Custom Log] TransportCreateIndexAction, clusterManagerOperation latency: {} ms",
TimeValue.nsecToMSec(System.nanoTime() - latencyStartTimeInNs)
);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -345,8 +345,10 @@ public void createIndex(
request.index()
);
}
logger.info("[Custom Log] MetadataCreateIndexService, createIndex latency: {} ms",
TimeValue.nsecToMSec(System.nanoTime() - latencyStartTimeInNs));
logger.info(
"[Custom Log] MetadataCreateIndexService, createIndex latency: {} ms",
TimeValue.nsecToMSec(System.nanoTime() - latencyStartTimeInNs)
);
listener.onResponse(new CreateIndexClusterStateUpdateResponse(response.isAcknowledged(), shardsAcknowledged));
},
listener::onFailure
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,7 @@ private void reroute(RoutingAllocation allocation) {

allocateExistingUnassignedShards(allocation); // try to allocate existing shard copies first
shardsAllocator.allocate(allocation);
logger.info("[Custom Log] AllocationService, reroute latency: {} ms", TimeValue.nsecToMSec(System.nanoTime() - rerouteStartTimeNS));
clusterManagerMetrics.recordLatency(
clusterManagerMetrics.rerouteHistogram,
(double) Math.max(0, TimeValue.nsecToMSec(System.nanoTime() - rerouteStartTimeNS))
Expand Down Expand Up @@ -614,8 +615,10 @@ private void allocateExistingUnassignedShards(RoutingAllocation allocation) {
getAllocatorForShard(shardRouting, allocation).allocateUnassigned(shardRouting, allocation, replicaIterator);
}
}
logger.info("[Custom Log] AllocationService, allocateExistingUnassignedShards latency: {} ms",
TimeValue.nsecToMSec(System.nanoTime() - latencyStartTimeInNs));
logger.info(
"[Custom Log] AllocationService, allocateExistingUnassignedShards latency: {} ms",
TimeValue.nsecToMSec(System.nanoTime() - latencyStartTimeInNs)
);
}

private void allocateAllUnassignedShards(RoutingAllocation allocation) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -562,9 +562,14 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl
}

logger.trace("connecting to nodes of cluster state with version {}", newClusterState.version());
long connectToNodesStartTimeNS = System.nanoTime();
try (TimingHandle ignored = stopWatch.timing("connecting to new nodes")) {
connectToNodesAndWait(newClusterState);
}
logger.info(
"[Custom Log] ClusterApplierService, connectToNodesAndWait latency: {} ms",
TimeValue.nsecToMSec(System.nanoTime() - connectToNodesStartTimeNS)
);

// nothing to do until we actually recover from the gateway or any other block indicates we need to disable persistency
if (clusterChangedEvent.state().blocks().disableStatePersistence() == false && clusterChangedEvent.metadataChanged()) {
Expand All @@ -574,11 +579,20 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl
clusterSettings.applySettings(incomingSettings);
}
}

long appliersStartTimeNS = System.nanoTime();
logger.debug("apply cluster state with version {}", newClusterState.version());
callClusterStateAppliers(clusterChangedEvent, stopWatch);
logger.info(
"[Custom Log] ClusterApplierService, callClusterStateAppliers latency: {} ms",
TimeValue.nsecToMSec(System.nanoTime() - appliersStartTimeNS)
);

long disconnectStartTimeNS = System.nanoTime();
nodeConnectionsService.disconnectFromNodesExcept(newClusterState.nodes());
logger.info(
"[Custom Log] ClusterApplierService, disconnectFromNodesExcept latency: {} ms",
TimeValue.nsecToMSec(System.nanoTime() - disconnectStartTimeNS)
);

assert newClusterState.coordinationMetadata()
.getLastAcceptedConfiguration()
Expand All @@ -591,8 +605,12 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl

logger.debug("set locally applied cluster state to version {}", newClusterState.version());
state.set(newClusterState);

long listenersStartTimeNS = System.nanoTime();
callClusterStateListeners(clusterChangedEvent, stopWatch);
logger.info(
"[Custom Log] callClusterStateListeners latency: {} ms",
TimeValue.nsecToMSec(System.nanoTime() - listenersStartTimeNS)
);
}

protected void connectToNodesAndWait(ClusterState newClusterState) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,7 @@ protected void run(Object batchingKey, List<? extends BatchedTask> tasks, Functi
ClusterStateTaskExecutor<Object> taskExecutor = (ClusterStateTaskExecutor<Object>) batchingKey;
List<UpdateTask> updateTasks = (List<UpdateTask>) tasks;
runTasks(new TaskInputs(taskExecutor, updateTasks, taskSummaryGenerator));
logger.info("[Custom Log] MasterService, run latency: {} ms",
TimeValue.nsecToMSec(System.nanoTime() - latencyStartTimeInNs));
logger.info("[Custom Log] MasterService, run latency: {} ms", TimeValue.nsecToMSec(System.nanoTime() - latencyStartTimeInNs));
}

class UpdateTask extends BatchedTask {
Expand Down Expand Up @@ -329,6 +328,11 @@ private void runTasks(TaskInputs taskInputs) {
final TimeValue computationTime = getTimeSince(computationStartTime);
logExecutionTime(computationTime, "compute cluster state update", shortSummary);

logger.info(
"[Custom Log] Cluster state compute - latency: {} ms, operation : {}",
computationTime.getMillis(),
taskInputs.executor.getClass().getSimpleName()
);
clusterManagerMetrics.recordLatency(
clusterManagerMetrics.clusterStateComputeHistogram,
(double) computationTime.getMillis(),
Expand Down Expand Up @@ -394,6 +398,7 @@ protected boolean blockingAllowed() {
stateStats.stateUpdateTook(durationMillis);
stateStats.stateUpdated();
clusterManagerMetrics.recordLatency(clusterManagerMetrics.clusterStatePublishHistogram, (double) durationMillis);
logger.info("[Custom Log] Cluster state publish - latency: {} ms", durationMillis);
} catch (Exception e) {
stateStats.stateUpdateFailed();
onPublicationFailed(clusterChangedEvent, taskOutputs, startTimeNanos, e);
Expand Down
12 changes: 8 additions & 4 deletions server/src/main/java/org/opensearch/rest/RestController.java
Original file line number Diff line number Diff line change
Expand Up @@ -294,8 +294,10 @@ public void dispatchRequest(RestRequest request, RestChannel channel, ThreadCont
try {
long latencyStartTimeInNs = System.nanoTime();
tryAllHandlers(request, channel, threadContext);
logger.info("[Custom Log] RestController, dispatchRequest latency: {} ms",
TimeValue.nsecToMSec(System.nanoTime() - latencyStartTimeInNs));
logger.info(
"[Custom Log] RestController, dispatchRequest latency: {} ms",
TimeValue.nsecToMSec(System.nanoTime() - latencyStartTimeInNs)
);
} catch (Exception e) {
try {
channel.sendResponse(new BytesRestResponse(channel, e));
Expand Down Expand Up @@ -384,8 +386,10 @@ private void dispatchRequest(RestRequest request, RestChannel channel, RestHandl
}
long latencyStartTimeInNs = System.nanoTime();
handler.handleRequest(request, responseChannel, client);
logger.info("[Custom Log] RestController, handleRequest latency: {} ms",
TimeValue.nsecToMSec(System.nanoTime() - latencyStartTimeInNs));
logger.debug(
"[Custom Log] RestController, handleRequest latency: {} ms",
TimeValue.nsecToMSec(System.nanoTime() - latencyStartTimeInNs)
);
} catch (Exception e) {
responseChannel.sendResponse(new BytesRestResponse(responseChannel, e));
}
Expand Down

0 comments on commit 387e73a

Please sign in to comment.