Skip to content

Commit

Permalink
Support LMQ dispatch in case if Consume Queue Store is RocksDB-based (#…
Browse files Browse the repository at this point in the history
…8842)

* feat: support LMQ dispatch

Signed-off-by: Li Zhanhui <lizhanhui@gmail.com>

* fix: introduce group-commit for batch insertion of RocksDB KV pairs

Signed-off-by: Li Zhanhui <lizhanhui@gmail.com>

* fix: propagate store error to broker module

Signed-off-by: Li Zhanhui <lizhanhui@gmail.com>

* chore: fix all Bazel warning and errors

Signed-off-by: Zhanhui Li <lizhanhui@gmail.com>

* fix: remove unnecessary batch-ops when writing RocksDB using atomic flush

Signed-off-by: Li Zhanhui <lizhanhui@gmail.com>

* fix: find a writable directory for RocksDB logs

Signed-off-by: Li Zhanhui <lizhanhui@gmail.com>

* chore: clean up ConfigHelperTest

Signed-off-by: Li Zhanhui <lizhanhui@gmail.com>

* fix: truncate consume queues in case commit log records are truncated

Signed-off-by: Li Zhanhui <lizhanhui@gmail.com>

* fix: truncate LMQ max offsets

Signed-off-by: Li Zhanhui <lizhanhui@gmail.com>

* fix: correct truncate boundary of consume queues

Signed-off-by: Li Zhanhui <lizhanhui@gmail.com>

* fix: correct MessageExt encoding

Signed-off-by: Li Zhanhui <lizhanhui@gmail.com>

* chore: remove unused import

Signed-off-by: Li Zhanhui <lizhanhui@gmail.com>

---------

Signed-off-by: Li Zhanhui <lizhanhui@gmail.com>
Signed-off-by: Zhanhui Li <lizhanhui@gmail.com>
  • Loading branch information
lizhanhui authored Oct 23, 2024
1 parent d2fd068 commit b86059c
Show file tree
Hide file tree
Showing 68 changed files with 1,440 additions and 814 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@ bazel-out
bazel-bin
bazel-rocketmq
bazel-testlogs
.vscode
.vscode
MODULE.bazel.lock
22 changes: 22 additions & 0 deletions MODULE.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
###############################################################################
# Bazel now uses Bzlmod by default to manage external dependencies.
# Please consider migrating your external dependencies from WORKSPACE to MODULE.bazel.
#
# For more details, please check https://github.com/bazelbuild/bazel/issues/18958
###############################################################################
2 changes: 2 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ maven_install(
"com.alipay.sofa:hessian:3.3.6",
"io.netty:netty-tcnative-boringssl-static:2.0.48.Final",
"org.mockito:mockito-junit-jupiter:4.11.0",
"com.alibaba.fastjson2:fastjson2:2.0.43",
"org.junit.jupiter:junit-jupiter-api:5.9.1",
],
fetch_sources = True,
repositories = [
Expand Down
3 changes: 3 additions & 0 deletions broker/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ java_library(
"@maven//:io_github_aliyunmq_rocketmq_slf4j_api",
"@maven//:org_powermock_powermock_core",
"@maven//:io_opentelemetry_opentelemetry_api",
"@maven//:com_googlecode_concurrentlinkedhashmap_concurrentlinkedhashmap_lru",
"@maven//:org_apache_rocketmq_rocketmq_rocksdb",
"@maven//:commons_collections_commons_collections",
],
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.apache.rocketmq.logging.org.slf4j.Logger;
import org.apache.rocketmq.logging.org.slf4j.LoggerFactory;
import org.apache.rocketmq.remoting.common.RemotingHelper;
import org.apache.rocketmq.remoting.exception.RemotingCommandException;
import org.apache.rocketmq.remoting.exception.RemotingSendRequestException;
import org.apache.rocketmq.remoting.exception.RemotingTimeoutException;
import org.apache.rocketmq.remoting.protocol.RemotingCommand;
Expand All @@ -49,6 +50,7 @@
import org.apache.rocketmq.remoting.protocol.header.GetConsumerStatusRequestHeader;
import org.apache.rocketmq.remoting.protocol.header.NotifyConsumerIdsChangedRequestHeader;
import org.apache.rocketmq.remoting.protocol.header.ResetOffsetRequestHeader;
import org.apache.rocketmq.store.exception.ConsumeQueueException;

public class Broker2Client {
private static final Logger log = LoggerFactory.getLogger(LoggerName.BROKER_LOGGER_NAME);
Expand Down Expand Up @@ -100,13 +102,12 @@ public void notifyConsumerIdsChanged(
}
}


public RemotingCommand resetOffset(String topic, String group, long timeStamp, boolean isForce) {
public RemotingCommand resetOffset(String topic, String group, long timeStamp, boolean isForce) throws RemotingCommandException {
return resetOffset(topic, group, timeStamp, isForce, false);
}

public RemotingCommand resetOffset(String topic, String group, long timeStamp, boolean isForce,
boolean isC) {
boolean isC) throws RemotingCommandException {
final RemotingCommand response = RemotingCommand.createResponseCommand(null);

TopicConfig topicConfig = this.brokerController.getTopicConfigManager().selectTopicConfig(topic);
Expand Down Expand Up @@ -135,8 +136,11 @@ public RemotingCommand resetOffset(String topic, String group, long timeStamp, b

long timeStampOffset;
if (timeStamp == -1) {

timeStampOffset = this.brokerController.getMessageStore().getMaxOffsetInQueue(topic, i);
try {
timeStampOffset = this.brokerController.getMessageStore().getMaxOffsetInQueue(topic, i);
} catch (ConsumeQueueException e) {
throw new RemotingCommandException("Failed to get max offset in queue", e);
}
} else {
timeStampOffset = this.brokerController.getMessageStore().getOffsetInQueueByTime(topic, i, timeStamp);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import org.apache.rocketmq.logging.org.slf4j.Logger;
import org.apache.rocketmq.logging.org.slf4j.LoggerFactory;


public class LmqPullRequestHoldService extends PullRequestHoldService {
private static final Logger LOGGER = LoggerFactory.getLogger(LoggerName.BROKER_LOGGER_NAME);

Expand All @@ -48,8 +47,8 @@ public void checkHoldRequest() {
}
String topic = key.substring(0, idx);
int queueId = Integer.parseInt(key.substring(idx + 1));
final long offset = brokerController.getMessageStore().getMaxOffsetInQueue(topic, queueId);
try {
final long offset = brokerController.getMessageStore().getMaxOffsetInQueue(topic, queueId);
this.notifyMessageArriving(topic, queueId, offset);
} catch (Throwable e) {
LOGGER.error("check hold request failed. topic={}, queueId={}", topic, queueId, e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.rocketmq.logging.org.slf4j.Logger;
import org.apache.rocketmq.logging.org.slf4j.LoggerFactory;
import org.apache.rocketmq.store.ConsumeQueueExt;
import org.apache.rocketmq.store.exception.ConsumeQueueException;

public class PullRequestHoldService extends ServiceThread {
private static final Logger log = LoggerFactory.getLogger(LoggerName.BROKER_LOGGER_NAME);
Expand Down Expand Up @@ -103,8 +104,8 @@ protected void checkHoldRequest() {
if (2 == kArray.length) {
String topic = kArray[0];
int queueId = Integer.parseInt(kArray[1]);
final long offset = this.brokerController.getMessageStore().getMaxOffsetInQueue(topic, queueId);
try {
final long offset = this.brokerController.getMessageStore().getMaxOffsetInQueue(topic, queueId);
this.notifyMessageArriving(topic, queueId, offset);
} catch (Throwable e) {
log.error(
Expand All @@ -131,7 +132,12 @@ public void notifyMessageArriving(final String topic, final int queueId, final l
for (PullRequest request : requestList) {
long newestOffset = maxOffset;
if (newestOffset <= request.getPullFromThisOffset()) {
newestOffset = this.brokerController.getMessageStore().getMaxOffsetInQueue(topic, queueId);
try {
newestOffset = this.brokerController.getMessageStore().getMaxOffsetInQueue(topic, queueId);
} catch (ConsumeQueueException e) {
log.error("Failed tp get max offset in queue", e);
continue;
}
}

if (newestOffset > request.getPullFromThisOffset()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import org.apache.rocketmq.remoting.protocol.subscription.SubscriptionGroupConfig;
import org.apache.rocketmq.store.DefaultMessageFilter;
import org.apache.rocketmq.store.MessageStore;
import org.apache.rocketmq.store.exception.ConsumeQueueException;

public class ConsumerLagCalculator {
private final BrokerConfig brokerConfig;
Expand Down Expand Up @@ -212,45 +213,61 @@ public void calculateLag(Consumer<CalculateLagResult> lagRecorder) {

CalculateLagResult result = new CalculateLagResult(info.group, info.topic, false);

Pair<Long, Long> lag = getConsumerLagStats(info.group, info.topic, info.isPop);
if (lag != null) {
result.lag = lag.getObject1();
result.earliestUnconsumedTimestamp = lag.getObject2();
try {
Pair<Long, Long> lag = getConsumerLagStats(info.group, info.topic, info.isPop);
if (lag != null) {
result.lag = lag.getObject1();
result.earliestUnconsumedTimestamp = lag.getObject2();
}
lagRecorder.accept(result);
} catch (ConsumeQueueException e) {
LOGGER.error("Failed to get lag stats", e);
}
lagRecorder.accept(result);

if (info.isPop) {
Pair<Long, Long> retryLag = getConsumerLagStats(info.group, info.retryTopic, true);
try {
Pair<Long, Long> retryLag = getConsumerLagStats(info.group, info.retryTopic, true);

result = new CalculateLagResult(info.group, info.topic, true);
if (retryLag != null) {
result.lag = retryLag.getObject1();
result.earliestUnconsumedTimestamp = retryLag.getObject2();
result = new CalculateLagResult(info.group, info.topic, true);
if (retryLag != null) {
result.lag = retryLag.getObject1();
result.earliestUnconsumedTimestamp = retryLag.getObject2();
}
lagRecorder.accept(result);
} catch (ConsumeQueueException e) {
LOGGER.error("Failed to get lag stats", e);
}
lagRecorder.accept(result);
}
});
}

public void calculateInflight(Consumer<CalculateInflightResult> inflightRecorder) {
processAllGroup(info -> {
CalculateInflightResult result = new CalculateInflightResult(info.group, info.topic, false);
Pair<Long, Long> inFlight = getInFlightMsgStats(info.group, info.topic, info.isPop);
if (inFlight != null) {
result.inFlight = inFlight.getObject1();
result.earliestUnPulledTimestamp = inFlight.getObject2();
try {
Pair<Long, Long> inFlight = getInFlightMsgStats(info.group, info.topic, info.isPop);
if (inFlight != null) {
result.inFlight = inFlight.getObject1();
result.earliestUnPulledTimestamp = inFlight.getObject2();
}
inflightRecorder.accept(result);
} catch (ConsumeQueueException e) {
LOGGER.error("Failed to get inflight message stats", e);
}
inflightRecorder.accept(result);

if (info.isPop) {
Pair<Long, Long> retryInFlight = getInFlightMsgStats(info.group, info.retryTopic, true);
try {
Pair<Long, Long> retryInFlight = getInFlightMsgStats(info.group, info.retryTopic, true);

result = new CalculateInflightResult(info.group, info.topic, true);
if (retryInFlight != null) {
result.inFlight = retryInFlight.getObject1();
result.earliestUnPulledTimestamp = retryInFlight.getObject2();
result = new CalculateInflightResult(info.group, info.topic, true);
if (retryInFlight != null) {
result.inFlight = retryInFlight.getObject1();
result.earliestUnPulledTimestamp = retryInFlight.getObject2();
}
inflightRecorder.accept(result);
} catch (ConsumeQueueException e) {
LOGGER.error("Failed to get inflight message stats", e);
}
inflightRecorder.accept(result);
}
});
}
Expand All @@ -259,20 +276,28 @@ public void calculateAvailable(Consumer<CalculateAvailableResult> availableRecor
processAllGroup(info -> {
CalculateAvailableResult result = new CalculateAvailableResult(info.group, info.topic, false);

result.available = getAvailableMsgCount(info.group, info.topic, info.isPop);
availableRecorder.accept(result);
try {
result.available = getAvailableMsgCount(info.group, info.topic, info.isPop);
availableRecorder.accept(result);
} catch (ConsumeQueueException e) {
LOGGER.error("Failed to get available message count", e);
}

if (info.isPop) {
long retryAvailable = getAvailableMsgCount(info.group, info.retryTopic, true);

result = new CalculateAvailableResult(info.group, info.topic, true);
result.available = retryAvailable;
availableRecorder.accept(result);
if (info.isPop) {
try {
long retryAvailable = getAvailableMsgCount(info.group, info.retryTopic, true);
result = new CalculateAvailableResult(info.group, info.topic, true);
result.available = retryAvailable;
availableRecorder.accept(result);
} catch (ConsumeQueueException e) {
LOGGER.error("Failed to get available message count", e);
}
}
});
}

public Pair<Long, Long> getConsumerLagStats(String group, String topic, boolean isPop) {
public Pair<Long, Long> getConsumerLagStats(String group, String topic, boolean isPop) throws ConsumeQueueException {
long total = 0L;
long earliestUnconsumedTimestamp = Long.MAX_VALUE;

Expand All @@ -298,7 +323,8 @@ public Pair<Long, Long> getConsumerLagStats(String group, String topic, boolean
return new Pair<>(total, earliestUnconsumedTimestamp);
}

public Pair<Long, Long> getConsumerLagStats(String group, String topic, int queueId, boolean isPop) {
public Pair<Long, Long> getConsumerLagStats(String group, String topic, int queueId, boolean isPop)
throws ConsumeQueueException {
long brokerOffset = messageStore.getMaxOffsetInQueue(topic, queueId);
if (brokerOffset < 0) {
brokerOffset = 0;
Expand Down Expand Up @@ -329,7 +355,7 @@ public Pair<Long, Long> getConsumerLagStats(String group, String topic, int queu
return new Pair<>(lag, consumerStoreTimeStamp);
}

public Pair<Long, Long> getInFlightMsgStats(String group, String topic, boolean isPop) {
public Pair<Long, Long> getInFlightMsgStats(String group, String topic, boolean isPop) throws ConsumeQueueException {
long total = 0L;
long earliestUnPulledTimestamp = Long.MAX_VALUE;

Expand All @@ -355,7 +381,8 @@ public Pair<Long, Long> getInFlightMsgStats(String group, String topic, boolean
return new Pair<>(total, earliestUnPulledTimestamp);
}

public Pair<Long, Long> getInFlightMsgStats(String group, String topic, int queueId, boolean isPop) {
public Pair<Long, Long> getInFlightMsgStats(String group, String topic, int queueId, boolean isPop)
throws ConsumeQueueException {
if (isPop) {
long inflight = popInflightMessageCounter.getGroupPopInFlightMessageNum(topic, group, queueId);
long pullOffset = popBufferMergeService.getLatestOffset(topic, group, queueId);
Expand Down Expand Up @@ -384,7 +411,7 @@ public Pair<Long, Long> getInFlightMsgStats(String group, String topic, int queu
return new Pair<>(inflight, pullStoreTimeStamp);
}

public long getAvailableMsgCount(String group, String topic, boolean isPop) {
public long getAvailableMsgCount(String group, String topic, boolean isPop) throws ConsumeQueueException {
long total = 0L;

if (group == null || topic == null) {
Expand All @@ -403,7 +430,8 @@ public long getAvailableMsgCount(String group, String topic, boolean isPop) {
return total;
}

public long getAvailableMsgCount(String group, String topic, int queueId, boolean isPop) {
public long getAvailableMsgCount(String group, String topic, int queueId, boolean isPop)
throws ConsumeQueueException {
long brokerOffset = messageStore.getMaxOffsetInQueue(topic, queueId);
if (brokerOffset < 0) {
brokerOffset = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,11 @@
import org.apache.rocketmq.common.metrics.NopLongCounter;
import org.apache.rocketmq.common.metrics.NopLongHistogram;
import org.apache.rocketmq.store.PutMessageStatus;
import org.apache.rocketmq.store.exception.ConsumeQueueException;
import org.apache.rocketmq.store.pop.AckMsg;
import org.apache.rocketmq.store.pop.PopCheckPoint;
import org.apache.rocketmq.logging.org.slf4j.Logger;
import org.apache.rocketmq.logging.org.slf4j.LoggerFactory;

import static org.apache.rocketmq.broker.metrics.BrokerMetricsConstant.LABEL_CONSUMER_GROUP;
import static org.apache.rocketmq.broker.metrics.BrokerMetricsConstant.LABEL_TOPIC;
Expand All @@ -57,6 +60,7 @@
import static org.apache.rocketmq.broker.metrics.PopMetricsConstant.LABEL_REVIVE_MESSAGE_TYPE;

public class PopMetricsManager {
private static final Logger log = LoggerFactory.getLogger(PopMetricsManager.class);
public static Supplier<AttributesBuilder> attributesBuilderSupplier;

private static LongHistogram popBufferScanTimeConsume = new NopLongHistogram();
Expand Down Expand Up @@ -138,19 +142,27 @@ private static void calculatePopReviveLatency(BrokerController brokerController,
ObservableLongMeasurement measurement) {
PopReviveService[] popReviveServices = brokerController.getAckMessageProcessor().getPopReviveServices();
for (PopReviveService popReviveService : popReviveServices) {
measurement.record(popReviveService.getReviveBehindMillis(), newAttributesBuilder()
.put(LABEL_QUEUE_ID, popReviveService.getQueueId())
.build());
try {
measurement.record(popReviveService.getReviveBehindMillis(), newAttributesBuilder()
.put(LABEL_QUEUE_ID, popReviveService.getQueueId())
.build());
} catch (ConsumeQueueException e) {
log.error("Failed to get revive behind duration", e);
}
}
}

private static void calculatePopReviveLag(BrokerController brokerController,
ObservableLongMeasurement measurement) {
PopReviveService[] popReviveServices = brokerController.getAckMessageProcessor().getPopReviveServices();
for (PopReviveService popReviveService : popReviveServices) {
measurement.record(popReviveService.getReviveBehindMessages(), newAttributesBuilder()
.put(LABEL_QUEUE_ID, popReviveService.getQueueId())
.build());
try {
measurement.record(popReviveService.getReviveBehindMessages(), newAttributesBuilder()
.put(LABEL_QUEUE_ID, popReviveService.getQueueId())
.build());
} catch (ConsumeQueueException e) {
log.error("Failed to get revive behind message count", e);
}
}
}

Expand Down
Loading

0 comments on commit b86059c

Please sign in to comment.