diff --git a/apps/leo_storage/include/leo_storage.hrl b/apps/leo_storage/include/leo_storage.hrl index e9c13812..9932485f 100644 --- a/apps/leo_storage/include/leo_storage.hrl +++ b/apps/leo_storage/include/leo_storage.hrl @@ -531,6 +531,13 @@ _Time end end). +-define(env_storage_watchdog_msgs_enabled(), + case application:get_env(leo_storage, watchdog_msgs_enabled) of + {ok, EnvWathdogMsgEnabled} -> + EnvWathdogMsgEnabled; + _ -> + false + end). %% @doc Storage autonomic-operation related -define(env_auto_compaction_enabled(), diff --git a/apps/leo_storage/priv/leo_storage.conf b/apps/leo_storage/priv/leo_storage.conf index a9b820e3..badb659f 100644 --- a/apps/leo_storage/priv/leo_storage.conf +++ b/apps/leo_storage/priv/leo_storage.conf @@ -177,6 +177,14 @@ watchdog.error.interval = 60 ## error - threshold error count - default:100 watchdog.error.threshold_count = 100 +## +## Watchdog.Messages +## +## Tracks the number of slow operations and timeouts that happened on leo_object_storage +## and gets triggered when the number goes above a certain threshold +## +## Is messages-watchdog enabled - default:false +watchdog.msgs.is_enabled = false ## -------------------------------------------------------------------- ## STORAGE - Autonomic Operation diff --git a/apps/leo_storage/priv/leo_storage.schema b/apps/leo_storage/priv/leo_storage.schema index c38593da..2aefeae0 100644 --- a/apps/leo_storage/priv/leo_storage.schema +++ b/apps/leo_storage/priv/leo_storage.schema @@ -649,6 +649,17 @@ {default, 100} ]}. +%% +%% Watchdog.Messages +%% +%% @doc Messages - Is enabled +{mapping, + "watchdog.msgs.is_enabled", + "leo_storage.watchdog_msgs_enabled", + [ + {datatype, {enum, [true, false]}}, + {default, false} + ]}. 
%% -------------------------------------------------------------------- %% STORAGE - Autonomic operation diff --git a/apps/leo_storage/src/leo_storage_app.erl b/apps/leo_storage/src/leo_storage_app.erl index 8d277199..a61685ba 100644 --- a/apps/leo_storage/src/leo_storage_app.erl +++ b/apps/leo_storage/src/leo_storage_app.erl @@ -191,16 +191,23 @@ after_proc_1(true, Pid, Managers) -> end, %% Watchdog for notified messages - {ok, _} = supervisor:start_child( - leo_watchdog_sup, {leo_storage_watchdog_msgs, - {leo_storage_watchdog_msgs, start_link, - [?env_threshold_num_of_notified_msgs(), - WatchdogInterval - ]}, - permanent, - 2000, - worker, - [leo_storage_watchdog_msgs]}), + case ?env_storage_watchdog_msgs_enabled() of + true -> + leo_storage_msg_collector:set_enabled(), + {ok, _} = supervisor:start_child( + leo_watchdog_sup, {leo_storage_watchdog_msgs, + {leo_storage_watchdog_msgs, start_link, + [?env_threshold_num_of_notified_msgs(), + WatchdogInterval + ]}, + permanent, + 2000, + worker, + [leo_storage_watchdog_msgs]}); + false -> + void + end, + ok = leo_storage_watchdog_sub:start(), %% Launch statistics/mnesia-related processes diff --git a/apps/leo_storage/src/leo_storage_msg_collector.erl b/apps/leo_storage/src/leo_storage_msg_collector.erl index 62cf9fc8..88182a3e 100644 --- a/apps/leo_storage/src/leo_storage_msg_collector.erl +++ b/apps/leo_storage/src/leo_storage_msg_collector.erl @@ -37,6 +37,7 @@ -export([start_link/0, stop/0]). -export([clear/0, get/0, + set_enabled/0, notify/3, notify/4 ]). @@ -53,6 +54,7 @@ -define(TIMEOUT, timer:seconds(5)). -record(state, { messages = [] :: [term()], + enabled = false :: boolean(), interval = ?TIMEOUT :: pos_integer() }). @@ -84,6 +86,9 @@ clear() -> get() -> gen_server:call(?MODULE, get, ?TIMEOUT). +%% @doc Make it work to store messages +set_enabled() -> + gen_server:call(?MODULE, set_enabled, ?TIMEOUT). 
%% @doc Operate the data -spec(notify(Msg, Method, Key) -> @@ -121,16 +126,25 @@ handle_call(get, _From, #state{messages = Msg} = State) -> Ret = dict:to_list(Msg), {reply, {ok, Ret}, State}; -handle_call({notify, ?ERROR_MSG_TIMEOUT = Msg, Method, Key}, - _From, #state{messages = Msg} = State) -> - Msg_1 = dict:append(?MSG_ITEM_TIMEOUT, {Method, Key}, Msg), +handle_call(set_enabled, _From, State) -> + {reply, ok, State#state{enabled = true}}; + +handle_call({notify, ?ERROR_MSG_TIMEOUT, _Method, _Key}, + _From, #state{enabled = false} = State) -> + {reply, disabled, State}; +handle_call({notify, ?ERROR_MSG_TIMEOUT, Method, Key}, + _From, #state{messages = Messages} = State) -> + Msg_1 = dict:append(?MSG_ITEM_TIMEOUT, {Method, Key}, Messages), {reply, ok, State#state{messages = Msg_1}}; handle_call({notify,_Msg,_Method,_Key}, _From, State) -> {reply, ok, State}; -handle_call({notify, ?ERROR_MSG_SLOW_OPERATION = Msg, Method, Key, ProcessingTime}, - _From, #state{messages = Msg} = State) -> - Msg_1 = dict:append(?MSG_ITEM_TIMEOUT, {Method, Key, ProcessingTime}, Msg), +handle_call({notify, ?ERROR_MSG_SLOW_OPERATION, _Method, _Key, _ProcessingTime}, + _From, #state{enabled = false} = State) -> + {reply, disabled, State}; +handle_call({notify, ?ERROR_MSG_SLOW_OPERATION, Method, Key, ProcessingTime}, + _From, #state{messages = Messages} = State) -> + Msg_1 = dict:append(?MSG_ITEM_SLOW_OP, {Method, Key, ProcessingTime}, Messages), {reply, ok, State#state{messages = Msg_1}}; handle_call({notify,_Msg,_Method,_Key,_ProcessingTime},_From, State) -> {reply, ok, State}; diff --git a/apps/leo_storage/src/leo_storage_watchdog_msgs.erl b/apps/leo_storage/src/leo_storage_watchdog_msgs.erl index 0f8217fd..92fc84d8 100644 --- a/apps/leo_storage/src/leo_storage_watchdog_msgs.erl +++ b/apps/leo_storage/src/leo_storage_watchdog_msgs.erl @@ -136,16 +136,24 @@ handle_notified_messages(Id, NumOfNotifiedMsgs) -> + erlang:length(leo_misc:get_value(?MSG_ITEM_SLOW_OP, Msgs, [])), case (Len >= 
NumOfNotifiedMsgs) of true -> + error_logger:warning_msg("~p,~p,~p,~p~n", + [{module, ?MODULE_STRING}, + {function, "handle_notified_messages/2"},{line, ?LINE}, + {body, [{triggered_watchdog, num_of_notified_msgs, Len}]}]), %% raise error - elarm:raise(Id, ?WD_ITEM_ACTIVE_SIZE_RATIO, + elarm:raise(Id, ?WD_ITEM_NOTIFIED_MSGS, #watchdog_state{id = Id, level = ?WD_LEVEL_ERROR, src = ?WD_ITEM_NOTIFIED_MSGS, props = [{num_of_notified_msgs, Len} ]}); false when Len >= WarnNumOfNotifiedMsgs -> + error_logger:warning_msg("~p,~p,~p,~p~n", + [{module, ?MODULE_STRING}, + {function, "handle_notified_messages/2"},{line, ?LINE}, + {body, [{triggered_watchdog, num_of_notified_msgs, Len}]}]), %% raise warning - elarm:raise(Id, ?WD_ITEM_ACTIVE_SIZE_RATIO, + elarm:raise(Id, ?WD_ITEM_NOTIFIED_MSGS, #watchdog_state{id = Id, level = ?WD_LEVEL_WARN, src = ?WD_ITEM_NOTIFIED_MSGS,