Skip to content

Commit

Permalink
Add Structured Events w/ YANG Models (#12270)
Browse files Browse the repository at this point in the history
Add events for dhcp-relay, bgp, syncd, & kernel.
  • Loading branch information
zbud-msft authored Oct 10, 2022
1 parent 7e0346c commit 09fe3f4
Show file tree
Hide file tree
Showing 11 changed files with 98 additions and 4 deletions.
10 changes: 10 additions & 0 deletions dockers/docker-fpm-frr/bgp_regex.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,16 @@
"tag": "bgp-state",
"regex": "Peer .default\\|([0-9a-f:.]*[0-9a-f]*). admin state is set to .(up|down).",
"params": [ "ip", "status" ]
},
{
"tag": "zebra-no-buff",
"regex": "No buffer space available",
"params": []
},
{
"tag": "notification",
"regex": "NOTIFICATION: (received|sent) (?:to|from) neighbor ([0-9a-f:.]*[0-9a-f+]*)\\s*.* (\\d*)\/(\\d*)",
"params": [ "is-sent", "ip", "major-code", "minor-code" ]
}
]

7 changes: 7 additions & 0 deletions files/build_templates/dhcp_relay_regex.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[
{
"tag": "dhcp-relay-discard",
"regex": "Discarding packet received on ([a-zA-Z0-9-_]*) interface that has no IPv4 address assigned.",
"params": [ "ifname" ]
}
]
7 changes: 7 additions & 0 deletions files/build_templates/dockerd_regex.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[
{
"tag": "invalid-freelist",
"regex": "invalid freelist",
"params": []
}
]
26 changes: 25 additions & 1 deletion files/build_templates/events_info.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"proclist": [
{
"name": "monit",
"parse_json": "monit_regex.json"
"parse_json": "monit_regex.json"
},
{
"name": "sshd",
Expand All @@ -12,6 +12,30 @@
{
"name": "systemd",
"parse_json": "systemd_regex.json"
},
{
"name": "dhcp_relay",
"parse_json": "dhcp_relay_regex.json"
},
{
"name": "syncd",
"parse_json": "syncd_regex.json"
},
{
"name": "kernel",
"parse_json": "kernel_regex.json"
},
{
"name": "dockerd",
"parse_json": "dockerd_regex.json"
},
{
"name": "arista",
"parse_json": "seu_regex.json"
},
{
"name": "python3",
"parse_json": "seu_regex.json"
}
]
}
7 changes: 7 additions & 0 deletions files/build_templates/kernel_regex.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[
{
"tag": "event-kernel",
"regex": "(write failed|Write protected|Remounting filesystem read-only|zlib decompression failed, data probably corrupt)",
"params": [ "fail_type:ret=(arg==\"write failed\")and\"write_failed\"or((arg==\"Write protected\")and\"write_protected\"or((arg==\"Remounting filesystem read-only\")and\"remount_read_only\"or((arg==\"zlib decompression failed, data probably corrupt\")and\"zlib_decompress\"or\"\")))" ]
}
]
7 changes: 7 additions & 0 deletions files/build_templates/seu_regex.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[
{
"tag": "event-seu",
"regex": "SEU error was detected",
"params": []
}
]
5 changes: 5 additions & 0 deletions files/build_templates/sonic_debian_extension.j2
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,11 @@ j2 -f json $BUILD_TEMPLATES/rsyslog_plugin.conf.j2 $BUILD_TEMPLATES/events_info.
sudo cp $BUILD_TEMPLATES/monit_regex.json $FILESYSTEM_ROOT_ETC/rsyslog.d/
sudo cp $BUILD_TEMPLATES/sshd_regex.json $FILESYSTEM_ROOT_ETC/rsyslog.d/
sudo cp $BUILD_TEMPLATES/systemd_regex.json $FILESYSTEM_ROOT_ETC/rsyslog.d/
sudo cp $BUILD_TEMPLATES/dhcp_relay_regex.json $FILESYSTEM_ROOT_ETC/rsyslog.d/
sudo cp $BUILD_TEMPLATES/syncd_regex.json $FILESYSTEM_ROOT_ETC/rsyslog.d/
sudo cp $BUILD_TEMPLATES/kernel_regex.json $FILESYSTEM_ROOT_ETC/rsyslog.d/
sudo cp $BUILD_TEMPLATES/dockerd_regex.json $FILESYSTEM_ROOT_ETC/rsyslog.d/
sudo cp $BUILD_TEMPLATES/seu_regex.json $FILESYSTEM_ROOT_ETC/rsyslog.d/

# Install custom-built monit package and SONiC configuration files
sudo dpkg --root=$FILESYSTEM_ROOT -i $debs_path/monit_*.deb || \
Expand Down
7 changes: 7 additions & 0 deletions files/build_templates/syncd_regex.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[
{
"tag": "syncd-failure",
"regex": "(MMU ERR Type|L3 route add failed with error|Assertion failed|Received switch event|SER Parity Check Error)",
"params": [ "fail_type:ret=(arg==\"Received switch event\")and\"switch_event\"or((arg==\"Assertion failed\")and\"assert\"or((arg==\"SER Parity Check Error\")and\"parity_check\"or((arg==\"MMU ERR Type\")and\"mmu_err\"or((arg==\"L3 route add failed with error\")and\"route_add_failed\"or\"\"))))" ]
}
]
7 changes: 6 additions & 1 deletion files/build_templates/systemd_regex.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
{
"tag": "event-stopped-ctr",
"regex": "Stopped ([a-zA-Z-_\\s]*) container",
"params": [ "ctr-name" ]
"params": [ "ctr_name" ]
},
{
"tag": "watchdog-timeout",
"regex": "(?:watchdog|Watchdog) timeout .limit.([0-9]+)min.",
"params": [ "limit" ]
}
]
2 changes: 1 addition & 1 deletion files/image_config/monit/container_checker
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def publish_events(lst):
params = swsscommon.FieldValueMap()

for ctr in lst:
params["name"] = ctr;
params["ctr_name"] = ctr;
swsscommon.event_publish(events_handle, EVENTS_PUBLISHER_TAG, params)

swsscommon.events_deinit_publisher(events_handle)
Expand Down
17 changes: 16 additions & 1 deletion src/dhcpmon/src/dhcp_mon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include "dhcp_mon.h"
#include "dhcp_devman.h"
#include "events.h"

/** DHCP device/interface state */
typedef struct
Expand All @@ -40,6 +41,8 @@ static struct event *ev_sigterm;
/** libevent SIGUSR1 signal event struct */
static struct event *ev_sigusr1;

event_handle_t g_events_handle;

/** DHCP monitor state data for aggregate device for mgmt device */
static dhcp_mon_state_t state_data[] = {
[0] = {
Expand Down Expand Up @@ -95,7 +98,15 @@ static void check_dhcp_relay_health(dhcp_mon_state_t *state_data)
{
case DHCP_MON_STATUS_UNHEALTHY:
if (++state_data->count > dhcp_unhealthy_max_count) {
syslog(LOG_ALERT, state_data->msg, state_data->count * window_interval_sec, context->intf);
auto duration = state_data->count * window_interval_sec;
std::string vlan(context->intf);
syslog(LOG_ALERT, state_data->msg, duration, vlan);
if (state_data->check_type == DHCP_MON_CHECK_POSITIVE) {
event_params_t params = {
{ "vlan", vlan },
{ "duration", std::to_string(duration) }};
event_publish(g_events_handle, "dhcp-relay-disparity", &params);
}
dhcp_devman_print_status(context, DHCP_COUNTERS_SNAPSHOT);
dhcp_devman_print_status(context, DHCP_COUNTERS_CURRENT);
}
Expand Down Expand Up @@ -179,6 +190,8 @@ int dhcp_mon_init(int window_sec, int max_count)
break;
}

g_events_handle = events_init_publisher("sonic-events-dhcp-relay");

rv = 0;
} while (0);

Expand All @@ -203,6 +216,8 @@ void dhcp_mon_shutdown()
event_free(ev_sigusr1);

event_base_free(base);

events_deinit_publisher(g_events_handle);
}

/**
Expand Down

0 comments on commit 09fe3f4

Please sign in to comment.