Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Teamd :: fix for cleaning up the teamd processes correctly on teamd docker stop #1159

Merged
merged 7 commits into from
Jan 15, 2020
79 changes: 79 additions & 0 deletions cfgmgr/teammgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,15 @@

#include <algorithm>
#include <sstream>
#include <fstream>
#include <thread>

#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <signal.h>

#define PID_FILE_PATH "/var/run/teamd/"

using namespace std;
using namespace swss;
Expand Down Expand Up @@ -108,6 +112,79 @@ void TeamMgr::doTask(Consumer &consumer)
}
}


pid_t TeamMgr::getTeamPid(const string &alias)
{
SWSS_LOG_ENTER();
pid_t pid = 0;

string file = string(PID_FILE_PATH) + alias + string(".pid");
ifstream infile(file);
if (!infile.is_open())
{
SWSS_LOG_WARN("The LAG PID file: %s is not readable", file.c_str());
return 0;
}

string line;
getline(infile, line);
if (line.empty())
{
SWSS_LOG_WARN("The LAG PID file: %s is empty", file.c_str());
}
else
{
/*Store the PID value */
pid = stoi(line, nullptr, 10);
}

/* Close the file and return */
infile.close();

return pid;
}


void TeamMgr::addLagPid(const string &alias)
{
SWSS_LOG_ENTER();
m_lagPIDList[alias] = getTeamPid(alias);
}

void TeamMgr::removeLagPid(const string &alias)
{
SWSS_LOG_ENTER();
m_lagPIDList.erase(alias);
}

void TeamMgr::cleanTeamProcesses(int signo)
{
pid_t pid = 0;

for (const auto& it: m_lagList)
{
pid = m_lagPIDList[it];
if(!pid) {
SWSS_LOG_WARN("Invalid PID found for LaG %s ", it.c_str());

/* Try to get the PID again */
pid = getTeamPid(it);
}

if(pid > 0)
{
SWSS_LOG_INFO("Sending TERM Signal to (PID: %d) for LaG %s ", pid, it.c_str());
kill(pid, signo);
}
pavel-shirshov marked this conversation as resolved.
Show resolved Hide resolved
else
{
SWSS_LOG_ERROR("Can't send TERM signal to LAG %s. PID wasn't found", it.c_str());
}
}

return;
}

void TeamMgr::doLagTask(Consumer &consumer)
{
SWSS_LOG_ENTER();
Expand Down Expand Up @@ -171,6 +248,7 @@ void TeamMgr::doLagTask(Consumer &consumer)
}

m_lagList.insert(alias);
addLagPid(alias);
}

setLagAdminStatus(alias, admin_status);
Expand All @@ -187,6 +265,7 @@ void TeamMgr::doLagTask(Consumer &consumer)
{
removeLag(alias);
m_lagList.erase(alias);
removeLagPid(alias);
}
}

Expand Down
8 changes: 8 additions & 0 deletions cfgmgr/teammgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "netmsg.h"
#include "orch.h"
#include "producerstatetable.h"
#include <sys/types.h>

namespace swss {

Expand All @@ -17,6 +18,8 @@ class TeamMgr : public Orch
const std::vector<TableConnector> &tables);

using Orch::doTask;
void cleanTeamProcesses(int signo);

private:
Table m_cfgMetadataTable; // To retrieve MAC address
Table m_cfgPortTable;
Expand All @@ -29,6 +32,7 @@ class TeamMgr : public Orch
ProducerStateTable m_appLagTable;

std::set<std::string> m_lagList;
std::map<std::string, pid_t> m_lagPIDList;

MacAddress m_mac;

Expand All @@ -45,6 +49,10 @@ class TeamMgr : public Orch
bool setLagAdminStatus(const std::string &alias, const std::string &admin_status);
bool setLagMtu(const std::string &alias, const std::string &mtu);
bool setLagLearnMode(const std::string &alias, const std::string &learn_mode);

pid_t getTeamPid(const std::string &alias);
void addLagPid(const std::string &alias);
void removeLagPid(const std::string &alias);

bool isPortEnslaved(const std::string &);
bool findPortMaster(std::string &, const std::string &);
Expand Down
18 changes: 18 additions & 0 deletions cfgmgr/teammgrd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "netlink.h"
#include "select.h"
#include "warm_restart.h"
#include <signal.h>

using namespace std;
using namespace swss;
Expand All @@ -17,13 +18,24 @@ bool gLogRotate = false;
ofstream gRecordOfs;
string gRecordFile;

bool received_sigterm = false;

void sig_handler(int signo)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

signo [](start = 21, length = 5)

check signo == SIGTERM first?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Signal handler is only registered only for SIGTERM now. The handler will be called only on SIGTERM signal.

{
received_sigterm = true;
return;
}

int main(int argc, char **argv)
{
Logger::linkToDbNative("teammgrd");
SWSS_LOG_ENTER();

SWSS_LOG_NOTICE("--- Starting teammrgd ---");

/* Register the signal handler for SIGTERM */
signal(SIGTERM, sig_handler);

try
{
DBConnector conf_db("CONFIG_DB", 0);
Expand Down Expand Up @@ -55,6 +67,12 @@ int main(int argc, char **argv)

while (true)
pavel-shirshov marked this conversation as resolved.
Show resolved Hide resolved
{
if(received_sigterm)
{
teammgr.cleanTeamProcesses(SIGTERM);
received_sigterm = false;
pavel-shirshov marked this conversation as resolved.
Show resolved Hide resolved
}

Selectable *sel;
int ret;

Expand Down
26 changes: 26 additions & 0 deletions teamsyncd/teamsync.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,22 @@ void TeamSync::onMsg(int nlmsg_type, struct nl_object *obj)
if (!type || (strcmp(type, TEAM_DRV_NAME) != 0))
return;

unsigned int flags = rtnl_link_get_flags(link);
bool admin = flags & IFF_UP;
bool oper = flags & IFF_LOWER_UP;
unsigned int ifindex = rtnl_link_get_ifindex(link);

if (type)
{
SWSS_LOG_INFO(" nlmsg type:%d key:%s admin:%d oper:%d ifindex:%d type:%s",
nlmsg_type, lagName.c_str(), admin, oper, ifindex, type);
}
else
{
SWSS_LOG_INFO(" nlmsg type:%d key:%s admin:%d oper:%d ifindex:%d",
nlmsg_type, lagName.c_str(), admin, oper, ifindex);
}

if (nlmsg_type == RTM_DELLINK)
{
if (m_teamSelectables.find(lagName) != m_teamSelectables.end())
Expand Down Expand Up @@ -194,6 +210,16 @@ void TeamSync::removeLag(const string &lagName)
m_selectablesToRemove.insert(lagName);
}

void TeamSync::cleanTeamSync()
{
for (const auto& it: m_teamSelectables)
{
/* Cleanup LAG */
removeLag(it.first);
}
return;
}

const struct team_change_handler TeamSync::TeamPortSync::gPortChangeHandler = {
.func = TeamSync::TeamPortSync::teamdHandler,
.type_mask = TEAM_PORT_CHANGE | TEAM_OPTION_CHANGE
Expand Down
1 change: 1 addition & 0 deletions teamsyncd/teamsync.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class TeamSync : public NetMsg
TeamSync(DBConnector *db, DBConnector *stateDb, Select *select);

void periodic();
void cleanTeamSync();

/* Listen to RTM_NEWLINK, RTM_DELLINK to track team devices */
virtual void onMsg(int nlmsg_type, struct nl_object *obj);
Expand Down
18 changes: 18 additions & 0 deletions teamsyncd/teamsyncd.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <iostream>
#include <team.h>
#include <signal.h>
#include "logger.h"
#include "select.h"
#include "netdispatcher.h"
Expand All @@ -9,6 +10,14 @@
using namespace std;
using namespace swss;

bool received_sigterm = false;

void sig_handler(int signo)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

signo [](start = 21, length = 5)

the same

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Signal handler is only registered only for SIGTERM now. The handler will be called only on SIGTERM signal.

{
received_sigterm = true;
return;
}

int main(int argc, char **argv)
{
swss::Logger::linkToDbNative(TEAMSYNCD_APP_NAME);
Expand All @@ -20,6 +29,9 @@ int main(int argc, char **argv)
NetDispatcher::getInstance().registerMessageHandler(RTM_NEWLINK, &sync);
NetDispatcher::getInstance().registerMessageHandler(RTM_DELLINK, &sync);

/* Register the signal handler for SIGTERM */
signal(SIGTERM, sig_handler);

while (1)
{
try
Expand All @@ -33,6 +45,12 @@ int main(int argc, char **argv)
s.addSelectable(&netlink);
while (true)
{
if(received_sigterm)
{
sync.cleanTeamSync();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[](start = 16, length = 2)

Inconsistent indentation.

received_sigterm = false;
}
pavel-shirshov marked this conversation as resolved.
Show resolved Hide resolved

Selectable *temps;
s.select(&temps, 1000); // block for a second
sync.periodic();
Expand Down