Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle the new 5.0 linux format for oom messages #2418

Merged
merged 1 commit into from
Mar 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 33 additions & 7 deletions utils/oomparser/oomparser.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ import (
)

var (
containerRegexp = regexp.MustCompile(`Task in (.*) killed as a result of limit of (.*)`)
// legacyContainerRegexp matches the older two-name message
// "Task in <x> killed as a result of limit of <y>" and captures <x> and <y>.
legacyContainerRegexp = regexp.MustCompile(`Task in (.*) killed as a result of limit of (.*)`)
// Starting in 5.0 linux kernels, the OOM message changed to a single
// "oom-kill:" summary line of comma-separated key=value pairs.
// Capture groups, in order: 1 constraint, 2 nodemask, 3 cpuset,
// 4 mems_allowed, 5 the oom_memcg text before a literal space, 6 the text
// after that space, 7 task_memcg, 8 task, 9 pid, 10 uid.
// NOTE(review): the pattern only matches when the oom_memcg value contains a
// space - confirm that this is what real kernel output looks like.
containerRegexp = regexp.MustCompile(`oom-kill:constraint=(.*),nodemask=(.*),cpuset=(.*),mems_allowed=(.*),oom_memcg=(.*) (.*),task_memcg=(.*),task=(.*),pid=(.*),uid=(.*)`)
// lastLineRegexp matches "Killed process <pid> (<name>)" and captures the
// numeric pid and the parenthesised process name.
lastLineRegexp = regexp.MustCompile(`Killed process ([0-9]+) \((.+)\)`)
// firstLineRegexp matches the "invoked oom-killer:" marker; it has no
// capture groups.
firstLineRegexp = regexp.MustCompile(`invoked oom-killer:`)
)
Expand All @@ -51,11 +53,14 @@ type OomInstance struct {
// the absolute name of the container that was killed
// due to the OOM.
VictimContainerName string
// the constraint that triggered the OOM. One of CONSTRAINT_NONE,
// CONSTRAINT_CPUSET, CONSTRAINT_MEMORY_POLICY, CONSTRAINT_MEMCG
Constraint string
}

// gets the container name from a line and adds it to the oomInstance.
func getContainerName(line string, currentOomInstance *OomInstance) error {
parsedLine := containerRegexp.FindStringSubmatch(line)
func getLegacyContainerName(line string, currentOomInstance *OomInstance) error {
parsedLine := legacyContainerRegexp.FindStringSubmatch(line)
if parsedLine == nil {
return nil
}
Expand All @@ -64,6 +69,25 @@ func getContainerName(line string, currentOomInstance *OomInstance) error {
return nil
}

// getContainerName tries to read line as the single-line "oom-kill:" summary
// matched by containerRegexp and copies its fields into currentOomInstance.
//
// On a match it stores the constraint, the oom_memcg capture (as
// VictimContainerName) and the task_memcg capture (as ContainerName), then
// parses the pid. If the pid is not an integer it returns false with the
// strconv error; the three fields above have already been stored at that
// point, while Pid and ProcessName are left untouched. Otherwise it stores the
// pid and task name and returns true to signal that the record is complete.
//
// When the line does not match the new format, the legacy parser is tried
// instead and false is returned together with whatever error it reports.
func getContainerName(line string, currentOomInstance *OomInstance) (bool, error) {
	fields := containerRegexp.FindStringSubmatch(line)
	if fields == nil {
		// Not a new-style summary line: fall back to the legacy format.
		return false, getLegacyContainerName(line, currentOomInstance)
	}

	// Capture groups used here: 1 constraint, 5 oom_memcg (the part before
	// the space the pattern requires), 7 task_memcg, 8 task, 9 pid.
	constraint, oomMemcg, taskMemcg := fields[1], fields[5], fields[7]
	taskName, pidText := fields[8], fields[9]

	currentOomInstance.Constraint = constraint
	currentOomInstance.VictimContainerName = oomMemcg
	currentOomInstance.ContainerName = taskMemcg

	pid, err := strconv.Atoi(pidText)
	if err != nil {
		return false, err
	}
	currentOomInstance.ProcessName = taskName
	currentOomInstance.Pid = pid
	return true, nil
}

// gets the pid, name, and date from a line and adds it to oomInstance
func getProcessNamePid(line string, currentOomInstance *OomInstance) (bool, error) {
reList := lastLineRegexp.FindStringSubmatch(line)
Expand Down Expand Up @@ -106,13 +130,15 @@ func (self *OomParser) StreamOoms(outStream chan<- *OomInstance) {
TimeOfDeath: msg.Timestamp,
}
for msg := range kmsgEntries {
err := getContainerName(msg.Message, oomCurrentInstance)
finished, err := getContainerName(msg.Message, oomCurrentInstance)
if err != nil {
klog.Errorf("%v", err)
}
finished, err := getProcessNamePid(msg.Message, oomCurrentInstance)
if err != nil {
klog.Errorf("%v", err)
if !finished {
finished, err = getProcessNamePid(msg.Message, oomCurrentInstance)
if err != nil {
klog.Errorf("%v", err)
}
}
if finished {
oomCurrentInstance.TimeOfDeath = msg.Timestamp
Expand Down
59 changes: 53 additions & 6 deletions utils/oomparser/oomparser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,33 @@ import (
"github.com/stretchr/testify/assert"
)

const startLine = "ruby invoked oom-killer: gfp_mask=0x201da, order=0, oom_score_adj=0"
const endLine = "Killed process 19667 (evil-program2) total-vm:1460016kB, anon-rss:1414008kB, file-rss:4kB"
const containerLine = "Task in /mem2 killed as a result of limit of /mem3"
// Sample kernel log lines fed to the parser functions under test.
const (
// startLine contains the "invoked oom-killer:" marker.
startLine = "ruby invoked oom-killer: gfp_mask=0x201da, order=0, oom_score_adj=0"
// endLine is a "Killed process <pid> (<name>)" line.
endLine = "Killed process 19667 (evil-program2) total-vm:1460016kB, anon-rss:1414008kB, file-rss:4kB"
// legacyContainerLine is the older "Task in ... killed as a result of
// limit of ..." message.
legacyContainerLine = "Task in /mem2 killed as a result of limit of /mem3"
// containerLine is the newer single-line "oom-kill:" summary. Note that the
// oom_memcg value contains a space ("...-907 c-b2ec...").
// NOTE(review): confirm whether that space is genuine kernel output or an
// artifact of how the sample was copied.
containerLine = "oom-kill:constraint=CONSTRAINT_MEMCG,nodemask=(null),cpuset=ef807430361e6e82b45db92e2e9b6fbec98f419b12c591e655c1a725565e73a8,mems_allowed=0,oom_memcg=/kubepods/burstable/podfbdfe8e3-1c87-4ff2-907 c-b2ec8e25d012,task_memcg=/kubepods/burstable/podfbdfe8e3-1c87-4ff2-907c-b2ec8e25d012/ef807430361e6e82b45db92e2e9b6fbec98f419b12c591e655c1a725565e73a8,task=manager,pid=966,uid=0"
)

func TestGetContainerName(t *testing.T) {
func TestGetLegacyContainerName(t *testing.T) {
currentOomInstance := new(OomInstance)
err := getContainerName(startLine, currentOomInstance)
finished, err := getContainerName(startLine, currentOomInstance)
if err != nil {
t.Errorf("bad line fed to getContainerName should yield no error, but had error %v", err)
}
if finished {
t.Errorf("bad line fed to getContainerName should not result in a finished oom log, but it did")
}
if currentOomInstance.ContainerName != "" {
t.Errorf("bad line fed to getContainerName yielded no container name but set it to %s", currentOomInstance.ContainerName)
}
err = getContainerName(containerLine, currentOomInstance)
finished, err = getContainerName(legacyContainerLine, currentOomInstance)
if err != nil {
t.Errorf("container line fed to getContainerName should yield no error, but had error %v", err)
}
if finished {
t.Errorf("getContainerName with the legacy log line should not result in a finished oom log, but it did")

}
if currentOomInstance.ContainerName != "/mem2" {
t.Errorf("getContainerName should have set containerName to /mem2, not %s", currentOomInstance.ContainerName)
}
Expand All @@ -48,6 +58,43 @@ func TestGetContainerName(t *testing.T) {
}
}

// TestGetContainerName exercises getContainerName with the new-style
// "oom-kill:" summary line. A line that is not a container line must leave the
// instance untouched and unfinished; the summary line must fill in the
// container names, pid, process name and constraint and report the record as
// finished.
func TestGetContainerName(t *testing.T) {
	const (
		wantContainer = "/kubepods/burstable/podfbdfe8e3-1c87-4ff2-907c-b2ec8e25d012/ef807430361e6e82b45db92e2e9b6fbec98f419b12c591e655c1a725565e73a8"
		// containerLine has a space inside its oom_memcg value, so only the
		// text before that space is captured as the victim container name.
		wantVictim     = "/kubepods/burstable/podfbdfe8e3-1c87-4ff2-907"
		wantPid        = 966
		wantProcess    = "manager"
		wantConstraint = "CONSTRAINT_MEMCG"
	)

	currentOomInstance := new(OomInstance)

	// A line in neither container format: no error, not finished, nothing set.
	finished, err := getContainerName(startLine, currentOomInstance)
	if err != nil {
		t.Errorf("bad line fed to getContainerName should yield no error, but had error %v", err)
	}
	if finished {
		t.Errorf("bad line fed to getContainerName should not result in a finished oom log, but it did")
	}
	if currentOomInstance.ContainerName != "" {
		t.Errorf("bad line fed to getContainerName yielded no container name but set it to %s", currentOomInstance.ContainerName)
	}

	// The new-style summary line: everything is parsed from this one line.
	finished, err = getContainerName(containerLine, currentOomInstance)
	if err != nil {
		t.Errorf("container line fed to getContainerName should yield no error, but had error %v", err)
	}
	if !finished {
		t.Errorf("getContainerName with the complete log line should result in a finished oom log, but it did not")
	}
	if currentOomInstance.ContainerName != wantContainer {
		t.Errorf("getContainerName should have set containerName to %s, not %s", wantContainer, currentOomInstance.ContainerName)
	}
	if currentOomInstance.VictimContainerName != wantVictim {
		t.Errorf("getContainerName should have set victimContainerName to %s, not %s", wantVictim, currentOomInstance.VictimContainerName)
	}
	if currentOomInstance.Pid != wantPid {
		t.Errorf("getContainerName should have set Pid to %d, not %d", wantPid, currentOomInstance.Pid)
	}
	if currentOomInstance.ProcessName != wantProcess {
		t.Errorf("getContainerName should have set ProcessName to %s, not %s", wantProcess, currentOomInstance.ProcessName)
	}
	if currentOomInstance.Constraint != wantConstraint {
		// The original message named ProcessName here by mistake.
		t.Errorf("getContainerName should have set Constraint to %s, not %s", wantConstraint, currentOomInstance.Constraint)
	}
}

func TestGetProcessNamePid(t *testing.T) {
currentOomInstance := new(OomInstance)
couldParseLine, err := getProcessNamePid(startLine, currentOomInstance)
Expand Down