Skip to content

Commit

Permalink
Merge pull request #2418 from dashpole/oom_5_kernel
Browse files Browse the repository at this point in the history
Handle the new 5.0 linux format for oom messages
  • Loading branch information
dashpole authored Mar 6, 2020
2 parents 7152b60 + f6f5fd2 commit cfe6b50
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 13 deletions.
40 changes: 33 additions & 7 deletions utils/oomparser/oomparser.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ import (
)

var (
containerRegexp = regexp.MustCompile(`Task in (.*) killed as a result of limit of (.*)`)
legacyContainerRegexp = regexp.MustCompile(`Task in (.*) killed as a result of limit of (.*)`)
// Starting with Linux 5.0 kernels, the OOM message is a single "oom-kill:" line of comma-separated key=value fields
containerRegexp = regexp.MustCompile(`oom-kill:constraint=(.*),nodemask=(.*),cpuset=(.*),mems_allowed=(.*),oom_memcg=(.*) (.*),task_memcg=(.*),task=(.*),pid=(.*),uid=(.*)`)
lastLineRegexp = regexp.MustCompile(`Killed process ([0-9]+) \((.+)\)`)
firstLineRegexp = regexp.MustCompile(`invoked oom-killer:`)
)
Expand All @@ -51,11 +53,14 @@ type OomInstance struct {
// the absolute name of the container that was killed
// due to the OOM.
VictimContainerName string
// the constraint that triggered the OOM. One of CONSTRAINT_NONE,
// CONSTRAINT_CPUSET, CONSTRAINT_MEMORY_POLICY, CONSTRAINT_MEMCG
Constraint string
}

// gets the container name from a line and adds it to the oomInstance.
func getContainerName(line string, currentOomInstance *OomInstance) error {
parsedLine := containerRegexp.FindStringSubmatch(line)
func getLegacyContainerName(line string, currentOomInstance *OomInstance) error {
parsedLine := legacyContainerRegexp.FindStringSubmatch(line)
if parsedLine == nil {
return nil
}
Expand All @@ -64,6 +69,25 @@ func getContainerName(line string, currentOomInstance *OomInstance) error {
return nil
}

// getContainerName tries to parse line as an "oom-kill:" summary line using
// containerRegexp and, on a match, copies the constraint, both memcg paths,
// the process name and the pid into currentOomInstance.
//
// It returns true only when the line matched and the pid was parsed, i.e. the
// single line supplied everything recorded here. When the line does not match
// containerRegexp, the line is handed to getLegacyContainerName and the result
// is (false, <that function's error>).
func getContainerName(line string, currentOomInstance *OomInstance) (bool, error) {
	// Positions of the capture groups of containerRegexp that are used below.
	const (
		constraintGroup  = 1
		oomMemcgGroup    = 5 // first of the two groups following "oom_memcg="
		taskMemcgGroup   = 7
		processNameGroup = 8
		pidGroup         = 9
	)

	groups := containerRegexp.FindStringSubmatch(line)
	if groups == nil {
		// Not the "oom-kill:" format; try the legacy "Task in ..." format.
		return false, getLegacyContainerName(line, currentOomInstance)
	}

	currentOomInstance.ContainerName = groups[taskMemcgGroup]
	currentOomInstance.VictimContainerName = groups[oomMemcgGroup]
	currentOomInstance.Constraint = groups[constraintGroup]

	victimPid, err := strconv.Atoi(groups[pidGroup])
	if err != nil {
		// The three fields assigned above stay set; Pid and ProcessName do not.
		return false, err
	}
	currentOomInstance.Pid = victimPid
	currentOomInstance.ProcessName = groups[processNameGroup]
	return true, nil
}

// gets the pid, name, and date from a line and adds it to oomInstance
func getProcessNamePid(line string, currentOomInstance *OomInstance) (bool, error) {
reList := lastLineRegexp.FindStringSubmatch(line)
Expand Down Expand Up @@ -106,13 +130,15 @@ func (self *OomParser) StreamOoms(outStream chan<- *OomInstance) {
TimeOfDeath: msg.Timestamp,
}
for msg := range kmsgEntries {
err := getContainerName(msg.Message, oomCurrentInstance)
finished, err := getContainerName(msg.Message, oomCurrentInstance)
if err != nil {
klog.Errorf("%v", err)
}
finished, err := getProcessNamePid(msg.Message, oomCurrentInstance)
if err != nil {
klog.Errorf("%v", err)
if !finished {
finished, err = getProcessNamePid(msg.Message, oomCurrentInstance)
if err != nil {
klog.Errorf("%v", err)
}
}
if finished {
oomCurrentInstance.TimeOfDeath = msg.Timestamp
Expand Down
59 changes: 53 additions & 6 deletions utils/oomparser/oomparser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,33 @@ import (
"github.com/stretchr/testify/assert"
)

const startLine = "ruby invoked oom-killer: gfp_mask=0x201da, order=0, oom_score_adj=0"
const endLine = "Killed process 19667 (evil-program2) total-vm:1460016kB, anon-rss:1414008kB, file-rss:4kB"
const containerLine = "Task in /mem2 killed as a result of limit of /mem3"
// Sample kernel log lines used as fixtures by the tests in this file.
const (
// startLine contains the "invoked oom-killer:" text.
startLine = "ruby invoked oom-killer: gfp_mask=0x201da, order=0, oom_score_adj=0"
// endLine is a "Killed process <pid> (<name>)" line.
endLine = "Killed process 19667 (evil-program2) total-vm:1460016kB, anon-rss:1414008kB, file-rss:4kB"
// legacyContainerLine is a container line in the
// "Task in ... killed as a result of limit of ..." form.
legacyContainerLine = "Task in /mem2 killed as a result of limit of /mem3"
// containerLine is a container line in the "oom-kill:" key=value form.
// NOTE(review): the oom_memcg value contains a space ("...-907 c-b2ec..."),
// while the same pod ID inside task_memcg does not; confirm this mirrors
// real kernel output and is not a line-wrap artifact from a pasted log.
containerLine = "oom-kill:constraint=CONSTRAINT_MEMCG,nodemask=(null),cpuset=ef807430361e6e82b45db92e2e9b6fbec98f419b12c591e655c1a725565e73a8,mems_allowed=0,oom_memcg=/kubepods/burstable/podfbdfe8e3-1c87-4ff2-907 c-b2ec8e25d012,task_memcg=/kubepods/burstable/podfbdfe8e3-1c87-4ff2-907c-b2ec8e25d012/ef807430361e6e82b45db92e2e9b6fbec98f419b12c591e655c1a725565e73a8,task=manager,pid=966,uid=0"
)

func TestGetContainerName(t *testing.T) {
func TestGetLegacyContainerName(t *testing.T) {
currentOomInstance := new(OomInstance)
err := getContainerName(startLine, currentOomInstance)
finished, err := getContainerName(startLine, currentOomInstance)
if err != nil {
t.Errorf("bad line fed to getContainerName should yield no error, but had error %v", err)
}
if finished {
t.Errorf("bad line fed to getContainerName should not result in a finished oom log, but it did")
}
if currentOomInstance.ContainerName != "" {
t.Errorf("bad line fed to getContainerName yielded no container name but set it to %s", currentOomInstance.ContainerName)
}
err = getContainerName(containerLine, currentOomInstance)
finished, err = getContainerName(legacyContainerLine, currentOomInstance)
if err != nil {
t.Errorf("container line fed to getContainerName should yield no error, but had error %v", err)
}
if finished {
t.Errorf("getContainerName with the legacy log line should not result in a finished oom log, but it did")

}
if currentOomInstance.ContainerName != "/mem2" {
t.Errorf("getContainerName should have set containerName to /mem2, not %s", currentOomInstance.ContainerName)
}
Expand All @@ -48,6 +58,43 @@ func TestGetContainerName(t *testing.T) {
}
}

// TestGetContainerName feeds getContainerName a line that is not a container
// line (startLine) and then containerLine, and checks the returned "finished"
// flag, the error, and every OomInstance field the function fills in.
func TestGetContainerName(t *testing.T) {
	const (
		wantContainerName = "/kubepods/burstable/podfbdfe8e3-1c87-4ff2-907c-b2ec8e25d012/ef807430361e6e82b45db92e2e9b6fbec98f419b12c591e655c1a725565e73a8"
		// containerLine's oom_memcg value contains a space; the expected
		// victim name is the text before that space.
		wantVictimContainerName = "/kubepods/burstable/podfbdfe8e3-1c87-4ff2-907"
		wantPid                 = 966
		wantProcessName         = "manager"
		wantConstraint          = "CONSTRAINT_MEMCG"
	)

	currentOomInstance := new(OomInstance)

	// A non-container line must be ignored: no error, not finished, nothing set.
	finished, err := getContainerName(startLine, currentOomInstance)
	if err != nil {
		t.Errorf("bad line fed to getContainerName should yield no error, but had error %v", err)
	}
	if finished {
		t.Errorf("bad line fed to getContainerName should not result in a finished oom log, but it did")
	}
	if currentOomInstance.ContainerName != "" {
		t.Errorf("bad line fed to getContainerName yielded no container name but set it to %s", currentOomInstance.ContainerName)
	}

	// The "oom-kill:" line carries every field, so parsing it finishes the record.
	finished, err = getContainerName(containerLine, currentOomInstance)
	if err != nil {
		t.Errorf("container line fed to getContainerName should yield no error, but had error %v", err)
	}
	if !finished {
		t.Errorf("getContainerName with the complete log line should result in a finished oom log, but it did not")
	}
	if currentOomInstance.ContainerName != wantContainerName {
		t.Errorf("getContainerName should have set containerName to %s, not %s", wantContainerName, currentOomInstance.ContainerName)
	}
	if currentOomInstance.VictimContainerName != wantVictimContainerName {
		t.Errorf("getContainerName should have set victimContainerName to %s, not %s", wantVictimContainerName, currentOomInstance.VictimContainerName)
	}
	if currentOomInstance.Pid != wantPid {
		t.Errorf("getContainerName should have set Pid to %d, not %d", wantPid, currentOomInstance.Pid)
	}
	if currentOomInstance.ProcessName != wantProcessName {
		t.Errorf("getContainerName should have set ProcessName to %s, not %s", wantProcessName, currentOomInstance.ProcessName)
	}
	if currentOomInstance.Constraint != wantConstraint {
		// Previously this message named ProcessName, which misreported the failing field.
		t.Errorf("getContainerName should have set Constraint to %s, not %s", wantConstraint, currentOomInstance.Constraint)
	}
}

func TestGetProcessNamePid(t *testing.T) {
currentOomInstance := new(OomInstance)
couldParseLine, err := getProcessNamePid(startLine, currentOomInstance)
Expand Down

0 comments on commit cfe6b50

Please sign in to comment.