Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use llvm-symbolizer's JSON output for symbolizing #879

Merged
merged 2 commits into from
Jul 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 57 additions & 61 deletions internal/binutils/addr2liner_llvm.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package binutils

import (
"bufio"
"encoding/json"
"fmt"
"io"
"os/exec"
Expand All @@ -37,6 +38,7 @@ type llvmSymbolizer struct {
filename string
rw lineReaderWriter
base uint64
isData bool
}

type llvmSymbolizerJob struct {
Expand Down Expand Up @@ -76,7 +78,7 @@ func newLLVMSymbolizer(cmd, file string, base uint64, isData bool) (*llvmSymboli
}

j := &llvmSymbolizerJob{
cmd: exec.Command(cmd, "--inlining", "-demangle=false"),
cmd: exec.Command(cmd, "--inlining", "-demangle=false", "--output-style=JSON"),
symType: "CODE",
}
if isData {
Expand All @@ -102,63 +104,67 @@ func newLLVMSymbolizer(cmd, file string, base uint64, isData bool) (*llvmSymboli
filename: file,
rw: j,
base: base,
isData: isData,
}

return a, nil
}

// readFrame parses the llvm-symbolizer output for a single address. It
// returns a populated plugin.Frame and whether it has reached the end of the
// data.
func (d *llvmSymbolizer) readFrame() (plugin.Frame, bool) {
funcname, err := d.rw.readLine()
// readDataFrames parses the llvm-symbolizer DATA output for a single address. It
// returns a slice containing a single populated plugin.Frame.
func (d *llvmSymbolizer) readDataFrames() ([]plugin.Frame, error) {
line, err := d.rw.readLine()
if err != nil {
return plugin.Frame{}, true
return nil, err
}

switch funcname {
case "":
return plugin.Frame{}, true
case "??":
funcname = ""
var frame struct {
Address string `json:"Address"`
ModuleName string `json:"ModuleName"`
Data struct {
Start string `json:"Start"`
Size string `json:"Size"`
Name string `json:"Name"`
} `json:"Data"`
}
if err := json.Unmarshal([]byte(line), &frame); err != nil {
return nil, err
}
// Match non-JSON output behaviour of stuffing the start/size into the filename of a single frame,
// with the size being a decimal value.
size, err := strconv.ParseInt(frame.Data.Size, 0, 0)
if err != nil {
return nil, err
}
var stack []plugin.Frame
stack = append(stack, plugin.Frame{Func: frame.Data.Name, File: fmt.Sprintf("%s %d", frame.Data.Start, size)})
return stack, nil
}

fileline, err := d.rw.readLine()
// readCodeFrames parses the llvm-symbolizer CODE output for a single address. It
// returns a slice of plugin.Frame with one entry per element of the JSON Symbol list.
func (d *llvmSymbolizer) readCodeFrames() ([]plugin.Frame, error) {
line, err := d.rw.readLine()
if err != nil {
return plugin.Frame{Func: funcname}, true
}

linenumber := 0
columnnumber := 0
// The llvm-symbolizer outputs the <file_name>:<line_number>:<column_number>.
// When it cannot identify the source code location, it outputs "??:0:0".
// Older versions output just the filename and line number, so we check for
// both conditions here.
if fileline == "??:0" || fileline == "??:0:0" {
fileline = ""
} else {
switch split := strings.Split(fileline, ":"); len(split) {
case 3:
// filename:line:column
if col, err := strconv.Atoi(split[2]); err == nil {
columnnumber = col
}
fallthrough
case 2:
// filename:line
if line, err := strconv.Atoi(split[1]); err == nil {
linenumber = line
}
fallthrough
case 1:
// filename
fileline = split[0]
default:
// Unrecognized, ignore
}
}

return plugin.Frame{Func: funcname, File: fileline, Line: linenumber, Column: columnnumber}, false
return nil, err
}
var frame struct {
Address string `json:"Address"`
ModuleName string `json:"ModuleName"`
Symbol []struct {
Line int `json:"Line"`
Column int `json:"Column"`
FunctionName string `json:"FunctionName"`
FileName string `json:"FileName"`
} `json:"Symbol"`
}
if err := json.Unmarshal([]byte(line), &frame); err != nil {
return nil, err
}
var stack []plugin.Frame
for _, s := range frame.Symbol {
stack = append(stack, plugin.Frame{Func: s.FunctionName, File: s.FileName, Line: s.Line, Column: s.Column})
}
return stack, nil
}

// addrInfo returns the stack frame information for a specific program
Expand All @@ -170,18 +176,8 @@ func (d *llvmSymbolizer) addrInfo(addr uint64) ([]plugin.Frame, error) {
if err := d.rw.write(fmt.Sprintf("%s 0x%x", d.filename, addr-d.base)); err != nil {
return nil, err
}

var stack []plugin.Frame
for {
frame, end := d.readFrame()
if end {
break
}

if frame != (plugin.Frame{}) {
stack = append(stack, frame)
}
if d.isData {
return d.readDataFrames()
}

return stack, nil
return d.readCodeFrames()
}
16 changes: 6 additions & 10 deletions internal/binutils/testdata/fake-llvm-symbolizer
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,18 @@ IFS=" "
while read line; do
# line has form:
# filename 0xaddr
# Emit dummy output that matches llvm-symbolizer output format.
# Emit dummy output that matches llvm-symbolizer JSON output format.
set -- ${line}
kind=$1
fname=$2
addr=$3
case ${kind} in
CODE)
echo "Inlined_${addr}"
echo "${fname}.h"
echo "Func_${addr}"
echo "${fname}.c:2:1"
echo;;
echo "{\"Address\":\"${addr}\",\"ModuleName\":\"${fname}\",\"Symbol\":[{\"Column\":0,\"FileName\":\"${fname}.h\",\"FunctionName\":\"Inlined_${addr}\",\"Line\":0},{\"Column\":1,\"FileName\":\"${fname}.c\",\"FunctionName\":\"Func_${addr}\",\"Line\":2}]}"
;;
DATA)
echo "${fname}_${addr}"
echo "${addr} 8"
echo;;
*) echo ${kind} ${fname} ${addr};;
echo "{\"Address\":\"${addr}\",\"ModuleName\":\"${fname}\",\"Data\":{\"Name\":\"${fname}_${addr}\",\"Size\":\"0x8\",\"Start\":\"${addr}\"}}"
;;
*) exit 1;;
esac
done
Loading