Skip to content

Commit

Permalink
cgo: support preprocessor macros passed on the command line
Browse files Browse the repository at this point in the history
Go code might sometimes want to use preprocessor macros that were passed
on the command line. This wasn't working before and resulted in the
following error:

    internal error: could not find file where macro is defined

This is now supported, though location information isn't available
(which makes sense: the command line is not a file).

I had to use the `clang_tokenize` API for this and reconstruct the
original source text from the token spellings and their offsets.
Apparently this is the only way to do it:
https://stackoverflow.com/a/19074846/559350
In the future we could consider replacing our own tokenization with the
tokenizer that's built into Clang directly. This should reduce the
possibility of bugs a bit.
  • Loading branch information
aykevl committed Jul 13, 2024
1 parent 6f462fb commit 2f3d821
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 37 deletions.
9 changes: 8 additions & 1 deletion cgo/cgo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"go/ast"
"go/format"
"go/parser"
"go/scanner"
"go/token"
"go/types"
"os"
Expand Down Expand Up @@ -219,7 +220,13 @@ func (i simpleImporter) Import(path string) (*types.Package, error) {
// formatDiagnostic formats the error message to be an indented comment. It
// also fixes Windows path name issues (backward slashes).
func formatDiagnostic(err error) string {
msg := err.Error()
var msg string
switch err := err.(type) {
case scanner.Error:
msg = err.Pos.String() + ": " + err.Msg
default:
msg = err.Error()
}
if runtime.GOOS == "windows" {
// Fix Windows path slashes.
msg = strings.ReplaceAll(msg, "testdata\\", "testdata/")
Expand Down
8 changes: 6 additions & 2 deletions cgo/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,9 @@ func (t *tokenizer) Next() {
t.curValue = t.peekValue

// Parse the next peek token.
t.peekPos += token.Pos(len(t.curValue))
if t.peekPos != token.NoPos {
t.peekPos += token.Pos(len(t.curValue))
}
for {
if len(t.buf) == 0 {
t.peekToken = token.EOF
Expand All @@ -207,7 +209,9 @@ func (t *tokenizer) Next() {
// Skip whitespace.
// Based on this source, not sure whether it represents C whitespace:
// https://en.cppreference.com/w/cpp/string/byte/isspace
t.peekPos++
if t.peekPos != token.NoPos {
t.peekPos++
}
t.buf = t.buf[1:]
case len(t.buf) >= 2 && (string(t.buf[:2]) == "||" || string(t.buf[:2]) == "&&" || string(t.buf[:2]) == "<<" || string(t.buf[:2]) == ">>"):
// Two-character tokens.
Expand Down
68 changes: 36 additions & 32 deletions cgo/libclang.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package cgo
// modification. It does not touch the AST itself.

import (
"bytes"
"crypto/sha256"
"crypto/sha512"
"encoding/hex"
Expand Down Expand Up @@ -369,42 +370,45 @@ func (f *cgoFile) createASTNode(name string, c clangCursor) (ast.Node, any) {
gen.Specs = append(gen.Specs, valueSpec)
return gen, nil
case C.CXCursor_MacroDefinition:
// Extract tokens from the Clang tokenizer.
// See: https://stackoverflow.com/a/19074846/559350
sourceRange := C.tinygo_clang_getCursorExtent(c)
start := C.clang_getRangeStart(sourceRange)
end := C.clang_getRangeEnd(sourceRange)
var file, endFile C.CXFile
var startOffset, endOffset C.unsigned
C.clang_getExpansionLocation(start, &file, nil, nil, &startOffset)
if file == nil {
f.addError(pos, "internal error: could not find file where macro is defined")
return nil, nil
}
C.clang_getExpansionLocation(end, &endFile, nil, nil, &endOffset)
if file != endFile {
f.addError(pos, "internal error: expected start and end location of a macro to be in the same file")
return nil, nil
}
if startOffset > endOffset {
f.addError(pos, "internal error: start offset of macro is after end offset")
return nil, nil
}

// read file contents and extract the relevant byte range
tu := C.tinygo_clang_Cursor_getTranslationUnit(c)
var size C.size_t
sourcePtr := C.clang_getFileContents(tu, file, &size)
if endOffset >= C.uint(size) {
f.addError(pos, "internal error: end offset of macro lies after end of file")
return nil, nil
}
source := string(((*[1 << 28]byte)(unsafe.Pointer(sourcePtr)))[startOffset:endOffset:endOffset])
if !strings.HasPrefix(source, name) {
f.addError(pos, fmt.Sprintf("internal error: expected macro value to start with %#v, got %#v", name, source))
return nil, nil
var rawTokens *C.CXToken
var numTokens C.unsigned
C.clang_tokenize(tu, sourceRange, &rawTokens, &numTokens)
tokens := unsafe.Slice(rawTokens, numTokens)
// Convert this range of tokens back to source text.
// Ugly, but it works well enough.
sourceBuf := &bytes.Buffer{}
var startOffset int
for i, token := range tokens {
spelling := getString(C.clang_getTokenSpelling(tu, token))
location := C.clang_getTokenLocation(tu, token)
var tokenOffset C.unsigned
C.clang_getExpansionLocation(location, nil, nil, nil, &tokenOffset)
if i == 0 {
// The first token is the macro name itself.
// Skip it (after using its location).
startOffset = int(tokenOffset) + len(name)
} else {
// Later tokens are the macro contents.
for int(tokenOffset) > (startOffset + sourceBuf.Len()) {
// Pad the source text with whitespace (that must have been
// present in the original source as well).
sourceBuf.WriteByte(' ')
}
sourceBuf.WriteString(spelling)
}
}
value := source[len(name):]
C.clang_disposeTokens(tu, rawTokens, numTokens)
value := sourceBuf.String()
// Try to convert this #define into a Go constant expression.
expr, scannerError := parseConst(pos+token.Pos(len(name)), f.fset, value)
tokenPos := token.NoPos
if pos != token.NoPos {
tokenPos = pos + token.Pos(len(name))
}
expr, scannerError := parseConst(tokenPos, f.fset, value)
if scannerError != nil {
f.errors = append(f.errors, *scannerError)
return nil, nil
Expand Down
13 changes: 13 additions & 0 deletions cgo/testdata/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,22 @@ typedef someType noType; // undefined type
#define SOME_CONST_1 5) // invalid const syntax
#define SOME_CONST_2 6) // const not used (so no error)
#define SOME_CONST_3 1234 // const too large for byte
#define SOME_CONST_b 3 ) // const with lots of weird whitespace (to test error locations)
# define SOME_CONST_startspace 3)
*/
//
//
// #define SOME_CONST_4 8) // after some empty lines
// #cgo CFLAGS: -DSOME_PARAM_CONST_invalid=3/+3
// #cgo CFLAGS: -DSOME_PARAM_CONST_valid=3+4
import "C"

// #warning another warning
import "C"

// Make sure that errors for the following lines won't change with future
// additions to the CGo preamble.
//
//line errors.go:100
var (
// constant too large
Expand All @@ -38,4 +43,12 @@ var (
_ byte = C.SOME_CONST_3

_ = C.SOME_CONST_4

_ = C.SOME_CONST_b

_ = C.SOME_CONST_startspace

// constants passed by a command line parameter
_ = C.SOME_PARAM_CONST_invalid
_ = C.SOME_PARAM_CONST_valid
)
11 changes: 9 additions & 2 deletions cgo/testdata/errors.out.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
// CGo errors:
// testdata/errors.go:4:2: warning: some warning
// testdata/errors.go:11:9: error: unknown type name 'someType'
// testdata/errors.go:22:5: warning: another warning
// testdata/errors.go:26:5: warning: another warning
// testdata/errors.go:13:23: unexpected token ), expected end of expression
// testdata/errors.go:19:26: unexpected token ), expected end of expression
// testdata/errors.go:21:26: unexpected token ), expected end of expression
// testdata/errors.go:16:33: unexpected token ), expected end of expression
// testdata/errors.go:17:34: unexpected token ), expected end of expression
// -: unexpected token INT, expected end of expression

// Type checking errors after CGo processing:
// testdata/errors.go:102: cannot use 2 << 10 (untyped int constant 2048) as C.char value in variable declaration (overflows)
// testdata/errors.go:105: unknown field z in struct literal
// testdata/errors.go:108: undefined: C.SOME_CONST_1
// testdata/errors.go:110: cannot use C.SOME_CONST_3 (untyped int constant 1234) as byte value in variable declaration (overflows)
// testdata/errors.go:112: undefined: C.SOME_CONST_4
// testdata/errors.go:114: undefined: C.SOME_CONST_b
// testdata/errors.go:116: undefined: C.SOME_CONST_startspace
// testdata/errors.go:119: undefined: C.SOME_PARAM_CONST_invalid

package main

Expand Down Expand Up @@ -58,3 +64,4 @@ type C.struct_point_t struct {
type C.point_t = C.struct_point_t

const C.SOME_CONST_3 = 1234
const C.SOME_PARAM_CONST_valid = 3 + 4

0 comments on commit 2f3d821

Please sign in to comment.