Skip to content

Commit

Permalink
Also retrieve file's package when fast-scanning for imports (#209)
Browse files Browse the repository at this point in the history
This augments the experimental fast-scanner `imports.ScanForImports` to
also return the package for each scanned file. The function has also been
renamed to `fastscan.Scan` (and its package from `imports` to `fastscan`),
since it now does more than just scan for imports.
  • Loading branch information
jhump committed Nov 29, 2023
1 parent f7f6094 commit fa71488
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 23 deletions.
17 changes: 8 additions & 9 deletions internal/benchmarks/benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ import (
"github.com/bufbuild/protocompile/internal/protoc"
"github.com/bufbuild/protocompile/linker"
"github.com/bufbuild/protocompile/parser"
"github.com/bufbuild/protocompile/parser/imports"
"github.com/bufbuild/protocompile/parser/fastscan"
"github.com/bufbuild/protocompile/protoutil"
"github.com/bufbuild/protocompile/reporter"
)
Expand Down Expand Up @@ -356,15 +356,15 @@ func benchmarkGoogleapisProtoparse(b *testing.B, factory func() *protoparse.Pars
}
}

func BenchmarkGoogleapisScanImports(b *testing.B) {
func BenchmarkGoogleapisFastScan(b *testing.B) {
par := runtime.GOMAXPROCS(-1)
cpus := runtime.NumCPU()
if par > cpus {
par = cpus
}
type entry struct {
filename string
imports []string
filename string
scanResult fastscan.Result
}
for i := 0; i < b.N; i++ {
workCh := make(chan string, par)
Expand Down Expand Up @@ -405,24 +405,23 @@ func BenchmarkGoogleapisScanImports(b *testing.B) {
return ctx.Err()
}
r, err := os.Open(filename)
var imps []string
if err != nil {
return err
}
imps, err = imports.ScanForImports(r)
res, err := fastscan.Scan(r)
_ = r.Close()
if err != nil {
return err
}
select {
case resultsCh <- entry{filename: filename, imports: imps}:
case resultsCh <- entry{filename: filename, scanResult: res}:
case <-ctx.Done():
return ctx.Err()
}
}
})
}
results := make(map[string][]string, len(googleapisSources))
results := make(map[string]fastscan.Result, len(googleapisSources))
grp.Go(func() error {
// accumulator
for {
Expand All @@ -431,7 +430,7 @@ func BenchmarkGoogleapisScanImports(b *testing.B) {
if !ok {
return nil
}
results[entry.filename] = entry.imports
results[entry.filename] = entry.scanResult
case <-ctx.Done():
return ctx.Err()
}
Expand Down
61 changes: 48 additions & 13 deletions parser/imports/fast_imports.go → parser/fastscan/fastscan.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

package imports
package fastscan

import (
"io"
Expand All @@ -26,23 +26,38 @@ var closeSymbol = map[tokenType]tokenType{
openAngleToken: closeAngleToken,
}

// ScanForImports scans the given reader, which should contain Protobuf source, and
// returns the set of imports declared in the file. It returns an error if there is
// Result is the result of scanning a Protobuf source file. It contains the
// information extracted from the file.
type Result struct {
// PackageName is the name taken from the file's package declaration, built
// by joining its identifier and period tokens. It is left empty when the
// scan does not find a package declaration.
PackageName string
// Imports holds the imports declared in the file, in the order in which
// the scan encountered them.
Imports []string
}

// Scan scans the given reader, which should contain Protobuf source, and
// returns the set of imports declared in the file. The result also contains the
// value of any package declaration in the file. It returns an error if there is
// an I/O error reading from r. In the event of such an error, it will still return
// a slice of imports that contains as many imports as were found before the I/O
// error occurred.
func ScanForImports(r io.Reader) ([]string, error) {
var imports []string
// a result that contains as much information as was found before the I/O error
// occurred.
func Scan(r io.Reader) (Result, error) {
var res Result

var currentImport []string // if non-nil, parsing an import statement
var packageComponents []string // if non-nil, parsing a package statement

// current stack of open blocks -- those starting with {, [, (, or < for
// which we haven't yet encountered the closing }, ], ), or >
var contextStack []tokenType
var currentImport []string
declarationStart := true

lexer := newLexer(r)
for {
token, text, err := lexer.Lex()
if err != nil {
return imports, err
return res, err
}
if token == eofToken {
return imports, nil
return res, nil
}

if currentImport != nil {
Expand All @@ -51,12 +66,26 @@ func ScanForImports(r io.Reader) ([]string, error) {
currentImport = append(currentImport, text.(string))
default:
if len(currentImport) > 0 {
imports = append(imports, strings.Join(currentImport, ""))
res.Imports = append(res.Imports, strings.Join(currentImport, ""))
}
currentImport = nil
}
}

if packageComponents != nil {
switch token {
case identifierToken:
packageComponents = append(packageComponents, text.(string))
case periodToken:
packageComponents = append(packageComponents, ".")
default:
if len(packageComponents) > 0 {
res.PackageName = strings.Join(packageComponents, "")
}
packageComponents = nil
}
}

switch token {
case openParenToken, openBraceToken, openBracketToken, openAngleToken:
contextStack = append(contextStack, closeSymbol[token])
Expand All @@ -65,9 +94,15 @@ func ScanForImports(r io.Reader) ([]string, error) {
contextStack = contextStack[:len(contextStack)-1]
}
case identifierToken:
if text == "import" && len(contextStack) == 0 {
currentImport = []string{}
if declarationStart && len(contextStack) == 0 {
if text == "import" {
currentImport = []string{}
} else if text == "package" {
packageComponents = []string{}
}
}
}

declarationStart = token == closeBraceToken || token == semicolonToken
}
}
4 changes: 3 additions & 1 deletion parser/imports/lexer.go → parser/fastscan/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

package imports
package fastscan

import (
"bufio"
Expand Down Expand Up @@ -56,6 +56,8 @@ const (
closeBraceToken = tokenType('}')
closeBracketToken = tokenType(']')
closeAngleToken = tokenType('>')
periodToken = tokenType('.')
semicolonToken = tokenType(';')
)

type runeReader struct {
Expand Down

0 comments on commit fa71488

Please sign in to comment.