diff --git a/bibtex.y b/bibtex.y index 39e131b..2664cb0 100644 --- a/bibtex.y +++ b/bibtex.y @@ -24,7 +24,7 @@ var bib *BibTex // Only for holding current bib %token tCOMMENT tSTRING tPREAMBLE %token tATSIGN tCOLON tEQUAL tCOMMA tPOUND tLBRACE tRBRACE tDQUOTE tLPAREN tRPAREN -%token tBAREIDENT tIDENT +%token tBAREIDENT tIDENT tCOMMENTBODY %type bibtex %type bibentry %type tag stringentry @@ -47,8 +47,7 @@ bibentry : tATSIGN tBAREIDENT tLBRACE tBAREIDENT tCOMMA tags tRBRACE { $$ = NewB | tATSIGN tBAREIDENT tLPAREN tBAREIDENT tCOMMA tags tRPAREN { $$ = NewBibEntry($2, $4); for _, t := range $6 { $$.AddField(t.key, t.val) } } ; -commententry : tATSIGN tCOMMENT tLBRACE longstring tRBRACE {} - | tATSIGN tCOMMENT tLPAREN longstring tRBRACE {} +commententry : tATSIGN tCOMMENT tCOMMENTBODY { } ; stringentry : tATSIGN tSTRING tLBRACE tBAREIDENT tEQUAL longstring tRBRACE { $$ = &bibTag{key: $4, val: $6 } } diff --git a/bibtex.y.go b/bibtex.y.go index a3c771e..a394cb3 100644 --- a/bibtex.y.go +++ b/bibtex.y.go @@ -44,6 +44,7 @@ const tLPAREN = 57357 const tRPAREN = 57358 const tBAREIDENT = 57359 const tIDENT = 57360 +const tCOMMENTBODY = 57361 var bibtexToknames = [...]string{ "$end", @@ -64,6 +65,7 @@ var bibtexToknames = [...]string{ "tRPAREN", "tBAREIDENT", "tIDENT", + "tCOMMENTBODY", } var bibtexStatenames = [...]string{} @@ -71,7 +73,7 @@ const bibtexEofCode = 1 const bibtexErrCode = 2 const bibtexInitialStackSize = 16 -//line bibtex.y:76 +//line bibtex.y:75 // Parse is the entry point to the bibtex parser. 
func Parse(r io.Reader) (*BibTex, error) { @@ -96,60 +98,56 @@ var bibtexExca = [...]int{ const bibtexPrivate = 57344 -const bibtexLast = 61 +const bibtexLast = 54 var bibtexAct = [...]int{ - 22, 39, 40, 41, 9, 10, 11, 24, 23, 44, - 43, 27, 48, 26, 21, 20, 25, 8, 50, 28, - 29, 33, 33, 49, 18, 16, 38, 19, 17, 14, - 31, 12, 15, 42, 13, 30, 45, 46, 33, 33, - 52, 51, 48, 36, 33, 47, 37, 33, 35, 34, - 54, 53, 33, 7, 32, 4, 1, 6, 5, 3, - 2, + 23, 14, 35, 34, 9, 10, 11, 25, 24, 41, + 40, 36, 43, 22, 21, 32, 20, 8, 45, 26, + 33, 17, 19, 15, 18, 12, 16, 32, 13, 47, + 38, 39, 37, 32, 43, 46, 32, 42, 31, 32, + 28, 27, 44, 30, 29, 49, 48, 7, 4, 1, + 6, 5, 3, 2, } var bibtexPact = [...]int{ - -1000, -1000, 46, -1000, -1000, -1000, -1000, 0, 19, 17, - 13, 12, -2, -3, -10, -10, -4, -6, -10, -10, - 25, 20, 41, -1000, -1000, 36, 39, 34, 33, 10, - -14, -14, -1000, -8, -1000, -10, -10, -1000, -1000, 32, - -1000, 14, 2, -1000, -1000, 28, 27, -1000, -14, -10, - -1000, -1000, -1000, -1000, 11, + -1000, -1000, 40, -1000, -1000, -1000, -1000, 0, 13, -18, + 11, 9, 5, -1, -1000, -3, -4, -10, -10, 31, + 30, 35, 34, 25, -1000, -1000, 4, -6, -6, -10, + -10, -1000, -8, -1000, 24, -1000, 33, 2, 22, 16, + -1000, -1000, -1000, -6, -10, -1000, -1000, -1000, -1000, 28, } var bibtexPgo = [...]int{ - 0, 60, 59, 2, 58, 1, 0, 57, 56, 55, + 0, 53, 52, 2, 51, 3, 0, 50, 49, 48, } var bibtexR1 = [...]int{ 0, 8, 1, 1, 1, 1, 1, 2, 2, 9, - 9, 4, 4, 7, 7, 6, 6, 6, 6, 3, - 3, 5, 5, + 4, 4, 7, 7, 6, 6, 6, 6, 3, 3, + 5, 5, } var bibtexR2 = [...]int{ - 0, 1, 0, 2, 2, 2, 2, 7, 7, 5, - 5, 7, 7, 5, 5, 1, 1, 3, 3, 0, - 3, 1, 3, + 0, 1, 0, 2, 2, 2, 2, 7, 7, 3, + 7, 7, 5, 5, 1, 1, 3, 3, 0, 3, + 1, 3, } var bibtexChk = [...]int{ -1000, -8, -1, -2, -9, -4, -7, 7, 17, 4, - 5, 6, 12, 15, 12, 15, 12, 15, 12, 15, - 17, 17, -6, 18, 17, -6, 17, 17, -6, -6, - 10, 10, 13, 11, 13, 9, 9, 13, 16, -5, - -3, 17, -5, 18, 17, -6, -6, 13, 10, 9, - 16, 13, 13, -3, -6, + 5, 6, 12, 15, 19, 12, 15, 12, 15, 17, + 17, 17, 17, -6, 18, 17, -6, 
10, 10, 9, + 9, 13, 11, 16, -5, -3, 17, -5, -6, -6, + 18, 17, 13, 10, 9, 16, 13, 13, -3, -6, } var bibtexDef = [...]int{ 2, -2, 1, 3, 4, 5, 6, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 15, 16, 0, 0, 0, 0, 0, - 19, 19, 9, 0, 10, 0, 0, 13, 14, 0, - 21, 0, 0, 17, 18, 0, 0, 7, 19, 0, - 8, 11, 12, 22, 20, + 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 14, 15, 0, 18, 18, 0, + 0, 12, 0, 13, 0, 20, 0, 0, 0, 0, + 16, 17, 7, 18, 0, 8, 10, 11, 21, 19, } var bibtexTok1 = [...]int{ @@ -158,7 +156,7 @@ var bibtexTok1 = [...]int{ var bibtexTok2 = [...]int{ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 12, 13, 14, 15, 16, 17, 18, + 12, 13, 14, 15, 16, 17, 18, 19, } var bibtexTok3 = [...]int{ 0, @@ -559,14 +557,15 @@ bibtexdefault: } } case 9: - bibtexDollar = bibtexS[bibtexpt-5 : bibtexpt+1] + bibtexDollar = bibtexS[bibtexpt-3 : bibtexpt+1] //line bibtex.y:50 { } case 10: - bibtexDollar = bibtexS[bibtexpt-5 : bibtexpt+1] -//line bibtex.y:51 + bibtexDollar = bibtexS[bibtexpt-7 : bibtexpt+1] +//line bibtex.y:53 { + bibtexVAL.bibtag = &bibTag{key: bibtexDollar[4].strval, val: bibtexDollar[6].strings} } case 11: bibtexDollar = bibtexS[bibtexpt-7 : bibtexpt+1] @@ -575,10 +574,10 @@ bibtexdefault: bibtexVAL.bibtag = &bibTag{key: bibtexDollar[4].strval, val: bibtexDollar[6].strings} } case 12: - bibtexDollar = bibtexS[bibtexpt-7 : bibtexpt+1] -//line bibtex.y:55 + bibtexDollar = bibtexS[bibtexpt-5 : bibtexpt+1] +//line bibtex.y:57 { - bibtexVAL.bibtag = &bibTag{key: bibtexDollar[4].strval, val: bibtexDollar[6].strings} + bibtexVAL.strings = bibtexDollar[4].strings } case 13: bibtexDollar = bibtexS[bibtexpt-5 : bibtexpt+1] @@ -587,59 +586,53 @@ bibtexdefault: bibtexVAL.strings = bibtexDollar[4].strings } case 14: - bibtexDollar = bibtexS[bibtexpt-5 : bibtexpt+1] -//line bibtex.y:59 - { - bibtexVAL.strings = bibtexDollar[4].strings - } - case 15: bibtexDollar = bibtexS[bibtexpt-1 : bibtexpt+1] -//line bibtex.y:62 +//line bibtex.y:61 { bibtexVAL.strings = NewBibConst(bibtexDollar[1].strval) } 
- case 16: + case 15: bibtexDollar = bibtexS[bibtexpt-1 : bibtexpt+1] -//line bibtex.y:63 +//line bibtex.y:62 { bibtexVAL.strings = bib.GetStringVar(bibtexDollar[1].strval) } - case 17: + case 16: bibtexDollar = bibtexS[bibtexpt-3 : bibtexpt+1] -//line bibtex.y:64 +//line bibtex.y:63 { bibtexVAL.strings = NewBibComposite(bibtexDollar[1].strings) bibtexVAL.strings.(*BibComposite).Append(NewBibConst(bibtexDollar[3].strval)) } - case 18: + case 17: bibtexDollar = bibtexS[bibtexpt-3 : bibtexpt+1] -//line bibtex.y:65 +//line bibtex.y:64 { bibtexVAL.strings = NewBibComposite(bibtexDollar[1].strings) bibtexVAL.strings.(*BibComposite).Append(bib.GetStringVar(bibtexDollar[3].strval)) } - case 19: + case 18: bibtexDollar = bibtexS[bibtexpt-0 : bibtexpt+1] -//line bibtex.y:68 +//line bibtex.y:67 { } - case 20: + case 19: bibtexDollar = bibtexS[bibtexpt-3 : bibtexpt+1] -//line bibtex.y:69 +//line bibtex.y:68 { bibtexVAL.bibtag = &bibTag{key: bibtexDollar[1].strval, val: bibtexDollar[3].strings} } - case 21: + case 20: bibtexDollar = bibtexS[bibtexpt-1 : bibtexpt+1] -//line bibtex.y:72 +//line bibtex.y:71 { if bibtexDollar[1].bibtag != nil { bibtexVAL.bibtags = []*bibTag{bibtexDollar[1].bibtag} } } - case 22: + case 21: bibtexDollar = bibtexS[bibtexpt-3 : bibtexpt+1] -//line bibtex.y:73 +//line bibtex.y:72 { if bibtexDollar[3].bibtag == nil { bibtexVAL.bibtags = bibtexDollar[1].bibtags diff --git a/bibtex_test.go b/bibtex_test.go index 1def1ca..dea74a4 100644 --- a/bibtex_test.go +++ b/bibtex_test.go @@ -4,7 +4,7 @@ import ( "bytes" "errors" "fmt" - "io/ioutil" + "os" "path/filepath" "strings" "testing" @@ -71,7 +71,7 @@ func TestParser(t *testing.T) { for _, ex := range examples { t.Logf("Parsing example: %s", ex) - b, err := ioutil.ReadFile(ex) + b, err := os.ReadFile(ex) if err != nil { t.Errorf("Cannot read %s: %v", ex, err) } @@ -89,12 +89,13 @@ func TestMultiParse(t *testing.T) { "example/simple.bib", "example/simple.bib", "example/simple.bib", + "example/simple2.bib", // 
simple but with comment } var bibs []*BibTex for _, ex := range examples { t.Logf("Parsing example: %s", ex) - b, err := ioutil.ReadFile(ex) + b, err := os.ReadFile(ex) if err != nil { t.Errorf("Cannot read %s: %v", ex, err) } @@ -122,7 +123,7 @@ func TestPrettyStringRoundTrip(t *testing.T) { for _, ex := range examples { // Read input. - b, err := ioutil.ReadFile(ex) + b, err := os.ReadFile(ex) if err != nil { t.Fatal(err) } @@ -148,7 +149,7 @@ func TestPrettyStringRoundTrip(t *testing.T) { func TestUnexpectedAtSign(t *testing.T) { // Tests correct syntax but scanning error - b, err := ioutil.ReadFile("example/unexpected-at-sign.badbib") + b, err := os.ReadFile("example/unexpected-at-sign.badbib") if err != nil { t.Fatal(err) } @@ -191,7 +192,7 @@ func AssertEntriesEqual(t *testing.T, a, b *BibEntry) { } func BenchmarkStringPerformance(b *testing.B) { - exampleFileBytes, err := ioutil.ReadFile("example/biblatex-examples.bib") + exampleFileBytes, err := os.ReadFile("example/biblatex-examples.bib") if err != nil { b.Fatal(err) } diff --git a/docs.go b/docs.go index d11ffa0..7b2883e 100644 --- a/docs.go +++ b/docs.go @@ -3,15 +3,15 @@ // The package contains a simple parser and data structure to represent bibtex // records. // -// Supported syntax +// # Supported syntax // // The basic syntax is: // -// @BIBTYPE{IDENT, -// key1 = word, -// key2 = "quoted", -// key3 = {quoted}, -// } +// @BIBTYPE{IDENT, +// key1 = word, +// key2 = "quoted", +// key3 = {quoted}, +// } // // where BIBTYPE is the type of document (e.g. inproceedings, article, etc.) // and IDENT is a string identifier. @@ -20,5 +20,4 @@ // found in the link below. If there are any problems, please file any issues // with a minimal working example at the GitHub repository. 
// http://maverick.inria.fr/~Xavier.Decoret/resources/xdkbibtex/bibtex_summary.html -// package bibtex // import "github.com/nickng/bibtex" diff --git a/example/simple2.bib b/example/simple2.bib new file mode 100644 index 0000000..2e95370 --- /dev/null +++ b/example/simple2.bib @@ -0,0 +1,16 @@ +@article{name, + year = 2016, + title = {SessionBasedBlah} +} + +@comment{ + this looks like an entry + but is actually a comment + even if the braces don't + balance it's still valid {} + +@inproceedings{ng2014, + title = "Blah", + author = "Me", + booktitle = "ABCD2014" +} diff --git a/scanner.go b/scanner.go index b3cc089..73d4f8d 100644 --- a/scanner.go +++ b/scanner.go @@ -12,8 +12,9 @@ var parseField bool // scanner is a lexical scanner type scanner struct { - r *bufio.Reader - pos tokenPos + commentMode bool + r *bufio.Reader + pos tokenPos } // newScanner returns a new instance of scanner. @@ -79,6 +80,13 @@ func (s *scanner) Scan() (tok token, lit string, err error) { if parseField { return s.scanBraced() } + // If we're reading a comment, return everything after { + // to the next @-sign (exclusive) + if s.commentMode { + s.unread() + commentBodyTok, commentBody := s.scanCommentBody() + return commentBodyTok, commentBody, nil + } return tLBRACE, string(ch), nil case '}': if parseField { // reset parseField if reached end of entry. @@ -122,6 +130,7 @@ func (s *scanner) scanBare() (token, string) { } str := buf.String() if strings.ToLower(str) == "comment" { + s.commentMode = true return tCOMMENT, str } else if strings.ToLower(str) == "preamble" { return tPREAMBLE, str @@ -193,6 +202,28 @@ func (s *scanner) scanQuoted() (token, string) { return tILLEGAL, buf.String() } +// scanCommentBody is a scan method used for reading a bibtex +// comment item by reading all runes until the next @ or EOF. +// +// e.g. 
+// @comment{...anything can go here even if braces are unbalanced@ +// comment body string will be "...anything can go here even if braces are unbalanced" +func (s *scanner) scanCommentBody() (token, string) { + var buf bytes.Buffer + for { + if ch := s.read(); ch == eof { + break + } else if ch == '@' { + s.unread() + break + } else { + _, _ = buf.WriteRune(ch) + } + } + s.commentMode = false + return tCOMMENTBODY, buf.String() +} + // ignoreWhitespace consumes the current rune and all contiguous whitespace. func (s *scanner) ignoreWhitespace() { for {