Skip to content

Commit

Permalink
Fix issue where symbols are read incorrectly from v1 index files.
Browse files Browse the repository at this point in the history
  • Loading branch information
charleskorn committed Nov 24, 2022
1 parent cdfe6f0 commit ce19084
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 20 deletions.
37 changes: 18 additions & 19 deletions pkg/storegateway/indexheader/index/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,38 +50,38 @@ type ByteSlice interface {
}

type Symbols struct {
bs ByteSlice
r stream_encoding.Reader
version int
off int
bs ByteSlice
r stream_encoding.Reader
version int
symbolTablePosition int

offsets []int
seen int
}

const symbolFactor = 32

//func NewSymbols(bs ByteSlice, version, off int) (*Symbols, error) {

// NewSymbols returns a Symbols object for symbol lookups.
func NewSymbols(bs ByteSlice, version, off int) (*Symbols, error) {
// bs should contain the Decbuf-encoded symbol table, including leading length and trailing checksum bytes.
// symbolTablePosition should be the offset, from the beginning of the index file, of the symbol table.
func NewSymbols(bs ByteSlice, version, symbolTablePosition int) (*Symbols, error) {
r := stream_encoding.NewBufReader(bs)
s := &Symbols{
bs: bs,
r: r,
version: version,
off: off,
bs: bs,
r: r,
version: version,
symbolTablePosition: symbolTablePosition,
}

d := stream_encoding.NewDecbuf(r, off, castagnoliTable)
d := stream_encoding.NewDecbuf(r, 0, castagnoliTable)
if d.Err() != nil {
fmt.Printf("error: %v\n", d.Err())
}

var (
origLen = d.Len()
cnt = d.Be32int()
basePos = off + 4
basePos = 4
)
s.offsets = make([]int, 0, 1+cnt/symbolFactor)
for d.Err() == nil && s.seen < cnt {
Expand Down Expand Up @@ -118,12 +118,11 @@ func (s Symbols) Lookup(o uint32) (string, error) {
d.UvarintBytes()
}
} else {

// TODO: This is always wrong for v1 by 14 bytes. Meaning, if we do `d.Skip(int(0 - 14))` it works
// perfectly. We're not accounting for the index header somewhere or we stopped including it somewhere
// that we used to include it in.

d.Skip(int(o))
// In v1, o is relative to the beginning of the whole index header file, so we
// need to adjust for the fact our view into the file starts at the beginning
// of the symbol table.
offsetInTable := int(o) - s.symbolTablePosition
d.Skip(offsetInTable)
}
sym := d.UvarintStr()
if d.Err() != nil {
Expand Down
2 changes: 1 addition & 1 deletion pkg/storegateway/indexheader/stream_binary_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ func newFileStreamBinaryReader(path string, postingOffsetsInMemSampling int) (bw
//length := len(lengthBytes) + int(binary.BigEndian.Uint32(lengthBytes)) + 4

//fr := stream_encoding.NewFileReader(f, int(r.toc.Symbols), length)
r.symbols, err = stream_index.NewSymbols(symbolsByteSlice, r.indexVersion, 0)
r.symbols, err = stream_index.NewSymbols(symbolsByteSlice, r.indexVersion, int(r.toc.Symbols))
if err != nil {
return nil, errors.Wrap(err, "read symbols")
}
Expand Down

0 comments on commit ce19084

Please sign in to comment.