Skip to content

Commit

Permalink
[bug] - Correctly Handle Large Files in BufferedReadSeeker (#3203)
Browse files Browse the repository at this point in the history
* handle large files correctly

* return if http get fails
  • Loading branch information
ahrav authored Aug 8, 2024
1 parent 239f359 commit a966a47
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 0 deletions.
27 changes: 27 additions & 0 deletions pkg/handlers/handlers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -390,3 +390,30 @@ func BenchmarkHandleTar(b *testing.B) {
assert.NoError(b, err)
}
}

// TestHandleLargeHTTPJson downloads a zipped JSON fixture over HTTP, streams
// the response body through HandleFile, and asserts on the number of chunks
// that are reported on the channel.
func TestHandleLargeHTTPJson(t *testing.T) {
	resp, err := http.Get("https://raw.githubusercontent.com/ahrav/nothing-to-see-here/main/md_random_data.json.zip")
	if !assert.NoError(t, err) {
		return
	}
	// On a nil error http.Get always returns a non-nil response with a
	// non-nil Body, so it can be closed unconditionally.
	defer resp.Body.Close()

	// A non-200 response carries an error page rather than the fixture.
	// Stop here so the failure points at the download instead of surfacing
	// later as a confusing chunk-count mismatch.
	if !assert.Equal(t, http.StatusOK, resp.StatusCode, "unexpected HTTP status: %s", resp.Status) {
		return
	}

	chunkCh := make(chan *sources.Chunk, 1)
	go func() {
		// Closing the channel ends the counting loop below, whether or not
		// HandleFile succeeded.
		defer close(chunkCh)
		err := HandleFile(logContext.Background(), resp.Body, &sources.Chunk{}, sources.ChanReporter{Ch: chunkCh})
		assert.NoError(t, err)
	}()

	wantCount := 5121
	count := 0
	for range chunkCh {
		count++
	}
	assert.Equal(t, wantCount, count)
}
24 changes: 24 additions & 0 deletions pkg/iobuf/bufferedreaderseeker.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,19 @@ func (br *BufferedReadSeeker) Read(out []byte) (int, error) {
return n, err
}

// If we have a temp file and the total size is known, we can read directly from it.
if br.sizeKnown && br.tempFile != nil {
if br.index >= br.totalSize {
return 0, io.EOF
}
if _, err := br.tempFile.Seek(br.index, io.SeekStart); err != nil {
return 0, err
}
n, err := br.tempFile.Read(out)
br.index += int64(n)
return n, err
}

var (
totalBytesRead int
err error
Expand Down Expand Up @@ -211,6 +224,17 @@ func (br *BufferedReadSeeker) readToEnd() error {
return err
}
}

// If a temporary file exists and the buffer contains data,
// flush the buffer to the file. This allows future operations
// to utilize the temporary file exclusively, simplifying
// management by avoiding separate handling of the buffer and file.
if br.tempFile != nil && br.buf.Len() > 0 {
if err := br.flushBufferToDisk(); err != nil {
return err
}
}

br.totalSize = br.bytesRead
br.sizeKnown = true

Expand Down

0 comments on commit a966a47

Please sign in to comment.