Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[dbnode] Optimize filesetFiles function #3900

Merged
merged 6 commits into from
Nov 9, 2021
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 67 additions & 42 deletions src/dbnode/persist/fs/files.go
Original file line number Diff line number Diff line change
Expand Up @@ -1231,7 +1231,62 @@ func SortedCommitLogFiles(commitLogsDir string) ([]string, error) {
return sortedCommitLogFiles(commitLogsDir, commitLogFilePattern)
}

type filesetFile struct {
volumeIndex int
blockStart xtime.UnixNano
fileName string
}

type toSortableFn func(files []string) sort.Interface
type toBlockStartAndVolumeIndexFn func(file string) (xtime.UnixNano, int, error)
type sortedFilesetFiles []filesetFile

func (s sortedFilesetFiles) Len() int {
return len(s)
}

func (s sortedFilesetFiles) Less(i, j int) bool {
ti := s[i].blockStart
tj := s[j].blockStart

if ti.Before(tj) {
return true
}

ij := s[j].volumeIndex
ii := s[i].volumeIndex
soundvibe marked this conversation as resolved.
Show resolved Hide resolved
return ti.Equal(tj) && ii < ij
}

func (s sortedFilesetFiles) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}

func findFilesetFiles(fileDir string, pattern string, fn toBlockStartAndVolumeIndexFn) (sortedFilesetFiles, error) {
soundvibe marked this conversation as resolved.
Show resolved Hide resolved
matched, err := filepath.Glob(path.Join(fileDir, pattern))
if err != nil {
return nil, err
}
if len(matched) == 0 {
return nil, nil
}
Comment on lines +1241 to +1243
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this if is redundant.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't want to change semantics on what is returned from this function. Previously nil would be returned if filepath.Glob returns nil. Without this if check we would be returning empty slice which is not the same thing as before.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

findSortedFilesetFiles is a new function so I guess there can be no existing semantics for it.
And for the filesetFiles, there is still an if for this purpose:
https://github.com/m3db/m3/pull/3900/files#diff-78d9cf687193bca4cdc4ae73e54059f6a3b3ab4360a627c4bf26c070b8a0f909R1340

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

findSortedFilesetFiles replaced findFiles (which is still present) so its semantics were based on it so that these both functions could be used interchangeably if needed. In Go returning nil slices is quite common and I don't see problems with this approach here.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But findFiles does not have such check, it returns whatever is returned by filepath.Glob...

Copy link
Collaborator Author

@soundvibe soundvibe Nov 9, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Exactly, and filepath.Glob returns nil if it founds nothing. So findSortedFilesetFilesshould also return nil when it founds nothing. I could have probably checked if matched == nil but since filepath.Glob never returns empty slice, there is no difference here.

result := make([]filesetFile, len(matched))
for i, file := range matched {
blockStart, volume, err := fn(file)
if err != nil {
return nil, err
}

result[i] = filesetFile{
fileName: file,
blockStart: blockStart,
volumeIndex: volume,
}
}
Comment on lines +1244 to +1256
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit:

Suggested change
result := make([]filesetFile, len(matched))
for i, file := range matched {
blockStart, volume, err := fn(file)
if err != nil {
return nil, err
}
result[i] = filesetFile{
fileName: file,
blockStart: blockStart,
volumeIndex: volume,
}
}
result := make([]filesetFile, 0, len(matched))
for _, file := range matched {
blockStart, volume, err := fn(file)
if err != nil {
return nil, err
}
result = append(result, filesetFile{
fileName: file,
blockStart: blockStart,
volumeIndex: volume,
})
}


sort.Sort(sortedFilesetFiles(result))
return result, nil
}

func findFiles(fileDir string, pattern string, fn toSortableFn) ([]string, error) {
matched, err := filepath.Glob(path.Join(fileDir, pattern))
Expand Down Expand Up @@ -1278,22 +1333,18 @@ type filesetFilesSelector struct {

func filesetFiles(args filesetFilesSelector) (FileSetFilesSlice, error) {
var (
byTimeAsc []string
byTimeAsc sortedFilesetFiles
err error
)
switch args.fileSetType {
case persist.FileSetFlushType:
switch args.contentType {
case persist.FileSetDataContentType:
dir := ShardDataDirPath(args.filePathPrefix, args.namespace, args.shard)
byTimeAsc, err = findFiles(dir, args.pattern, func(files []string) sort.Interface {
return dataFileSetFilesByTimeAndVolumeIndexAscending(files)
vpranckaitis marked this conversation as resolved.
Show resolved Hide resolved
})
byTimeAsc, err = findFilesetFiles(dir, args.pattern, TimeAndVolumeIndexFromDataFileSetFilename)
case persist.FileSetIndexContentType:
dir := NamespaceIndexDataDirPath(args.filePathPrefix, args.namespace)
byTimeAsc, err = findFiles(dir, args.pattern, func(files []string) sort.Interface {
return fileSetFilesByTimeAndVolumeIndexAscending(files)
})
byTimeAsc, err = findFilesetFiles(dir, args.pattern, TimeAndVolumeIndexFromFileSetFilename)
default:
return nil, fmt.Errorf("unknown content type: %d", args.contentType)
}
Expand All @@ -1307,9 +1358,7 @@ func filesetFiles(args filesetFilesSelector) (FileSetFilesSlice, error) {
default:
return nil, fmt.Errorf("unknown content type: %d", args.contentType)
}
byTimeAsc, err = findFiles(dir, args.pattern, func(files []string) sort.Interface {
return fileSetFilesByTimeAndVolumeIndexAscending(files)
})
byTimeAsc, err = findFilesetFiles(dir, args.pattern, TimeAndVolumeIndexFromFileSetFilename)
default:
return nil, fmt.Errorf("unknown type: %d", args.fileSetType)
}
Expand All @@ -1328,51 +1377,27 @@ func filesetFiles(args filesetFilesSelector) (FileSetFilesSlice, error) {
filesetFiles = []FileSetFile{}
)
for _, file := range byTimeAsc {
var (
currentFileBlockStart xtime.UnixNano
volumeIndex int
err error
)
switch args.fileSetType {
case persist.FileSetFlushType:
switch args.contentType {
case persist.FileSetDataContentType:
currentFileBlockStart, volumeIndex, err = TimeAndVolumeIndexFromDataFileSetFilename(file)
case persist.FileSetIndexContentType:
currentFileBlockStart, volumeIndex, err = TimeAndVolumeIndexFromFileSetFilename(file)
default:
return nil, fmt.Errorf("unknown content type: %d", args.contentType)
}
case persist.FileSetSnapshotType:
currentFileBlockStart, volumeIndex, err = TimeAndVolumeIndexFromFileSetFilename(file)
default:
return nil, fmt.Errorf("unknown type: %d", args.fileSetType)
}
if err != nil {
return nil, err
}

if latestBlockStart == 0 {
latestFileSetFile = NewFileSetFile(FileSetFileIdentifier{
Namespace: args.namespace,
BlockStart: currentFileBlockStart,
BlockStart: file.blockStart,
Shard: args.shard,
VolumeIndex: volumeIndex,
VolumeIndex: file.volumeIndex,
}, args.filePathPrefix)
} else if !currentFileBlockStart.Equal(latestBlockStart) || latestVolumeIndex != volumeIndex {
} else if !file.blockStart.Equal(latestBlockStart) || latestVolumeIndex != file.volumeIndex {
filesetFiles = append(filesetFiles, latestFileSetFile)
latestFileSetFile = NewFileSetFile(FileSetFileIdentifier{
Namespace: args.namespace,
BlockStart: currentFileBlockStart,
BlockStart: file.blockStart,
Shard: args.shard,
VolumeIndex: volumeIndex,
VolumeIndex: file.volumeIndex,
}, args.filePathPrefix)
}

latestBlockStart = currentFileBlockStart
latestVolumeIndex = volumeIndex
latestBlockStart = file.blockStart
latestVolumeIndex = file.volumeIndex

latestFileSetFile.AbsoluteFilePaths = append(latestFileSetFile.AbsoluteFilePaths, file)
latestFileSetFile.AbsoluteFilePaths = append(latestFileSetFile.AbsoluteFilePaths, file.fileName)
}

filesetFiles = append(filesetFiles, latestFileSetFile)
Expand Down