Skip to content

Commit

Permalink
feat: add single file extraction support (#11)
Browse files Browse the repository at this point in the history
* wip fixing tests

* add single file support, add tests

* var block, remove typo

* update err msg

* remove newline

* simplify if
  • Loading branch information
leohhhn authored Feb 1, 2024
1 parent 42aaa94 commit a03195a
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 44 deletions.
36 changes: 24 additions & 12 deletions extractor/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ var (

// Define extractor config
type extractorCfg struct {
fileType string
sourceDir string
outputDir string
fileType string
sourcePath string
outputDir string
}

func main() {
Expand Down Expand Up @@ -74,10 +74,10 @@ func (c *extractorCfg) registerFlags(fs *flag.FlagSet) {
)

fs.StringVar(
&c.sourceDir,
"source-dir",
".",
"the root folder containing transaction data",
&c.sourcePath,
"source-path",
"",
"the source file or folder containing transaction data",
)

fs.StringVar(
Expand All @@ -96,7 +96,7 @@ func execExtract(ctx context.Context, cfg *extractorCfg) error {
}

// Check the source path is set
if cfg.sourceDir == "" {
if cfg.sourcePath == "" {
return errInvalidSourceDir
}

Expand All @@ -105,10 +105,22 @@ func execExtract(ctx context.Context, cfg *extractorCfg) error {
return errInvalidOutputDir
}

// Find the files that need to be analyzed
sourceFiles, findErr := findFilePaths(cfg.sourceDir, cfg.fileType)
if findErr != nil {
return fmt.Errorf("unable to find file paths, %w", findErr)
// Check if source is valid
source, err := os.Stat(cfg.sourcePath)
if err != nil {
return fmt.Errorf("unable to stat source path, %w", err)
}

var sourceFiles []string
sourceFiles = append(sourceFiles, cfg.sourcePath)

// If source is a dir, replace sourceFiles with the paths returned by findFilePaths
if source.IsDir() {
var findErr error
sourceFiles, findErr = findFilePaths(cfg.sourcePath, cfg.fileType)
if findErr != nil {
return fmt.Errorf("unable to find file paths, %w", findErr)
}
}

if len(sourceFiles) == 0 {
Expand Down
129 changes: 97 additions & 32 deletions extractor/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,7 @@ import (
"time"
)

const (
numSourceFiles = 20
numTx = 100
numMsg = 200
msgPerTx = numMsg / numTx
txPerSourceFile = numTx / numSourceFiles
sourceFileType = ".log"
)
const sourceFileType = ".jsonl"

func TestExtractor_Errors(t *testing.T) {
testTable := []struct {
Expand All @@ -41,36 +34,36 @@ func TestExtractor_Errors(t *testing.T) {
{
"no source files",
&extractorCfg{
fileType: ".log",
sourceDir: "./",
outputDir: ".",
fileType: ".log",
sourcePath: "./",
outputDir: ".",
},
errNoSourceFilesFound,
},
{
"invalid filetype",
&extractorCfg{
fileType: "",
sourceDir: ".",
outputDir: ".",
fileType: "",
sourcePath: ".",
outputDir: ".",
},
errInvalidFileType,
},
{
"invalid source dir",
&extractorCfg{
fileType: ".log",
sourceDir: "",
outputDir: ".",
fileType: ".log",
sourcePath: "",
outputDir: ".",
},
errInvalidSourceDir,
},
{
"invalid output dir",
&extractorCfg{
fileType: ".log",
sourceDir: ".",
outputDir: "",
fileType: ".log",
sourcePath: ".",
outputDir: "",
},
errInvalidOutputDir,
},
Expand All @@ -91,29 +84,97 @@ func TestExtractor_Errors(t *testing.T) {
}
}

func TestValidFlow(t *testing.T) {
// TestValidFlow_Dir runs execExtract with sourcePath pointing at a temporary
// directory populated with 20 generated source files. For every mock
// MsgAddPackage it then checks that the first line of the metadata file and
// the first line of each package file under the output directory equal the
// values derived from the mock message.
func TestValidFlow_Dir(t *testing.T) {
	t.Parallel()

	// Generate temporary output dir
	outputDir, err := os.MkdirTemp(".", "outputDir")
	require.NoError(t, err)
	t.Cleanup(removeDir(t, outputDir))

	// Generate temporary source dir
	sourceDir, err := os.MkdirTemp(".", "sourceDir")
	require.NoError(t, err)
	t.Cleanup(removeDir(t, sourceDir))

	// Set correct config
	var cfg = &extractorCfg{
		fileType:   sourceFileType,
		sourcePath: sourceDir,
		outputDir:  outputDir,
	}

	// Generate mock messages & mock files
	mockStdMsg, mockAddPkgMsg := generateMockMsgs(t)
	_ = generateSourceFiles(t, sourceDir, mockStdMsg, 20)

	// Perform extraction
	ctx, cancelFn := context.WithTimeout(context.Background(), time.Second*5)
	defer cancelFn()

	require.NoError(t, execExtract(ctx, cfg))

	for _, msg := range mockAddPkgMsg {
		// NOTE(review): strings.TrimLeft strips any leading characters found in
		// the cutset "gno.land/", it does not strip a literal prefix. It is kept
		// unchanged because the expected path has to mirror whatever the
		// extractor writes — confirm against the writer before switching to
		// strings.TrimPrefix.
		basePath := filepath.Join(outputDir, strings.TrimLeft(msg.Package.Path, "gno.land/"))

		// Get metadata path & open metadata file
		metadataPath := filepath.Join(basePath, packageMetadataFile)
		metadataFile, err := os.Open(metadataPath)
		require.NoError(t, err)

		// Read the first line of the metadata file
		retrievedMetadata, _, err := bufio.NewReader(metadataFile).ReadLine()
		require.NoError(t, err)

		// Compare metadata; a marshalling failure must fail the test instead
		// of silently comparing against a nil expectation
		expectedMetadata, err := json.Marshal(metadataFromMsg(msg))
		require.NoError(t, err)
		assert.Equal(t, expectedMetadata, retrievedMetadata)

		// Close metadata file
		require.NoError(t, metadataFile.Close())

		// Check package file content
		for _, f := range msg.Package.Files {
			filePath := filepath.Join(basePath, f.Name)

			// Open file
			pkgFile, err := os.Open(filePath)
			require.NoError(t, err)

			// Read the first line of the file body
			retrievedFileBody, _, err := bufio.NewReader(pkgFile).ReadLine()
			require.NoError(t, err)

			// Compare file bodies
			assert.Equal(t, f.Body, string(retrievedFileBody))

			// Close the package file so handles do not accumulate across iterations
			require.NoError(t, pkgFile.Close())
		}
	}
}

func TestValidFlow_File(t *testing.T) {
t.Parallel()

// Generate temporary output dir
outputDir, err := os.MkdirTemp(".", "output")
outputDir, err := os.MkdirTemp(".", "outputDir")
require.NoError(t, err)
t.Cleanup(removeDir(t, outputDir))

// Generate temporary source dir
sourceDir, err := os.MkdirTemp(".", "source")
sourceDir, err := os.MkdirTemp(".", "sourceDir")
require.NoError(t, err)
t.Cleanup(removeDir(t, sourceDir))

// Set correct config
var cfg = &extractorCfg{
fileType: sourceFileType,
sourceDir: sourceDir,
outputDir: outputDir,
fileType: sourceFileType,
sourcePath: sourceDir,
outputDir: outputDir,
}

// Generate mock messages & mock files
mockStdMsg, mockAddPkgMsg := generateMockMsgs(t)
_ = generateSourceFiles(t, sourceDir, mockStdMsg)
_ = generateSourceFiles(t, sourceDir, mockStdMsg, 1)

// Perform extraction
ctx, cancelFn := context.WithTimeout(context.Background(), time.Second*5)
Expand Down Expand Up @@ -166,6 +227,7 @@ func TestFindFilePaths(t *testing.T) {
require.NoError(t, err)
t.Cleanup(removeDir(t, tempDir))

numSourceFiles := 20
testFiles := make([]string, numSourceFiles)

for i := 0; i < numSourceFiles; i++ {
Expand All @@ -181,7 +243,7 @@ func TestFindFilePaths(t *testing.T) {
require.NoError(t, err)
}

results, err := findFilePaths(tempDir, ".log")
results, err := findFilePaths(tempDir, sourceFileType)
require.NoError(t, err)

expectedResults := make([]string, 0, len(testFiles))
Expand Down Expand Up @@ -215,7 +277,7 @@ func TestExtractAddMessages(t *testing.T) {
t.Cleanup(removeDir(t, tempDir))

mockMsgs, mockMsgsAddPackage := generateMockMsgs(t)
sourceFiles := generateSourceFiles(t, tempDir, mockMsgs)
sourceFiles := generateSourceFiles(t, tempDir, mockMsgs, 20)

var results []vm.MsgAddPackage
for _, sf := range sourceFiles {
Expand Down Expand Up @@ -306,12 +368,14 @@ func TestWritePackageFiles(t *testing.T) {
}

// Helpers
func generateSourceFiles(t *testing.T, dir string, mockMsgs []std.Msg) []string {
func generateSourceFiles(t *testing.T, dir string, mockMsgs []std.Msg, numSourceFiles int) []string {
t.Helper()

var (
mockTx = make([]std.Tx, numTx)
testFiles = make([]string, numSourceFiles)
txPerSourceFile = 5
mockTx = make([]std.Tx, txPerSourceFile*numSourceFiles)
testFiles = make([]string, numSourceFiles)
msgPerTx = len(mockMsgs) / len(mockTx)
)

// Generate transactions to wrap messages
Expand Down Expand Up @@ -368,6 +432,7 @@ func generateMockMsgs(t *testing.T) ([]std.Msg, []vm.MsgAddPackage) {

var ret []std.Msg
var addPkgRet []vm.MsgAddPackage
numMsg := 100

pkgID := 0

Expand Down

0 comments on commit a03195a

Please sign in to comment.