
Commit

sqlstream -> pipeclean
xeger committed Apr 11, 2023
1 parent dea1148 commit 2f90ae3
Showing 20 changed files with 44 additions and 44 deletions.
24 changes: 12 additions & 12 deletions Makefile
@@ -1,18 +1,18 @@
 .PHONY: benchmark bin clean default test
 
 default:
-	cat data/sql/input.sql | ./sqlstream scrub data/models
+	cat data/sql/input.sql | ./pipeclean scrub data/models
 
-bin: bin/sqlstream-darwin-amd64 bin/sqlstream-darwin-arm64
+bin: bin/pipeclean-darwin-amd64 bin/pipeclean-darwin-arm64
 
-bin/sqlstream-darwin-amd64:
-	env GOOS=darwin GOARCH=amd64 go build -o bin/sqlstream-darwin-amd64
+bin/pipeclean-darwin-amd64:
+	env GOOS=darwin GOARCH=amd64 go build -o bin/pipeclean-darwin-amd64
 
-bin/sqlstream-darwin-arm64:
-	env GOOS=darwin GOARCH=amd64 go build -o bin/sqlstream-darwin-arm64
+bin/pipeclean-darwin-arm64:
+	env GOOS=darwin GOARCH=amd64 go build -o bin/pipeclean-darwin-arm64
 
 benchmark:
-	time cat data/sql/benchmark.sql | ./sqlstream scrub data/models > data/sql/benchmark-output.sql
+	time cat data/sql/benchmark.sql | ./pipeclean scrub data/models > data/sql/benchmark-output.sql
 
 clean:
 	rm -Rf bin
@@ -24,16 +24,16 @@ test:
 data: data/models/city.json data/models/givenName.json data/models/sn.json data/models/streetName.json
 
 data/models/city.txt: data/training/city.csv
-	tail -n+2 data/training/city.csv | ./sqlstream train dict > data/models/city.txt
+	tail -n+2 data/training/city.csv | ./pipeclean train dict > data/models/city.txt
 
 data/models/city.json: Makefile data/models/city.txt
-	cat data/models/city.txt | ./sqlstream train markov:words:5 > data/models/city.json
+	cat data/models/city.txt | ./pipeclean train markov:words:5 > data/models/city.json
 
 data/models/givenName.json: Makefile data/training/givenName.csv
-	tail -n+2 data/training/givenName.csv | ./sqlstream train markov:words:3 > data/models/givenName.json
+	tail -n+2 data/training/givenName.csv | ./pipeclean train markov:words:3 > data/models/givenName.json
 
 data/models/sn.json: Makefile data/training/sn.csv
-	tail -n+2 data/training/sn.csv | ./sqlstream train markov:words:3 > data/models/sn.json
+	tail -n+2 data/training/sn.csv | ./pipeclean train markov:words:3 > data/models/sn.json
 
 data/models/streetName.json: Makefile data/training/streetName.csv
-	tail -n+2 data/training/streetName.csv | ./sqlstream train markov:words:5 > data/models/streetName.json
+	tail -n+2 data/training/streetName.csv | ./pipeclean train markov:words:5 > data/models/streetName.json
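
Read as a before/after pair, the default target's pipeline changes only in the command name; a minimal sketch, reusing the sample paths already referenced in this Makefile:

# Before this commit
cat data/sql/input.sql | ./sqlstream scrub data/models

# After this commit: same arguments, renamed binary
cat data/sql/input.sql | ./pipeclean scrub data/models
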
4 changes: 2 additions & 2 deletions cmd/generate.go
@@ -6,7 +6,7 @@ import (
 	"os"
 
 	"github.com/spf13/cobra"
-	"github.com/xeger/sqlstream/nlp"
+	"github.com/xeger/pipeclean/nlp"
 )
 
 // Used for flags.
@@ -24,7 +24,7 @@ func generate(cmd *cobra.Command, args []string) {
 	if len(args) == 1 {
 		modelFile = args[0]
 	} else {
-		fmt.Fprintln(os.Stderr, "Usage: sqlstream generate <modelFile>")
+		fmt.Fprintln(os.Stderr, "Usage: pipeclean generate <modelFile>")
 		os.Exit(1)
 	}
 
4 changes: 2 additions & 2 deletions cmd/recognize.go
@@ -7,7 +7,7 @@ import (
 	"strings"
 
 	"github.com/spf13/cobra"
-	"github.com/xeger/sqlstream/nlp"
+	"github.com/xeger/pipeclean/nlp"
 )
 
 // Used for flags.
@@ -32,7 +32,7 @@ func recognize(cmd *cobra.Command, args []string) {
 	if len(args) == 1 {
 		modelFile = args[0]
 	} else {
-		fmt.Fprintln(os.Stderr, "Usage: sqlstream recognize <modelFile>")
+		fmt.Fprintln(os.Stderr, "Usage: pipeclean recognize <modelFile>")
 		os.Exit(1)
 	}
 
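
A hedged sketch of invoking the renamed subcommands, based only on the usage strings in generate.go and recognize.go above; the model path is borrowed from the Makefile's data targets, and feeding candidate strings to recognize via stdin is an assumption, not something this diff shows:

# Generate synthetic values from a trained model
./pipeclean generate data/models/city.json

# Check whether input matches a trained model (stdin input is assumed)
echo "Springfield" | ./pipeclean recognize data/models/city.json
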
2 changes: 1 addition & 1 deletion cmd/root.go
@@ -6,7 +6,7 @@ import (
 
 var (
 	rootCmd = &cobra.Command{
-		Use: "sqlstream",
+		Use: "pipeclean",
 		Short: "Streaming MySQL Sanitizer",
 		Long: `Masks sensitive text in MySQL dumps.
 Uses a heuristic rule system, applying language models to avoid depending on
8 changes: 4 additions & 4 deletions cmd/scrub.go
@@ -7,10 +7,10 @@ import (
 	"runtime"
 
 	"github.com/spf13/cobra"
-	"github.com/xeger/sqlstream/nlp"
-	"github.com/xeger/sqlstream/scrubbing"
-	"github.com/xeger/sqlstream/scrubbing/json"
-	"github.com/xeger/sqlstream/scrubbing/mysql"
+	"github.com/xeger/pipeclean/nlp"
+	"github.com/xeger/pipeclean/scrubbing"
+	"github.com/xeger/pipeclean/scrubbing/json"
+	"github.com/xeger/pipeclean/scrubbing/mysql"
 )
 
 // Used for flags.
10 changes: 5 additions & 5 deletions cmd/train.go
@@ -8,7 +8,7 @@ import (
 	"strings"
 
 	"github.com/spf13/cobra"
-	"github.com/xeger/sqlstream/nlp"
+	"github.com/xeger/pipeclean/nlp"
 )
 
 // Used for flags.
@@ -51,11 +51,11 @@ func train(cmd *cobra.Command, args []string) {
 	case "words":
 		markovSep = ""
 	default:
-		fmt.Fprintln(os.Stderr, "Usage: sqlstream train <modelType>[param1:param2:...]")
+		fmt.Fprintln(os.Stderr, "Usage: pipeclean train <modelType>[param1:param2:...]")
 		fmt.Fprintln(os.Stderr, "Examples:")
-		fmt.Fprintln(os.Stderr, " sqlstream train dict # dictionary-lookup model")
-		fmt.Fprintln(os.Stderr, " sqlstream train markov:words:5 # markov word model of order 5")
-		fmt.Fprintln(os.Stderr, " sqlstream train markov:sentences:3 # markov sentence model of order 5")
+		fmt.Fprintln(os.Stderr, " pipeclean train dict # dictionary-lookup model")
+		fmt.Fprintln(os.Stderr, " pipeclean train markov:words:5 # markov word model of order 5")
+		fmt.Fprintln(os.Stderr, " pipeclean train markov:sentences:3 # markov sentence model of order 5")
 		os.Exit(1)
 	}
 
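
The training flow itself is untouched by this commit; only the command name changes. A sketch of the two model types named in the usage text, taken from the Makefile's city targets (tail -n+2 skips the CSV header row):

# Dictionary-lookup model
tail -n+2 data/training/city.csv | ./pipeclean train dict > data/models/city.txt

# Markov word model of order 5, trained on the dictionary output
cat data/models/city.txt | ./pipeclean train markov:words:5 > data/models/city.json
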
2 changes: 1 addition & 1 deletion go.mod
@@ -1,4 +1,4 @@
-module github.com/xeger/sqlstream
+module github.com/xeger/pipeclean
 
 go 1.20
 
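
For downstream Go code that imported the old module path, a hypothetical one-shot migration could look like the following; the old and new prefixes are taken verbatim from this diff, and GNU grep/sed/xargs are assumed:

# Rewrite import paths in all Go files under the current directory
grep -rl 'github.com/xeger/sqlstream' --include='*.go' . \
  | xargs -r sed -i 's|github.com/xeger/sqlstream|github.com/xeger/pipeclean|g'
go mod tidy
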
2 changes: 1 addition & 1 deletion main.go
@@ -1,7 +1,7 @@
 package main
 
 import (
-	"github.com/xeger/sqlstream/cmd"
+	"github.com/xeger/pipeclean/cmd"
 )
 
 func main() {
2 changes: 1 addition & 1 deletion nlp/clean_test.go
@@ -3,7 +3,7 @@ package nlp_test
 import (
 	"testing"
 
-	"github.com/xeger/sqlstream/nlp"
+	"github.com/xeger/pipeclean/nlp"
 )
 
 func TestClean(t *testing.T) {
2 changes: 1 addition & 1 deletion nlp/is_test.go
@@ -3,7 +3,7 @@ package nlp_test
 import (
 	"testing"
 
-	"github.com/xeger/sqlstream/nlp"
+	"github.com/xeger/pipeclean/nlp"
 )
 
 func TestIsLower(t *testing.T) {
4 changes: 2 additions & 2 deletions nlp/markov_model.go
@@ -7,7 +7,7 @@ import (
 	"strings"
 
 	"github.com/xeger/gomarkov"
-	"github.com/xeger/sqlstream/rand"
+	"github.com/xeger/pipeclean/rand"
 )
 
 type MarkovModel struct {
@@ -21,7 +21,7 @@ type markovModelJSON struct {
 	Chain gomarkov.Chain `json:"chain"`
 }
 
-const markovModelTypeID = "github.com/xeger/sqlstream/nlp.MarkovModel"
+const markovModelTypeID = "github.com/xeger/pipeclean/nlp.MarkovModel"
 
 func NewMarkovModel(order int, separator string) *MarkovModel {
 	return &MarkovModel{
2 changes: 1 addition & 1 deletion nlp/markov_model_test.go
@@ -3,7 +3,7 @@ package nlp_test
 import (
 	"testing"
 
-	"github.com/xeger/sqlstream/nlp"
+	"github.com/xeger/pipeclean/nlp"
 )
 
 func TestDeterminism(t *testing.T) {
2 changes: 1 addition & 1 deletion rand/new_test.go
@@ -3,7 +3,7 @@ package rand_test
 import (
 	"testing"
 
-	"github.com/xeger/sqlstream/rand"
+	"github.com/xeger/pipeclean/rand"
 )
 
 func TestNewSource(t *testing.T) {
2 changes: 1 addition & 1 deletion scrubbing/json/scrub.go
@@ -4,7 +4,7 @@ import (
 	"encoding/json"
 	"io"
 
-	"github.com/xeger/sqlstream/scrubbing"
+	"github.com/xeger/pipeclean/scrubbing"
 )
 
 // Scrub sanitizes a single line, which may contain multiple SQL statements.
2 changes: 1 addition & 1 deletion scrubbing/mysql/mysql_scrubber.go
@@ -3,7 +3,7 @@ package mysql
 import (
 	"github.com/pingcap/tidb/parser/ast"
 	"github.com/pingcap/tidb/parser/test_driver"
-	"github.com/xeger/sqlstream/scrubbing"
+	"github.com/xeger/pipeclean/scrubbing"
 )
 
 type mysqlScrubber struct {
2 changes: 1 addition & 1 deletion scrubbing/mysql/scrub.go
@@ -5,7 +5,7 @@ import (
 	"fmt"
 
 	"github.com/pingcap/tidb/parser"
-	"github.com/xeger/sqlstream/scrubbing"
+	"github.com/xeger/pipeclean/scrubbing"
 )
 
 func scrub(msc *mysqlScrubber, p *parser.Parser, line string) string {
4 changes: 2 additions & 2 deletions scrubbing/mysql/scrub_test.go
@@ -7,8 +7,8 @@ import (
 	"strings"
 	"testing"
 
-	"github.com/xeger/sqlstream/scrubbing"
-	"github.com/xeger/sqlstream/scrubbing/mysql"
+	"github.com/xeger/pipeclean/scrubbing"
+	"github.com/xeger/pipeclean/scrubbing/mysql"
 )
 
 func read(t *testing.T, name string) string {
4 changes: 2 additions & 2 deletions scrubbing/scrubber.go
@@ -8,8 +8,8 @@ import (
 	"regexp"
 	"strings"
 
-	"github.com/xeger/sqlstream/nlp"
-	"github.com/xeger/sqlstream/rand"
+	"github.com/xeger/pipeclean/nlp"
+	"github.com/xeger/pipeclean/rand"
 	"gopkg.in/yaml.v3"
 )
 
4 changes: 2 additions & 2 deletions scrubbing/scrubber_test.go
@@ -3,10 +3,10 @@ package scrubbing_test
 import (
 	"testing"
 
-	"github.com/xeger/sqlstream/scrubbing"
+	"github.com/xeger/pipeclean/scrubbing"
 )
 
-const salt = "github.com/xeger/sqlstream/scrubbing"
+const salt = "github.com/xeger/pipeclean/scrubbing"
 
 func scrub(s string) string {
 	return scrubSalted(s, "")
2 changes: 1 addition & 1 deletion sqlstream
@@ -1,4 +1,4 @@
 #! /bin/sh
 
-# Convenience wrapper for running sqlstream from source.
+# Convenience wrapper for running pipeclean from source.
 exec go run main.go $@
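
Because the wrapper just execs go run main.go, it behaves like the built binary during development; a usage sketch mirroring the Makefile's default target (the script file itself keeps its old name in this commit, so it is still invoked as ./sqlstream):

# Runs from source; all arguments are passed through to the CLI
cat data/sql/input.sql | ./sqlstream scrub data/models
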
