From 7da2c2889d1adcba41dd9d5d53570f78dcda14ab Mon Sep 17 00:00:00 2001 From: Eric Fixler Date: Tue, 13 Aug 2024 14:43:34 +0100 Subject: [PATCH 1/2] Initial mysql migration --- .../mysql/migrations/00002_domain_settings.sql | 18 ++++++++++++++++++ .../migrations/00002_domain_settings.sql | 2 +- internal/server/version/version.go | 2 +- 3 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 database/mysql/migrations/00002_domain_settings.sql diff --git a/database/mysql/migrations/00002_domain_settings.sql b/database/mysql/migrations/00002_domain_settings.sql new file mode 100644 index 0000000..91e3b7c --- /dev/null +++ b/database/mysql/migrations/00002_domain_settings.sql @@ -0,0 +1,18 @@ +-- This migration adds per-domain settings to the database. +-- +goose Up +-- +goose StatementBegin + +CREATE TABLE IF NOT EXISTS `domain_settings` ( + `domain` VARCHAR(255) PRIMARY KEY NOT NULL, + `sitename` VARCHAR(255), + `fetch_client` VARCHAR(255), + `user_agent` VARCHAR(512), + `headers` JSON +); + +-- +goose StatementEnd + +-- +goose Down +-- +goose StatementBegin +DROP TABLE IF EXISTS domain_settings; +-- +goose StatementEnd diff --git a/database/sqlite/migrations/00002_domain_settings.sql b/database/sqlite/migrations/00002_domain_settings.sql index 42bc6ed..9c3f330 100644 --- a/database/sqlite/migrations/00002_domain_settings.sql +++ b/database/sqlite/migrations/00002_domain_settings.sql @@ -10,7 +10,7 @@ CREATE TABLE IF NOT EXISTS domain_settings ( check (length(domain) <= 255) check (length(sitename) <= 255) check (length(fetch_client) <= 255) - check (length(user_agent) <= 1024) + check (length(user_agent) <= 512) check (json_valid(headers)) ) WITHOUT ROWID; -- +goose StatementEnd diff --git a/internal/server/version/version.go b/internal/server/version/version.go index 881a42f..9284946 100644 --- a/internal/server/version/version.go +++ b/internal/server/version/version.go @@ -1,7 +1,7 @@ package version const ( - Commit = "81bd03a" + Commit = "50f6778" Tag = "v0.8.6" RepoURL = "https://github.com/efixler/scrape" ) From cf699cd90b06c6b820d0830e2b4503e680411aa3 Mon Sep 17 00:00:00 2001 From: Eric Fixler Date: Wed, 14 Aug 2024 00:32:44 +0100 Subject: [PATCH 2/2] mysql tests --- Makefile | 2 +- internal/server/version/version.go | 2 +- internal/settings/domain_test.go | 222 ----------------------------- internal/settings/mysql_test.go | 19 +++ internal/settings/sqlite_test.go | 13 ++ internal/settings/storage_test.go | 219 ++++++++++++++++++++++++++++ 6 files changed, 253 insertions(+), 224 deletions(-) create mode 100644 internal/settings/mysql_test.go create mode 100644 internal/settings/sqlite_test.go create mode 100644 internal/settings/storage_test.go diff --git a/Makefile b/Makefile index 3980c18..17e6f43 100644 --- a/Makefile +++ b/Makefile @@ -77,7 +77,7 @@ test: ## run the tests test-mysql: ## run the MySQL integration tests @echo "Running MySQL tests..." - @go test -tags mysql -coverprofile=mysql_coverage.out ./internal/storage ./database/mysql + @go test -tags mysql -coverprofile=mysql_coverage.out ./internal/settings/... ./internal/storage/... ./database/mysql/... vet: fmt ## fmt, vet, and staticcheck @echo "Running go vet and staticcheck..." diff --git a/internal/server/version/version.go b/internal/server/version/version.go index 9284946..81058e8 100644 --- a/internal/server/version/version.go +++ b/internal/server/version/version.go @@ -1,7 +1,7 @@ package version const ( - Commit = "50f6778" + Commit = "7da2c28" Tag = "v0.8.6" RepoURL = "https://github.com/efixler/scrape" ) diff --git a/internal/settings/domain_test.go b/internal/settings/domain_test.go index 7e675f3..4250477 100644 --- a/internal/settings/domain_test.go +++ b/internal/settings/domain_test.go @@ -1,21 +1,13 @@ package settings import ( - "context" "encoding/json" "fmt" - "log/slog" "math/rand" - "net/textproto" - "sort" "testing" - "github.com/efixler/scrape/database" - "github.com/efixler/scrape/database/sqlite" - "github.com/efixler/scrape/internal/storage" "github.com/efixler/scrape/resource" "github.com/efixler/scrape/ua" - "github.com/pressly/goose/v3" ) func TestJSONUnmarshal(t *testing.T) { @@ -164,164 +156,6 @@ func TestJSONMarshal(t *testing.T) { } } -func TestStoreAndRetrieve(t *testing.T) { - engine := sqlite.MustNew(sqlite.InMemoryDB()) - db := database.New(engine) - if err := db.Open(context.Background()); err != nil { - t.Fatalf("Error opening database: %v", err) - } - t.Cleanup(func() { - db.Close() - }) - dss := NewDomainSettingsStorage(db) - - tests := []struct { - name string - settings DomainSettings - expectErr bool - }{ - { - name: "empty", - settings: DomainSettings{}, - expectErr: true, - }, - { - name: "populated", - settings: DomainSettings{ - Domain: "example.com", - Sitename: "example", - FetchClient: resource.DefaultClient, - UserAgent: ua.UserAgent("Mozilla/5.0"), - Headers: MIMEHeader{"x-special": "special"}, - }, - }, - } - for _, test := range tests { - if err := dss.Save(&test.settings); (err != nil) != test.expectErr { - if !test.expectErr { - t.Fatalf("%s: can't save: %v", test.name, err) - } - continue - } else if err != nil { - continue - } - ds, err := dss.Fetch(test.settings.Domain) - if (err != nil) != test.expectErr { - if test.expectErr { - t.Errorf("%s: expected error on fetch, got none", test.name) - } else { - t.Errorf("%s: unexpected error on fetch: %v", test.name, err) - } - continue - } - if err != nil { - continue - } - if ds.Sitename != test.settings.Sitename { - t.Errorf("%s: Sitename: got %q, want %q", test.name, ds.Sitename, test.settings.Sitename) - } - if ds.FetchClient != test.settings.FetchClient { - t.Errorf("%s: FetchClient: got %v, want %v", test.name, ds.FetchClient, test.settings.FetchClient) - } - if ds.UserAgent != test.settings.UserAgent { - t.Errorf("%s: UserAgent: got %v, want %v", test.name, ds.UserAgent, test.settings.UserAgent) - } - if len(ds.Headers) != len(test.settings.Headers) { - t.Errorf("%s: Headers: got %v, want %v", test.name, ds.Headers, test.settings.Headers) - continue - } - for k := range test.settings.Headers { - if test.settings.Headers[textproto.CanonicalMIMEHeaderKey(k)] != ds.Headers[k] { - t.Errorf( - "%s: Headers[%q]: got %q, want %q", - test.name, - k, - ds.Headers[k], - test.settings.Headers[k], - ) - } - } - } -} - -func TestFetchRange(t *testing.T) { - engine := sqlite.MustNew(sqlite.InMemoryDB()) - db := database.New(engine) - if err := db.Open(context.Background()); err != nil { - t.Fatalf("Error opening database: %v", err) - } - t.Cleanup(func() { - db.Close() - }) - dss := NewDomainSettingsStorage(db) - - domains, err := populateTestDB(db, 100) - if err != nil { - t.Fatalf("can't populate test database: %v", err) - } - sort.Strings(domains) - limit := 10 - for i := 0; i < len(domains); i += limit { - ds, err := dss.FetchRange(i, limit, "") - if err != nil { - t.Fatalf("can't fetch range: %v", err) - } - for j := i; j < 10; j++ { - if ds[j].Domain != domains[j] { - t.Errorf("expected %q, got %q", domains[j], ds[j].Domain) - } - } - } - // now check a set that's smaller than limit - domains = domains[len(domains)-5:] - ds, err := dss.FetchRange(95, limit, "") - if err != nil { - t.Fatalf("can't fetch range: %v", err) - } - if len(ds) != len(domains) { - t.Fatalf("expected %d domains, got %d", len(domains), len(ds)) - } - for i := range ds { - if ds[i].Domain != domains[i] { - t.Errorf("expected %q, got %q", domains[i], ds[i].Domain) - } - } -} - -func TestDelete(t *testing.T) { - engine := sqlite.MustNew(sqlite.InMemoryDB()) - db := database.New(engine) - if err := db.Open(context.Background()); err != nil { - t.Fatalf("Error opening database: %v", err) - } - t.Cleanup(func() { - db.Close() - }) - - domains, err := populateTestDB(db, 1) - if err != nil { - t.Fatalf("can't populate test database: %v", err) - } - dss := NewDomainSettingsStorage(db) - - if deleted, err := dss.Delete(domains[0]); err != nil { - t.Fatalf("can't delete domain: %v", err) - } else if !deleted { - t.Errorf("expected domain %v to be deleted", domains[0]) - } - - if _, err = dss.Fetch(domains[0]); err != storage.ErrResourceNotFound { - t.Errorf("expected domain %v to be deleted, it wasn't", domains[0]) - } - - if deleted, err := dss.Delete(domains[0]); err != nil { - t.Fatalf("can't delete domain: %v", err) - } else if deleted { - t.Errorf("expected domain %v to already be deleted", domains[0]) - } - -} - // validateDomain checks that the domain is a valid domain name. func TestValidateDomain(t *testing.T) { tests := []struct { @@ -384,57 +218,6 @@ func TestParseDomainQuery(t *testing.T) { } } -func TestFetchRangeWithQuery(t *testing.T) { - tests := []struct { - name string - query string - expectCount int - }{ - {"empty", "", 260}, - {"*", "*", 260}, - {"a*", "a*", 10}, - {"-1", "-1", 26}, - {"c-1*", "c-1*", 1}, - {"*.com", "*.com", 260}, - } - - engine := sqlite.MustNew(sqlite.InMemoryDB()) - db := database.New(engine) - if err := db.Open(context.Background()); err != nil { - t.Fatalf("Error opening database: %v", err) - } - t.Cleanup(func() { - db.Close() - }) - dss := NewDomainSettingsStorage(db) - runes := []rune("abcdefghijklmnopqrstuvwxyz") - for rune := range runes { - for i := 0; i < 10; i++ { - domain := fmt.Sprintf("%c-%d.com", runes[rune], i) - ds := &DomainSettings{ - Domain: domain, - Sitename: "example", - FetchClient: resource.DefaultClient, - UserAgent: ua.UserAgent("Mozilla/5.0"), - Headers: MIMEHeader{"x-special": "special"}, - } - if err := dss.Save(ds); err != nil { - t.Fatalf("can't save domain: %v", err) - } - } - } - - for _, test := range tests { - ds, err := dss.FetchRange(0, 1000, test.query) - if err != nil { - t.Fatalf("[%s]: can't fetch range: %v", test.name, err) - } - if len(ds) != test.expectCount { - t.Errorf("[%s]: expected %d domains, got %d", test.name, test.expectCount, len(ds)) - } - } -} - // We only use the random domain generator for testing but we can still // just make sure that it's returning valid domains. func TestRandomDomainGenerator(t *testing.T) { @@ -466,8 +249,3 @@ func randomDomain() string { tld := tlds[rand.Intn(len(tlds))] return fmt.Sprintf("%s.%s.%s", randomString(subLen), randomString(domLen), tld) } - -func init() { - goose.SetLogger(goose.NopLogger()) - slog.SetLogLoggerLevel(slog.LevelWarn) -} diff --git a/internal/settings/mysql_test.go b/internal/settings/mysql_test.go new file mode 100644 index 0000000..27b266b --- /dev/null +++ b/internal/settings/mysql_test.go @@ -0,0 +1,19 @@ +//go:build mysql + +package settings + +import ( + "github.com/efixler/scrape/database" + "github.com/efixler/scrape/database/mysql" +) + +func testEngine() database.Engine { + engine := mysql.MustNew( + mysql.NetAddress("127.0.0.1:3306"), + mysql.Username("root"), + mysql.WithMaxConnections(1), + mysql.Schema("scrape_test"), + mysql.ForMigration(), + ) + return engine +} diff --git a/internal/settings/sqlite_test.go b/internal/settings/sqlite_test.go new file mode 100644 index 0000000..2a7448f --- /dev/null +++ b/internal/settings/sqlite_test.go @@ -0,0 +1,13 @@ +//go:build !mysql + +package settings + +import ( + "github.com/efixler/scrape/database" + "github.com/efixler/scrape/database/sqlite" +) + +func testEngine() database.Engine { + engine := sqlite.MustNew(sqlite.InMemoryDB()) + return engine +} diff --git a/internal/settings/storage_test.go b/internal/settings/storage_test.go new file mode 100644 index 0000000..16c654b --- /dev/null +++ b/internal/settings/storage_test.go @@ -0,0 +1,219 @@ +package settings + +import ( + "context" + "fmt" + "log/slog" + "net/textproto" + "sort" + "testing" + + "github.com/efixler/scrape/database" + "github.com/efixler/scrape/internal/storage" + "github.com/efixler/scrape/resource" + "github.com/efixler/scrape/ua" + "github.com/pressly/goose/v3" +) + +func getDatabase(t *testing.T) *database.DBHandle { + engine := testEngine() + db := database.New(engine) + if err := db.Open(context.TODO()); err != nil { + t.Fatalf("Error opening database: %v", err) + } + if err := db.MigrateUp(); err != nil { + t.Fatalf("Error migrating database: %v", err) + } + t.Cleanup(func() { + if err := db.MigrateReset(); err != nil { + t.Errorf("Error resetting test db: %v", err) + } + db.Close() + }) + return db +} + +func TestStoreAndRetrieve(t *testing.T) { + db := getDatabase(t) + dss := NewDomainSettingsStorage(db) + + tests := []struct { + name string + settings DomainSettings + expectErr bool + }{ + { + name: "empty", + settings: DomainSettings{}, + expectErr: true, + }, + { + name: "populated", + settings: DomainSettings{ + Domain: "example.com", + Sitename: "example", + FetchClient: resource.DefaultClient, + UserAgent: ua.UserAgent("Mozilla/5.0"), + Headers: MIMEHeader{"x-special": "special"}, + }, + }, + } + for _, test := range tests { + if err := dss.Save(&test.settings); (err != nil) != test.expectErr { + if !test.expectErr { + t.Fatalf("%s: can't save: %v", test.name, err) + } + continue + } else if err != nil { + continue + } + ds, err := dss.Fetch(test.settings.Domain) + if (err != nil) != test.expectErr { + if test.expectErr { + t.Errorf("%s: expected error on fetch, got none", test.name) + } else { + t.Errorf("%s: unexpected error on fetch: %v", test.name, err) + } + continue + } + if err != nil { + continue + } + if ds.Sitename != test.settings.Sitename { + t.Errorf("%s: Sitename: got %q, want %q", test.name, ds.Sitename, test.settings.Sitename) + } + if ds.FetchClient != test.settings.FetchClient { + t.Errorf("%s: FetchClient: got %v, want %v", test.name, ds.FetchClient, test.settings.FetchClient) + } + if ds.UserAgent != test.settings.UserAgent { + t.Errorf("%s: UserAgent: got %v, want %v", test.name, ds.UserAgent, test.settings.UserAgent) + } + if len(ds.Headers) != len(test.settings.Headers) { + t.Errorf("%s: Headers: got %v, want %v", test.name, ds.Headers, test.settings.Headers) + continue + } + for k := range test.settings.Headers { + if test.settings.Headers[textproto.CanonicalMIMEHeaderKey(k)] != ds.Headers[k] { + t.Errorf( + "%s: Headers[%q]: got %q, want %q", + test.name, + k, + ds.Headers[k], + test.settings.Headers[k], + ) + } + } + } +} + +func TestFetchRange(t *testing.T) { + db := getDatabase(t) + dss := NewDomainSettingsStorage(db) + + domains, err := populateTestDB(db, 100) + if err != nil { + t.Fatalf("can't populate test database: %v", err) + } + sort.Strings(domains) + limit := 10 + for i := 0; i < len(domains); i += limit { + ds, err := dss.FetchRange(i, limit, "") + if err != nil { + t.Fatalf("can't fetch range: %v", err) + } + for j := i; j < 10; j++ { + if ds[j].Domain != domains[j] { + t.Errorf("expected %q, got %q", domains[j], ds[j].Domain) + } + } + } + // now check a set that's smaller than limit + domains = domains[len(domains)-5:] + ds, err := dss.FetchRange(95, limit, "") + if err != nil { + t.Fatalf("can't fetch range: %v", err) + } + if len(ds) != len(domains) { + t.Fatalf("expected %d domains, got %d", len(domains), len(ds)) + } + for i := range ds { + if ds[i].Domain != domains[i] { + t.Errorf("expected %q, got %q", domains[i], ds[i].Domain) + } + } +} + +func TestDelete(t *testing.T) { + db := getDatabase(t) + + domains, err := populateTestDB(db, 1) + if err != nil { + t.Fatalf("can't populate test database: %v", err) + } + dss := NewDomainSettingsStorage(db) + + if deleted, err := dss.Delete(domains[0]); err != nil { + t.Fatalf("can't delete domain: %v", err) + } else if !deleted { + t.Errorf("expected domain %v to be deleted", domains[0]) + } + + if _, err = dss.Fetch(domains[0]); err != storage.ErrResourceNotFound { + t.Errorf("expected domain %v to be deleted, it wasn't", domains[0]) + } + + if deleted, err := dss.Delete(domains[0]); err != nil { + t.Fatalf("can't delete domain: %v", err) + } else if deleted { + t.Errorf("expected domain %v to already be deleted", domains[0]) + } +} + +func TestFetchRangeWithQuery(t *testing.T) { + tests := []struct { + name string + query string + expectCount int + }{ + {"empty", "", 260}, + {"*", "*", 260}, + {"a*", "a*", 10}, + {"-1", "-1", 26}, + {"c-1*", "c-1*", 1}, + {"*.com", "*.com", 260}, + } + + db := getDatabase(t) + dss := NewDomainSettingsStorage(db) + runes := []rune("abcdefghijklmnopqrstuvwxyz") + for rune := range runes { + for i := 0; i < 10; i++ { + domain := fmt.Sprintf("%c-%d.com", runes[rune], i) + ds := &DomainSettings{ + Domain: domain, + Sitename: "example", + FetchClient: resource.DefaultClient, + UserAgent: ua.UserAgent("Mozilla/5.0"), + Headers: MIMEHeader{"x-special": "special"}, + } + if err := dss.Save(ds); err != nil { + t.Fatalf("can't save domain: %v", err) + } + } + } + + for _, test := range tests { + ds, err := dss.FetchRange(0, 1000, test.query) + if err != nil { + t.Fatalf("[%s]: can't fetch range: %v", test.name, err) + } + if len(ds) != test.expectCount { + t.Errorf("[%s]: expected %d domains, got %d", test.name, test.expectCount, len(ds)) + } + } +} + +func init() { + goose.SetLogger(goose.NopLogger()) + slog.SetLogLoggerLevel(slog.LevelWarn) +}