From 8768eaae7fd8e173ce30f121b9ca8571ad05ae6c Mon Sep 17 00:00:00 2001 From: Kristoffer Dalby Date: Sun, 10 Dec 2023 14:36:37 +0100 Subject: [PATCH] add versioned migrations Up until this commit, headscale has had one massive migration func that runs every time the server starts. This means that every time we have migrated anything, we have added stuff to it and not really taken into account how the database of clients look like. This means that sometimes the new stuff has had to be added in the middle, as it would break things further down. It is kind of a miracle that it has worked so far with no permament damage, particularly considering some of our larger migrations like Machine -> Node or Namespace -> User. This commit addresses this by adding gormigrate, which is a simple versioned migration system. As we have to maintain our messy past, the first migration is the large, currently working, but hard to change function. Subsequent changes should be added with new IDs after this. Signed-off-by: Kristoffer Dalby --- CHANGELOG.md | 1 + flake.nix | 2 +- go.mod | 1 + go.sum | 2 + hscontrol/db/db.go | 477 ++++++++++++++++++++-------------------- hscontrol/types/node.go | 2 +- 6 files changed, 241 insertions(+), 244 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 47652cbad9..2f3babab8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ after improving the test harness as part of adopting [#1460](https://github.com/ ### Changes +Use versioned migrations [#1644](https://github.com/juanfont/headscale/pull/1644) Make the OIDC callback page better [#1484](https://github.com/juanfont/headscale/pull/1484) SSH support [#1487](https://github.com/juanfont/headscale/pull/1487) State management has been improved [#1492](https://github.com/juanfont/headscale/pull/1492) diff --git a/flake.nix b/flake.nix index 44cf6fe85f..638bf7c195 100644 --- a/flake.nix +++ b/flake.nix @@ -31,7 +31,7 @@ # When updating go.mod or go.sum, a new sha will need to be calculated, # update this if you have a mismatch after doing a change to thos files. - vendorHash = "sha256-7yqJbF0GkKa3wjiGWJ8BZSJyckrpwmCiX77/aoPGmRc="; + vendorHash = "sha256-u9AmJguQ5dnJpfhOeLN43apvMHuraOrJhvlEIp9RoIc="; ldflags = ["-s" "-w" "-X github.com/juanfont/headscale/cmd/headscale/cli.Version=v${version}"]; }; diff --git a/go.mod b/go.mod index dc6f3dac3b..46086c06b5 100644 --- a/go.mod +++ b/go.mod @@ -75,6 +75,7 @@ require ( github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/fxamacker/cbor/v2 v2.5.0 // indirect github.com/glebarez/go-sqlite v1.21.2 // indirect + github.com/go-gormigrate/gormigrate/v2 v2.1.1 // indirect github.com/go-jose/go-jose/v3 v3.0.1 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang-jwt/jwt v3.2.2+incompatible // indirect diff --git a/go.sum b/go.sum index f8a8c1bc4d..aa0a650bfe 100644 --- a/go.sum +++ b/go.sum @@ -101,6 +101,8 @@ github.com/glebarez/go-sqlite v1.21.2 h1:3a6LFC4sKahUunAmynQKLZceZCOzUthkRkEAl9g github.com/glebarez/go-sqlite v1.21.2/go.mod h1:sfxdZyhQjTM2Wry3gVYWaW072Ri1WMdWJi0k6+3382k= github.com/glebarez/sqlite v1.10.0 h1:u4gt8y7OND/cCei/NMHmfbLxF6xP2wgKcT/BJf2pYkc= github.com/glebarez/sqlite v1.10.0/go.mod h1:IJ+lfSOmiekhQsFTJRx/lHtGYmCdtAiTaf5wI9u5uHA= +github.com/go-gormigrate/gormigrate/v2 v2.1.1 h1:eGS0WTFRV30r103lU8JNXY27KbviRnqqIDobW3EV3iY= +github.com/go-gormigrate/gormigrate/v2 v2.1.1/go.mod h1:L7nJ620PFDKei9QOhJzqA8kRCk+E3UbV2f5gv+1ndLc= github.com/go-jose/go-jose/v3 v3.0.1 h1:pWmKFVtt+Jl0vBZTIpz/eAKwsm6LkIxDVVbFHKkchhA= github.com/go-jose/go-jose/v3 v3.0.1/go.mod h1:RNkWWRld676jZEYoV3+XK8L2ZnNSvIsxFMht0mSX+u8= github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= diff --git a/hscontrol/db/db.go b/hscontrol/db/db.go index 557827643c..030a6f0bab 100644 --- a/hscontrol/db/db.go +++ b/hscontrol/db/db.go @@ -11,6 +11,7 @@ import ( "time" "github.com/glebarez/sqlite" + "github.com/go-gormigrate/gormigrate/v2" "github.com/juanfont/headscale/hscontrol/notifier" "github.com/juanfont/headscale/hscontrol/types" "github.com/juanfont/headscale/hscontrol/util" @@ -21,15 +22,11 @@ import ( ) const ( - dbVersion = "1" - Postgres = "postgres" - Sqlite = "sqlite3" + Postgres = "postgres" + Sqlite = "sqlite3" ) -var ( - errValueNotFound = errors.New("not found") - errDatabaseNotSupported = errors.New("database type not supported") -) +var errDatabaseNotSupported = errors.New("database type not supported") // KV is a key-value store in a psql table. For future use... // TODO(kradalby): Is this used for anything? @@ -64,240 +61,269 @@ func NewHeadscaleDatabase( return nil, err } - db := HSDatabase{ - db: dbConn, - notifier: notifier, - - ipPrefixes: ipPrefixes, - baseDomain: baseDomain, - } - - log.Debug().Msgf("database %#v", dbConn) - - if dbType == Postgres { - dbConn.Exec(`create extension if not exists "uuid-ossp";`) - } - - _ = dbConn.Migrator().RenameTable("namespaces", "users") + migrations := gormigrate.New(dbConn, gormigrate.DefaultOptions, []*gormigrate.Migration{ + // New migrations should be added as transactions at the end of this list. + // The initial commit here is quite messy, completely out of order and + // has no versioning and is the tech debt of not having versioned migrations + // prior to this point. This first migration is all DB changes to bring a DB + // up to 0.23.0. + { + ID: "202312101416", + Migrate: func(tx *gorm.DB) error { + if dbType == Postgres { + tx.Exec(`create extension if not exists "uuid-ossp";`) + } - // the big rename from Machine to Node - _ = dbConn.Migrator().RenameTable("machines", "nodes") - _ = dbConn.Migrator().RenameColumn(&types.Route{}, "machine_id", "node_id") + _ = tx.Migrator().RenameTable("namespaces", "users") - err = dbConn.AutoMigrate(types.User{}) - if err != nil { - return nil, err - } + // the big rename from Machine to Node + _ = tx.Migrator().RenameTable("machines", "nodes") + _ = tx.Migrator().RenameColumn(&types.Route{}, "machine_id", "node_id") - _ = dbConn.Migrator().RenameColumn(&types.Node{}, "namespace_id", "user_id") - _ = dbConn.Migrator().RenameColumn(&types.PreAuthKey{}, "namespace_id", "user_id") - - _ = dbConn.Migrator().RenameColumn(&types.Node{}, "ip_address", "ip_addresses") - _ = dbConn.Migrator().RenameColumn(&types.Node{}, "name", "hostname") - - // GivenName is used as the primary source of DNS names, make sure - // the field is populated and normalized if it was not when the - // node was registered. - _ = dbConn.Migrator().RenameColumn(&types.Node{}, "nickname", "given_name") - - // If the Node table has a column for registered, - // find all occourences of "false" and drop them. Then - // remove the column. - if dbConn.Migrator().HasColumn(&types.Node{}, "registered") { - log.Info(). - Msg(`Database has legacy "registered" column in node, removing...`) - - nodes := types.Nodes{} - if err := dbConn.Not("registered").Find(&nodes).Error; err != nil { - log.Error().Err(err).Msg("Error accessing db") - } - - for _, node := range nodes { - log.Info(). - Str("node", node.Hostname). - Str("machine_key", node.MachineKey.ShortString()). - Msg("Deleting unregistered node") - if err := dbConn.Delete(&types.Node{}, node.ID).Error; err != nil { - log.Error(). - Err(err). - Str("node", node.Hostname). - Str("machine_key", node.MachineKey.ShortString()). - Msg("Error deleting unregistered node") - } - } - - err := dbConn.Migrator().DropColumn(&types.Node{}, "registered") - if err != nil { - log.Error().Err(err).Msg("Error dropping registered column") - } - } + err = tx.AutoMigrate(types.User{}) + if err != nil { + return err + } - err = dbConn.AutoMigrate(&types.Route{}) - if err != nil { - return nil, err - } + _ = tx.Migrator().RenameColumn(&types.Node{}, "namespace_id", "user_id") + _ = tx.Migrator().RenameColumn(&types.PreAuthKey{}, "namespace_id", "user_id") - err = dbConn.AutoMigrate(&types.Node{}) - if err != nil { - return nil, err - } + _ = tx.Migrator().RenameColumn(&types.Node{}, "ip_address", "ip_addresses") + _ = tx.Migrator().RenameColumn(&types.Node{}, "name", "hostname") - // Ensure all keys have correct prefixes - // https://github.com/tailscale/tailscale/blob/main/types/key/node.go#L35 - type result struct { - ID uint64 - MachineKey string - NodeKey string - DiscoKey string - } - var results []result - err = db.db.Raw("SELECT id, node_key, machine_key, disco_key FROM nodes").Find(&results).Error - if err != nil { - return nil, err - } + // GivenName is used as the primary source of DNS names, make sure + // the field is populated and normalized if it was not when the + // node was registered. + _ = tx.Migrator().RenameColumn(&types.Node{}, "nickname", "given_name") - for _, node := range results { - mKey := node.MachineKey - if !strings.HasPrefix(node.MachineKey, "mkey:") { - mKey = "mkey:" + node.MachineKey - } - nKey := node.NodeKey - if !strings.HasPrefix(node.NodeKey, "nodekey:") { - nKey = "nodekey:" + node.NodeKey - } - - dKey := node.DiscoKey - if !strings.HasPrefix(node.DiscoKey, "discokey:") { - dKey = "discokey:" + node.DiscoKey - } - - err := db.db.Exec( - "UPDATE nodes SET machine_key = @mKey, node_key = @nKey, disco_key = @dKey WHERE ID = @id", - sql.Named("mKey", mKey), - sql.Named("nKey", nKey), - sql.Named("dKey", dKey), - sql.Named("id", node.ID), - ).Error - if err != nil { - return nil, err - } - } + // If the Node table has a column for registered, + // find all occourences of "false" and drop them. Then + // remove the column. + if tx.Migrator().HasColumn(&types.Node{}, "registered") { + log.Info(). + Msg(`Database has legacy "registered" column in node, removing...`) + + nodes := types.Nodes{} + if err := tx.Not("registered").Find(&nodes).Error; err != nil { + log.Error().Err(err).Msg("Error accessing db") + } + + for _, node := range nodes { + log.Info(). + Str("node", node.Hostname). + Str("machine_key", node.MachineKey.ShortString()). + Msg("Deleting unregistered node") + if err := tx.Delete(&types.Node{}, node.ID).Error; err != nil { + log.Error(). + Err(err). + Str("node", node.Hostname). + Str("machine_key", node.MachineKey.ShortString()). + Msg("Error deleting unregistered node") + } + } + + err := tx.Migrator().DropColumn(&types.Node{}, "registered") + if err != nil { + log.Error().Err(err).Msg("Error dropping registered column") + } + } - if dbConn.Migrator().HasColumn(&types.Node{}, "enabled_routes") { - log.Info().Msgf("Database has legacy enabled_routes column in node, migrating...") - - type NodeAux struct { - ID uint64 - EnabledRoutes types.IPPrefixes - } - - nodesAux := []NodeAux{} - err := dbConn.Table("nodes").Select("id, enabled_routes").Scan(&nodesAux).Error - if err != nil { - log.Fatal().Err(err).Msg("Error accessing db") - } - for _, node := range nodesAux { - for _, prefix := range node.EnabledRoutes { + err = tx.AutoMigrate(&types.Route{}) if err != nil { - log.Error(). - Err(err). - Str("enabled_route", prefix.String()). - Msg("Error parsing enabled_route") + return err + } - continue + err = tx.AutoMigrate(&types.Node{}) + if err != nil { + return err } - err = dbConn.Preload("Node"). - Where("node_id = ? AND prefix = ?", node.ID, types.IPPrefix(prefix)). - First(&types.Route{}). - Error - if err == nil { - log.Info(). - Str("enabled_route", prefix.String()). - Msg("Route already migrated to new table, skipping") + // Ensure all keys have correct prefixes + // https://github.com/tailscale/tailscale/blob/main/types/key/node.go#L35 + type result struct { + ID uint64 + MachineKey string + NodeKey string + DiscoKey string + } + var results []result + err = tx.Raw("SELECT id, node_key, machine_key, disco_key FROM nodes").Find(&results).Error + if err != nil { + return err + } - continue + for _, node := range results { + mKey := node.MachineKey + if !strings.HasPrefix(node.MachineKey, "mkey:") { + mKey = "mkey:" + node.MachineKey + } + nKey := node.NodeKey + if !strings.HasPrefix(node.NodeKey, "nodekey:") { + nKey = "nodekey:" + node.NodeKey + } + + dKey := node.DiscoKey + if !strings.HasPrefix(node.DiscoKey, "discokey:") { + dKey = "discokey:" + node.DiscoKey + } + + err := tx.Exec( + "UPDATE nodes SET machine_key = @mKey, node_key = @nKey, disco_key = @dKey WHERE ID = @id", + sql.Named("mKey", mKey), + sql.Named("nKey", nKey), + sql.Named("dKey", dKey), + sql.Named("id", node.ID), + ).Error + if err != nil { + return err + } } - route := types.Route{ - NodeID: node.ID, - Advertised: true, - Enabled: true, - Prefix: types.IPPrefix(prefix), + if tx.Migrator().HasColumn(&types.Node{}, "enabled_routes") { + log.Info().Msgf("Database has legacy enabled_routes column in node, migrating...") + + type NodeAux struct { + ID uint64 + EnabledRoutes types.IPPrefixes + } + + nodesAux := []NodeAux{} + err := tx.Table("nodes").Select("id, enabled_routes").Scan(&nodesAux).Error + if err != nil { + log.Fatal().Err(err).Msg("Error accessing db") + } + for _, node := range nodesAux { + for _, prefix := range node.EnabledRoutes { + if err != nil { + log.Error(). + Err(err). + Str("enabled_route", prefix.String()). + Msg("Error parsing enabled_route") + + continue + } + + err = tx.Preload("Node"). + Where("node_id = ? AND prefix = ?", node.ID, types.IPPrefix(prefix)). + First(&types.Route{}). + Error + if err == nil { + log.Info(). + Str("enabled_route", prefix.String()). + Msg("Route already migrated to new table, skipping") + + continue + } + + route := types.Route{ + NodeID: node.ID, + Advertised: true, + Enabled: true, + Prefix: types.IPPrefix(prefix), + } + if err := tx.Create(&route).Error; err != nil { + log.Error().Err(err).Msg("Error creating route") + } else { + log.Info(). + Uint64("node_id", route.NodeID). + Str("prefix", prefix.String()). + Msg("Route migrated") + } + } + } + + err = tx.Migrator().DropColumn(&types.Node{}, "enabled_routes") + if err != nil { + log.Error().Err(err).Msg("Error dropping enabled_routes column") + } } - if err := dbConn.Create(&route).Error; err != nil { - log.Error().Err(err).Msg("Error creating route") - } else { - log.Info(). - Uint64("node_id", route.NodeID). - Str("prefix", prefix.String()). - Msg("Route migrated") + + if tx.Migrator().HasColumn(&types.Node{}, "given_name") { + nodes := types.Nodes{} + if err := tx.Find(&nodes).Error; err != nil { + log.Error().Err(err).Msg("Error accessing db") + } + + for item, node := range nodes { + if node.GivenName == "" { + normalizedHostname, err := util.NormalizeToFQDNRulesConfigFromViper( + node.Hostname, + ) + if err != nil { + log.Error(). + Caller(). + Str("hostname", node.Hostname). + Err(err). + Msg("Failed to normalize node hostname in DB migration") + } + + err = tx.Model(nodes[item]).Updates(types.Node{ + GivenName: normalizedHostname, + }).Error + if err != nil { + log.Error(). + Caller(). + Str("hostname", node.Hostname). + Err(err). + Msg("Failed to save normalized node name in DB migration") + } + } + } } - } - } - err = dbConn.Migrator().DropColumn(&types.Node{}, "enabled_routes") - if err != nil { - log.Error().Err(err).Msg("Error dropping enabled_routes column") - } - } + err = tx.AutoMigrate(&KV{}) + if err != nil { + return err + } - if dbConn.Migrator().HasColumn(&types.Node{}, "given_name") { - nodes := types.Nodes{} - if err := dbConn.Find(&nodes).Error; err != nil { - log.Error().Err(err).Msg("Error accessing db") - } - - for item, node := range nodes { - if node.GivenName == "" { - normalizedHostname, err := util.NormalizeToFQDNRulesConfigFromViper( - node.Hostname, - ) + err = tx.AutoMigrate(&types.PreAuthKey{}) if err != nil { - log.Error(). - Caller(). - Str("hostname", node.Hostname). - Err(err). - Msg("Failed to normalize node hostname in DB migration") + return err } - err = db.RenameNode(nodes[item], normalizedHostname) + err = tx.AutoMigrate(&types.PreAuthKeyACLTag{}) if err != nil { - log.Error(). - Caller(). - Str("hostname", node.Hostname). - Err(err). - Msg("Failed to save normalized node name in DB migration") + return err } - } - } - } - err = dbConn.AutoMigrate(&KV{}) - if err != nil { - return nil, err - } + _ = tx.Migrator().DropTable("shared_machines") - err = dbConn.AutoMigrate(&types.PreAuthKey{}) - if err != nil { - return nil, err - } + err = tx.AutoMigrate(&types.APIKey{}) + if err != nil { + return err + } - err = dbConn.AutoMigrate(&types.PreAuthKeyACLTag{}) - if err != nil { - return nil, err + return nil + }, + Rollback: func(tx *gorm.DB) error { + return nil + }, + }, + { + // drop key-value table, it is not used, and has not contained + // useful data for a long time or ever. + ID: "202312101430", + Migrate: func(tx *gorm.DB) error { + return tx.Migrator().DropTable("kvs") + }, + Rollback: func(tx *gorm.DB) error { + return nil + }, + }, + }) + + if err = migrations.Migrate(); err != nil { + log.Fatal().Err(err).Msgf("Migration failed: %v", err) } - _ = dbConn.Migrator().DropTable("shared_machines") + db := HSDatabase{ + db: dbConn, + notifier: notifier, - err = dbConn.AutoMigrate(&types.APIKey{}) - if err != nil { - return nil, err + ipPrefixes: ipPrefixes, + baseDomain: baseDomain, } - // TODO(kradalby): is this needed? - err = db.setValue("db_version", dbVersion) - return &db, err } @@ -347,39 +373,6 @@ func openDB(dbType, connectionAddr string, debug bool) (*gorm.DB, error) { ) } -// getValue returns the value for the given key in KV. -func (hsdb *HSDatabase) getValue(key string) (string, error) { - var row KV - if result := hsdb.db.First(&row, "key = ?", key); errors.Is( - result.Error, - gorm.ErrRecordNotFound, - ) { - return "", errValueNotFound - } - - return row.Value, nil -} - -// setValue sets value for the given key in KV. -func (hsdb *HSDatabase) setValue(key string, value string) error { - keyValue := KV{ - Key: key, - Value: value, - } - - if _, err := hsdb.getValue(key); err == nil { - hsdb.db.Model(&keyValue).Where("key = ?", key).Update("value", value) - - return nil - } - - if err := hsdb.db.Create(keyValue).Error; err != nil { - return fmt.Errorf("failed to create key value pair in the database: %w", err) - } - - return nil -} - func (hsdb *HSDatabase) PingDB(ctx context.Context) error { ctx, cancel := context.WithTimeout(ctx, time.Second) defer cancel() diff --git a/hscontrol/types/node.go b/hscontrol/types/node.go index bb88fc323e..aa61114390 100644 --- a/hscontrol/types/node.go +++ b/hscontrol/types/node.go @@ -62,7 +62,7 @@ type Node struct { // it is _only_ used for reading and writing the key to the // database and should not be used. // Use Endpoints instead. - HostinfoDatabaseField string `gorm:"column:hostinfo"` + HostinfoDatabaseField string `gorm:"column:host_info"` Hostinfo *tailcfg.Hostinfo `gorm:"-"` IPAddresses NodeAddresses