diff --git a/tpu-provisioner/cmd/main.go b/tpu-provisioner/cmd/main.go
index b876926cd..2be68daef 100644
--- a/tpu-provisioner/cmd/main.go
+++ b/tpu-provisioner/cmd/main.go
@@ -83,6 +83,10 @@ func main() {
 		GCPNodeSecondaryDisk string `envconfig:"GCP_NODE_SECONDARY_DISK" default:""`
 		GCPNodeSecureBoot bool `envconfig:"GCP_NODE_SECURE_BOOT" default:"true"`
 
+		// GCPForceOnDemand forces the controller to create nodes on demand, even if
+		// the Pod requests a reservation or spot.
+		GCPForceOnDemand bool `envconfig:"GCP_FORCE_ON_DEMAND" default:"false"`
+
 		// NodeMinLifespan is the amount of time that should pass between a Node object
 		// creation and a cleanup of that Node. This needs to be long enough to allow
 		// the node to become Ready and for a pending Pod to be scheduled on it.
@@ -203,6 +207,7 @@ func main() {
 			NodeSecondaryDisk: cfg.GCPNodeSecondaryDisk,
 			NodeTags: cfg.GCPNodeTags,
 			NodeSecureBoot: cfg.GCPNodeSecureBoot,
+			ForceOnDemand: cfg.GCPForceOnDemand,
 		},
 		Recorder: mgr.GetEventRecorderFor("tpu-provisioner"),
 	}
diff --git a/tpu-provisioner/internal/cloud/gke.go b/tpu-provisioner/internal/cloud/gke.go
index 61c878e66..840867838 100644
--- a/tpu-provisioner/internal/cloud/gke.go
+++ b/tpu-provisioner/internal/cloud/gke.go
@@ -276,27 +276,30 @@ func (g *GKE) nodePoolForPod(name string, p *corev1.Pod) (*containerv1beta1.Node
 	}
 
 	var reservation *containerv1beta1.ReservationAffinity
-	if resName, ok := p.Spec.NodeSelector["cloud.google.com/reservation-name"]; ok {
-		reservation = &containerv1beta1.ReservationAffinity{
-			ConsumeReservationType: "SPECIFIC_RESERVATION",
-			Key: "compute.googleapis.com/reservation-name",
-			Values: []string{
-				resName,
-			},
-		}
-	}
-
 	var taints []*containerv1beta1.NodeTaint
+	var spot bool
+
+	if !g.ClusterContext.ForceOnDemand {
+		if resName, ok := p.Spec.NodeSelector["cloud.google.com/reservation-name"]; ok {
+			reservation = &containerv1beta1.ReservationAffinity{
+				ConsumeReservationType: "SPECIFIC_RESERVATION",
+				Key: "compute.googleapis.com/reservation-name",
+				Values: []string{
+					resName,
+				},
+			}
+		}
 
-	spot := p.Spec.NodeSelector["cloud.google.com/gke-spot"] == "true"
-	if spot {
-		// Add the taint that NAP would add.
-		// https://cloud.google.com/kubernetes-engine/docs/concepts/spot-vms#spotvms-nap
-		taints = append(taints, &containerv1beta1.NodeTaint{
-			Key: "cloud.google.com/gke-spot",
-			Value: "true",
-			Effect: "NO_SCHEDULE",
-		})
+		spot = p.Spec.NodeSelector["cloud.google.com/gke-spot"] == "true"
+		if spot {
+			// Add the taint that NAP would add.
+			// https://cloud.google.com/kubernetes-engine/docs/concepts/spot-vms#spotvms-nap
+			taints = append(taints, &containerv1beta1.NodeTaint{
+				Key: "cloud.google.com/gke-spot",
+				Value: "true",
+				Effect: "NO_SCHEDULE",
+			})
+		}
 	}
 
 	var secondaryDisks []*containerv1beta1.SecondaryBootDisk
@@ -336,7 +339,7 @@ func (g *GKE) nodePoolForPod(name string, p *corev1.Pod) (*containerv1beta1.Node
 		},
 		Management: &containerv1beta1.NodeManagement{
 			AutoRepair: true,
-			AutoUpgrade: true,
+			AutoUpgrade: false,
 		},
 		UpgradeSettings: &containerv1beta1.UpgradeSettings{
 			MaxSurge: 1,
diff --git a/tpu-provisioner/internal/cloud/gke_context.go b/tpu-provisioner/internal/cloud/gke_context.go
index 70c16178e..d204c1729 100644
--- a/tpu-provisioner/internal/cloud/gke_context.go
+++ b/tpu-provisioner/internal/cloud/gke_context.go
@@ -11,6 +11,7 @@ type GKEContext struct {
 	NodeSecondaryDisk string
 	NodeTags []string
 	NodeSecureBoot bool
+	ForceOnDemand bool
 }
 
 func (c GKEContext) ClusterName() string {
diff --git a/tpu-provisioner/internal/cloud/gke_test.go b/tpu-provisioner/internal/cloud/gke_test.go
index 102b3ed0d..5ef5a2b91 100644
--- a/tpu-provisioner/internal/cloud/gke_test.go
+++ b/tpu-provisioner/internal/cloud/gke_test.go
@@ -242,7 +242,67 @@ func TestNodePoolForPod(t *testing.T) {
 				},
 				InitialNodeCount: 512,
 				Locations: []string{""},
-				Management: &container.NodeManagement{AutoRepair: true, AutoUpgrade: true},
+				Management: &container.NodeManagement{AutoRepair: true, AutoUpgrade: false},
+				MaxPodsConstraint: &container.MaxPodsConstraint{MaxPodsPerNode: 15},
+				Name: "test-pool",
+				PlacementPolicy: &container.PlacementPolicy{TpuTopology: "8x16x16", Type: "COMPACT"},
+				UpgradeSettings: &container.UpgradeSettings{MaxSurge: 1},
+			},
+		},
+		{
+			desc: "spot",
+			selector: map[string]string{
+				"cloud.google.com/gke-spot": "true",
+			},
+			want: &containerv1beta1.NodePool{
+				Config: &container.NodeConfig{
+					Labels: map[string]string{
+						"google.com/nodepool-manager": "tpu-provisioner",
+						"google.com/tpu-provisioner-jobset-name": "jobset-test",
+						"google.com/tpu-provisioner-jobset-namespace": "default",
+						"google.com/tpu-provisioner-parent-kind": "job",
+						"google.com/tpu-provisioner-parent-name": "jobset-test-job-1-0",
+						"google.com/tpu-provisioner-parent-namespace": "default",
+					},
+					MachineType: "ct5p-hightpu-4t",
+					ShieldedInstanceConfig: &container.ShieldedInstanceConfig{EnableIntegrityMonitoring: true},
+					Spot: true,
+					Taints: []*container.NodeTaint{
+						{Effect: "NO_SCHEDULE", Key: "cloud.google.com/gke-spot", Value: "true"},
+					},
+				},
+				InitialNodeCount: 512,
+				Locations: []string{""},
+				Management: &container.NodeManagement{AutoRepair: true, AutoUpgrade: false},
+				MaxPodsConstraint: &container.MaxPodsConstraint{MaxPodsPerNode: 15},
+				Name: "test-pool",
+				PlacementPolicy: &container.PlacementPolicy{TpuTopology: "8x16x16", Type: "COMPACT"},
+				UpgradeSettings: &container.UpgradeSettings{MaxSurge: 1},
+			},
+		},
+		{
+			desc: "spot with forced on demand",
+			gkeContext: GKEContext{ForceOnDemand: true},
+			selector: map[string]string{
+				"cloud.google.com/gke-spot": "true",
+			},
+			want: &containerv1beta1.NodePool{
+				Config: &container.NodeConfig{
+					Labels: map[string]string{
+						"google.com/nodepool-manager": "tpu-provisioner",
+						"google.com/tpu-provisioner-jobset-name": "jobset-test",
+						"google.com/tpu-provisioner-jobset-namespace": "default",
+						"google.com/tpu-provisioner-parent-kind": "job",
+						"google.com/tpu-provisioner-parent-name": "jobset-test-job-1-0",
+						"google.com/tpu-provisioner-parent-namespace": "default",
+					},
+					MachineType: "ct5p-hightpu-4t",
+					ShieldedInstanceConfig: &container.ShieldedInstanceConfig{EnableIntegrityMonitoring: true},
+					Spot: false,
+				},
+				InitialNodeCount: 512,
+				Locations: []string{""},
+				Management: &container.NodeManagement{AutoRepair: true, AutoUpgrade: false},
 				MaxPodsConstraint: &container.MaxPodsConstraint{MaxPodsPerNode: 15},
 				Name: "test-pool",
 				PlacementPolicy: &container.PlacementPolicy{TpuTopology: "8x16x16", Type: "COMPACT"},
 				UpgradeSettings: &container.UpgradeSettings{MaxSurge: 1},
@@ -272,7 +332,34 @@ func TestNodePoolForPod(t *testing.T) {
 				},
 				InitialNodeCount: 512,
 				Locations: []string{""},
-				Management: &container.NodeManagement{AutoRepair: true, AutoUpgrade: true},
+				Management: &container.NodeManagement{AutoRepair: true, AutoUpgrade: false},
+				MaxPodsConstraint: &container.MaxPodsConstraint{MaxPodsPerNode: 15},
+				Name: "test-pool",
+				PlacementPolicy: &container.PlacementPolicy{TpuTopology: "8x16x16", Type: "COMPACT"},
+				UpgradeSettings: &container.UpgradeSettings{MaxSurge: 1},
+			},
+		},
+		{
+			desc: "pod with reservation selector but on demand is forced",
+			selector: map[string]string{"cloud.google.com/reservation-name": "tpu-rsv"},
+			gkeContext: GKEContext{ForceOnDemand: true},
+			want: &containerv1beta1.NodePool{
+				Config: &container.NodeConfig{
+					Labels: map[string]string{
+						"google.com/nodepool-manager": "tpu-provisioner",
+						"google.com/tpu-provisioner-jobset-name": "jobset-test",
+						"google.com/tpu-provisioner-jobset-namespace": "default",
+						"google.com/tpu-provisioner-parent-kind": "job",
+						"google.com/tpu-provisioner-parent-name": "jobset-test-job-1-0",
+						"google.com/tpu-provisioner-parent-namespace": "default",
+					},
+					MachineType: "ct5p-hightpu-4t",
+					ReservationAffinity: nil,
+					ShieldedInstanceConfig: &container.ShieldedInstanceConfig{EnableIntegrityMonitoring: true},
+				},
+				InitialNodeCount: 512,
+				Locations: []string{""},
+				Management: &container.NodeManagement{AutoRepair: true, AutoUpgrade: false},
 				MaxPodsConstraint: &container.MaxPodsConstraint{MaxPodsPerNode: 15},
 				Name: "test-pool",
 				PlacementPolicy: &container.PlacementPolicy{TpuTopology: "8x16x16", Type: "COMPACT"},
 				UpgradeSettings: &container.UpgradeSettings{MaxSurge: 1},
@@ -298,7 +385,7 @@ func TestNodePoolForPod(t *testing.T) {
 				},
 				InitialNodeCount: 512,
 				Locations: []string{""},
-				Management: &container.NodeManagement{AutoRepair: true, AutoUpgrade: true},
+				Management: &container.NodeManagement{AutoRepair: true, AutoUpgrade: false},
 				MaxPodsConstraint: &container.MaxPodsConstraint{MaxPodsPerNode: 15},
 				Name: "test-pool",
 				PlacementPolicy: &container.PlacementPolicy{TpuTopology: "8x16x16", Type: "COMPACT"},
@@ -329,7 +416,7 @@ func TestNodePoolForPod(t *testing.T) {
 				},
 				InitialNodeCount: 512,
 				Locations: []string{""},
-				Management: &container.NodeManagement{AutoRepair: true, AutoUpgrade: true},
+				Management: &container.NodeManagement{AutoRepair: true, AutoUpgrade: false},
 				MaxPodsConstraint: &container.MaxPodsConstraint{MaxPodsPerNode: 15},
 				Name: "test-pool",
 				PlacementPolicy: &container.PlacementPolicy{TpuTopology: "8x16x16", Type: "COMPACT"},