Skip to content

Commit

Permalink
fix: concurency problem with volumes (scaleway#255)
Browse files Browse the repository at this point in the history
  • Loading branch information
jerome-quere authored Sep 25, 2019
1 parent a8d1e6c commit 3ac8270
Show file tree
Hide file tree
Showing 8 changed files with 248 additions and 112 deletions.
4 changes: 2 additions & 2 deletions GNUmakefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ sweep:
test: fmtcheck
go test -i $(TEST) || exit 1
echo $(TEST) | \
xargs -t -n4 go test $(TESTARGS) -timeout=30s -parallel=4
xargs -t -n4 go test $(TESTARGS) -timeout=30s -parallel=10

testacc: fmtcheck
TF_ACC=1 go test $(TEST) -v $(TESTARGS) -timeout=120m -parallel=2
TF_ACC=1 go test $(TEST) -v $(TESTARGS) -timeout=120m -parallel=10

vet:
@echo "go vet ."
Expand Down
33 changes: 33 additions & 0 deletions scaleway/helpers.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
package scaleway

import (
"bytes"
"fmt"
"net/http"
"regexp"
"strings"
"sync"
"text/template"
"time"

"github.com/hashicorp/terraform/helper/resource"
Expand Down Expand Up @@ -464,3 +466,34 @@ var UUIDRegex = regexp.MustCompile(`[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{
func isUUID(s string) bool {
return UUIDRegex.MatchString(s)
}

// newTemplateFunc takes a go template string and returns a function that can be called to execute template.
func newTemplateFunc(tplStr string) func(data interface{}) string {
t := template.Must(template.New("tpl").Parse(tplStr))
return func(tplParams interface{}) string {
buffer := bytes.Buffer{}
err := t.Execute(&buffer, tplParams)
if err != nil {
panic(err)
}
return buffer.String()
}
}

// testAccGetResourceAttr can be used in acceptance tests to extract value from state and store it in dest
func testAccGetResourceAttr(resourceName string, attrName string, dest *string) resource.TestCheckFunc {
return func(state *terraform.State) error {
r, exist := state.RootModule().Resources[resourceName]
if !exist {
return fmt.Errorf("unknown ressource %s", resourceName)
}

a, exist := r.Primary.Attributes[attrName]
if !exist {
return fmt.Errorf("unknown ressource %s", resourceName)
}

*dest = a
return nil
}
}
124 changes: 88 additions & 36 deletions scaleway/helpers_instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"time"

"github.com/hashicorp/terraform/helper/hashcode"
"github.com/hashicorp/terraform/helper/resource"
"github.com/hashicorp/terraform/helper/schema"
"github.com/scaleway/scaleway-sdk-go/api/instance/v1"
"github.com/scaleway/scaleway-sdk-go/scw"
Expand All @@ -17,8 +16,7 @@ const (
InstanceServerStateStarted = "started"
InstanceServerStateStandby = "standby"

InstanceServerWaitForTimeout = 10 * time.Minute
InstanceServerRetryFuncTimeout = InstanceServerWaitForTimeout + time.Minute // some RetryFunc are calling a WaitFor
InstanceServerWaitForTimeout = 10 * time.Minute
)

// getInstanceAPIWithZone returns a new instance API and the zone for a Create request
Expand Down Expand Up @@ -73,49 +71,103 @@ func serverStateFlatten(fromState instance.ServerState) (string, error) {
return "", fmt.Errorf("server is in an invalid state, someone else might be executing action at the same time")
}

// instanceServerStateToAction returns the action required to transit from a state to another.
func instanceServerStateToAction(previousState, nextState string, forceReboot bool) []instance.ServerAction {
if previousState == InstanceServerStateStarted && nextState == InstanceServerStateStarted && forceReboot {
return []instance.ServerAction{instance.ServerActionReboot}
// serverStateExpand converts a terraform state to an API state or return an error.
func serverStateExpand(rawState string) (instance.ServerState, error) {

apiState, exist := map[string]instance.ServerState{
InstanceServerStateStopped: instance.ServerStateStopped,
InstanceServerStateStandby: instance.ServerStateStoppedInPlace,
InstanceServerStateStarted: instance.ServerStateRunning,
}[rawState]

if !exist {
return "", fmt.Errorf("server is in a transient state, someone else might be executing another action at the same time")
}

return apiState, nil
}

func reachState(instanceAPI *instance.API, zone scw.Zone, serverID string, toState instance.ServerState) error {
response, err := instanceAPI.GetServer(&instance.GetServerRequest{
Zone: zone,
ServerID: serverID,
})
if err != nil {
return err
}
fromState := response.Server.State

if response.Server.State == toState {
return nil
}

transitionMap := map[[2]instance.ServerState][]instance.ServerAction{
{instance.ServerStateStopped, instance.ServerStateRunning}: {instance.ServerActionPoweron},
{instance.ServerStateStopped, instance.ServerStateStoppedInPlace}: {instance.ServerActionPoweron, instance.ServerActionStopInPlace},
{instance.ServerStateRunning, instance.ServerStateStopped}: {instance.ServerActionPoweroff},
{instance.ServerStateRunning, instance.ServerStateStoppedInPlace}: {instance.ServerActionStopInPlace},
{instance.ServerStateStoppedInPlace, instance.ServerStateRunning}: {instance.ServerActionPoweron},
{instance.ServerStateStoppedInPlace, instance.ServerStateStopped}: {instance.ServerActionPoweron, instance.ServerActionPoweroff},
}
transitionMap := map[[2]string][]instance.ServerAction{
{InstanceServerStateStopped, InstanceServerStateStopped}: {},
{InstanceServerStateStopped, InstanceServerStateStarted}: {instance.ServerActionPoweron},
{InstanceServerStateStopped, InstanceServerStateStandby}: {instance.ServerActionPoweron, instance.ServerActionStopInPlace},
{InstanceServerStateStarted, InstanceServerStateStopped}: {instance.ServerActionPoweroff},
{InstanceServerStateStarted, InstanceServerStateStarted}: {},
{InstanceServerStateStarted, InstanceServerStateStandby}: {instance.ServerActionStopInPlace},
{InstanceServerStateStandby, InstanceServerStateStopped}: {instance.ServerActionPoweroff},
{InstanceServerStateStandby, InstanceServerStateStarted}: {instance.ServerActionPoweron},
{InstanceServerStateStandby, InstanceServerStateStandby}: {},

actions, exist := transitionMap[[2]instance.ServerState{fromState, toState}]
if !exist {
return fmt.Errorf("don't know how to reach state %s from state %s for server %s", toState, fromState, serverID)
}

return transitionMap[[2]string{previousState, nextState}]
for _, a := range actions {
err = instanceAPI.ServerActionAndWait(&instance.ServerActionAndWaitRequest{
ServerID: serverID,
Action: a,
Zone: zone,
Timeout: InstanceServerWaitForTimeout,
})
if err != nil {
return err
}
}
return nil
}

// reachState executes server action(s) to reach the expected state
func reachState(instanceAPI *instance.API, zone scw.Zone, serverID, fromState, toState string, forceReboot bool) error {
for _, action := range instanceServerStateToAction(fromState, toState, forceReboot) {
err := resource.Retry(InstanceServerRetryFuncTimeout, func() *resource.RetryError {
err := instanceAPI.ServerActionAndWait(&instance.ServerActionAndWaitRequest{
Zone: zone,
ServerID: serverID,
Action: action,
Timeout: InstanceServerWaitForTimeout,
})
if isSDKError(err, "expected state [\\w]+ but found [\\w]+") {
l.Errorf("Retrying action %s because of error '%v'", action, err)
return resource.RetryableError(err)
}
if err != nil {
return resource.NonRetryableError(err)
}
// detachVolume will make sure a volume is not attached to any server. If volume is attached to a server, it will be stopped
// to allow volume detachment.
func detachVolume(instanceAPI *instance.API, zone scw.Zone, volumeId string) error {

res, err := instanceAPI.GetVolume(&instance.GetVolumeRequest{
Zone: zone,
VolumeID: volumeId,
})
if err != nil {
return err
}

if res.Volume.Server == nil {
return nil
}

defer lockLocalizedId(newZonedId(zone, res.Volume.Server.ID))()

// We need to stop server only for VolumeTypeLSSD volume type
if res.Volume.VolumeType == instance.VolumeTypeLSSD {
err = reachState(instanceAPI, zone, res.Volume.Server.ID, instance.ServerStateStopped)

// If 404 this mean server is deleted and volume is already detached
if is404Error(err) {
return nil
})
}
if err != nil {
return err
}
}
_, err = instanceAPI.DetachVolume(&instance.DetachVolumeRequest{
Zone: zone,
VolumeID: res.Volume.ID,
})

// TODO find a better way to test this error
if err != nil && err.Error() != "scaleway-sdk-go: volume should be attached to a server" {
return err
}

return nil
}
1 change: 1 addition & 0 deletions scaleway/resource_baremetal_server_beta_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
)

func TestAccScalewayBaremetalServerBetaMinimal1(t *testing.T) {
t.Skip("due to low stock on this resource type, test is flaky")
resource.ParallelTest(t, resource.TestCase{
PreCheck: func() { testAccPreCheck(t) },
Providers: testAccProviders,
Expand Down
73 changes: 39 additions & 34 deletions scaleway/resource_instance_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,8 @@ import (
"fmt"
"io"
"io/ioutil"
"net/http"
"strconv"

"github.com/hashicorp/terraform/helper/resource"
"github.com/hashicorp/terraform/helper/schema"
"github.com/hashicorp/terraform/helper/validation"
"github.com/scaleway/scaleway-sdk-go/api/instance/v1"
Expand Down Expand Up @@ -281,7 +279,11 @@ func resourceScalewayInstanceServerCreate(d *schema.ResourceData, m interface{})
}
}

err = reachState(instanceAPI, zone, res.Server.ID, InstanceServerStateStopped, d.Get("state").(string), false)
targetState, err := serverStateExpand(d.Get("state").(string))
if err != nil {
return err
}
err = reachState(instanceAPI, zone, res.Server.ID, targetState)
if err != nil {
return err
}
Expand Down Expand Up @@ -410,12 +412,12 @@ func resourceScalewayInstanceServerUpdate(d *schema.ResourceData, m interface{})
return err
}

// This variable will be set to true if any state change requires a server reboot.
var forceReboot bool

////
// Update the server
// Construct UpdateServerRequest
////
previousState, nextState := d.GetChange("state") // the previous state of the server might change when updating volumes
updateRequest := &instance.UpdateServerRequest{
Zone: zone,
ServerID: ID,
Expand Down Expand Up @@ -451,13 +453,20 @@ func resourceScalewayInstanceServerUpdate(d *schema.ResourceData, m interface{})
volumes["0"] = &instance.VolumeTemplate{ID: d.Get("root_volume.0.volume_id").(string), Name: getRandomName("vol")} // name is ignored by the API, any name will work here

for i, volumeID := range raw.([]interface{}) {

// We make sure volume is detached so we can attach it to the server.
err = detachVolume(instanceAPI, zone, expandID(volumeID))
if err != nil {
return err
}
volumes[strconv.Itoa(i+1)] = &instance.VolumeTemplate{
ID: expandID(volumeID),
Name: getRandomName("vol"), // name is ignored by the API, any name will work here
}
}

updateRequest.Volumes = &volumes
forceReboot = true
}

if d.HasChange("placement_group_id") {
Expand All @@ -470,33 +479,6 @@ func resourceScalewayInstanceServerUpdate(d *schema.ResourceData, m interface{})
}
}

var updateResponse *instance.UpdateServerResponse

err = resource.Retry(InstanceServerRetryFuncTimeout, func() *resource.RetryError {
updateResponse, err = instanceAPI.UpdateServer(updateRequest)
if isSDKResponseError(err, http.StatusBadRequest, "Instance must be powered off to change local volumes") {
err = reachState(instanceAPI, zone, ID, previousState.(string), InstanceServerStateStopped, false)
if err != nil && !isSDKResponseError(err, http.StatusBadRequest, "server should be running") {
return resource.NonRetryableError(err)
}
return resource.RetryableError(fmt.Errorf("server is being powered off"))
}

if err != nil {
return resource.NonRetryableError(err)
}

return nil
})
if err != nil {
return err
}

previousState, err = serverStateFlatten(updateResponse.Server.State)
if err != nil {
return err
}

////
// Update server user data
////
Expand Down Expand Up @@ -529,8 +511,30 @@ func resourceScalewayInstanceServerUpdate(d *schema.ResourceData, m interface{})

}

////
// Apply changes
////

defer lockLocalizedId(d.Id())()

if forceReboot {
err = reachState(instanceAPI, zone, ID, InstanceServerStateStopped)
if err != nil {
return err
}
}
_, err = instanceAPI.UpdateServer(updateRequest)
if err != nil {
return err
}

targetState, err := serverStateExpand(d.Get("state").(string))
if err != nil {
return err
}

// reach expected state
err = reachState(instanceAPI, zone, ID, previousState.(string), nextState.(string), forceReboot)
err = reachState(instanceAPI, zone, ID, targetState)
if err != nil {
return err
}
Expand All @@ -543,9 +547,10 @@ func resourceScalewayInstanceServerDelete(d *schema.ResourceData, m interface{})
if err != nil {
return err
}
defer lockLocalizedId(d.Id())()

// reach stopped state
err = reachState(instanceAPI, zone, ID, d.Get("state").(string), InstanceServerStateStopped, false)
err = reachState(instanceAPI, zone, ID, instance.ServerStateStopped)
if is404Error(err) {
return nil
}
Expand Down
Loading

0 comments on commit 3ac8270

Please sign in to comment.