Skip to content

Commit

Permalink
wait for iscsi device removal
Browse files Browse the repository at this point in the history
Replaces static sleep with a wait with timeout when deleting a device
  • Loading branch information
jharrod authored Nov 18, 2024
1 parent dfa21af commit 24ce5c6
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 6 deletions.
39 changes: 36 additions & 3 deletions utils/devices.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"strings"
"time"

"github.com/spf13/afero"
"golang.org/x/net/context"

"github.com/netapp/trident/internal/fiji"
Expand All @@ -26,7 +27,8 @@ import (
const LUKSMetadataSize = 18874368

const (
luksDevicePrefix = "luks-"
luksDevicePrefix = "luks-"
devicesRemovalMaxWaitTime = 5 * time.Second
)

var (
Expand All @@ -35,6 +37,8 @@ var (
beforeRemoveFile = fiji.Register("beforeRemoveFile", "devices")

LuksCloseDurations = durations.TimeDuration{}

osFs = afero.NewOsFs()
)

// waitForDevice accepts a device name and checks if it is present
Expand Down Expand Up @@ -175,6 +179,30 @@ func removeDevice(ctx context.Context, deviceInfo *models.ScsiDeviceInfo, ignore
return nil
}

// waitForDevicesRemoval waits for devices to be removed from the system.
func waitForDevicesRemoval(ctx context.Context, osFs afero.Fs, devicePathPrefix string, deviceNames []string,
maxWaitTime time.Duration,
) error {
startTime := time.Now()
for time.Since(startTime) < maxWaitTime {
anyExist := false
for _, device := range deviceNames {
path := filepath.Join(devicePathPrefix, device)
if _, err := osFs.Stat(path); !os.IsNotExist(err) {
anyExist = true
break
}
}
if !anyExist {
return nil
}
time.Sleep(50 * time.Millisecond)
}

Logc(ctx).WithField("devices", deviceNames).Debug("Timed out waiting for devices to be removed.")
return errors.TimeoutError("timed out waiting for devices to be removed")
}

// canFlushMultipathDevice determines whether device can be flushed.
// 1. Check the health of path by executing 'multipath -C <devicePath>'
// 2. If no error, return nil.
Expand Down Expand Up @@ -911,8 +939,13 @@ func removeSCSIDevice(ctx context.Context, deviceInfo *models.ScsiDeviceInfo, ig
return false, err
}

// Give the host a chance to fully process the removal
time.Sleep(time.Second)
// Wait for device to be removed. Do not ignore errors here as we need the device removed
// for the force removal of the multipath device to succeed.
err = waitForDevicesRemoval(ctx, osFs, iscsi.DevPrefix, deviceInfo.Devices, devicesRemovalMaxWaitTime)
if err != nil {
return false, err
}

listAllISCSIDevices(ctx)

// If ignoreErrors was set to true while entering into this function and
Expand Down
3 changes: 0 additions & 3 deletions utils/devices_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import (
"unsafe"

log "github.com/sirupsen/logrus"
"github.com/spf13/afero"
"golang.org/x/net/context"
"golang.org/x/sys/unix"

Expand Down Expand Up @@ -49,8 +48,6 @@ var (
duringOpenBeforeCryptSetupOpen = fiji.Register("duringOpenBeforeCryptSetupOpen", "devices_linux")
duringRotatePassphraseBeforeLuksKeyChange = fiji.Register("duringRotatePassphraseBeforeLuksKeyChange",
"devices_linux")

osFs = afero.NewOsFs()
)

// flushOneDevice flushes any outstanding I/O to a disk
Expand Down
84 changes: 84 additions & 0 deletions utils/devices_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@ import (
"testing"
"time"

"github.com/spf13/afero"
"github.com/stretchr/testify/assert"
"go.uber.org/mock/gomock"

mockexec "github.com/netapp/trident/mocks/mock_utils/mock_exec"
"github.com/netapp/trident/mocks/mock_utils/mock_models/mock_luks"
"github.com/netapp/trident/utils/errors"
)

// ////////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -266,3 +268,85 @@ func TestRemoveMultipathDeviceMapping(t *testing.T) {
})
}
}

func TestWaitForDevicesRemoval(t *testing.T) {
errMsg := "timed out waiting for devices to be removed"
tests := map[string]struct {
name string
devicePathPrefix string
deviceNames []string
getOsFs func() (afero.Fs, error)
maxWaitTime time.Duration
expectedError error
}{
"Devices removed successfully": {
devicePathPrefix: "/dev",
deviceNames: []string{"sda", "sdb"},
getOsFs: func() (afero.Fs, error) {
return afero.NewMemMapFs(), nil
},
maxWaitTime: 1 * time.Second,
expectedError: nil,
},
"Timeout waiting for devices to be removed": {
devicePathPrefix: "/dev",
deviceNames: []string{"sda", "sdb"},
getOsFs: func() (afero.Fs, error) {
osFs := afero.NewMemMapFs()
_, err := osFs.Create("/dev/sda")
if err != nil {
return nil, err
}
_, err = osFs.Create("/dev/sdb")
if err != nil {
return nil, err
}
return osFs, nil
},
maxWaitTime: 1 * time.Second,
expectedError: errors.TimeoutError(errMsg),
},
"Timeout waiting for last device to be removed": {
devicePathPrefix: "/dev",
deviceNames: []string{"sda", "sdb"},
getOsFs: func() (afero.Fs, error) {
osFs := afero.NewMemMapFs()
_, err := osFs.Create("/dev/sdb")
if err != nil {
return nil, err
}
return osFs, nil
},
maxWaitTime: 1 * time.Second,
expectedError: errors.TimeoutError(errMsg),
},
"Timeout waiting for first device to be removed": {
devicePathPrefix: "/dev",
deviceNames: []string{"sda", "sdb"},
getOsFs: func() (afero.Fs, error) {
osFs := afero.NewMemMapFs()
_, err := osFs.Create("/dev/sda")
if err != nil {
return nil, err
}
return osFs, nil
},
maxWaitTime: 1 * time.Second,
expectedError: errors.TimeoutError(errMsg),
},
}

for name, params := range tests {
t.Run(name, func(t *testing.T) {
fs, err := params.getOsFs()
assert.NoError(t, err)
err = waitForDevicesRemoval(context.Background(), fs, params.devicePathPrefix, params.deviceNames,
params.maxWaitTime)
if params.expectedError != nil {
assert.EqualError(t, err, params.expectedError.Error())
} else {
assert.NoError(t, err)
}
})
}
}

0 comments on commit 24ce5c6

Please sign in to comment.