Skip to content

Commit

Permalink
Fix ONTAP SAN drivers to clean up if provisioning fails
Browse files Browse the repository at this point in the history
Fixes: #442
  • Loading branch information
clintonk authored Oct 21, 2020
1 parent 9751a1e commit 75b74ab
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 24 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
- Fixed issue of SLM portal login for iSCSI backends (Issue [#387](https://github.com/NetApp/trident/issues/387)).
- Fixed issue where Trident would crash when creating a volume if ONTAP FCP interfaces were present in the target SVM.
- Fixed issue where storage prefix with period would cause backend to fail when updating or creating backend or upgrading Trident from v20.01
- Fixed ONTAP SAN drivers to clean up if provisioning fails (Issue [#442](https://github.com/NetApp/trident/issues/442)).

**Enhancements:**
- **Kubernetes:** Added automatic node preparation for NFS protocol. By default, Trident will now attempt to make
Expand Down
17 changes: 16 additions & 1 deletion storage_drivers/ontap/api/ontap.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ const (
maxZapiRecords = 0xfffffffe
NumericalValueNotSet = -1
maxFlexGroupWait = 30 * time.Second

failureLUNCreate = "failure_65dc2f4b_adbe_4ed3_8b73_6c61d5eac054"
failureLUNSetAttr = "failure_7c3a89e2_7d83_457b_9e29_bfdb082c1d8b"
)

// ClientConfig holds the configuration data for Client objects
Expand Down Expand Up @@ -412,7 +415,14 @@ func (d Client) IgroupGet(initiatorGroupName string) (*azgo.InitiatorGroupInfoTy

// LunCreate creates a lun with the specified attributes
// equivalent to filer::> lun create -vserver iscsi_vs -path /vol/v/lun1 -size 1g -ostype linux -space-reserve disabled -space-allocation enabled
func (d Client) LunCreate(lunPath string, sizeInBytes int, osType string, spaceReserved bool, spaceAllocated bool) (*azgo.LunCreateBySizeResponse, error) {
func (d Client) LunCreate(
lunPath string, sizeInBytes int, osType string, spaceReserved bool, spaceAllocated bool,
) (*azgo.LunCreateBySizeResponse, error) {

if strings.Contains(lunPath, failureLUNCreate) {
return nil, errors.New("injected error")
}

response, err := azgo.NewLunCreateBySizeRequest().
SetPath(lunPath).
SetSize(sizeInBytes).
Expand Down Expand Up @@ -574,6 +584,11 @@ func (d Client) LunDestroy(lunPath string) (*azgo.LunDestroyResponse, error) {

// LunSetAttribute sets a named attribute for a given LUN.
func (d Client) LunSetAttribute(lunPath, name, value string) (*azgo.LunSetAttributeResponse, error) {

if strings.Contains(lunPath, failureLUNSetAttr) {
return nil, errors.New("injected error")
}

response, err := azgo.NewLunSetAttributeRequest().
SetPath(lunPath).
SetName(name).
Expand Down
49 changes: 42 additions & 7 deletions storage_drivers/ontap/ontap_san.go
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,8 @@ func (d *SANStorageDriver) Create(
errMessage := fmt.Sprintf("ONTAP-SAN pool %s/%s; error: %v", storagePool.Name, aggregate, aggrLimitsErr)
Logc(ctx).Error(errMessage)
createErrors = append(createErrors, fmt.Errorf(errMessage))

// Move on to the next pool
continue
}

Expand All @@ -301,29 +303,62 @@ func (d *SANStorageDriver) Create(
aggregate, name, err)
Logc(ctx).Error(errMessage)
createErrors = append(createErrors, fmt.Errorf(errMessage))

// Move on to the next pool
continue
}

lunPath := lunPath(name)
osType := "linux"

// Create the LUN
// Create the LUN. If this fails, clean up and move on to the next pool.
lunCreateResponse, err := d.API.LunCreate(lunPath, int(sizeBytes), osType, false, spaceAllocation)
if err = api.GetError(ctx, lunCreateResponse, err); err != nil {
errMessage := fmt.Sprintf("ONTAP-SAN pool %s/%s; error creating LUN %s: %v", storagePool.Name,
aggregate, name, err)

errMessage := fmt.Sprintf("ONTAP-SAN pool %s/%s; error creating LUN %s: %v",
storagePool.Name, aggregate, name, err)
Logc(ctx).Error(errMessage)
createErrors = append(createErrors, fmt.Errorf(errMessage))

// Don't leave the new Flexvol around
if _, err := d.API.VolumeDestroy(name, true); err != nil {
Logc(ctx).WithField("volume", name).Errorf("Could not clean up volume; %v", err)
} else {
Logc(ctx).WithField("volume", name).Debugf("Cleaned up volume after LUN create error.")
}

// Move on to the next pool
continue
}

// Save the fstype in a LUN attribute so we know what to do in Attach
// Save the fstype in a LUN attribute so we know what to do in Attach. If this fails, clean up and
// move on to the next pool.
attrResponse, err := d.API.LunSetAttribute(lunPath, LUNAttributeFSType, fstype)
if err = api.GetError(ctx, attrResponse, err); err != nil {
defer d.API.LunDestroy(lunPath)
return fmt.Errorf("ONTAP-SAN pool %s/%s; error saving file system type for LUN %s: %v", storagePool.Name,
aggregate, name, err)

errMessage := fmt.Sprintf("ONTAP-SAN pool %s/%s; error saving file system type for LUN %s: %v",
storagePool.Name, aggregate, name, err)
Logc(ctx).Error(errMessage)
createErrors = append(createErrors, fmt.Errorf(errMessage))

// Don't leave the new LUN around
if _, err := d.API.LunDestroy(lunPath); err != nil {
Logc(ctx).WithField("LUN", lunPath).Errorf("Could not clean up LUN; %v", err)
} else {
Logc(ctx).WithField("volume", name).Debugf("Cleaned up LUN after set attribute error.")
}

// Don't leave the new Flexvol around
if _, err := d.API.VolumeDestroy(name, true); err != nil {
Logc(ctx).WithField("volume", name).Errorf("Could not clean up volume; %v", err)
} else {
Logc(ctx).WithField("volume", name).Debugf("Cleaned up volume after set attribute error.")
}

// Move on to the next pool
continue
}

// Save the context
attrResponse, err = d.API.LunSetAttribute(lunPath, "context", string(d.Config.DriverContext))
if err = api.GetError(ctx, attrResponse, err); err != nil {
Expand Down
80 changes: 64 additions & 16 deletions storage_drivers/ontap/ontap_san_economy.go
Original file line number Diff line number Diff line change
Expand Up @@ -465,29 +465,42 @@ func (d *SANEconomyStorageDriver) Create(
aggrLimitsErr)
Logc(ctx).Error(errMessage)
createErrors = append(createErrors, fmt.Errorf(errMessage))

// Move on to the next pool
continue
}

// Make sure we have a Flexvol for the new LUN
bucketVol, err := d.ensureFlexvolForLUN(
ctx, aggregate, spaceReserve, snapshotPolicy, tieringPolicy, false, enableEncryption, sizeBytes, opts,
d.Config, storagePool)
bucketVol, newVol, err := d.ensureFlexvolForLUN(ctx, aggregate, spaceReserve, snapshotPolicy, tieringPolicy,
false, enableEncryption, sizeBytes, opts, d.Config, storagePool)
if err != nil {
errMessage := fmt.Sprintf("ONTAP-SAN-ECONOMY pool %s/%s; BucketVol location/creation failed %s: %v",
storagePool.Name,
aggregate, name, err)
storagePool.Name, aggregate, name, err)
Logc(ctx).Error(errMessage)
createErrors = append(createErrors, fmt.Errorf(errMessage))

// Move on to the next pool
continue
}

// Grow or shrink the Flexvol as needed
err = d.resizeFlexvol(ctx, bucketVol, sizeBytes)
if err != nil {
if err = d.resizeFlexvol(ctx, bucketVol, sizeBytes); err != nil {

errMessage := fmt.Sprintf("ONTAP-SAN-ECONOMY pool %s/%s; Flexvol resize failed %s/%s: %v",
storagePool.Name, aggregate, bucketVol, name, err)
Logc(ctx).Error(errMessage)
createErrors = append(createErrors, fmt.Errorf(errMessage))

// Don't leave the new Flexvol around if we just created it
if newVol {
if _, err := d.API.VolumeDestroy(bucketVol, true); err != nil {
Logc(ctx).WithField("volume", bucketVol).Errorf("Could not clean up volume; %v", err)
} else {
Logc(ctx).WithField("volume", name).Debugf("Cleaned up volume after resize error.")
}
}

// Move on to the next pool
continue
}

Expand All @@ -497,10 +510,22 @@ func (d *SANEconomyStorageDriver) Create(
// Create the LUN
lunCreateResponse, err := d.API.LunCreate(lunPath, int(sizeBytes), osType, false, spaceAllocation)
if err = api.GetError(ctx, lunCreateResponse, err); err != nil {

errMessage := fmt.Sprintf("ONTAP-SAN-ECONOMY pool %s/%s; error creating LUN %s/%s: %v", storagePool.Name,
aggregate, bucketVol, name, err)
Logc(ctx).Error(errMessage)
createErrors = append(createErrors, fmt.Errorf(errMessage))

// Don't leave the new Flexvol around if we just created it
if newVol {
if _, err := d.API.VolumeDestroy(bucketVol, true); err != nil {
Logc(ctx).WithField("volume", bucketVol).Errorf("Could not clean up volume; %v", err)
} else {
Logc(ctx).WithField("volume", name).Debugf("Cleaned up volume after LUN create error.")
}
}

// Move on to the next pool
continue
}

Expand All @@ -509,10 +534,32 @@ func (d *SANEconomyStorageDriver) Create(
// Save the fstype in a LUN attribute so we know what to do in Attach
attrResponse, err := d.API.LunSetAttribute(lunPath, LUNAttributeFSType, fstype)
if err = api.GetError(ctx, attrResponse, err); err != nil {
d.API.LunDestroy(lunPath)
return fmt.Errorf("ONTAP-SAN-ECONOMY pool %s/%s; error saving file system type for LUN %s/%s: %v",

errMessage := fmt.Sprintf("ONTAP-SAN-ECONOMY pool %s/%s; error saving file system type for LUN %s/%s: %v",
storagePool.Name, aggregate, bucketVol, name, err)
Logc(ctx).Error(errMessage)
createErrors = append(createErrors, fmt.Errorf(errMessage))

// Don't leave the new LUN around
if _, err := d.API.LunDestroy(lunPath); err != nil {
Logc(ctx).WithField("LUN", lunPath).Errorf("Could not clean up LUN; %v", err)
} else {
Logc(ctx).WithField("volume", name).Debugf("Cleaned up LUN after set attribute error.")
}

// Don't leave the new Flexvol around if we just created it
if newVol {
if _, err := d.API.VolumeDestroy(bucketVol, true); err != nil {
Logc(ctx).WithField("volume", bucketVol).Errorf("Could not clean up volume; %v", err)
} else {
Logc(ctx).WithField("volume", name).Debugf("Cleaned up volume after set attribute error.")
}
}

// Move on to the next pool
continue
}

// Save the context
attrResponse, err = d.API.LunSetAttribute(lunPath, "context", string(d.Config.DriverContext))
if err = api.GetError(ctx, attrResponse, err); err != nil {
Expand Down Expand Up @@ -1177,16 +1224,17 @@ func (d *SANEconomyStorageDriver) Get(ctx context.Context, name string) error {
}

// ensureFlexvolForLUN accepts a set of Flexvol characteristics and either finds one to contain a new
// LUN or it creates a new Flexvol with the needed attributes.
// LUN or it creates a new Flexvol with the needed attributes. The name of the matching volume is returned,
// as is a boolean indicating whether the volume was newly created to satisfy this request.
func (d *SANEconomyStorageDriver) ensureFlexvolForLUN(
ctx context.Context, aggregate, spaceReserve, snapshotPolicy, tieringPolicy string, enableSnapshotDir, encrypt bool,
sizeBytes uint64, opts map[string]string, config drivers.OntapStorageDriverConfig, storagePool *storage.Pool,
) (string, error) {
) (string, bool, error) {

shouldLimitVolumeSize, flexvolSizeLimit, checkVolumeSizeLimitsError := drivers.CheckVolumeSizeLimits(
ctx, sizeBytes, config.CommonStorageDriverConfig)
if checkVolumeSizeLimitsError != nil {
return "", checkVolumeSizeLimitsError
return "", false, checkVolumeSizeLimitsError
}

// Check if a suitable Flexvol already exists
Expand All @@ -1195,22 +1243,22 @@ func (d *SANEconomyStorageDriver) ensureFlexvolForLUN(
shouldLimitVolumeSize, flexvolSizeLimit)

if err != nil {
return "", fmt.Errorf("error finding Flexvol for LUN: %v", err)
return "", false, fmt.Errorf("error finding Flexvol for LUN: %v", err)
}

// Found one
if flexvol != "" {
return flexvol, nil
return flexvol, false, nil
}

// Nothing found, so create a suitable Flexvol
flexvol, err = d.createFlexvolForLUN(
ctx, aggregate, spaceReserve, snapshotPolicy, tieringPolicy, enableSnapshotDir, encrypt, opts, storagePool)
if err != nil {
return "", fmt.Errorf("error creating Flexvol for LUN: %v", err)
return "", false, fmt.Errorf("error creating Flexvol for LUN: %v", err)
}

return flexvol, nil
return flexvol, true, nil
}

// createFlexvolForLUN creates a new Flexvol matching the specified attributes for
Expand Down

0 comments on commit 75b74ab

Please sign in to comment.