From 3d75a36ab31210f0d2f2af0a3881f16a950ecae1 Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Tue, 24 May 2022 09:08:42 +0530 Subject: [PATCH] rbd: add support for rbd striping RBD supports creating rbd images with object size, stripe unit and stripe count to support striping. This PR adds the support for the same. More details about striping at https://docs.ceph.com/en/quincy/man/8/rbd/#striping fixes: #3124 Signed-off-by: Madhu Rajanna --- docs/deploy-rbd.md | 4 + e2e/rbd.go | 142 +++++++++++++++++++++++++++++++ e2e/rbd_helper.go | 65 ++++++++++++++ examples/rbd/storageclass.yaml | 8 ++ internal/rbd/controllerserver.go | 19 +++++ internal/rbd/rbd_util.go | 142 ++++++++++++++++++++++--------- 6 files changed, 339 insertions(+), 41 deletions(-) diff --git a/docs/deploy-rbd.md b/docs/deploy-rbd.md index 0c90bf6445e7..602d1b02407b 100644 --- a/docs/deploy-rbd.md +++ b/docs/deploy-rbd.md @@ -65,6 +65,10 @@ make image-cephcsi | `mounter` | no | if set to `rbd-nbd`, use `rbd-nbd` on nodes that have `rbd-nbd` and `nbd` kernel modules to map rbd images | | `encrypted` | no | disabled by default, use `"true"` to enable LUKS encryption on PVC and `"false"` to disable it. **Do not change for existing storageclasses** | | `encryptionKMSID` | no | required if encryption is enabled and a kms is used to store passphrases | +| `stripeUnit` | no | stripe unit in bytes | +| `stripeCount` | no | objects to stripe over before looping | +| `objectSize` | no | object size in bytes | + **NOTE:** An accompanying CSI configuration file, needs to be provided to the running pods. 
Refer to [Creating CSI configuration](../examples/README.md#creating-csi-configuration) diff --git a/e2e/rbd.go b/e2e/rbd.go index 2b0c14d0108c..62fe83e66c32 100644 --- a/e2e/rbd.go +++ b/e2e/rbd.go @@ -4041,6 +4041,148 @@ var _ = Describe("RBD", func() { }) }) + By("validate rbd image stripe", func() { + stripeUnit := 4096 + stripeCount := 8 + objectSize := 4096 + err := deleteResource(rbdExamplePath + "storageclass.yaml") + if err != nil { + e2elog.Failf("failed to delete storageclass: %v", err) + } + + err = createRBDStorageClass( + f.ClientSet, + f, + defaultSCName, + nil, + map[string]string{ + "stripeUnit": fmt.Sprintf("%d", stripeUnit), + "stripeCount": fmt.Sprintf("%d", stripeCount), + "objectSize": fmt.Sprintf("%d", objectSize), + }, + deletePolicy) + if err != nil { + e2elog.Failf("failed to create storageclass: %v", err) + } + defer func() { + err = deleteResource(rbdExamplePath + "storageclass.yaml") + if err != nil { + e2elog.Failf("failed to delete storageclass: %v", err) + } + err = createRBDStorageClass(f.ClientSet, f, defaultSCName, nil, nil, deletePolicy) + if err != nil { + e2elog.Failf("failed to create storageclass: %v", err) + } + }() + + err = createRBDSnapshotClass(f) + if err != nil { + e2elog.Failf("failed to create storageclass: %v", err) + } + defer func() { + err = deleteRBDSnapshotClass() + if err != nil { + e2elog.Failf("failed to delete VolumeSnapshotClass: %v", err) + } + }() + + // create PVC and bind it to an app + pvc, err := loadPVC(pvcPath) + if err != nil { + e2elog.Failf("failed to load PVC: %v", err) + } + + pvc.Namespace = f.UniqueName + + err = createPVCAndvalidatePV(f.ClientSet, pvc, deployTimeout) + if err != nil { + e2elog.Failf("failed to create PVC and application: %v", err) + } + // validate created backend rbd images + validateRBDImageCount(f, 1, defaultRBDPool) + validateOmapCount(f, 1, rbdType, defaultRBDPool, volumesType) + err = validateStripe(f, pvc, stripeUnit, stripeCount, objectSize) + if err != nil { + 
e2elog.Failf("failed to validate stripe %v", err) + } + + snap := getSnapshot(snapshotPath) + snap.Namespace = f.UniqueName + snap.Spec.Source.PersistentVolumeClaimName = &pvc.Name + + err = createSnapshot(&snap, deployTimeout) + if err != nil { + e2elog.Failf("failed to create snapshot: %v", err) + } + // validate created backend rbd images + // parent PVC + snapshot + totalImages := 2 + validateRBDImageCount(f, totalImages, defaultRBDPool) + validateOmapCount(f, 1, rbdType, defaultRBDPool, volumesType) + validateOmapCount(f, 1, rbdType, defaultRBDPool, snapsType) + pvcClone, err := loadPVC(pvcClonePath) + if err != nil { + e2elog.Failf("failed to load PVC: %v", err) + } + + // create clone PVC as ROX + pvcClone.Namespace = f.UniqueName + pvcClone.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadOnlyMany} + err = createPVCAndvalidatePV(f.ClientSet, pvcClone, deployTimeout) + if err != nil { + e2elog.Failf("failed to create PVC: %v", err) + } + // validate created backend rbd images + // parent pvc+ snapshot + clone + totalImages = 3 + validateRBDImageCount(f, totalImages, defaultRBDPool) + validateOmapCount(f, 2, rbdType, defaultRBDPool, volumesType) + validateOmapCount(f, 1, rbdType, defaultRBDPool, snapsType) + err = validateStripe(f, pvcClone, stripeUnit, stripeCount, objectSize) + if err != nil { + e2elog.Failf("failed to validate stripe for clone %v", err) + } + // delete snapshot + err = deleteSnapshot(&snap, deployTimeout) + if err != nil { + e2elog.Failf("failed to delete snapshot: %v", err) + } + + pvcSmartClone, err := loadPVC(pvcSmartClonePath) + if err != nil { + e2elog.Failf("failed to load pvcSmartClone: %v", err) + } + pvcSmartClone.Namespace = f.UniqueName + + err = createPVCAndvalidatePV(f.ClientSet, pvcSmartClone, deployTimeout) + if err != nil { + e2elog.Failf("failed to create pvc: %v", err) + } + // validate created backend rbd images + // parent pvc + temp clone + clone + totalImages = 3 + validateRBDImageCount(f, totalImages, 
defaultRBDPool) + validateOmapCount(f, 2, rbdType, defaultRBDPool, volumesType) + err = validateStripe(f, pvcSmartClone, stripeUnit, stripeCount, objectSize) + if err != nil { + e2elog.Failf("failed to validate stripe for clone %v", err) + } + // delete parent pvc + err = deletePVCAndValidatePV(f.ClientSet, pvc, deployTimeout) + if err != nil { + e2elog.Failf("failed to delete PVC: %v", err) + } + + // delete clone pvc + err = deletePVCAndValidatePV(f.ClientSet, pvcSmartClone, deployTimeout) + if err != nil { + e2elog.Failf("failed to delete PVC: %v", err) + } + // validate created backend rbd images + validateRBDImageCount(f, 0, defaultRBDPool) + validateOmapCount(f, 0, rbdType, defaultRBDPool, volumesType) + }) + // Make sure this should be last testcase in this file, because // it deletes pool By("Create a PVC and delete PVC when backend pool deleted", func() { diff --git a/e2e/rbd_helper.go b/e2e/rbd_helper.go index 8dbcfb5a2896..7b326758a965 100644 --- a/e2e/rbd_helper.go +++ b/e2e/rbd_helper.go @@ -940,3 +940,68 @@ func waitToRemoveImagesFromTrash(f *framework.Framework, poolName string, t int) return err } + +// imageInfo strongly typed JSON spec for image info. +type imageInfo struct { + ObjectUUID string `json:"name"` + Size int64 `json:"size"` + Format int64 `json:"format"` + StripeUnit int `json:"stripe_unit"` + StripeCount int `json:"stripe_count"` + ObjectSize int `json:"object_size"` +} + +// getImageInfo queries rbd about the given image and returns its metadata, and returns +// error if provided image is not found. 
+func getImageInfo(f *framework.Framework, imageName, poolName string) (imageInfo, error) { + // rbd --format=json info [image-spec | snap-spec] + var imgInfo imageInfo + + stdOut, stdErr, err := execCommandInToolBoxPod( + f, + fmt.Sprintf("rbd info %s %s --format json", rbdOptions(poolName), imageName), + rookNamespace) + if err != nil { + return imgInfo, fmt.Errorf("failed to get rbd info %w", err) + } + if stdErr != "" { + return imgInfo, fmt.Errorf("failed to get rbd info %v", stdErr) + } + err = json.Unmarshal([]byte(stdOut), &imgInfo) + if err != nil { + return imgInfo, fmt.Errorf("unmarshal failed: %w. raw buffer response: %s", + err, stdOut) + } + + return imgInfo, nil +} + +func validateStripe(f *framework.Framework, + pvc *v1.PersistentVolumeClaim, + stripeUnit, + stripeCount, + objectSize int) error { + imageData, err := getImageInfoFromPVC(pvc.Namespace, pvc.Name, f) + if err != nil { + return err + } + + imgInfo, err := getImageInfo(f, imageData.imageName, defaultRBDPool) + if err != nil { + return err + } + + if imgInfo.ObjectSize != objectSize { + return fmt.Errorf("objectSize %d does not match expected %d", imgInfo.ObjectSize, objectSize) + } + + if imgInfo.StripeUnit != stripeUnit { + return fmt.Errorf("stripeUnit %d does not match expected %d", imgInfo.StripeUnit, stripeUnit) + } + + if imgInfo.StripeCount != stripeCount { + return fmt.Errorf("stripeCount %d does not match expected %d", imgInfo.StripeCount, stripeCount) + } + + return nil +} diff --git a/examples/rbd/storageclass.yaml b/examples/rbd/storageclass.yaml index 94c9413e13c3..a30114667ac6 100644 --- a/examples/rbd/storageclass.yaml +++ b/examples/rbd/storageclass.yaml @@ -134,6 +134,14 @@ parameters: # {"domainLabel":"zone","value":"zone1"}]} # ] + # Image striping, Refer https://docs.ceph.com/en/latest/man/8/rbd/#striping + # For more details + # (optional) stripe unit in bytes. + # stripeUnit: <> + # (optional) objects to stripe over before looping. 
+ # stripeCount: <> + # (optional) The object size in bytes. + # objectSize: <> reclaimPolicy: Delete allowVolumeExpansion: true mountOptions: diff --git a/internal/rbd/controllerserver.go b/internal/rbd/controllerserver.go index f002896f5749..41b57ac6e01e 100644 --- a/internal/rbd/controllerserver.go +++ b/internal/rbd/controllerserver.go @@ -91,6 +91,25 @@ func (cs *ControllerServer) validateVolumeReq(ctx context.Context, req *csi.Crea return err } + err = validateStriping(req.Parameters) + if err != nil { + return status.Error(codes.InvalidArgument, err.Error()) + } + + return nil +} + +func validateStriping(parameters map[string]string) error { + stripeUnit := parameters["stripeUnit"] + stripeCount := parameters["stripeCount"] + if stripeUnit != "" && stripeCount == "" { + return errors.New("stripeCount must be specified when stripeUnit is specified") + } + + if stripeUnit == "" && stripeCount != "" { + return errors.New("stripeUnit must be specified when stripeCount is specified") + } + return nil } diff --git a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go index ce60e3ee31ab..1f675d0f1dfe 100644 --- a/internal/rbd/rbd_util.go +++ b/internal/rbd/rbd_util.go @@ -21,6 +21,7 @@ import ( "encoding/json" "errors" "fmt" + "math" "os" "path/filepath" "strconv" @@ -96,6 +97,11 @@ type rbdImage struct { // VolSize is the size of the RBD image backing this rbdImage. VolSize int64 + // image striping configurations. + StripeCount uint64 + StripeUnit uint64 + ObjectSize uint64 + Monitors string // JournalPool is the ceph pool in which the CSI Journal/CSI snapshot Journal is // stored @@ -402,27 +408,17 @@ func (rs *rbdSnapshot) String() string { // createImage creates a new ceph image with provision and volume options. 
func createImage(ctx context.Context, pOpts *rbdVolume, cr *util.Credentials) error { volSzMiB := fmt.Sprintf("%dM", util.RoundOffVolSize(pOpts.VolSize)) - options := librbd.NewRbdImageOptions() - logMsg := "rbd: create %s size %s (features: %s) using mon %s" - if pOpts.DataPool != "" { - logMsg += fmt.Sprintf(", data pool %s", pOpts.DataPool) - err := options.SetString(librbd.RbdImageOptionDataPool, pOpts.DataPool) - if err != nil { - return fmt.Errorf("failed to set data pool: %w", err) - } - } - log.DebugLog(ctx, logMsg, + log.DebugLog(ctx, "rbd: create %s size %s (features: %s) using mon %s", pOpts, volSzMiB, pOpts.ImageFeatureSet.Names(), pOpts.Monitors) - if pOpts.ImageFeatureSet != 0 { - err := options.SetUint64(librbd.RbdImageOptionFeatures, uint64(pOpts.ImageFeatureSet)) - if err != nil { - return fmt.Errorf("failed to set image features: %w", err) - } + options, err := pOpts.setImageOptions(ctx) + if err != nil { + return err } + defer options.Destroy() - err := pOpts.Connect(cr) + err = pOpts.Connect(cr) if err != nil { return err } @@ -1267,9 +1263,42 @@ func genVolFromVolumeOptions( rbdVol.Mounter) rbdVol.DisableInUseChecks = disableInUseChecks + err = rbdVol.setStripConfiguration(volOptions) + if err != nil { + return nil, err + } + return rbdVol, nil } +func (ri *rbdImage) setStripConfiguration(options map[string]string) error { + var err error + if val, ok := options["stripeUnit"]; ok { + ri.StripeUnit, err = strconv.ParseUint(val, 10, 64) + if err != nil { + return fmt.Errorf("failed to parse stripeUnit %s: %w", val, err) + } + } + + if val, ok := options["stripeCount"]; ok { + ri.StripeCount, err = strconv.ParseUint(val, 10, 64) + if err != nil { + return fmt.Errorf("failed to parse stripeCount %s: %w", val, err) + } + } + + if val, ok := options["objectSize"]; ok { + objSize, err := strconv.ParseFloat(val, 64) + if err != nil { + return fmt.Errorf("failed to parse objectSize %s: %w", val, err) + } + size := math.Log2(objSize) + ri.ObjectSize = 
uint64(size) + } + + return nil +} + func (rv *rbdVolume) validateImageFeatures(imageFeatures string) error { // It is possible for image features to be an empty string which // the Go split function would return a single item array with @@ -1370,7 +1399,8 @@ func (rv *rbdVolume) cloneRbdImageFromSnapshot( pSnapOpts *rbdSnapshot, parentVol *rbdVolume) error { var err error - logMsg := "rbd: clone %s %s (features: %s) using mon %s" + log.DebugLog(ctx, "rbd: clone %s %s (features: %s) using mon %s", + pSnapOpts, rv, rv.ImageFeatureSet.Names(), rv.Monitors) err = parentVol.openIoctx() if err != nil { @@ -1381,32 +1411,11 @@ func (rv *rbdVolume) cloneRbdImageFromSnapshot( parentVol.ioctx = nil }() - options := librbd.NewRbdImageOptions() - defer options.Destroy() - - if rv.DataPool != "" { - logMsg += fmt.Sprintf(", data pool %s", rv.DataPool) - err = options.SetString(librbd.RbdImageOptionDataPool, rv.DataPool) - if err != nil { - return fmt.Errorf("failed to set data pool: %w", err) - } - } - - log.DebugLog(ctx, logMsg, - pSnapOpts, rv, rv.ImageFeatureSet.Names(), rv.Monitors) - - if rv.ImageFeatureSet != 0 { - err = options.SetUint64(librbd.RbdImageOptionFeatures, uint64(rv.ImageFeatureSet)) - if err != nil { - return fmt.Errorf("failed to set image features: %w", err) - } - } - - err = options.SetUint64(librbd.ImageOptionCloneFormat, 2) + options, err := rv.setImageOptions(ctx) if err != nil { - return fmt.Errorf("failed to set image features: %w", err) + return err } - + defer options.Destroy() // As the clone is yet to be created, open the Ioctx. err = rv.openIoctx() if err != nil { @@ -1447,6 +1456,57 @@ func (rv *rbdVolume) cloneRbdImageFromSnapshot( return nil } +// setImageOptions sets the image options. 
+func (rv *rbdVolume) setImageOptions(ctx context.Context) (*librbd.ImageOptions, error) { + var err error + options := librbd.NewRbdImageOptions() + defer func() { + if err != nil { + options.Destroy() + } + }() + + logMsg := fmt.Sprintf("setting image options on %s", rv) + if rv.DataPool != "" { + logMsg += fmt.Sprintf(", data pool %s", rv.DataPool) + err = options.SetString(librbd.RbdImageOptionDataPool, rv.DataPool) + if err != nil { + return nil, fmt.Errorf("failed to set data pool: %w", err) + } + } + + if rv.ImageFeatureSet != 0 { + err = options.SetUint64(librbd.RbdImageOptionFeatures, uint64(rv.ImageFeatureSet)) + if err != nil { + return nil, fmt.Errorf("failed to set image features: %w", err) + } + } + + if rv.StripeCount != 0 { + logMsg += fmt.Sprintf(", stripe count %d, stripe unit %d", rv.StripeCount, rv.StripeUnit) + err = options.SetUint64(librbd.RbdImageOptionStripeCount, rv.StripeCount) + if err != nil { + return nil, fmt.Errorf("failed to set stripe count: %w", err) + } + err = options.SetUint64(librbd.RbdImageOptionStripeUnit, rv.StripeUnit) + if err != nil { + return nil, fmt.Errorf("failed to set stripe unit: %w", err) + } + } + + if rv.ObjectSize != 0 { + logMsg += fmt.Sprintf(", object size %d", rv.ObjectSize) + err = options.SetUint64(librbd.RbdImageOptionOrder, rv.ObjectSize) + if err != nil { + return nil, fmt.Errorf("failed to set object size: %w", err) + } + } + + log.DebugLog(ctx, logMsg) + + return options, nil +} + // getImageInfo queries rbd about the given image and returns its metadata, and returns // ErrImageNotFound if provided image is not found. func (ri *rbdImage) getImageInfo() error {