From 72d5df48ee0af7df2e2587651cafdaed076cd7f6 Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Tue, 22 Aug 2023 18:52:49 +0200 Subject: [PATCH] rbd: fix resync issue During the Demote volume store the image creation timestamp. During Resync do below operation * Check image creation timestamp stored during Demote operation and current creation timestamp during Resync and check both are equal and its for force resync then issue resync * If the image on both sides is not in unknown state, check last_snapshot_timestamp on the local mirror description, if its present send volumeReady as false or else return error message. If both the images are in up+unknown the send volumeReady as true. Signed-off-by: Madhu Rajanna --- internal/csi-addons/rbd/replication.go | 121 +++++++++++++++----- internal/csi-addons/rbd/replication_test.go | 66 ++--------- internal/rbd/rbd_util.go | 13 +++ internal/rbd/replication.go | 62 +++++----- 4 files changed, 139 insertions(+), 123 deletions(-) diff --git a/internal/csi-addons/rbd/replication.go b/internal/csi-addons/rbd/replication.go index 0cc4ce58a1df..d39f1ad4c844 100644 --- a/internal/csi-addons/rbd/replication.go +++ b/internal/csi-addons/rbd/replication.go @@ -24,6 +24,7 @@ import ( "regexp" "strconv" "strings" + "sync" "time" corerbd "github.com/ceph/ceph-csi/internal/rbd" @@ -40,6 +41,16 @@ import ( "google.golang.org/protobuf/types/known/timestamppb" ) +// imageResyncMap is used to store the volumeID and image creation time for +// resync. This will not get populated at the start of the driver. The +// volumeID gets added to the map when the DemoteVolume RPC is called and +// removed from the map when the PromoteVolume RPC is called or +// DisableVolumeReplication RPC is called. +// If the driver is restarted when its handling secondary volumes, the map will +// be empty and we might end up in reissuing the Resync RBD call for the images +// which are already in sync. +var imageResyncMap = sync.Map{} + // imageMirroringMode is used to indicate the mirroring mode for an RBD image. type imageMirroringMode string @@ -337,11 +348,15 @@ func (rs *ReplicationServer) DisableVolumeReplication(ctx context.Context, if err != nil { return nil, getGRPCError(err) } + imageResyncMap.Delete(volumeID) return &replication.DisableVolumeReplicationResponse{}, nil default: return nil, status.Errorf(codes.InvalidArgument, "image is in %s Mode", mirroringInfo.State) } + // remove the volume from the resync map as the image mirroring is disabled and + // during next Demote operation cephcsi will re-add the image to the sync map + imageResyncMap.Delete(volumeID) return &replication.DisableVolumeReplicationResponse{}, nil } @@ -439,6 +454,13 @@ func (rs *ReplicationServer) PromoteVolume(ctx context.Context, rbdVol) } + // remove the volume from the resync map as the image is promoted and + // during next Demote operation cephcsi will re-add the image to the sync + // map. + // This is required if someone does not do DisableVolumeReplication but try + // to toggle between PromoteVolume and DemoteVolume. + imageResyncMap.Delete(volumeID) + return &replication.PromoteVolumeResponse{}, nil } @@ -480,6 +502,18 @@ func (rs *ReplicationServer) DemoteVolume(ctx context.Context, return nil, err } + + creationTime, err := rbdVol.GetImageCreationTime() + if err != nil { + log.ErrorLog(ctx, err.Error()) + + return nil, status.Error(codes.Internal, err.Error()) + } + // store the image creation time for resync + if _, ok := imageResyncMap.Load(volumeID); !ok { + imageResyncMap.Store(volumeID, creationTime.AsTime()) + } + mirroringInfo, err := rbdVol.GetImageMirroringInfo() if err != nil { log.ErrorLog(ctx, err.Error()) @@ -538,6 +572,8 @@ func checkRemoteSiteStatus(ctx context.Context, mirrorStatus *librbd.GlobalMirro // ResyncVolume extracts the RBD volume information from the volumeID, If the // image is present, mirroring is enabled and the image is in demoted state. // If yes it will resync the image to correct the split-brain. +// +//nolint:gocyclo,cyclop // TODO: reduce complexity func (rs *ReplicationServer) ResyncVolume(ctx context.Context, req *replication.ResyncVolumeRequest, ) (*replication.ResyncVolumeResponse, error) { @@ -572,22 +608,11 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context, return nil, err } - mirroringInfo, err := rbdVol.GetImageMirroringInfo() + err = rbdVol.CheckImageIsPrimary() if err != nil { - // in case of Resync the image will get deleted and gets recreated and - // it takes time for this operation. log.ErrorLog(ctx, err.Error()) - return nil, status.Error(codes.Aborted, err.Error()) - } - - if mirroringInfo.State != librbd.MirrorImageEnabled { - return nil, status.Error(codes.InvalidArgument, "image mirroring is not enabled") - } - - // return error if the image is still primary - if mirroringInfo.Primary { - return nil, status.Error(codes.InvalidArgument, "image is in primary state") + return nil, getGRPCError(err) } mirrorStatus, err := rbdVol.GetImageMirroringStatus() @@ -637,14 +662,28 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context, ready = checkRemoteSiteStatus(ctx, mirrorStatus) } - err = rbdVol.ResyncVol(localStatus, req.Force) + creationTime, err := rbdVol.GetImageCreationTime() if err != nil { - return nil, getGRPCError(err) + return nil, status.Errorf(codes.Internal, "failed to get image info: %s", err.Error()) } - err = checkVolumeResyncStatus(localStatus) + ok, err := resyncVolume(ctx, volumeID, creationTime) if err != nil { - return nil, status.Error(codes.Internal, err.Error()) + return nil, status.Errorf(codes.Internal, "failed to check image needs resync: %s", err.Error()) + } + + if req.Force && ok { + err = rbdVol.ResyncVol(localStatus) + if err != nil { + return nil, getGRPCError(err) + } + } + + if !ready { + err = checkVolumeResyncStatus(localStatus) + if err != nil { + return nil, status.Error(codes.Internal, err.Error()) + } } err = rbdVol.RepairResyncedImageID(ctx, ready) @@ -659,6 +698,29 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context, return resp, nil } +func resyncVolume( + ctx context.Context, + volumeID string, + currentImageTime *timestamppb.Timestamp, +) (bool, error) { + savedImageTime, ok := imageResyncMap.Load(volumeID) + if !ok { + return false, errors.New("failed to get image creation time") + } + + log.UsefulLog(ctx, "savedImageTime=%v, currentImageTime=%v", savedImageTime, currentImageTime) + st, ok := savedImageTime.(time.Time) + if !ok { + return false, errors.New("failed to convert image creation time") + } + + if st.Equal(currentImageTime.AsTime()) { + return true, nil + } + + return false, nil +} + func getGRPCError(err error) error { if err == nil { return status.Error(codes.OK, codes.OK.String()) @@ -854,20 +916,17 @@ func getLastSyncInfo(description string) (*replication.GetVolumeReplicationInfoR } func checkVolumeResyncStatus(localStatus librbd.SiteMirrorImageStatus) error { - // we are considering 2 states to check resync started and resync completed - // as below. all other states will be considered as an error state so that - // cephCSI can return error message and volume replication operator can - // mark the VolumeReplication status as not resyncing for the volume. - - // If the state is Replaying means the resync is going on. - // Once the volume on remote cluster is demoted and resync - // is completed the image state will be moved to UNKNOWN. - // RBD mirror daemon should be always running on the primary cluster. - if !localStatus.Up || (localStatus.State != librbd.MirrorImageStatusStateReplaying && - localStatus.State != librbd.MirrorImageStatusStateUnknown) { - return fmt.Errorf( - "not resyncing. Local status: daemon up=%t image is in %q state", - localStatus.Up, localStatus.State) + // we are considering local snapshot timestamp to check if the resync is + // started or not, if we dont see local_snapshot_timestamp in the + // description of localStatus, we are returning error. if we see the local + // snapshot timestamp in the description we return resyncing started. + description := localStatus.Description + resp, err := getLastSyncInfo(description) + if err != nil { + return fmt.Errorf("failed to get last sync info: %w", err) + } + if resp.LastSyncTime == nil { + return errors.New("last sync time is nil") } return nil diff --git a/internal/csi-addons/rbd/replication_test.go b/internal/csi-addons/rbd/replication_test.go index 8a7ea8806ff7..8c995b9ce4c4 100644 --- a/internal/csi-addons/rbd/replication_test.go +++ b/internal/csi-addons/rbd/replication_test.go @@ -225,74 +225,26 @@ func TestCheckVolumeResyncStatus(t *testing.T) { wantErr bool }{ { - name: "test when rbd mirror daemon is not running", + name: "test when local_snapshot_timestamp is non zero", args: librbd.SiteMirrorImageStatus{ - State: librbd.MirrorImageStatusStateUnknown, - Up: false, - }, - wantErr: true, - }, - { - name: "test for unknown state", - args: librbd.SiteMirrorImageStatus{ - State: librbd.MirrorImageStatusStateUnknown, - Up: true, - }, - wantErr: false, - }, - { - name: "test for error state", - args: librbd.SiteMirrorImageStatus{ - State: librbd.MirrorImageStatusStateError, - Up: true, - }, - wantErr: true, - }, - { - name: "test for syncing state", - args: librbd.SiteMirrorImageStatus{ - State: librbd.MirrorImageStatusStateSyncing, - Up: true, - }, - wantErr: true, - }, - { - name: "test for starting_replay state", - args: librbd.SiteMirrorImageStatus{ - State: librbd.MirrorImageStatusStateStartingReplay, - Up: true, - }, - wantErr: true, - }, - { - name: "test for replaying state", - args: librbd.SiteMirrorImageStatus{ - State: librbd.MirrorImageStatusStateReplaying, - Up: true, + //nolint:lll // sample output cannot be split into multiple lines. + Description: `replaying, {"bytes_per_second":0.0,"bytes_per_snapshot":81920.0,"last_snapshot_bytes":81920,"last_snapshot_sync_seconds":56743,"local_snapshot_timestamp":1684675261,"remote_snapshot_timestamp":1684675261,"replay_state":"idle"}`, }, wantErr: false, }, { - name: "test for stopping_replay state", - args: librbd.SiteMirrorImageStatus{ - State: librbd.MirrorImageStatusStateStoppingReplay, - Up: true, - }, - wantErr: true, - }, - { - name: "test for stopped state", + name: "test when local_snapshot_timestamp is zero", + //nolint:lll // sample output cannot be split into multiple lines. args: librbd.SiteMirrorImageStatus{ - State: librbd.MirrorImageStatusStateStopped, - Up: true, + Description: `replaying, {"bytes_per_second":0.0,"bytes_per_snapshot":81920.0,"last_snapshot_bytes":81920,"last_snapshot_sync_seconds":56743,"local_snapshot_timestamp":0,"remote_snapshot_timestamp":1684675261,"replay_state":"idle"}`, }, wantErr: true, }, { - name: "test for invalid state", + name: "test when local_snapshot_timestamp is not present", + //nolint:lll // sample output cannot be split into multiple lines. args: librbd.SiteMirrorImageStatus{ - State: librbd.MirrorImageStatusState(100), - Up: true, + Description: `replaying, {"bytes_per_second":0.0,"bytes_per_snapshot":81920.0,"last_snapshot_bytes":81920,"last_snapshot_sync_seconds":56743,"remote_snapshot_timestamp":1684675261,"replay_state":"idle"}`, }, wantErr: true, }, diff --git a/internal/rbd/rbd_util.go b/internal/rbd/rbd_util.go index 64d786d734da..4f3283fe364b 100644 --- a/internal/rbd/rbd_util.go +++ b/internal/rbd/rbd_util.go @@ -1558,6 +1558,19 @@ func (rv *rbdVolume) setImageOptions(ctx context.Context, options *librbd.ImageO return nil } +// getImageCreationTime returns the creation time of the image. if the image +// creation time is not set, it queries the image info and returns the creation time. +func (ri *rbdImage) GetImageCreationTime() (*timestamppb.Timestamp, error) { + if ri.CreatedAt != nil { + return ri.CreatedAt, nil + } + err := ri.getImageInfo() + if err != nil { + return nil, err + } + return ri.CreatedAt, nil +} + // getImageInfo queries rbd about the given image and returns its metadata, and returns // ErrImageNotFound if provided image is not found. func (ri *rbdImage) getImageInfo() error { diff --git a/internal/rbd/replication.go b/internal/rbd/replication.go index 29a3ff12c72a..2b750626f809 100644 --- a/internal/rbd/replication.go +++ b/internal/rbd/replication.go @@ -19,31 +19,19 @@ package rbd import ( "context" "fmt" - "strings" librbd "github.com/ceph/go-ceph/rbd" ) -func (rv *rbdVolume) ResyncVol(localStatus librbd.SiteMirrorImageStatus, force bool) error { - if resyncRequired(localStatus) { - // If the force option is not set return the error message to retry - // with Force option. - if !force { - return fmt.Errorf("%w: image is in %q state, description (%s). Force resync to recover volume", - ErrFailedPrecondition, localStatus.State, localStatus.Description) - } - err := rv.resyncImage() - if err != nil { - return fmt.Errorf("%w: failed to resync image: %w", ErrResyncImageFailed, err) - } - - // If we issued a resync, return a non-final error as image needs to be recreated - // locally. Caller retries till RBD syncs an initial version of the image to - // report its status in the resync request. - return fmt.Errorf("%w: awaiting initial resync due to split brain", ErrUnavailable) +func (rv *rbdVolume) ResyncVol(localStatus librbd.SiteMirrorImageStatus) error { + if err := rv.resyncImage(); err != nil { + return fmt.Errorf("%w: failed to resync image: %w", ErrResyncImageFailed, err) } - return nil + // If we issued a resync, return a non-final error as image needs to be recreated + // locally. Caller retries till RBD syncs an initial version of the image to + // report its status in the resync request. + return fmt.Errorf("%w: awaiting initial resync due to split brain", ErrUnavailable) } // repairResyncedImageID updates the existing image ID with new one. @@ -66,22 +54,6 @@ func (rv *rbdVolume) RepairResyncedImageID(ctx context.Context, ready bool) erro return rv.repairImageID(ctx, j, true) } -// resyncRequired returns true if local image is in split-brain state and image -// needs resync. -func resyncRequired(localStatus librbd.SiteMirrorImageStatus) bool { - // resync is required if the image is in error state or the description - // contains split-brain message. - // In some corner cases like `re-player shutdown` the local image will not - // be in an error state. It would be also worth considering the `description` - // field to make sure about split-brain. - if localStatus.State == librbd.MirrorImageStatusStateError || - strings.Contains(localStatus.Description, "split-brain") { - return true - } - - return false -} - func (rv *rbdVolume) DisableVolumeReplication( mirroringInfo *librbd.MirrorImageInfo, force bool, @@ -126,3 +98,23 @@ func (rv *rbdVolume) DisableVolumeReplication( return nil } + +func (rv *rbdVolume) CheckImageIsPrimary() error { + mirroringInfo, err := rv.GetImageMirroringInfo() + if err != nil { + // in case of Resync the image will get deleted and gets recreated and + // it takes time for this operation. + return fmt.Errorf("%w: %w", ErrAborted, err) + } + + if mirroringInfo.State != librbd.MirrorImageEnabled { + return fmt.Errorf("%w: image mirroring is not enabled", ErrInvalidArgument) + } + + // return error if the image is still primary + if mirroringInfo.Primary { + return fmt.Errorf("%w: image is in primary state", ErrInvalidArgument) + } + + return nil +}