Skip to content

Commit

Permalink
rbd: refactor the replication code
Browse files Browse the repository at this point in the history
added interface to refactor mirroring
to work for both volume and group

Signed-off-by: Madhu Rajanna <madhupr007@gmail.com>
  • Loading branch information
Madhu-1 committed Jul 26, 2024
1 parent da6279b commit 6b9ac05
Show file tree
Hide file tree
Showing 9 changed files with 488 additions and 300 deletions.
257 changes: 116 additions & 141 deletions internal/csi-addons/rbd/replication.go

Large diffs are not rendered by default.

183 changes: 103 additions & 80 deletions internal/csi-addons/rbd/replication_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"time"

corerbd "github.com/ceph/ceph-csi/internal/rbd"
"github.com/ceph/ceph-csi/internal/rbd/types"

librbd "github.com/ceph/go-ceph/rbd"
"github.com/ceph/go-ceph/rbd/admin"
Expand Down Expand Up @@ -219,30 +220,36 @@ func TestCheckVolumeResyncStatus(t *testing.T) {
t.Parallel()
tests := []struct {
name string
args librbd.SiteMirrorImageStatus
args corerbd.SiteMirrorImageStatus
wantErr bool
}{
{
name: "test when local_snapshot_timestamp is non zero",
args: librbd.SiteMirrorImageStatus{
//nolint:lll // sample output cannot be split into multiple lines.
Description: `replaying, {"bytes_per_second":0.0,"bytes_per_snapshot":81920.0,"last_snapshot_bytes":81920,"last_snapshot_sync_seconds":56743,"local_snapshot_timestamp":1684675261,"remote_snapshot_timestamp":1684675261,"replay_state":"idle"}`,
args: corerbd.SiteMirrorImageStatus{
SiteMirrorImageStatus: librbd.SiteMirrorImageStatus{
//nolint:lll // sample output cannot be split into multiple lines.
Description: `replaying, {"bytes_per_second":0.0,"bytes_per_snapshot":81920.0,"last_snapshot_bytes":81920,"last_snapshot_sync_seconds":56743,"local_snapshot_timestamp":1684675261,"remote_snapshot_timestamp":1684675261,"replay_state":"idle"}`,
},
},
wantErr: false,
},
{
name: "test when local_snapshot_timestamp is zero",
//nolint:lll // sample output cannot be split into multiple lines.
args: librbd.SiteMirrorImageStatus{
Description: `replaying, {"bytes_per_second":0.0,"bytes_per_snapshot":81920.0,"last_snapshot_bytes":81920,"last_snapshot_sync_seconds":56743,"local_snapshot_timestamp":0,"remote_snapshot_timestamp":1684675261,"replay_state":"idle"}`,
args: corerbd.SiteMirrorImageStatus{
SiteMirrorImageStatus: librbd.SiteMirrorImageStatus{
Description: `replaying, {"bytes_per_second":0.0,"bytes_per_snapshot":81920.0,"last_snapshot_bytes":81920,"last_snapshot_sync_seconds":56743,"local_snapshot_timestamp":0,"remote_snapshot_timestamp":1684675261,"replay_state":"idle"}`,
},
},
wantErr: true,
},
{
name: "test when local_snapshot_timestamp is not present",
//nolint:lll // sample output cannot be split into multiple lines.
args: librbd.SiteMirrorImageStatus{
Description: `replaying, {"bytes_per_second":0.0,"bytes_per_snapshot":81920.0,"last_snapshot_bytes":81920,"last_snapshot_sync_seconds":56743,"remote_snapshot_timestamp":1684675261,"replay_state":"idle"}`,
args: corerbd.SiteMirrorImageStatus{
SiteMirrorImageStatus: librbd.SiteMirrorImageStatus{
Description: `replaying, {"bytes_per_second":0.0,"bytes_per_snapshot":81920.0,"last_snapshot_bytes":81920,"last_snapshot_sync_seconds":56743,"remote_snapshot_timestamp":1684675261,"replay_state":"idle"}`,
},
},
wantErr: true,
},
Expand All @@ -261,122 +268,138 @@ func TestCheckRemoteSiteStatus(t *testing.T) {
t.Parallel()
tests := []struct {
name string
args *librbd.GlobalMirrorImageStatus
args corerbd.GlobalMirrorStatus
wantReady bool
}{
{
name: "Test a single peer in sync",
args: &librbd.GlobalMirrorImageStatus{
SiteStatuses: []librbd.SiteMirrorImageStatus{
{
MirrorUUID: "remote",
State: librbd.MirrorImageStatusStateUnknown,
Up: true,
args: corerbd.GlobalMirrorStatus{
GlobalMirrorImageStatus: librbd.GlobalMirrorImageStatus{
SiteStatuses: []librbd.SiteMirrorImageStatus{
{
MirrorUUID: "remote",
State: librbd.MirrorImageStatusStateUnknown,
Up: true,
},
},
},
},
wantReady: true,
},
{
name: "Test a single peer in sync, including a local instance",
args: &librbd.GlobalMirrorImageStatus{
SiteStatuses: []librbd.SiteMirrorImageStatus{
{
MirrorUUID: "remote",
State: librbd.MirrorImageStatusStateUnknown,
Up: true,
},
{
MirrorUUID: "",
State: librbd.MirrorImageStatusStateUnknown,
Up: true,
args: corerbd.GlobalMirrorStatus{
GlobalMirrorImageStatus: librbd.GlobalMirrorImageStatus{
SiteStatuses: []librbd.SiteMirrorImageStatus{
{
MirrorUUID: "remote",
State: librbd.MirrorImageStatusStateUnknown,
Up: true,
},
{
MirrorUUID: "",
State: librbd.MirrorImageStatusStateUnknown,
Up: true,
},
},
},
},
wantReady: true,
},
{
name: "Test a multiple peers in sync",
args: &librbd.GlobalMirrorImageStatus{
SiteStatuses: []librbd.SiteMirrorImageStatus{
{
MirrorUUID: "remote1",
State: librbd.MirrorImageStatusStateUnknown,
Up: true,
},
{
MirrorUUID: "remote2",
State: librbd.MirrorImageStatusStateUnknown,
Up: true,
args: corerbd.GlobalMirrorStatus{
GlobalMirrorImageStatus: librbd.GlobalMirrorImageStatus{
SiteStatuses: []librbd.SiteMirrorImageStatus{
{
MirrorUUID: "remote1",
State: librbd.MirrorImageStatusStateUnknown,
Up: true,
},
{
MirrorUUID: "remote2",
State: librbd.MirrorImageStatusStateUnknown,
Up: true,
},
},
},
},
wantReady: true,
},
{
name: "Test no remote peers",
args: &librbd.GlobalMirrorImageStatus{
SiteStatuses: []librbd.SiteMirrorImageStatus{},
args: corerbd.GlobalMirrorStatus{
GlobalMirrorImageStatus: librbd.GlobalMirrorImageStatus{
SiteStatuses: []librbd.SiteMirrorImageStatus{},
},
},
wantReady: false,
},
{
name: "Test single peer not in sync",
args: &librbd.GlobalMirrorImageStatus{
SiteStatuses: []librbd.SiteMirrorImageStatus{
{
MirrorUUID: "remote",
State: librbd.MirrorImageStatusStateReplaying,
Up: true,
args: corerbd.GlobalMirrorStatus{
GlobalMirrorImageStatus: librbd.GlobalMirrorImageStatus{
SiteStatuses: []librbd.SiteMirrorImageStatus{
{
MirrorUUID: "remote",
State: librbd.MirrorImageStatusStateReplaying,
Up: true,
},
},
},
},
wantReady: false,
},
{
name: "Test single peer not up",
args: &librbd.GlobalMirrorImageStatus{
SiteStatuses: []librbd.SiteMirrorImageStatus{
{
MirrorUUID: "remote",
State: librbd.MirrorImageStatusStateUnknown,
Up: false,
args: corerbd.GlobalMirrorStatus{
GlobalMirrorImageStatus: librbd.GlobalMirrorImageStatus{
SiteStatuses: []librbd.SiteMirrorImageStatus{
{
MirrorUUID: "remote",
State: librbd.MirrorImageStatusStateUnknown,
Up: false,
},
},
},
},
wantReady: false,
},
{
name: "Test multiple peers, when first peer is not in sync",
args: &librbd.GlobalMirrorImageStatus{
SiteStatuses: []librbd.SiteMirrorImageStatus{
{
MirrorUUID: "remote1",
State: librbd.MirrorImageStatusStateStoppingReplay,
Up: true,
},
{
MirrorUUID: "remote2",
State: librbd.MirrorImageStatusStateUnknown,
Up: true,
args: corerbd.GlobalMirrorStatus{
GlobalMirrorImageStatus: librbd.GlobalMirrorImageStatus{
SiteStatuses: []librbd.SiteMirrorImageStatus{
{
MirrorUUID: "remote1",
State: librbd.MirrorImageStatusStateStoppingReplay,
Up: true,
},
{
MirrorUUID: "remote2",
State: librbd.MirrorImageStatusStateUnknown,
Up: true,
},
},
},
},
wantReady: false,
},
{
name: "Test multiple peers, when second peer is not up",
args: &librbd.GlobalMirrorImageStatus{
SiteStatuses: []librbd.SiteMirrorImageStatus{
{
MirrorUUID: "remote1",
State: librbd.MirrorImageStatusStateUnknown,
Up: true,
},
{
MirrorUUID: "remote2",
State: librbd.MirrorImageStatusStateUnknown,
Up: false,
args: corerbd.GlobalMirrorStatus{
GlobalMirrorImageStatus: librbd.GlobalMirrorImageStatus{
SiteStatuses: []librbd.SiteMirrorImageStatus{
{
MirrorUUID: "remote1",
State: librbd.MirrorImageStatusStateUnknown,
Up: true,
},
{
MirrorUUID: "remote2",
State: librbd.MirrorImageStatusStateUnknown,
Up: false,
},
},
},
},
Expand All @@ -386,7 +409,7 @@ func TestCheckRemoteSiteStatus(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
if ready := checkRemoteSiteStatus(context.TODO(), tt.args); ready != tt.wantReady {
if ready := checkRemoteSiteStatus(context.TODO(), tt.args.GetAllSitesStatus()); ready != tt.wantReady {
t.Errorf("checkRemoteSiteStatus() ready = %v, expect ready = %v", ready, tt.wantReady)
}
})
Expand Down Expand Up @@ -651,7 +674,7 @@ func Test_getFlattenMode(t *testing.T) {
tests := []struct {
name string
args args
want corerbd.FlattenMode
want types.FlattenMode
wantErr bool
}{
{
Expand All @@ -660,27 +683,27 @@ func Test_getFlattenMode(t *testing.T) {
ctx: context.TODO(),
parameters: map[string]string{},
},
want: corerbd.FlattenModeNever,
want: types.FlattenModeNever,
},
{
name: "flattenMode option set to never",
args: args{
ctx: context.TODO(),
parameters: map[string]string{
flattenModeKey: string(corerbd.FlattenModeNever),
flattenModeKey: string(types.FlattenModeNever),
},
},
want: corerbd.FlattenModeNever,
want: types.FlattenModeNever,
},
{
name: "flattenMode option set to force",
args: args{
ctx: context.TODO(),
parameters: map[string]string{
flattenModeKey: string(corerbd.FlattenModeForce),
flattenModeKey: string(types.FlattenModeForce),
},
},
want: corerbd.FlattenModeForce,
want: types.FlattenModeForce,
},

{
Expand Down
19 changes: 13 additions & 6 deletions internal/rbd/controllerserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -988,7 +988,7 @@ func (cs *ControllerServer) DeleteVolume(
func cleanupRBDImage(ctx context.Context,
rbdVol *rbdVolume, cr *util.Credentials,
) (*csi.DeleteVolumeResponse, error) {
mirroringInfo, err := rbdVol.GetImageMirroringInfo()
info, err := rbdVol.GetMirroringInfo()
if err != nil {
log.ErrorLog(ctx, err.Error())

Expand All @@ -998,7 +998,7 @@ func cleanupRBDImage(ctx context.Context,
// Mirroring is enabled on the image
// Local image is secondary
// Local image is in up+replaying state
if mirroringInfo.State == librbd.MirrorImageEnabled && !mirroringInfo.Primary {
if info.GetState() == librbd.MirrorImageEnabled.String() && !info.IsPrimary() {
// If the image is in a secondary state and its up+replaying means its
// an healthy secondary and the image is primary somewhere in the
// remote cluster and the local image is getting replayed. Delete the
Expand All @@ -1007,11 +1007,18 @@ func cleanupRBDImage(ctx context.Context,
// the image on all the remote (secondary) clusters will get
// auto-deleted. This helps in garbage collecting the OMAP, PVC and PV
// objects after failback operation.
localStatus, rErr := rbdVol.GetLocalState()
sts, rErr := rbdVol.GetGlobalMirroringStatus()
if rErr != nil {
return nil, status.Error(codes.Internal, rErr.Error())
}
if localStatus.Up && localStatus.State == librbd.MirrorImageStatusStateReplaying {

localStatus, rErr := sts.GetLocalSiteStatus()
if rErr != nil {
log.ErrorLog(ctx, "failed to get local status for volume %s: %w", rbdVol.RbdImageName, rErr)

return nil, status.Error(codes.Internal, rErr.Error())
}
if localStatus.IsUP() && localStatus.GetState() == librbd.MirrorImageStatusStateReplaying.String() {
if err = undoVolReservation(ctx, rbdVol, cr); err != nil {
log.ErrorLog(ctx, "failed to remove reservation for volume (%s) with backing image (%s) (%s)",
rbdVol.RequestName, rbdVol.RbdImageName, err)
Expand All @@ -1023,8 +1030,8 @@ func cleanupRBDImage(ctx context.Context,
}
log.ErrorLog(ctx,
"secondary image status is up=%t and state=%s",
localStatus.Up,
localStatus.State)
localStatus.IsUP(),
localStatus.GetState())
}

inUse, err := rbdVol.isInUse()
Expand Down
2 changes: 1 addition & 1 deletion internal/rbd/driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ func (r *Driver) setupCSIAddonsServer(conf *util.Config) error {
fcs := casrbd.NewFenceControllerServer()
r.cas.RegisterService(fcs)

rcs := casrbd.NewReplicationServer(NewControllerServer(r.cd))
rcs := casrbd.NewReplicationServer(rbd.CSIInstanceID, NewControllerServer(r.cd))
r.cas.RegisterService(rcs)

vgcs := casrbd.NewVolumeGroupServer(conf.InstanceID)
Expand Down
Loading

0 comments on commit 6b9ac05

Please sign in to comment.