Skip to content

Commit

Permalink
Reduce calls to EFS API
Browse files Browse the repository at this point in the history
With #850, the new way of allocating GIDs introduced a new call
to the ListAccessPoints endpoint of EFS API, that is generating
problems on systems where EFS CSI driver is under high load
(lots of PVC created within a short time period). In this PR,
we are extracting the ListAccessPoints call from gid_allocator,
moving it one level up. In case of dynamic provisioining of GIDs
we can reuse the ListAccessPoints call to check for the file
system existence (thus removing the DescribeFileSystem call in
such case). In case of a fixed UID/GID, we continue calling
DescribeFileSystem, and no calls to ListAccessPoints.

In addition to the change explained above, gidMin and gidMax have
been converted to int64. #850 made both uid and gid int64, but
gidMin and gidMax were nout touched. Also changing the default
value for gidMin, as the value of 50000 was spamming the logs
with a message coming from gid_allocator (i.e. range bigger than
the max number of access points). Setting the value to 6999000
(the value that gid_allocator was setting by substracting 1000
to gidMax). Removing an unused function and field from
gid_allocator too.
  • Loading branch information
otorreno committed Dec 29, 2023
1 parent 56f489a commit a80e7b0
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 137 deletions.
4 changes: 2 additions & 2 deletions pkg/cloud/cloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,10 +300,10 @@ func (c *cloud) ListAccessPoints(ctx context.Context, fileSystemId string) (acce
res, err := c.efs.DescribeAccessPointsWithContext(ctx, describeAPInput)
if err != nil {
if isAccessDenied(err) {
return
return nil, ErrAccessDenied
}
if isFileSystemNotFound(err) {
return
return nil, ErrNotFound
}
err = fmt.Errorf("List Access Points failed: %v", err)
return
Expand Down
27 changes: 17 additions & 10 deletions pkg/driver/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ const (
AccessPointMode = "efs-ap"
AzName = "az"
BasePath = "basePath"
DefaultGidMin = 50000
DefaultGidMax = 7000000
DefaultGidMin = int64(6999000)
DefaultGidMax = int64(7000000)
DefaultTagKey = "efs.csi.aws.com/cluster"
DefaultTagValue = "true"
DirectoryPerms = "directoryPerms"
Expand Down Expand Up @@ -120,8 +120,8 @@ func (d *Driver) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest)
azName string
basePath string
gid int64
gidMin int
gidMax int
gidMin int64
gidMax int64
localCloud cloud.Cloud
provisioningMode string
roleArn string
Expand Down Expand Up @@ -189,7 +189,7 @@ func (d *Driver) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest)
}

if value, ok := volumeParams[GidMin]; ok {
gidMin, err = strconv.Atoi(value)
gidMin, err = strconv.ParseInt(value, 10, 64)
if err != nil {
return nil, status.Errorf(codes.InvalidArgument, "Failed to parse invalid %v: %v", GidMin, err)
}
Expand All @@ -203,7 +203,7 @@ func (d *Driver) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest)
if gidMin == 0 {
return nil, status.Errorf(codes.InvalidArgument, "Missing %v parameter", GidMin)
}
gidMax, err = strconv.Atoi(value)
gidMax, err = strconv.ParseInt(value, 10, 64)
if err != nil {
return nil, status.Errorf(codes.InvalidArgument, "Failed to parse invalid %v: %v", GidMax, err)
}
Expand Down Expand Up @@ -241,20 +241,27 @@ func (d *Driver) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest)
return nil, err
}

// Check if file system exists. Describe FS handles appropriate error codes
if _, err = localCloud.DescribeFileSystem(ctx, accessPointsOptions.FileSystemId); err != nil {
// Check if file system exists. Describe FS or List APs handle appropriate error codes
// With dynamic uid/gid provisioning we can save a call to describe FS, as list APs fails if FS ID does not exist
var accessPoints []*cloud.AccessPoint
if uid == -1 || gid == -1 {
accessPoints, err = localCloud.ListAccessPoints(ctx, accessPointsOptions.FileSystemId)
} else {
_, err = localCloud.DescribeFileSystem(ctx, accessPointsOptions.FileSystemId)
}
if err != nil {
if err == cloud.ErrAccessDenied {
return nil, status.Errorf(codes.Unauthenticated, "Access Denied. Please ensure you have the right AWS permissions: %v", err)
}
if err == cloud.ErrNotFound {
return nil, status.Errorf(codes.InvalidArgument, "File System does not exist: %v", err)
}
return nil, status.Errorf(codes.Internal, "Failed to fetch File System info: %v", err)
return nil, status.Errorf(codes.Internal, "Failed to fetch Access Points for File System: %v", err)
}

var allocatedGid int64
if uid == -1 || gid == -1 {
allocatedGid, err = d.gidAllocator.getNextGid(ctx, localCloud, accessPointsOptions.FileSystemId, gidMin, gidMax)
allocatedGid, err = d.gidAllocator.getNextGid(accessPointsOptions.FileSystemId, accessPoints, gidMin, gidMax)
if err != nil {
return nil, err
}
Expand Down
Loading

0 comments on commit a80e7b0

Please sign in to comment.