From 7d2bd5042ccc72cbb933065c24ebf0cb8548821d Mon Sep 17 00:00:00 2001
From: Ruben Mennes 
Date: Thu, 6 Jun 2024 14:59:29 +0200
Subject: [PATCH] Data usage for S3 and Glue data objects

- Data usage for S3 and Glue data objects
- Data source metadata based on config
---
 .mockery.yaml | 6 +
 aws/constants/constants.go | 9 +-
 aws/data_access/access_to_target_handlers.go | 16 +-
 aws/data_access/data_access_from_target.go | 6 +-
 aws/data_access/data_access_syncer.go | 2 +
 aws/data_access/data_access_to_target.go | 4 +-
 aws/data_source/aws_s3_repository.go | 58 +--
 aws/data_source/data_source.go | 45 +--
 aws/data_source/data_source_meta.go | 164 ++++----
 aws/data_source/data_source_meta_glue_file.go | 59 +++
 aws/data_source/data_source_meta_s3_file.go | 58 +++
 aws/data_usage.go | 295 --------------
 aws/data_usage_test.go | 38 --
 aws/iam/aws_iam_policy_resolution.go | 19 +-
 aws/it/data_usage_integration_test.go | 7 +-
 aws/usage/data_usage.go | 366 ++++++++++++++++++
 aws/usage/data_usage_object_mapper.go | 89 +++++
 aws/usage/data_usage_object_mapper_test.go | 140 +++++++
 aws/usage/data_usage_test.go | 210 ++++++++++
 aws/usage/mock_dataObjectRepository.go | 99 +++++
 aws/usage/mock_dataUsageRepository.go | 167 ++++++++
 aws/utils/trie/trie.go | 117 ++++++
 aws/utils/trie/trie_test.go | 246 ++++++++++++
 codecov.yml | 4 +
 main.go | 5 +-
 25 files changed, 1724 insertions(+), 505 deletions(-)
 create mode 100644 aws/data_source/data_source_meta_glue_file.go
 create mode 100644 aws/data_source/data_source_meta_s3_file.go
 delete mode 100644 aws/data_usage.go
 delete mode 100644 aws/data_usage_test.go
 create mode 100644 aws/usage/data_usage.go
 create mode 100644 aws/usage/data_usage_object_mapper.go
 create mode 100644 aws/usage/data_usage_object_mapper_test.go
 create mode 100644 aws/usage/data_usage_test.go
 create mode 100644 aws/usage/mock_dataObjectRepository.go
 create mode 100644 aws/usage/mock_dataUsageRepository.go
 create mode 100644 codecov.yml

diff --git a/.mockery.yaml b/.mockery.yaml
index 37211c1..14e8499 100644
--- a/.mockery.yaml
+++ b/.mockery.yaml
@@ -13,6 +13,12 @@ packages:
       dataAccessRepository:
       dataAccessSsoRepository:
       dataAccessIamRepository:
+  github.com/raito-io/cli-plugin-aws-account/aws/usage:
+    config:
+      dir: "{{.InterfaceDir}}"
+    interfaces:
+      dataUsageRepository:
+      dataObjectRepository:
   github.com/raito-io/cli/base/access_provider/sync_to_target/naming_hint:
     config:
       dir: "mocks/{{.PackageName}}"
diff --git a/aws/constants/constants.go b/aws/constants/constants.go
index 204dd18..ff1c75a 100644
--- a/aws/constants/constants.go
+++ b/aws/constants/constants.go
@@ -14,9 +14,12 @@ const (
 	AwsS3Enabled = "aws-s3-enabled"
 	AwsS3EmulateFolderStructure = "aws-s3-emulate-folder-structure"
-	AwsS3MaxFolderDepth = "aws-s3-max-folder-depth"
-	AwsS3IncludeBuckets = "aws-s3-include-buckets"
-	AwsS3ExcludeBuckets = "aws-s3-exclude-buckets"
+
+	AwsS3MaxFolderDepth = "aws-s3-max-folder-depth"
+	AwsS3MaxFolderDepthDefault = 20
+
+	AwsS3IncludeBuckets = "aws-s3-include-buckets"
+	AwsS3ExcludeBuckets = "aws-s3-exclude-buckets"

 	AwsGlueEnabled = "aws-glue-enabled"

diff --git a/aws/data_access/access_to_target_handlers.go b/aws/data_access/access_to_target_handlers.go
index ab1138c..05f1de8 100644
--- a/aws/data_access/access_to_target_handlers.go
+++ b/aws/data_access/access_to_target_handlers.go
@@ -431,12 +431,12 @@ func (r *roleAccessHandler) ExecuteUpdates(ctx context.Context) {

 		// Getting the what
 		ap := details.ap
-		statements := createPolicyStatementsFromWhat(ap.What)
+		statements := 
createPolicyStatementsFromWhat(ap.What, r.configMap) // Because we need to flatten the WHAT for roles as well, we gather all role APs from which this role AP inherits its what (following the reverse inheritance chain) inheritedAPs := r.accessProviders.GetAllAccessProvidersInInheritanceChainForWhat(model.Role, name, model.Role) for inheritedAP := range inheritedAPs { - statements = append(statements, createPolicyStatementsFromWhat(inheritedAP.ap.What)...) + statements = append(statements, createPolicyStatementsFromWhat(inheritedAP.ap.What, r.configMap)...) } if details.action == ActionCreate { @@ -718,7 +718,7 @@ func (p *policyAccessHandler) createAndUpdateRaitoPolicies(ctx context.Context, utils.Logger.Info(fmt.Sprintf("Process policy %s, action: %s", name, action)) - statements := createPolicyStatementsFromWhat(details.ap.What) + statements := createPolicyStatementsFromWhat(details.ap.What, p.configMap) if action == ActionCreate { utils.Logger.Info(fmt.Sprintf("Creating policy %s", name)) @@ -810,7 +810,7 @@ func (a *accessPointHandler) fetchExistingAccessPointsForRegion(ctx context.Cont for ind := range accessPoints { accessPoint := accessPoints[ind] - who, _, _ := iam.CreateWhoAndWhatFromAccessPointPolicy(accessPoint.PolicyParsed, accessPoint.Bucket, accessPoint.Name, a.account) + who, _, _ := iam.CreateWhoAndWhatFromAccessPointPolicy(accessPoint.PolicyParsed, accessPoint.Bucket, accessPoint.Name, a.account, a.configMap) if who != nil { existingPolicyBindings[accessPoint.Name] = set.Set[model.PolicyBinding]{} @@ -972,7 +972,7 @@ func (a *accessPointHandler) ExecuteUpdates(ctx context.Context) { sort.Strings(principals) // Getting the what - statements := createPolicyStatementsFromWhat(accessPointAp.What) + statements := createPolicyStatementsFromWhat(accessPointAp.What, a.configMap) whatItems := make([]sync_to_target.WhatItem, 0, len(accessPointAp.What)) whatItems = append(whatItems, accessPointAp.What...) @@ -980,7 +980,7 @@ func (a *accessPointHandler) ExecuteUpdates(ctx context.Context) { inheritedAPs := a.accessProviders.GetAllAccessProvidersInInheritanceChainForWhat(model.AccessPoint, accessPointName, model.AccessPoint) for inheritedAP := range inheritedAPs { whatItems = append(whatItems, inheritedAP.ap.What...) - statements = append(statements, createPolicyStatementsFromWhat(inheritedAP.ap.What)...) + statements = append(statements, createPolicyStatementsFromWhat(inheritedAP.ap.What, a.configMap)...) } bucketName, region, err2 := extractBucketForAccessPoint(whatItems) @@ -1313,13 +1313,13 @@ func (s *ssoRoleAccessHandler) updateWhatPolicies(ctx context.Context, name stri } func (s *ssoRoleAccessHandler) updateWhatDataObjects(ctx context.Context, details *AccessProviderDetails, name string, permissionSetArn string) { - statements := createPolicyStatementsFromWhat(details.ap.What) // this should be empty as it is purpose + statements := createPolicyStatementsFromWhat(details.ap.What, s.config) // this should be empty as it is purpose // Because we need to flatten the WHAT for roles as well, we gather all role APs from which this role AP inherits its what (following the reverse inheritance chain) inheritedWhatToFlatten := s.accessProviders.GetAllAccessProvidersInInheritanceChainForWhat(model.SSORole, name, model.Role, model.SSORole) for inheritedAP := range inheritedWhatToFlatten { - statements = append(statements, createPolicyStatementsFromWhat(inheritedAP.ap.What)...) + statements = append(statements, createPolicyStatementsFromWhat(inheritedAP.ap.What, s.config)...) 
} err := s.ssoAdmin.UpdateInlinePolicyToPermissionSet(ctx, permissionSetArn, statements) diff --git a/aws/data_access/data_access_from_target.go b/aws/data_access/data_access_from_target.go index 9d6cb08..51aa801 100644 --- a/aws/data_access/data_access_from_target.go +++ b/aws/data_access/data_access_from_target.go @@ -226,7 +226,7 @@ func (a *AccessSyncer) fetchManagedPolicyAccessProviders(ctx context.Context, ap continue } - whatItems, incomplete := iam.CreateWhatFromPolicyDocument(policy.PolicyParsed, policy.Name, a.account) + whatItems, incomplete := iam.CreateWhatFromPolicyDocument(policy.PolicyParsed, policy.Name, a.account, a.cfgMap) policyDocument := "" if policy.PolicyDocument != nil { @@ -278,7 +278,7 @@ func (a *AccessSyncer) convertPoliciesToWhat(policies []model.PolicyEntity) ([]s for i := range policies { policy := policies[i] - policyWhat, policyIncomplete := iam.CreateWhatFromPolicyDocument(policy.PolicyParsed, policy.Name, a.account) + policyWhat, policyIncomplete := iam.CreateWhatFromPolicyDocument(policy.PolicyParsed, policy.Name, a.account, a.cfgMap) if policy.PolicyDocument != nil { policyDocuments += *policy.PolicyDocument + "\n" @@ -477,7 +477,7 @@ func (a *AccessSyncer) fetchS3AccessPointAccessProvidersForRegion(ctx context.Co } incomplete := false - newAp.ApInput.Who, newAp.ApInput.What, incomplete = iam.CreateWhoAndWhatFromAccessPointPolicy(accessPoint.PolicyParsed, accessPoint.Bucket, accessPoint.Name, a.account) + newAp.ApInput.Who, newAp.ApInput.What, incomplete = iam.CreateWhoAndWhatFromAccessPointPolicy(accessPoint.PolicyParsed, accessPoint.Bucket, accessPoint.Name, a.account, a.cfgMap) if incomplete { newAp.ApInput.Incomplete = ptr.Bool(true) diff --git a/aws/data_access/data_access_syncer.go b/aws/data_access/data_access_syncer.go index 6214234..f0864a7 100644 --- a/aws/data_access/data_access_syncer.go +++ b/aws/data_access/data_access_syncer.go @@ -81,6 +81,7 @@ type AccessSyncer struct { iamRepo dataAccessIamRepository account string userGroupMap map[string][]string + cfgMap *config.ConfigMap nameGenerator *NameGenerator } @@ -104,6 +105,7 @@ func NewDataAccessSyncerFromConfig(configMap *config.ConfigMap) *AccessSyncer { func (a *AccessSyncer) initialize(ctx context.Context, configMap *config.ConfigMap) error { a.repo = iam.NewAwsIamRepository(configMap) + a.cfgMap = configMap var err error diff --git a/aws/data_access/data_access_to_target.go b/aws/data_access/data_access_to_target.go index 199ec18..30aefdc 100644 --- a/aws/data_access/data_access_to_target.go +++ b/aws/data_access/data_access_to_target.go @@ -274,7 +274,7 @@ func mergeStatementsOnPermissions(statements []*awspolicy.Statement) []*awspolic return mergedStatements } -func createPolicyStatementsFromWhat(whatItems []sync_to_target.WhatItem) []*awspolicy.Statement { +func createPolicyStatementsFromWhat(whatItems []sync_to_target.WhatItem, cfg *config.ConfigMap) []*awspolicy.Statement { policyInfo := map[string][]string{} for _, what := range whatItems { @@ -283,7 +283,7 @@ func createPolicyStatementsFromWhat(whatItems []sync_to_target.WhatItem) []*awsp } if _, found := policyInfo[what.DataObject.FullName]; !found { - dot := data_source.GetDataObjectType(what.DataObject.Type) + dot := data_source.GetDataObjectType(what.DataObject.Type, cfg) allPermissions := what.Permissions if dot != nil { diff --git a/aws/data_source/aws_s3_repository.go b/aws/data_source/aws_s3_repository.go index 7eb9d26..82110e7 100644 --- a/aws/data_source/aws_s3_repository.go +++ b/aws/data_source/aws_s3_repository.go 
@@ -8,7 +8,6 @@ import ( "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/s3" - "github.com/aws/smithy-go/ptr" "github.com/raito-io/cli/base/util/config" "github.com/raito-io/cli-plugin-aws-account/aws/model" @@ -75,25 +74,10 @@ func (repo *AwsS3Repository) ListBuckets(ctx context.Context) ([]model.AwsS3Enti return result, nil } -func (repo *AwsS3Repository) ListFiles(ctx context.Context, bucket string, prefix *string) ([]model.AwsS3Entity, error) { - utils.Logger.Info(fmt.Sprintf("Fetching files from bucket %s", bucket)) - - bucketClient, err := repo.GetS3Client(ctx, nil) - if err != nil { - return nil, fmt.Errorf("get s3 client: %w", err) - } - - bucketInfo, err := bucketClient.GetBucketLocation(ctx, &s3.GetBucketLocationInput{Bucket: &bucket}) +func (repo *AwsS3Repository) ListFiles(ctx context.Context, bucket string, prefix *string) ([]model.AwsS3Entity, string, error) { + client, region, err := repo.getS3ClientForBucket(ctx, bucket) if err != nil { - return nil, fmt.Errorf("get bucket location: %w", err) - } - - bucketLocation := string(bucketInfo.LocationConstraint) - utils.Logger.Info(fmt.Sprintf("Location of bucket %q is %s", bucket, bucketLocation)) - - client, err := repo.GetS3Client(ctx, &bucketLocation) - if err != nil { - return nil, err + return nil, "", err } moreObjectsAvailable := true @@ -108,9 +92,9 @@ func (repo *AwsS3Repository) ListFiles(ctx context.Context, bucket string, prefi Prefix: prefix, } - response, err := client.ListObjectsV2(ctx, input) - if err != nil { - return nil, fmt.Errorf("list objects: %w", err) + response, err2 := client.ListObjectsV2(ctx, input) + if err2 != nil { + return nil, "", fmt.Errorf("list objects: %w", err2) } moreObjectsAvailable = response.IsTruncated != nil && *response.IsTruncated @@ -125,11 +109,35 @@ func (repo *AwsS3Repository) ListFiles(ctx context.Context, bucket string, prefi } } - return result, nil + return result, region, nil +} + +func (repo *AwsS3Repository) getS3ClientForBucket(ctx context.Context, bucket string) (*s3.Client, string, error) { + utils.Logger.Info(fmt.Sprintf("Fetching files from bucket %s", bucket)) + + bucketClient, err := repo.GetS3Client(ctx, nil) + if err != nil { + return nil, "", fmt.Errorf("get s3 client: %w", err) + } + + bucketInfo, err := bucketClient.GetBucketLocation(ctx, &s3.GetBucketLocationInput{Bucket: &bucket}) + if err != nil { + return nil, "", fmt.Errorf("get bucket location: %w", err) + } + + bucketLocation := string(bucketInfo.LocationConstraint) + utils.Logger.Info(fmt.Sprintf("Location of bucket %q is %s", bucket, bucketLocation)) + + client, err := repo.GetS3Client(ctx, &bucketLocation) + if err != nil { + return nil, "", err + } + + return client, bucketLocation, nil } -func (repo *AwsS3Repository) GetFile(ctx context.Context, bucket, key string, region string) (io.ReadCloser, error) { - client, err := repo.GetS3Client(ctx, ptr.String(region)) +func (repo *AwsS3Repository) GetFile(ctx context.Context, bucket, key string, region *string) (io.ReadCloser, error) { + client, err := repo.GetS3Client(ctx, region) if err != nil { return nil, err } diff --git a/aws/data_source/data_source.go b/aws/data_source/data_source.go index f278e59..ff5747c 100644 --- a/aws/data_source/data_source.go +++ b/aws/data_source/data_source.go @@ -14,6 +14,7 @@ import ( "github.com/raito-io/cli-plugin-aws-account/aws/model" "github.com/raito-io/cli-plugin-aws-account/aws/repo" "github.com/raito-io/cli-plugin-aws-account/aws/utils" + 
"github.com/raito-io/cli-plugin-aws-account/aws/utils/trie" "github.com/gammazero/workerpool" @@ -34,25 +35,21 @@ func NewDataSourceSyncer() *DataSourceSyncer { return &DataSourceSyncer{} } -// GetAvailableObjects is used by the data usage component to fetch all available data objects in a map structure for easy lookup of what is available -func (s *DataSourceSyncer) GetAvailableObjects(ctx context.Context, cfg *config.ConfigMap) (map[string]interface{}, error) { +// GetAvailableObjectTypes is used by the data usage component to fetch all available data objects and corresponding type +func (s *DataSourceSyncer) GetAvailableObjectTypes(ctx context.Context, cfg *config.ConfigMap) (*trie.Trie[string], error) { err := s.initialize(ctx, cfg) if err != nil { return nil, fmt.Errorf("initializing data source syncer: %w", err) } - bucketMap := map[string]interface{}{} - - dataSourceHandler := mapDataSourceHandler{ - bucketMap: bucketMap, - } + dataSourceHandler := newMapDataSourceHandler() err = s.fetchDataObjects(ctx, dataSourceHandler) if err != nil { return nil, err } - return bucketMap, nil + return dataSourceHandler.GetTrie(), nil } func (s *DataSourceSyncer) initialize(ctx context.Context, cfg *config.ConfigMap) error { @@ -257,7 +254,7 @@ func (s *DataSourceSyncer) FetchS3DataObjects(ctx context.Context, dataSourceHan utils.Logger.Info(fmt.Sprintf("Handling all files in bucket %s", bucketFullName)) } - files, err2 := s3Repo.ListFiles(ctx, bucketName, prefix) + files, _, err2 := s3Repo.ListFiles(ctx, bucketName, prefix) if err2 != nil { smu.Lock() resultErr = multierror.Append(resultErr, err2) @@ -285,7 +282,7 @@ func (s *DataSourceSyncer) FetchS3DataObjects(ctx context.Context, dataSourceHan func (s *DataSourceSyncer) GetDataSourceMetaData(ctx context.Context, configParams *config.ConfigMap) (*ds.MetaData, error) { utils.Logger.Debug("Returning meta data for AWS S3 data source") - return GetS3MetaData(), nil + return GetS3MetaData(configParams), nil } func (s *DataSourceSyncer) addAwsAsDataSource(dataSourceHandler wrappers.DataSourceObjectHandler, lock *sync.Mutex) error { @@ -349,7 +346,7 @@ func (s *DataSourceSyncer) addS3Entities(entities []model.AwsS3Entity, region st } } else if strings.EqualFold(entity.Type, ds.File) { if emulateFolders { - maxFolderDepth := s.config.GetIntWithDefault(constants.AwsS3MaxFolderDepth, 20) + maxFolderDepth := s.config.GetIntWithDefault(constants.AwsS3MaxFolderDepth, constants.AwsS3MaxFolderDepthDefault) parts := strings.Split(entity.Key, "/") parentExternalId := fmt.Sprintf("%s:%s:%s", s.account, region, entity.ParentKey) @@ -564,25 +561,19 @@ func filterBuckets(configMap *config.ConfigMap, buckets []model.AwsS3Entity) ([] return filteredBuckets, nil } +func newMapDataSourceHandler() *mapDataSourceHandler { + return &mapDataSourceHandler{ + bucketMap: trie.New[string]("/"), + } +} + type mapDataSourceHandler struct { - bucketMap map[string]interface{} + bucketMap *trie.Trie[string] } func (m mapDataSourceHandler) AddDataObjects(dataObjects ...*ds.DataObject) error { for _, dataObject := range dataObjects { - parts := strings.Split(dataObject.FullName, "/") - - currentMap := m.bucketMap - - for _, part := range parts { - partMap, found := currentMap[part] - if !found { - partMap = map[string]interface{}{} - currentMap[part] = partMap - } - - currentMap = partMap.(map[string]interface{}) - } + m.bucketMap.Insert(dataObject.FullName, dataObject.Type) } return nil @@ -596,3 +587,7 @@ func (m mapDataSourceHandler) SetDataSourceFullname(name string) { func (m 
mapDataSourceHandler) SetDataSourceDescription(desc string) { } + +func (m mapDataSourceHandler) GetTrie() *trie.Trie[string] { + return m.bucketMap +} diff --git a/aws/data_source/data_source_meta.go b/aws/data_source/data_source_meta.go index 96191f8..943371e 100644 --- a/aws/data_source/data_source_meta.go +++ b/aws/data_source/data_source_meta.go @@ -4,8 +4,9 @@ import ( "sync" ds "github.com/raito-io/cli/base/data_source" + "github.com/raito-io/cli/base/util/config" - "github.com/raito-io/cli-plugin-aws-account/aws/data_source/permissions" + "github.com/raito-io/cli-plugin-aws-account/aws/constants" "github.com/raito-io/cli-plugin-aws-account/aws/model" ) @@ -13,109 +14,80 @@ var metaData *ds.MetaData var dataObjects map[string]*ds.DataObjectType var mu sync.Mutex -func GetDataObjectType(name string) *ds.DataObjectType { - GetS3MetaData() +type MetadataProvider interface { + DataObjectTypes() []*ds.DataObjectType + UsageMetaInfo() *ds.UsageMetaInput + AccessProviderTypes() []*ds.AccessProviderType +} + +func GetDataObjectType(name string, cfg *config.ConfigMap) *ds.DataObjectType { + GetS3MetaData(cfg) return dataObjects[name] } -func GetS3MetaData() *ds.MetaData { +func GetS3MetaData(cfg *config.ConfigMap) *ds.MetaData { mu.Lock() defer mu.Unlock() if metaData == nil { + metaDataProvider := getMetadataProvider(cfg) + + dataObjectTypes := metaDataProvider.DataObjectTypes() + usageMetadataInfo := metaDataProvider.UsageMetaInfo() + + accessProviderTypes := []*ds.AccessProviderType{ + { + Type: string(model.Role), + Label: "AWS Role", + Icon: "", + IsNamedEntity: true, + CanBeCreated: true, + CanBeAssumed: true, + CanAssumeMultiple: false, + AllowedWhoAccessProviderTypes: []string{string(model.Role), string(model.SSORole)}, + }, + { + Type: string(model.SSORole), + Label: "AWS SSO Role", + Icon: "", + IsNamedEntity: true, + CanBeCreated: false, + CanBeAssumed: true, + CanAssumeMultiple: false, + AllowedWhoAccessProviderTypes: []string{string(model.SSORole)}, + IdentityStoreTypeForWho: "aws-organization", + }, + { + Type: string(model.Policy), + Label: "AWS Policy", + Icon: "", + IsNamedEntity: true, + CanBeCreated: true, + CanBeAssumed: false, + CanAssumeMultiple: false, + AllowedWhoAccessProviderTypes: []string{string(model.Policy), string(model.Role), string(model.SSORole)}, + }, + { + Type: string(model.AccessPoint), + Label: "AWS S3 Access Point", + Icon: "", + IsNamedEntity: true, + CanBeCreated: true, + CanBeAssumed: false, + CanAssumeMultiple: false, + AllowedWhoAccessProviderTypes: []string{string(model.AccessPoint), string(model.Role), string(model.SSORole)}, + }, + } + accessProviderTypes = append(accessProviderTypes, metaDataProvider.AccessProviderTypes()...) 
+ metaData = &ds.MetaData{ Type: "aws-account", SupportedFeatures: []string{""}, SupportsApInheritance: true, - DataObjectTypes: []*ds.DataObjectType{ - { - Name: ds.Datasource, - Type: ds.Datasource, - Permissions: permissions.AllS3Permissions, - Children: []string{ds.Bucket}, - }, - { - Name: ds.Bucket, - Type: ds.Bucket, - Label: "S3 Bucket", - Permissions: permissions.AllS3Permissions, - Children: []string{ds.Folder, ds.File, model.GlueTable}, - }, - { - Name: ds.Folder, - Type: ds.Folder, - Label: "S3 Folder", - Permissions: permissions.S3ObjectPermissions, - Children: []string{ds.Folder, ds.File, model.GlueTable}, - }, - { - Name: ds.File, - Type: ds.File, - Label: "S3 File", - Permissions: permissions.S3ObjectPermissions, - Children: []string{}, - }, - { - Name: model.GlueTable, - Type: ds.Table, - Label: "Glue Table", - Permissions: permissions.S3AccessPointPermissions, - Children: []string{}, - }, - }, - UsageMetaInfo: &ds.UsageMetaInput{ - DefaultLevel: ds.File, - Levels: []*ds.UsageMetaInputDetail{ - { - Name: ds.File, - DataObjectTypes: []string{ds.File}, - }, - }, - }, - AccessProviderTypes: []*ds.AccessProviderType{ - { - Type: string(model.Role), - Label: "AWS Role", - Icon: "", - IsNamedEntity: true, - CanBeCreated: true, - CanBeAssumed: true, - CanAssumeMultiple: false, - AllowedWhoAccessProviderTypes: []string{string(model.Role), string(model.SSORole)}, - }, - { - Type: string(model.SSORole), - Label: "AWS SSO Role", - Icon: "", - IsNamedEntity: true, - CanBeCreated: false, - CanBeAssumed: true, - CanAssumeMultiple: false, - AllowedWhoAccessProviderTypes: []string{string(model.SSORole)}, - IdentityStoreTypeForWho: "aws-organization", - }, - { - Type: string(model.Policy), - Label: "AWS Policy", - Icon: "", - IsNamedEntity: true, - CanBeCreated: true, - CanBeAssumed: false, - CanAssumeMultiple: false, - AllowedWhoAccessProviderTypes: []string{string(model.Policy), string(model.Role), string(model.SSORole)}, - }, - { - Type: string(model.AccessPoint), - Label: "AWS S3 Access Point", - Icon: "", - IsNamedEntity: true, - CanBeCreated: true, - CanBeAssumed: false, - CanAssumeMultiple: false, - AllowedWhoAccessProviderTypes: []string{string(model.AccessPoint), string(model.Role), string(model.SSORole)}, - }, - }, + DataObjectTypes: dataObjectTypes, + UsageMetaInfo: usageMetadataInfo, + AccessProviderTypes: accessProviderTypes, } dataObjects = make(map[string]*ds.DataObjectType) @@ -127,3 +99,11 @@ func GetS3MetaData() *ds.MetaData { return metaData } + +func getMetadataProvider(cfg *config.ConfigMap) MetadataProvider { + if cfg.GetBoolWithDefault(constants.AwsGlueEnabled, false) { + return S3GlueMetadataProvider{} + } + + return S3FileMetadataProvider{} +} diff --git a/aws/data_source/data_source_meta_glue_file.go b/aws/data_source/data_source_meta_glue_file.go new file mode 100644 index 0000000..369f8b0 --- /dev/null +++ b/aws/data_source/data_source_meta_glue_file.go @@ -0,0 +1,59 @@ +package data_source //nolint:dupl // Improved readability + +import ( + ds "github.com/raito-io/cli/base/data_source" + + "github.com/raito-io/cli-plugin-aws-account/aws/data_source/permissions" + "github.com/raito-io/cli-plugin-aws-account/aws/model" +) + +type S3GlueMetadataProvider struct { +} + +func (p S3GlueMetadataProvider) DataObjectTypes() []*ds.DataObjectType { + return []*ds.DataObjectType{ + { + Name: ds.Datasource, + Type: ds.Datasource, + Permissions: permissions.AllS3Permissions, + Children: []string{ds.Bucket}, + }, + { + Name: ds.Bucket, + Type: ds.Bucket, + Label: "S3 Bucket", 
+ Permissions: permissions.AllS3Permissions, + Children: []string{ds.Folder, model.GlueTable}, + }, + { + Name: ds.Folder, + Type: ds.Folder, + Label: "S3 Folder", + Permissions: permissions.S3ObjectPermissions, + Children: []string{ds.Folder, model.GlueTable}, + }, + { + Name: model.GlueTable, + Type: ds.Table, + Label: "Glue Table", + Permissions: permissions.S3AccessPointPermissions, + Children: []string{}, + }, + } +} + +func (p S3GlueMetadataProvider) UsageMetaInfo() *ds.UsageMetaInput { + return &ds.UsageMetaInput{ + DefaultLevel: model.GlueTable, + Levels: []*ds.UsageMetaInputDetail{ + { + Name: model.GlueTable, + DataObjectTypes: []string{model.GlueTable, ds.Folder}, + }, + }, + } +} + +func (p S3GlueMetadataProvider) AccessProviderTypes() []*ds.AccessProviderType { + return nil +} diff --git a/aws/data_source/data_source_meta_s3_file.go b/aws/data_source/data_source_meta_s3_file.go new file mode 100644 index 0000000..4ee0dfc --- /dev/null +++ b/aws/data_source/data_source_meta_s3_file.go @@ -0,0 +1,58 @@ +package data_source //nolint:dupl // Improved readability + +import ( + ds "github.com/raito-io/cli/base/data_source" + + "github.com/raito-io/cli-plugin-aws-account/aws/data_source/permissions" +) + +type S3FileMetadataProvider struct { +} + +func (p S3FileMetadataProvider) DataObjectTypes() []*ds.DataObjectType { + return []*ds.DataObjectType{ + { + Name: ds.Datasource, + Type: ds.Datasource, + Permissions: permissions.AllS3Permissions, + Children: []string{ds.Bucket}, + }, + { + Name: ds.Bucket, + Type: ds.Bucket, + Label: "S3 Bucket", + Permissions: permissions.AllS3Permissions, + Children: []string{ds.Folder, ds.File}, + }, + { + Name: ds.Folder, + Type: ds.Folder, + Label: "S3 Folder", + Permissions: permissions.S3ObjectPermissions, + Children: []string{ds.Folder, ds.File}, + }, + { + Name: ds.File, + Type: ds.File, + Label: "S3 File", + Permissions: permissions.S3ObjectPermissions, + Children: []string{}, + }, + } +} + +func (p S3FileMetadataProvider) UsageMetaInfo() *ds.UsageMetaInput { + return &ds.UsageMetaInput{ + DefaultLevel: ds.File, + Levels: []*ds.UsageMetaInputDetail{ + { + Name: ds.File, + DataObjectTypes: []string{ds.File, ds.Folder}, + }, + }, + } +} + +func (p S3FileMetadataProvider) AccessProviderTypes() []*ds.AccessProviderType { + return nil +} diff --git a/aws/data_usage.go b/aws/data_usage.go deleted file mode 100644 index 63324f6..0000000 --- a/aws/data_usage.go +++ /dev/null @@ -1,295 +0,0 @@ -package aws - -import ( - "compress/gzip" - "context" - "encoding/json" - "fmt" - "io" - "regexp" - "strings" - "sync" - "time" - - "github.com/gammazero/workerpool" - - "github.com/raito-io/cli-plugin-aws-account/aws/constants" - data_source2 "github.com/raito-io/cli-plugin-aws-account/aws/data_source" - "github.com/raito-io/cli-plugin-aws-account/aws/model" - "github.com/raito-io/cli-plugin-aws-account/aws/utils" - - "github.com/raito-io/cli/base/data_source" - "github.com/raito-io/cli/base/data_usage" - "github.com/raito-io/cli/base/util/config" - "github.com/raito-io/cli/base/wrappers" - - ap "github.com/raito-io/cli/base/access_provider/sync_from_target" -) - -type dataUsageRepository interface { - ListFiles(ctx context.Context, bucket string, prefix *string) ([]model.AwsS3Entity, error) - GetFile(ctx context.Context, bucket string, key string, region string) (io.ReadCloser, error) -} - -type DataUsageSyncer struct { - configMap *config.ConfigMap -} - -func NewDataUsageSyncer() *DataUsageSyncer { - return &DataUsageSyncer{} -} - -func (s *DataUsageSyncer) 
provideRepo() dataUsageRepository { - return data_source2.NewAwsS3Repository(s.configMap) -} - -func (s *DataUsageSyncer) SyncDataUsage(ctx context.Context, dataUsageFileHandler wrappers.DataUsageStatementHandler, configMap *config.ConfigMap) error { - s.configMap = configMap - repo := s.provideRepo() - - return s.syncDataUsageForRegion(ctx, dataUsageFileHandler, repo) -} - -func (s *DataUsageSyncer) syncDataUsageForRegion(ctx context.Context, dataUsageFileHandler wrappers.DataUsageStatementHandler, repo dataUsageRepository) error { - bucket := s.configMap.GetString(constants.AwsS3CloudTrailBucket) - - if bucket == "" { - utils.Logger.Warn("No usage cloud trail bucket specified.") - - return nil - } - - allUsageFiles, err := repo.ListFiles(ctx, bucket, nil) - if err != nil { - return fmt.Errorf("error while reading usage files from S3 bucket: %w", err) - } - - utils.Logger.Info(fmt.Sprintf("A total of %d usage files found in bucket %s", len(allUsageFiles), bucket)) - - numberOfDays := 90 - startDate := time.Now().Truncate(24*time.Hour).AddDate(0, 0, -numberOfDays) - - if s.configMap.Parameters["lastUsed"] != "" { - startDateRaw, errLocal := time.Parse(time.RFC3339, s.configMap.Parameters["lastUsed"]) - if errLocal == nil && startDateRaw.After(startDate) { - startDate = startDateRaw - } - } - - utils.Logger.Info(fmt.Sprintf("using start date %s", startDate.Format(time.RFC3339))) - - usageFiles := []string{} - - r := regexp.MustCompile(`.*/(\d{4}/\d{2}/\d{2})/.*`) - dateFormat := "2006/01/02" - - for _, file := range allUsageFiles { - matches := r.FindStringSubmatch(file.Key) - if len(matches) != 2 { - continue - } - - dt, err2 := time.Parse(dateFormat, matches[1]) - if err2 != nil { - continue - } - - if strings.Contains(file.Key, "/CloudTrail/eu-central-1/") && time.Since(dt).Hours() < float64(numberOfDays+1)*24 { - usageFiles = append(usageFiles, file.Key) - } - } - - utils.Logger.Info(fmt.Sprintf("%d files to process", len(usageFiles))) - - fileChan := make(chan string) - workerPool := workerpool.New(utils.GetConcurrency(s.configMap)) - fileLock := new(sync.Mutex) - numWorkers := 16 - - doSyncer := data_source2.NewDataSourceSyncer() - availableObjects, err := doSyncer.GetAvailableObjects(ctx, s.configMap) - - if err != nil { - return fmt.Errorf("error while fetching available objects for data usage: %w", err) - } - - for t := 0; t < numWorkers; t++ { - workerPool.Submit(func() { - readAndParseUsageLog(ctx, bucket, fileChan, repo, dataUsageFileHandler, fileLock, availableObjects) - }) - } - - for _, usageFile := range usageFiles { - fileChan <- usageFile - } - - close(fileChan) - workerPool.StopWait() - - return nil -} - -func readAndParseUsageLog(ctx context.Context, bucketName string, fileChan chan string, repo dataUsageRepository, - dataUsageFileHandler wrappers.DataUsageStatementHandler, fileLock *sync.Mutex, availableObjects map[string]interface{}) { - utils.Logger.Info("Starting data usage worker") - - for fileKey := range fileChan { - parts := strings.Split(fileKey, "/") - fileKeyShort := parts[len(parts)-1] - - start := time.Now() - - contents, err := getFileContents(ctx, repo, bucketName, fileKey) - if err != nil { - utils.Logger.Error(err.Error()) - return - } - - var result model.CloudTrailLog - - err = json.Unmarshal([]byte(contents), &result) - if err != nil { - utils.Logger.Error(err.Error()) - return - } - - statements := []data_usage.Statement{} - - for ind := range result.Records { - record := result.Records[ind] - isCloudTrailBucket := false - accessedObjects := 
[]ap.WhatItem{} - - for _, resource := range record.Resources { - if resource.Type != nil && resource.Arn != nil && strings.Contains(*resource.Arn, bucketName) { - isCloudTrailBucket = true - break - } - - permission := fmt.Sprintf("%s:%s", constants.S3PermissionPrefix, *record.EventName) - - if resource.Type != nil && resource.Arn != nil && strings.EqualFold(*resource.Type, "AWS::S3::Object") { - object := utils.ConvertArnToFullname(*resource.Arn) - - mappedObject := mapToClosedObject(object, availableObjects) - if !strings.Contains(mappedObject, "/") { - utils.Logger.Info(fmt.Sprintf("Could not map object %q to anything known. Skipping", object)) - continue - } - - accessedObjects = append(accessedObjects, ap.WhatItem{ - DataObject: &data_source.DataObjectReference{ - FullName: mappedObject, - Type: data_source.File, - }, - Permissions: []string{permission}, - }) - } - } - - if isCloudTrailBucket || len(accessedObjects) == 0 { - continue - } - - userName := "" - // TODO: investigate what the different possibilities are, this has been figured out by just looking - // at the logs so far - if record.UserIdentity == nil || record.UserIdentity.Type == nil { - utils.Logger.Warn("user identity is nil") - continue - } else if *record.UserIdentity.Type == "IAMUser" { - userName = *record.UserIdentity.UserName - } else if *record.UserIdentity.Type == "AssumedRole" { - principalId := record.UserIdentity.PrincipalId - parts := strings.Split(*principalId, ":") - userName = parts[1] - } else if record.UserIdentity.InvokedBy != nil { - userName = *record.UserIdentity.InvokedBy - } else if record.UserIdentity.Arn != nil { - userName = *record.UserIdentity.Arn - } - - if userName != "" { - statements = append(statements, data_usage.Statement{ - ExternalId: *record.EventID, - StartTime: record.EventTime.Unix(), - Bytes: int(record.Bytes.BytesIn) + int(record.Bytes.BytesOut), - AccessedDataObjects: accessedObjects, - User: userName, - Success: true, - }) - } - } - - if len(statements) > 0 { - err = addStatementsToDataUsageHandler(dataUsageFileHandler, statements, fileLock) - if err != nil { - utils.Logger.Error(err.Error()) - return - } - } - - utils.Logger.Info(fmt.Sprintf("%d records fetched and processed in %d ms from %s", len(result.Records), time.Since(start).Milliseconds(), fileKeyShort)) - } -} - -// mapToClosedObject maps the object path to the closest available path. 
-func mapToClosedObject(object string, availableObjects map[string]interface{}) string { - parts := strings.Split(object, "/") - path := "" - currentMap := availableObjects - - for _, part := range parts { - nextElement, found := currentMap[part] - - if !found { - break - } - - path += part + "/" - - newMap, isMap := nextElement.(map[string]interface{}) - if isMap { - currentMap = newMap - } - } - - path = strings.TrimSuffix(path, "/") - - return path -} - -func getFileContents(ctx context.Context, repo dataUsageRepository, bucketName string, fileKey string) (string, error) { - reader, err := repo.GetFile(ctx, bucketName, fileKey, "") - if err != nil { - return "", fmt.Errorf("get file: %w", err) - } - defer reader.Close() - - gzipReader, err := gzip.NewReader(reader) - if err != nil { - return "", fmt.Errorf("new reader: %w", err) - } - defer gzipReader.Close() - - buf := new(strings.Builder) - - _, err = io.Copy(buf, gzipReader) //nolint:gosec // no risk of injection - if err != nil { - return "", fmt.Errorf("copy: %w", err) - } - - return buf.String(), nil -} - -func addStatementsToDataUsageHandler(dataUsageFileHandler wrappers.DataUsageStatementHandler, statements []data_usage.Statement, lock *sync.Mutex) error { - lock.Lock() - defer lock.Unlock() - - err := dataUsageFileHandler.AddStatements(statements) - if err != nil { - return fmt.Errorf("add statement to handler: %w", err) - } - - return nil -} diff --git a/aws/data_usage_test.go b/aws/data_usage_test.go deleted file mode 100644 index 9f8fd02..0000000 --- a/aws/data_usage_test.go +++ /dev/null @@ -1,38 +0,0 @@ -package aws - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestDataUsage_mapToClosedObject(t *testing.T) { - availableObjects := map[string]interface{}{ - "bucket": map[string]interface{}{ - "topfolder1": map[string]interface{}{ - "subfolder11": map[string]interface{}{ - "subfolder111": map[string]interface{}{}, - }, - "subfolder12": map[string]interface{}{}, - }, - "topfolder2": map[string]interface{}{}, - }, - } - - tests := map[string]string{ - "bucket/topfolder1/subfolder11/subfolder111": "bucket/topfolder1/subfolder11/subfolder111", - "bucket/topfolder1/subfolder11/subfolder111/myfile.parquet": "bucket/topfolder1/subfolder11/subfolder111", - "bucket/topfolder1/subfolder11/subfolder111/another/file.csv": "bucket/topfolder1/subfolder11/subfolder111", - "bucket/topfolder1/subfolder11/subf/fileke": "bucket/topfolder1/subfolder11", - "bucket/totallydifferent": "bucket", - "bucket/topfolder2/subfolder11/subfolder111": "bucket/topfolder2", - "whatnow": "", - } - - for input, expected := range tests { - t.Run(input, func(t *testing.T) { - actual := mapToClosedObject(input, availableObjects) - assert.Equal(t, expected, actual) - }) - } -} diff --git a/aws/iam/aws_iam_policy_resolution.go b/aws/iam/aws_iam_policy_resolution.go index d0c72ae..ab0e020 100644 --- a/aws/iam/aws_iam_policy_resolution.go +++ b/aws/iam/aws_iam_policy_resolution.go @@ -5,6 +5,7 @@ import ( "strings" "github.com/aws/smithy-go/ptr" + "github.com/raito-io/cli/base/util/config" "github.com/raito-io/cli-plugin-aws-account/aws/constants" data_source2 "github.com/raito-io/cli-plugin-aws-account/aws/data_source" @@ -16,7 +17,7 @@ import ( "github.com/raito-io/golang-set/set" ) -func CreateWhoAndWhatFromAccessPointPolicy(policy *awspolicy.Policy, bucketName string, name string, account string) (*sync_from_target.WhoItem, []sync_from_target.WhatItem, bool) { +func CreateWhoAndWhatFromAccessPointPolicy(policy *awspolicy.Policy, 
bucketName string, name string, account string, cfg *config.ConfigMap) (*sync_from_target.WhoItem, []sync_from_target.WhatItem, bool) { if policy == nil { return nil, nil, false } @@ -59,7 +60,7 @@ func CreateWhoAndWhatFromAccessPointPolicy(policy *awspolicy.Policy, bucketName if path == "" || path == "/" { fullName = bucketName - resourceActions, incompleteResource = mapResourceActions(actions, data_source.Bucket) + resourceActions, incompleteResource = mapResourceActions(actions, data_source.Bucket, cfg) } else if strings.HasPrefix(path, "/object/") { path = utils.RemoveEndingWildcards(strings.TrimPrefix(path, "/object/")) if path == "" { @@ -68,7 +69,7 @@ func CreateWhoAndWhatFromAccessPointPolicy(policy *awspolicy.Policy, bucketName fullName = bucketName + "/" + path } - resourceActions, incompleteResource = mapResourceActions(actions, data_source.Folder) + resourceActions, incompleteResource = mapResourceActions(actions, data_source.Folder, cfg) } else { utils.Logger.Warn(fmt.Sprintf("UNSUPPORTED: Policy document for access point %q contains unknown resource reference %q. Unexpected access point path", name, resource)) localIncomplete = true @@ -240,7 +241,7 @@ func handleStatements(policy *awspolicy.Policy, name string, handler func(statem return incomplete } -func CreateWhatFromPolicyDocument(policy *awspolicy.Policy, policyName string, account string) ([]sync_from_target.WhatItem, bool) { +func CreateWhatFromPolicyDocument(policy *awspolicy.Policy, policyName string, account string, cfg *config.ConfigMap) ([]sync_from_target.WhatItem, bool) { if policy == nil { return nil, false } @@ -269,13 +270,13 @@ func CreateWhatFromPolicyDocument(policy *awspolicy.Policy, policyName string, a isBucket := !strings.Contains(fullName, "/") if isBucket { - resourceActions, incompleteResource = mapResourceActions(actions, data_source.Bucket) + resourceActions, incompleteResource = mapResourceActions(actions, data_source.Bucket, cfg) } else { - resourceActions, incompleteResource = mapResourceActions(actions, data_source.Folder) + resourceActions, incompleteResource = mapResourceActions(actions, data_source.Folder, cfg) } } else if resource == "*" { fullName = account - resourceActions, incompleteResource = mapResourceActions(actions, data_source.Datasource) + resourceActions, incompleteResource = mapResourceActions(actions, data_source.Datasource, cfg) } else { utils.Logger.Warn(fmt.Sprintf("UNSUPPORTED: Policy document for %q contains unknown resource reference %q.", policyName, resource)) localIncomplete = true @@ -328,10 +329,10 @@ func flattenWhatMap(whatMap map[string]set.Set[string], awsAccount string) []syn // mapResourceActions maps the permissions given to the ones we know for the given resource type. // It returns the mapped actions, together with a boolean indicating whether any actions were skipped (true) or not (false). 
-func mapResourceActions(actions []string, resourceType string) ([]string, bool) { +func mapResourceActions(actions []string, resourceType string, cfg *config.ConfigMap) ([]string, bool) { mappedActions := make([]string, 0, len(actions)) - dot := data_source2.GetDataObjectType(resourceType) + dot := data_source2.GetDataObjectType(resourceType, cfg) dotPermissions := dot.GetPermissions() incomplete := false diff --git a/aws/it/data_usage_integration_test.go b/aws/it/data_usage_integration_test.go index 77f6701..542dd9c 100644 --- a/aws/it/data_usage_integration_test.go +++ b/aws/it/data_usage_integration_test.go @@ -5,19 +5,20 @@ package it import ( "testing" - "github.com/raito-io/cli-plugin-aws-account/aws" + "github.com/raito-io/cli-plugin-aws-account/aws/usage" + "github.com/stretchr/testify/suite" ) type DataUsageTestSuite struct { AWSTestSuite - usageSyncer *aws.DataUsageSyncer + usageSyncer *usage.DataUsageSyncer } func TestDataUsageTestSuiteTestSuiteTestSuite(t *testing.T) { ts := DataUsageTestSuite{} - ts.usageSyncer = aws.NewDataUsageSyncer() + ts.usageSyncer = usage.NewDataUsageSyncer() suite.Run(t, &ts) } diff --git a/aws/usage/data_usage.go b/aws/usage/data_usage.go new file mode 100644 index 0000000..a8cad53 --- /dev/null +++ b/aws/usage/data_usage.go @@ -0,0 +1,366 @@ +package usage + +import ( + "compress/gzip" + "context" + "encoding/json" + "fmt" + "io" + "regexp" + "strings" + "sync" + "time" + + "github.com/gammazero/workerpool" + "github.com/hashicorp/go-multierror" + "github.com/raito-io/golang-set/set" + + "github.com/raito-io/cli-plugin-aws-account/aws/constants" + data_source2 "github.com/raito-io/cli-plugin-aws-account/aws/data_source" + "github.com/raito-io/cli-plugin-aws-account/aws/model" + baserepo "github.com/raito-io/cli-plugin-aws-account/aws/repo" + "github.com/raito-io/cli-plugin-aws-account/aws/utils" + "github.com/raito-io/cli-plugin-aws-account/aws/utils/trie" + + "github.com/raito-io/cli/base/data_usage" + "github.com/raito-io/cli/base/util/config" + "github.com/raito-io/cli/base/wrappers" + + ap "github.com/raito-io/cli/base/access_provider/sync_from_target" +) + +type dataUsageRepository interface { + ListFiles(ctx context.Context, bucket string, prefix *string) ([]model.AwsS3Entity, string, error) + GetFile(ctx context.Context, bucket string, key string, region *string) (io.ReadCloser, error) +} + +type dataObjectRepository interface { + GetAvailableObjectTypes(ctx context.Context, cfg *config.ConfigMap) (*trie.Trie[string], error) +} + +type DataUsageSyncer struct { + account string + repo dataUsageRepository + dataObjectRepo dataObjectRepository + configMap *config.ConfigMap +} + +func NewDataUsageSyncer() *DataUsageSyncer { + return &DataUsageSyncer{} +} + +func (s *DataUsageSyncer) provideRepo() dataUsageRepository { + return data_source2.NewAwsS3Repository(s.configMap) +} + +func (s *DataUsageSyncer) SyncDataUsage(ctx context.Context, dataUsageFileHandler wrappers.DataUsageStatementHandler, configMap *config.ConfigMap) error { + s.configMap = configMap + s.repo = s.provideRepo() + s.dataObjectRepo = data_source2.NewDataSourceSyncer() + + var err error + + s.account, err = baserepo.GetAccountId(ctx, configMap) + if err != nil { + return fmt.Errorf("getting account id: %w", err) + } + + return s.syncDataUsage(ctx, dataUsageFileHandler, configMap) +} + +func (s *DataUsageSyncer) syncDataUsage(ctx context.Context, dataUsageFileHandler wrappers.DataUsageStatementHandler, configMap *config.ConfigMap) error { + dataObjectMapper, err := 
ObjectMapperFactory(ctx, s.dataObjectRepo, configMap) + if err != nil { + return fmt.Errorf("get data object mapper: %w", err) + } + + regions, err := s.getAllRegions(ctx) + if err != nil { + return fmt.Errorf("get all regions: %w", err) + } + + err = s.syncDataUsageForAllRegions(ctx, regions, dataUsageFileHandler, dataObjectMapper) + if err != nil { + return fmt.Errorf("sync data usage: %w", err) + } + + return nil +} + +func (s *DataUsageSyncer) syncDataUsageForAllRegions(ctx context.Context, regions set.Set[string], dataUsageFileHandler wrappers.DataUsageStatementHandler, dataObjectMapper ObjectMapper) error { + for region := range regions { + err := s.syncDataUsageForRegion(ctx, region, dataUsageFileHandler, dataObjectMapper) + if err != nil { + return fmt.Errorf("sync data usage for region %q: %w", region, err) + } + } + + return nil +} + +func (s *DataUsageSyncer) getAllRegions(ctx context.Context) (set.Set[string], error) { + regionsStr := s.configMap.GetString(constants.AwsRegions) + if regionsStr == "" { + cfg, err := baserepo.GetAWSConfig(ctx, s.configMap, nil) + if err != nil { + return nil, fmt.Errorf("get aws config: %w", err) + } + + return set.NewSet(cfg.Region), nil + } + + regions := strings.Split(regionsStr, ",") + + return set.NewSet(regions...), nil +} + +func (s *DataUsageSyncer) addStatementsToDataUsageHandler(dataUsageFileHandler wrappers.DataUsageStatementHandler, statementChannel <-chan []data_usage.Statement, errorChannel chan<- error) { + for statements := range statementChannel { + utils.Logger.Info(fmt.Sprintf("Will add %d statements to data usage handler", len(statements))) + + err := addStatementsToDataUsageHandler(dataUsageFileHandler, statements, new(sync.Mutex)) + if err != nil { + errorChannel <- fmt.Errorf("add statement to data usage handler: %w", err) + } + } +} + +func (s *DataUsageSyncer) syncDataUsageForRegion(ctx context.Context, region string, dataUsageFileHandler wrappers.DataUsageStatementHandler, dataObjectMapper ObjectMapper) error { + bucket := s.configMap.GetString(constants.AwsS3CloudTrailBucket) + + // Preparation + if bucket == "" { + utils.Logger.Warn("No usage cloud trail bucket specified.") + + return nil + } + + usageFiles, bucketRegion, err := s.loadUsageFilesInRegion(ctx, bucket, region) + if err != nil { + return fmt.Errorf("load usage files: %w", err) + } + + workerPool := workerpool.New(utils.GetConcurrency(s.configMap)) + statementChannel := make(chan []data_usage.Statement) + errorChannel := make(chan error) + + // error parsing + var errorParsingWg sync.WaitGroup + var parsingErrors error + + errorParsingWg.Add(1) + + go func() { + defer errorParsingWg.Done() + + for parsingErr := range errorChannel { + parsingErrors = multierror.Append(parsingErrors, parsingErr) + } + }() + + // statement parsing + var statementParsingWg sync.WaitGroup + statementParsingWg.Add(1) + + go func() { + defer statementParsingWg.Done() + + s.addStatementsToDataUsageHandler(dataUsageFileHandler, statementChannel, errorChannel) + }() + + // Syncing + for _, usageFile := range usageFiles { + workerPool.Submit(func() { + s.readAndParseUsageLog(ctx, bucket, bucketRegion, region, usageFile, dataObjectMapper, statementChannel, errorChannel) + }) + } + + workerPool.StopWait() + close(statementChannel) + statementParsingWg.Wait() + close(errorChannel) + errorParsingWg.Wait() + + return parsingErrors +} + +func (s *DataUsageSyncer) loadUsageFilesInRegion(ctx context.Context, bucket string, region string) ([]string, string, error) { + allUsageFiles, 
bucketRegion, err := s.repo.ListFiles(ctx, bucket, nil) + if err != nil { + return nil, "", fmt.Errorf("error while reading usage files from S3 bucket: %w", err) + } + + utils.Logger.Info(fmt.Sprintf("A total of %d usage files found in bucket %s", len(allUsageFiles), bucket)) + + numberOfDays := 90 + startDate := time.Now().Truncate(24*time.Hour).AddDate(0, 0, -numberOfDays) + + if s.configMap.Parameters["lastUsed"] != "" { + startDateRaw, errLocal := time.Parse(time.RFC3339, s.configMap.Parameters["lastUsed"]) + if errLocal == nil && startDateRaw.After(startDate) { + startDate = startDateRaw + } + } + + utils.Logger.Info(fmt.Sprintf("using start date %s", startDate.Format(time.RFC3339))) + + usageFiles := []string{} + + r := regexp.MustCompile(`.*/(\d{4}/\d{2}/\d{2})/.*`) + dateFormat := "2006/01/02" + + for _, file := range allUsageFiles { + matches := r.FindStringSubmatch(file.Key) + if len(matches) != 2 { + continue + } + + dt, err2 := time.Parse(dateFormat, matches[1]) + if err2 != nil { + continue + } + + if strings.Contains(file.Key, fmt.Sprintf("/CloudTrail/%s/", region)) && time.Since(dt).Hours() < float64(numberOfDays+1)*24 { + usageFiles = append(usageFiles, file.Key) + } + } + + utils.Logger.Info(fmt.Sprintf("%d files to process", len(usageFiles))) + + return usageFiles, bucketRegion, nil +} + +func (s *DataUsageSyncer) readAndParseUsageLog(ctx context.Context, bucketName string, bucketRegion string, region string, fileKey string, dataObjectMapper ObjectMapper, statementChannel chan<- []data_usage.Statement, errorChannel chan<- error) { + parts := strings.Split(fileKey, "/") + fileKeyShort := parts[len(parts)-1] + + start := time.Now() + + contents, err := getFileContents(ctx, s.repo, bucketName, bucketRegion, fileKey) + if err != nil { + errorChannel <- fmt.Errorf("get content of file \"%s/%s\": %w", bucketName, fileKey, err) + + return + } + + var result model.CloudTrailLog + + err = json.Unmarshal([]byte(contents), &result) + if err != nil { + errorChannel <- fmt.Errorf("unmarshal: %w", err) + + return + } + + statements := make([]data_usage.Statement, 0, len(result.Records)) + + for ind := range result.Records { + record := result.Records[ind] + isCloudTrailBucket := false + accessedObjects := make([]ap.WhatItem, 0, len(record.Resources)) + + for _, resource := range record.Resources { + if resource.Type != nil && resource.Arn != nil && strings.Contains(*resource.Arn, bucketName) { + isCloudTrailBucket = true + break + } + + permission := fmt.Sprintf("%s:%s", constants.S3PermissionPrefix, *record.EventName) + + if resource.Type != nil && resource.Arn != nil && strings.EqualFold(*resource.Type, "AWS::S3::Object") { + object := utils.ConvertArnToFullname(*resource.Arn) + + mappedWhatDataObject := dataObjectMapper.MapObject(fmt.Sprintf("%s:%s:%s", s.account, region, object)) + if mappedWhatDataObject == nil { + utils.Logger.Info(fmt.Sprintf("Could not map object %q to anything known. 
Skipping", object)) + continue + } + + accessedObjects = append(accessedObjects, ap.WhatItem{ + DataObject: mappedWhatDataObject, + Permissions: []string{permission}, + }) + } + } + + if isCloudTrailBucket || len(accessedObjects) == 0 { + utils.Logger.Info("Skipping cloud trail or no accessed objects found") + continue + } + + userName := "" + // TODO: investigate what the different possibilities are, this has been figured out by just looking + // at the logs so far + if record.UserIdentity == nil || record.UserIdentity.Type == nil { + utils.Logger.Warn("user identity is nil") + continue + } else if *record.UserIdentity.Type == "IAMUser" { + userName = *record.UserIdentity.UserName + } else if *record.UserIdentity.Type == "AssumedRole" { + principalId := record.UserIdentity.PrincipalId + parts := strings.Split(*principalId, ":") + userName = parts[1] + } else if record.UserIdentity.InvokedBy != nil { + userName = *record.UserIdentity.InvokedBy + } else if record.UserIdentity.Arn != nil { + userName = *record.UserIdentity.Arn + } + + var bytes int + if record.Bytes != nil { + bytes = int(record.Bytes.BytesIn) + int(record.Bytes.BytesOut) + } + + if userName != "" { + statements = append(statements, data_usage.Statement{ + ExternalId: *record.EventID, + StartTime: record.EventTime.Unix(), + Bytes: bytes, + AccessedDataObjects: accessedObjects, + User: userName, + Success: true, + }) + } + } + + if len(statements) > 0 { + statementChannel <- statements + } + + utils.Logger.Info(fmt.Sprintf("%d records fetched and processed in %d ms from %s", len(result.Records), time.Since(start).Milliseconds(), fileKeyShort)) +} + +func getFileContents(ctx context.Context, repo dataUsageRepository, bucketName string, bucketRegion string, fileKey string) (string, error) { + reader, err := repo.GetFile(ctx, bucketName, fileKey, &bucketRegion) + if err != nil { + return "", fmt.Errorf("get file: %w", err) + } + defer reader.Close() + + gzipReader, err := gzip.NewReader(reader) + if err != nil { + return "", fmt.Errorf("new reader: %w", err) + } + defer gzipReader.Close() + + buf := new(strings.Builder) + + _, err = io.Copy(buf, gzipReader) //nolint:gosec // no risk of injection + if err != nil { + return "", fmt.Errorf("copy: %w", err) + } + + return buf.String(), nil +} + +func addStatementsToDataUsageHandler(dataUsageFileHandler wrappers.DataUsageStatementHandler, statements []data_usage.Statement, lock *sync.Mutex) error { + lock.Lock() + defer lock.Unlock() + + err := dataUsageFileHandler.AddStatements(statements) + if err != nil { + return fmt.Errorf("add statement to handler: %w", err) + } + + return nil +} diff --git a/aws/usage/data_usage_object_mapper.go b/aws/usage/data_usage_object_mapper.go new file mode 100644 index 0000000..ef7e986 --- /dev/null +++ b/aws/usage/data_usage_object_mapper.go @@ -0,0 +1,89 @@ +package usage + +import ( + "context" + "errors" + "fmt" + "strings" + + "github.com/raito-io/cli/base/data_source" + "github.com/raito-io/cli/base/util/config" + + "github.com/raito-io/cli-plugin-aws-account/aws/constants" + "github.com/raito-io/cli-plugin-aws-account/aws/utils/trie" +) + +type ObjectMapper interface { + MapObject(object string) *data_source.DataObjectReference +} + +type FileUsageObjectMapper struct { + pathDepth int + dataObjectsWithType *trie.Trie[string] +} + +func NewFileUsageObjectMapper(pathDepth int, dataObjectsWithType *trie.Trie[string]) FileUsageObjectMapper { + return FileUsageObjectMapper{pathDepth: pathDepth, dataObjectsWithType: dataObjectsWithType} +} + +func (m 
FileUsageObjectMapper) MapObject(object string) *data_source.DataObjectReference { + path := object + + parts := strings.Split(object, "/") + if len(parts) > m.pathDepth { + path = strings.Join(parts[:m.pathDepth], "/") + } + + if doType, found := m.dataObjectsWithType.Get(path); found { + return &data_source.DataObjectReference{ + FullName: path, + Type: doType, + } + } + + return nil +} + +type GlueUsageObjectMapper struct { + dataObjectsWithType *trie.Trie[string] +} + +func NewGlueUsageObjectMapper(dataObjectsWithType *trie.Trie[string]) GlueUsageObjectMapper { + return GlueUsageObjectMapper{dataObjectsWithType: dataObjectsWithType} +} + +func (m GlueUsageObjectMapper) MapObject(object string) *data_source.DataObjectReference { + commonPrefix, dataObjectType := m.dataObjectsWithType.GetClosest(object) + + if commonPrefix == "" { + return nil + } + + return &data_source.DataObjectReference{ + FullName: commonPrefix, + Type: dataObjectType, + } +} + +func ObjectMapperFactory(ctx context.Context, repo dataObjectRepository, configMap *config.ConfigMap) (ObjectMapper, error) { + s3Enabled := configMap.GetBoolWithDefault(constants.AwsS3Enabled, false) + glueEnabled := configMap.GetBoolWithDefault(constants.AwsGlueEnabled, false) + + if s3Enabled && glueEnabled { + return nil, errors.New("both S3 and Glue are enabled") + } else if !s3Enabled && !glueEnabled { + return nil, errors.New("neither S3 nor Glue are enabled") + } + + dataObjectsWithType, err := repo.GetAvailableObjectTypes(ctx, configMap) + if err != nil { + return nil, fmt.Errorf("get available object types: %w", err) + } + + if s3Enabled { + pathDepth := configMap.GetIntWithDefault(constants.AwsS3MaxFolderDepth, constants.AwsS3MaxFolderDepthDefault) + return NewFileUsageObjectMapper(pathDepth, dataObjectsWithType), nil + } else { + return NewGlueUsageObjectMapper(dataObjectsWithType), nil + } +} diff --git a/aws/usage/data_usage_object_mapper_test.go b/aws/usage/data_usage_object_mapper_test.go new file mode 100644 index 0000000..3d8201d --- /dev/null +++ b/aws/usage/data_usage_object_mapper_test.go @@ -0,0 +1,140 @@ +package usage + +import ( + "reflect" + "testing" + + "github.com/raito-io/cli/base/data_source" + + "github.com/raito-io/cli-plugin-aws-account/aws/model" + "github.com/raito-io/cli-plugin-aws-account/aws/utils/trie" +) + +func TestFileUsageObjectMapper_MapObject(t *testing.T) { + type fields struct { + pathDepth int + dataObjectsWithType *trie.Trie[string] + } + type args struct { + object string + } + tests := []struct { + name string + fields fields + args args + want *data_source.DataObjectReference + }{ + { + name: "Fullname can be used", + fields: fields{ + pathDepth: 10, + dataObjectsWithType: trie.FromMap("/", map[string]string{"bucket1/folder1/folder2/file1": data_source.File, "bucket1/folder1/folder2/file2": data_source.File, "bucket1/folder1/folder2": data_source.Folder, "bucket1/folder1": data_source.Folder}), + }, + args: args{ + object: "bucket1/folder1/folder2/file1", + }, + want: &data_source.DataObjectReference{ + FullName: "bucket1/folder1/folder2/file1", + Type: data_source.File, + }, + }, + { + name: "Trim path", + fields: fields{ + pathDepth: 2, + dataObjectsWithType: trie.FromMap("/", map[string]string{"bucket1/folder2": data_source.Folder, "bucket1/folder1": "folder", "bucket2/folder1": data_source.Folder}), + }, + args: args{ + object: "bucket1/folder1/folder2/file1", + }, + want: &data_source.DataObjectReference{ + FullName: "bucket1/folder1", + Type: data_source.Folder, + }, + }, + { + name: 
"Not found", + fields: fields{ + pathDepth: 2, + dataObjectsWithType: trie.FromMap("/", map[string]string{"bucket1/folder2": data_source.Folder, "bucket1/folder1": "folder", "bucket2/folder1": data_source.Folder}), + }, + args: args{ + object: "bucket4/folder1/folder2/file1", + }, + want: nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + m := FileUsageObjectMapper{ + pathDepth: tt.fields.pathDepth, + dataObjectsWithType: tt.fields.dataObjectsWithType, + } + if got := m.MapObject(tt.args.object); !reflect.DeepEqual(got, tt.want) { + t.Errorf("MapObject() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestGlueUsageObjectMapper_MapObject(t *testing.T) { + type fields struct { + dataObjectsWithType *trie.Trie[string] + } + type args struct { + object string + } + tests := []struct { + name string + fields fields + args args + want *data_source.DataObjectReference + }{ + { + name: "Fullname can be used", + fields: fields{ + dataObjectsWithType: trie.FromMap("/", map[string]string{"bucket1/folder1/folder2": model.GlueTable, "bucket1/folder1/folder3": model.GlueTable, "bucket1/folder1": data_source.Folder}), + }, + args: args{ + object: "bucket1/folder1/folder2", + }, + want: &data_source.DataObjectReference{ + FullName: "bucket1/folder1/folder2", + Type: model.GlueTable, + }, + }, + { + name: "Map to table", + fields: fields{ + dataObjectsWithType: trie.FromMap("/", map[string]string{"bucket1/folder1/folder2": model.GlueTable, "bucket1/folder1/folder3": model.GlueTable, "bucket1/folder1": data_source.Folder}), + }, + args: args{ + object: "bucket1/folder1/folder2/folder3/file.parquet", + }, + want: &data_source.DataObjectReference{ + FullName: "bucket1/folder1/folder2", + Type: model.GlueTable, + }, + }, + { + name: "Not found", + fields: fields{ + dataObjectsWithType: trie.FromMap("/", map[string]string{"bucket1/folder1/folder2": model.GlueTable, "bucket1/folder1/folder3": model.GlueTable, "bucket1/folder1": data_source.Folder}), + }, + args: args{ + object: "bucket3/folder1/folder2/folder3/file.parquet", + }, + want: nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + m := GlueUsageObjectMapper{ + dataObjectsWithType: tt.fields.dataObjectsWithType, + } + if got := m.MapObject(tt.args.object); !reflect.DeepEqual(got, tt.want) { + t.Errorf("MapObject() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/aws/usage/data_usage_test.go b/aws/usage/data_usage_test.go new file mode 100644 index 0000000..fc7b5fc --- /dev/null +++ b/aws/usage/data_usage_test.go @@ -0,0 +1,210 @@ +package usage + +import ( + "bytes" + "compress/gzip" + "context" + "encoding/json" + "fmt" + "io" + "testing" + "time" + + "github.com/aws/smithy-go/ptr" + "github.com/raito-io/cli/base/access_provider/sync_from_target" + "github.com/raito-io/cli/base/data_source" + "github.com/raito-io/cli/base/data_usage" + "github.com/raito-io/cli/base/util/config" + "github.com/raito-io/cli/base/wrappers/mocks" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/raito-io/cli-plugin-aws-account/aws/constants" + "github.com/raito-io/cli-plugin-aws-account/aws/model" + "github.com/raito-io/cli-plugin-aws-account/aws/utils/trie" +) + +func TestDataUsageSyncer_SyncDataUsage_NoCloudTrailBucket(t *testing.T) { + // Given + repoMock := NewMockdataUsageRepository(t) + dataObjectRepoMock := NewMockdataObjectRepository(t) + account := "accountId" + configMap := &config.ConfigMap{ + Parameters: map[string]string{ + 
constants.AwsS3Enabled: "true", + }, + } + ctx := context.Background() + + dataUsageWrapper := mocks.NewDataUsageStatementHandler(t) + + syncer := DataUsageSyncer{account: account, repo: repoMock, dataObjectRepo: dataObjectRepoMock, configMap: configMap} + + // Expect + dataObjectRepoMock.EXPECT().GetAvailableObjectTypes(ctx, configMap).Return( + trie.FromMap("/", map[string]string{"bucket1/folder1/folder2/file1": "file", "bucket1/folder1/folder2/file2": "file", "bucket1/folder1/folder2": "folder", "bucket1/folder1": "folder"}), + nil) + + // When + err := syncer.syncDataUsage(ctx, dataUsageWrapper, configMap) + + // Then + require.NoError(t, err) +} + +func TestDataUsageSyncer_SyncDataUsage(t *testing.T) { + // Given + repoMock := NewMockdataUsageRepository(t) + dataObjectRepoMock := NewMockdataObjectRepository(t) + account := "accountId" + configMap := &config.ConfigMap{ + Parameters: map[string]string{ + constants.AwsS3Enabled: "true", + constants.AwsS3CloudTrailBucket: "cloudtrail-bucket", + }, + } + ctx := context.Background() + + dataUsageWrapper := mocks.NewSimpleDataUsageStatementHandler(t) + + tnow := time.Now() + filePrefix := fmt.Sprintf("cloudtrail-bucket/AWSLogs/%s/CloudTrail/eu-central-1/%04d/%02d/%02d/", account, tnow.Year(), tnow.Month(), tnow.Day()) + + syncer := DataUsageSyncer{account: account, repo: repoMock, dataObjectRepo: dataObjectRepoMock, configMap: configMap} + + fileContent, err := marshallAndCompressData(t, model.CloudTrailLog{ + Records: []model.CloudtrailRecord{ + { + UserIdentity: &model.UserIdentity{ + Type: ptr.String("AssumedRole"), + Arn: ptr.String("arn:aws:sts::accountId:assumed-role/AWSReservedSSO_AWSAdministratorAccess_randomPostFix/user@raito.io"), + PrincipalId: ptr.String("SFSAWERASFASR:user@raito.io"), + AccountId: ptr.String("accountId"), + SessionIssuer: nil, + }, + EventTime: &tnow, + EventSource: ptr.String("s3.amazonaws.com"), + EventName: ptr.String("GetObject"), + AwsRegion: ptr.String("eu-central-1"), + SourceIPAddress: ptr.String("2.2.2.2"), + EventID: ptr.String("eventId1"), + ReadOnly: true, + Resources: []model.AwsResource{ + { + Type: ptr.String("AWS::S3::Object"), + Arn: ptr.String("arn:aws:s3:::bucket1/folder1/folder2/file1"), + }, + }, + }, + { + UserIdentity: &model.UserIdentity{ + Type: ptr.String("AssumedRole"), + Arn: ptr.String("arn:aws:sts::accountId:assumed-role/AWSReservedSSO_AWSAdministratorAccess_randomPostFix/user@raito.io"), + PrincipalId: ptr.String("SFSAWERASFASR:user@raito.io"), + AccountId: ptr.String("accountId"), + SessionIssuer: nil, + }, + EventTime: &tnow, + EventSource: ptr.String("s3.amazonaws.com"), + EventName: ptr.String("PutObject"), + AwsRegion: ptr.String("eu-central-1"), + SourceIPAddress: ptr.String("2.2.2.2"), + EventID: ptr.String("eventId2"), + ReadOnly: true, + Resources: []model.AwsResource{ + { + Type: ptr.String("AWS::S3::Object"), + Arn: ptr.String("arn:aws:s3:::bucket1/folder1/folder2/file2"), + }, + }, + Bytes: &model.EventBytes{ + BytesIn: 100, + BytesOut: 350, + }, + }, + }, + }) + require.NoError(t, err) + + // Expect + dataObjectRepoMock.EXPECT().GetAvailableObjectTypes(ctx, configMap).Return( + trie.FromMap("/", map[string]string{"accountId:eu-central-1:bucket1/folder1/folder2/file1": "file", "accountId:eu-central-1:bucket1/folder1/folder2/file2": "file", "accountId:eu-central-1:bucket1/folder1/folder2": "folder", "accountId:eu-central-1:bucket1/folder1": "folder"}), + nil) + repoMock.EXPECT().ListFiles(ctx, "cloudtrail-bucket", (*string)(nil)).Return([]model.AwsS3Entity{{Type: 
data_source.File, Region: "eu-central-1", Key: filePrefix + "usageFile1.json.gz", ParentKey: filePrefix}}, "eu-central-1", nil) + repoMock.EXPECT().GetFile(ctx, "cloudtrail-bucket", filePrefix+"usageFile1.json.gz", ptr.String("eu-central-1")).Return(fileContent, nil) + + // When + err = syncer.syncDataUsage(ctx, dataUsageWrapper, configMap) + + // Then + require.NoError(t, err) + + assert.ElementsMatch(t, dataUsageWrapper.Statements, []data_usage.Statement{ + { + ExternalId: "eventId1", + AccessedDataObjects: []sync_from_target.WhatItem{ + { + DataObject: &data_source.DataObjectReference{ + FullName: "accountId:eu-central-1:bucket1/folder1/folder2/file1", + Type: "file", + }, + Permissions: []string{"s3:GetObject"}, + }, + }, + User: "user@raito.io", + Role: "", + Success: true, + Status: "", + Query: "", + StartTime: tnow.Unix(), + EndTime: 0, + Bytes: 0, + Rows: 0, + Credits: 0, + }, + { + ExternalId: "eventId2", + AccessedDataObjects: []sync_from_target.WhatItem{ + { + DataObject: &data_source.DataObjectReference{ + FullName: "accountId:eu-central-1:bucket1/folder1/folder2/file2", + Type: "file", + }, + Permissions: []string{"s3:PutObject"}, + }, + }, + User: "user@raito.io", + Role: "", + Success: true, + Status: "", + Query: "", + StartTime: tnow.Unix(), + EndTime: 0, + Bytes: 450, + Rows: 0, + Credits: 0, + }, + }) +} + +func marshallAndCompressData(t *testing.T, d interface{}) (io.ReadCloser, error) { + t.Helper() + + jsonData, err := json.Marshal(d) + if err != nil { + return nil, fmt.Errorf("marshal: %w", err) + } + + buffer := new(bytes.Buffer) + gzipWriter := gzip.NewWriter(buffer) + + defer gzipWriter.Close() + + _, err = gzipWriter.Write(jsonData) + if err != nil { + return nil, fmt.Errorf("write: %w", err) + } + + return io.NopCloser(buffer), nil + +} diff --git a/aws/usage/mock_dataObjectRepository.go b/aws/usage/mock_dataObjectRepository.go new file mode 100644 index 0000000..754b3e1 --- /dev/null +++ b/aws/usage/mock_dataObjectRepository.go @@ -0,0 +1,99 @@ +// Code generated by mockery v2.43.2. DO NOT EDIT. 
+ +package usage + +import ( + context "context" + + config "github.com/raito-io/cli/base/util/config" + + mock "github.com/stretchr/testify/mock" + + trie "github.com/raito-io/cli-plugin-aws-account/aws/utils/trie" +) + +// MockdataObjectRepository is an autogenerated mock type for the dataObjectRepository type +type MockdataObjectRepository struct { + mock.Mock +} + +type MockdataObjectRepository_Expecter struct { + mock *mock.Mock +} + +func (_m *MockdataObjectRepository) EXPECT() *MockdataObjectRepository_Expecter { + return &MockdataObjectRepository_Expecter{mock: &_m.Mock} +} + +// GetAvailableObjectTypes provides a mock function with given fields: ctx, cfg +func (_m *MockdataObjectRepository) GetAvailableObjectTypes(ctx context.Context, cfg *config.ConfigMap) (*trie.Trie[string], error) { + ret := _m.Called(ctx, cfg) + + if len(ret) == 0 { + panic("no return value specified for GetAvailableObjectTypes") + } + + var r0 *trie.Trie[string] + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *config.ConfigMap) (*trie.Trie[string], error)); ok { + return rf(ctx, cfg) + } + if rf, ok := ret.Get(0).(func(context.Context, *config.ConfigMap) *trie.Trie[string]); ok { + r0 = rf(ctx, cfg) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*trie.Trie[string]) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, *config.ConfigMap) error); ok { + r1 = rf(ctx, cfg) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MockdataObjectRepository_GetAvailableObjectTypes_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetAvailableObjectTypes' +type MockdataObjectRepository_GetAvailableObjectTypes_Call struct { + *mock.Call +} + +// GetAvailableObjectTypes is a helper method to define mock.On call +// - ctx context.Context +// - cfg *config.ConfigMap +func (_e *MockdataObjectRepository_Expecter) GetAvailableObjectTypes(ctx interface{}, cfg interface{}) *MockdataObjectRepository_GetAvailableObjectTypes_Call { + return &MockdataObjectRepository_GetAvailableObjectTypes_Call{Call: _e.mock.On("GetAvailableObjectTypes", ctx, cfg)} +} + +func (_c *MockdataObjectRepository_GetAvailableObjectTypes_Call) Run(run func(ctx context.Context, cfg *config.ConfigMap)) *MockdataObjectRepository_GetAvailableObjectTypes_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), args[1].(*config.ConfigMap)) + }) + return _c +} + +func (_c *MockdataObjectRepository_GetAvailableObjectTypes_Call) Return(_a0 *trie.Trie[string], _a1 error) *MockdataObjectRepository_GetAvailableObjectTypes_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *MockdataObjectRepository_GetAvailableObjectTypes_Call) RunAndReturn(run func(context.Context, *config.ConfigMap) (*trie.Trie[string], error)) *MockdataObjectRepository_GetAvailableObjectTypes_Call { + _c.Call.Return(run) + return _c +} + +// NewMockdataObjectRepository creates a new instance of MockdataObjectRepository. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +// The first argument is typically a *testing.T value. 
+func NewMockdataObjectRepository(t interface { + mock.TestingT + Cleanup(func()) +}) *MockdataObjectRepository { + mock := &MockdataObjectRepository{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/aws/usage/mock_dataUsageRepository.go b/aws/usage/mock_dataUsageRepository.go new file mode 100644 index 0000000..b7de561 --- /dev/null +++ b/aws/usage/mock_dataUsageRepository.go @@ -0,0 +1,167 @@ +// Code generated by mockery v2.43.2. DO NOT EDIT. + +package usage + +import ( + context "context" + io "io" + + mock "github.com/stretchr/testify/mock" + + model "github.com/raito-io/cli-plugin-aws-account/aws/model" +) + +// MockdataUsageRepository is an autogenerated mock type for the dataUsageRepository type +type MockdataUsageRepository struct { + mock.Mock +} + +type MockdataUsageRepository_Expecter struct { + mock *mock.Mock +} + +func (_m *MockdataUsageRepository) EXPECT() *MockdataUsageRepository_Expecter { + return &MockdataUsageRepository_Expecter{mock: &_m.Mock} +} + +// GetFile provides a mock function with given fields: ctx, bucket, key, region +func (_m *MockdataUsageRepository) GetFile(ctx context.Context, bucket string, key string, region *string) (io.ReadCloser, error) { + ret := _m.Called(ctx, bucket, key, region) + + if len(ret) == 0 { + panic("no return value specified for GetFile") + } + + var r0 io.ReadCloser + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, string, string, *string) (io.ReadCloser, error)); ok { + return rf(ctx, bucket, key, region) + } + if rf, ok := ret.Get(0).(func(context.Context, string, string, *string) io.ReadCloser); ok { + r0 = rf(ctx, bucket, key, region) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(io.ReadCloser) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, string, string, *string) error); ok { + r1 = rf(ctx, bucket, key, region) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MockdataUsageRepository_GetFile_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetFile' +type MockdataUsageRepository_GetFile_Call struct { + *mock.Call +} + +// GetFile is a helper method to define mock.On call +// - ctx context.Context +// - bucket string +// - key string +// - region *string +func (_e *MockdataUsageRepository_Expecter) GetFile(ctx interface{}, bucket interface{}, key interface{}, region interface{}) *MockdataUsageRepository_GetFile_Call { + return &MockdataUsageRepository_GetFile_Call{Call: _e.mock.On("GetFile", ctx, bucket, key, region)} +} + +func (_c *MockdataUsageRepository_GetFile_Call) Run(run func(ctx context.Context, bucket string, key string, region *string)) *MockdataUsageRepository_GetFile_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), args[1].(string), args[2].(string), args[3].(*string)) + }) + return _c +} + +func (_c *MockdataUsageRepository_GetFile_Call) Return(_a0 io.ReadCloser, _a1 error) *MockdataUsageRepository_GetFile_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *MockdataUsageRepository_GetFile_Call) RunAndReturn(run func(context.Context, string, string, *string) (io.ReadCloser, error)) *MockdataUsageRepository_GetFile_Call { + _c.Call.Return(run) + return _c +} + +// ListFiles provides a mock function with given fields: ctx, bucket, prefix +func (_m *MockdataUsageRepository) ListFiles(ctx context.Context, bucket string, prefix *string) ([]model.AwsS3Entity, string, error) { + ret := _m.Called(ctx, bucket, prefix) + + if len(ret) == 
0 { + panic("no return value specified for ListFiles") + } + + var r0 []model.AwsS3Entity + var r1 string + var r2 error + if rf, ok := ret.Get(0).(func(context.Context, string, *string) ([]model.AwsS3Entity, string, error)); ok { + return rf(ctx, bucket, prefix) + } + if rf, ok := ret.Get(0).(func(context.Context, string, *string) []model.AwsS3Entity); ok { + r0 = rf(ctx, bucket, prefix) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]model.AwsS3Entity) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, string, *string) string); ok { + r1 = rf(ctx, bucket, prefix) + } else { + r1 = ret.Get(1).(string) + } + + if rf, ok := ret.Get(2).(func(context.Context, string, *string) error); ok { + r2 = rf(ctx, bucket, prefix) + } else { + r2 = ret.Error(2) + } + + return r0, r1, r2 +} + +// MockdataUsageRepository_ListFiles_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ListFiles' +type MockdataUsageRepository_ListFiles_Call struct { + *mock.Call +} + +// ListFiles is a helper method to define mock.On call +// - ctx context.Context +// - bucket string +// - prefix *string +func (_e *MockdataUsageRepository_Expecter) ListFiles(ctx interface{}, bucket interface{}, prefix interface{}) *MockdataUsageRepository_ListFiles_Call { + return &MockdataUsageRepository_ListFiles_Call{Call: _e.mock.On("ListFiles", ctx, bucket, prefix)} +} + +func (_c *MockdataUsageRepository_ListFiles_Call) Run(run func(ctx context.Context, bucket string, prefix *string)) *MockdataUsageRepository_ListFiles_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), args[1].(string), args[2].(*string)) + }) + return _c +} + +func (_c *MockdataUsageRepository_ListFiles_Call) Return(_a0 []model.AwsS3Entity, _a1 string, _a2 error) *MockdataUsageRepository_ListFiles_Call { + _c.Call.Return(_a0, _a1, _a2) + return _c +} + +func (_c *MockdataUsageRepository_ListFiles_Call) RunAndReturn(run func(context.Context, string, *string) ([]model.AwsS3Entity, string, error)) *MockdataUsageRepository_ListFiles_Call { + _c.Call.Return(run) + return _c +} + +// NewMockdataUsageRepository creates a new instance of MockdataUsageRepository. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +// The first argument is typically a *testing.T value. 
+func NewMockdataUsageRepository(t interface { + mock.TestingT + Cleanup(func()) +}) *MockdataUsageRepository { + mock := &MockdataUsageRepository{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/aws/utils/trie/trie.go b/aws/utils/trie/trie.go index a52d928..7209c61 100644 --- a/aws/utils/trie/trie.go +++ b/aws/utils/trie/trie.go @@ -30,6 +30,16 @@ func New[T any](keySeparator string) *Trie[T] { return &Trie[T]{sep: keySeparator} } +func FromMap[T any](keySeparator string, m map[string]T) *Trie[T] { + t := New[T](keySeparator) + + for k, v := range m { + t.Insert(k, v) + } + + return t +} + // Insert a new value with given key func (t *Trie[T]) Insert(key string, value T) { if t.root == nil { @@ -80,6 +90,36 @@ func (t *Trie[T]) SearchPrefix(key string) []T { return n.GetAllLeafs() } +// Get returns the value for a given key +func (t *Trie[T]) Get(key string) (T, bool) { + var t0 T + + keyParts := strings.Split(key, t.sep) + + node, found := t.root.GetNode(keyParts) + + if !found || node.leaf == nil { + return t0, false + } + + return node.leaf.value, true +} + +// GetClosest returns the value for the closest key +func (t *Trie[T]) GetClosest(key string) (string, T) { + var t0 T + + keyParts := strings.Split(key, t.sep) + + node := t.root.GetClosestNode(keyParts) + + if node.leaf == nil { + return "", t0 + } + + return node.leaf.key, node.leaf.value +} + func (t *Trie[T]) Size() int { if t.root == nil { return 0 @@ -88,6 +128,24 @@ func (t *Trie[T]) Size() int { return t.root.Count() } +func (t *Trie[T]) Iterate(f func(key string, value T)) { + if t.root != nil { + t.root.Iterate(f) + } +} + +func (t *Trie[T]) Equal(other *Trie[T], equalFn func(a T, b T) bool) bool { + if t.root == nil { + return other.root == nil + } + + if other.root == nil { + return false + } + + return t.root.Equal(other.root, equalFn) +} + func (n *Node[T]) GetAllLeafs() []T { var result []T @@ -112,6 +170,40 @@ func (n *Node[T]) Count() int { return count } +func (n *Node[T]) GetNode(keyParts []string) (*Node[T], bool) { + if len(keyParts) == 0 { + return n, true + } + + if n.edges == nil { + return nil, false + } + + e, ok := n.edges[keyParts[0]] + if !ok { + return nil, false + } + + return e.node.GetNode(keyParts[1:]) +} + +func (n *Node[T]) GetClosestNode(keyParts []string) *Node[T] { + if len(keyParts) == 0 { + return n + } + + if n.edges == nil { + return n + } + + e, ok := n.edges[keyParts[0]] + if !ok { + return n + } + + return e.node.GetClosestNode(keyParts[1:]) +} + func (n *Node[T]) Iterate(f func(key string, value T)) { if n.leaf != nil { f(n.leaf.key, n.leaf.value) @@ -121,3 +213,28 @@ func (n *Node[T]) Iterate(f func(key string, value T)) { e.node.Iterate(f) } } + +func (n *Node[T]) Equal(other *Node[T], equalFn func(a T, b T) bool) bool { + if n.leaf != nil { + if other.leaf == nil { + return false + } + + if !equalFn(n.leaf.value, other.leaf.value) { + return false + } + } + + if len(n.edges) != len(other.edges) { + return false + } + + for k, e := range n.edges { + otherE, ok := other.edges[k] + if !ok || !e.node.Equal(otherE.node, equalFn) { + return false + } + } + + return true +} diff --git a/aws/utils/trie/trie_test.go b/aws/utils/trie/trie_test.go index 72997e8..58e74ca 100644 --- a/aws/utils/trie/trie_test.go +++ b/aws/utils/trie/trie_test.go @@ -6,6 +6,41 @@ import ( "github.com/stretchr/testify/assert" ) +func TestFromMap(t *testing.T) { + t.Run("Empty", func(t *testing.T) { + trie := FromMap[string]("_", map[string]string{}) + + 
assert.Equal(t, 0, trie.Size()) + }) + + t.Run("Non-empty", func(t *testing.T) { + trie := FromMap[string]("_", map[string]string{ + "AWSAdministratorAccess_533d93b1c83ef85b": "AWSAdministratorAccess", + "AWSOrganizationsFullAccess_8f907acb75f8d979": "AWSOrganizationsFullAccess", + "AWSReadOnlyAccess_80b3822d57d5269c": "AWSReadOnlyAccess", + "RAITO_DataProduct_077954824694_0ba0787139fb31cf": "RAITO_DataProduct_077954824694", + "RAITO_DataProduct_459789456148_0ba0787139fb31cf": "RAITO_DataProduct_459789456148", + "RAITO_OtherDataProduct_549786428_0ba0747895fb31cf": "RAITO_OtherDataProduct_549786428", + "OtherRole_with_a_random_Postfix": "OtherRole", + }) + + refTree := New[string]("_") + refTree.Insert("AWSAdministratorAccess_533d93b1c83ef85b", "AWSAdministratorAccess") + refTree.Insert("AWSOrganizationsFullAccess_8f907acb75f8d979", "AWSOrganizationsFullAccess") + refTree.Insert("AWSReadOnlyAccess_80b3822d57d5269c", "AWSReadOnlyAccess") + refTree.Insert("RAITO_DataProduct_077954824694_0ba0787139fb31cf", "RAITO_DataProduct_077954824694") + refTree.Insert("RAITO_DataProduct_459789456148_0ba0787139fb31cf", "RAITO_DataProduct_459789456148") + refTree.Insert("RAITO_OtherDataProduct_549786428_0ba0747895fb31cf", "RAITO_OtherDataProduct_549786428") + refTree.Insert("OtherRole_with_a_random_Postfix", "OtherRole") + + assert.Equal(t, 7, trie.Size()) + assert.True(t, trie.Equal(refTree, func(a string, b string) bool { + return a == b + })) + }) + +} + func TestTrie_SearchPrefix(t *testing.T) { trie := New[string]("_") trie.Insert("AWSAdministratorAccess_533d93b1c83ef85b", "AWSAdministratorAccess") @@ -85,3 +120,214 @@ func TestTrie_Size(t *testing.T) { assert.Equal(t, 7, trie.Size()) }) } + +func TestTrie_Get(t1 *testing.T) { + trie := New[string]("_") + trie.Insert("AWSAdministratorAccess_533d93b1c83ef85b", "AWSAdministratorAccess") + trie.Insert("AWSOrganizationsFullAccess_8f907acb75f8d979", "AWSOrganizationsFullAccess") + trie.Insert("AWSReadOnlyAccess_80b3822d57d5269c", "AWSReadOnlyAccess") + trie.Insert("RAITO_DataProduct_077954824694_0ba0787139fb31cf", "RAITO_DataProduct_077954824694") + trie.Insert("RAITO_DataProduct_459789456148_0ba0787139fb31cf", "RAITO_DataProduct_459789456148") + trie.Insert("RAITO_OtherDataProduct_549786428_0ba0747895fb31cf", "RAITO_OtherDataProduct_549786428") + trie.Insert("OtherRole_with_a_random_Postfix", "OtherRole") + + type testCase struct { + name string + keyArg string + want string + found bool + } + tests := []testCase{ + { + name: "Existing key", + keyArg: "RAITO_DataProduct_077954824694_0ba0787139fb31cf", + want: "RAITO_DataProduct_077954824694", + found: true, + }, + { + name: "Non-existing key", + keyArg: "RAITO_DataProduct_077954824694_NonExisting", + want: "", + found: false, + }, + } + for _, tt := range tests { + t1.Run(tt.name, func(t1 *testing.T) { + got, got1 := trie.Get(tt.keyArg) + assert.Equalf(t1, tt.want, got, "Get(%v)", tt.keyArg) + assert.Equalf(t1, tt.found, got1, "Get(%v)", tt.keyArg) + }) + } +} + +func TestTrie_GetClosest(t1 *testing.T) { + trie := New[string]("_") + trie.Insert("AWSAdministratorAccess_533d93b1c83ef85b", "AWSAdministratorAccess") + trie.Insert("AWSOrganizationsFullAccess_8f907acb75f8d979", "AWSOrganizationsFullAccess") + trie.Insert("AWSReadOnlyAccess_80b3822d57d5269c", "AWSReadOnlyAccess") + trie.Insert("RAITO_DataProduct_077954824694_0ba0787139fb31cf", "RAITO_DataProduct_077954824694") + trie.Insert("RAITO_DataProduct_459789456148_0ba0787139fb31cf", "RAITO_DataProduct_459789456148") + 
trie.Insert("RAITO_OtherDataProduct_549786428_0ba0747895fb31cf", "RAITO_OtherDataProduct_549786428") + trie.Insert("OtherRole_with_a_random_Postfix", "OtherRole") + + type testCase struct { + name string + key string + wantKey string + wantValue string + } + tests := []testCase{ + { + name: "Existing key", + key: "RAITO_DataProduct_077954824694_0ba0787139fb31cf", + wantKey: "RAITO_DataProduct_077954824694_0ba0787139fb31cf", + wantValue: "RAITO_DataProduct_077954824694", + }, + { + name: "Extended key", + key: "RAITO_DataProduct_077954824694_0ba0787139fb31cf_AntoherRandomPart_AndAnother", + wantKey: "RAITO_DataProduct_077954824694_0ba0787139fb31cf", + wantValue: "RAITO_DataProduct_077954824694", + }, + } + for _, tt := range tests { + t1.Run(tt.name, func(t1 *testing.T) { + got, got1 := trie.GetClosest(tt.key) + assert.Equalf(t1, tt.wantKey, got, "GetClosest(%v)", tt.key) + assert.Equalf(t1, tt.wantValue, got1, "GetClosest(%v)", tt.key) + }) + } +} + +func TestTrie_Equal(t1 *testing.T) { + type args[T any] struct { + other *Trie[T] + } + type testCase[T any] struct { + name string + t *Trie[T] + args args[T] + want bool + } + tests := []testCase[string]{ + { + name: "Empty equal", + t: FromMap[string]("_", nil), + args: args[string]{ + other: FromMap[string]("_", nil), + }, + want: true, + }, + { + name: "Empty not equal", + t: FromMap[string]("_", nil), + args: args[string]{ + other: FromMap[string]("_", map[string]string{"key": "value"}), + }, + want: false, + }, + { + name: "equal", + t: FromMap[string]("_", map[string]string{ + "AWSAdministratorAccess_533d93b1c83ef85b": "AWSAdministratorAccess", + "AWSOrganizationsFullAccess_8f907acb75f8d979": "AWSOrganizationsFullAccess", + "AWSReadOnlyAccess_80b3822d57d5269c": "AWSReadOnlyAccess", + "RAITO_DataProduct_077954824694_0ba0787139fb31cf": "RAITO_DataProduct_077954824694", + "RAITO_DataProduct_459789456148_0ba0787139fb31cf": "RAITO_DataProduct_459789456148", + "RAITO_OtherDataProduct_549786428_0ba0747895fb31cf": "RAITO_OtherDataProduct_549786428", + "OtherRole_with_a_random_Postfix": "OtherRole", + }), + args: args[string]{ + other: FromMap[string]("_", map[string]string{ + "AWSAdministratorAccess_533d93b1c83ef85b": "AWSAdministratorAccess", + "AWSOrganizationsFullAccess_8f907acb75f8d979": "AWSOrganizationsFullAccess", + "AWSReadOnlyAccess_80b3822d57d5269c": "AWSReadOnlyAccess", + "RAITO_DataProduct_077954824694_0ba0787139fb31cf": "RAITO_DataProduct_077954824694", + "RAITO_DataProduct_459789456148_0ba0787139fb31cf": "RAITO_DataProduct_459789456148", + "RAITO_OtherDataProduct_549786428_0ba0747895fb31cf": "RAITO_OtherDataProduct_549786428", + "OtherRole_with_a_random_Postfix": "OtherRole", + }), + }, + want: true, + }, + { + name: "non equal - empty leaf", + t: FromMap[string]("_", map[string]string{ + "AWSAdministratorAccess_533d93b1c83ef85b": "AWSAdministratorAccess", + "AWSOrganizationsFullAccess_8f907acb75f8d979": "AWSOrganizationsFullAccess", + "AWSReadOnlyAccess_80b3822d57d5269c": "AWSReadOnlyAccess", + "RAITO_DataProduct_077954824694_0ba0787139fb31cf": "RAITO_DataProduct_077954824694", + "RAITO_DataProduct_459789456148_0ba0787139fb31cf": "RAITO_DataProduct_459789456148", + "RAITO_OtherDataProduct_549786428_0ba0747895fb31cf": "RAITO_OtherDataProduct_549786428", + "OtherRole_with_a_random_Postfix": "OtherRole", + }), + args: args[string]{ + other: FromMap[string]("_", map[string]string{ + "AWSAdministratorAccess_533d93b1c83ef85b_1234567": "AWSAdministratorAccess", + "AWSOrganizationsFullAccess_8f907acb75f8d979": 
"AWSOrganizationsFullAccess", + "AWSReadOnlyAccess_80b3822d57d5269c": "AWSReadOnlyAccess", + "RAITO_DataProduct_077954824694_0ba0787139fb31cf": "RAITO_DataProduct_077954824694", + "RAITO_DataProduct_459789456148_0ba0787139fb31cf": "RAITO_DataProduct_459789456148", + "RAITO_OtherDataProduct_549786428_0ba0747895fb31cf": "RAITO_OtherDataProduct_549786428", + "OtherRole_with_a_random_Postfix": "OtherRole", + }), + }, + want: false, + }, + { + name: "non equal - non equal leaf", + t: FromMap[string]("_", map[string]string{ + "AWSAdministratorAccess_533d93b1c83ef85b": "AWSAdministratorAccess", + "AWSOrganizationsFullAccess_8f907acb75f8d979": "AWSOrganizationsFullAccess", + "AWSReadOnlyAccess_80b3822d57d5269c": "AWSReadOnlyAccess", + "RAITO_DataProduct_077954824694_0ba0787139fb31cf": "RAITO_DataProduct_077954824694", + "RAITO_DataProduct_459789456148_0ba0787139fb31cf": "RAITO_DataProduct_459789456148", + "RAITO_OtherDataProduct_549786428_0ba0747895fb31cf": "RAITO_OtherDataProduct_549786428", + "OtherRole_with_a_random_Postfix": "OtherRole", + }), + args: args[string]{ + other: FromMap[string]("_", map[string]string{ + "AWSAdministratorAccess_533d93b1c83ef85b": "OtherAWSAdministratorAccess", + "AWSOrganizationsFullAccess_8f907acb75f8d979": "AWSOrganizationsFullAccess", + "AWSReadOnlyAccess_80b3822d57d5269c": "AWSReadOnlyAccess", + "RAITO_DataProduct_077954824694_0ba0787139fb31cf": "RAITO_DataProduct_077954824694", + "RAITO_DataProduct_459789456148_0ba0787139fb31cf": "RAITO_DataProduct_459789456148", + "RAITO_OtherDataProduct_549786428_0ba0747895fb31cf": "RAITO_OtherDataProduct_549786428", + "OtherRole_with_a_random_Postfix": "OtherRole", + }), + }, + want: false, + }, + { + name: "non equal - other edges", + t: FromMap[string]("_", map[string]string{ + "AWSAdministratorAccess_533d93b1c83ef85b": "AWSAdministratorAccess", + "AWSOrganizationsFullAccess_8f907acb75f8d979": "AWSOrganizationsFullAccess", + "AWSReadOnlyAccess_80b3822d57d5269c": "AWSReadOnlyAccess", + "RAITO_DataProduct_077954824694_0ba0787139fb31cf": "RAITO_DataProduct_077954824694", + "RAITO_DataProduct_459789456148_0ba0787139fb31cf": "RAITO_DataProduct_459789456148", + "RAITO_OtherDataProduct_549786428_0ba0747895fb31cf": "RAITO_OtherDataProduct_549786428", + "OtherRole_with_a_random_Postfix": "OtherRole", + }), + args: args[string]{ + other: FromMap[string]("_", map[string]string{ + "AWSAdministratorAccess_533d93b1c83ef85b_1234567": "OtherAWSAdministratorAccess", + "AWSOrganizationsFullAccess_8f907acb75f8d979": "AWSOrganizationsFullAccess", + "RAITO_DataProduct_077954824694_80b3822d57d5269c": "AWSReadOnlyAccess", + "RAITO_DataProduct_077954824694_0ba0787139fb31cf": "RAITO_DataProduct_077954824694", + "RAITO_DataProduct_459789456148_0ba0787139fb31cf": "RAITO_DataProduct_459789456148", + "RAITO_OtherDataProduct_549786428_0ba0747895fb31cf": "RAITO_OtherDataProduct_549786428", + "OtherRole_with_a_random_Postfix": "OtherRole", + }), + }, + want: false, + }, + } + for _, tt := range tests { + t1.Run(tt.name, func(t1 *testing.T) { + assert.Equalf(t1, tt.want, tt.t.Equal(tt.args.other, func(a string, b string) bool { + return a == b + }), "Equal(%v, ==)", tt.args.other) + }) + } +} diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..ce36918 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,4 @@ +ignore: + - "**/*_enumer.go" + - "**/mock_*.go" + - ".infra/**" \ No newline at end of file diff --git a/main.go b/main.go index f15dc81..01c1c49 100644 --- a/main.go +++ b/main.go @@ -12,6 +12,7 @@ import ( 
"github.com/raito-io/cli-plugin-aws-account/aws/constants" "github.com/raito-io/cli-plugin-aws-account/aws/data_access" "github.com/raito-io/cli-plugin-aws-account/aws/data_source" + "github.com/raito-io/cli-plugin-aws-account/aws/usage" "github.com/raito-io/cli-plugin-aws-account/aws" ) @@ -30,7 +31,7 @@ func main() { wrappers.IdentityStoreSync(aws.NewIdentityStoreSyncer()), wrappers.DataSourceSync(data_source.NewDataSourceSyncer()), wrappers.DataAccessSync(data_access.NewDataAccessSyncer()), - wrappers.DataUsageSync(aws.NewDataUsageSyncer()), &info.InfoImpl{ + wrappers.DataUsageSync(usage.NewDataUsageSyncer()), &info.InfoImpl{ Info: &plugin.PluginInfo{ Name: "AWS Account", Version: plugin.ParseVersion(version), @@ -44,7 +45,7 @@ func main() { // AWS S3 parameters {Name: constants.AwsS3Enabled, Description: fmt.Sprintf("If set to true (default), S3 buckets and objects will be retrieved directly from the S3 API. See all other 'aws-s3-' parameters for more control over what is imported and what not. This cannot be enabled together with the %q parameter.", constants.AwsGlueEnabled), Mandatory: false}, {Name: constants.AwsS3EmulateFolderStructure, Description: "Emulate a folder structure for S3 objects, just like in the AWS UI", Mandatory: false}, - {Name: constants.AwsS3MaxFolderDepth, Description: fmt.Sprintf("If %s is set to true, fetch all objects up to a certain folder depth. If not set, 20 is used as default.", constants.AwsS3EmulateFolderStructure), Mandatory: false}, + {Name: constants.AwsS3MaxFolderDepth, Description: fmt.Sprintf("If %s is set to true, fetch all objects up to a certain folder depth. If not set, %d is used as default.", constants.AwsS3EmulateFolderStructure, constants.AwsS3MaxFolderDepthDefault), Mandatory: false}, {Name: constants.AwsS3IncludeBuckets, Description: "Optional comma-separated list of buckets to include. If specified, only these buckets will be handled. Wildcards (*) can be used.", Mandatory: false}, {Name: constants.AwsS3ExcludeBuckets, Description: "Optional comma-separated list of buckets to exclude. If specified, these buckets will not be handled. Wildcard (*) can be used. Excludes have preference over includes.", Mandatory: false}, {Name: constants.AwsConcurrency, Description: "The number of threads to use for concurrent API calls to AWS. The default is 5.", Mandatory: false},