From ee79e169eb2599ccd92bc2849595aa5327113773 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Tue, 8 Oct 2024 22:21:08 +0300 Subject: [PATCH] =?UTF-8?q?feat(snowflake-driver):=20support=20DefaultAzur?= =?UTF-8?q?eCredential=20and=C2=A0K8S=20Managed=20Identity=20auth=20for?= =?UTF-8?q?=C2=A0export=20bucket?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/cubejs-backend-shared/src/env.ts | 39 ++++++++ packages/cubejs-base-driver/package.json | 1 + packages/cubejs-base-driver/src/BaseDriver.ts | 89 +++++++++++++++---- .../src/SnowflakeDriver.ts | 40 +++++++-- yarn.lock | 54 +++++++++++ 5 files changed, 202 insertions(+), 21 deletions(-) diff --git a/packages/cubejs-backend-shared/src/env.ts b/packages/cubejs-backend-shared/src/env.ts index 2b4ce43f0d7b4..570719c558ad9 100644 --- a/packages/cubejs-backend-shared/src/env.ts +++ b/packages/cubejs-backend-shared/src/env.ts @@ -782,6 +782,45 @@ const variables: Record any> = { ] ), + /** + * Azure Client ID for the Azure based export bucket storage. + */ + dbExportBucketAzureClientId: ({ + dataSource, + }: { + dataSource: string, + }) => ( + process.env[ + keyByDataSource('CUBEJS_DB_EXPORT_BUCKET_AZURE_CLIENT_ID', dataSource) + ] + ), + + /** + * Azure Federated Token File Path for the Azure based export bucket storage. + */ + dbExportBucketAzureTokenFilePAth: ({ + dataSource, + }: { + dataSource: string, + }) => ( + process.env[ + keyByDataSource('CUBEJS_DB_EXPORT_BUCKET_AZURE_FEDERATED_TOKEN_FILE', dataSource) + ] + ), + + /** + * Azure Tenant ID for the Azure based export bucket storage. + */ + dbExportBucketAzureTenantId: ({ + dataSource, + }: { + dataSource: string, + }) => ( + process.env[ + keyByDataSource('CUBEJS_DB_EXPORT_BUCKET_AZURE_TENANT_ID', dataSource) + ] + ), + /** * Export bucket options for Integration based. 
*/ diff --git a/packages/cubejs-base-driver/package.json b/packages/cubejs-base-driver/package.json index 1554797b7e625..7b64e55102a03 100644 --- a/packages/cubejs-base-driver/package.json +++ b/packages/cubejs-base-driver/package.json @@ -31,6 +31,7 @@ "dependencies": { "@aws-sdk/client-s3": "^3.49.0", "@aws-sdk/s3-request-presigner": "^3.49.0", + "@azure/identity": "^4.4.1", "@azure/storage-blob": "^12.9.0", "@cubejs-backend/shared": "^0.36.5", "@google-cloud/storage": "^7.13.0", diff --git a/packages/cubejs-base-driver/src/BaseDriver.ts b/packages/cubejs-base-driver/src/BaseDriver.ts index c2a67f0da8281..cf4ff4793f590 100644 --- a/packages/cubejs-base-driver/src/BaseDriver.ts +++ b/packages/cubejs-base-driver/src/BaseDriver.ts @@ -26,6 +26,9 @@ import { SASProtocol, generateBlobSASQueryParameters, } from '@azure/storage-blob'; +import { + DefaultAzureCredential, +} from '@azure/identity'; import { cancelCombinator } from './utils'; import { @@ -52,9 +55,30 @@ import { ForeignKeysQueryResult, } from './driver.interface'; +/** + * @see {@link DefaultAzureCredential} constructor options + */ export type AzureStorageClientConfig = { - azureKey: string, + azureKey?: string, sasToken?: string, + /** + * The client ID of a Microsoft Entra app registration. + * In case of DefaultAzureCredential flow if it is omitted + * the Azure library will try to use the AZURE_CLIENT_ID env + */ + clientId?: string, + /** + * ID of the application's Microsoft Entra tenant. Also called its directory ID. + * In case of DefaultAzureCredential flow if it is omitted + * the Azure library will try to use the AZURE_TENANT_ID env + */ + tenantId?: string, + /** + * The path to a file containing a Kubernetes service account token that authenticates the identity. 
+ * In case of DefaultAzureCredential flow if it is omitted + * the Azure library will try to use the AZURE_FEDERATED_TOKEN_FILE env + */ + tokenFilePath?: string, }; export type GoogleStorageClientConfig = { @@ -730,9 +754,52 @@ export abstract class BaseDriver implements DriverInterface { const parts = bucketName.split('.blob.core.windows.net/'); const account = parts[0]; const container = parts[1].split('/')[0]; - const credential = new StorageSharedKeyCredential(account, azureConfig.azureKey); + let credential: StorageSharedKeyCredential | DefaultAzureCredential; + let blobServiceClient: BlobServiceClient; + let getSas; + + if (azureConfig.azureKey) { + credential = new StorageSharedKeyCredential(account, azureConfig.azureKey); + getSas = async (name: string, startsOn: Date, expiresOn: Date) => generateBlobSASQueryParameters( + { + containerName: container, + blobName: name, + permissions: ContainerSASPermissions.parse('r'), + startsOn, + expiresOn, + protocol: SASProtocol.Https, + version: '2020-08-04', + }, + credential as StorageSharedKeyCredential + ).toString(); + } else { + const opts = { + tenantId: azureConfig.tenantId, + clientId: azureConfig.clientId, + tokenFilePath: azureConfig.tokenFilePath, + }; + credential = new DefaultAzureCredential(opts); + getSas = async (name: string, startsOn: Date, expiresOn: Date) => { + // getUserDelegationKey works only for authorization with Microsoft Entra ID + const userDelegationKey = await blobServiceClient.getUserDelegationKey(startsOn, expiresOn); + return generateBlobSASQueryParameters( + { + containerName: container, + blobName: name, + permissions: ContainerSASPermissions.parse('r'), + startsOn, + expiresOn, + protocol: SASProtocol.Https, + version: '2020-08-04', + }, + userDelegationKey, + account, + ).toString(); + }; + } + const url = `https://${account}.blob.core.windows.net`; - const blobServiceClient = azureConfig.sasToken ? + blobServiceClient = azureConfig.sasToken ? 
new BlobServiceClient(`${url}?${azureConfig.sasToken}`) : new BlobServiceClient(url, credential); @@ -741,19 +808,9 @@ export abstract class BaseDriver implements DriverInterface { const blobsList = containerClient.listBlobsFlat({ prefix: `${tableName}/` }); for await (const blob of blobsList) { if (blob.name && (blob.name.endsWith('.csv.gz') || blob.name.endsWith('.csv'))) { - const sas = generateBlobSASQueryParameters( - { - containerName: container, - blobName: blob.name, - permissions: ContainerSASPermissions.parse('r'), - startsOn: new Date(new Date().valueOf()), - expiresOn: - new Date(new Date().valueOf() + 1000 * 60 * 60), - protocol: SASProtocol.Https, - version: '2020-08-04', - }, - credential, - ).toString(); + const starts = new Date(); + const expires = new Date(starts.valueOf() + 1000 * 60 * 60); + const sas = await getSas(blob.name, starts, expires); csvFiles.push(`${url}/${container}/${blob.name}?${sas}`); } } diff --git a/packages/cubejs-snowflake-driver/src/SnowflakeDriver.ts b/packages/cubejs-snowflake-driver/src/SnowflakeDriver.ts index 3d61acec017c5..76dcc5a873248 100644 --- a/packages/cubejs-snowflake-driver/src/SnowflakeDriver.ts +++ b/packages/cubejs-snowflake-driver/src/SnowflakeDriver.ts @@ -133,9 +133,21 @@ interface SnowflakeDriverExportGCS { interface SnowflakeDriverExportAzure { bucketType: 'azure', bucketName: string, - azureKey: string, + azureKey?: string, sasToken?: string, integrationName?: string, + /** + * The client ID of a Microsoft Entra app registration. + */ + clientId?: string, + /** + * ID of the application's Microsoft Entra tenant. Also called its directory ID. + */ + tenantId?: string, + /** + * The path to a file containing a Kubernetes service account token that authenticates the identity. 
+ */ + tokenFilePath?: string, } export type SnowflakeDriverExportBucket = SnowflakeDriverExportAWS | SnowflakeDriverExportGCS @@ -317,12 +329,30 @@ export class SnowflakeDriver extends BaseDriver implements DriverInterface { // sasToken is optional for azure if storage integration is used const sasToken = getEnv('dbExportAzureSasToken', { dataSource }); + if (!integrationName && !sasToken) { + throw new Error( + 'Unsupported exportBucket configuration, some keys are empty: integrationName|sasToken' + ); + } + + // azureKey is optional if DefaultAzureCredential() is used + const azureKey = getEnv('dbExportBucketAzureKey', { dataSource }); + + // These 3 options make sense in case you want to authorize to Azure from + // application running in the k8s environment. + const clientId = getEnv('dbExportBucketAzureClientId', { dataSource }); + const tenantId = getEnv('dbExportBucketAzureTenantId', { dataSource }); + const tokenFilePath = getEnv('dbExportBucketAzureTokenFilePAth', { dataSource }); + return { bucketType, bucketName: getEnv('dbExportBucket', { dataSource }), - azureKey: getEnv('dbExportBucketAzureKey', { dataSource }), - ...(sasToken !== undefined && { sasToken }), ...(integrationName !== undefined && { integrationName }), + ...(sasToken !== undefined && { sasToken }), + ...(azureKey !== undefined && { azureKey }), + ...(clientId !== undefined && { clientId }), + ...(tenantId !== undefined && { tenantId }), + ...(tokenFilePath !== undefined && { tokenFilePath }), }; } @@ -643,11 +673,11 @@ export class SnowflakeDriver extends BaseDriver implements DriverInterface { ); return this.extractFilesFromGCS({ credentials }, bucketName, tableName); } else if (bucketType === 'azure') { - const { azureKey, sasToken } = ( + const { azureKey, sasToken, clientId, tenantId, tokenFilePath } = ( this.config.exportBucket ); return this.extractFilesFromAzure( - { azureKey, sasToken }, + { azureKey, sasToken, clientId, tenantId, tokenFilePath }, bucketName, tableName, ); diff 
--git a/yarn.lock b/yarn.lock index 2fd122919b09e..e6a7f18597b7e 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1021,6 +1021,19 @@ "@azure/logger" "^1.0.0" tslib "^2.6.2" +"@azure/core-client@^1.9.2": + version "1.9.2" + resolved "https://registry.yarnpkg.com/@azure/core-client/-/core-client-1.9.2.tgz#6fc69cee2816883ab6c5cdd653ee4f2ff9774f74" + integrity sha512-kRdry/rav3fUKHl/aDLd/pDLcB+4pOFwPPTVEExuMyaI5r+JBbMWqRbCY1pn5BniDaU3lRxO9eaQ1AmSMehl/w== + dependencies: + "@azure/abort-controller" "^2.0.0" + "@azure/core-auth" "^1.4.0" + "@azure/core-rest-pipeline" "^1.9.1" + "@azure/core-tracing" "^1.0.0" + "@azure/core-util" "^1.6.1" + "@azure/logger" "^1.0.0" + tslib "^2.6.2" + "@azure/core-http-compat@^2.0.1": version "2.1.0" resolved "https://registry.yarnpkg.com/@azure/core-http-compat/-/core-http-compat-2.1.0.tgz#a48451c4e9dae7ad0ca85bbd2b98e0f2ae02836e" @@ -1124,6 +1137,26 @@ stoppable "^1.1.0" tslib "^2.2.0" +"@azure/identity@^4.4.1": + version "4.4.1" + resolved "https://registry.yarnpkg.com/@azure/identity/-/identity-4.4.1.tgz#490fa2ad26786229afa36411892bb53dfa3478d3" + integrity sha512-DwnG4cKFEM7S3T+9u05NstXU/HN0dk45kPOinUyNKsn5VWwpXd9sbPKEg6kgJzGbm1lMuhx9o31PVbCtM5sfBA== + dependencies: + "@azure/abort-controller" "^1.0.0" + "@azure/core-auth" "^1.5.0" + "@azure/core-client" "^1.9.2" + "@azure/core-rest-pipeline" "^1.1.0" + "@azure/core-tracing" "^1.0.0" + "@azure/core-util" "^1.3.0" + "@azure/logger" "^1.0.0" + "@azure/msal-browser" "^3.14.0" + "@azure/msal-node" "^2.9.2" + events "^3.0.0" + jws "^4.0.0" + open "^8.0.0" + stoppable "^1.1.0" + tslib "^2.2.0" + "@azure/keyvault-keys@^4.4.0": version "4.8.0" resolved "https://registry.yarnpkg.com/@azure/keyvault-keys/-/keyvault-keys-4.8.0.tgz#1513b3a187bb3a9a372b5980c593962fb793b2ad" @@ -1148,6 +1181,13 @@ dependencies: tslib "^2.6.2" +"@azure/msal-browser@^3.14.0": + version "3.25.0" + resolved "https://registry.yarnpkg.com/@azure/msal-browser/-/msal-browser-3.25.0.tgz#7ce0949977bc9e0c58319f7090c44fe5537104d4" + 
integrity sha512-a0Y7pmSy8SC1s9bvwr+REvyAA1nQcITlZvkElM2gNUPYFTTNUTEdcpg73TmawNucyMdZ9xb/GFcuhrLOqYAzwg== + dependencies: + "@azure/msal-common" "14.15.0" + "@azure/msal-browser@^3.5.0": version "3.10.0" resolved "https://registry.yarnpkg.com/@azure/msal-browser/-/msal-browser-3.10.0.tgz#8925659e8d1a4bd21e389cca4683eb52658c778e" @@ -1155,6 +1195,11 @@ dependencies: "@azure/msal-common" "14.7.1" +"@azure/msal-common@14.15.0": + version "14.15.0" + resolved "https://registry.yarnpkg.com/@azure/msal-common/-/msal-common-14.15.0.tgz#0e27ac0bb88fe100f4f8d1605b64d5c268636a55" + integrity sha512-ImAQHxmpMneJ/4S8BRFhjt1MZ3bppmpRPYYNyzeQPeFN288YKbb8TmmISQEbtfkQ1BPASvYZU5doIZOPBAqENQ== + "@azure/msal-common@14.7.1": version "14.7.1" resolved "https://registry.yarnpkg.com/@azure/msal-common/-/msal-common-14.7.1.tgz#b13443fbacc87ce2019a91e81a6582ea73847c75" @@ -1169,6 +1214,15 @@ jsonwebtoken "^9.0.0" uuid "^8.3.0" +"@azure/msal-node@^2.9.2": + version "2.15.0" + resolved "https://registry.yarnpkg.com/@azure/msal-node/-/msal-node-2.15.0.tgz#50bf8e692a6656027c073a75d877a8a478aafdfd" + integrity sha512-gVPW8YLz92ZeCibQH2QUw96odJoiM3k/ZPH3f2HxptozmH6+OnyyvKXo/Egg39HAM230akarQKHf0W74UHlh0Q== + dependencies: + "@azure/msal-common" "14.15.0" + jsonwebtoken "^9.0.0" + uuid "^8.3.0" + "@azure/storage-blob@12.18.x": version "12.18.0" resolved "https://registry.yarnpkg.com/@azure/storage-blob/-/storage-blob-12.18.0.tgz#9dd001c9aa5e972216f5af15131009086cfeb59e"