Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

aws requester pays #1173

Merged
merged 40 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
b0b8147
add functionality for requester pays buckets
MichaelLukowski Aug 8, 2024
9dbf6f1
add removed dependency
MichaelLukowski Aug 8, 2024
dd4a111
add debug for requester pays
MichaelLukowski Aug 8, 2024
fa84b16
remove debug statements and fix url concat
MichaelLukowski Aug 8, 2024
0ed13e8
Merge branch 'master' into feat/aws-requesterpays
MichaelLukowski Aug 8, 2024
0532e4e
fix presigned url tests and clean up
MichaelLukowski Aug 13, 2024
fe8a5f2
resolve poetry.lock conflict
MichaelLukowski Aug 13, 2024
b361663
add mock presigned urls for blank tests
MichaelLukowski Aug 13, 2024
e4d64ee
add functionality for requester pays buckets
MichaelLukowski Aug 8, 2024
eddf1dd
add removed dependency
MichaelLukowski Aug 8, 2024
01a1867
add debug for requester pays
MichaelLukowski Aug 8, 2024
5f1a5e5
remove debug statements and fix url concat
MichaelLukowski Aug 8, 2024
6ee1947
fix presigned url tests and clean up
MichaelLukowski Aug 13, 2024
a31cab2
add mock presigned urls for blank tests
MichaelLukowski Aug 13, 2024
e46889d
Merge branch 'feat/aws-requesterpays' of github.com:uc-cdis/fence int…
MichaelLukowski Aug 13, 2024
83b8b38
clean up and fix function case
MichaelLukowski Aug 14, 2024
af96840
adding config to boto clients
MichaelLukowski Aug 15, 2024
4b565cb
add handler for custom parameters for s3 presigned urls
MichaelLukowski Aug 15, 2024
e8d831b
fix tests
MichaelLukowski Aug 15, 2024
7ec3476
clean up and add extra params for requester pays
MichaelLukowski Aug 15, 2024
8523473
remove custom parameters for upload presigned url
MichaelLukowski Aug 15, 2024
c73f225
fix requester pays params logic
MichaelLukowski Aug 15, 2024
670ad26
add session token (#1176)
mfshao Aug 20, 2024
0b93bad
refactor logic
mfshao Aug 22, 2024
49618b2
update lock
mfshao Aug 22, 2024
0a6537d
address pr comments
mfshao Aug 26, 2024
913dbc7
update
mfshao Aug 26, 2024
c7cd09d
update lock
mfshao Aug 26, 2024
2f54f15
test
mfshao Aug 26, 2024
3d8b6fd
fix
mfshao Aug 26, 2024
7d3b980
test
mfshao Aug 26, 2024
f02631a
restore
mfshao Aug 26, 2024
edf2fa6
try authlib version
mfshao Aug 26, 2024
bfa47a5
update comment
mfshao Aug 26, 2024
612690b
update lock
mfshao Aug 26, 2024
db11006
update version
mfshao Aug 27, 2024
d82205c
Update pyproject.toml
mfshao Aug 27, 2024
8bc9a67
Update pyproject.toml
mfshao Aug 27, 2024
b454062
redo lock
mfshao Aug 27, 2024
240fb48
add to default config
mfshao Aug 27, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -268,14 +268,14 @@
"filename": "tests/conftest.py",
mfshao marked this conversation as resolved.
Show resolved Hide resolved
"hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9",
"is_verified": false,
"line_number": 1569
"line_number": 1570
},
{
"type": "Base64 High Entropy String",
"filename": "tests/conftest.py",
"hashed_secret": "227dea087477346785aefd575f91dd13ab86c108",
"is_verified": false,
"line_number": 1593
"line_number": 1594
}
],
"tests/credentials/google/test_credentials.py": [
Expand Down Expand Up @@ -422,5 +422,5 @@
}
]
},
"generated_at": "2024-07-25T17:19:58Z"
"generated_at": "2024-08-22T19:43:39Z"
}
57 changes: 41 additions & 16 deletions fence/blueprints/data/indexd.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import re
import time
import json
import boto3
from botocore.client import Config
from urllib.parse import urlparse, ParseResult, urlunparse
from datetime import datetime, timedelta

from sqlalchemy.sql.functions import user
from cached_property import cached_property
import gen3cirrus
from gen3cirrus import GoogleCloudManager
from gen3cirrus import AwsService
from cdislogging import get_logger
from cdispyutils.config import get_value
from cdispyutils.hmac4 import generate_aws_presigned_url
import flask
from flask import current_app
import requests
Expand Down Expand Up @@ -396,7 +398,7 @@ def make_signed_url(self, file_name, protocol=None, expires_in=None, bucket=None
@staticmethod
def init_multipart_upload(key, expires_in=None, bucket=None):
"""
Initilize multipart upload given key
Initialize multipart upload given key

Args:
key(str): object key
Expand Down Expand Up @@ -441,7 +443,7 @@ def generate_aws_presigned_url_for_part(

Args:
key(str): object key of `guid/filename`
uploadID(str): uploadId of the current upload.
uploadId(str): uploadId of the current upload.
partNumber(int): the part number

Returns:
Expand Down Expand Up @@ -1061,6 +1063,8 @@ def get_signed_url(
bucket_name = self.bucket_name()
bucket = s3_buckets.get(bucket_name)

object_id = self.parsed_url.path.strip("/")

if bucket and bucket.get("endpoint_url"):
http_url = bucket["endpoint_url"].strip("/") + "/{}/{}".format(
self.parsed_url.netloc, self.parsed_url.path.strip("/")
Expand Down Expand Up @@ -1092,18 +1096,38 @@ def get_signed_url(
region = flask.current_app.boto.get_bucket_region(
self.parsed_url.netloc, credential
)
s3client = boto3.client(
"s3",
aws_access_key_id=credential["aws_access_key_id"],
aws_secret_access_key=credential["aws_secret_access_key"],
aws_session_token=credential.get("aws_session_token", None),
region_name=region,
config=Config(s3={"addressing_style": "path"}, signature_version="s3v4"),
Avantol13 marked this conversation as resolved.
Show resolved Hide resolved
)

cirrus_aws = AwsService(s3client)
auth_info = _get_auth_info_for_id_or_from_request(user=authorized_user)

url = generate_aws_presigned_url(
http_url,
ACTION_DICT["s3"][action],
credential,
"s3",
region,
MichaelLukowski marked this conversation as resolved.
Show resolved Hide resolved
expires_in,
auth_info,
)
action = ACTION_DICT["s3"][action]

# get presigned url for upload
if action == "PUT":
url = cirrus_aws.upload_presigned_url(
bucket_name, object_id, expires_in, None
)
# get presigned url for download
else:
if bucket.get("requester_pays") is True:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we have integration tests for r pays? If not, can we make sure we test this manually in an environment that has it set up?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This requester pays feature is tested in qa-heal, we have an RP bucket there
I can create a ticket for adding a test case into CI and work with QA

paulineribeyre marked this conversation as resolved.
Show resolved Hide resolved
# need to add extra parameter to signing url for header
# https://github.com/boto/boto3/issues/3685
auth_info["x-amz-request-payer"] = "requester"
url = cirrus_aws.requester_pays_download_presigned_url(
bucket_name, object_id, expires_in, auth_info
)
else:
url = cirrus_aws.download_presigned_url(
bucket_name, object_id, expires_in, auth_info
)

return url

Expand All @@ -1115,7 +1139,7 @@ def init_multipart_upload(self, expires_in):
expires(int): expiration time

Returns:
UploadId(str)
uploadId(str)
"""
aws_creds = get_value(
config, "AWS_CREDENTIALS", InternalError("credentials not configured")
Expand All @@ -1133,18 +1157,19 @@ def generate_presigned_url_for_part_upload(self, uploadId, partNumber, expires_i
Generate presigned url for uploading object part given uploadId and part number

Args:
uploadId(str): uploadID of the multipart upload
uploadId(str): uploadId of the multipart upload
partNumber(int): part number
expires(int): expiration time

Returns:
presigned_url(str)
"""
bucket_name = self.bucket_name()
aws_creds = get_value(
config, "AWS_CREDENTIALS", InternalError("credentials not configured")
)
credential = S3IndexedFileLocation.get_credential_to_access_bucket(
self.bucket_name(), aws_creds, expires_in
bucket_name, aws_creds, expires_in
)

region = self.get_bucket_region()
Expand All @@ -1154,7 +1179,7 @@ def generate_presigned_url_for_part_upload(self, uploadId, partNumber, expires_i
)

return multipart_upload.generate_presigned_url_for_uploading_part(
self.parsed_url.netloc,
bucket_name,
self.parsed_url.path.strip("/"),
credential,
uploadId,
Expand Down
36 changes: 15 additions & 21 deletions fence/blueprints/data/multipart_upload.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import boto3
from botocore.client import Config
from botocore.exceptions import ClientError
from retry.api import retry_call

from cdispyutils.hmac4 import generate_aws_presigned_url
from cdispyutils.config import get_value
from cdislogging import get_logger
from gen3cirrus import AwsService
from fence.config import config
from fence.errors import InternalError

Expand Down Expand Up @@ -58,7 +59,7 @@ def initialize_multipart_upload(bucket_name, key, credentials):
key, error
)
)
raise InternalError("Can not initilize multipart upload for {}".format(key))
raise InternalError("Can not initialize multipart upload for {}".format(key))

return multipart_upload.get("UploadId")

Expand Down Expand Up @@ -140,28 +141,21 @@ def generate_presigned_url_for_uploading_part(
Returns:
presigned_url(str)
"""
s3_buckets = get_value(
config, "S3_BUCKETS", InternalError("S3_BUCKETS not configured")
)
bucket = s3_buckets.get(bucket_name)

s3_buckets = get_value(
config, "S3_BUCKETS", InternalError("S3_BUCKETS not configured")
)
bucket = s3_buckets.get(bucket_name)

if bucket.get("endpoint_url"):
url = bucket["endpoint_url"].strip("/") + "/{}/{}".format(
bucket_name, key.strip("/")
try:
s3client = boto3.client(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this seems like exactly the same code as above, could we just make a function and not have it duplicated?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure, seems to be having a client specific for signing the URL is more readable imo

"s3",
aws_access_key_id=credentials["aws_access_key_id"],
aws_secret_access_key=credentials["aws_secret_access_key"],
aws_session_token=credentials.get("aws_session_token", None),
region_name=region,
config=Config(s3={"addressing_style": "path"}, signature_version="s3v4"),
)
else:
url = "https://{}.s3.amazonaws.com/{}".format(bucket_name, key)
additional_signed_qs = {"partNumber": str(partNumber), "uploadId": uploadId}
cirrus_aws = AwsService(s3client)

try:
presigned_url = generate_aws_presigned_url(
url, "PUT", credentials, "s3", region, expires, additional_signed_qs
presigned_url = cirrus_aws.multipart_upload_presigned_url(
bucket_name, key, expires, uploadId, partNumber
)

return presigned_url
except Exception as e:
raise InternalError(
Expand Down
4 changes: 4 additions & 0 deletions fence/config-default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -676,6 +676,10 @@ S3_BUCKETS: {}
# cred: 'CRED1'
# region: 'us-east-1'
# role-arn: 'arn:aws:iam::role1'
# bucket5:
# cred: 'CRED3'
# region: 'us-east-1'
# requester_pays: true # to indicate this is a requester pay enabled S3 bucket
GS_BUCKETS: {}
# NOTE: Remove the {} and supply buckets if needed. Example in comments below
# bucket1:
Expand Down
Loading
Loading