Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

aws requester pays #1173

Merged
merged 40 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
b0b8147
add functionality for requester pays buckets
MichaelLukowski Aug 8, 2024
9dbf6f1
add removed dependency
MichaelLukowski Aug 8, 2024
dd4a111
add debug for requester pays
MichaelLukowski Aug 8, 2024
fa84b16
remove debug statements and fix url concat
MichaelLukowski Aug 8, 2024
0ed13e8
Merge branch 'master' into feat/aws-requesterpays
MichaelLukowski Aug 8, 2024
0532e4e
fix presigned url tests and clean up
MichaelLukowski Aug 13, 2024
fe8a5f2
resolve poetry.lock conflict
MichaelLukowski Aug 13, 2024
b361663
add mock presigned urls for blank tests
MichaelLukowski Aug 13, 2024
e4d64ee
add functionality for requester pays buckets
MichaelLukowski Aug 8, 2024
eddf1dd
add removed dependency
MichaelLukowski Aug 8, 2024
01a1867
add debug for requester pays
MichaelLukowski Aug 8, 2024
5f1a5e5
remove debug statements and fix url concat
MichaelLukowski Aug 8, 2024
6ee1947
fix presigned url tests and clean up
MichaelLukowski Aug 13, 2024
a31cab2
add mock presigned urls for blank tests
MichaelLukowski Aug 13, 2024
e46889d
Merge branch 'feat/aws-requesterpays' of github.com:uc-cdis/fence int…
MichaelLukowski Aug 13, 2024
83b8b38
clean up and fix function case
MichaelLukowski Aug 14, 2024
af96840
adding config to boto clients
MichaelLukowski Aug 15, 2024
4b565cb
add handler for custom parameters for s3 presigned urls
MichaelLukowski Aug 15, 2024
e8d831b
fix tests
MichaelLukowski Aug 15, 2024
7ec3476
clean up and add extra params for requester pays
MichaelLukowski Aug 15, 2024
8523473
remove custom parameters for upload presigned url
MichaelLukowski Aug 15, 2024
c73f225
fix requester pays params logic
MichaelLukowski Aug 15, 2024
670ad26
add session token (#1176)
mfshao Aug 20, 2024
0b93bad
refactor logic
mfshao Aug 22, 2024
49618b2
update lock
mfshao Aug 22, 2024
0a6537d
address pr comments
mfshao Aug 26, 2024
913dbc7
update
mfshao Aug 26, 2024
c7cd09d
update lock
mfshao Aug 26, 2024
2f54f15
test
mfshao Aug 26, 2024
3d8b6fd
fix
mfshao Aug 26, 2024
7d3b980
test
mfshao Aug 26, 2024
f02631a
restore
mfshao Aug 26, 2024
edf2fa6
try authlib version
mfshao Aug 26, 2024
bfa47a5
update comment
mfshao Aug 26, 2024
612690b
update lock
mfshao Aug 26, 2024
db11006
update version
mfshao Aug 27, 2024
d82205c
Update pyproject.toml
mfshao Aug 27, 2024
8bc9a67
Update pyproject.toml
mfshao Aug 27, 2024
b454062
redo lock
mfshao Aug 27, 2024
240fb48
add to default config
mfshao Aug 27, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -268,14 +268,14 @@
"filename": "tests/conftest.py",
mfshao marked this conversation as resolved.
Show resolved Hide resolved
"hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9",
"is_verified": false,
"line_number": 1569
"line_number": 1573
},
{
"type": "Base64 High Entropy String",
"filename": "tests/conftest.py",
"hashed_secret": "227dea087477346785aefd575f91dd13ab86c108",
"is_verified": false,
"line_number": 1593
"line_number": 1597
}
],
"tests/credentials/google/test_credentials.py": [
Expand Down Expand Up @@ -422,5 +422,5 @@
}
]
},
"generated_at": "2024-07-25T17:19:58Z"
"generated_at": "2024-08-13T16:48:45Z"
}
64 changes: 53 additions & 11 deletions fence/blueprints/data/indexd.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import re
import time
import json
from urllib.parse import urlparse, ParseResult, urlunparse
import boto3
from botocore.client import Config
from urllib.parse import urlparse, ParseResult, urlunparse, quote
from datetime import datetime, timedelta

from sqlalchemy.sql.functions import user
from cached_property import cached_property
import gen3cirrus
from gen3cirrus import GoogleCloudManager
from gen3cirrus import AwsService
from cdislogging import get_logger
from cdispyutils.config import get_value
from cdispyutils.hmac4 import generate_aws_presigned_url
import flask
from flask import current_app
import requests
Expand Down Expand Up @@ -1043,6 +1045,24 @@ def get_bucket_region(self):
else:
return bucket_cred["region"]

def is_custom(k):
if k == "user_id" or k == "username":
mfshao marked this conversation as resolved.
Show resolved Hide resolved
return True
else:
return False

def client_param_handler(*, params, context, **_kw):
# Store custom parameters in context for later event handlers
context["custom_params"] = {k: v for k, v in params.items() if is_custom(k)}
# Remove custom parameters from client parameters,
# because validation would fail on them
return {k: v for k, v in params.items() if not is_custom(k)}

def request_param_injector(*, request, **_kw):
if request.context["custom_params"]:
request.url += "&" if "?" in request.url else "?"
request.url += urlencode(request.context["custom_params"])

def get_signed_url(
self,
action,
Expand All @@ -1061,6 +1081,8 @@ def get_signed_url(
bucket_name = self.bucket_name()
bucket = s3_buckets.get(bucket_name)

object_id = self.parsed_url.path.strip("/")

if bucket and bucket.get("endpoint_url"):
http_url = bucket["endpoint_url"].strip("/") + "/{}/{}".format(
self.parsed_url.netloc, self.parsed_url.path.strip("/")
Expand Down Expand Up @@ -1093,18 +1115,38 @@ def get_signed_url(
self.parsed_url.netloc, credential
)

auth_info = _get_auth_info_for_id_or_from_request(user=authorized_user)

url = generate_aws_presigned_url(
http_url,
ACTION_DICT["s3"][action],
credential,
client = boto3.client(
"s3",
region,
MichaelLukowski marked this conversation as resolved.
Show resolved Hide resolved
expires_in,
auth_info,
aws_access_key_id=credential["aws_access_key_id"],
aws_secret_access_key=credential["aws_secret_access_key"],
region_name=region,
config=Config(s3={"addressing_style": "path"}, signature_version="s3v4"),
Avantol13 marked this conversation as resolved.
Show resolved Hide resolved
)

client.meta.events.register(
"provide-client-params.s3.GetObject", client_param_handler
)
client.meta.events.register("before-sign.s3.GetObject", request_param_injector)

cirrus_aws = AwsService(client)
auth_info = _get_auth_info_for_id_or_from_request(user=authorized_user)

action = ACTION_DICT["s3"][action]

if action == "PUT": # get presigned url for upload
mfshao marked this conversation as resolved.
Show resolved Hide resolved
url = cirrus_aws.upload_presigned_url(
bucket_name, object_id, expires_in, auth_info
)
else: # get presigned url for download
if bucket.get("requester_pays") == True:
url = cirrus_aws.requester_pays_download_presigned_url(
bucket_name, object_id, expires_in, auth_info
)
else:
url = cirrus_aws.download_presigned_url(
bucket_name, object_id, expires_in, auth_info
)

return url

def init_multipart_upload(self, expires_in):
Expand Down
21 changes: 11 additions & 10 deletions fence/blueprints/data/multipart_upload.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import boto3
from botocore.client import Config
from botocore.exceptions import ClientError
from retry.api import retry_call

from cdispyutils.hmac4 import generate_aws_presigned_url
from cdispyutils.config import get_value
from cdislogging import get_logger
from fence.config import config
Expand Down Expand Up @@ -150,18 +150,19 @@ def generate_presigned_url_for_uploading_part(
)
bucket = s3_buckets.get(bucket_name)

if bucket.get("endpoint_url"):
url = bucket["endpoint_url"].strip("/") + "/{}/{}".format(
bucket_name, key.strip("/")
try:
s3client = boto3.client(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this seems like exactly the same code as above, could we just make a function and not have it duplicated?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure, seems to be having a client specific for signing the URL is more readable imo

"s3",
aws_access_key_id=credentials["aws_access_key_id"],
aws_secret_access_key=credentials["aws_secret_access_key"],
config=Config(s3={"addressing_style": "path"}, signature_version="s3v4"),
)
else:
url = "https://{}.s3.amazonaws.com/{}".format(bucket_name, key)
additional_signed_qs = {"partNumber": str(partNumber), "uploadId": uploadId}
cirrus_aws = AwsService(client)

try:
presigned_url = generate_aws_presigned_url(
url, "PUT", credentials, "s3", region, expires, additional_signed_qs
presigned_url = cirrus_aws.multipart_upload_presigned_url(
bucket, key, expires, uploadId, partNumber
)

return presigned_url
except Exception as e:
raise InternalError(
Expand Down
Loading
Loading