Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issuer info in namespaces JSON (SOFTWARE-5768) #3498

Merged
10 changes: 10 additions & 0 deletions src/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,10 @@ The JSON also contains an attribute `namespaces` that is a list of namespaces wi
Note that scopes are usually relative to the namespace path.
- `vault_server`: the Vault server for the `Vault` strategy or null
- `vault_issuer`: the Vault issuer for the `Vault` strategy (or null).
- `scitokens`: a list with one item for each `SciTokens` section in the `Authorizations` list for that namespace (or the empty list if there are none). Each list item has:
- `issuer`: the value of the `Issuer` field in the scitokens block
- `base_path`: a list which is the value of the `BasePath` (or `Base Path`) field split on commas
- `restricted_path`: a list which is the value of the `RestrictedPath` (or `Restricted Path`) field split on commas, or the empty list if unspecified

The final result looks like
```json
Expand Down Expand Up @@ -567,6 +571,7 @@ The final result looks like
"dirlisthost": null,
"path": "/xenon/PROTECTED",
"readhttps": true,
"scitokens": [],
"usetokenonread": false,
"writebackhost": null
},
Expand All @@ -582,6 +587,11 @@ The final result looks like
"dirlisthost": "https://origin-auth2001.chtc.wisc.edu:1095",
"path": "/ospool/PROTECTED",
"readhttps": true,
"scitokens": {
"issuer": "https://osg-htc.org/ospool",
"base_path": ["/ospool/PROTECTED", "/s3.amazonaws.com/us-east-1", "/s3.amazonaws.com/us-west-1"],
"restricted_path": []
},
"usetokenonread": true,
"writebackhost": "https://origin-auth2001.chtc.wisc.edu:1095"
}
Expand Down
8 changes: 8 additions & 0 deletions src/stashcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,13 @@ def get_credential_generation_dict_for_namespace(ns: Namespace) -> Optional[Dict
return info


def get_scitokens_list_for_namespace(ns: Namespace) -> List[Dict]:
    """Build the value of the .namespaces[*].scitokens attribute in the namespaces JSON.

    Every entry of ``ns.authz_list`` is asked for its namespaces scitokens
    block; entries that answer with a falsy value (such as None) are left out.
    """
    candidate_blocks = (
        authz.get_namespaces_scitokens_block() for authz in ns.authz_list
    )
    # Plain truthiness test, so None and empty blocks are both dropped.
    return [block for block in candidate_blocks if block]


def get_namespaces_info(global_data: GlobalData) -> PreJSON:
"""Return data for the /stashcache/namespaces JSON endpoint.

Expand Down Expand Up @@ -564,6 +571,7 @@ def _namespace_dict(ns: Namespace):
"caches": [],
"origins": [],
"credential_generation": get_credential_generation_dict_for_namespace(ns),
"scitokens": get_scitokens_list_for_namespace(ns),
}

for cache_name, cache_resource_obj in cache_resource_objs.items():
Expand Down
68 changes: 9 additions & 59 deletions src/tests/test_api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import re
import flask
import pytest
from typing import Dict, List
import urllib.parse
from pytest_mock import MockerFixture

Expand All @@ -17,10 +18,6 @@

from app import app, global_data
from webapp.topology import Facility, Site, Resource, ResourceGroup
from webapp.data_federation import CredentialGeneration

HOST_PORT_RE = re.compile(r"[a-zA-Z0-9.-]{3,63}:[0-9]{2,5}")
PROTOCOL_HOST_PORT_RE = re.compile(r"[a-z]+://" + HOST_PORT_RE.pattern)

INVALID_USER = dict(
username="invalid",
Expand Down Expand Up @@ -61,7 +58,9 @@
"/cache/scitokens.conf",
"/api/institutions",
"/cache/grid-mapfile",
"/origin/grid-mapfile"
"/origin/grid-mapfile",
"/osdf/namespaces",
"/stashcache/namespaces",
]


Expand Down Expand Up @@ -189,60 +188,6 @@ def test_stashcache_file(key, endpoint, fqdn, resource_stashcache_files):
else:
app.config["STASHCACHE_LEGACY_AUTH"] = old_legacy_auth

def test_stashcache_namespaces(self, client: flask.Flask):
    """Check the schema of the JSON returned by the /stashcache/namespaces endpoint."""

    def validate_cache_schema(cc):
        # Both endpoint fields must look like host:port.
        for endpoint_key in ("auth_endpoint", "endpoint"):
            assert HOST_PORT_RE.match(cc[endpoint_key])
        assert cc["resource"] and isinstance(cc["resource"], str)

    def validate_namespace_schema(ns):
        assert isinstance(ns["caches"], list)  # we do have a case where it's empty
        assert ns["path"].startswith("/")  # implies str
        for flag_key in ("readhttps", "usetokenonread"):
            assert isinstance(ns[flag_key], bool)
        # Host fields are either unset or look like protocol://host:port.
        for host_key in ("dirlisthost", "writebackhost"):
            assert ns[host_key] is None or PROTOCOL_HOST_PORT_RE.match(ns[host_key])

        credgen = ns["credential_generation"]
        if credgen is None:
            return
        depth = credgen["max_scope_depth"]
        assert isinstance(depth, int) and depth > -1
        assert credgen["strategy"] in CredentialGeneration.STRATEGIES
        assert credgen["issuer"]
        issuer_url = urllib.parse.urlparse(credgen["issuer"])
        assert issuer_url.netloc and issuer_url.scheme == "https"
        # These fields are only type-checked when they hold a truthy value.
        for optional_key in ("vault_server", "vault_issuer", "base_path"):
            if credgen[optional_key]:
                assert isinstance(credgen[optional_key], str)

    response = client.get('/stashcache/namespaces')
    assert response.status_code == 200
    payload = response.json

    assert "caches" in payload
    all_caches = payload["caches"]
    # Have a reasonable number of caches
    assert len(all_caches) > 20
    for entry in all_caches:
        validate_cache_schema(entry)

    assert "namespaces" in payload
    all_namespaces = payload["namespaces"]
    # Have a reasonable number of namespaces
    assert len(all_namespaces) > 15

    for namespace in all_namespaces:
        validate_namespace_schema(namespace)
        # "caches" was just verified to be a list, so an empty one is simply skipped.
        for entry in namespace["caches"]:
            validate_cache_schema(entry)
    assert any(
        namespace["credential_generation"] is not None
        for namespace in all_namespaces
    ), "At least one namespace with credential_generation"


def test_institution_accept_type(self, client: flask.Flask):
"""Checks both formats output the same content"""

Expand Down Expand Up @@ -337,6 +282,11 @@ def test_cache_grid_mapfile(self, client: flask.Flask):
hashes_not_in_authfile = mapfile_hashes - authfile_hashes
assert not hashes_not_in_authfile, f"Hashes in mapfile but not in authfile: {hashes_not_in_authfile}"

def test_namespaces_json(self, client):
    """The /osdf/namespaces endpoint answers 200 with JSON that has a "namespaces" key."""
    resp = client.get('/osdf/namespaces')
    assert resp.status_code == 200
    # Only look at the body once the status code has been confirmed.
    payload = resp.json
    assert "namespaces" in payload


class TestEndpointContent:
# Pre-build some test cases based on AMNH resources
Expand Down
146 changes: 136 additions & 10 deletions src/tests/test_stashcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import re
from pytest_mock import MockerFixture
import time
from typing import List, Dict
import urllib, urllib.parse

# Rewrites the path so the app can be imported like it normally is
import os
Expand All @@ -18,8 +20,12 @@
from app import app, global_data
from webapp import models, topology, vos_data
from webapp.common import load_yaml_file
from webapp.data_federation import CredentialGeneration
import stashcache

HOST_PORT_RE = re.compile(r"[a-zA-Z0-9.-]{3,63}:[0-9]{2,5}")
PROTOCOL_HOST_PORT_RE = re.compile(r"[a-z]+://" + HOST_PORT_RE.pattern)

GRID_MAPPING_REGEX = re.compile(r'^"(/[^"]*CN=[^"]+")\s+([0-9a-f]{8}[.]0)$')
# ^^ the DN starts with a slash and will at least have a CN in it.
EMPTY_LINE_REGEX = re.compile(r'^\s*(#|$)') # Empty or comment-only lines
Expand All @@ -28,7 +34,9 @@
# fake origins in our test data:
TEST_ITB_HELM_ORIGIN = "helm-origin.osgdev.test.io"
TEST_SC_ORIGIN = "sc-origin.test.wisc.edu"

TEST_ORIGIN_AUTH2000 = "origin-auth2000.test.wisc.edu"
TEST_ISSUER = "https://test.wisc.edu"
TEST_BASEPATH = "/testvo"

# Some DNs I can use for testing and the hashes they map to.
# All of these were generated with osg-ca-generator on alma8
Expand All @@ -45,7 +53,8 @@
MOCK_DN_LIST = list(MOCK_DNS_AND_HASHES.keys())


def get_test_global_data(global_data: models.GlobalData) -> models.GlobalData:
@pytest.fixture
def test_global_data() -> models.GlobalData:
"""Get a copy of the global data with some entries created for testing"""
new_global_data = copy.deepcopy(global_data)

Expand Down Expand Up @@ -105,8 +114,7 @@ def test_allowedVO_excludes_LIGO_and_ANY_for_ligo_inclusion(self, client: flask.

assert spy.call_count == 0

def test_scitokens_issuer_sections(self, client: flask.Flask):
test_global_data = get_test_global_data(global_data)
def test_scitokens_issuer_sections(self, test_global_data):
origin_scitokens_conf = stashcache.generate_origin_scitokens(
test_global_data, TEST_ITB_HELM_ORIGIN)
assert origin_scitokens_conf.strip(), "Generated scitokens.conf empty"
Expand All @@ -128,9 +136,7 @@ def test_scitokens_issuer_sections(self, client: flask.Flask):
print(f"Generated origin scitokens.conf text:\n{origin_scitokens_conf}\n", file=sys.stderr)
raise

def test_scitokens_issuer_public_read_auth_write_namespaces_info(self, client: flask.Flask):
test_global_data = get_test_global_data(global_data)

def test_scitokens_issuer_public_read_auth_write_namespaces_info(self, test_global_data):
namespaces_json = stashcache.get_namespaces_info(test_global_data)
namespaces = namespaces_json["namespaces"]
testvo_PUBLIC_namespace_list = [
Expand All @@ -145,9 +151,7 @@ def test_scitokens_issuer_public_read_auth_write_namespaces_info(self, client: f
assert ns["writebackhost"] == f"https://{TEST_SC_ORIGIN}:1095", \
"writebackhost is wrong for namespace with auth write"

def test_scitokens_issuer_public_read_auth_write_scitokens_conf(self, client: flask.Flask):
test_global_data = get_test_global_data(global_data)

def test_scitokens_issuer_public_read_auth_write_scitokens_conf(self, test_global_data):
origin_scitokens_conf = stashcache.generate_origin_scitokens(
test_global_data, TEST_SC_ORIGIN)
assert origin_scitokens_conf.strip(), "Generated scitokens.conf empty"
Expand Down Expand Up @@ -222,5 +226,127 @@ def test_cache_grid_mapfile_i2_cache(self, client: flask.Flask, mocker: MockerFi
assert num_mappings > 5, f"Too few mappings found.\nFull text:\n{text}\n"


class TestNamespaces:
    """Schema and content checks on the output of stashcache.get_namespaces_info()."""

    @pytest.fixture
    def namespaces_json(self, test_global_data) -> Dict:
        """Namespaces info generated from the test global data."""
        info = stashcache.get_namespaces_info(test_global_data)
        return info

    @pytest.fixture
    def namespaces(self, namespaces_json) -> List[Dict]:
        """The value stored under the "namespaces" key of the namespaces info."""
        assert "namespaces" in namespaces_json
        return namespaces_json["namespaces"]

    @staticmethod
    def _namespace_with_path(namespaces, path):
        """Return the first namespace whose "path" equals ``path`` (IndexError if none)."""
        matching = [ns for ns in namespaces if ns["path"] == path]
        return matching[0]

    @staticmethod
    def _check_single_test_issuer(ns):
        """Assert ``ns`` has exactly one scitokens block, for the test issuer and base path."""
        assert len(ns["scitokens"]) == 1
        sci = ns["scitokens"][0]
        assert sci["issuer"] == TEST_ISSUER
        assert sci["base_path"] == [TEST_BASEPATH]
        assert sci["restricted_path"] == []

    @staticmethod
    def validate_cache_schema(cc):
        """Assert that a cache entry has host:port endpoints and a non-empty string resource."""
        for endpoint_key in ("auth_endpoint", "endpoint"):
            assert HOST_PORT_RE.match(cc[endpoint_key])
        assert cc["resource"] and isinstance(cc["resource"], str)

    @staticmethod
    def validate_namespace_schema(ns):
        """Assert the types/formats of a namespace entry and of its credential_generation, if set."""
        assert isinstance(ns["caches"], list)  # we do have a case where it's empty
        assert ns["path"].startswith("/")  # implies str
        for flag_key in ("readhttps", "usetokenonread"):
            assert isinstance(ns[flag_key], bool)
        # Host fields are either unset or look like protocol://host:port.
        for host_key in ("dirlisthost", "writebackhost"):
            assert ns[host_key] is None or PROTOCOL_HOST_PORT_RE.match(ns[host_key])

        credgen = ns["credential_generation"]
        if credgen is None:
            return
        depth = credgen["max_scope_depth"]
        assert isinstance(depth, int) and depth > -1
        assert credgen["strategy"] in CredentialGeneration.STRATEGIES
        assert credgen["issuer"]
        issuer_url = urllib.parse.urlparse(credgen["issuer"])
        assert issuer_url.netloc and issuer_url.scheme == "https"
        # These fields are only type-checked when they hold a truthy value.
        for optional_key in ("vault_server", "vault_issuer", "base_path"):
            if credgen[optional_key]:
                assert isinstance(credgen[optional_key], str)

    def test_caches(self, namespaces_json):
        """The top-level cache list is reasonably long and every entry is well-formed."""
        assert "caches" in namespaces_json
        all_caches = namespaces_json["caches"]
        # Have a reasonable number of caches
        assert len(all_caches) > 20
        for entry in all_caches:
            self.validate_cache_schema(entry)

    def test_namespaces(self, namespaces):
        """Every namespace and its caches are well-formed; at least one has credential_generation."""
        # Have a reasonable number of namespaces
        assert len(namespaces) > 15

        for namespace in namespaces:
            self.validate_namespace_schema(namespace)
            # "caches" was just verified to be a list, so an empty one is simply skipped.
            for entry in namespace["caches"]:
                self.validate_cache_schema(entry)
        assert any(
            namespace["credential_generation"] is not None
            for namespace in namespaces
        ), "At least one namespace with credential_generation"

    @staticmethod
    def validate_scitokens_block(sci):
        """Assert that a scitokens block has a URL-like issuer and well-formed path lists."""
        issuer = sci["issuer"]
        assert issuer
        assert isinstance(issuer, str)
        assert "://" in issuer
        # base_path must have at least 1 entry; restricted_path may be empty.
        for list_key, must_be_nonempty in (("base_path", True), ("restricted_path", False)):
            paths = sci[list_key]
            assert isinstance(paths, list)
            if must_be_nonempty:
                assert paths
            for path in paths:
                assert path.startswith("/")  # implies str
                assert "," not in path

    def test_issuers_in_namespaces(self, namespaces):
        """Each namespace carries a "scitokens" list whose items are all well-formed."""
        for namespace in namespaces:
            blocks = namespace["scitokens"]
            assert isinstance(blocks, list)
            for block in blocks:
                self.validate_scitokens_block(block)

    def test_testvo_public_namespace(self, namespaces):
        """Expected contents of the /testvo/PUBLIC namespace in the test data."""
        ns = self._namespace_with_path(namespaces, "/testvo/PUBLIC")

        assert ns["readhttps"] is False
        assert ns["usetokenonread"] is False
        assert TEST_SC_ORIGIN in ns["writebackhost"]
        assert len(ns["caches"]) > 10
        assert len(ns["origins"]) == 2
        assert ns["credential_generation"] is None
        self._check_single_test_issuer(ns)

    def test_testvo_namespace(self, namespaces):
        """Expected contents of the /testvo namespace in the test data."""
        ns = self._namespace_with_path(namespaces, "/testvo")

        assert ns["readhttps"] is True
        assert ns["usetokenonread"] is True
        for host_key in ("writebackhost", "dirlisthost"):
            assert TEST_ORIGIN_AUTH2000 in ns[host_key]
        assert len(ns["caches"]) > 10
        assert len(ns["origins"]) == 1

        credgen = ns["credential_generation"]
        expected_credgen = {
            "base_path": TEST_BASEPATH,
            "strategy": "OAuth2",
            "issuer": TEST_ISSUER,
            "max_scope_depth": 3,
        }
        for key, expected_value in expected_credgen.items():
            assert credgen[key] == expected_value
        self._check_single_test_issuer(ns)


if __name__ == '__main__':
pytest.main()
12 changes: 12 additions & 0 deletions src/webapp/data_federation.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
import urllib
import urllib.parse
from collections import OrderedDict
Expand Down Expand Up @@ -25,6 +26,8 @@ def get_scitokens_conf_block(self, service_name: str):
def get_grid_mapfile_line(self):
    """Return this auth method's grid-mapfile line; this implementation yields an empty string."""
    return ""

def get_namespaces_scitokens_block(self):
    """Return this auth method's scitokens block for the namespaces JSON; None in this implementation."""
    return None

class NullAuth(AuthMethod):
    """Auth method that defines nothing of its own beyond what AuthMethod provides."""
Expand Down Expand Up @@ -100,6 +103,15 @@ def get_scitokens_conf_block(self, service_name: str):

return block

def get_namespaces_scitokens_block(self):
    """Return the issuer info for the `scitokens` list of a namespace in the namespaces JSON.

    Returns a dict with:
      - "issuer": the value of ``self.issuer``
      - "base_path": ``self.base_path`` split on commas, as a list
      - "restricted_path": ``self.restricted_path`` split on commas, as a list
        (the empty list if it is unset)

    Each path has surrounding whitespace removed, and empty entries (for
    example from a trailing or doubled comma) are dropped, so the lists only
    ever contain real paths.
    """

    def _split_paths(value):
        # Split a comma-separated path string into a clean list of paths.
        if not value:
            return []
        stripped = (piece.strip() for piece in value.split(","))
        return [path for path in stripped if path]

    return {
        "issuer": self.issuer,
        "base_path": _split_paths(self.base_path),
        "restricted_path": _split_paths(self.restricted_path),
    }


# TODO Use a dataclass (https://docs.python.org/3.9/library/dataclasses.html)
# once we can ditch Python 3.6; the webapp no longer supports 3.6 but some of
Expand Down