diff --git a/README.md b/README.md index 6f3c5966..bac98b81 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,13 @@ [![All Contributors](https://img.shields.io/badge/all_contributors-12-orange.svg?style=flat-square)](#contributors-) +--- + +### *:loudspeaker: A new catalog is coming...* + +We're working on a newer, leaner, more modular, and more interoperable solution to the same challenge that the current `datalad-catalog` aims to address. This new development is taking place within the broader context of making DataLad datasets interoperable with linked and semantic (meta)data. For more background, see [this issue](https://github.com/psychoinformatics-de/datalad-concepts/issues/115). To keep up to date, follow progress at [`psychoinformatics-de/datalad-concepts`](https://github.com/psychoinformatics-de/datalad-concepts), [`psychoinformatics-de/shacl-vue`](https://github.com/psychoinformatics-de/shacl-vue), and in the [new development branch](https://github.com/datalad/datalad-catalog/tree/revolution). Because of this redirected focus, `datalad-catalog` itself will be downscaled by focusing on maintenance and assessing the priority of new features on a case-by-case basis. + +--- @@ -186,7 +193,8 @@ cd datalad-catalog pip install -r requirements-devel.txt ``` -This installs `sphinx` and related packages for documentation building, `coverage` for code coverage, and `pytest` for testing. +This installs `sphinx` and related packages for documentation building, `coverage` for code coverage, +`black` for linting, and `pytest` for testing. ### Contribution process @@ -195,10 +203,12 @@ To make a contribution to the code or documentation, please: - create an issue describing the bug/feature - fork the project repository, - create a branch from `main`, -- check that tests succeed: from the project root directory, run `pytest` - commit your changes, -- push to your fork +- check that linting tests succeed: from the project root directory, run `black .` +- check that tests succeed: from the project root directory, run `python -m pytest` +- push your commits to your fork - create a pull request with a clear description of the changes +- check that all continuous integration tests succeed on the pull request ## Contributors ✨ diff --git a/datalad_catalog/__init__.py b/datalad_catalog/__init__.py index 8d983c62..a68ca09c 100644 --- a/datalad_catalog/__init__.py +++ b/datalad_catalog/__init__.py @@ -62,6 +62,12 @@ "catalog-remove", "catalog_remove", ), + ( + "datalad_catalog.tree", + "Tree", + "catalog-tree", + "catalog_tree", + ), ( "datalad_catalog.translate", "MetaTranslate", diff --git a/datalad_catalog/catalog/assets/app_component_dataset.js b/datalad_catalog/catalog/assets/app_component_dataset.js index 38a16f5f..d2fa9347 100644 --- a/datalad_catalog/catalog/assets/app_component_dataset.js +++ b/datalad_catalog/catalog/assets/app_component_dataset.js @@ -77,6 +77,8 @@ const datasetView = () => console.debug("Active dataset:"); dataset = this.selectedDataset; console.debug(this.selectedDataset); + dataset_options = dataset.dataset_options + disp_dataset = {}; // Set name to unknown if not available if (!dataset.hasOwnProperty("name") || !dataset["name"]) { @@ -134,61 +136,117 @@ const datasetView = () => dataset.dataset_id + "_" + dataset.dataset_version; disp_dataset.download_filename = "dataset_" + disp_dataset.id_and_version + ".json"; - // URL - disp_dataset.is_github = false; // Github / gitlab / url / binder - disp_dataset.is_gitlab = false; // Github / gitlab / url / binder - disp_dataset.is_gin = false; // GIN - disp_dataset.url = ""; - if ( - dataset.hasOwnProperty("url") && - (dataset["url"] instanceof Array || Array.isArray(dataset["url"])) && - dataset["url"].length > 0 - ) { - for (var i = 0; i < dataset.url.length; i++) { - if (dataset.url[i].toLowerCase().indexOf("github") >= 0) { - disp_dataset.is_github = true; - disp_dataset.url = dataset.url[i]; - disp_dataset.url = disp_dataset.url.replace('git@github.com:', 'https://github.com'); - } - if (dataset.url[i].toLowerCase().indexOf("gin.g-node") >= 0) { - disp_dataset.is_gin = true; - disp_dataset.url = dataset.url[i]; - disp_dataset.url = disp_dataset.url.replace('ssh://', ''); - disp_dataset.url = disp_dataset.url.replace('git@gin.g-node.org:', 'https://gin.g-node.org'); - disp_dataset.url = disp_dataset.url.replace('git@gin.g-node.org', 'https://gin.g-node.org'); - disp_dataset.url = disp_dataset.url.replace('.git', ''); - } - } - if (!disp_dataset.url) { + + // ------- + // BUTTONS + // ------- + // show all buttons by default, unless logic dictates otherwise + + // first get dataset options + dataset_options = dataset.config.dataset_options; + + console.log("Dataset options (on dataset or inferred from catalog)") + console.log(dataset_options) + + // then initialise show/hide flags + disp_dataset.show_datalad = true; // Download with DataLad button + disp_dataset.show_github = true; // Github + disp_dataset.show_gitlab = true; // Gitlab + disp_dataset.show_gin = true; // GIN.g-node + disp_dataset.show_binder = true; // MyBinder + disp_dataset.show_cite = true; // 'Cite' button + disp_dataset.show_export = true; // 'Export metadata' button + disp_dataset.show_access_request = true; // 'Request access' button + disp_dataset.show_homepage = true; // 'Homepage' button + disp_dataset.show_download = true; // 'Download' button + + // URL (this is the field for the datalad dataset url to clone from) + // If URL does not exist, several buttons cannot be shown + // If URL is an array, first select first element + if (dataset.hasOwnProperty("url")) { + if ( + (dataset["url"] instanceof Array || Array.isArray(dataset["url"])) && + dataset["url"].length > 0 + ) { disp_dataset.url = dataset.url[0]; + } else { + disp_dataset.url = dataset.url; } - } else { - disp_dataset.url = dataset.url; - if (disp_dataset.url && dataset.url.toLowerCase().indexOf("gin.g-node") >= 0) { - disp_dataset.is_gin = true; + // show/hide datalad button + disp_dataset.show_datalad = dataset_options.include_datalad ?? true + // show/hide github button + if (disp_dataset.url.toLowerCase().indexOf("github") >= 0) { + disp_dataset.url = disp_dataset.url.replace('git@github.com:', 'https://github.com'); + disp_dataset.show_github = dataset_options.include_github ?? true + } else { + disp_dataset.show_github = false; + } + // show/hide GIN button + if (disp_dataset.url.toLowerCase().indexOf("gin.g-node") >= 0) { disp_dataset.url = disp_dataset.url.replace('ssh://', ''); disp_dataset.url = disp_dataset.url.replace('git@gin.g-node.org:', 'https://gin.g-node.org'); - disp_dataset.url = disp_dataset.url.replace('git@gin.g-node.org', 'https://gin.g-node.org'); disp_dataset.url = disp_dataset.url.replace('.git', ''); + disp_dataset.show_gin = dataset_options.include_gin ?? true + } else { + disp_dataset.show_gin = false; } + // show/hide gitlab button + if (disp_dataset.url.toLowerCase().indexOf("gitlab") >= 0) { + disp_dataset.show_gitlab = dataset_options.include_gitlab ?? true + } else { + disp_dataset.show_gitlab = false; + } + } else{ + // none of these buttons can be shown without a URL + disp_dataset.show_datalad = false; + disp_dataset.show_github = false; + disp_dataset.show_gitlab = false; + disp_dataset.show_gin = false; } - // Description - if ( - dataset.hasOwnProperty("description") && - (dataset["description"] instanceof Array || - Array.isArray(dataset["description"])) && - dataset["description"].length > 0 - ) { - disp_dataset.description = dataset.description; - disp_dataset.selected_description = disp_dataset.description[0]; - this.selectDescription(disp_dataset.selected_description); + // Show binder button: (if disp_dataset.url exists OR if dataset has a notebook specified in metadata) AND config specifies (or is missing) + disp_dataset.show_binder_button = false + if ( disp_dataset.url || disp_dataset.hasOwnProperty("notebooks") && disp_dataset.notebooks.length > 0 ) { + disp_dataset.show_binder = dataset_options.include_binder ?? true + } else { + disp_dataset.show_binder = false; } - if ( - (dataset.hasOwnProperty("description") && - dataset["description"] instanceof String) || - typeof dataset["description"] === "string" - ) { - this.description_ready = true; + // Show cite button: if the dataset.doi exists AND config specifies (or is missing) + disp_dataset.show_cite = dataset.doi ? (dataset_options.include_cite ?? true) : false + + // Show homepage button: if the dataset.homepage_url exists AND config specifies (or is missing) + if (dataset.hasOwnProperty("homepage_url")) { + if ( + (dataset["homepage_url"] instanceof Array || Array.isArray(dataset["homepage_url"])) && + dataset["homepage_url"].length > 0 + ) { + disp_dataset.homepage_url = dataset.homepage_url[0]; + } else { + disp_dataset.homepage_url = dataset.homepage_url; + } + } + disp_dataset.show_homepage = disp_dataset.homepage_url ? (dataset_options.include_homepage ?? true) : false + // Show download button: if the dataset.download_url exists AND config specifies (or is missing) + if (dataset.hasOwnProperty("download_url")) { + if ( + (dataset["download_url"] instanceof Array || Array.isArray(dataset["download_url"])) && + dataset["download_url"].length > 0 + ) { + disp_dataset.download_url = dataset.download_url[0]; + } else { + disp_dataset.download_url = dataset.download_url; + } + } + disp_dataset.show_download = disp_dataset.download_url ? (dataset_options.include_download ?? true) : false + // Show export button: if config specifies (or is missing) + disp_dataset.show_export = dataset_options.include_metadata_export ?? true + // Show/hide config for "Request access" button: + // if the (access_request_contact exists OR access_request_url exists) + // AND config specifies (or is missing) + if ( dataset.hasOwnProperty("access_request_contact") && dataset["access_request_contact"] || + dataset.hasOwnProperty("access_request_url") && dataset["access_request_url"] ) { + disp_dataset.show_access_request = dataset_options.include_access_request ?? true + } else { + disp_dataset.show_access_request = false; } // Create href mailto for request access contact if ( @@ -197,7 +255,6 @@ const datasetView = () => ) { var email_to = dataset.access_request_contact.email var email_subject = "Access request: " + disp_dataset.short_name - disp_dataset.access_request_mailto = "mailto:" + email_to + @@ -208,25 +265,24 @@ const datasetView = () => "%20" + dataset.access_request_contact.familyName; } - // Rendering options for dataset page - if (this.$root.hasOwnProperty("dataset_options") && this.$root.dataset_options.hasOwnProperty("include_metadata_export")) { - disp_dataset.show_export = this.$root.dataset_options.include_metadata_export - } - else { - disp_dataset.show_export = false - } - // Determine show/hide confirg for "Request access" button - if (dataset.config?.hasOwnProperty("dataset_options") && dataset.config.dataset_options.hasOwnProperty("include_access_request")) { - disp_dataset.show_access_request = dataset.config.dataset_options.include_access_request - } - else { - // default should be to display the access request button, if access request contact/url are included - disp_dataset.show_access_request = true + + // Description + if ( + dataset.hasOwnProperty("description") && + (dataset["description"] instanceof Array || + Array.isArray(dataset["description"])) && + dataset["description"].length > 0 + ) { + disp_dataset.description = dataset.description; + disp_dataset.selected_description = disp_dataset.description[0]; + this.selectDescription(disp_dataset.selected_description); } - // Show / hide binder button: if disp_dataset.url exists OR if dataset has a notebook specified in metadata - disp_dataset.show_binder_button = false - if ( disp_dataset.url || disp_dataset.hasOwnProperty("notebooks") && disp_dataset.notebooks.length > 0 ) { - disp_dataset.show_binder_button = true + if ( + (dataset.hasOwnProperty("description") && + dataset["description"] instanceof String) || + typeof dataset["description"] === "string" + ) { + this.description_ready = true; } // Set correct URL query string to mirrorif keyword(s) included in query parameters @@ -423,7 +479,7 @@ const datasetView = () => history.replaceState( {}, null, - current_route.path + query_string + (router.options.base + current_route.path + query_string).replace("//", "/") ) console.debug("- After: Vue Route query params: %s", JSON.stringify(Object.assign({}, this.$route.query))) let url_qp2 = new URL(document.location.toString()).searchParams @@ -1049,15 +1105,20 @@ const datasetView = () => // set the root data for available tabs available_tabs_lower = available_tabs this.$root.selectedDataset.available_tabs = available_tabs_lower - // Now get dataset config if it exists + // Now get dataset config if it exists, else set to catalog-level config dataset_config_path = metadata_dir + "/" + sDs.dataset_id + "/" + sDs.dataset_version + "/config.json"; configresponse = await fetch(dataset_config_path, {cache: "no-cache"}); if (configresponse.status == 404) { - this.$root.selectedDataset.config = {}; + this.$root.selectedDataset.config = this.$root.catalog_config; } else { configtext = await configresponse.text(); config = JSON.parse(configtext); - this.$root.selectedDataset.config = config; + this.$root.selectedDataset.config = {...this.$root.catalog_config, ...config}; + if (config.dataset_options) { + // dataset options exist in catalog level config and dataset-level config + // they need to be merged, with dataset-level taking priority + this.$root.selectedDataset.config.dataset_options = {...this.$root.catalog_config.dataset_options, ...config.dataset_options}; + } } // Set the correct tab to be rendered correct_tab = to.query.hasOwnProperty("tab") ? to.query.tab : null @@ -1204,15 +1265,20 @@ const datasetView = () => available_tabs_lower = available_tabs // set the root data for available tabs this.$root.selectedDataset.available_tabs = available_tabs_lower - // Now get dataset config if it exists + // Now get dataset config if it exists, else set to catalog-level config dataset_config_path = metadata_dir + "/" + sDs.dataset_id + "/" + sDs.dataset_version + "/config.json"; configresponse = await fetch(dataset_config_path, {cache: "no-cache"}); if (configresponse.status == 404) { - this.$root.selectedDataset.config = {}; + this.$root.selectedDataset.config = this.$root.catalog_config; } else { configtext = await configresponse.text(); config = JSON.parse(configtext); - this.$root.selectedDataset.config = config; + this.$root.selectedDataset.config = {...this.$root.catalog_config, ...config}; + if (config.dataset_options) { + // dataset options exist in catalog level config and dataset-level config + // they need to be merged, with dataset-level taking priority + this.$root.selectedDataset.config.dataset_options = {...this.$root.catalog_config.dataset_options, ...config.dataset_options}; + } } // --- // Note for future: Handle route query parameters (tab and keyword) here? diff --git a/datalad_catalog/catalog/assets/app_globals.js b/datalad_catalog/catalog/assets/app_globals.js index 228df91b..32a1ea49 100644 --- a/datalad_catalog/catalog/assets/app_globals.js +++ b/datalad_catalog/catalog/assets/app_globals.js @@ -2,9 +2,9 @@ // Data // /********/ -const template_dir = "/templates"; -const config_file = "/config.json"; -const metadata_dir = "/metadata"; +const template_dir = "templates"; +const config_file = "config.json"; +const metadata_dir = "metadata"; const superdatasets_file = metadata_dir + "/super.json"; const SPLIT_INDEX = 3; const SHORT_NAME_LENGTH = 0; // number of characters in name to display, zero if all @@ -13,7 +13,7 @@ const default_config = { catalog_url: "https://datalad-catalog.netlify.app/", link_color: "#fba304", link_hover_color: "#af7714", - logo_path: "/artwork/catalog_logo.svg", + logo_path: "artwork/catalog_logo.svg", social_links: { about: null, documentation: "https://docs.datalad.org/projects/catalog/en/latest/", diff --git a/datalad_catalog/catalog/assets/app_router.js b/datalad_catalog/catalog/assets/app_router.js index 02711109..3095ba4e 100644 --- a/datalad_catalog/catalog/assets/app_router.js +++ b/datalad_catalog/catalog/assets/app_router.js @@ -50,7 +50,7 @@ const routes = [ // Create router const router = new VueRouter({ mode: 'history', - base: '/', + base: window.location.pathname.split('dataset/')[0], routes: routes, scrollBehavior(to, from, savedPosition) { return { x: 0, y: 0, behavior: "auto" }; diff --git a/datalad_catalog/catalog/config.json b/datalad_catalog/catalog/config.json index f1737515..db221fec 100644 --- a/datalad_catalog/catalog/config.json +++ b/datalad_catalog/catalog/config.json @@ -11,7 +11,14 @@ "x": "https://x.com/datalad" }, "dataset_options": { - "include_metadata_export": true + "include_datalad": true, + "include_github": true, + "include_gitlab": true, + "include_gin": true, + "include_binder": true, + "include_cite": true, + "include_metadata_export": true, + "include_access_request": true }, "property_sources": { "dataset": { diff --git a/datalad_catalog/catalog/index.html b/datalad_catalog/catalog/index.html index 989b00e1..f233309d 100644 --- a/datalad_catalog/catalog/index.html +++ b/datalad_catalog/catalog/index.html @@ -10,26 +10,34 @@ - - - + + + + + DataLad Catalog - - - - - + + + + + - - - - - + + + + + @@ -74,13 +82,13 @@
- - - - - - - - + + + + + + + + \ No newline at end of file diff --git a/datalad_catalog/catalog/metadata/deabeb9b-7a37-4062-a1e0-8fcef7909609/6d7fb68264f9b9951ae141fc830712a8744e3293/config.json b/datalad_catalog/catalog/metadata/deabeb9b-7a37-4062-a1e0-8fcef7909609/6d7fb68264f9b9951ae141fc830712a8744e3293/config.json index d8e313ce..77ea3ca4 100644 --- a/datalad_catalog/catalog/metadata/deabeb9b-7a37-4062-a1e0-8fcef7909609/6d7fb68264f9b9951ae141fc830712a8744e3293/config.json +++ b/datalad_catalog/catalog/metadata/deabeb9b-7a37-4062-a1e0-8fcef7909609/6d7fb68264f9b9951ae141fc830712a8744e3293/config.json @@ -11,6 +11,7 @@ }, "dataset_options": { "include_metadata_export": true, + "include_access_request": false, "default_tab": "subdatasets" }, "property_sources": { diff --git a/datalad_catalog/catalog/metadata/deabeb9b-7a37-4062-a1e0-8fcef7909609/6d7fb68264f9b9951ae141fc830712a8744e3293/ddb/8aa081e7afeb3831fe007a3816dce.json b/datalad_catalog/catalog/metadata/deabeb9b-7a37-4062-a1e0-8fcef7909609/6d7fb68264f9b9951ae141fc830712a8744e3293/ddb/8aa081e7afeb3831fe007a3816dce.json index 360decfd..f9a2f1ba 100644 --- a/datalad_catalog/catalog/metadata/deabeb9b-7a37-4062-a1e0-8fcef7909609/6d7fb68264f9b9951ae141fc830712a8744e3293/ddb/8aa081e7afeb3831fe007a3816dce.json +++ b/datalad_catalog/catalog/metadata/deabeb9b-7a37-4062-a1e0-8fcef7909609/6d7fb68264f9b9951ae141fc830712a8744e3293/ddb/8aa081e7afeb3831fe007a3816dce.json @@ -32,6 +32,9 @@ "url": [ "https://github.com/psychoinformatics-de/studyforrest-data" ], + "homepage_url": [ + "https://www.studyforrest.org/" + ], "authors": [ { "name": "Adina Wagner", diff --git a/datalad_catalog/catalog/metadata/studyforrest_multires3t/e7d094ff09f0602affe30f46cdf19c37.json b/datalad_catalog/catalog/metadata/studyforrest_multires3t/682dc55d6c6669a970f33337cda41ae0.json similarity index 100% rename from datalad_catalog/catalog/metadata/studyforrest_multires3t/e7d094ff09f0602affe30f46cdf19c37.json rename to datalad_catalog/catalog/metadata/studyforrest_multires3t/682dc55d6c6669a970f33337cda41ae0.json diff --git a/datalad_catalog/catalog/schema/jsonschema_dataset.json b/datalad_catalog/catalog/schema/jsonschema_dataset.json index 19b6cf64..3e190902 100644 --- a/datalad_catalog/catalog/schema/jsonschema_dataset.json +++ b/datalad_catalog/catalog/schema/jsonschema_dataset.json @@ -48,7 +48,23 @@ }, "url": { "description": "The location of the datalad dataset's annex", - "title": "URL", + "title": "DataLad URL", + "type": ["array", "string"], + "items": { + "type": "string" + } + }, + "download_url": { + "description": "The URL at which the complete dataset can be downloaded directly, typically via HTTP protocol and without further access restrictions", + "title": "Download URL", + "type": ["array", "string"], + "items": { + "type": "string" + } + }, + "homepage_url": { + "description": "A unique URL with content that describes the current dataset and which may provide further data access", + "title": "Homepage URL", "type": ["array", "string"], "items": { "type": "string" diff --git a/datalad_catalog/catalog/templates/dataset-template.html b/datalad_catalog/catalog/templates/dataset-template.html index 8928340d..37a59b72 100644 --- a/datalad_catalog/catalog/templates/dataset-template.html +++ b/datalad_catalog/catalog/templates/dataset-template.html @@ -33,18 +33,21 @@
- + datalad_d Download with DataLad  - View on GitHub  - View on GIN  - Cite  + View on GitHub  + View on GIN  + View on GitLab  + Cite  Export metadata  - binder_logoExplore with Binder  Share  + Homepage  + Download  Request access  Request access @@ -58,7 +61,7 @@ ok-only>
Step 1: Install DataLad
@@ -173,7 +176,7 @@
Full URL
- {{window.location.origin + 'dataset/' + selectedDataset.dataset_id + '/' + selectedDataset.dataset_version}} + {{window.location.origin + '/dataset/' + selectedDataset.dataset_id + '/' + selectedDataset.dataset_version}} diff --git a/datalad_catalog/config/config.json b/datalad_catalog/config/config.json index 506e9a1f..3ccce46f 100644 --- a/datalad_catalog/config/config.json +++ b/datalad_catalog/config/config.json @@ -12,7 +12,16 @@ "x": "https://x.com/datalad" }, "dataset_options": { - "include_metadata_export": true + "include_datalad": true, + "include_github": true, + "include_gitlab": true, + "include_gin": true, + "include_binder": true, + "include_cite": true, + "include_metadata_export": true, + "include_access_request": true, + "include_homepage": true, + "include_download": true }, "property_sources": { "dataset": { diff --git a/datalad_catalog/serve.py b/datalad_catalog/serve.py index 68b197f2..f5539184 100644 --- a/datalad_catalog/serve.py +++ b/datalad_catalog/serve.py @@ -16,16 +16,18 @@ WebCatalog, ) from datalad_next.commands import ( - EnsureCommandParameterization, - ValidatedInterface, - Parameter, build_doc, + EnsureCommandParameterization, eval_results, get_status_dict, + ValidatedInterface, + Parameter, + ParameterConstraintContext, ) from datalad_next.constraints import ( - EnsureRange, EnsureInt, + EnsurePath, + EnsureRange, ) import logging from pathlib import Path @@ -40,13 +42,42 @@ class ServeParameterValidator(EnsureCommandParameterization): """""" + def _validate_combinations( + self, + catalog, + port, + base, + ): + """""" + # parameter combinations for catalog and base + + if catalog and base: + if not catalog.location.resolve().is_relative_to(base.resolve()): + self.raise_for( + dict( + catalog=catalog, + port=port, + base=base, + ), + ( + f"the catalog location ({catalog.location.resolve()}) should be relative " + "to the base path" + ), + ) + def __init__(self): + all_params = ("catalog", "port", "base") super().__init__( param_constraints=dict( catalog=CatalogRequired() & EnsureWebCatalog(), port=EnsureInt() & EnsureRange(1025, 9999), + base=EnsurePath(), ), - joint_constraints=dict(), + joint_constraints={ + ParameterConstraintContext( + all_params, "validate-parameter-combinations" + ): self._validate_combinations, + }, ) @@ -75,6 +106,16 @@ class Serve(ValidatedInterface): doc="""The port at which the content is served at 'localhost' (default 8000)""", ), + base=Parameter( + # cmdline argument definitions, incl aliases + args=("--base",), + # documentation + doc="""The base path that should be served as the 'localhost' + root, implying that the catalog will be served from a + subdirectory relative to the base path. Must be a parent + path of the catalog location. + """, + ), ) _examples_ = [ @@ -91,6 +132,14 @@ class Serve(ValidatedInterface): code_py="catalog_serve(catalog='/tmp/my-cat/', port=8001)", code_cmd="datalad catalog-serve -c /tmp/my-cat -p 8001", ), + dict( + text=( + "SERVE the content of the catalog via a local HTTP server " + "at a custom subdirectory, e.g. http://localhost:8000/my-cat" + ), + code_py="catalog_serve(catalog='/tmp/my-cat/', base='/tmp')", + code_cmd="datalad catalog-serve -c /tmp/my-cat --base /tmp", + ), ] @staticmethod @@ -101,13 +150,15 @@ class Serve(ValidatedInterface): def __call__( catalog: Union[Path, WebCatalog], port: int = 8000, + base=None, ): res_kwargs = dict( action="catalog_serve", path=catalog.location, + basepath=base, ) try: - catalog.serve(port=port) + catalog.serve(port=port, base=base) yield get_status_dict( **res_kwargs, status="ok", diff --git a/datalad_catalog/tests/test_schema_utils.py b/datalad_catalog/tests/test_schema_utils.py index 78bc71f2..830584a6 100644 --- a/datalad_catalog/tests/test_schema_utils.py +++ b/datalad_catalog/tests/test_schema_utils.py @@ -14,6 +14,8 @@ "description": "", "doi": "", "url": "", + "download_url": "", + "homepage_url": "", "license": {"name": "", "url": ""}, "authors": [ { diff --git a/datalad_catalog/translators/bids_dataset_translator.py b/datalad_catalog/translators/bids_dataset_translator.py index 9ed4c2d6..20aad12d 100644 --- a/datalad_catalog/translators/bids_dataset_translator.py +++ b/datalad_catalog/translators/bids_dataset_translator.py @@ -143,12 +143,12 @@ def get_authors(self): '"email":"", "honorificSuffix":"", "identifiers":[]}]' ) result = jq.first(program, self.extracted_metadata) - return result if len(result) > 0 else None + return result if result is not None and len(result) > 0 else None def get_keywords(self): program = ". as $parent | .entities.task + .variables.dataset" result = jq.first(program, self.extracted_metadata) - return result if len(result) > 0 else None + return result if result is not None and len(result) > 0 else None def get_funding(self): program = ( @@ -156,7 +156,7 @@ def get_funding(self): '{"name": "", "grant":"", "description":$fund}]' ) result = jq.first(program, self.extracted_metadata) - return result if len(result) > 0 else None + return result if result is not None and len(result) > 0 else None def get_publications(self): program = ( @@ -170,12 +170,12 @@ def get_publications(self): '"authors": []}]' ) result = jq.first(program, self.extracted_metadata) - return result if len(result) > 0 else None + return result if result is not None and len(result) > 0 else None def get_additional_display(self): program = '[{"name": "BIDS", "content": .entities}]' result = jq.first(program, self.extracted_metadata) - return result if len(result) > 0 else None + return result if result is not None and len(result) > 0 else None def get_top_display(self): program = ( @@ -185,7 +185,7 @@ def get_top_display(self): '{"name": "Runs", "value": (.entities.run | length)}]' ) result = jq.first(program, self.extracted_metadata) - return result if len(result) > 0 else None + return result if result is not None and len(result) > 0 else None def translate(self): translated_record = { diff --git a/datalad_catalog/translators/datacite_gin_translator.py b/datalad_catalog/translators/datacite_gin_translator.py index c60c930e..7bb5bf2b 100644 --- a/datalad_catalog/translators/datacite_gin_translator.py +++ b/datalad_catalog/translators/datacite_gin_translator.py @@ -117,7 +117,7 @@ def get_license(self): program = '.license | { "name": .name, "url": .url}' result = jq.first(program, self.extracted_metadata) # todo check for license info missing - return result if len(result) > 0 else None + return result if result is not None and len(result) > 0 else None def get_authors(self): program = ( @@ -130,7 +130,7 @@ def get_authors(self): "else null end]" ) result = jq.first(program, self.extracted_metadata) - return result if len(result) > 0 else None + return result if result is not None and len(result) > 0 else None def get_keywords(self): return self.extracted_metadata.get("keywords") @@ -141,7 +141,7 @@ def get_funding(self): '{"name": $element, "identifier": "", "description": ""}]' ) result = jq.first(program, self.extracted_metadata) - return result if len(result) > 0 else None + return result if result is not None and len(result) > 0 else None def get_publications(self): program = ( @@ -155,7 +155,7 @@ def get_publications(self): '"authors": []}]' ) result = jq.first(program, self.extracted_metadata) - return result if len(result) > 0 else None + return result if result is not None and len(result) > 0 else None def translate(self): translated_record = { diff --git a/datalad_catalog/translators/metalad_core_translator.py b/datalad_catalog/translators/metalad_core_translator.py index b3b6354d..7f2eb5f0 100644 --- a/datalad_catalog/translators/metalad_core_translator.py +++ b/datalad_catalog/translators/metalad_core_translator.py @@ -140,7 +140,7 @@ def get_subdatasets(self): '"dirs_from_path": []}]' ) result = jq.first(program, self.graph) - return result if len(result) > 0 else None + return result if result is not None and len(result) > 0 else None def get_file_url(self): program = ".distribution? | .url?" diff --git a/datalad_catalog/translators/metalad_studyminimeta_translator.py b/datalad_catalog/translators/metalad_studyminimeta_translator.py index 98848684..a63f61e3 100644 --- a/datalad_catalog/translators/metalad_studyminimeta_translator.py +++ b/datalad_catalog/translators/metalad_studyminimeta_translator.py @@ -168,7 +168,7 @@ def get_funding(self): '{"name": .name, "identifier": "", "description": ""}]' ) result = jq.first(program, self.graph) # [] if nothing found - return result if len(result) > 0 else None + return result if result is not None and len(result) > 0 else None def get_publications(self): if self.combinedpersonspubs is not None: @@ -194,7 +194,7 @@ def get_subdatasets(self): '"dataset_path": .name, "dirs_from_path": []}]' ) result = jq.first(program, self.graph) # [] if nothing found - return result if len(result) > 0 else None + return result if result is not None and len(result) > 0 else None def translate(self): translated_record = { diff --git a/datalad_catalog/tree.py b/datalad_catalog/tree.py new file mode 100644 index 00000000..12ce406a --- /dev/null +++ b/datalad_catalog/tree.py @@ -0,0 +1,155 @@ +# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- +# ex: set sts=4 ts=4 sw=4 et: +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +# +# See COPYING file distributed along with the datalad package for the +# copyright and license terms. +# +# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## +"""Print the dataset-version tree of a catalog +""" +from datalad_catalog.constraints import ( + CatalogRequired, + EnsureWebCatalog, +) +from datalad_catalog.webcatalog import ( + WebCatalog, +) +from datalad_next.commands import ( + EnsureCommandParameterization, + ValidatedInterface, + Parameter, + build_doc, + eval_results, + get_status_dict, +) +from datalad_next.uis import ui_switcher +import logging +from pathlib import Path +from time import gmtime, strftime +from typing import Union + + +__docformat__ = "restructuredtext" + +lgr = logging.getLogger("datalad.catalog.serve") + + +class TreeParameterValidator(EnsureCommandParameterization): + """""" + + def __init__(self): + super().__init__( + param_constraints=dict( + catalog=CatalogRequired() & EnsureWebCatalog(), + ), + joint_constraints=dict(), + ) + + +# Decoration auto-generates standard help +@build_doc +# All extension commands must be derived from Interface +class Tree(ValidatedInterface): + """Print the tree of datasets and their respective versions + contained in a catalog + """ + + _validator_ = TreeParameterValidator() + + _params_ = dict( + catalog=Parameter( + # cmdline argument definitions, incl aliases + args=("-c", "--catalog"), + # documentation + doc="""Location of the existing catalog""", + ), + ) + + _examples_ = [ + dict( + text=("Print a catalog metadata tree"), + code_py="catalog_tree(catalog='/tmp/my-cat/')", + code_cmd="datalad catalog-tree -c /tmp/my-cat", + ), + ] + + @staticmethod + def custom_result_renderer(res, **kwargs): + """This result renderer dumps the value of the 'output' key + in the result record in JSON-line format -- only if status==ok""" + ui = ui_switcher.ui + if res["result_type"] == "dataset": + if res["i"] == 0: + ui.message(res["catalog_name"]) + ui.message(".") + ds_prefix = "└──" if res["i"] + 1 == res["N_datasets"] else "├──" + ui.message(f"{ds_prefix} DS[{res['i']}]: {res['dataset_name']}") + indent = " " if res["i"] + 1 == res["N_datasets"] else "│ " + ui.message(f"{indent}ID: {res['d']}; ALIAS: {res['dataset_alias']}") + ui.message(f"{indent}Versions:") + elif res["result_type"] == "version": + indent = " " if res["i"] + 1 == res["N_datasets"] else "│ " + version_prefix = ( + "└──" if res["j"] + 1 == res["N_ds_versions"] else "├──" + ) + postfix = "" + if ( + res["d"] == res["homepage"]["id"] + and res["dataset_version"] == res["homepage"]["version"] + ): + postfix = " (HOMEPAGE)" + ui.message( + f"{indent}{version_prefix} {res['dataset_version']} (Updated: {strftime('%a, %d %b %Y %H:%M:%S +0000', gmtime(res['updated_at']))}){postfix}" + ) + else: + ui.message("│") + + @staticmethod + # generic handling of command results (logging, rendering, filtering, ...) + @eval_results + # signature must match parameter list above + # additional generic arguments are added by decorators + def __call__( + catalog: Union[Path, WebCatalog], + ): + res_kwargs = dict( + action="catalog_tree", + path=catalog.location, + ) + report = catalog.get_catalog_report() + res_kwargs["catalog_name"] = catalog.location.name + res_kwargs["homepage"] = { + "id": report.get("homepage_id"), + "version": report.get("homepage_version"), + } + all_datasets = report.get("datasets") + N_datasets = len(all_datasets) + res_kwargs["N_datasets"] = N_datasets + for i, d in enumerate(all_datasets): + res_kwargs["i"] = i + res_kwargs["d"] = d + + found_dv = next( + (dv for dv in report.get("versions") if dv["dataset_id"] == d), + "", + ) + res_kwargs["dataset_name"] = found_dv["dataset_name"] + res_kwargs["dataset_alias"] = found_dv.get("alias", None) + res_kwargs["result_type"] = "dataset" + yield get_status_dict(status="ok", **res_kwargs) + + current_ds_versions = [ + dsv for dsv in report.get("versions") if dsv["dataset_id"] == d + ] + res_kwargs["N_ds_versions"] = len(current_ds_versions) + for j, cdsv in enumerate(current_ds_versions): + res_kwargs["j"] = j + res_kwargs["dataset_version"] = cdsv.get("dataset_version") + res_kwargs["updated_at"] = cdsv.get("updated_at") + res_kwargs["result_type"] = "version" + yield get_status_dict(status="ok", **res_kwargs) + + if not i + 1 == N_datasets: + res_kwargs["result_type"] = "" + yield get_status_dict(status="ok", **res_kwargs) diff --git a/datalad_catalog/webcatalog.py b/datalad_catalog/webcatalog.py index 8f72d23e..71990667 100644 --- a/datalad_catalog/webcatalog.py +++ b/datalad_catalog/webcatalog.py @@ -286,9 +286,22 @@ def serve( self, host: str = "localhost", port: int = 8000, + base: str = None, ): """Serve a catalog via a local http server""" - os.chdir(self.location) + + if base and not self.location.resolve().is_relative_to( + Path(base).resolve() + ): + error_msg = "The catalog location should be relative to the supplied base path " + raise ValueError(error_msg) + if base: + base_path = Path(base).resolve() + relpath = str(self.location.resolve().relative_to(base_path)) + os.chdir(base_path) + else: + relpath = "" + os.chdir(self.location) from http.server import SimpleHTTPRequestHandler import socketserver @@ -298,19 +311,20 @@ def serve( class CustomHandler(SimpleHTTPRequestHandler): # Redirect all '/dataset' URLs to '/index.html' def do_GET(self): - if self.path.startswith("/dataset"): - self.path = "/index.html" + if self.path.startswith(f"/{relpath}/dataset"): + self.path = f"/{relpath}/index.html" # Continue with the default behavior return SimpleHTTPRequestHandler.do_GET(self) try: with socketserver.TCPServer((host, port), CustomHandler) as httpd: ui.message( - "\nServing catalog at: http://{h}:{p}/ - navigate to this " + "\nServing catalog at: http://{h}:{p}/{s} - navigate to this " "address in your browser to test the catalog locally - press " "CTRL+C to stop local testing\n".format( h=ac.color_word(host, ac.BOLD), p=ac.color_word(port, ac.BOLD), + s=ac.color_word(relpath, ac.BOLD), ) ) httpd.serve_forever() diff --git a/datalad_catalog/workflow.py b/datalad_catalog/workflow.py index e10011a1..ba859e22 100644 --- a/datalad_catalog/workflow.py +++ b/datalad_catalog/workflow.py @@ -117,7 +117,7 @@ class Workflow(ValidatedInterface): ), mode=Parameter( # cmdline argument definitions, incl aliases - args=("-t", "--type"), + args=("-m", "--mode"), # documentation doc="""Which type of workflow to run: one of ['new', 'update']""", ), @@ -193,7 +193,7 @@ class Workflow(ValidatedInterface): "dataset='path/to/superdataset', extractor='metalad_core')" ), code_cmd=( - "datalad catalog-workflow -t new -c /tmp/my-cat " + "datalad catalog-workflow -m new -c /tmp/my-cat " "-d path/to/superdataset -e metalad_core" ), ), @@ -208,7 +208,7 @@ class Workflow(ValidatedInterface): "extractor='metalad_core')" ), code_cmd=( - "datalad catalog-workflow -t new -c /tmp/my-cat " + "datalad catalog-workflow -m update -c /tmp/my-cat " "-d path/to/superdataset -s path/to/subdataset -e metalad_core" ), ), @@ -244,7 +244,7 @@ def __call__( ) if mode == "new": yield from super_workflow( - ds=dataset, + ds=dataset.ds, cat=catalog, extractors=extractor, config_file=config_file, @@ -253,8 +253,8 @@ def __call__( ) if mode == "update": yield from update_workflow( - superds=dataset, - subds=subdataset, + superds=dataset.ds, + subds=subdataset.ds, catalog=catalog, extractors=extractor, **res_kwargs, diff --git a/docs/source/command_line_reference.rst b/docs/source/command_line_reference.rst index b7ba52b6..8444cad6 100644 --- a/docs/source/command_line_reference.rst +++ b/docs/source/command_line_reference.rst @@ -15,3 +15,4 @@ Command Line Reference generated/man/datalad-catalog-set generated/man/datalad-catalog-translate generated/man/datalad-catalog-workflow + generated/man/datalad-catalog-tree diff --git a/docs/source/index.rst b/docs/source/index.rst index 68bf099f..c3ac6f21 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -26,6 +26,26 @@ Foundation) under grant SFB 1451 (`431549029`_, INF project). .. _431549029: https://gepris.dfg.de/gepris/projekt/431549029 + +NOTE: Future development +======================== + +We're working on a newer, leaner, more modular, and more interoperable solution +to the same challenge that the current ``datalad-catalog`` aims to address. +This new development is taking place within the broader context of making +DataLad datasets interoperable with linked and semantic (meta)data. For more +background, see `this issue`_. To keep up to date, follow progress at +`psychoinformatics-de/datalad-concepts`_, `psychoinformatics-de/shacl-vue`_, and +in the `new development branch`_. Because of this redirected focus, ``datalad-catalog`` +itself will be downscaled by focusing on maintenance and assessing the priority +of new features on a case-by-case basis. + + +.. _this issue: https://github.com/psychoinformatics-de/datalad-concepts/issues/115 +.. _psychoinformatics-de/datalad-concepts: https://github.com/psychoinformatics-de/datalad-concepts +.. _psychoinformatics-de/shacl-vue: https://github.com/psychoinformatics-de/shacl-vue +.. _new development branch: https://github.com/datalad/datalad-catalog/tree/revolution + Demo ==== diff --git a/docs/source/python_module_reference.rst b/docs/source/python_module_reference.rst index fbc91a21..e23e0906 100644 --- a/docs/source/python_module_reference.rst +++ b/docs/source/python_module_reference.rst @@ -16,3 +16,4 @@ Python Module Reference catalog_set catalog_translate catalog_workflow + catalog_tree diff --git a/requirements-devel.txt b/requirements-devel.txt index 714c7dbf..14108ef1 100644 --- a/requirements-devel.txt +++ b/requirements-devel.txt @@ -8,6 +8,7 @@ pyyaml pytest pytest-cov coverage +black # requirements for a document building sphinx diff --git a/tools/create_alias_concept_metadata.py b/tools/create_alias_concept_metadata.py index d1759d1e..4b3f09b8 100644 --- a/tools/create_alias_concept_metadata.py +++ b/tools/create_alias_concept_metadata.py @@ -37,7 +37,7 @@ def add_aliases(alias_path, catalog): metadata=json.dumps(meta_item), ) ids_processed.append(row["dataset_id"]) - + return ids_processed