From d5e9e8467701b7865e8d7f0dc149685e38e9fd59 Mon Sep 17 00:00:00 2001 From: EddieLF <34049565+EddieLF@users.noreply.github.com> Date: Fri, 28 Jul 2023 15:42:40 +1000 Subject: [PATCH 01/12] Add endpoint to update SG fields (#526) --- api/routes/sequencing_groups.py | 38 +++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/api/routes/sequencing_groups.py b/api/routes/sequencing_groups.py index ab4905420..5e1e81c28 100644 --- a/api/routes/sequencing_groups.py +++ b/api/routes/sequencing_groups.py @@ -1,11 +1,15 @@ +from typing import Any from fastapi import APIRouter +from pydantic import BaseModel from api.utils.db import ( get_project_readonly_connection, Connection, get_projectless_db_connection, + get_project_write_connection, ) from db.python.layers.sequencing_group import SequencingGroupLayer +from models.models.sequencing_group import SequencingGroupUpsertInternal from models.utils.sample_id_format import sample_id_format from models.utils.sequencing_group_id_format import ( # Sample, @@ -18,6 +22,15 @@ # region CREATES +class SequencingGroupUpdateModel(BaseModel): + """Update sequencing group model""" + + meta: dict[str, Any] | None = None + platform: str | None = None + technology: str | None = None + type: str | None = None + + @router.get('{sequencing_group_id}', operation_id='getSequencingGroup') async def get_sequencing_group( sequencing_group_id: str, connection: Connection = get_projectless_db_connection @@ -44,3 +57,28 @@ async def get_all_sequencing_group_ids_by_sample_by_type( } for sid, sg_type_to_sg_ids in sg.items() } + + +@router.get('/project/{sequencing_group_id}', operation_id='updateSequencingGroup') +async def update_sequencing_group( + sequencing_group_id: str, + sequencing_group: SequencingGroupUpdateModel, + connection: Connection = get_project_write_connection, +) -> bool: + """Update the meta fields of a sequencing group""" + st = SequencingGroupLayer(connection) + await st.upsert_sequencing_groups( + [ + SequencingGroupUpsertInternal( + id=sequencing_group_id_transform_to_raw(sequencing_group_id), + meta=sequencing_group.meta, + platform=sequencing_group.platform, + technology=sequencing_group.technology, + type=sequencing_group.type, + ) + ] + ) + return True + + +# endregion From 0df04068ff4b8cd706b89237920e003a860e052a Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Fri, 4 Aug 2023 13:29:02 +1000 Subject: [PATCH 02/12] Add missing npm run compile step in documentation and fix the spelling mistake. --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index cc9596c63..6b6b4fb2e 100644 --- a/README.md +++ b/README.md @@ -241,6 +241,7 @@ We could now place breakpoints on the sample route (ie: `api/routes/sample.py`), # This will automatically proxy request to the server. cd web npm install +npm run compile npm start ``` @@ -311,7 +312,7 @@ Or you can build the docker file, and specify that # SM_DOCKER is a known env variable to regenerate_api.py export SM_DOCKER="cpg/sample-metadata-server:dev" docker build --build-arg SM_ENVIRONMENT=local -t $SM_DOCKER -f deploy/api/Dockerfile . 
-python regenerate_apy.py +python regenerate_api.py ``` ## Deployment From 38ac7965e6b5905e86707002d87b941426395622 Mon Sep 17 00:00:00 2001 From: EddieLF <34049565+EddieLF@users.noreply.github.com> Date: Mon, 7 Aug 2023 16:35:35 +1000 Subject: [PATCH 03/12] Remove functionality from SG update endpoint (#531) --- api/routes/sequencing_groups.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/api/routes/sequencing_groups.py b/api/routes/sequencing_groups.py index 5e1e81c28..edfbe170d 100644 --- a/api/routes/sequencing_groups.py +++ b/api/routes/sequencing_groups.py @@ -22,13 +22,10 @@ # region CREATES -class SequencingGroupUpdateModel(BaseModel): +class SequencingGroupMetaUpdateModel(BaseModel): """Update sequencing group model""" meta: dict[str, Any] | None = None - platform: str | None = None - technology: str | None = None - type: str | None = None @router.get('{sequencing_group_id}', operation_id='getSequencingGroup') @@ -62,7 +59,7 @@ async def get_all_sequencing_group_ids_by_sample_by_type( @router.get('/project/{sequencing_group_id}', operation_id='updateSequencingGroup') async def update_sequencing_group( sequencing_group_id: str, - sequencing_group: SequencingGroupUpdateModel, + sequencing_group: SequencingGroupMetaUpdateModel, connection: Connection = get_project_write_connection, ) -> bool: """Update the meta fields of a sequencing group""" @@ -72,9 +69,6 @@ async def update_sequencing_group( SequencingGroupUpsertInternal( id=sequencing_group_id_transform_to_raw(sequencing_group_id), meta=sequencing_group.meta, - platform=sequencing_group.platform, - technology=sequencing_group.technology, - type=sequencing_group.type, ) ] ) From c5fa24e9c09d29cd488872e0080cc232011024f3 Mon Sep 17 00:00:00 2001 From: Michael Franklin Date: Mon, 28 Aug 2023 11:26:13 +1000 Subject: [PATCH 04/12] Improve set-up documentation (#534) * Improve set-up documentation + aux changes Signed-off-by: Michael Franklin * Fix default ped path for generate_data * Apply review feedback * Sneaky sneaky --------- Signed-off-by: Michael Franklin Co-authored-by: Michael Franklin --- README.md | 314 +++++++++++++++++++++++++------------ api/settings.py | 2 + api/utils/openapi.py | 8 +- db/README.md | 10 -- requirements-dev.txt | 6 +- resources/debug-api.png | Bin 0 -> 18825 bytes test/README.md | 121 ++------------ test/data/generate_data.py | 21 +-- 8 files changed, 244 insertions(+), 238 deletions(-) delete mode 100644 db/README.md create mode 100644 resources/debug-api.png diff --git a/README.md b/README.md index 6b6b4fb2e..12069a94c 100644 --- a/README.md +++ b/README.md @@ -80,11 +80,14 @@ You can configure the MariaDB connection with environment variables. ### Creating the environment -Dependencies for the `metamist` API package are listed in `setup.py`. +Python dependencies for the `metamist` API package are listed in `setup.py`. Additional dev requirements are listed in `requirements-dev.txt`, and packages for the sever-side code are listed in `requirements.txt`. -To create the full dev environment, run: +We _STRONGLY_ encourage the use of `pyenv` for managing Python versions. +Debugging and the server will run on a minimum python version of 3.10. + +To setup the python environment, you can run: ```shell virtualenv venv @@ -94,29 +97,151 @@ pip install -r requirements-dev.txt pip install --editable . 
``` -### Default DB set-up +### Extra software + +You'll need to install the following software to develop metamist: + +- Node / NPM (recommend using nvm) +- MariaDB (using MariaDB in docker is also good) +- Java (for liquibase / openapi-generator) +- Liquibase +- OpenAPI generator +- wget (optional) + +Our recommendation is in the following code block: + +```shell +brew install wget +brew install java +brew install liquibase +``` + +Add the following to your `.zshrc` file: + +```shell + +# homebrew should export this on an M1 Mac +# the intel default is /usr/local +export HB_PREFIX=${HOMEBREW_PREFIX-/usr/local} + +# installing Java through brew recommendation +export CPPFLAGS="-I$HB_PREFIX/opt/openjdk/include" + +# installing liquibase through brew recommendation +export LIQUIBASE_HOME=$(brew --prefix)/opt/liquibase/libexec + +export PATH="$HB_PREFIX/bin:$PATH:$HB_PREFIX/opt/openjdk/bin" +``` + +#### Node through node-version manager (nvm) + +We aren't using too many node-specific features, anything from 16 should work fine, +this will install the LTS version: + +```shell +brew install nvm + +# you may need to add the the following to your .zshrc +# export NVM_DIR="$HOME/.nvm" +# [ -s "$HB_PREFIX/opt/nvm/nvm.sh" ] && \. "$HB_PREFIX/opt/nvm/nvm.sh" # This loads nvm +# [ -s "$HB_PREFIX/opt/nvm/etc/bash_completion.d/nvm" ] && \. "$HB_PREFIX/opt/nvm/etc/bash_completion.d/nvm" # This loads nvm bash_completion + +# install latest version of node + npm +nvm install --lts +``` + +#### OpenAPI generator + +You'll need this to generate the Python and Typescript API. + +```shell +npm install @openapitools/openapi-generator-cli -g +openapi-generator-cli version-manager set 5.3.0 + +# put these in your .zshrc +export OPENAPI_COMMAND="npx @openapitools/openapi-generator-cli" +alias openapi-generator="npx @openapitools/openapi-generator-cli" +``` + +#### MariaDB install + +If you're planning to install MariaDB locally, brew is the easiest: + +```shell + +brew install mariadb@10.8 +# start mariadb on computer start +brew services start mariadb@10.8 + +# make mariadb command available on path +export PATH="$HB_PREFIX/opt/mariadb@10.8/bin:$PATH" +``` + +#### Your .zshrc file + +If you installed all the software through brew and npm +like this guide suggests, your `.zshrc` may look like this: + + +```shell +alias openapi-generator="npx @openapitools/openapi-generator-cli" + +# homebrew should export this on an M1 Mac +# the intel default is /usr/local +export HB_PREFIX=${HOMEBREW_PREFIX-/usr/local} + +# metamist +export SM_ENVIRONMENT=LOCAL # good default to have +export SM_DEV_DB_USER=sm_api # makes it easier to copy liquibase update command +export OPENAPI_COMMAND="npx @openapitools/openapi-generator-cli" + +export PATH="$HB_PREFIX/bin:$HB_PREFIX/opt/mariadb@10.8/bin:$PATH:$HB_PREFIX/opt/openjdk/bin" + +export CPPFLAGS="-I$HB_PREFIX/opt/openjdk/include" +export LIQUIBASE_HOME=$(brew --prefix)/opt/liquibase/libexec + +# node +export NVM_DIR="$HOME/.nvm" +[ -s "$HB_PREFIX/opt/nvm/nvm.sh" ] && \. "$HB_PREFIX/opt/nvm/nvm.sh" # This loads nvm +[ -s "$HB_PREFIX/opt/nvm/etc/bash_completion.d/nvm" ] && \. "$HB_PREFIX/opt/nvm/etc/bash_completion.d/nvm" # This loads nvm bash_completion +``` + +### Database setup These are the default values for the SM database connection. Please alter them if you use any different values when setting up the database. 
```shell
-export SM_DEV_DB_USER=root
+export SM_DEV_DB_USER=root # this is the default, but we now recommend sm_api
export SM_DEV_DB_PASSWORD= # empty password
export SM_DEV_DB_HOST=127.0.0.1
export SM_DEV_DB_PORT=3306 # default mariadb port
+export SM_DEV_DB_NAME=sm_dev;
```

Create the database in MariaDB (by default, we call it `sm_dev`):
-If you use a different databse name also set the following
+
+> In newer installs of MariaDB, the root user is protected by default.
+
+We'll set up a user called `sm_api`, and set up permissions

```shell
-export SM_DEV_DB_NAME=sm_database_name
+sudo mysql -u root --execute "
+  CREATE DATABASE sm_dev;
+  CREATE USER sm_api@'%'
+  CREATE USER sm_api@localhost;
+  CREATE ROLE sm_api_role;
+  GRANT sm_api_role TO sm_api@'%'
+  GRANT sm_api_role TO sm_api@localhost;
+  GRANT ALL PRIVILEGES ON sm_dev.* TO sm_api_role;
+"
```

-> Sample-metadata stores all metadata in one database (_previously: one database per project_).
+Then, before you run the server, you'll need to export the variable:

```shell
-mysql -u root --execute 'CREATE DATABASE sm_dev'
+# also put this in your .zshrc
+export SM_DEV_DB_USER=sm_api
```

Download the `mariadb-java-client` and create the schema using liquibase:

@@ -129,7 +254,7 @@ liquibase \
  --url jdbc:mariadb://localhost/sm_dev \
  --driver org.mariadb.jdbc.Driver \
  --classpath mariadb-java-client-3.0.3.jar \
-  --username root \
+  --username ${SM_DEV_DB_USER:-root} \
  update
popd
```

@@ -139,7 +264,7 @@ popd
Pull mariadb image

```bash
-docker pull mariadb
+docker pull mariadb:10.8.3
```

Run a mariadb container that will serve your database. `-p 3307:3306` remaps the port to 3307 in case your local MySQL is already using 3306

```bash
docker stop mysql-p3307 # stop and remove if the container already exists
docker rm mysql-p3307
# run with an empty root password
-docker run -p 3307:3306 --name mysql-p3307 -e MYSQL_ALLOW_EMPTY_PASSWORD=true -d mariadb
+docker run -p 3307:3306 --name mysql-p3307 -e MYSQL_ALLOW_EMPTY_PASSWORD=true -d mariadb:10.8.3
```

```bash
@@ -183,11 +308,24 @@ export SM_DEV_DB_PORT=3307

You'll want to set the following environment variables (permanently) in your local development environment.

+Set the `SM_LOCALONLY_DEFAULTUSER` environment variable along with `ALLOWALLACCESS` to allow access to a local metamist server without providing a bearer token. This will allow you to test the front-end components that access data. This happens automatically on the production instance through the Google identity-aware-proxy.
+ ```shell -# ensures the SWAGGER page (localhost:8000/docs) points to your local environment +export SM_ALLOWALLACCESS=1 +export SM_LOCALONLY_DEFAULTUSER=$(whoami) +``` + +```shell +# ensures the SWAGGER page points to your local: (localhost:8000/docs) +# and ensures if you use the PythonAPI, it also points to your local export SM_ENVIRONMENT=LOCAL # skips permission checks in your local environment export SM_ALLOWALLACCESS=true +# uses your username as the "author" in requests +export SM_LOCALONLY_DEFAULTUSER=$(whoami) + +# probably need this + # start the server python3 -m api.server @@ -195,44 +333,77 @@ python3 -m api.server # uvicorn --port 8000 --host 0.0.0.0 api.server:app ``` -In a different terminal, execute the following -request to create a new project called 'dev' +#### Running + debugging in VSCode -```shell -curl -X 'PUT' \ - 'http://localhost:8000/api/v1/project/?name=dev&dataset=dev&gcp_id=dev&create_test_project=false' \ - -H 'accept: application/json' \ - -H "Authorization: Bearer $(gcloud auth print-identity-token)" +The following `launch.json` is a good base to debug the web server in VSCode: + +```json +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Run API", + "type": "python", + "request": "launch", + "module": "api.server", + "justMyCode": false, + "env": { + "SM_ALLOWALLACCESS": "true", + "SM_LOCALONLY_DEFAULTUSER": "-local", + "SM_ENVIRONMENT": "local", + "SM_DEV_DB_USER": "sm_api", + } + } + ] +} ``` -#### Quickstart: Generate and install the installable API +We could now place breakpoints on the sample route (ie: `api/routes/sample.py`), and debug requests as they come in. -It's best to do this with an already running server: +Then in VSCode under the _Run and Debug_ tab (⌘⇧D), you can "Run API": -```shell -python3 regenerate_api.py \ +![Run API](resources/debug-api.png) + +#### Quickstart: Generate and install the python installable API + +Generating the installable APIs (Python + Typescript) involves running +the server, getting the `/openapi.json`, and running `openapi-generator`. + +The `regenerate_api.py` script does this in a few ways: + +1. Uses a running server on `localhost:8000` +2. Runs a docker container from the `SM_DOCKER` environment variable +3. Spins up the server itself + +Most of the time, you'll use 1 or 3: + +```bash +# this will start the api.server, so make sure you have the dependencies installed, +python regenerate_api.py \ && pip install . ``` -#### Debugging the server in VSCode +If you'd prefer to use the Docker approach (eg: on CI), this command +will build the docker container and supply it to regenerate_api.py. -VSCode allows you to debug python modules, we could debug the web API at `api/server.py` by considering the following `launch.json`: - -```json -{ - "version": "0.2.0", - "configurations": [ - { - "name": "API server", - "type": "python", - "request": "launch", - "module": "api.server" - } - ] -} +```bash +# SM_DOCKER is a known env variable to regenerate_api.py +export SM_DOCKER="cpg/sample-metadata-server:dev" +docker build --build-arg SM_ENVIRONMENT=local -t $SM_DOCKER -f deploy/api/Dockerfile . +python regenerate_api.py ``` -We could now place breakpoints on the sample route (ie: `api/routes/sample.py`), and debug requests as they come in. +#### Generating example data + +> You'll need to generate the installable API before running this step + +You can run the `generate_data.py` script to generate some +random data to look at. 
+ +```shell +export SM_ENVIRONMENT=local # important +python test/data/generate_data.py +``` #### Developing the UI @@ -245,14 +416,8 @@ npm run compile npm start ``` -#### Unauthenticated access +This will start a web server using Vite, running on [localhost:5173](http://localhost:5173). -You'll want to set the `SM_LOCALONLY_DEFAULTUSER` environment variable along with `ALLOWALLACCESS` to allow access to a local metamist server without providing a bearer token. This will allow you to test the front-end components that access data. This happens automatically on the production instance through the Google identity-aware-proxy. - -```shell -export SM_ALLOWALLACCESS=1 -export SM_LOCALONLY_DEFAULTUSER=$(whoami) -``` ### OpenAPI and Swagger @@ -276,58 +441,11 @@ The web API exposes this schema in two ways: - You could put this into the [Swagger editor](https://editor.swagger.io/) to see the same "Swagger UI" that `/api/docs` exposes. - We generate the metamist installable Python API based on this schema. -#### Generating the installable API - -The installable API is automatically generated through the `package.yml` GitHub action and uploaded to PyPI. - -To generate the python api you'll need to install openapi generator v5.x.x - -To install a specific version of the openapi-generator dow the following: - -```bash -npm install @openapitools/openapi-generator-cli -g -openapi-generator-cli version-manager set 5.3.0 -``` - -Then set your environment variable OPENAPI_COMMAND to the following. -You can also add an alias to your ~/.bash_profile or equivalent for running in the -terminal. - -```bash -export OPENAPI_COMMAND="npx @openapitools/openapi-generator-cli" -alias openapi-generator="npx @openapitools/openapi-generator-cli" -``` - -You could generate the installable API and install it with pip by running: - -```bash -# this will start the api.server, so make sure you have the dependencies installed, -python regenerate_api.py \ - && pip install . -``` - -Or you can build the docker file, and specify that - -```bash -# SM_DOCKER is a known env variable to regenerate_api.py -export SM_DOCKER="cpg/sample-metadata-server:dev" -docker build --build-arg SM_ENVIRONMENT=local -t $SM_DOCKER -f deploy/api/Dockerfile . -python regenerate_api.py -``` - ## Deployment -The metamist server - -You'll want to complete the following steps: +The CPG deploy is managed through Cloud Run on the Google Cloud Platform. +The deploy github action builds the container, and is deployed. -- Ensure there is a database created for each project (with the database name being the project), -- Ensure there are secrets in `projects/sample_metadata/secrets/databases/versions/latest`, that's an array of objects with keys `dbname, host, port, username, password`. -- Ensure `google-cloud` was installed - -```bash -export SM_ENVIRONMENT='PRODUCTION' - -# OR, point to the dev instance with -export SM_ENVIRONMENT='DEVELOPMENT' -``` +Additionally you can access metamist through the identity-aware proxy (IAP), +which handles the authentication through OAuth, allowing you to access the +front-end. 
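A quick way to sanity-check the OpenAPI schema described above (a minimal sketch, assuming the API server started with `python -m api.server` is still running locally on the default port 8000):

```shell
# fetch the OpenAPI schema that regenerate_api.py and the Swagger UI are built from
curl http://localhost:8000/openapi.json -o /tmp/openapi.json

# the same schema drives the interactive docs at http://localhost:8000/docs
```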
diff --git a/api/settings.py b/api/settings.py index 8bf38672c..5bb68265c 100644 --- a/api/settings.py +++ b/api/settings.py @@ -1,6 +1,7 @@ # pylint: disable=global-statement import os from functools import lru_cache + from cpg_utils.cloud import read_secret TRUTH_SET = ('1', 'y', 't', 'true') @@ -10,6 +11,7 @@ ) _ALLOW_ALL_ACCESS: bool = os.getenv('SM_ALLOWALLACCESS', 'n').lower() in TRUTH_SET _DEFAULT_USER = os.getenv('SM_LOCALONLY_DEFAULTUSER') +SM_ENVIRONMENT = os.getenv('SM_ENVIRONMENT', 'local').lower() SKIP_DATABASE_CONNECTION = bool(os.getenv('SM_SKIP_DATABASE_CONNECTION')) PROFILE_REQUESTS = os.getenv('SM_PROFILE_REQUESTS', 'false').lower() in TRUTH_SET IGNORE_GCP_CREDENTIALS_ERROR = os.getenv('SM_IGNORE_GCP_CREDENTIALS_ERROR') in TRUTH_SET diff --git a/api/utils/openapi.py b/api/utils/openapi.py index e37b04a2e..671396ce0 100644 --- a/api/utils/openapi.py +++ b/api/utils/openapi.py @@ -1,13 +1,15 @@ from os import getenv + from fastapi.openapi.utils import get_openapi -env = getenv('SM_ENVIRONMENT', 'local').lower() +from api.settings import SM_ENVIRONMENT + URLS = [] -if 'dev' in env: +if 'dev' in SM_ENVIRONMENT: URLS.append('https://sample-metadata-dev.populationgenomics.org.au') URLS.append('https://sample-metadata-api-dev-mnrpw3mdza-ts.a.run.app') -elif 'prod' in env: +elif 'prod' in SM_ENVIRONMENT: URLS.append('https://sample-metadata.populationgenomics.org.au') URLS.append('https://sample-metadata-api-mnrpw3mdza-ts.a.run.app') else: diff --git a/db/README.md b/db/README.md deleted file mode 100644 index 6882c62fc..000000000 --- a/db/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# Database information - -## Postgres reminder - -Postgres: - -- Database - - has many Schemas - - has manyTables - - has datatypes, functions, and operators diff --git a/requirements-dev.txt b/requirements-dev.txt index e49ff0214..1113f5753 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,11 +1,11 @@ black>=22.3.0 +bump2version click==8.0.4 +coverage flake8 flake8-bugbear +nest-asyncio pre-commit pylint -bump2version testcontainers[mariadb] -nest-asyncio -coverage types-PyMySQL diff --git a/resources/debug-api.png b/resources/debug-api.png new file mode 100644 index 0000000000000000000000000000000000000000..e73e233fd64e10012a7814f6ce313783cfa826d5 GIT binary patch literal 18825 zcmZ_019WB0wl^vMly1tboP^*`5gz~ujG15hEz6bSTRZPWn2pPy*J z_mk$IU(lF9AaKAP;%7+~@PD{xf&7~iI3w%J|6C&j+5z#&3y6sUe&zM;3=OU9O{^WT zY~8;B3LtGnRqcU*P)R<&z+!U5S3p2tz)TfX98{zvIrOb9X>|>(^$clUENwpd0CBo- z04^;J9dz+sEG?|;Ib661|F+-&Tz?kR5#s-C;$Y52s3IkUFJNtFh|fYxPfJh84TX=7 z&uM31#33gr{4a9A9T%aAgM$qR9i6kYGp#ctt+kyo-FJ3&b~<_nItB(BKnogsS1Siy z7aA*jqJNnDn~$KOy}q5Pjf1JR75*n*T|H|@2QEUwPeK3l`A1Jf7t{Ypvadh+PD@Yse~~$u8vXx}eb)R#_P1aEh~xZZ#vxp%kjQcwiU@-m?{dejA zrcn7mDgL|ke^dOlGX;!;?x$*>WA|6Wf0zBMJty6#q5PNO{9~1WO95ud4aG_KKPJu% zB}k(j2?WFoBqqqG-~xP}4(_5cpZr)@JYE#(cG=&)9SGg;1Dn0%4-WxZHEiS@TQy9& zJWjiO7CX+p**Ho{U?JL32kXmw1PntQ=VMtFVX0ANDA7Qo+AfjHjcNM)$a6TR5h0v1 z9d1NCE_zwdX@5a+={k9t;dtBZ@(vTD@k3NAT~o$T9>iF2oBjSC22h+AER zvooY^rwjN(LUzt1^apKUxCU9E9zxOb zSQ#iH#CLWO{`}NGGSGLDxH;v2gNOBT`fG^5zhZ;mIv-u$ZC?}cF!;b?z6pquz0+Fo|ET9Z5wYS>`ect@8p-!p{1)sB2C=W~O$uD+rieD}hjCpXk zV`xNSYm*$Q&@;YmXT;BNE5wIlRS zd*ARHXSv#+P(MgC!KWV?4~7_8M?sK@7JfdKf(+EwQU#x9f1BTy*ug~r!V7^ADyqv5 z`^@Frx3>UL0_Pg;vOdnZb7l#nGZ;lW8^n zYy@c6IGE%rmVVXUjr>+315o)kj3jBlNtv!Sxo3Ih!dg)@a1A&dw~Acg3B@z6bkKrv z+A6LYa*m3CB=OL{V{?Ah7z3|GxF6t`%GM$fetKK%&E-s}@s{V`c=)Ts79*##+S!@G5I zRIoExTWJ}7cpaU^3GH=D<^=Ce!TA!x-X2`Lzz|?agK`{kBwwAWm>F}>V&R?X+^G5{ zrFsUl?X2o9s&GFI>V?9IR5S_PfQk;p{s6N)O;g7)o8YD?nt=c 
zctZBQD|jayL65y*iLN~Q^F068#DShCl4SB$#;?jy*{^BoRY{RcIXL)&?3G0_{>CHT zlAOqel&RwnTJDH`s*+d`!IYu)VQWUdaIHjF9}KO2%Mz{eV!D{iNnE?bO-u!#w?_59 zHxX4SwN#tUs3UXNUc)Fx?~StIO^{T6&>2eP1#Zpr<44ozUcjCXEWJT0m{H%+X-u zbD;Gf-KR(vEQGL?RZ&aQD;kR5+Lq)F?Okuf)(|p=zpA_-^>B!_Z1|@hB+B(%q?pu82==hQ6w8w zV%(cMtI)u;>d7RwcNh_br?vT7?g~5Uc`WsCAl~L^EQ|m|7KV7+2o_B!PS#AVI)>ck z`*k;B$VvCzVR4^gAEd-kPk@^wBWkiCEz=+wC9OOdE@w$_R9SQ2*%yKxJKI2N!bihl zHl}PQuOx=lCuT-FYWR<4D{8PqAh<&E3=ID9d{{27s_c=iQl2eo1V5uUjloXp9kBNdn z0cC91shg030~2a<&3KK3@cy*NSJo6p$h|vgD*OBz-^ssgG>!w71D( zl(1;BY@RfnFsA6FhoM@-99Xv20FKACN5Tm6@Tor2i{9RXi z>saou%1iI{X-ddkyXX=@9|(A0G!V+`C!I!&PMcHa@_Xi%4(>!5JH)@VF+h0AjjAj( zbZ1u9gsrFvi_vkUAc8tjz%XUQ@lNu#2}*^tQjjEY06D?DDu^;!m6lSfx^MARbDgLO zX#X_k0D+1Z4^T?&*Gs02dkqFczz5M4^SQ6jC+?GtrJ=6GrH}#*_5lZ>ES)!FiExK7 z+%QXi`xDY~OMP@S+TWfa2Qcufwd>TXNo!4c ze%yy|AQ+Kz;S{(?407cTo6jnYh-M_v^i`>hdB(3N)Ab2|o3y~-5+n5nj7kCsFxapW}<)B`jKAg)@uO8uu{XylgRCL~lkIIonP_cIwtFa$8rLV?A49P}bYOVvrXd zQ-hT1ahno%I{H#L8vEzIe9nBafJp83{U`0m$*4OW}Xj6YC2aU!73Xh87$?CeuD!>i57GTQ!Y_HN*c~-C@e%5 zu9*~RlghM4Ax!9unPGWxKioR7k0H&Z(zexz@tT{jAqWNMZ@;ZU+v-;^2c?e3LaRC& zS{tP5~I zLKa$vN<&0RnhvHn-&+H5IA{)>DY?|A#uLZ$WJ%w4{roYyCXgIYM4H6(rS zw&X{$L-<_C1@K$yveMTmxR>fE?rN*w|MYrhs&=^26*S=Jq&ofd#x9!P@aXV_{>tIH z6Czq=3H35~`0{O`34J6)=U_%_sxJLhT0IKlCaKP4p>yC{gD)e?-bLlvaB=7%_Z{D7 z7m_l7Rt#_pJ99ohUfgCw3Q)&t<0-1zJ9gpZowomYw)c^6pW3d^89b>}f-)%}7I=kf*EJsk#{G&TQGQe*+TTAb*< zLCTwbI1%8p_;@1|@7LX4w0?DaV#|Z%B@A21tT+vtLgjwPr8H+q*}!Fas-7xo!@K3| zb1~6Rd`-Va05`AGI!lS8JJ^3H$7HsaSz-7iZ&65dQ>UnoBH|>)FZc~GQk)1$%XO~e zVt-BCH=1Otx|Z62nY90}-(p16X+4`j4POnSnA)CDlLFk>4&&B7N0(glkk{rh(z zd=S9BKPj+TeZYYuK{7lsQ81Lpz9)NDNrH;ID`g6G%&<{Y8>39 zN61^=(C@%pObrms2H~<=8G(=Bs|r6fF)=bW6dW0v!|YL;g+?fLb#ULky*)56P+fhj z?S)9_<|f;_O53UxBMXm^l)!5%tEp^&8uKr|RWx~`Y5A$q-DrH-Q z-;~l+z->1)GD1s1%fA;G7})UOgeO0)$%GQw94j3Y0tB%&kLr{8ijuQKlY*jR z)UETxPb^=~L=a%dP`A@fYwUC8H1CR$DV_%8&^Ct=h}MU$H*HZdX>P|81tX;eb?eRu zR{IUd!;aF*4!4(ib=}1bx98-58mkiC4EzK-ZEkL>v-9(GHyV38JDnS)3XSiR3sU)^ zOD^%dBvZY;M9L?X!L;Y~=8H_sEU@2TAr&PgBzW6$b8D-zc=HwJ=d}^uDJY7MO^fHz z=vv}Mvt_i}9Z#D_YTep*X%%E;zjKsWJ67yXPK-oGMpjrC1$Bjo!w@&BP@m(Zye8{>~zy@sW-29_Z+V=VAI z;zInek`qb>oF7J3BoI zgV~>HDYJ(0re&vFHX0ThOj%BROixcw1->|Dl&d>9%w42f^##|jS#5Tlo5dctY;ygX z3)b@9tWUsp#0&0s}&0~HN$%wZ1F{e z&+EN@@vN-8bhc=|xw-ks`J=aX@*WPROK07smzKhO4V_jihG27TEiH0F`tIOj*?v}g z!~Nqa1PKfbES1G7T2ft4ujgW&6>8RLd?J1EY)w{P-5i_MYQ<%~Bz{!;d6%kVF|avI z_0L(rS%hlorFwERckkxGsub&bo!6V&(bw?u@^b6WHU0G>Z;zAb(@Gq4?B_SeNnsI@ zo7B3uK^|9_MoY)=mb|8?DR4Y$WY{~z=M|}=%20iMeUlp94bn0!HKun`IO6xy%FgQ% z^B3>CBYl@Ot!lkyXW60==Zp2zea|w{4!8MFTXQsDYzpnctoM9*XnBTs+)qhS*77`O z?eyG9?0j~Vc3NmCt#k@w2>lN zHZLY5X40{*oqpPi`TO^8G+K?P`$})vSpSOcNsj}@jaj#3$~L2MOofp7Qe_E5e0Vr{ z_zv%n_p`IJ^;Tyhh*}JRp;DGD z_F*d)_3fo5-CrQq*a^c<7Mp=09BV7aWEvxe3z6&^CMbA#nJFn83^`~&3u=|V`}jL0 z(adRHB1&g)1RP2+opK6=g@jPhP`LhySDrI-)Ut|1KJN>OnzmVOvd!JwJvf>_=H_;_ z`*UMx^*&R)Y{Bhv^ed+aB%7GhSFRY*dxi!a1%ijMoCPwfg{GWFZlyT#=Ee?vJZz%y z_#z_%Fsg%=mKQ(!G{~Yso7wGc?MWa?jLWDy4RXNg74k9 z{PhpGQcPWv9%b|CLj%nMl}>0H*ZdxeOIOIxGPvAjbd46b-|0606s~eRN(cxL88Psm zZTzt66%ZB@`lIQ*5i&b+dw5zg=jpb&5%lrrM@6!mZSMpsiqsVyEo~1Eg_o=@LW=Lq zad;$#*8^k0BO-7CY*6r)T5KCJq|y{%5$Wa?G!;Q89lr)ZZ}itR%$Qfcjca=s7c^L{ zB$+E!3dh1Cq0=cSZU=7eoM~e*hj+Xmq&japqIsdrlKLNRZOL#3MsVUCMG5F=Xi#(8 zI=^i=e+c!x9W7Nq!eRTnpQ3WUSo!5MCXeA|a+2^DzWIZlU0fjWwK{1=U4S&vEoxL$ zS|_o*`?=^5Z$C}`Jg^?sbes&e{Ak*5zr?)}f{pIZ7yKAF7*FlD0|WYvxdPD-%>L9@ zP!h#?Gvv6a3Et~`Jd;-;{2GI>y*C_d(z@-_OJjbQL%mQBZ<3r-Rb}O}V`XI(h_#N% zy6M4ag>pEV>AbtOOK~yS`L=#ZjXLOnx0Igwv6CqJGH&gGQ#mYK-Il-1TcB+)fabM>6&CTBh-p`}rtoFE(g` zEO8)mkH`7j8CIE0@sw^<5CaS;KG9ktuhfWav5dGC+tj7g)l{~pyETuiS;_?pXV4Cr 
z?dIV{KN23t%ck24j;WHFloX95nUz$%W27&Ha<$Fw;*lr#Ze5*%U~iHzzX##h(U05T zA5O70cezpDusQv|;$)Ovwjb8=xFIJlT(oSrD$sJS4h;SnJDS?c`p)4tTOcJM@Qc83 zMTWz%hU2t^I?g07-BJG8t}7?o5k@^Q@3x16FPb3Jp!w8v{nZB30$ zv!>MQLbZTiRA zin2#~IKV{ZpvP`B>kl$cazn4Tw6xUJFqD>*ZVw=7J4rk32Z*tj5{X2`C&p+0%Bt+# ztfzJ2Obi_4EDzrJ%D#?V4#4j)tdkN zcTT(KF|wn<_JK_b+hWUk#S4 z3zu!Ya>~EvJOmKeaIYxV-H8c;+wAtm!%@yw8{smV&UHokf=enZD=Um@3b8bk~4DWS@S2yc3{rIA3xpg@F~E-?MHAR&sa|<@D$$`mdc?B$0*B zy(B?rSZeCGtjDZNYwAm%KIG1`IC3Hfx~gRp?6_`qWe0UUpoD8`HOobPt9QR_CgVq8 zXX5m~0IG{QS)=)lgL9?*GP2oB5tZ@s9QaiQYC9?t_my8m3Tp)dz2<2~n~R)8kP3x` zL^V}GO)XT>MI;jYx1Ya2`7}O=+r@~u#8uJz7+%tKzi%e_YXDZeQDzz!bYpa2fs2qU z`wBG|SCSb~5VK%pJQ{6LuKrcm^C3@XK+-zbdZBct>(%>9ROZu?S#5=8``53YCEX}D zhaZf>ShWuwiWDfrp7n2s8SOe#7_al&yHPwFHHM=P=c_zVTVIms9mafc=?e-AC!eTL zoX%wz4h#q19ydFk&ef76nz|jt4Yz)y?hY>38q5BfhWy3+1vmk&(N}|e@N{T8mw)ch zZP_guDJkZ;4`ZfBjn_IyS+#c2h8B;SC9q#o=3db?x_aHJ zQ6q%++WE)R2o+0RPjeRnmpkaf{o`W$z0!TYgc6m!Fo#aO;9yWUz1!=Pt*LQ9cQ#*f zXw^{DTeGMD5i*a3piu|Ii+rcI(QQHL3?su;-3wvrh|K7lp^0Wg9aAnA*qasE&+Y4H zCS`F@+CAfmfwM8kWAZrd2xiuZ=^ZwktuD*P^{+nYjO$gpz0MuCICTwI>W*RT_s9k1 zH8pZKK@@EdE9L>Fv7Cv)xeW`l>L7HNDa@s+@}sn!_M6|oPC!%mHLYUcWJVx1*ef@0^0qxJkM$JkZR$(&b5yR^T+5H>8xCWw=?FF4;X}caj zAHy}dx0C%UnfSOgFyOnd!QXrtGt>Ho^U$-ps$ll+BpcuDP;ZRW`=WF~@ouXqiGGY+ z%Jyz`4`U1{n9`CwS(JbZ2JC&-ZA#35v3X~?YhKv%5m zYe>bVJl()w^PtA%5LNTs0@7Y!;B$+C$*7PaAv0r79l^*4(sQxFtRTip%>=+4=r@#HK6bU6Jh zMUR~d4|mHQWqrYLof@bgc!y&PA$Wnb#_kvWhj~ML5Vjz(=P5;ugwIs&+U2`sTGaiOlYfz6rqT8J%jl~-7FiVdJr{Phzej9|U3O zD3PkMwJ-Sw2?q(1(>y?pX$Hz_SY>p!I0=JOYfdW+OX|DY?W)X~6w5<6_#B=R!X>Ey z^(PaLoI*Ub?L65ncRo@+Y}7m=X8;?T{;+*0E?XP4Z{4h?qC2{~h7Z&kf6n!Xy_W>8 zm!ZJ#-d{kh@@I04VFPsawIQUE=#NG?mQB!h0)yOWdsn@6%@IM;iq1nT^Z2RsmXBo% z8!BVM=(HZ`mfLU5Xjqv%Jk{FK1>Sn&e-vc*kf1ld+|BI;L~WNU4S$hi6W z^k;M{ASGLTznAu(p_SrGLmzmc?preJ&PN*okEP%_o%Uur zoE(Una-AFyr56_azb}aP`m6nTf1-G4$QGyL4ba5Ljw?q4DI#OR z*a-GH2k+-`*xL2aB*F;Kcsdm7!v-zxn;7j7b;5#6XAwKOY1$Z;UoD6T+*dM7nPZ_z ziN21rn$#fLi0>+)AH%9OhkB<*WEhDdgcbvGX3HNl>gQhA2@KwVUW2*5L0fp7P`5vG z?_h?xG1sm!)0&52aiQ}!s=^%=Z6C~@k|F5gSYDFCsE_x&;(37J2S-SfNu5*mcmjBOi1(MtiQk7llz&L-L7Q!Y+UE#;3sO*;=*Ns_gWoy9E`M} zq3?a`_2rwCaNayINZV|LB9fB))vyJ2R2NTxy__FAH{=bboC$Poy#2MS<*G=_8cyyC z!}n#HnJF|@nXvczr0oNCT7(cls^U*&G@n9-;5aL=)26_x7H7x^vJ1Kpm|785wm7E^ zXgYAq%uNZ7`a8HL8)TY3h!twYeOlM(pP<@+k_vkbD?~heVBKLsA=^2GZIrCFWnyC@ zj9M`fQ$Iocm`+|s(Vhcgn9J|m^C*veoWCceXo(%C5mc5^b_VyNs_;8KSC!hjpP#Og z^cWKy&6Ttr=L1J>(fXi;AJ=VvG~}~6zxRj?MuzQZIFC!W-9TBr>VboQy(%fed@m@a z^Srj?YfGl~vhG zSmWYRwd=ib{QP;_Ry4$+!SlMkw$pug;C?)t&z-EZUihhU+TI|aVLEATILv=tBqrEK zRT|SB9m)&nV}`UOv4M8rK`q%BQ`Am%{{zC-`B>`=$ZQ?w?vkw;XtdgcZ^Q>QN;f^T zvasrp>Yv?*s{TSiD5;xMuwa|-*1ic%^ddB}myqz%(&F>-(wC2ogCB44^#yJ-_8L8q zTC#xoK_J0rW5Q(SvcSrOih_!Yg51R^mVGwvUdZFi)xP_netiQy>|G+jlf!ej-PdI+ zCX2A+;N+ku;(7;BrdBVDVt?Idsv#Q8$ zc}#l8^04T6cEbLnkwGq9qXVN{SF{#WAO}Q;FE6pCsUrRk(UK6taUiuVB$&^+xav@2 zD&VW~7ULw>M}DUp5QAp<>IBZ})kZH_rq=)s9aE&vuaQyl+Z(%FqFne;*65l-sbrYz zYNfMYBD{5PZ7o?{-%?WvaY=!#}^ue%7imcN>sT1;kU(E2za*@}sYRagLpf_J44 z@x~_Dssba|++vLA+oa;fW_vpb;4j8gePb=IH;F4_>hvs$I()u@DkR9%R-d10Y`mVZu2#`Hv*@mxD3_q& zC2y+drKg~=CtBEX>!Cc}>S*-KTN7JbP1VL5>;=VzBcsytW&Y3CpV*j~bJkd%9we#fK!>e~Sm38-vcNn*8)L2ubGSm}!jQoWJiyCzrD=Egh zEyNq8fy45Y0KJkUmAaQTM9N(Ka(T}69Z4=BDu!kc=rc9%_2H+reUmSdb=u;T%YEgm zkfpYOd>%h@?)$nSKW zEqIeL$Hm3ywB)pD^p+f&+UDk19J>o;T8ggQe%=(_04==99|PnBxb8WxmMjo)*e;WEq|#|B^{ef?UV-7Mv1F0ibR zE{=;UI^v?yqgXV5{;U*jW#iuTeAFz)udo`nm&UnX-batYZL*u$neok^AyygRea}ih5btl8-Z=Q(5VDxnw@)o!c<`vr9T=FcJ!7F%+fUEACTng>%7hh|ZE0bY zyuPe)uZe%A_EXB**|Cy=pCY+o`(ayvhtuQsqusP02ZbzC$9Xf~$^ZGP==7dGMY8jK z{G;XB#V*WP&Ofwp`mT;CLU?v%A@Fgi+up{;=H;Oj8tP`+oK&nU*Kj|8l2bveUblCf 
zW5oFZuVe@U$Vret-HOgN;Ms#JVcyJ9S65fVqHJ03c=q^;Qf>vq^m-105)wfL8T1vG zV7>?dzpGR#)pv8`%FTTSpzlIMp_(;1$W3Gj0E3`H4EkzSzkjg5Y>@!K<%}CKq60Q< ziE+q)wCgt*7-~df1sH&GJ()h6FZKNc13jUTt${m_cXvZ)H7XILud*_bpIkUUxpWr+ z9#$lPa&mJF?2`NSetT>M2L}Uyx=KPt!oqg~0s?m>^2%&`pD(N6LOi{3lxAdH>;(=d-Xti1lYM1}*OvjjkJEhc(jT6<% zhbJd{@#1RLXcB4XMJEeg?zL4_m!Fr?LPPZ1P38WS2HiQyWE= z^n3lNfv^X4F&dW1`_%qvMp9lniLV>#nFJZkC2Unk)+P z{|}r%)Nbf+pogIKReh+}Wk$*11?-f|%NfyO_{jt`a4`*Iyh<7ME;N1)(8V)06`M476n0|8rd5eLj#4>tFZ?e?#hSvL+8zv^hw%8^G8>=Q#j>{ zhr;Ezt%cm~9xErQ#Y632=l3m~xky<5x{YAK3K1Tf9F>q1FY1)rT%Sck$fl&Bp`oI~ zVb2{5mGzV>E;TA#6}bk_9JCs?#HBy)WUI!7&Hx zE`$#OHZefWQnw!C+B=TEYtqqtl8AwJU0+_z!#3w@E4SFoq-n4S@wo~BUMI&ce7&h44r|;)ktqKmqK>vpZ*$vcy*Rp8yJ9&7Uooa zA>m1uYHjNq>RcX5v9Nk)i%3CqHN(CE_!5AaNS{qc9>4>@LYZp2 z(|JmFH^G>RglO>K?eWwUeN7Zj5wjWT`0G=nIiotb)GW&~(;IW`Y?9pHZHf``)+0R# zi{20KcYZ#;4y9BR7WLKDRWHfN%Eesxd@oILd62tiL4#BTkw?`U)NihUn146q+aP8rJy0Ci`Q@C>^y^+l_=M|Q zz(4VTVCxa0_;IX?p>FSP*QC`oH1I)f_&9SKMDC}@Q0C`if|t={5Rm~DMN*|lC}zAKJR=ro9+Z4Nva>EVtD@sV_qLnkd;5Z5*tV2qorO85};6oe+LK$ zxbv`nJe`s_`L9_~e{O1oV)i~wj33mGlOL0xgh=tz(e-@ZwfKw4O#}895CkBhSy9n} zGTWjkpX-eB(}T}b{oE%72?NaROFfMlrUDzF1{D&ReZx;fUjCP5AKvpbk3WC1Tx@KL z`YJyk(u^;lA6F#2u{=GGE58o_)8#_UTQE+9355>`Y#xwP^!!g+KW5SZiba7UFHtP& z4G1X! zw~~aArKFyuN&mT>0oI0)(N3j{BmV|yrz^~xEjNDfxz`c@yVoHR#?R_#*xAYPepw8% zZhGgSuF=+8D{2A=^JfAELmik@>yFvkxx2e-cfWj^;PIkjWi?)~BPReb6U1)<^`j5J z>WlqZJfGtI{Lpx@<}PAjU|@4kWC~ya4|owq2JD?$Rnzl&r(dGc>mg?X~` z=?3uFXp{~083sQeoq279MI(fFsVX}jj`VqOu<$-sl;0n^UpnEju&}66#6Fc5?9vA5 zK6|&evU=Z+!u8x8=P6gM#q5PnWwneO-G8SPyavn`K8P<8M1koq7WBBMQTNM_MIs&x zbL)?u!%O$0?8<_IJkSafF#s9Ef$)t}0LQ8xFwRUPR~H;VCZ9wXNcPe=7ik_Hy26wN&-qe5h?=Wt!X? z6K(K00X~8{(rT75cYu5ZnfJ#Xo%eleQqqyN_bJ}PS%(-D;Xf}TT|j^`@s})~dQQXG z^F`u(pw~){jE;DO{ImtwAU+5%q72v!^uu)Z?l?{P#v=8oN9}*SSx%;sC{}!eb&o=*6_jKScj zUGK!>^Zh5Hdo{mRY|Zfya)mcvOC=QT?DVv>vQk|B_pY~}GGju)v&oxhY-~H&*z{+Q z{QoEQCQ`j8Q>5hN+NOo2rnujnyK7IjjMVL$<@}SUGKa6vaChL3*+)yskPhk>mI+(d&DPa z(-J2A>v~mj*)G<*5kmWQ?Z0(i-gQlJvPjGEt5d!u8y+?}>+9qM3Mt@lPQT!#7cVlN z{F(j#ukNh;by|PUPX8a3b9FVaCVIvHoxd!#_WdrNEj<5&%%5c45?FHj=Dg~UmfL?o zdNv$fpzeu%{hxZ5y6M?jhDi@*ncwRyy}fq+q5EI;d3XIf96L#-^gB0i{hsn!>$XpP zQndX9RHqoD&yug1t$%jcSHA!BRO-pAZ+fiL;ufx0WAp!2_U~T9lX+QpzXv_udCt(> zzc33}G2F_4bem_eGRB81@A{d(HkA3ZX!h5zlVar-HJQ@l~6 z9aNAi2!1GyyDeLOr|$5&*ZuPL8*?IAziI9Y2;~(7P8mu_OWT+I`SC3``fOUTx9{4( z$!eWt7C)xMmiy22xqMzOAKGsNIZ@O;vH*dg_}0> z-c!zId%dyeO0^7dz!FqCIVqf2#;fh@?PovlR@IHkei`eh&iN;)tMK(mJ80K0Fit}+ z#9OiATMM<|g|8DhN5pZgFpzqnT5-+s)x&9fUZtncodfL_0mmE8$u~Z6%mkNWpM4Zg z)aHI(W1H#@>Ke|N)^I4-_r;cpCnke}Tp*4`r1;8pu{aTrXke-olVm))ML#z(rFF?8 zpzMY#92X1@1?_$m5~s95g=53rYtvjDS55$WMMsq>r}EGC|1aln+*hkPb=E21CfEZ? z>JGl!TMRbOA|{ib4JY&sMEf~(e3`CfMbElE*UOhh%(C%6 Yf4TKT=k&~uAO;}tboFyt=akR{0BGk7!2kdN literal 0 HcmV?d00001 diff --git a/test/README.md b/test/README.md index deea89a14..cfc021b45 100644 --- a/test/README.md +++ b/test/README.md @@ -1,118 +1,19 @@ # Running tests locally -```bash -``` - -Clone the repo and install the env - -```bash -git clone https://github.com/populationgenomics/sample-metadata -cd sample-metadata -virtualenv venv -source venv/bin/activate -pip install -r requirements.txt -pip install -r requirements-dev.txt -pip install --editable . 
-``` - -Start the DB server - -```bash -docker stop mysql-p3307 -docker rm mysql-p3307 -docker run -p 3307:3306 --name mysql-p3307 -e MYSQL_ROOT_PASSWORD=root -d mariadb -``` - -Configure environment variables - -```bash -# use credentials defined in db/python/connect.py:dev_config -export SM_ENVIRONMENT=LOCAL -# use specific mysql settings -export SM_DEV_DB_PROJECT=sm_dev -export SM_DEV_DB_USER=root -export SM_DEV_DB_PORT=3307 -export SM_DEV_DB_HOST=127.0.0.1 -export SM_DEV_DB_PASSWORD=root -``` - -Create the DB - -```bash -mysql --host=$SM_DEV_DB_HOST --port=$SM_DEV_DB_PORT -u $SM_DEV_DB_USER -p -e 'CREATE DATABASE '$SM_DEV_DB_PROJECT';' -# mysql --host=$SM_DEV_DB_HOST --port=$SM_DEV_DB_PORT -u $SM_DEV_DB_USER -p -e 'show databases;' -``` - -Install tables - -```bash -cd db -liquibase update --url jdbc:mariadb://$SM_DEV_DB_HOST:$SM_DEV_DB_PORT/$SM_DEV_DB_PROJECT --username=$SM_DEV_DB_USER --password=$SM_DEV_DB_PASSWORD --classpath mariadb-java-client-2.7.3.jar --changelog-file=project.xml - -# mysql --host=$SM_DEV_DB_HOST --port=$SM_DEV_DB_PORT -u $SM_DEV_DB_USER -p -e 'use '$SM_DEV_DB_PROJECT'; show tables;' -``` - -Add project into the DB - -```bash -INPUT_PROJECT=test_input_project -OUTPUT_PROJECT=test_output_project -USER=vladislav.savelyev@populationgenomics.org.au -GCP_ID=vlad-dev +Running tests requires docker (for mariadb), and all the regular dev dependencies. -mysql --host=$SM_DEV_DB_HOST --port=$SM_DEV_DB_PORT -u $SM_DEV_DB_USER -p -e 'use '$SM_DEV_DB_PROJECT'; insert into project (id, name, author, dataset, gcp_id, read_secret_name, write_secret_name) values (1, "'$INPUT_PROJECT'", "'$USER'", "'$INPUT_PROJECT'", "'$GCP_ID'", "'$INPUT_PROJECT'-sample-metadata-main-read-members-cache", "'$INPUT_PROJECT'-sample-metadata-main-write-members-cache"), (2, "'$INPUT_PROJECT'", "'$USER'", "'$OUTPUT_PROJECT'", "'$GCP_ID'", "'$OUTPUT_PROJECT'-sample-metadata-main-read-members-cache", "'$OUTPUT_PROJECT'-sample-metadata-main-write-members-cache");' +If you have these installed, you can run the tests on the terminal with: -mysql --host=$SM_DEV_DB_HOST --port=$SM_DEV_DB_PORT -u $SM_DEV_DB_USER -p -e 'use '$SM_DEV_DB_PROJECT'; select * from project;' +```shell +python -m unittest discover -s test/ ``` -Create secrets to test access to a project +Otherwise, in VSCode: -```bash -# To read and NOT write input project: -gcloud secrets create $INPUT_PROJECT-sample-metadata-main-read-members-cache --project $GCP_ID -gcloud secrets create $INPUT_PROJECT-sample-metadata-main-write-members-cache --project $GCP_ID +- Make sure your VSCode knows your python virtual environment version (`which python`) +- Then from the "Testing" tab, you can "Configure Python Tests" with: + - `unittest` + - `test/` folder + - `test_*.py` format -gcloud secrets versions add $INPUT_PROJECT-sample-metadata-main-read-members-cache --data-file=<(echo ,$USER,) --project $GCP_ID -# Note empty user list for the write secret: -gcloud secrets versions add $INPUT_PROJECT-sample-metadata-main-write-members-cache --data-file=<(echo ,) --project $GCP_ID - -# To read and write input project: -gcloud secrets create $OUTPUT_PROJECT-sample-metadata-main-read-members-cache --project $GCP_ID -gcloud secrets create $OUTPUT_PROJECT-sample-metadata-main-write-members-cache --project $GCP_ID - -gcloud secrets versions add $OUTPUT_PROJECT-sample-metadata-main-read-members-cache --data-file=<(echo ,$USER,) --project $GCP_ID -gcloud secrets versions add $OUTPUT_PROJECT-sample-metadata-main-write-members-cache --data-file=<(echo ,$USER,) --project 
$GCP_ID -``` - -Generate and install API - -```bash -python regenerate_api.py -pip install -e . -``` - -Start the server to populate samples (can do in a separate window) - -```bash -export SM_ALLOWALLACCESS=1 -python3 -m api.server -``` - -Populate samples - -```bash -python test/test_add_samples_for_joint_calling.py -``` - -Stop the server and restart with SM_ALLOWALLACCESS unset, to test permissions - -```bash -export SM_ALLOWALLACCESS=0 -python3 -m api.server -``` - -Run the test that simulates the joint-calling workflow - -```bash -python test/test_joint_calling_workflow.py -``` +This should display a full list of Python tests which you run all, or debug individual tests. diff --git a/test/data/generate_data.py b/test/data/generate_data.py index b48fa1471..281814a71 100755 --- a/test/data/generate_data.py +++ b/test/data/generate_data.py @@ -4,27 +4,20 @@ import asyncio import datetime import random +from pathlib import Path from pprint import pprint -from metamist.apis import ( - ProjectApi, - ParticipantApi, - FamilyApi, - SampleApi, - AnalysisApi, -) +from metamist.apis import AnalysisApi, FamilyApi, ParticipantApi, ProjectApi, SampleApi from metamist.graphql import gql, query_async from metamist.model.analysis import Analysis from metamist.model.analysis_status import AnalysisStatus -from metamist.models import ( - SampleUpsert, - AssayUpsert, - SequencingGroupUpsert, -) +from metamist.models import AssayUpsert, SampleUpsert, SequencingGroupUpsert from metamist.parser.generic_parser import chunk EMOJIS = [':)', ':(', ':/', ':\'('] +default_ped_location = str(Path(__file__).parent / 'greek-myth-forgeneration.ped') + QUERY_SG_ID = gql( """ query MyQuery($project: String!) { @@ -51,7 +44,7 @@ ) -async def main(ped_path='greek-myth-forgeneration.ped', project='greek-myth'): +async def main(ped_path=default_ped_location, project='greek-myth'): """Doing the generation for you""" papi = ProjectApi() @@ -234,7 +227,7 @@ def generate_random_number_within_distribution(): parser.add_argument( '--ped-path', type=str, - default='greek-myth-forgeneration.ped', + default=default_ped_location, help='Path to the pedigree file', ) parser.add_argument('--project', type=str, default='greek-myth') From a2500e3da3022b6cbe7e79a6fd5415dfdd60bbef Mon Sep 17 00:00:00 2001 From: EddieLF <34049565+EddieLF@users.noreply.github.com> Date: Thu, 31 Aug 2023 15:48:08 +1000 Subject: [PATCH 05/12] Return SG IDs from get_sg_ids endpoint, not sample IDs (#537) --- db/python/tables/participant.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/db/python/tables/participant.py b/db/python/tables/participant.py index 903b534e5..0b0b8eb9c 100644 --- a/db/python/tables/participant.py +++ b/db/python/tables/participant.py @@ -298,11 +298,11 @@ async def get_external_participant_id_to_internal_sequencing_group_id_map( self, project: ProjectId, sequencing_type: str | None = None ) -> list[tuple[str, int]]: """ - Get a map of {external_participant_id} -> {internal_sample_id} - useful to matching joint-called samples in the matrix table to the participant + Get a map of {external_participant_id} -> {internal_sequencing_group_id} + useful to match joint-called sequencing groups in the matrix table to the participant Return a list not dictionary, because dict could lose - participants with multiple samples. + participants with multiple sequencing groups. 
""" wheres = ['p.project = :project'] values: dict[str, Any] = {'project': project} @@ -311,7 +311,7 @@ async def get_external_participant_id_to_internal_sequencing_group_id_map( values['sequencing_type'] = sequencing_type _query = f""" -SELECT p.external_id, s.id +SELECT p.external_id, sg.id FROM participant p INNER JOIN sample s ON p.id = s.participant_id INNER JOIN sequencing_group sg ON sg.sample_id = s.id From e94a4d8c1b8d33d439fb7abf0e38128911c0689b Mon Sep 17 00:00:00 2001 From: Michael Franklin Date: Thu, 31 Aug 2023 17:30:15 +1000 Subject: [PATCH 06/12] Add set default role in instructions (#535) * Add set default role in instructions Missed step from me, not sure why it worked initially, but it definitely didn't work without it * Add more semicolons --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 12069a94c..a55c542e9 100644 --- a/README.md +++ b/README.md @@ -228,11 +228,13 @@ We'll setup a user called `sm_api`, and setup permissions ```shell sudo mysql -u root --execute " CREATE DATABASE sm_dev; - CREATE USER sm_api@'%' + CREATE USER sm_api@'%'; CREATE USER sm_api@localhost; CREATE ROLE sm_api_role; - GRANT sm_api_role TO sm_api@'%' + GRANT sm_api_role TO sm_api@'%'; GRANT sm_api_role TO sm_api@localhost; + SET DEFAULT ROLE sm_api_role FOR sm_api@'%'; + SET DEFAULT ROLE sm_api_role FOR sm_api@localhost; GRANT ALL PRIVILEGES ON sm_dev.* TO sm_api_role; " ``` From 57ca4d3c5d490cc0a3140b476b288543967f9255 Mon Sep 17 00:00:00 2001 From: Michael Franklin <22381693+illusional@users.noreply.github.com> Date: Mon, 18 Sep 2023 12:01:39 +1000 Subject: [PATCH 07/12] Address mypy errors (#546) * Address mypy errors * More linting updates * Two tests to fix checks * Rejig lint to build package to mypy scripts * Add strawberry[debug-server] requirement * Revert to the OpenApiGenNoneType for tests * Add extra import --------- Co-authored-by: Michael Franklin --- .github/workflows/lint.yaml | 38 +++- .pre-commit-config.yaml | 174 ++++++++++--------- api/routes/analysis.py | 86 +++++---- db/python/connect.py | 4 +- db/python/enum_tables/enums.py | 10 +- db/python/layers/seqr.py | 14 +- db/python/layers/sequencing_group.py | 13 +- db/python/layers/web.py | 15 +- db/python/tables/analysis.py | 12 +- db/python/tables/project.py | 15 +- db/python/tables/sequencing_group.py | 22 +-- etl/endpoint/main.py | 6 +- metamist/parser/generic_parser.py | 42 ++--- models/base.py | 2 +- models/models/__init__.py | 41 ++--- models/models/analysis.py | 2 +- models/models/assay.py | 19 +- models/models/participant.py | 22 +-- models/models/sample.py | 26 ++- models/models/sequencing_group.py | 24 +-- mypy.ini | 7 + regenerate_api.py | 4 +- requirements-dev.txt | 2 + requirements.txt | 2 +- scripts/20230420_sequencinggroupmigration.py | 10 +- scripts/create_test_subset.py | 26 ++- scripts/parse_ont_sheet.py | 18 +- scripts/parse_ped.py | 10 +- scripts/sync_seqr.py | 27 +-- test/test_analysis.py | 17 +- test/test_assay.py | 30 +++- test/test_generic_auditor.py | 64 ++++--- test/test_generic_filters.py | 6 - test/test_graphql.py | 46 +++-- test/test_import_individual_metadata.py | 20 ++- test/test_parse_generic_metadata.py | 38 ++-- test/test_parse_ont_processor.py | 7 +- test/test_parse_ont_sheet.py | 11 +- test/test_pedigree.py | 11 +- test/test_sample.py | 7 +- test/test_search.py | 98 +++++++---- test/test_web.py | 72 ++++---- web/src/pages/project/ProjectGrid.tsx | 2 +- 43 files changed, 605 insertions(+), 517 deletions(-) diff --git 
a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index d4863658a..68f65dc86 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -4,22 +4,52 @@ on: push jobs: lint: runs-on: ubuntu-latest + env: + DOCKER_BUILDKIT: 1 + BUILDKIT_PROGRESS: plain + CLOUDSDK_CORE_DISABLE_PROMPTS: 1 + # used for generating API + SM_DOCKER: samplemetadata:dev defaults: run: shell: bash -eo pipefail -l {0} - steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@main - uses: actions/setup-python@v2 with: python-version: "3.10" - cache: "pip" - - name: Install packages + - uses: actions/setup-java@v2 + with: + distribution: "temurin" # See 'Supported distributions' for available options + java-version: "17" + + - name: Setup build env + run: | + set -euxo pipefail + + pip install -r requirements-dev.txt + pip install -r requirements.txt + + # openapi-generator + wget https://repo1.maven.org/maven2/org/openapitools/openapi-generator-cli/5.3.0/openapi-generator-cli-5.3.0.jar -O openapi-generator-cli.jar + + - name: "build image" + run: | + docker build \ + --build-arg SM_ENVIRONMENT=local \ + --tag $SM_DOCKER \ + -f deploy/api/Dockerfile \ + . + + - name: Build + install packages run: | + export OPENAPI_COMMAND="java -jar openapi-generator-cli.jar" + python regenerate_api.py pip install -r requirements-dev.txt pip install . + mkdir .mypy_cache - name: pre-commit run: pre-commit run --all-files diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 155d7cda1..c4d4ec406 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,92 +1,98 @@ repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 - hooks: - - id: check-yaml - exclude: '\.*conda/.*' - - id: end-of-file-fixer - - id: trailing-whitespace - exclude: '\.txt$|\.tsv$' - - id: check-case-conflict - - id: check-merge-conflict - - id: detect-private-key - - id: debug-statements - - id: check-added-large-files + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-yaml + exclude: '\.*conda/.*' + - id: end-of-file-fixer + - id: trailing-whitespace + exclude: '\.txt$|\.tsv$' + - id: check-case-conflict + - id: check-merge-conflict + - id: detect-private-key + - id: debug-statements + - id: check-added-large-files - - repo: https://github.com/igorshubovych/markdownlint-cli - rev: v0.33.0 - hooks: - - id: markdownlint - args: ["--config", ".markdownlint.json"] + - repo: https://github.com/igorshubovych/markdownlint-cli + rev: v0.33.0 + hooks: + - id: markdownlint + args: ["--config", ".markdownlint.json"] - - repo: https://github.com/ambv/black - rev: 23.3.0 - hooks: - - id: black - args: [.] - pass_filenames: false - always_run: true - exclude: ^metamist/ + - repo: https://github.com/ambv/black + rev: 23.3.0 + hooks: + - id: black + args: [.] 
+ pass_filenames: false + always_run: true + exclude: ^metamist/ - - repo: https://github.com/PyCQA/flake8 - rev: "6.0.0" - hooks: - - id: flake8 - additional_dependencies: [flake8-bugbear, flake8-quotes] + - repo: https://github.com/PyCQA/flake8 + rev: "6.0.0" + hooks: + - id: flake8 + additional_dependencies: [flake8-bugbear, flake8-quotes] - # Using system installation of pylint to support checking python module imports - - repo: local - hooks: - - id: pylint - name: pylint - entry: pylint - language: system - types: [python] + # Using system installation of pylint to support checking python module imports + - repo: local + hooks: + - id: pylint + name: pylint + entry: pylint + language: system + types: [python] - # mypy - - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.961 - hooks: - - id: mypy - args: - [ - --pretty, - --show-error-codes, - --no-strict-optional, - --ignore-missing-imports, - --install-types, - --non-interactive, - ] - additional_dependencies: - - strawberry-graphql[fastapi]==0.138.1 + # mypy + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.5.1 + hooks: + - id: mypy + args: + [ + --pretty, + --show-error-codes, + --no-strict-optional, + --ignore-missing-imports, + --install-types, + --non-interactive, + --show-error-context, + --check-untyped-defs, + --explicit-package-bases, + --disable-error-code, + operator, + ] + additional_dependencies: + - strawberry-graphql[fastapi]==0.206.0 + - types-PyMySQL==1.1.0.1 - - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v3.0.0-alpha.4" - hooks: - - id: prettier - # I'm not exactly sure why it changes behaviour, but - # calling `cd web`, then calling `ls src/**/*.tsx` - # returns different results to `cd web && ls src/**/*.tsx` - # so just include both patterns here - entry: bash -c 'cd web && prettier --write --ignore-unknown --check src/*.{ts,tsx,css} src/**/*.{ts,tsx,css}' + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v3.0.0-alpha.4" + hooks: + - id: prettier + # I'm not exactly sure why it changes behaviour, but + # calling `cd web`, then calling `ls src/**/*.tsx` + # returns different results to `cd web && ls src/**/*.tsx` + # so just include both patterns here + entry: bash -c 'cd web && prettier --write --ignore-unknown --check src/*.{ts,tsx,css} src/**/*.{ts,tsx,css}' - - repo: https://github.com/pre-commit/mirrors-eslint - rev: "v8.33.0" - hooks: - - id: eslint - entry: bash -c 'cd web && eslint' - files: \.[jt]sx?$ - types: [file] - additional_dependencies: - - eslint@^7.32.0 - - eslint-config-airbnb@^19.0.4 - - eslint-config-airbnb-base@^15.0.0 - - eslint-config-airbnb-typescript@^17.0.0 - - eslint-config-prettier@^8.6.0 - - eslint-plugin-import@^2.26.0 - - eslint-plugin-jsx-a11y@^6.6.1 - - eslint-plugin-prettier@^4.2.1 - - eslint-plugin-react@^7.31.11 - - eslint-plugin-react-hooks@^4.6.0 - - "@typescript-eslint/eslint-plugin@^5.48.0" - - "@typescript-eslint/parser@^5.48.0" + - repo: https://github.com/pre-commit/mirrors-eslint + rev: "v8.33.0" + hooks: + - id: eslint + entry: bash -c 'cd web && eslint' + files: \.[jt]sx?$ + types: [file] + additional_dependencies: + - eslint@^7.32.0 + - eslint-config-airbnb@^19.0.4 + - eslint-config-airbnb-base@^15.0.0 + - eslint-config-airbnb-typescript@^17.0.0 + - eslint-config-prettier@^8.6.0 + - eslint-plugin-import@^2.26.0 + - eslint-plugin-jsx-a11y@^6.6.1 + - eslint-plugin-prettier@^4.2.1 + - eslint-plugin-react@^7.31.11 + - eslint-plugin-react-hooks@^4.6.0 + - "@typescript-eslint/eslint-plugin@^5.48.0" + - 
"@typescript-eslint/parser@^5.48.0" diff --git a/api/routes/analysis.py b/api/routes/analysis.py index 0af9ade7b..1a0057dcc 100644 --- a/api/routes/analysis.py +++ b/api/routes/analysis.py @@ -8,12 +8,11 @@ from pydantic import BaseModel from starlette.responses import StreamingResponse -from api.utils.dates import parse_date_only_string from api.utils.db import ( - get_projectless_db_connection, + Connection, get_project_readonly_connection, get_project_write_connection, - Connection, + get_projectless_db_connection, ) from api.utils.export import ExportType from db.python.layers.analysis import AnalysisLayer @@ -22,20 +21,17 @@ from db.python.utils import GenericFilter from models.enums import AnalysisStatus from models.models.analysis import ( + Analysis, AnalysisInternal, ProjectSizeModel, - SequencingGroupSizeModel, - DateSizeModel, - Analysis, ) from models.utils.sample_id_format import ( sample_id_transform_to_raw_list, - sample_id_format, ) from models.utils.sequencing_group_id_format import ( + sequencing_group_id_format, sequencing_group_id_format_list, sequencing_group_id_transform_to_raw_list, - sequencing_group_id_format, ) router = APIRouter(prefix='/analysis', tags=['analysis']) @@ -326,40 +322,42 @@ async def get_sequencing_group_file_sizes( """ Get the per sample file size by type over the given projects and date range """ - atable = AnalysisLayer(connection) - - # Check access to projects - project_ids = None - pt = ProjectPermissionsTable(connection=connection.connection) - project_ids = await pt.get_project_ids_from_names_and_user( - connection.author, project_names, readonly=True - ) - - # Map from internal pids to project name - prj_name_map = dict(zip(project_ids, project_names)) - - # Convert dates - start = parse_date_only_string(start_date) - end = parse_date_only_string(end_date) - - # Get results with internal ids as keys - results = await atable.get_sequencing_group_file_sizes( - project_ids=project_ids, start_date=start, end_date=end - ) - - # Convert to the correct output type, converting internal ids to external - fixed_pids: list[Any] = [ - ProjectSizeModel( - project=prj_name_map[project_data['project']], - samples=[ - SequencingGroupSizeModel( - sample=sample_id_format(s['sample']), - dates=[DateSizeModel(**d) for d in s['dates']], - ) - for s in project_data['samples'] - ], - ) - for project_data in results - ] - return fixed_pids + raise NotImplementedError('This route is broken, and not properly implemented yet') + # atable = AnalysisLayer(connection) + + # # Check access to projects + # project_ids = None + # pt = ProjectPermissionsTable(connection=connection.connection) + # project_ids = await pt.get_project_ids_from_names_and_user( + # connection.author, project_names, readonly=True + # ) + + # # Map from internal pids to project name + # prj_name_map = dict(zip(project_ids, project_names)) + + # # Convert dates + # start = parse_date_only_string(start_date) + # end = parse_date_only_string(end_date) + + # # Get results with internal ids as keys + # results = await atable.get_sequencing_group_file_sizes( + # project_ids=project_ids, start_date=start, end_date=end + # ) + + # # Convert to the correct output type, converting internal ids to external + # fixed_pids: list[Any] = [ + # ProjectSizeModel( + # project=prj_name_map[project_data['project']], + # samples=[ + # SequencingGroupSizeModel( + # sample=sample_id_format(s['sample']), + # dates=[DateSizeModel(**d) for d in s['dates']], + # ) + # for s in project_data['samples'] + # ], + # ) + # for 
project_data in results + # ] + + # return fixed_pids diff --git a/db/python/connect.py b/db/python/connect.py index 0b9acf57a..8a5a7811e 100644 --- a/db/python/connect.py +++ b/db/python/connect.py @@ -121,7 +121,9 @@ def get_connection_string(self): if self.port: _host += f':{self.port}' - options = {} # {'min_size': self.min_pool_size, 'max_size': self.max_pool_size} + options: dict[ + str, str | int + ] = {} # {'min_size': self.min_pool_size, 'max_size': self.max_pool_size} _options = '&'.join(f'{k}={v}' for k, v in options.items()) url = f'mysql://{u_p}@{_host}/{self.dbname}?{_options}' diff --git a/db/python/enum_tables/enums.py b/db/python/enum_tables/enums.py index e6419b98b..113daf3d6 100644 --- a/db/python/enum_tables/enums.py +++ b/db/python/enum_tables/enums.py @@ -1,6 +1,7 @@ -import re import abc +import re from functools import lru_cache + from async_lru import alru_cache from db.python.connect import DbBase @@ -36,7 +37,8 @@ def _get_table_name(cls): matcher = table_name_matcher.match(tn) if not matcher: raise ValueError( - f'The tablename {tn} is not valid (must match {table_name_matcher.pattern})' + f'The tablename {tn} is not valid (must match ' + f'{table_name_matcher.pattern})' ) return tn @@ -47,9 +49,9 @@ async def get(self) -> list[str]: """ _query = f'SELECT DISTINCT name FROM {self._get_table_name()}' rows = await self.connection.fetch_all(_query) - rows = [r['name'] for r in rows] + nrows = [r['name'] for r in rows] - return rows + return nrows async def insert(self, value: str): """ diff --git a/db/python/layers/seqr.py b/db/python/layers/seqr.py index 8c203979a..32ef5f5e2 100644 --- a/db/python/layers/seqr.py +++ b/db/python/layers/seqr.py @@ -1,8 +1,8 @@ # pylint: disable=unnecessary-lambda-assignment,too-many-locals,broad-exception-caught +import asyncio import os import re -import asyncio import traceback from collections import defaultdict from datetime import datetime @@ -15,13 +15,14 @@ from cpg_utils.cloud import get_google_identity_token from api.settings import ( - SEQR_URL, SEQR_AUDIENCE, SEQR_MAP_LOCATION, SEQR_SLACK_NOTIFICATION_CHANNEL, + SEQR_URL, get_slack_token, ) from db.python.connect import Connection +from db.python.enum_tables import SequencingTypeTable from db.python.layers.analysis import AnalysisLayer from db.python.layers.base import BaseLayer from db.python.layers.family import FamilyLayer @@ -29,15 +30,14 @@ from db.python.layers.sequencing_group import SequencingGroupLayer from db.python.tables.analysis import AnalysisFilter from db.python.tables.project import ProjectPermissionsTable -from db.python.enum_tables import SequencingTypeTable -from db.python.utils import ProjectId, GenericFilter +from db.python.utils import GenericFilter, ProjectId from models.enums import AnalysisStatus # literally the most temporary thing ever, but for complete # automation need to have sample inclusion / exclusion from models.utils.sequencing_group_id_format import ( - sequencing_group_id_format_list, sequencing_group_id_format, + sequencing_group_id_format_list, ) SEQUENCING_GROUPS_TO_IGNORE = {22735, 22739} @@ -421,9 +421,9 @@ async def update_es_index( ) if len(es_index_analyses) == 0: - return [f'No ES index to synchronise'] + return ['No ES index to synchronise'] - with AnyPath(fn_path).open('w+') as f: + with AnyPath(fn_path).open('w+') as f: # type: ignore f.write('\n'.join(rows_to_write)) es_index = es_index_analyses[-1].output diff --git a/db/python/layers/sequencing_group.py b/db/python/layers/sequencing_group.py index 6d6a02a55..5ff5b133b 
100644 --- a/db/python/layers/sequencing_group.py +++ b/db/python/layers/sequencing_group.py @@ -6,13 +6,13 @@ from db.python.tables.assay import AssayTable, NoOpAenter from db.python.tables.sample import SampleTable from db.python.tables.sequencing_group import ( - SequencingGroupTable, SequencingGroupFilter, + SequencingGroupTable, ) from db.python.utils import ProjectId from models.models.sequencing_group import ( - SequencingGroupUpsertInternal, SequencingGroupInternal, + SequencingGroupUpsertInternal, ) from models.utils.sequencing_group_id_format import sequencing_group_id_format @@ -133,7 +133,7 @@ async def get_participant_ids_sequencing_group_ids_for_sequencing_type( ( projects, pids, - ) = await self.seqgt.get_participant_ids_and_sequence_group_ids_for_sequencing_type( + ) = await self.seqgt.get_participant_ids_and_sequencing_group_ids_for_sequencing_type( sequencing_type ) if not pids: @@ -209,7 +209,7 @@ async def create_sequencing_group_from_assays( type_=next(iter(sequencing_types)), technology=next(iter(sequencing_technologies)), platform=next(iter(sequencing_platforms)), - sequence_ids=assay_ids, + assay_ids=assay_ids, meta=meta, ) return SequencingGroupInternal( @@ -217,7 +217,6 @@ async def create_sequencing_group_from_assays( type=next(iter(sequencing_types)), technology=next(iter(sequencing_technologies)), platform=next(iter(sequencing_platforms)), - sequence_ids=assay_ids, sample_id=next(iter(sample_ids)), meta=meta, assays=assays, @@ -249,7 +248,7 @@ async def recreate_sequencing_group_with_new_assays( technology=seqgroup.technology, platform=seqgroup.platform, meta={**seqgroup.meta, **meta}, - sequence_ids=assays, + assay_ids=assays, author=self.author, open_transaction=False, ) @@ -324,7 +323,7 @@ async def upsert_sequencing_groups( technology=sg.technology, platform=sg.platform, meta=sg.meta, - sequence_ids=assay_ids, + assay_ids=assay_ids, open_transaction=False, ) diff --git a/db/python/layers/web.py b/db/python/layers/web.py index 6fffe2e20..95979457e 100644 --- a/db/python/layers/web.py +++ b/db/python/layers/web.py @@ -16,12 +16,12 @@ from db.python.tables.project import ProjectPermissionsTable from db.python.tables.sequencing_group import SequencingGroupTable from models.models import ( + AssayInternal, + FamilySimpleInternal, NestedParticipantInternal, NestedSampleInternal, NestedSequencingGroupInternal, - AssayInternal, SearchItem, - FamilySimpleInternal, ) from models.models.web import ProjectSummaryInternal, WebProject @@ -82,7 +82,7 @@ def _project_summary_sample_query(self, grid_filter: list[SearchItem]): # the query to determine the total count, then take the selection of samples # for the current page. This is more efficient than doing 2 queries separately. 
sample_query = f""" - SELECT s.id, s.external_id, s.type, s.meta, s.participant_id + SELECT s.id, s.external_id, s.type, s.meta, s.participant_id, s.active FROM sample s LEFT JOIN assay a ON s.id = a.sample_id LEFT JOIN participant p ON p.id = s.participant_id @@ -189,6 +189,7 @@ def _project_summary_process_sample_rows( created_date=str(sample_id_start_times.get(s['id'], '')), sequencing_groups=sg_models_by_sample_id.get(s['id'], []), non_sequencing_assays=filtered_assay_models_by_sid.get(s['id'], []), + active=bool(ord(s['active'])), ) for s in sample_rows ] @@ -402,8 +403,8 @@ async def get_project_summary( sg_models_by_sample_id=seq_group_models_by_sample_id, sample_id_start_times=sample_id_start_times, ) - # the pydantic model is casting to the id to a str, as that makes sense on the front end - # but cast back here to do the lookup + # the pydantic model is casting to the id to a str, as that makes sense on + # the front end but cast back here to do the lookup sid_to_pid = {s['id']: s['participant_id'] for s in sample_rows} smodels_by_pid = group_by(smodels, lambda s: sid_to_pid[int(s.id)]) @@ -429,7 +430,7 @@ async def get_project_summary( reported_sex=None, reported_gender=None, karyotype=None, - project=self.project, + # project=self.project, ) ) elif pid not in pid_seen: @@ -445,7 +446,7 @@ async def get_project_summary( reported_sex=p['reported_sex'], reported_gender=p['reported_gender'], karyotype=p['karyotype'], - project=self.project, + # project=self.project, ) ) diff --git a/db/python/tables/analysis.py b/db/python/tables/analysis.py index d6d2fe41d..a1d4a4c82 100644 --- a/db/python/tables/analysis.py +++ b/db/python/tables/analysis.py @@ -2,15 +2,15 @@ import dataclasses from collections import defaultdict from datetime import datetime -from typing import List, Optional, Set, Tuple, Dict, Any +from typing import Any, Dict, List, Optional, Set, Tuple from db.python.connect import DbBase, NotFoundError from db.python.tables.project import ProjectId from db.python.utils import ( - to_db_json, - GenericFilterModel, GenericFilter, + GenericFilterModel, GenericMetaFilter, + to_db_json, ) from models.enums import AnalysisStatus from models.models.analysis import AnalysisInternal @@ -285,7 +285,7 @@ async def get_incomplete_analyses( """ Gets details of analysis with status queued or in-progress """ - _query = f""" + _query = """ SELECT a.id as id, a.type as type, a.status as status, a.output as output, a_sg.sequencing_group_id as sequencing_group_id, a.project as project, a.meta as meta @@ -339,7 +339,7 @@ async def get_latest_complete_analysis_for_sequencing_group_ids_by_type( if row['sequencing_group_id'] in seen_sequencing_group_ids: continue seen_sequencing_group_ids.add(row['sequencing_group_id']) - analyses.append(AnalysisInternal.from_db(**row)) + analyses.append(AnalysisInternal.from_db(**dict(row))) # reverse after timestamp_completed return analyses[::-1] @@ -439,7 +439,7 @@ async def get_sample_cram_path_map_for_seqr( seq_check = 'IN :seq_types' values['seq_types'] = sequencing_types - filters.append(f'JSON_VALUE(a.meta, "$.sequencing_type") ' + seq_check) + filters.append('JSON_VALUE(a.meta, "$.sequencing_type") ' + seq_check) if participant_ids: filters.append('p.id IN :pids') diff --git a/db/python/tables/project.py b/db/python/tables/project.py index d7c002c28..ac01e18bb 100644 --- a/db/python/tables/project.py +++ b/db/python/tables/project.py @@ -1,22 +1,21 @@ # pylint: disable=global-statement import asyncio -from typing import Dict, List, Set, Iterable, 
Optional, Tuple, Any - import json from datetime import datetime, timedelta +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple +from cpg_utils.cloud import get_cached_group_members from databases import Database from google.cloud import secretmanager -from cpg_utils.cloud import get_cached_group_members from api.settings import MEMBERS_CACHE_LOCATION, is_all_access from db.python.utils import ( - ProjectId, Forbidden, + InternalError, NoProjectAccess, + ProjectId, get_logger, to_db_json, - InternalError, ) from models.models.project import Project @@ -440,9 +439,9 @@ async def get_seqr_projects(self) -> list[dict[str, Any]]: projects = [] for r in await self.connection.fetch_all(_query): - r = dict(r) - r['meta'] = json.loads(r['meta'] or '{}') - projects.append(r) + row = dict(r) + row['meta'] = json.loads(row['meta'] or '{}') + projects.append(row) return projects diff --git a/db/python/tables/sequencing_group.py b/db/python/tables/sequencing_group.py index a3a83e864..fefa36116 100644 --- a/db/python/tables/sequencing_group.py +++ b/db/python/tables/sequencing_group.py @@ -6,11 +6,11 @@ from db.python.connect import DbBase, NoOpAenter, NotFoundError from db.python.utils import ( - ProjectId, - to_db_json, - GenericFilterModel, GenericFilter, + GenericFilterModel, GenericMetaFilter, + ProjectId, + to_db_json, ) from models.models.sequencing_group import SequencingGroupInternal @@ -125,10 +125,10 @@ async def get_sequencing_groups_by_ids( f'Couldn\'t find sequencing groups with internal id {ids})' ) - rows = [SequencingGroupInternal.from_db(**dict(r)) for r in rows] - projects = set(r.project for r in rows) + sg_rows = [SequencingGroupInternal.from_db(**dict(r)) for r in rows] + projects = set(r.project for r in sg_rows) - return projects, rows + return projects, sg_rows async def get_assay_ids_by_sequencing_group_ids( self, ids: list[int] @@ -172,7 +172,7 @@ async def get_all_sequencing_group_ids_by_sample_ids_by_type( return sequencing_group_ids_by_sample_ids_by_type - async def get_participant_ids_and_sequence_group_ids_for_sequencing_type( + async def get_participant_ids_and_sequencing_group_ids_for_sequencing_type( self, sequencing_type: str ) -> tuple[set[ProjectId], dict[int, list[int]]]: """ @@ -252,7 +252,7 @@ async def create_sequencing_group( type_: str, technology: str, platform: str, - sequence_ids: list[int], + assay_ids: list[int], meta: dict = None, author: str = None, open_transaction=True, @@ -319,16 +319,16 @@ async def create_sequencing_group( _query, {**values, 'author': author or self.author}, ) - sequence_insert_values = [ + assay_id_insert_values = [ { 'seqgroup': id_of_seq_group, 'assayid': s, 'author': author or self.author, } - for s in sequence_ids + for s in assay_ids ] await self.connection.execute_many( - _seqg_linker_query, sequence_insert_values + _seqg_linker_query, assay_id_insert_values ) return id_of_seq_group diff --git a/etl/endpoint/main.py b/etl/endpoint/main.py index e56679bd0..7a53fca68 100644 --- a/etl/endpoint/main.py +++ b/etl/endpoint/main.py @@ -3,12 +3,12 @@ import logging import os import uuid -import functions_framework + import flask +import functions_framework import google.cloud.bigquery as bq -from google.cloud import pubsub_v1 - from cpg_utils.cloud import email_from_id_token +from google.cloud import pubsub_v1 # type: ignore BIGQUERY_TABLE = os.getenv('BIGQUERY_TABLE') PUBSUB_TOPIC = os.getenv('PUBSUB_TOPIC') diff --git a/metamist/parser/generic_parser.py b/metamist/parser/generic_parser.py index cfdf6926f..a31fea483 
100644 --- a/metamist/parser/generic_parser.py +++ b/metamist/parser/generic_parser.py @@ -1,47 +1,45 @@ # pylint: disable=too-many-lines,too-many-instance-attributes,too-many-locals,unused-argument,assignment-from-none,invalid-name,ungrouped-imports -import json -import sys import asyncio import csv +import json import logging import os import re +import sys from abc import abstractmethod from collections import defaultdict +from functools import wraps from io import StringIO from typing import ( - List, + Any, + Coroutine, Dict, - Union, - Optional, - Tuple, + Hashable, + Iterable, + Iterator, + List, Match, - Any, + Optional, Sequence, - TypeVar, - Iterator, - Coroutine, Set, - Iterable, - Hashable, + Tuple, + TypeVar, + Union, ) -from functools import wraps from cloudpathlib import AnyPath -from metamist.graphql import query_async, gql -from metamist.parser.cloudhelper import CloudHelper, group_by - -from metamist.apis import SampleApi, AssayApi, AnalysisApi, ParticipantApi +from metamist.apis import AnalysisApi, AssayApi, ParticipantApi, SampleApi +from metamist.graphql import gql, query_async from metamist.models import ( Analysis, AnalysisStatus, + AssayUpsert, ParticipantUpsert, SampleUpsert, SequencingGroupUpsert, - AssayUpsert, ) - +from metamist.parser.cloudhelper import CloudHelper, group_by # https://mypy.readthedocs.io/en/stable/runtime_troubles.html#using-new-additions-to-the-typing-module if sys.version_info >= (3, 8): @@ -322,8 +320,8 @@ def __init__( def to_sm(self) -> AssayUpsert: """Convert to SM upsert model""" return AssayUpsert( - type=self.assay_type, id=self.internal_id, + type=self.assay_type, external_ids=self.external_ids, # sample_id=self.s, meta=self.meta, @@ -1080,7 +1078,9 @@ async def add_analyses(self, analyses_to_add, external_to_internal_id_map): for external_id, analysis in chunked_analysis: # TODO: resolve this external_to_internal_id_map # this one is going to be slightly harder : - analysis.sequence_group_ids = [external_to_internal_id_map[external_id]] + analysis.sequencing_group_ids = [ + external_to_internal_id_map[external_id] + ] promises.append( analysisapi.create_analysis_async( project=proj, analysis_model=analysis diff --git a/models/base.py b/models/base.py index 133671761..389c38a56 100644 --- a/models/base.py +++ b/models/base.py @@ -2,7 +2,7 @@ # annotate any external objects that must be instantiated with this # type to force openapi generator to allow for Nones (it will actually allow Any) -OpenApiGenNoneType = bytes +OpenApiGenNoneType = bytes | None class SMBase(BaseModel): diff --git a/models/models/__init__.py b/models/models/__init__.py index 24069f708..37f1068a9 100644 --- a/models/models/__init__.py +++ b/models/models/__init__.py @@ -1,60 +1,61 @@ from models.models.analysis import ( - AnalysisInternal, Analysis, + AnalysisInternal, DateSizeModel, - SequencingGroupSizeModel, ProjectSizeModel, + SequencingGroupSizeModel, ) from models.models.assay import ( - AssayInternal, - AssayUpsertInternal, Assay, + AssayInternal, AssayUpsert, + AssayUpsertInternal, ) from models.models.family import ( - FamilySimpleInternal, + Family, FamilyInternal, FamilySimple, - Family, + FamilySimpleInternal, PedRowInternal, ) from models.models.participant import ( - ParticipantInternal, + NestedParticipant, NestedParticipantInternal, - ParticipantUpsertInternal, Participant, - NestedParticipant, + ParticipantInternal, ParticipantUpsert, + ParticipantUpsertInternal, ) from models.models.project import Project from models.models.sample import ( - 
SampleInternal, + NestedSample, NestedSampleInternal, - SampleUpsertInternal, Sample, - NestedSample, + SampleInternal, SampleUpsert, + SampleUpsertInternal, ) from models.models.search import ( - SearchResponseData, + ErrorResponse, FamilySearchResponseData, ParticipantSearchResponseData, SampleSearchResponseData, - ErrorResponse, - SearchResponse, SearchItem, + SearchResponse, + SearchResponseData, + SequencingGroupSearchResponseData, ) from models.models.sequencing_group import ( - SequencingGroupInternal, + NestedSequencingGroup, NestedSequencingGroupInternal, - SequencingGroupUpsertInternal, SequencingGroup, - NestedSequencingGroup, + SequencingGroupInternal, SequencingGroupUpsert, + SequencingGroupUpsertInternal, ) from models.models.web import ( + PagingLinks, + ProjectSummary, ProjectSummaryInternal, WebProject, - ProjectSummary, - PagingLinks, ) diff --git a/models/models/analysis.py b/models/models/analysis.py index a88d6dd99..f35e95a48 100644 --- a/models/models/analysis.py +++ b/models/models/analysis.py @@ -15,7 +15,7 @@ class AnalysisInternal(SMBase): """Model for Analysis""" - id: int | None + id: int | None = None type: str status: AnalysisStatus output: str = None diff --git a/models/models/assay.py b/models/models/assay.py index 6e17a6ef7..04658ad44 100644 --- a/models/models/assay.py +++ b/models/models/assay.py @@ -1,11 +1,8 @@ import json from typing import Any -from models.base import SMBase, OpenApiGenNoneType -from models.utils.sample_id_format import ( - sample_id_format, - sample_id_transform_to_raw, -) +from models.base import OpenApiGenNoneType, SMBase +from models.utils.sample_id_format import sample_id_format, sample_id_transform_to_raw class AssayInternal(SMBase): @@ -104,12 +101,12 @@ def to_internal(self): _sample_id = None if self.sample_id: # but may be provided directly when inserting directly - _sample_id = sample_id_transform_to_raw(self.sample_id) + _sample_id = sample_id_transform_to_raw(self.sample_id) # type: ignore return AssayUpsertInternal( - id=self.id, - type=self.type, - external_ids=self.external_ids, - sample_id=_sample_id, - meta=self.meta, + id=self.id, # type: ignore + type=self.type, # type: ignore + external_ids=self.external_ids, # type: ignore + sample_id=_sample_id, # type: ignore + meta=self.meta, # type: ignore ) diff --git a/models/models/participant.py b/models/models/participant.py index c72451274..ab843e343 100644 --- a/models/models/participant.py +++ b/models/models/participant.py @@ -1,14 +1,14 @@ import json from db.python.utils import ProjectId -from models.base import SMBase, OpenApiGenNoneType +from models.base import OpenApiGenNoneType, SMBase +from models.models.family import FamilySimple, FamilySimpleInternal from models.models.sample import ( - SampleUpsertInternal, - SampleUpsert, - NestedSampleInternal, NestedSample, + NestedSampleInternal, + SampleUpsert, + SampleUpsertInternal, ) -from models.models.family import FamilySimple, FamilySimpleInternal class ParticipantInternal(SMBase): @@ -135,12 +135,12 @@ class ParticipantUpsert(SMBase): def to_internal(self): """Convert to internal model, doesn't really do much""" p = ParticipantUpsertInternal( - id=self.id, - external_id=self.external_id, - reported_sex=self.reported_sex, - reported_gender=self.reported_gender, - karyotype=self.karyotype, - meta=self.meta, + id=self.id, # type: ignore + external_id=self.external_id, # type: ignore + reported_sex=self.reported_sex, # type: ignore + reported_gender=self.reported_gender, # type: ignore + karyotype=self.karyotype, # 
type: ignore + meta=self.meta, # type: ignore ) if self.samples: diff --git a/models/models/sample.py b/models/models/sample.py index 94fd3a901..a41c06463 100644 --- a/models/models/sample.py +++ b/models/models/sample.py @@ -1,17 +1,14 @@ import json -from models.base import SMBase, OpenApiGenNoneType -from models.models.assay import AssayUpsertInternal, AssayUpsert, AssayInternal, Assay +from models.base import OpenApiGenNoneType, SMBase +from models.models.assay import Assay, AssayInternal, AssayUpsert, AssayUpsertInternal from models.models.sequencing_group import ( - SequencingGroupUpsert, NestedSequencingGroup, - SequencingGroupUpsertInternal, NestedSequencingGroupInternal, + SequencingGroupUpsert, + SequencingGroupUpsertInternal, ) -from models.utils.sample_id_format import ( - sample_id_format, - sample_id_transform_to_raw, -) +from models.utils.sample_id_format import sample_id_format, sample_id_transform_to_raw class SampleInternal(SMBase): @@ -143,6 +140,7 @@ def to_internal(self): type=self.type, participant_id=self.participant_id, active=self.active, + author='', ) @@ -180,12 +178,12 @@ def to_internal(self) -> SampleUpsertInternal: sample_upsert = SampleUpsertInternal( id=_id, - external_id=self.external_id, - meta=self.meta, - project=self.project, - type=self.type, - participant_id=self.participant_id, - active=self.active, + external_id=self.external_id, # type: ignore + meta=self.meta, # type: ignore + project=self.project, # type: ignore + type=self.type, # type: ignore + participant_id=self.participant_id, # type: ignore + active=self.active, # type: ignore ) if self.sequencing_groups: diff --git a/models/models/sequencing_group.py b/models/models/sequencing_group.py index 36e250c73..1b0bbd3f4 100644 --- a/models/models/sequencing_group.py +++ b/models/models/sequencing_group.py @@ -1,11 +1,11 @@ import json -from models.base import SMBase, OpenApiGenNoneType -from models.models.assay import AssayUpsert, AssayUpsertInternal, Assay, AssayInternal -from models.utils.sample_id_format import sample_id_transform_to_raw, sample_id_format +from models.base import OpenApiGenNoneType, SMBase +from models.models.assay import Assay, AssayInternal, AssayUpsert, AssayUpsertInternal +from models.utils.sample_id_format import sample_id_format, sample_id_transform_to_raw from models.utils.sequencing_group_id_format import ( - sequencing_group_id_transform_to_raw, sequencing_group_id_format, + sequencing_group_id_transform_to_raw, ) @@ -60,6 +60,7 @@ def to_external(self): type=self.type, technology=self.technology, platform=self.platform, + external_ids=self.external_ids, meta=self.meta, sample_id=sample_id_format(self.sample_id), assays=[a.to_external() for a in self.assays or []], @@ -141,6 +142,7 @@ class SequencingGroup(SMBase): sample_id: str external_ids: dict[str, str] archived: bool + assays: list[Assay] class NestedSequencingGroup(SMBase): @@ -169,7 +171,7 @@ class SequencingGroupUpsert(SMBase): sample_id: str | OpenApiGenNoneType = None external_ids: dict[str, str] | OpenApiGenNoneType = None - assays: list[AssayUpsert] | None = None + assays: list[AssayUpsert] | OpenApiGenNoneType = None def to_internal(self) -> SequencingGroupUpsertInternal: """ @@ -185,15 +187,15 @@ def to_internal(self) -> SequencingGroupUpsertInternal: sg_internal = SequencingGroupUpsertInternal( id=_id, - type=self.type, - technology=self.technology, - platform=self.platform.lower() if self.platform else None, - meta=self.meta, + type=self.type, # type: ignore + technology=self.technology, # type: 
ignore + platform=self.platform.lower() if self.platform else None, # type: ignore + meta=self.meta, # type: ignore sample_id=_sample_id, - external_ids=self.external_ids or {}, + external_ids=self.external_ids or {}, # type: ignore ) if self.assays is not None: - sg_internal.assays = [a.to_internal() for a in self.assays] + sg_internal.assays = [a.to_internal() for a in self.assays] # type: ignore return sg_internal diff --git a/mypy.ini b/mypy.ini index 3403e0acb..418e757ed 100644 --- a/mypy.ini +++ b/mypy.ini @@ -4,10 +4,15 @@ python_version = 3.10 ; warn_return_any = True ; warn_unused_configs = True + exclude = (build|update_sample_status) + # Per-module options: plugins = strawberry.ext.mypy_plugin +[mypy.db] +disable_error_code = operator + [mypy-sample_metadata.*] ignore_errors = true @@ -44,3 +49,5 @@ ignore_missing_imports=True ignore_missing_imports = True [mypy-graphql] ignore_missing_imports = True +[mypy-strawberry] +ignore_errors = True diff --git a/regenerate_api.py b/regenerate_api.py index 07112163e..8e6a9fece 100755 --- a/regenerate_api.py +++ b/regenerate_api.py @@ -184,7 +184,7 @@ def generate_schema_file(): Generate schema file and place in the metamist/graphql/ directory """ command = ['strawberry', 'export-schema', 'api.graphql.schema:schema'] - schema = subprocess.check_output(command, stderr=subprocess.STDOUT).decode() + schema = subprocess.check_output(command).decode() with open(os.path.join(MODULE_DIR, 'graphql/schema.graphql'), 'w+') as f: f.write(schema) @@ -317,7 +317,7 @@ def main(): while (not check_if_server_is_accessible()) and startup_tries > 0: startup_tries -= 1 logger.info( - f'Dockerised API server is not ready yet. ' + 'Dockerised API server is not ready yet. ' + f'Retrying in {wait_time_in_seconds} seconds. 
' + f'Remaining tries: {startup_tries}' ) diff --git a/requirements-dev.txt b/requirements-dev.txt index 1113f5753..0202733d1 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -9,3 +9,5 @@ pre-commit pylint testcontainers[mariadb] types-PyMySQL +# some strawberry dependency +strawberry-graphql[debug-server]==0.206.0 diff --git a/requirements.txt b/requirements.txt index b5cc04e02..2affefbb3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ google-cloud-logging==2.7.0 google-cloud-storage==1.43.0 uvicorn==0.18.3 fastapi[all]==0.85.1 -strawberry-graphql[fastapi]==0.177.1 +strawberry-graphql[fastapi]==0.206.0 python-multipart==0.0.5 databases[mysql]==0.6.1 SQLAlchemy==1.4.41 diff --git a/scripts/20230420_sequencinggroupmigration.py b/scripts/20230420_sequencinggroupmigration.py index a3d2a705a..3feeecdd5 100644 --- a/scripts/20230420_sequencinggroupmigration.py +++ b/scripts/20230420_sequencinggroupmigration.py @@ -27,7 +27,7 @@ import json from collections import defaultdict from textwrap import dedent -from typing import Any, List, Dict, Tuple +from typing import Any, Dict, List, Tuple import click from databases import Database @@ -338,7 +338,7 @@ async def migrate_analyses(connection: Database, dry_run: bool = True): ) analysis_samples = await connection.fetch_all(analyses_query) - sequence_group_ids_of_duplicate_samples_query = dedent( + sequencing_group_ids_of_duplicate_samples_query = dedent( """ SELECT sg.sample_id, sg.id, sg.type FROM sequencing_group sg @@ -352,13 +352,13 @@ async def migrate_analyses(connection: Database, dry_run: bool = True): ORDER BY sg.sample_id DESC; """ ) - sequence_group_ids_of_duplicate_samples = await connection.fetch_all( - sequence_group_ids_of_duplicate_samples_query + sequencing_group_ids_of_duplicate_samples = await connection.fetch_all( + sequencing_group_ids_of_duplicate_samples_query ) duplicate_sg_id_map: Dict[ SampleId, Dict[SequenceType, SequenceGroupId] ] = defaultdict(dict) - for row in sequence_group_ids_of_duplicate_samples: + for row in sequencing_group_ids_of_duplicate_samples: duplicate_sg_id_map[row['sample_id']][row['type']] = row['id'] values_to_insert: List[Tuple[int, SequenceGroupId]] = [] diff --git a/scripts/create_test_subset.py b/scripts/create_test_subset.py index fd1e74381..c14f13d79 100755 --- a/scripts/create_test_subset.py +++ b/scripts/create_test_subset.py @@ -1,5 +1,9 @@ #!/usr/bin/env python3 -# pylint: disable=too-many-instance-attributes,too-many-locals,unused-argument,wrong-import-order,unused-argument,too-many-arguments +# type: ignore +# pylint: skip-file + + +# # pylint: disable=too-many-instance-attributes,too-many-locals,unused-argument,wrong-import-order,unused-argument,too-many-arguments """ Example Invocation @@ -11,7 +15,7 @@ This example will populate acute-care-test with the metamist data for 4 families. 
""" -from typing import Optional +import csv import logging import os import random @@ -19,25 +23,19 @@ import traceback import typing from collections import Counter -import csv +from typing import Optional import click from google.cloud import storage from metamist import exceptions -from metamist.apis import ( - AnalysisApi, - AssayApi, - SampleApi, - FamilyApi, - ParticipantApi, -) +from metamist.apis import AnalysisApi, AssayApi, FamilyApi, ParticipantApi, SampleApi from metamist.models import ( - BodyGetAssaysByCriteria, - AssayUpsert, - SampleUpsert, Analysis, AnalysisStatus, + AssayUpsert, + BodyGetAssaysByCriteria, + SampleUpsert, ) logger = logging.getLogger(__file__) @@ -313,7 +311,7 @@ def main( ) logger.info(f'Creating {a_type} analysis entry in test') aapi.create_analysis(project=target_project, analysis=am) - logger.info(f'-') + logger.info('-') def transfer_families( diff --git a/scripts/parse_ont_sheet.py b/scripts/parse_ont_sheet.py index 82998215d..a157e0ff6 100644 --- a/scripts/parse_ont_sheet.py +++ b/scripts/parse_ont_sheet.py @@ -1,15 +1,15 @@ #!/usr/bin/env python3 # pylint: disable=too-many-instance-attributes,too-many-locals,unused-argument,wrong-import-order,unused-argument -from typing import List import logging +from typing import List import click -from metamist.parser.generic_parser import ParsedSample, ParsedSequencingGroup from metamist.parser.generic_metadata_parser import ( - run_as_sync, GenericMetadataParser, + run_as_sync, ) +from metamist.parser.generic_parser import ParsedSample, ParsedSequencingGroup logger = logging.getLogger(__file__) logger.addHandler(logging.StreamHandler()) @@ -97,14 +97,14 @@ def parse_fastqs_structure(fastqs) -> List[List[str]]: return [fastqs] async def group_assays(self, sample: ParsedSample) -> list[ParsedSequencingGroup]: - sequence_groups = await super().group_assays(sample) + sequencing_groups = await super().group_assays(sample) - for sequence_group in sequence_groups: + for sequencing_group in sequencing_groups: failed_fastqs: list[str] = [] - for r in sequence_group.rows: + for r in sequencing_group.rows: parsed_failed_fastqs = await self.parse_files( - sequence_group.sample.external_sid, r[Columns.FAIL_FASTQ_FILENAME] + sequencing_group.sample.external_sid, r[Columns.FAIL_FASTQ_FILENAME] ) if 'reads' not in parsed_failed_fastqs: raise ValueError( @@ -120,9 +120,9 @@ async def group_assays(self, sample: ParsedSample) -> list[ParsedSequencingGroup ) failed_fastqs.extend(parsed_failed_fastq_reads['fastq']) - sequence_group.meta['failed_reads'] = failed_fastqs + sequencing_group.meta['failed_reads'] = failed_fastqs - return sequence_groups + return sequencing_groups @click.command() diff --git a/scripts/parse_ped.py b/scripts/parse_ped.py index 894d0c241..f587e483b 100644 --- a/scripts/parse_ped.py +++ b/scripts/parse_ped.py @@ -1,12 +1,9 @@ """ A really simple script to import a pedigree file """ import click +from cloudpathlib import AnyPath -from cloudpathlib import CloudPath - -from metamist.apis import ( - FamilyApi, -) +from metamist.apis import FamilyApi @click.command() @@ -17,7 +14,8 @@ def main(ped_file_path: str, project: str): fapi = FamilyApi() - with CloudPath(ped_file_path).open() as ped_file: + # pylint: disable=no-member + with AnyPath(ped_file_path).open() as ped_file: # type: ignore fapi.import_pedigree( file=ped_file, has_header=True, diff --git a/scripts/sync_seqr.py b/scripts/sync_seqr.py index 67967e437..12be0fd93 100644 --- a/scripts/sync_seqr.py +++ b/scripts/sync_seqr.py @@ -1,27 +1,28 @@ # 
pylint: disable=missing-timeout,unnecessary-lambda-assignment,import-outside-toplevel,too-many-locals import asyncio -import os -import re -import json import datetime +import json import logging +import os +import re import traceback from collections import defaultdict -from typing import Any from io import StringIO +from typing import Any import aiohttp import yaml from cloudpathlib import AnyPath -from metamist.graphql import query_async -from metamist.model.analysis_status import AnalysisStatus -from metamist.model.export_type import ExportType -from metamist.model.analysis_query_model import AnalysisQueryModel + from metamist.apis import ( - SeqrApi, - ProjectApi, AnalysisApi, + ProjectApi, + SeqrApi, ) +from metamist.graphql import query_async +from metamist.model.analysis_query_model import AnalysisQueryModel +from metamist.model.analysis_status import AnalysisStatus +from metamist.model.export_type import ExportType from metamist.parser.generic_parser import chunk loggers_to_silence = [ @@ -410,7 +411,7 @@ async def update_es_index( fn_path = os.path.join(MAP_LOCATION, filename) # pylint: disable=no-member - with AnyPath(fn_path).open('w+') as f: + with AnyPath(fn_path).open('w+') as f: # type: ignore f.write('\n'.join(rows_to_write)) if check_metamist: # len(es_index_analyses) > 0: @@ -678,7 +679,9 @@ def sync_all_datasets(sequencing_type: str, ignore: set[str] = None): continue try: el.run_until_complete( - sync_dataset_async(project_name, seqr_guid, sequencing_type=sequencing_type) + sync_dataset_async( + project_name, seqr_guid, sequencing_type=sequencing_type + ) ) except Exception as e: # pylint: disable=broad-exception-caught print( diff --git a/test/test_analysis.py b/test/test_analysis.py index ec03f0702..ef7e1acac 100644 --- a/test/test_analysis.py +++ b/test/test_analysis.py @@ -1,21 +1,20 @@ # pylint: disable=invalid-overridden-method -from datetime import timedelta, datetime - +from datetime import datetime, timedelta from test.testbase import DbIsolatedTest, run_as_sync -from db.python.tables.analysis import AnalysisFilter -from db.python.utils import GenericFilter -from db.python.layers.assay import AssayLayer from db.python.layers.analysis import AnalysisLayer +from db.python.layers.assay import AssayLayer from db.python.layers.sample import SampleLayer from db.python.layers.sequencing_group import SequencingGroupLayer +from db.python.tables.analysis import AnalysisFilter +from db.python.utils import GenericFilter +from models.enums import AnalysisStatus from models.models import ( AnalysisInternal, AssayUpsertInternal, - SequencingGroupUpsertInternal, SampleUpsertInternal, + SequencingGroupUpsertInternal, ) -from models.enums import AnalysisStatus class TestAnalysis(DbIsolatedTest): @@ -132,7 +131,7 @@ async def test_get_analysis(self): AnalysisInternal( type='analysis-runner', status=AnalysisStatus.UNKNOWN, - sequence_group_ids=[], + sequencing_group_ids=[], meta={}, ) ) @@ -148,7 +147,7 @@ async def test_get_analysis(self): id=a_id, type='analysis-runner', status=AnalysisStatus.UNKNOWN, - sequence_group_ids=[], + sequencing_group_ids=[], output=None, timestamp_completed=None, project=1, diff --git a/test/test_assay.py b/test/test_assay.py index af641a278..cde5f242a 100644 --- a/test/test_assay.py +++ b/test/test_assay.py @@ -1,10 +1,11 @@ from test.testbase import DbIsolatedTest, run_as_sync + from pymysql.err import IntegrityError from db.python.connect import NotFoundError -from db.python.layers.sample import SampleLayer -from db.python.layers.assay import 
AssayLayer from db.python.enum_tables import AssayTypeTable +from db.python.layers.assay import AssayLayer +from db.python.layers.sample import SampleLayer from db.python.tables.assay import AssayFilter from db.python.utils import GenericFilter from models.models.assay import AssayUpsertInternal @@ -200,11 +201,20 @@ async def test_getting_assay_by_external_id(self): ) ) - fquery_1 = AssayFilter(external_id='SEQ01', project=self.project_id) + fquery_1 = AssayFilter( + external_id=GenericFilter(eq='SEQ01'), + project=GenericFilter(eq=self.project_id), + ) self.assertEqual(seq1.id, (await self.assaylayer.query(fquery_1))[0].id) - fquery_2 = AssayFilter(external_id='EXT_SEQ1', project=self.project_id) + fquery_2 = AssayFilter( + external_id=GenericFilter(eq='EXT_SEQ1'), + project=GenericFilter(eq=self.project_id), + ) self.assertEqual(seq1.id, (await self.assaylayer.query(fquery_2))[0].id) - fquery_3 = AssayFilter(external_id='SEQ02', project=self.project_id) + fquery_3 = AssayFilter( + external_id=GenericFilter(eq='SEQ02'), + project=GenericFilter(eq=self.project_id), + ) self.assertEqual(seq2.id, (await self.assaylayer.query(fquery_3))[0].id) @run_as_sync @@ -285,20 +295,22 @@ async def search_result_to_ids(filter_: AssayFilter): ) self.assertSetEqual( {seq1_id, seq2_id}, - await search_result_to_ids(AssayFilter(meta={'common': 'common'})), + await search_result_to_ids( + AssayFilter(meta={'common': GenericFilter(eq='common')}) + ), ) # sample meta self.assertSetEqual( {seq1_id, seq2_id}, await search_result_to_ids( - AssayFilter(sample_meta={'collection-year': '2022'}) + AssayFilter(sample_meta={'collection-year': GenericFilter(eq='2022')}) ), ) self.assertSetEqual( set(), await search_result_to_ids( - AssayFilter(sample_meta={'unknown_key': '2022'}) + AssayFilter(sample_meta={'unknown_key': GenericFilter(eq='2022')}) ), ) @@ -315,7 +327,7 @@ async def search_result_to_ids(filter_: AssayFilter): {seq2_id}, await search_result_to_ids( AssayFilter( - sample_meta={'collection-year': '2022'}, + sample_meta={'collection-year': GenericFilter(eq='2022')}, external_id=GenericFilter(in_=['SEQ02']), ) ), diff --git a/test/test_generic_auditor.py b/test/test_generic_auditor.py index 4fb526dc9..21ef3f6b5 100644 --- a/test/test_generic_auditor.py +++ b/test/test_generic_auditor.py @@ -1,16 +1,16 @@ -from collections import namedtuple import unittest -from unittest.mock import MagicMock, patch +import unittest.mock +from collections import namedtuple + from metamist.audit.generic_auditor import GenericAuditor -# pylint: disable=dangerous-default-value # noqa: B006 class TestGenericAuditor(unittest.TestCase): """Test the audit helper functions""" - @patch('metamist.audit.generic_auditor.query') + @unittest.mock.patch('metamist.audit.generic_auditor.query') def test_get_participant_data_for_dataset(self, mock_query): """Only participants with a non-empty samples field should be returned""" auditor = GenericAuditor( @@ -360,7 +360,7 @@ def test_get_sequence_mapping_warning_logging(self): log.output[0], ) - @patch('metamist.audit.generic_auditor.query') + @unittest.mock.patch('metamist.audit.generic_auditor.query') def test_query_genome_analyses_crams(self, mock_query): """Test that only the genome analysis crams for a sample map dictionary are returned""" auditor = GenericAuditor( @@ -412,7 +412,7 @@ def test_query_genome_analyses_crams(self, mock_query): self.assertDictEqual(test_result, expected_result) - @patch('metamist.audit.generic_auditor.query') + 
@unittest.mock.patch('metamist.audit.generic_auditor.query') def test_query_genome_and_exome_analyses_crams(self, mock_query): """Test that both the genome and exome analysis crams for a sample map dictionary are returned""" auditor = GenericAuditor( @@ -472,7 +472,7 @@ def test_query_genome_and_exome_analyses_crams(self, mock_query): self.assertDictEqual(test_result, expected_result) - @patch('metamist.audit.generic_auditor.query') + @unittest.mock.patch('metamist.audit.generic_auditor.query') def test_query_broken_analyses_crams(self, mock_query): """ All analysis crams must have 'sequencing_type' meta field, @@ -506,7 +506,7 @@ def test_query_broken_analyses_crams(self, mock_query): assay_sg_id_map={1: 'CPG123'} ) - @patch('metamist.audit.generic_auditor.query') + @unittest.mock.patch('metamist.audit.generic_auditor.query') def test_query_analyses_crams_warning(self, mock_query): """Warn if the sample_ids field is absent and the sample meta field is used instead""" auditor = GenericAuditor( @@ -541,7 +541,7 @@ def test_query_analyses_crams_warning(self, mock_query): log.output[0], ) - @patch('metamist.audit.generic_auditor.query') + @unittest.mock.patch('metamist.audit.generic_auditor.query') def test_analyses_for_sgs_without_crams(self, mock_query): """Log any analyses found for samples without completed CRAMs""" auditor = GenericAuditor( @@ -569,7 +569,9 @@ def test_analyses_for_sgs_without_crams(self, mock_query): } with self.assertLogs(level='WARNING') as log: - _ = auditor.analyses_for_sgs_without_crams(sgs_without_crams) + # catch the warning logs from here and check below + auditor.analyses_for_sgs_without_crams(sgs_without_crams) + self.assertEqual(len(log.output), 8) # 8 analysis types checked self.assertEqual(len(log.records), 8) self.assertIn( @@ -577,7 +579,21 @@ def test_analyses_for_sgs_without_crams(self, mock_query): log.output[0], ) - def test_get_complete_and_incomplete_sgs(self): + @unittest.mock.patch( + 'metamist.audit.generic_auditor.GenericAuditor.get_gcs_bucket_subdirs_to_search' + ) + @unittest.mock.patch( + 'metamist.audit.generic_auditor.GenericAuditor.find_files_in_gcs_buckets_subdirs' + ) + @unittest.mock.patch( + 'metamist.audit.generic_auditor.GenericAuditor.analyses_for_sgs_without_crams' + ) + def test_get_complete_and_incomplete_sgs( + self, + mock_analyses_for_sgs_without_crams, + mock_find_files_in_gcs_buckets_subdirs, + mock_get_gcs_bucket_subdirs, + ): """Report on samples that have completed CRAMs and those that dont""" assay_sg_id_map = { # noqa: B006 1: 'CPG123', @@ -591,17 +607,15 @@ def test_get_complete_and_incomplete_sgs(self): auditor = GenericAuditor( dataset='dev', sequencing_type=['genome', 'exome'], file_types=('fastq',) ) - auditor.get_gcs_bucket_subdirs_to_search = MagicMock() - auditor.find_files_in_gcs_buckets_subdirs = MagicMock() - auditor.analyses_for_sgs_without_crams = MagicMock() - auditor.get_gcs_bucket_subdirs_to_search.return_value = { + mock_get_gcs_bucket_subdirs.return_value = { 'cpg-dataset-main': ['cram', 'exome/cram'] } - auditor.find_files_in_gcs_buckets_subdirs.return_value = [ + mock_find_files_in_gcs_buckets_subdirs.return_value = [ 'gs://cpg-dataset-main/cram/CPG123.cram', 'gs://cpg-dataset-main/exome/cram/CPG456.cram', ] + mock_analyses_for_sgs_without_crams.return_value = None result = auditor.get_complete_and_incomplete_sgs( assay_sg_id_map=assay_sg_id_map, @@ -615,8 +629,16 @@ def test_get_complete_and_incomplete_sgs(self): self.assertDictEqual(result, expected_result) - async def 
test_check_for_uningested_or_moved_assays(self): - """Test 2 ingested reads, one ingested and moved read, and one uningested read""" + @unittest.mock.patch('metamist.audit.generic_auditor.GenericAuditor.file_size') + @unittest.mock.patch( + 'metamist.audit.generic_auditor.GenericAuditor.find_sequence_files_in_gcs_bucket' + ) + async def test_check_for_uningested_or_moved_assays( + self, mock_find_sequence_files_in_gcs_bucket, mock_file_size + ): + """ + Test 2 ingested reads, one ingested and moved read, and one uningested read + """ auditor = GenericAuditor( dataset='dev', sequencing_type=['genome'], file_types=('fastq',) ) @@ -627,16 +649,14 @@ async def test_check_for_uningested_or_moved_assays(self): sg_sample_id_map = {'CPG123': 'EXT123'} assay_sg_id_map = {1: 'CPG123'} sample_internal_external_id_map = {'CPG123': 'EXT123'} - auditor.find_sequence_files_in_gcs_bucket = MagicMock() - auditor.find_sequence_files_in_gcs_bucket.return_value = [ + mock_find_sequence_files_in_gcs_bucket.return_value = [ 'read1.fq', 'read2.fq', 'dir2/read3.fq', 'read4.fq', ] - auditor.file_size = MagicMock() - auditor.file_size.return_value = 12 + mock_file_size.return_value = 12 ( uningested_sequence_paths, diff --git a/test/test_generic_filters.py b/test/test_generic_filters.py index 343047c96..cc01e7f80 100644 --- a/test/test_generic_filters.py +++ b/test/test_generic_filters.py @@ -15,12 +15,6 @@ class GenericFilterTest(GenericFilterModel): class TestGenericFilters(unittest.TestCase): """Test generic filters SQL generation""" - def test_post_init_correction(self): - """Test that the post init correction works""" - filter_ = GenericFilterTest(test_string='test') - self.assertIsInstance(filter_.test_string, GenericFilter) - self.assertEqual(filter_.test_string.eq, 'test') - def test_basic_no_override(self): """Test that the basic filter converts to SQL as expected""" filter_ = GenericFilterTest(test_string=GenericFilter(eq='test')) diff --git a/test/test_graphql.py b/test/test_graphql.py index 95b817931..c61eed238 100644 --- a/test/test_graphql.py +++ b/test/test_graphql.py @@ -1,20 +1,19 @@ from test.testbase import DbIsolatedTest, run_as_sync + from graphql.error import GraphQLError, GraphQLSyntaxError import api.graphql.schema -from db.python.layers import ParticipantLayer, AnalysisLayer +from db.python.layers import AnalysisLayer, ParticipantLayer +from metamist.graphql import configure_sync_client, gql, validate +from models.enums import AnalysisStatus from models.models import ( - SampleUpsertInternal, + AnalysisInternal, + AssayUpsertInternal, ParticipantUpsertInternal, + SampleUpsertInternal, SequencingGroupUpsertInternal, - AssayUpsertInternal, - AnalysisInternal, ) from models.utils.sequencing_group_id_format import sequencing_group_id_format -from models.enums import AnalysisStatus - -from metamist.graphql import gql, validate, configure_sync_client - default_assay_meta = { 'sequencing_type': 'genome', @@ -24,7 +23,6 @@ def _get_single_participant_upsert(): - return ParticipantUpsertInternal( external_id='Demeter', meta={}, @@ -43,20 +41,20 @@ def _get_single_participant_upsert(): type='sequencing', meta={ 'reads': [ - { - 'basename': 'sample_id001.filename-R1.fastq.gz', - 'checksum': None, - 'class': 'File', - 'location': '/path/to/sample_id001.filename-R1.fastq.gz', - 'size': 111, - }, - { - 'basename': 'sample_id001.filename-R2.fastq.gz', - 'checksum': None, - 'class': 'File', - 'location': '/path/to/sample_id001.filename-R2.fastq.gz', - 'size': 111, - }, + { + 'basename': 
'sample_id001.filename-R1.fastq.gz', + 'checksum': None, + 'class': 'File', + 'location': '/path/to/sample_id001.filename-R1.fastq.gz', + 'size': 111, + }, + { + 'basename': 'sample_id001.filename-R2.fastq.gz', + 'checksum': None, + 'class': 'File', + 'location': '/path/to/sample_id001.filename-R2.fastq.gz', + 'size': 111, + }, ], 'reads_type': 'fastq', 'batch': 'M001', @@ -114,7 +112,7 @@ def test_validate_provided_schema(self): (strawberry has an as_str() method) """ client = configure_sync_client( - schema=api.graphql.schema.schema.as_str(), auth_token='FAKE' + schema=api.graphql.schema.schema.as_str(), auth_token='FAKE' # type: ignore ) validate(TEST_QUERY, client=client) diff --git a/test/test_import_individual_metadata.py b/test/test_import_individual_metadata.py index ae51f7a20..adb65622e 100644 --- a/test/test_import_individual_metadata.py +++ b/test/test_import_individual_metadata.py @@ -1,5 +1,7 @@ from test.testbase import DbIsolatedTest, run_as_sync +from databases.interfaces import Record + from db.python.layers.participant import ParticipantLayer from models.models.participant import ParticipantUpsertInternal @@ -21,20 +23,22 @@ async def test_import_many_hpo_terms(self): 'HPO Term 3', 'HPO Term 20', ] - rows = [['TP01', 'HP:0000001', 'HP:0000002', 'HP:0000003', 'HP:0000004']] + rows_to_insert = [ + ['TP01', 'HP:0000001', 'HP:0000002', 'HP:0000003', 'HP:0000004'] + ] - await pl.generic_individual_metadata_importer(headers, rows) + await pl.generic_individual_metadata_importer(headers, rows_to_insert) - rows = list( + db_rows: list[Record] = list( await self.connection.connection.fetch_all( 'SELECT participant_id, description, value FROM participant_phenotypes' ) ) - self.assertEqual(1, len(rows)) - self.assertEqual('HPO Terms (present)', rows[0]['description']) + self.assertEqual(1, len(db_rows)) + self.assertEqual('HPO Terms (present)', db_rows[0]['description']) self.assertEqual( - '"HP:0000001,HP:0000002,HP:0000003,HP:0000004"', rows[0]['value'] + '"HP:0000001,HP:0000002,HP:0000003,HP:0000004"', db_rows[0]['value'] ) @run_as_sync @@ -50,12 +54,12 @@ async def test_import_basic_metadata(self): ) headers = ['Individual ID', 'HPO Term 20', 'Age of Onset'] - rows = [ + rows_to_insert = [ ['TP01', 'HP:0000020', 'Congenital'], ['TP02', 'HP:00000021; HP:023', 'Infantile'], ] - await pl.generic_individual_metadata_importer(headers, rows) + await pl.generic_individual_metadata_importer(headers, rows_to_insert) rows = list( await self.connection.connection.fetch_all( diff --git a/test/test_parse_generic_metadata.py b/test/test_parse_generic_metadata.py index ed95a4fbc..ebcc5ec75 100644 --- a/test/test_parse_generic_metadata.py +++ b/test/test_parse_generic_metadata.py @@ -1,33 +1,31 @@ import unittest from datetime import datetime from io import StringIO +from test.testbase import DbIsolatedTest, run_as_sync from unittest.mock import patch -from test.testbase import run_as_sync, DbIsolatedTest - import api.graphql.schema from db.python.layers import ParticipantLayer +from metamist.graphql import configure_sync_client, validate +from metamist.parser.generic_metadata_parser import GenericMetadataParser +from metamist.parser.generic_parser import ( + QUERY_MATCH_ASSAYS, + QUERY_MATCH_PARTICIPANTS, + QUERY_MATCH_SAMPLES, + QUERY_MATCH_SEQUENCING_GROUPS, + ParsedParticipant, + ParsedSample, + ParsedSequencingGroup, +) from models.models import ( + AssayUpsertInternal, ParticipantUpsertInternal, SampleUpsertInternal, SequencingGroupUpsertInternal, - AssayUpsertInternal, ) from 
models.utils.sample_id_format import sample_id_format from models.utils.sequencing_group_id_format import sequencing_group_id_format -from metamist.graphql import validate, configure_sync_client -from metamist.parser.generic_parser import ( - ParsedParticipant, - ParsedSample, - QUERY_MATCH_PARTICIPANTS, - QUERY_MATCH_SAMPLES, - QUERY_MATCH_SEQUENCING_GROUPS, - QUERY_MATCH_ASSAYS, - ParsedSequencingGroup, -) -from metamist.parser.generic_metadata_parser import GenericMetadataParser - def _get_basic_participant_to_upsert(): default_assay_meta = { @@ -96,7 +94,7 @@ def test_queries(self): # only need to apply schema to the first client to create, then it gets cached client = configure_sync_client( - schema=api.graphql.schema.schema.as_str(), auth_token='FAKE' + schema=api.graphql.schema.schema.as_str(), auth_token='FAKE' # type: ignore ) validate(QUERY_MATCH_PARTICIPANTS) validate(QUERY_MATCH_SAMPLES, client=client) @@ -332,11 +330,11 @@ async def test_rows_with_participants(self, mock_graphql_query): # Call generic parser file_contents = '\n'.join(rows) - summary, participants = await parser.parse_manifest( + summary, prows = await parser.parse_manifest( StringIO(file_contents), delimiter='\t', dry_run=True ) - participants: list[ParsedParticipant] = participants + participants: list[ParsedParticipant] = prows self.assertEqual(3, summary['participants']['insert']) self.assertEqual(0, summary['participants']['update']) @@ -749,7 +747,9 @@ async def test_matching_sequencing_groups_and_assays( mock_datetime_added.return_value = datetime.fromisoformat('2022-02-02T22:22:22') player = ParticipantLayer(self.connection) - participant = await player.upsert_participant(_get_basic_participant_to_upsert()) + participant = await player.upsert_participant( + _get_basic_participant_to_upsert() + ) filenames = [ 'sample_id001.filename-R1.fastq.gz', diff --git a/test/test_parse_ont_processor.py b/test/test_parse_ont_processor.py index c14d95e73..a3d301754 100644 --- a/test/test_parse_ont_processor.py +++ b/test/test_parse_ont_processor.py @@ -1,8 +1,7 @@ import unittest from io import StringIO -from unittest.mock import patch - from test.testbase import run_as_sync +from unittest.mock import patch from scripts.process_ont_products import OntProductParser @@ -36,7 +35,7 @@ async def test_single_row_all_files_exist( dry_run=True, ) - parser.skip_checking_gcs_objects = True + # parser.skip_checking_gcs_objects = True fs = [ 'Sample01.bam', 'Sample01.sv.vcf.gz', @@ -44,7 +43,7 @@ async def test_single_row_all_files_exist( 'Sample01.indels.vcf.gz', ] parser.filename_map = {k: 'gs://BUCKET/FAKE/' + k for k in fs} - parser.skip_checking_gcs_objects = True + # parser.skip_checking_gcs_objects = True file_contents = '\n'.join(rows) analyses = await parser.parse_manifest( diff --git a/test/test_parse_ont_sheet.py b/test/test_parse_ont_sheet.py index 72b1acdc0..534bb50f1 100644 --- a/test/test_parse_ont_sheet.py +++ b/test/test_parse_ont_sheet.py @@ -1,11 +1,10 @@ from io import StringIO +from test.testbase import DbIsolatedTest, run_as_sync from unittest.mock import patch -from test.testbase import run_as_sync, DbIsolatedTest - from db.python.layers import ParticipantLayer -from models.models import ParticipantUpsertInternal, SampleUpsertInternal from metamist.parser.generic_parser import ParsedParticipant +from models.models import ParticipantUpsertInternal, SampleUpsertInternal from scripts.parse_ont_sheet import OntParser @@ -125,6 +124,6 @@ async def test_simple_sheet(self, mock_graphql_query): ], } self.maxDiff 
= None - sequence_group = participants[0].samples[0].sequencing_groups[0] - self.assertDictEqual(seqgroup_meta, sequence_group.meta) - self.assertDictEqual(meta_dict, sequence_group.assays[0].meta) + sequencing_group = participants[0].samples[0].sequencing_groups[0] + self.assertDictEqual(seqgroup_meta, sequencing_group.meta) + self.assertDictEqual(meta_dict, sequencing_group.assays[0].meta) diff --git a/test/test_pedigree.py b/test/test_pedigree.py index 77d395863..8966c8cc3 100644 --- a/test/test_pedigree.py +++ b/test/test_pedigree.py @@ -1,9 +1,8 @@ from test.testbase import DbIsolatedTest, run_as_sync -from models.models.participant import ParticipantUpsertInternal - from db.python.layers.family import FamilyLayer from db.python.layers.participant import ParticipantLayer +from models.models.participant import ParticipantUpsertInternal class TestPedigree(DbIsolatedTest): @@ -14,10 +13,10 @@ async def test_import_get_pedigree(self): """Test import + get pedigree""" fl = FamilyLayer(self.connection) - rows = [ - ['FAM01', 'EX01_father', '', '', 1, 1], - ['FAM01', 'EX01_mother', '', '', 2, 1], - ['FAM01', 'EX01_subject', 'EX01_father', 'EX01_mother', 1, 2], + rows: list[list[str]] = [ + ['FAM01', 'EX01_father', '', '', '1', '1'], + ['FAM01', 'EX01_mother', '', '', '2', '1'], + ['FAM01', 'EX01_subject', 'EX01_father', 'EX01_mother', '1', '2'], ] await fl.import_pedigree( diff --git a/test/test_sample.py b/test/test_sample.py index b256a2ae0..e5b8639b7 100644 --- a/test/test_sample.py +++ b/test/test_sample.py @@ -1,7 +1,7 @@ from test.testbase import DbIsolatedTest, run_as_sync -from models.models.sample import SampleUpsertInternal from db.python.layers.sample import SampleLayer +from models.models.sample import SampleUpsertInternal class TestSample(DbIsolatedTest): @@ -17,7 +17,7 @@ async def setUp(self) -> None: @run_as_sync async def test_add_sample(self): """Test inserting a sample""" - s = await self.slayer.upsert_sample( + sample = await self.slayer.upsert_sample( SampleUpsertInternal( external_id='Test01', type='blood', @@ -30,8 +30,7 @@ async def test_add_sample(self): 'SELECT id, type, meta, project FROM sample' ) self.assertEqual(1, len(samples)) - s = samples[0] - self.assertEqual(1, s['id']) + self.assertEqual(sample.id, samples[0]['id']) @run_as_sync async def test_get_sample(self): diff --git a/test/test_search.py b/test/test_search.py index fa92cf80f..ca6718ec5 100644 --- a/test/test_search.py +++ b/test/test_search.py @@ -1,18 +1,25 @@ from test.testbase import DbIsolatedTest, run_as_sync +from db.python.layers.family import FamilyLayer from db.python.layers.participant import ParticipantLayer from db.python.layers.sample import SampleLayer from db.python.layers.search import SearchLayer -from db.python.layers.family import FamilyLayer from db.python.layers.sequencing_group import SequencingGroupLayer from db.python.tables.family_participant import FamilyParticipantTable - from models.enums import SearchResponseType -from models.models.family import PedRowInternal -from models.models.sample import sample_id_format, SampleUpsertInternal -from models.models.participant import ParticipantUpsertInternal -from models.models.sequencing_group import SequencingGroupUpsertInternal, sequencing_group_id_format -from models.models.assay import AssayUpsertInternal +from models.models import ( + AssayUpsertInternal, + FamilySearchResponseData, + ParticipantSearchResponseData, + ParticipantUpsertInternal, + PedRowInternal, + SampleSearchResponseData, + SampleUpsertInternal, + 
SequencingGroupSearchResponseData, + SequencingGroupUpsertInternal, +) +from models.models.sample import sample_id_format +from models.models.sequencing_group import sequencing_group_id_format class TestSample(DbIsolatedTest): @@ -68,7 +75,11 @@ async def test_search_isolated_sample_by_id(self): self.assertEqual(1, len(results)) self.assertEqual(cpg_id, results[0].title) self.assertEqual(cpg_id, results[0].data.id) - self.assertListEqual(['EX001'], results[0].data.sample_external_ids) + + result_data = results[0].data + self.assertIsInstance(result_data, SampleSearchResponseData) + assert isinstance(result_data, SampleSearchResponseData) + self.assertListEqual(['EX001'], result_data.sample_external_ids) @run_as_sync async def test_search_isolated_sequencing_group_by_id(self): @@ -97,19 +108,24 @@ async def test_search_isolated_sequencing_group_by_id(self): 'sequencing_type': 'transcriptome', 'sequencing_technology': 'long-read', 'sequencing_platform': 'illumina', - } + }, ) - ] + ], ) ] ) cpg_sg_id = sequencing_group_id_format(sg[0].id) - results = await self.schlay.search(query=cpg_sg_id, project_ids=[self.project_id]) + results = await self.schlay.search( + query=cpg_sg_id, project_ids=[self.project_id] + ) self.assertEqual(1, len(results)) self.assertEqual(cpg_sg_id, results[0].title) - self.assertEqual(cpg_sg_id, results[0].data.id) - self.assertEqual(cpg_sg_id, results[0].data.sg_external_id) + result_data = results[0].data + assert isinstance(result_data, SequencingGroupSearchResponseData) + self.assertIsInstance(result_data, SequencingGroupSearchResponseData) + self.assertEqual(cpg_sg_id, result_data.id) + self.assertEqual(cpg_sg_id, result_data.sg_external_id) @run_as_sync async def test_search_isolated_sample_by_external_id(self): @@ -125,12 +141,16 @@ async def test_search_isolated_sample_by_external_id(self): cpg_id = sample_id_format(sample.id) self.assertEqual(1, len(results)) - result = results[0] - self.assertEqual(cpg_id, result.title) - self.assertEqual(cpg_id, result.data.id) - self.assertListEqual(['EX001'], result.data.sample_external_ids) - self.assertListEqual([], result.data.participant_external_ids) - self.assertListEqual([], result.data.family_external_ids) + + self.assertEqual(cpg_id, results[0].title) + result_data = results[0].data + + self.assertIsInstance(result_data, SampleSearchResponseData) + assert isinstance(result_data, SampleSearchResponseData) + self.assertEqual(cpg_id, result_data.id) + self.assertListEqual(['EX001'], result_data.sample_external_ids) + self.assertListEqual([], result_data.participant_external_ids) + self.assertListEqual([], result_data.family_external_ids) @run_as_sync async def test_search_participant_isolated(self): @@ -145,12 +165,13 @@ async def test_search_participant_isolated(self): query='PART01', project_ids=[self.project_id] ) self.assertEqual(1, len(results)) - result = results[0] - self.assertEqual(p.id, result.data.id) - self.assertEqual('PART01', result.title) - self.assertListEqual(['PART01'], result.data.participant_external_ids) - self.assertListEqual([], result.data.family_external_ids) - self.assertRaises(AttributeError, lambda: result.data.sample_external_ids) + + self.assertEqual('PART01', results[0].title) + result_data = results[0].data + assert isinstance(result_data, ParticipantSearchResponseData) + self.assertEqual(p.id, result_data.id) + self.assertListEqual(['PART01'], result_data.participant_external_ids) + self.assertListEqual([], result_data.family_external_ids) @run_as_sync async def 
test_search_family(self): @@ -164,11 +185,11 @@ async def test_search_family(self): ) self.assertEqual(1, len(results)) result = results[0] - self.assertEqual(f_id, result.data.id) self.assertEqual('FAMXX01', result.title) - self.assertListEqual(['FAMXX01'], result.data.family_external_ids) - self.assertRaises(AttributeError, lambda: result.data.participant_external_ids) - self.assertRaises(AttributeError, lambda: result.data.sample_external_ids) + result_data = result.data + assert isinstance(result_data, FamilySearchResponseData) + self.assertEqual(f_id, result_data.id) + self.assertListEqual(['FAMXX01'], result_data.family_external_ids) @run_as_sync async def test_search_mixed(self): @@ -195,7 +216,7 @@ async def test_search_mixed(self): sample = await self.slayer.upsert_sample( SampleUpsertInternal( external_id='X:SAM001', - sample_type='blood', + type='blood', participant_id=p.id, ) ) @@ -214,19 +235,26 @@ async def test_search_mixed(self): sample_result = next( r for r in all_results if r.type == SearchResponseType.SAMPLE ) + family_result_data = family_result.data + participant_result_data = participant_result.data + sample_result_data = sample_result.data + + assert isinstance(family_result_data, FamilySearchResponseData) + assert isinstance(participant_result_data, ParticipantSearchResponseData) + assert isinstance(sample_result_data, SampleSearchResponseData) # linked family matches self.assertEqual('X:FAM01', family_result.title) # linked participant matches self.assertEqual('X:PART01', participant_result.title) - self.assertListEqual(['X:FAM01'], participant_result.data.family_external_ids) + self.assertListEqual(['X:FAM01'], participant_result_data.family_external_ids) # linked sample matches cpg_id = sample_id_format(sample.id) - self.assertEqual(cpg_id, sample_result.data.id) - self.assertListEqual(['X:SAM001'], sample_result.data.sample_external_ids) - self.assertListEqual(['X:FAM01'], participant_result.data.family_external_ids) + self.assertEqual(cpg_id, sample_result_data.id) + self.assertListEqual(['X:SAM001'], sample_result_data.sample_external_ids) + self.assertListEqual(['X:FAM01'], participant_result_data.family_external_ids) self.assertListEqual( - ['X:PART01'], participant_result.data.participant_external_ids + ['X:PART01'], participant_result_data.participant_external_ids ) diff --git a/test/test_web.py b/test/test_web.py index 4e6cb53f6..cc5a0bb63 100644 --- a/test/test_web.py +++ b/test/test_web.py @@ -1,27 +1,27 @@ from test.testbase import DbIsolatedTest, run_as_sync +from db.python.layers import ( + AssayLayer, + ParticipantLayer, + SampleLayer, + SequencingGroupLayer, + WebLayer, +) from models.enums import MetaSearchEntityPrefix from models.models import ( + Assay, + AssayInternal, + AssayUpsertInternal, ParticipantUpsertInternal, + ProjectSummaryInternal, SampleUpsertInternal, + SearchItem, SequencingGroupUpsertInternal, - AssayUpsertInternal, - ProjectSummaryInternal, - AssayInternal, - Assay, + WebProject, ) -from models.models import WebProject, SearchItem from models.utils.sample_id_format import sample_id_transform_to_raw from models.utils.sequencing_group_id_format import sequencing_group_id_transform_to_raw -from db.python.layers import ( - AssayLayer, - SequencingGroupLayer, - SampleLayer, - ParticipantLayer, - WebLayer, -) - default_assay_meta = { 'sequencing_type': 'genome', 'sequencing_technology': 'short-read', @@ -71,20 +71,20 @@ def get_test_participant(): type='sequencing', meta={ 'reads': [ - { - 'basename': 
'sample_id001.filename-R1.fastq.gz', - 'checksum': None, - 'class': 'File', - 'location': '/path/to/sample_id001.filename-R1.fastq.gz', - 'size': 111, - }, - { - 'basename': 'sample_id001.filename-R2.fastq.gz', - 'checksum': None, - 'class': 'File', - 'location': '/path/to/sample_id001.filename-R2.fastq.gz', - 'size': 111, - }, + { + 'basename': 'sample_id001.filename-R1.fastq.gz', + 'checksum': None, + 'class': 'File', + 'location': '/path/to/sample_id001.filename-R1.fastq.gz', + 'size': 111, + }, + { + 'basename': 'sample_id001.filename-R2.fastq.gz', + 'checksum': None, + 'class': 'File', + 'location': '/path/to/sample_id001.filename-R2.fastq.gz', + 'size': 111, + }, ], 'reads_type': 'fastq', 'batch': 'M001', @@ -204,9 +204,7 @@ async def test_project_summary_empty(self): # Expect an empty project expected = ProjectSummaryInternal( - project=WebProject( - **{'id': 1, 'name': 'test', 'meta': {}, 'dataset': 'test'} - ), + project=WebProject(id=1, name='test', meta={}, dataset='test'), total_samples=0, total_samples_in_query=0, total_participants=0, @@ -308,9 +306,7 @@ async def project_summary_with_filter_no_results(self): ], ) empty_result = ProjectSummaryInternal( - project=WebProject( - **{'id': 1, 'name': 'test', 'meta': {}, 'dataset': 'test'} - ), + project=WebProject(id=1, name='test', meta={}, dataset='test'), total_samples=0, total_samples_in_query=0, total_participants=0, @@ -455,12 +451,10 @@ async def test_field_with_space(self): token=0, grid_filter=[ SearchItem( - **{ - 'model_type': MetaSearchEntityPrefix.ASSAY, - 'query': 'field wi', - 'field': 'field with spaces', - 'is_meta': True, - } + model_type=MetaSearchEntityPrefix.ASSAY, + query='field wi', + field='field with spaces', + is_meta=True, ) ], ) diff --git a/web/src/pages/project/ProjectGrid.tsx b/web/src/pages/project/ProjectGrid.tsx index 52dbeaa64..83a9fda74 100644 --- a/web/src/pages/project/ProjectGrid.tsx +++ b/web/src/pages/project/ProjectGrid.tsx @@ -372,7 +372,7 @@ const ProjectGrid: React.FunctionComponent = ({ : '1px solid var(--color-border-default)', backgroundColor, }} - key={`${s.id}sequence_group.${k}`} + key={`${s.id}sequencing_group.${k}`} rowSpan={(seq.assays ?? []).length} > {k === 'id' ? 
( From 78cafc7970ef1adeafd7e4a5005a7f7b7d31e500 Mon Sep 17 00:00:00 2001 From: Michael Franklin <22381693+illusional@users.noreply.github.com> Date: Mon, 18 Sep 2023 12:27:09 +1000 Subject: [PATCH 08/12] Add participant phenotypes to graphql (#545) * Add participant phenotypes to graphql * Add test for graphql phenotypes * Fix unrelated linting issues * Slight linting updates * PR cleanup --------- Co-authored-by: Michael Franklin --- api/graphql/loaders.py | 38 +++++++++++++++++++++++---------- api/graphql/schema.py | 33 ++++++++++++++-------------- db/python/layers/participant.py | 30 ++++++++++++++++++++++++-- scripts/parse_ped.py | 2 +- test/test_graphql.py | 27 +++++++++++++++++++++++ 5 files changed, 99 insertions(+), 31 deletions(-) diff --git a/api/graphql/loaders.py b/api/graphql/loaders.py index 2a54fc514..905297009 100644 --- a/api/graphql/loaders.py +++ b/api/graphql/loaders.py @@ -13,26 +13,26 @@ from db.python.connect import NotFoundError from db.python.layers import ( AnalysisLayer, - SampleLayer, AssayLayer, + FamilyLayer, ParticipantLayer, + SampleLayer, SequencingGroupLayer, - FamilyLayer, ) from db.python.tables.analysis import AnalysisFilter from db.python.tables.assay import AssayFilter from db.python.tables.project import ProjectPermissionsTable from db.python.tables.sample import SampleFilter from db.python.tables.sequencing_group import SequencingGroupFilter -from db.python.utils import ProjectId, GenericFilter +from db.python.utils import GenericFilter, ProjectId from models.models import ( - AssayInternal, - SampleInternal, - SequencingGroupInternal, AnalysisInternal, - ParticipantInternal, + AssayInternal, FamilyInternal, + ParticipantInternal, Project, + SampleInternal, + SequencingGroupInternal, ) @@ -53,6 +53,8 @@ class LoaderKeys(enum.Enum): SAMPLES_FOR_PARTICIPANTS = 'samples_for_participants' SAMPLES_FOR_PROJECTS = 'samples_for_projects' + PHENOTYPES_FOR_PARTICIPANTS = 'phenotypes_for_participants' + PARTICIPANTS_FOR_IDS = 'participants_for_ids' PARTICIPANTS_FOR_FAMILIES = 'participants_for_families' PARTICIPANTS_FOR_PROJECTS = 'participants_for_projects' @@ -291,9 +293,7 @@ async def load_participants_for_ids( p_by_id = {p.id: p for p in persons} missing_pids = set(participant_ids) - set(p_by_id.keys()) if missing_pids: - raise NotFoundError( - f'Could not find participants with ids {missing_pids}' - ) + raise NotFoundError(f'Could not find participants with ids {missing_pids}') return [p_by_id.get(p) for p in participant_ids] @@ -400,7 +400,23 @@ async def load_analyses_for_sequencing_groups( return by_sg_id -async def get_context(request: Request, connection=get_projectless_db_connection): # pylint: disable=unused-argument +@connected_data_loader(LoaderKeys.PHENOTYPES_FOR_PARTICIPANTS) +async def load_phenotypes_for_participants( + participant_ids: list[int], connection +) -> list[dict]: + """ + Data loader for phenotypes for participants + """ + player = ParticipantLayer(connection) + participant_phenotypes = await player.get_phenotypes_for_participants( + participant_ids=participant_ids + ) + return [participant_phenotypes.get(pid, {}) for pid in participant_ids] + + +async def get_context( + request: Request, connection=get_projectless_db_connection +): # pylint: disable=unused-argument """Get loaders / cache context for strawberyy GraphQL""" mapped_loaders = {k: fn(connection) for k, fn in loaders.items()} return { diff --git a/api/graphql/schema.py b/api/graphql/schema.py index 8befa8867..7255821ed 100644 --- a/api/graphql/schema.py +++ 
b/api/graphql/schema.py @@ -14,16 +14,10 @@ from strawberry.fastapi import GraphQLRouter from strawberry.types import Info -from api.graphql.filters import ( - GraphQLFilter, - GraphQLMetaFilter, -) -from api.graphql.loaders import ( - get_context, - LoaderKeys, -) +from api.graphql.filters import GraphQLFilter, GraphQLMetaFilter +from api.graphql.loaders import LoaderKeys, get_context from db.python import enum_tables -from db.python.layers import AnalysisLayer, SequencingGroupLayer, SampleLayer +from db.python.layers import AnalysisLayer, SampleLayer, SequencingGroupLayer from db.python.layers.assay import AssayLayer from db.python.layers.family import FamilyLayer from db.python.tables.analysis import AnalysisFilter @@ -34,21 +28,19 @@ from db.python.utils import GenericFilter from models.enums import AnalysisStatus from models.models import ( - SampleInternal, - ParticipantInternal, - Project, AnalysisInternal, + AssayInternal, FamilyInternal, + ParticipantInternal, + Project, + SampleInternal, SequencingGroupInternal, - AssayInternal, ) from models.models.sample import sample_id_transform_to_raw -from models.utils.sample_id_format import ( - sample_id_format, -) +from models.utils.sample_id_format import sample_id_format from models.utils.sequencing_group_id_format import ( - sequencing_group_id_transform_to_raw, sequencing_group_id_format, + sequencing_group_id_transform_to_raw, ) enum_methods = {} @@ -336,6 +328,13 @@ async def samples( samples = await info.context[LoaderKeys.SAMPLES_FOR_PARTICIPANTS].load(q) return [GraphQLSample.from_internal(s) for s in samples] + @strawberry.field + async def phenotypes( + self, info: Info, root: 'GraphQLParticipant' + ) -> strawberry.scalars.JSON: + loader = info.context[LoaderKeys.PHENOTYPES_FOR_PARTICIPANTS] + return await loader.load(root.id) + @strawberry.field async def families( self, info: Info, root: 'GraphQLParticipant' diff --git a/db/python/layers/participant.py b/db/python/layers/participant.py index 216427e31..44d6d4db2 100644 --- a/db/python/layers/participant.py +++ b/db/python/layers/participant.py @@ -2,9 +2,9 @@ import re from collections import defaultdict from enum import Enum -from typing import Dict, List, Tuple, Optional, Any +from typing import Any, Dict, List, Optional, Tuple -from db.python.connect import NotFoundError, NoOpAenter +from db.python.connect import NoOpAenter, NotFoundError from db.python.layers.base import BaseLayer from db.python.layers.sample import SampleLayer from db.python.tables.family import FamilyTable @@ -335,6 +335,21 @@ async def fill_in_missing_participants(self): return f'Updated {len(sample_ids_to_update)} records' + async def insert_participant_phenotypes( + self, participant_phenotypes: dict[int, dict] + ): + """ + Insert participant phenotypes, with format: {pid: {key: value}} + """ + ppttable = ParticipantPhenotypeTable(self.connection) + return await ppttable.add_key_value_rows( + [ + (pid, pk, pv) + for pid, phenotypes in participant_phenotypes.items() + for pk, pv in phenotypes.items() + ] + ) + async def generic_individual_metadata_importer( self, headers: List[str], @@ -653,6 +668,17 @@ async def update_many_participant_external_ids( # region PHENOTYPES / SEQR + async def get_phenotypes_for_participants( + self, participant_ids: list[int] + ) -> dict[int, dict[str, Any]]: + """ + Get phenotypes for participants keyed by by pid + """ + ppttable = ParticipantPhenotypeTable(self.connection) + return await ppttable.get_key_value_rows_for_participant_ids( + participant_ids=participant_ids 
+ ) + async def get_seqr_individual_template( self, project: int, diff --git a/scripts/parse_ped.py b/scripts/parse_ped.py index f587e483b..831ddd402 100644 --- a/scripts/parse_ped.py +++ b/scripts/parse_ped.py @@ -15,7 +15,7 @@ def main(ped_file_path: str, project: str): fapi = FamilyApi() # pylint: disable=no-member - with AnyPath(ped_file_path).open() as ped_file: # type: ignore + with AnyPath(ped_file_path).open() as ped_file: fapi.import_pedigree( file=ped_file, has_header=True, diff --git a/test/test_graphql.py b/test/test_graphql.py index c61eed238..a52a5f947 100644 --- a/test/test_graphql.py +++ b/test/test_graphql.py @@ -229,3 +229,30 @@ async def test_sg_analyses_query(self): self.assertIn('id', analyses[0]) self.assertIn('meta', analyses[0]) self.assertIn('output', analyses[0]) + + @run_as_sync + async def test_participant_phenotypes(self): + """ + Test getting participant phentypes in graphql + """ + # insert participant + p = await self.player.upsert_participant( + ParticipantUpsertInternal(external_id='Demeter', meta={}, samples=[]) + ) + + phenotypes = {'phenotype1': 'value1', 'phenotype2': {'number': 123}} + # insert participant_phenotypes + await self.player.insert_participant_phenotypes({p.id: phenotypes}) + + q = """ +query MyQuery($pid: Int!) { + participant(id: $pid) { + phenotypes + } +}""" + + resp = await self.run_graphql_query_async(q, {'pid': p.id}) + + self.assertIn('participant', resp) + self.assertIn('phenotypes', resp['participant']) + self.assertDictEqual(phenotypes, resp['participant']['phenotypes']) From f128b70ca71d33537c9d0922f196901dc3f23d2f Mon Sep 17 00:00:00 2001 From: John Marshall Date: Mon, 18 Sep 2023 15:33:57 +1200 Subject: [PATCH 09/12] Use current GitHub Actions versions (and avoid ::set-output) (#520) Prevent "The following action(s) uses node12 which is deprecated" warnings by updating to the current releases of the Actions used. (Node.js 12 is scheduled for removal from Actions runners next month.) Use the current releases instead of `@main` too, as we don't want to be affected by bleeding-edge bugs. Rewrite ::set-output as a write to $GITHUB_OUTPUT instead; the ::save-state and ::set-output commands are also deprecated. 
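
In concrete terms, a step that previously set an output via the workflow
command now appends a key=value pair to the file named by $GITHUB_OUTPUT;
the test workflow's return-code output in the hunk below is a minimal
example of the rewrite:

    # deprecated workflow command (triggers the node12/command deprecation warnings)
    echo "::set-output name=rc::$rc"

    # replacement: append the output as key=value to the step's output file
    echo "rc=$rc" >> $GITHUB_OUTPUT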
--- .github/workflows/deploy.yaml | 8 ++++---- .github/workflows/lint.yaml | 4 ++-- .github/workflows/test.yaml | 12 ++++++------ 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index ca5c077f1..997b1129d 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -21,10 +21,10 @@ jobs: run: shell: bash -eo pipefail -l {0} steps: - - uses: actions/checkout@main + - uses: actions/checkout@v3 - name: "gcloud setup" - uses: google-github-actions/setup-gcloud@v0 + uses: google-github-actions/setup-gcloud@v1 with: project_id: sample-metadata service_account_key: ${{ secrets.GCP_SERVER_DEPLOY_KEY }} @@ -33,11 +33,11 @@ jobs: run: | gcloud auth configure-docker australia-southeast1-docker.pkg.dev - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: python-version: "3.10" - - uses: actions/setup-java@v2 + - uses: actions/setup-java@v3 with: distribution: "temurin" # See 'Supported distributions' for available options java-version: "17" diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 68f65dc86..71c330c5a 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -14,9 +14,9 @@ jobs: run: shell: bash -eo pipefail -l {0} steps: - - uses: actions/checkout@main + - uses: actions/checkout@v3 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: python-version: "3.10" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index aaffc014b..e15e52f8e 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,13 +16,13 @@ jobs: run: shell: bash -eo pipefail -l {0} steps: - - uses: actions/checkout@main + - uses: actions/checkout@v3 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: python-version: "3.10" - - uses: actions/setup-java@v2 + - uses: actions/setup-java@v3 with: distribution: "temurin" # See 'Supported distributions' for available options java-version: "17" @@ -70,10 +70,10 @@ jobs: rc=$? 
coverage xml - echo "::set-output name=rc::$rc" + echo "rc=$rc" >> $GITHUB_OUTPUT - name: "Upload coverage report" - uses: codecov/codecov-action@v2 + uses: codecov/codecov-action@v3 with: files: ./coverage.xml @@ -89,7 +89,7 @@ jobs: - name: Fail if tests are not passing if: ${{ steps.runtests.outputs.rc != 0 }} - uses: actions/github-script@v3 + uses: actions/github-script@v6 with: script: | core.setFailed('Unit tests failed with rc = ${{ steps.runtests.outputs.rc }}') From 151f61f32874ebb8f0d4809fdb24b338e9d7c4de Mon Sep 17 00:00:00 2001 From: Michael Franklin <22381693+illusional@users.noreply.github.com> Date: Wed, 20 Sep 2023 10:42:03 +1000 Subject: [PATCH 10/12] Add workload identity federated login (#553) --- .github/workflows/deploy.yaml | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 997b1129d..45ce05ba8 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -7,6 +7,10 @@ on: - main - dev +permissions: + id-token: write + contents: read + jobs: deploy: runs-on: ubuntu-latest @@ -23,11 +27,16 @@ jobs: steps: - uses: actions/checkout@v3 - - name: "gcloud setup" - uses: google-github-actions/setup-gcloud@v1 + - id: "google-cloud-auth" + name: "Authenticate to Google Cloud" + uses: "google-github-actions/auth@v1" with: - project_id: sample-metadata - service_account_key: ${{ secrets.GCP_SERVER_DEPLOY_KEY }} + workload_identity_provider: "projects/774248915715/locations/global/workloadIdentityPools/gh-deploy-pool/providers/gh-provider" + service_account: "sample-metadata-deploy@sample-metadata.iam.gserviceaccount.com" + + - id: "google-cloud-sdk-setup" + name: "Set up Cloud SDK" + uses: google-github-actions/setup-gcloud@v1 - name: "gcloud docker auth" run: | From 0c148a6d54980d983b5fcca964f4eb70cdcba4b1 Mon Sep 17 00:00:00 2001 From: MattWellie Date: Thu, 13 Apr 2023 15:09:15 +1000 Subject: [PATCH 11/12] allow noninteractive running --- scripts/create_test_subset.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/scripts/create_test_subset.py b/scripts/create_test_subset.py index c14f13d79..5d7d9513a 100755 --- a/scripts/create_test_subset.py +++ b/scripts/create_test_subset.py @@ -21,7 +21,6 @@ import random import subprocess import traceback -import typing from collections import Counter from typing import Optional @@ -97,6 +96,13 @@ This is in addition to the number of families specified in --families and the number of samples specified in -n""", ) +@click.option( + '--noninteractive', + 'noninteractive', + is_flag=True, + default=False, + help='Skip interactive confirmation', +) def main( project: str, samples_n: Optional[int], @@ -104,13 +110,14 @@ def main( skip_ped: Optional[bool] = True, additional_families: Optional[tuple[str]] = None, additional_samples: Optional[tuple[str]] = None, + noninteractive: Optional[bool] = False, ): """ Script creates a test subset for a given project. A new project with a prefix -test is created, and for any files in sample/meta, sequence/meta, or analysis/output a copy in the -test namespace is created. 
""" - samples_n, families_n = _validate_opts(samples_n, families_n) + samples_n, families_n = _validate_opts(samples_n, families_n, noninteractive) _additional_families: list[str] = list(additional_families) _additional_samples: list[str] = list(additional_samples) @@ -121,7 +128,7 @@ def main( } ) logger.info(f'Found {len(all_samples)} samples') - if samples_n and samples_n >= len(all_samples): + if (samples_n and samples_n >= len(all_samples)) and not noninteractive: resp = str( input( f'Requesting {samples_n} samples which is >= ' @@ -440,8 +447,7 @@ def get_map_ipid_esid( ip_es_map = [] for ip_is_pair in ip_is_map: - samples_per_participant = [] - samples_per_participant.append(ip_is_pair[0]) + samples_per_participant = [ip_is_pair[0]] for isid in ip_is_pair[1:]: if isid in is_es_map: samples_per_participant.append(is_es_map[isid]) @@ -453,10 +459,9 @@ def get_map_ipid_esid( return external_sample_internal_participant_map -def get_samples_for_families(project: str, additional_families: list[str]): +def get_samples_for_families(project: str, additional_families: list[str]) -> list[str]: """Returns the samples that belong to a list of families""" - samples: list[str] = [] full_pedigree = fapi.get_pedigree( project=project, replace_with_participant_external_ids=False, @@ -477,7 +482,7 @@ def get_samples_for_families(project: str, additional_families: list[str]): } ) - samples = [sample['id'] for sample in sample_objects] + samples: list[str] = [sample['id'] for sample in sample_objects] return samples @@ -485,9 +490,8 @@ def get_samples_for_families(project: str, additional_families: list[str]): def get_fams_for_samples( project: str, additional_samples: Optional[list[str]] = None, -): +) -> list[str]: """Returns the families that a list of samples belong to""" - fams: set[str] = set() sample_objects = sapi.get_samples( body_get_samples={ 'project_ids': [project], @@ -503,7 +507,7 @@ def get_fams_for_samples( replace_with_family_external_ids=True, ) - fams = { + fams: set[str] = { fam['family_id'] for fam in full_pedigree if str(fam['individual_id']) in pids } @@ -524,7 +528,7 @@ def _normalise_map(unformatted_map: list[list[str]]) -> dict[str, str]: def _validate_opts( - samples_n: int, families_n: int + samples_n: int, families_n: int, noninteractive: bool ) -> tuple[Optional[int], Optional[int]]: if samples_n is not None and families_n is not None: raise click.BadParameter('Please specify only one of --samples or --families') @@ -539,7 +543,7 @@ def _validate_opts( if families_n is not None and families_n < 1: raise click.BadParameter('Please specify --families higher than 0') - if families_n is not None and families_n >= 30: + if (families_n is not None and families_n >= 30) and not noninteractive: resp = str( input( f'You requested a subset of {families_n} families. ' @@ -549,7 +553,7 @@ def _validate_opts( if resp.lower() != 'y': raise SystemExit() - if samples_n is not None and samples_n >= 100: + if (samples_n is not None and samples_n >= 100) and not noninteractive: resp = str( input( f'You requested a subset of {samples_n} samples. 
' @@ -563,7 +567,7 @@ def _validate_opts( def _print_fam_stats(families: list[dict[str, str]]): family_sizes = Counter([fam['family_id'] for fam in families]) - fam_by_size: typing.Counter[int] = Counter() + fam_by_size: Counter[int] = Counter() # determine number of singles, duos, trios, etc for fam in family_sizes: fam_by_size[family_sizes[fam]] += 1 From b95ca51718bdb35ddc8f6745591962daec1227a4 Mon Sep 17 00:00:00 2001 From: danielreti <47542969+danielreti@users.noreply.github.com> Date: Wed, 20 Sep 2023 11:44:44 +1000 Subject: [PATCH 12/12] Added graphql link + changed title to metamist (#549) --- web/src/index.html | 2 +- web/src/shared/components/Header/NavBar.tsx | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/web/src/index.html b/web/src/index.html index 74c7cadab..913637632 100644 --- a/web/src/index.html +++ b/web/src/index.html @@ -8,7 +8,7 @@ - CPG Sample Metadata + CPG Metamist diff --git a/web/src/shared/components/Header/NavBar.tsx b/web/src/shared/components/Header/NavBar.tsx index 69da89ca4..1b9cd09f0 100644 --- a/web/src/shared/components/Header/NavBar.tsx +++ b/web/src/shared/components/Header/NavBar.tsx @@ -7,6 +7,7 @@ import { Popup } from 'semantic-ui-react' import ExploreIcon from '@mui/icons-material/Explore' import DescriptionIcon from '@mui/icons-material/Description' import InsightsIcon from '@mui/icons-material/Insights' +import BuildIcon from '@mui/icons-material/Build' import Searchbar from './Search' import MuckTheDuck from '../MuckTheDuck' import DarkModeTriButton from './DarkModeTriButton/DarkModeTriButton' @@ -60,6 +61,14 @@ const NavBar: React.FunctionComponent = () => ( + + GraphQL + + } hoverable position="bottom center"> +
GraphQL
+
+
+
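
The new "GraphQL" navigation entry presumably points at the interactive GraphQL endpoint. As an example of what that endpoint exposes, the participant phenotypes field added in patch 08 of this series can be queried with the same minimal query used in its unit test:

    query MyQuery($pid: Int!) {
      participant(id: $pid) {
        phenotypes
      }
    }

The result is the participant's phenotype key/value map, returned as a JSON scalar.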