diff --git a/documentation/reports.md b/documentation/reports.md index 434fa9c4cd..914d1228ca 100644 --- a/documentation/reports.md +++ b/documentation/reports.md @@ -95,3 +95,58 @@ Fields in the admin screen CSV report - views - downloads - citations + +Authors at an Institution Report (from SQL) +------------------------------------------- + +This SQL may be more complicated than it needs to be, but it seems to work. It gives information about +published and embargoed items with authors at an institution based on the ROR ids you put into the query +(look and replace in two separate places). +It can then be exported from SQL as TSV/CSV or whatever. There are duplicate rows per dataset if more +than one author is from the institution that contributed to the same dataset. The views and downloads may +be off if we go to a model where we retrieve them in real-time from DataCite because they come from +multiple sources (like us and Zenodo) and we don't pre-populate them all the time. + +It includes published and embargoed (shows the landing page but no downloads) which is why some +publication dates are in the future. + +Sorry, IDK why the markdown seems to do it's own thing when indenting, even if I do it preformatted. + +
+SELECT se_id3.identifier, se_res3.title, se_auth3.author_first_name, se_auth3.author_last_name, dcs_affil3.long_name, se_res3.publication_date, +(stash_engine_counter_stats.unique_investigation_count - stash_engine_counter_stats.unique_request_count) as unique_views, stash_engine_counter_stats.unique_request_count as unique_downloads +FROM + /* get only the earliest published/embagoed one */ + (SELECT unique_ids.identifier_id, min(res2.id) first_pub_resource FROM + /* only get distinct identifiers from all the ror_ids working back through zillions of joined tables */ + (SELECT DISTINCT se_id.id as identifier_id + FROM dcs_affiliations affil + JOIN dcs_affiliations_authors affil_auth + ON affil.id = affil_auth.`affiliation_id` + JOIN stash_engine_authors auth + ON affil_auth.`author_id` = auth.`id` + JOIN stash_engine_resources res + ON auth.`resource_id` = res.id + JOIN stash_engine_identifiers se_id + ON se_id.id = res.identifier_id + WHERE affil.ror_id IN ('https://ror.org/02y3ad647', 'https://ror.org/0419bgt07', 'https://ror.org/04tk2gy88') + AND se_id.pub_state IN ('published', 'embargoed')) as unique_ids + JOIN stash_engine_resources res2 + ON unique_ids.identifier_id = res2.identifier_id + WHERE res2.publication_date IS NOT NULL + GROUP BY unique_ids.identifier_id) as ident_and_res +JOIN stash_engine_identifiers se_id3 +ON se_id3.id = ident_and_res.identifier_id +JOIN stash_engine_resources se_res3 +ON se_res3.id = ident_and_res.first_pub_resource +JOIN stash_engine_authors se_auth3 +ON se_res3.id = se_auth3.`resource_id` +JOIN dcs_affiliations_authors dcs_affils_authors3 +ON se_auth3.`id` = dcs_affils_authors3.`author_id` +JOIN dcs_affiliations dcs_affil3 +ON dcs_affils_authors3.`affiliation_id` = dcs_affil3.`id` +LEFT JOIN stash_engine_counter_stats +ON se_id3.id = stash_engine_counter_stats.`identifier_id` +WHERE dcs_affil3.ror_id IN ('https://ror.org/02y3ad647', 'https://ror.org/0419bgt07', 'https://ror.org/04tk2gy88') +ORDER BY se_res3.publication_date, se_id3.identifier, se_res3.title; +