From 75f92f92f464115791941d02527bed7ea25ac794 Mon Sep 17 00:00:00 2001 From: James Hayhurst Date: Wed, 3 Jul 2024 13:56:17 +0000 Subject: [PATCH 01/15] remove classic aotf aggregation code --- app/models/Backend.scala | 306 ++----------------------- app/models/ElasticRetriever.scala | 59 ----- app/models/entities/Aggregations.scala | 33 --- app/models/entities/Associations.scala | 3 +- app/models/gql/Arguments.scala | 13 +- app/models/gql/Objects.scala | 13 +- conf/application.conf | 4 - 7 files changed, 18 insertions(+), 413 deletions(-) delete mode 100644 app/models/entities/Aggregations.scala diff --git a/app/models/Backend.scala b/app/models/Backend.scala index 0c79159f..3746885e 100644 --- a/app/models/Backend.scala +++ b/app/models/Backend.scala @@ -13,7 +13,6 @@ import gql.validators.QueryTermsValidator._ import models.Helpers._ import models.db.{QAOTF, QLITAGG, QW2V, SentenceQuery} import models.entities.Publication._ -import models.entities.Aggregations._ import models.entities.Associations._ import models.entities.Configuration._ import models.entities.DiseaseHPOs._ @@ -463,7 +462,6 @@ class Backend @Inject() (implicit datasources: Option[Seq[DatasourceSettings]], indirect: Boolean, facetFilters: Seq[String], - aggregationFilters: Seq[AggregationFilter], targetSet: Set[String], filter: Option[String], orderBy: Option[(String, String)], @@ -491,202 +489,31 @@ class Backend @Inject() (implicit val indirectIDs = if (indirect) disease.descendants.toSet + disease.id else Set.empty[String] val targetIds = applyFacetFiltersToBIDs("facet_search_target", targetSet, facetFilters) val simpleQ = aotfQ(indirectIDs, targetIds).simpleQuery(0, 100000) - - val evidencesIndexName = defaultESSettings.entities - .find(_.name == "evidences_aotf") - .map(_.index) - .getOrElse("evidences_aotf") - - val tractabilityMappings = - List("SmallMolecule", "Antibody", "Protac", "OtherModalities").map { t => - s"tractability$t" -> AggregationMapping( - s"facet_tractability_${t.toLowerCase}", - IndexedSeq.empty, - nested = false - ) - }.toMap - val mappings = Map( - "dataTypes" -> AggregationMapping( - "datatype_id", - IndexedSeq("datatype_id", "datasource_id"), - false - ), - "pathwayTypes" -> AggregationMapping("facet_reactome", IndexedSeq("l1", "l2"), true), - "targetClasses" -> AggregationMapping("facet_classes", IndexedSeq("l1", "l2"), true) - ) ++ tractabilityMappings - - val queries = ElasticRetriever.aggregationFilterProducer(aggregationFilters, mappings) - val filtersMap = queries._2 - - val uniqueTargetsAgg = - CardinalityAggregation("uniques", Some("target_id.keyword"), precisionThreshold = Some(40000)) - val reverseTargetsAgg = ReverseNestedAggregation("uniques", None, Seq(uniqueTargetsAgg)) - - val queryAggs = Seq( - FilterAggregation( - "uniques", - queries._1, - subaggs = Seq( - uniqueTargetsAgg, - TermsAggregation("ids", field = Some("target_id.keyword"), size = Some(40000)) - ) - ), - FilterAggregation( - "dataTypes", - filtersMap("dataTypes"), - subaggs = Seq( - uniqueTargetsAgg, - TermsAggregation( - "aggs", - Some("datatype_id.keyword"), - size = Some(100), - subaggs = Seq( - uniqueTargetsAgg, - TermsAggregation( - "aggs", - Some("datasource_id.keyword"), - size = Some(100), - subaggs = Seq( - uniqueTargetsAgg - ) - ) - ) - ) - ) - ), - FilterAggregation( - "pathwayTypes", - filtersMap("pathwayTypes"), - subaggs = Seq( - uniqueTargetsAgg, - NestedAggregation( - "aggs", - path = "facet_reactome", - subaggs = Seq( - TermsAggregation( - "aggs", - Some("facet_reactome.l1.keyword"), - size = 
Some(100), - subaggs = Seq( - TermsAggregation( - "aggs", - Some("facet_reactome.l2.keyword"), - size = Some(100), - subaggs = Seq(reverseTargetsAgg) - ), - reverseTargetsAgg - ) - ), - reverseTargetsAgg - ) - ) - ) - ), - FilterAggregation( - "targetClasses", - filtersMap("targetClasses"), - subaggs = Seq( - uniqueTargetsAgg, - NestedAggregation( - "aggs", - path = "facet_classes", - subaggs = Seq( - TermsAggregation( - "aggs", - Some("facet_classes.l1.keyword"), - size = Some(100), - subaggs = Seq( - TermsAggregation( - "aggs", - Some("facet_classes.l2.keyword"), - size = Some(100), - subaggs = Seq(reverseTargetsAgg) - ), - reverseTargetsAgg - ) - ), - reverseTargetsAgg - ) - ) - ) - ) - ) ++ tractabilityMappings.map { kv => - FilterAggregation( - kv._1, - ElasticRetriever.aggregationFilterProducer(aggregationFilters, Map(kv))._1, - subaggs = Seq( - uniqueTargetsAgg, - TermsAggregation( - "aggs", - Some(s"${kv._2.key}.keyword"), - size = Some(100), - subaggs = Seq( - uniqueTargetsAgg - ) - ) - ) - ) - } - - val esQ = esRetriever.getAggregationsByQuery( - evidencesIndexName, - boolQuery() - .withShould( - boolQuery() - .withMust(termsQuery("disease_id.keyword", indirectIDs)) - .withMust(not(termsQuery("datasource_id.keyword", dontPropagate))) - ) - .withShould( - boolQuery() - .withMust(termQuery("disease_id.keyword", disease.id)) - ), - queryAggs - ) map { - case obj: JsObject => - logger.trace(Json.prettyPrint(obj)) - - val ids = (obj \ "uniques" \ "ids" \ "buckets" \\ "key").map(_.as[String]).toSet - val uniques = (obj \ "uniques" \\ "value").head.as[Long] - val restAggs: Seq[NamedAggregation] = ((obj - "uniques").fields map { pair => - NamedAggregation( - pair._1, - (pair._2 \ "uniques" \\ "value").headOption.map(jv => jv.as[Long]), - ArraySeq.unsafeWrapArray((pair._2 \\ "buckets").head.as[Array[entities.Aggregation]]) - ) - }).to(Seq) - - Some((Aggregations(uniques, restAggs), ids)) - - case _ => None - } - - // TODO use option to enable or disable the computation of each of the sides - (dbRetriever.executeQuery[String, Query](simpleQ) zip esQ) flatMap { case (tIDs, esR) => - val tids = esR.map(_._2 intersect tIDs.toSet).getOrElse(tIDs.toSet) + + (dbRetriever.executeQuery[String, Query](simpleQ)) flatMap { case tIDs => + val tids = tIDs.toSet + logger.info(tids.toString()) val fullQ = aotfQ(indirectIDs, tids).query logger.debug( - s"disease fixed get simpleQ n ${tIDs.size} " + - s"agg n ${esR.map(_._2.size).getOrElse(-1)} " + + s"target fixed get simpleQ n ${tIDs.size} " + s"inter n ${tids.size}" ) - if (tids.nonEmpty) { dbRetriever.executeQuery[Association, Query](fullQ) map { case assocs => - Associations(dss, esR.map(_._1), tids.size, assocs) + Associations(dss, tids.size, assocs) } } else { - Future.successful(Associations(dss, esR.map(_._1), tids.size, Vector.empty)) + Future.successful(Associations(dss, tids.size, Vector.empty)) } } } - + def getAssociationsTargetFixed( target: Target, datasources: Option[Seq[DatasourceSettings]], indirect: Boolean, facetFilters: Seq[String], - aggregationFilters: Seq[AggregationFilter], diseaseSet: Set[String], filter: Option[String], orderBy: Option[(String, String)], @@ -728,127 +555,22 @@ class Backend @Inject() (implicit val diseaseIds = applyFacetFiltersToBIDs("facet_search_disease", diseaseSet, facetFilters) val simpleQ = aotfQ(indirectIDs, diseaseIds).simpleQuery(0, 100000) - - val evidencesIndexName = defaultESSettings.entities - .find(_.name == "evidences_aotf") - .map(_.index) - .getOrElse("evidences_aotf") - - val mappings = Map( - 
"dataTypes" -> AggregationMapping( - "datatype_id", - IndexedSeq("datatype_id", "datasource_id"), - false - ), - "therapeuticAreas" -> AggregationMapping("facet_therapeuticAreas", IndexedSeq.empty, false) - ) - - val queries = ElasticRetriever.aggregationFilterProducer(aggregationFilters, mappings) - val filtersMap = queries._2 - - val uniqueDiseasesAgg = CardinalityAggregation( - "uniques", - Some("disease_id.keyword"), - precisionThreshold = Some(40000) - ) - - val queryAggs = Seq( - FilterAggregation( - "uniques", - queries._1, - subaggs = Seq( - uniqueDiseasesAgg, - TermsAggregation("ids", field = Some("disease_id.keyword"), size = Some(40000)) - ) - ), - FilterAggregation( - "dataTypes", - filtersMap("dataTypes"), - subaggs = Seq( - uniqueDiseasesAgg, - TermsAggregation( - "aggs", - Some("datatype_id.keyword"), - size = Some(100), - subaggs = Seq( - uniqueDiseasesAgg, - TermsAggregation( - "aggs", - Some("datasource_id.keyword"), - size = Some(100), - subaggs = Seq( - uniqueDiseasesAgg - ) - ) - ) - ) - ) - ), - FilterAggregation( - "therapeuticAreas", - filtersMap("therapeuticAreas"), - subaggs = Seq( - uniqueDiseasesAgg, - TermsAggregation( - "aggs", - Some("facet_therapeuticAreas.keyword"), - size = Some(100), - subaggs = Seq( - uniqueDiseasesAgg - ) - ) - ) - ) - ) - - val esQ = esRetriever.getAggregationsByQuery( - evidencesIndexName, - boolQuery() - .withShould( - boolQuery() - .withMust(termsQuery("target_id.keyword", indirectIDs)) - .withMust(not(termsQuery("datasource_id.keyword", dontPropagate))) - ) - .withShould( - boolQuery() - .withMust(termQuery("target_id.keyword", target.id)) - ), - queryAggs - ) map { - case obj: JsObject => - logger.trace(Json.prettyPrint(obj)) - - val ids = (obj \ "uniques" \ "ids" \ "buckets" \\ "key").map(_.as[String]).toSet - val uniques = (obj \ "uniques" \\ "value").head.as[Long] - val restAggs = (obj - "uniques").fields map { pair => - NamedAggregation( - pair._1, - (pair._2 \ "uniques" \\ "value").headOption.map(jv => jv.as[Long]), - ArraySeq.unsafeWrapArray((pair._2 \\ "buckets").head.as[Array[entities.Aggregation]]) - ) - } - - Some((Aggregations(uniques, restAggs.to(Seq)), ids)) - - case _ => None - } - - (dbRetriever.executeQuery[String, Query](simpleQ) zip esQ) flatMap { case (dIDs, esR) => - val dids = esR.map(_._2 intersect dIDs.toSet).getOrElse(dIDs.toSet) + + (dbRetriever.executeQuery[String, Query](simpleQ)) flatMap { case dIDs => + val dids = dIDs.toSet + logger.info(dids.toString()) val fullQ = aotfQ(indirectIDs, dids).query logger.debug( s"target fixed get simpleQ n ${dIDs.size} " + - s"agg n ${esR.map(_._2.size).getOrElse(-1)} " + s"inter n ${dids.size}" ) - if (dids.nonEmpty) { dbRetriever.executeQuery[Association, Query](fullQ) map { case assocs => - Associations(dss, esR.map(_._1), dids.size, assocs) + Associations(dss, dids.size, assocs) } } else { - Future.successful(Associations(dss, esR.map(_._1), dids.size, Vector.empty)) + Future.successful(Associations(dss, dids.size, Vector.empty)) } } } diff --git a/app/models/ElasticRetriever.scala b/app/models/ElasticRetriever.scala index 90e80ae1..9765b333 100644 --- a/app/models/ElasticRetriever.scala +++ b/app/models/ElasticRetriever.scala @@ -703,65 +703,6 @@ class ElasticRetriever @Inject() ( } object ElasticRetriever extends Logging { - - /** aggregationFilterProducer returns a tuple where the first element is the overall list - * of filters and the second is a map with the cartesian product of each aggregation with - * the complementary list of filters - */ - def 
aggregationFilterProducer( - filters: Seq[AggregationFilter], - mappings: Map[String, AggregationMapping] - ): (BoolQuery, Map[String, BoolQuery]) = { - val filtersByName = filters - .groupBy(_.name) - .view - .filterKeys(mappings.contains) - .toMap - .map { case (facet, filters) => - val mappedFacet = mappings(facet) - val ff = filters.foldLeft(BoolQuery()) { (b, filter) => - val termKey = filter.path.zipWithIndex.last - val termLevel = mappedFacet.pathKeys.lift - val termPrefix = if (mappedFacet.nested) s"${mappedFacet.key}." else "" - val keyName = termPrefix + s"${termLevel(termKey._2).getOrElse(mappedFacet.key)}.keyword" - b.withShould(TermQuery(keyName, termKey._1)) - } - - if (mappedFacet.nested) { - facet -> NestedQuery(mappedFacet.key, ff) - } else { - facet -> ff - } - - } - .withDefaultValue(BoolQuery()) - - val overallFilters = filtersByName.foldLeft(BoolQuery()) { case (b, f) => - b.withMust(f._2) - } - - val namesR = mappings.keys.toList.reverse - if (namesR.size > 1) { - val mappedMappgings = - mappings.map(p => p._1 -> filtersByName(p._1)).toList.combinations(namesR.size - 1).toList - - val cartesianProd = (namesR zip mappedMappgings).toMap.view - .mapValues(_.foldLeft(BoolQuery()) { (b, q) => - b.withMust(q._2) - }) - .toMap - - logger.debug(s"overall filters $overallFilters") - cartesianProd foreach { el => - logger.debug(s"cartesian product ${el._1} -> ${el._2.toString}") - } - - (overallFilters, cartesianProd) - } else { - (overallFilters, Map.empty[String, BoolQuery].withDefaultValue(BoolQuery())) - } - } - /** * * SortBy case class use the `fieldName` to sort by and asc if `desc` is false * otherwise desc diff --git a/app/models/entities/Aggregations.scala b/app/models/entities/Aggregations.scala deleted file mode 100644 index 87155101..00000000 --- a/app/models/entities/Aggregations.scala +++ /dev/null @@ -1,33 +0,0 @@ -package models.entities - -import play.api.Logging -import play.api.libs.json._ -import play.api.libs.json.Reads._ -import play.api.libs.functional.syntax._ - -case class AggregationFilter(name: String, path: Seq[String]) - -case class AggregationMapping(key: String, pathKeys: IndexedSeq[String], nested: Boolean) - -case class Aggregation(key: String, uniques: Long, aggs: Option[Seq[Aggregation]]) - -case class NamedAggregation(name: String, uniques: Option[Long], rows: Seq[Aggregation]) - -case class Aggregations(uniques: Long, aggs: Seq[NamedAggregation]) - -object Aggregations extends Logging { - val empty: Aggregations = Aggregations(0, Seq.empty) - - implicit val aggregationImpWrites: OWrites[Aggregation] = Json.writes[Aggregation] - implicit val aggregationImpReads: Reads[Aggregation] = - ((__ \ "key").read[String] and - (__ \ "uniques" \\ "value").readWithDefault[Long](0) and - (__ \ "aggs" \\ "buckets") - .lazyReadNullable(Reads.seq[Aggregation](aggregationImpReads)))(Aggregation.apply _) - - implicit val namedAggregationImpFormat: OFormat[NamedAggregation] = Json.format[NamedAggregation] - implicit val aggregationsImpFormat: OWrites[Aggregations] = Json.writes[Aggregations] - - implicit val aggregationFilterImpFormat: OFormat[AggregationFilter] = - Json.format[AggregationFilter] -} diff --git a/app/models/entities/Associations.scala b/app/models/entities/Associations.scala index 43146a83..77447c5b 100644 --- a/app/models/entities/Associations.scala +++ b/app/models/entities/Associations.scala @@ -22,7 +22,6 @@ case class Association( case class Associations( datasources: Seq[DatasourceSettings], - aggregations: Option[Aggregations], count: 
Long, rows: Vector[Association] ) @@ -30,7 +29,7 @@ case class Associations( case class EvidenceSource(datasource: String, datatype: String) object Associations { - val empty: Associations = Associations(Seq.empty, None, 0, Vector.empty) + val empty: Associations = Associations(Seq.empty, 0, Vector.empty) implicit val getAssociationOTFRowFromDB: GetResult[Association] = GetResult { r => diff --git a/app/models/gql/Arguments.scala b/app/models/gql/Arguments.scala index ffb5a9bb..3b6d6112 100644 --- a/app/models/gql/Arguments.scala +++ b/app/models/gql/Arguments.scala @@ -11,8 +11,6 @@ import sangria.util.tag object Arguments { - import Aggregations._ - val paginationGQLImp: InputObjectType[Pagination] = deriveInputObjectType[Pagination]() val datasourceSettingsInputImp: InputObjectType[DatasourceSettings] = @@ -20,9 +18,6 @@ object Arguments { InputObjectTypeName("DatasourceSettingsInput") ) - val aggregationFilterImp: InputObjectType[AggregationFilter] = - deriveInputObjectType[AggregationFilter]() - val entityNames: Argument[Option[Seq[String]]] = Argument( "entityNames", OptionInputType(ListInputType(StringType)), @@ -139,15 +134,9 @@ object Arguments { description = "List of datasource settings" ) - val aggregationFiltersListArg: Argument[Option[Seq[AggregationFilter]]] = - Argument("aggregationFilters", - OptionInputType(ListInputType(aggregationFilterImp)), - description = "List of the facets to aggregate by" - ) - val facetFiltersListArg: Argument[Option[Seq[String]]] = Argument( "facetFilters", OptionInputType(ListInputType(StringType)), - description = "List of the facet IDs to filter by (using OR)" + description = "List of the facet IDs to filter by (using AND)" ) } diff --git a/app/models/gql/Objects.scala b/app/models/gql/Objects.scala index bf867602..bdafb091 100644 --- a/app/models/gql/Objects.scala +++ b/app/models/gql/Objects.scala @@ -194,14 +194,13 @@ object Objects extends Logging { associatedOTFDiseasesImp, description = Some("associations on the fly"), arguments = - BIds :: indirectTargetEvidences :: datasourceSettingsListArg :: facetFiltersListArg :: aggregationFiltersListArg :: BFilterString :: scoreSorting :: pageArg :: Nil, + BIds :: indirectTargetEvidences :: datasourceSettingsListArg :: facetFiltersListArg :: BFilterString :: scoreSorting :: pageArg :: Nil, resolve = ctx => ctx.ctx.getAssociationsTargetFixed( ctx.value, ctx arg datasourceSettingsListArg, ctx arg indirectTargetEvidences getOrElse false, ctx arg facetFiltersListArg getOrElse (Seq.empty), - ctx arg aggregationFiltersListArg getOrElse Seq.empty, ctx arg BIds map (_.toSet) getOrElse Set.empty, ctx arg BFilterString, (ctx arg scoreSorting) map (_.split(" ").take(2).toList match { @@ -421,14 +420,13 @@ object Objects extends Logging { associatedOTFTargetsImp, description = Some("associations on the fly"), arguments = - BIds :: indirectEvidences :: datasourceSettingsListArg :: facetFiltersListArg :: aggregationFiltersListArg :: BFilterString :: scoreSorting :: pageArg :: Nil, + BIds :: indirectEvidences :: datasourceSettingsListArg :: facetFiltersListArg :: BFilterString :: scoreSorting :: pageArg :: Nil, resolve = ctx => ctx.ctx.getAssociationsDiseaseFixed( ctx.value, ctx arg datasourceSettingsListArg, ctx arg indirectEvidences getOrElse (true), ctx arg facetFiltersListArg getOrElse (Seq.empty), - ctx arg aggregationFiltersListArg getOrElse (Seq.empty), ctx arg BIds map (_.toSet) getOrElse (Set.empty), ctx arg BFilterString, (ctx arg scoreSorting) map (_.split(" ").take(2).toList match { @@ -1059,13 
+1057,6 @@ object Objects extends Logging { deriveObjectType[Backend, HarmonicSettings]() implicit val clickhouseSettingsImp: ObjectType[Backend, ClickhouseSettings] = deriveObjectType[Backend, ClickhouseSettings]() - - implicit lazy val aggregationImp: ObjectType[Backend, Aggregation] = - deriveObjectType[Backend, Aggregation]() - implicit lazy val namedAggregationImp: ObjectType[Backend, NamedAggregation] = - deriveObjectType[Backend, NamedAggregation]() - implicit lazy val aggregationsImp: ObjectType[Backend, Aggregations] = - deriveObjectType[Backend, Aggregations]() implicit val evidenceSourceImp: ObjectType[Backend, EvidenceSource] = deriveObjectType[Backend, EvidenceSource]() diff --git a/conf/application.conf b/conf/application.conf index 27d862a6..07320801 100644 --- a/conf/application.conf +++ b/conf/application.conf @@ -87,10 +87,6 @@ ot { name = "interaction" index = "interaction" }, - { - name = "evidences_aotf" - index = "evidences_aotf" - }, { name = "known_drugs" index = "known_drugs" From 95c47131b092ee9ab9c0bad76c56c435e4f81fe0 Mon Sep 17 00:00:00 2001 From: James Hayhurst Date: Wed, 3 Jul 2024 15:09:15 +0000 Subject: [PATCH 02/15] update tests --- test/controllers/GqlTest.scala | 9 --- test/inputs/GqlCase.scala | 114 ++++++------------------------ test/inputs/GqlItTestInputs.scala | 76 -------------------- 3 files changed, 20 insertions(+), 179 deletions(-) diff --git a/test/controllers/GqlTest.scala b/test/controllers/GqlTest.scala index 3fcca40e..d33b0017 100644 --- a/test/controllers/GqlTest.scala +++ b/test/controllers/GqlTest.scala @@ -327,9 +327,6 @@ class GqlTest } "Disease page queries" must { - "return a valid response for disease facets" taggedAs (IntegrationTestTag, ClickhouseTestTag) in { - testQueryAgainstGqlEndpoint(DiseaseAggregationfilter("DiseasePage_DiseaseFacets")) - } "return a valid response for disease page" taggedAs IntegrationTestTag in { testQueryAgainstGqlEndpoint(Disease("DiseasePage_DiseasePage")) } @@ -508,18 +505,12 @@ class GqlTest } "Target page" must { - "return valid associations visualisation" taggedAs (IntegrationTestTag, ClickhouseTestTag) in { - testQueryAgainstGqlEndpoint(TargetAggregationfilter("TargetPage_AssociationsViz")) - } "return valid chemical probes" taggedAs (IntegrationTestTag) in { testQueryAgainstGqlEndpoint(Target("ChemicalProbes_ChemicalProbes"))(ensgTransform) } "return valid gene ontology" taggedAs (IntegrationTestTag) in { testQueryAgainstGqlEndpoint(Target("GeneOntology_GeneOntology"))(ensgTransform) } - "return valid target facets" taggedAs (IntegrationTestTag, ClickhouseTestTag) in { - testQueryAgainstGqlEndpoint(TargetAggregationfilter("TargetPage_TargetFacets")) - } "return valid target page" taggedAs (IntegrationTestTag) in { testQueryAgainstGqlEndpoint(Target("TargetPage_TargetPage")) } diff --git a/test/inputs/GqlCase.scala b/test/inputs/GqlCase.scala index 670d6779..683450b2 100644 --- a/test/inputs/GqlCase.scala +++ b/test/inputs/GqlCase.scala @@ -1,7 +1,6 @@ package inputs import controllers.GqlTest -import models.entities.AggregationFilter import org.scalacheck.Gen import play.api.Logging @@ -27,92 +26,60 @@ sealed trait GqlFragment[T] extends GqlCase[T] { def generateFragmentQuery: String } -case class AssociationDisease(file: String) extends GqlCase[(String, AggregationFilter)] { - val inputGenerator = for { - disease <- diseaseGenerator - agg <- aggregationfilterGenerator - } yield (disease, agg) +case class AssociationDisease(file: String) extends GqlCase[String] { + val inputGenerator = 
diseaseGenerator - def generateVariables(inputs: (String, AggregationFilter)) = + def generateVariables(disease: String) = s""" "variables": { - "efoId": "${inputs._1}", + "efoId": "$disease", "index": 0, "size": 10, - "sortBy": "", - "aggregationFilters": [ - { - "name": "${inputs._2.name}", - "path": ${inputs._2.path.mkString("[\"", "\", \"", "\"]")} - }] + "sortBy": "" } """ } -case class AssociationDiseaseIndirect(file: String) extends GqlCase[(String, AggregationFilter)] { - val inputGenerator = for { - disease <- diseaseGenerator - agg <- aggregationfilterGenerator - } yield (disease, agg) +case class AssociationDiseaseIndirect(file: String) extends GqlCase[String] { + val inputGenerator = diseaseGenerator - def generateVariables(inputs: (String, AggregationFilter)) = + def generateVariables(disease: String) = s""" "variables": { - "efoId": "${inputs._1}", + "efoId": "$disease", "index": 0, "size": 10, "sortBy": "", - "enableIndirect": false, - "aggregationFilters": [ - { - "name": "${inputs._2.name}", - "path": ${inputs._2.path.mkString("[\"", "\", \"", "\"]")} - }] + "enableIndirect": false } """ } -case class AssociationTarget(file: String) extends GqlCase[(String, AggregationFilter)] { - val inputGenerator = for { - target <- geneGenerator - agg <- aggregationfilterGenerator - } yield (target, agg) +case class AssociationTarget(file: String) extends GqlCase[String] { + val inputGenerator = geneGenerator - def generateVariables(inputs: (String, AggregationFilter)) = + def generateVariables(target: String) = s""" "variables": { - "ensemblId": "${inputs._1}", + "ensemblId": "$target", "index": 0, "size": 10, - "sortBy": "", - "aggregationFilters": [ - { - "name": "${inputs._2.name}", - "path": ${inputs._2.path.mkString("[\"", "\", \"", "\"]")} - }] + "sortBy": "" } """ } -case class AssociationTargetIndirect(file: String) extends GqlCase[(String, AggregationFilter)] { - val inputGenerator = for { - target <- geneGenerator - agg <- aggregationfilterGenerator - } yield (target, agg) +case class AssociationTargetIndirect(file: String) extends GqlCase[String] { + val inputGenerator = geneGenerator - def generateVariables(inputs: (String, AggregationFilter)) = + def generateVariables(target: String) = s""" "variables": { - "ensemblId": "${inputs._1}", + "ensemblId": "$target", "index": 0, "enableIndirect": false, "size": 10, - "sortBy": "", - "aggregationFilters": [ - { - "name": "${inputs._2.name}", - "path": ${inputs._2.path.mkString("[\"", "\", \"", "\"]")} - }] + "sortBy": "" } """ } @@ -130,25 +97,6 @@ case class Disease(file: String) extends GqlCase[String] { """ } -case class DiseaseAggregationfilter(file: String) extends GqlCase[(String, AggregationFilter)] { - val inputGenerator = for { - disease <- diseaseGenerator - agg <- aggregationfilterGenerator - } yield (disease, agg) - - def generateVariables(inputs: (String, AggregationFilter)): String = - s""" - "variables": { - "efoId": "${inputs._1}", - "aggregationFilters": [ - { - "name": "${inputs._2.name}", - "path": ${inputs._2.path.mkString("[\"", "\", \"", "\"]")} - }] - } - """ -} - abstract class AbstractDrug extends GqlCase[String] { val inputGenerator = drugGenerator @@ -320,28 +268,6 @@ case class TargetDisease(file: String) extends GqlCase[(String, String)] { } } -case class TargetAggregationfilter(file: String) extends GqlCase[(String, AggregationFilter)] { - val inputGenerator = for { - gene <- geneGenerator - aggregationFilter <- aggregationfilterGenerator - } yield (gene, aggregationFilter) - - def 
generateVariables(inputs: (String, AggregationFilter)): String = - s""" - "variables": { - "ensemblId": "${inputs._1}", - "size": 10, - "index": 0, - "aggregationFilters": [ - { - "name": "${inputs._2.name}", - "path": ${inputs._2.path.mkString("[\"", "\", \"", "\"]")} - }] - } - """ - -} - case class TargetDiseaseSize(file: String) extends GqlCase[(String, String, Int)] { val inputGenerator = targetDiseaseSizeGenerator diff --git a/test/inputs/GqlItTestInputs.scala b/test/inputs/GqlItTestInputs.scala index 0a7b1eb0..4b601f69 100644 --- a/test/inputs/GqlItTestInputs.scala +++ b/test/inputs/GqlItTestInputs.scala @@ -1,6 +1,5 @@ package inputs -import models.entities.AggregationFilter import org.scalacheck.Gen import scala.reflect.io.File @@ -16,82 +15,7 @@ trait GqlItTestInputs { lazy val goInputs = File(this.getClass.getResource(s"/gqlInputs/goIds.txt").getPath).lines().toList - val aggregationFilterMap: Map[String, Seq[String]] = Map( - "pathwayTypes" -> Seq( - "Autophagy", - "Cell Cycle", - "Cell-Cell communication", - "Cellular responses to external stimuli", - "Chromatin organization", - "Circadian Clock", - "Developmental Biology", - "Digestion and absorption", - "Disease", - "DNA Repair", - "DNA Replication", - "Extracellular matrix organization", - "Gene expression (Transcription)", - "Hemostasis", - "Immune System", - "Metabolism", - "Metabolism of proteins", - "Metabolism of RNA", - "Muscle contraction", - "Neuronal System", - "Organelle biogenesis and maintenance", - "Programmed Cell Death", - "Protein localization", - "Reproduction", - "Sensory Perception", - "Signal Transduction", - "Transport of small molecules", - "Vesicle-mediated transport" - ), - "targetClasses" -> Seq( - "Adhesion", - "Auxiliary transport protein", - "Enzyme", - "Epigenetic regulator", - "Ion channel", - "Membrane receptor", - "Other cytosolic protein", - "Other membrane protein", - "Other nuclear protein", - "Secreted protein", - "Surface antigen", - "Transcription factor", - "Transporter", - "Unclassified protein", - "Structural protein" - ), - "dataTypes" -> Seq( - "Genetic associations", - "Drugs", - "Text mining", - "RNA expression", - "Animal models", - "Somatic mutations" - ), - "tractabilityAntibody" -> Seq( - "Clinical precedence", - "Predicted tractable high confidence", - "Predicted tractable med low confidence" - ), - "tractabilitySmallMolecule" -> Seq( - "Clinical precedence", - "Discovery precedence", - "Predicted tractable" - ) - ) - // Generators - val aggregationfilterGenerator: Gen[AggregationFilter] = { - for { - name <- Gen.oneOf(aggregationFilterMap.keySet) - paths <- Gen.someOf(aggregationFilterMap(name)) - } yield AggregationFilter(name, paths.to(Seq)) - } - val geneGenerator: Gen[String] = Gen.oneOf(geneInputs) val diseaseGenerator: Gen[String] = Gen.oneOf(diseaseInputs) val drugGenerator: Gen[String] = Gen.oneOf(drugInputs) From 10f3a35dcb6fadd0fe2ec4b5f9c85451b432c0f9 Mon Sep 17 00:00:00 2001 From: James Hayhurst Date: Wed, 3 Jul 2024 15:17:35 +0000 Subject: [PATCH 03/15] formatting --- app/models/Backend.scala | 10 +++++----- app/models/ElasticRetriever.scala | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/app/models/Backend.scala b/app/models/Backend.scala index 3746885e..3be33d1a 100644 --- a/app/models/Backend.scala +++ b/app/models/Backend.scala @@ -489,10 +489,10 @@ class Backend @Inject() (implicit val indirectIDs = if (indirect) disease.descendants.toSet + disease.id else Set.empty[String] val targetIds = applyFacetFiltersToBIDs("facet_search_target", 
targetSet, facetFilters) val simpleQ = aotfQ(indirectIDs, targetIds).simpleQuery(0, 100000) - + (dbRetriever.executeQuery[String, Query](simpleQ)) flatMap { case tIDs => val tids = tIDs.toSet - logger.info(tids.toString()) + logger.info(tids.toString()) val fullQ = aotfQ(indirectIDs, tids).query logger.debug( @@ -508,7 +508,7 @@ class Backend @Inject() (implicit } } } - + def getAssociationsTargetFixed( target: Target, datasources: Option[Seq[DatasourceSettings]], @@ -555,10 +555,10 @@ class Backend @Inject() (implicit val diseaseIds = applyFacetFiltersToBIDs("facet_search_disease", diseaseSet, facetFilters) val simpleQ = aotfQ(indirectIDs, diseaseIds).simpleQuery(0, 100000) - + (dbRetriever.executeQuery[String, Query](simpleQ)) flatMap { case dIDs => val dids = dIDs.toSet - logger.info(dids.toString()) + logger.info(dids.toString()) val fullQ = aotfQ(indirectIDs, dids).query logger.debug( diff --git a/app/models/ElasticRetriever.scala b/app/models/ElasticRetriever.scala index 9765b333..53d53b67 100644 --- a/app/models/ElasticRetriever.scala +++ b/app/models/ElasticRetriever.scala @@ -703,6 +703,7 @@ class ElasticRetriever @Inject() ( } object ElasticRetriever extends Logging { + /** * * SortBy case class use the `fieldName` to sort by and asc if `desc` is false * otherwise desc From 66ae67471cdc80e7d103f7cac0df363e9ee57fc1 Mon Sep 17 00:00:00 2001 From: James Hayhurst Date: Wed, 3 Jul 2024 15:54:40 +0000 Subject: [PATCH 04/15] refactoring --- app/models/Backend.scala | 109 +++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 61 deletions(-) diff --git a/app/models/Backend.scala b/app/models/Backend.scala index 3be33d1a..18237967 100644 --- a/app/models/Backend.scala +++ b/app/models/Backend.scala @@ -457,24 +457,23 @@ class Backend @Inject() (implicit defaultOTSettings.clickhouse.disease.associations.name ) - def getAssociationsDiseaseFixed( - disease: Disease, + def getAssociationsEntityFixed( + tableName: String, datasources: Option[Seq[DatasourceSettings]], - indirect: Boolean, - facetFilters: Seq[String], - targetSet: Set[String], + fixedEntityId: String, + indirectIds: Set[String], + bIds: Set[String], filter: Option[String], orderBy: Option[(String, String)], pagination: Option[Pagination] ): Future[Associations] = { val page = pagination.getOrElse(Pagination.mkDefault) val dss = datasources.getOrElse(defaultOTSettings.clickhouse.harmonic.datasources) - val weights = dss.map(s => (s.id, s.weight)) val dontPropagate = dss.withFilter(!_.propagate).map(_.id).toSet val aotfQ = QAOTF( - defaultOTSettings.clickhouse.disease.associations.name, - disease.id, + tableName, + fixedEntityId, _, _, filter, @@ -484,59 +483,57 @@ class Backend @Inject() (implicit page.offset, page.size ) + val simpleQ = aotfQ(indirectIds, bIds).simpleQuery(0, 100000) - logger.debug(s"get disease id ${disease.name}") - val indirectIDs = if (indirect) disease.descendants.toSet + disease.id else Set.empty[String] - val targetIds = applyFacetFiltersToBIDs("facet_search_target", targetSet, facetFilters) - val simpleQ = aotfQ(indirectIDs, targetIds).simpleQuery(0, 100000) - - (dbRetriever.executeQuery[String, Query](simpleQ)) flatMap { case tIDs => - val tids = tIDs.toSet - logger.info(tids.toString()) - val fullQ = aotfQ(indirectIDs, tids).query + (dbRetriever.executeQuery[String, Query](simpleQ)) flatMap { case assocIds => + val assocIdSet = assocIds.toSet + val fullQ = aotfQ(indirectIds, assocIdSet).query - logger.debug( - s"target fixed get simpleQ n ${tIDs.size} " + - s"inter n ${tids.size}" - 
) - if (tids.nonEmpty) { + if (assocIdSet.nonEmpty) { dbRetriever.executeQuery[Association, Query](fullQ) map { case assocs => - Associations(dss, tids.size, assocs) + Associations(dss, assocIdSet.size, assocs) } } else { - Future.successful(Associations(dss, tids.size, Vector.empty)) + Future.successful(Associations(dss, assocIdSet.size, Vector.empty)) } } } - def getAssociationsTargetFixed( - target: Target, + def getAssociationsDiseaseFixed( + disease: Disease, datasources: Option[Seq[DatasourceSettings]], indirect: Boolean, facetFilters: Seq[String], - diseaseSet: Set[String], + targetSet: Set[String], filter: Option[String], orderBy: Option[(String, String)], pagination: Option[Pagination] ): Future[Associations] = { - val page = pagination.getOrElse(Pagination.mkDefault) - val dss = datasources.getOrElse(defaultOTSettings.clickhouse.harmonic.datasources) - - val weights = dss.map(s => (s.id, s.weight)) - val dontPropagate = dss.withFilter(!_.propagate).map(_.id).toSet - val aotfQ = QAOTF( - defaultOTSettings.clickhouse.target.associations.name, - target.id, - _, - _, + logger.debug(s"get disease id ${disease.name}") + val indirectIDs = if (indirect) disease.descendants.toSet + disease.id else Set.empty[String] + val targetIds = applyFacetFiltersToBIDs("facet_search_target", targetSet, facetFilters) + getAssociationsEntityFixed( + defaultOTSettings.clickhouse.disease.associations.name, + datasources, + disease.id, + indirectIDs, + targetIds, filter, orderBy, - weights, - dontPropagate, - page.offset, - page.size + pagination ) + } + def getAssociationsTargetFixed( + target: Target, + datasources: Option[Seq[DatasourceSettings]], + indirect: Boolean, + facetFilters: Seq[String], + diseaseSet: Set[String], + filter: Option[String], + orderBy: Option[(String, String)], + pagination: Option[Pagination] + ): Future[Associations] = { logger.debug(s"get target id ${target.approvedSymbol} ACTUALLY DISABLED!") val indirectIDs = if (indirect) { val interactions = @@ -547,32 +544,22 @@ class Backend @Inject() (implicit .toSet + target.id case None => Set.empty + target.id } - interactions.await - } else Set.empty[String] val diseaseIds = applyFacetFiltersToBIDs("facet_search_disease", diseaseSet, facetFilters) - val simpleQ = aotfQ(indirectIDs, diseaseIds).simpleQuery(0, 100000) - - (dbRetriever.executeQuery[String, Query](simpleQ)) flatMap { case dIDs => - val dids = dIDs.toSet - logger.info(dids.toString()) - val fullQ = aotfQ(indirectIDs, dids).query - logger.debug( - s"target fixed get simpleQ n ${dIDs.size} " + - s"inter n ${dids.size}" - ) - if (dids.nonEmpty) { - dbRetriever.executeQuery[Association, Query](fullQ) map { case assocs => - Associations(dss, dids.size, assocs) - } - } else { - Future.successful(Associations(dss, dids.size, Vector.empty)) - } - } + getAssociationsEntityFixed( + defaultOTSettings.clickhouse.target.associations.name, + datasources, + target.id, + indirectIDs, + diseaseIds, + filter, + orderBy, + pagination + ) } def getSimilarW2VEntities( From 5c853d5a89b1f15a81defa4cf3826d4c17855f00 Mon Sep 17 00:00:00 2001 From: James Hayhurst Date: Fri, 5 Jul 2024 15:47:16 +0000 Subject: [PATCH 05/15] add poc filter to interactions, added get with filter, refactor es builder --- app/models/ElasticRetriever.scala | 41 +++++++++++- .../ElasticRetrieverQueryBuilders.scala | 65 ++++++++++--------- app/models/entities/Interactions.scala | 13 +++- 3 files changed, 84 insertions(+), 35 deletions(-) diff --git a/app/models/ElasticRetriever.scala b/app/models/ElasticRetriever.scala 
index 90e80ae1..b873b6c6 100644 --- a/app/models/ElasticRetriever.scala +++ b/app/models/ElasticRetriever.scala @@ -7,6 +7,7 @@ import com.sksamuel.elastic4s.requests.common.Operator import com.sksamuel.elastic4s.requests.searches._ import com.sksamuel.elastic4s.requests.searches.aggs.AbstractAggregation import com.sksamuel.elastic4s.requests.searches.queries.NestedQuery +import com.sksamuel.elastic4s.requests.searches.queries.Query import com.sksamuel.elastic4s.requests.searches.queries.compound.BoolQuery import com.sksamuel.elastic4s.requests.searches.queries.funcscorer._ import com.sksamuel.elastic4s.requests.searches.queries.matches.MultiMatchQueryBuilderType @@ -25,6 +26,7 @@ import scala.util.Try import com.sksamuel.elastic4s.requests.searches.sort.FieldSort import com.sksamuel.elastic4s.requests.searches.term.TermQuery import com.sksamuel.elastic4s.handlers.index.Search +import views.html.index.f class ElasticRetriever @Inject() ( client: ElasticClient, @@ -128,10 +130,39 @@ class ElasticRetriever @Inject() ( excludedFields: Seq[String] = Seq.empty ): Future[(IndexedSeq[A], JsValue)] = { // just log and execute the query - val searchRequest: SearchRequest = IndexQueryMust(esIndex, kv, pagination, aggs, excludedFields) + val indexQuery: IndexQuery[V] = IndexQuery( + esIndex = esIndex, + kv = kv, + pagination = pagination, + aggs = aggs, + excludedFields = excludedFields) + val searchRequest: SearchRequest = IndexQueryMust(indexQuery) getByIndexedQuery(searchRequest, sortByField, buildF) } + def getByIndexedQueryMustWithFilters[A, V]( + esIndex: String, + kv: Map[String, V], + filters: Seq[Query], + pagination: Pagination, + buildF: JsValue => Option[A], + aggs: Iterable[AbstractAggregation] = Iterable.empty, + sortByField: Option[sort.FieldSort] = None, + excludedFields: Seq[String] = Seq.empty + ): Future[(IndexedSeq[A], JsValue)] = { + // just log and execute the query + val indexQuery: IndexQuery[V] = IndexQuery( + esIndex = esIndex, + kv = kv, + filters = filters, + pagination = pagination, + aggs = aggs, + excludedFields = excludedFields) + val searchRequest: SearchRequest = IndexQueryMust(indexQuery) + getByIndexedQuery(searchRequest, sortByField, buildF) + } + + /** This fn represents a query where each kv from the map is used in * a bool 'should'. Based on the query asked by `getByIndexedQuery` and aggregation is applied */ @@ -144,8 +175,14 @@ class ElasticRetriever @Inject() ( sortByField: Option[sort.FieldSort] = None, excludedFields: Seq[String] = Seq.empty ): Future[(IndexedSeq[A], JsValue)] = { + val indexQuery: IndexQuery[V] = IndexQuery( + esIndex = esIndex, + kv = kv, + pagination = pagination, + aggs = aggs, + excludedFields = excludedFields) val searchRequest: SearchRequest = - IndexQueryShould(esIndex, kv, pagination, aggs, excludedFields) + IndexQueryShould(indexQuery) // log and execute the query getByIndexedQuery(searchRequest, sortByField, buildF) } diff --git a/app/models/ElasticRetrieverQueryBuilders.scala b/app/models/ElasticRetrieverQueryBuilders.scala index 0b7bab19..214b7b4f 100644 --- a/app/models/ElasticRetrieverQueryBuilders.scala +++ b/app/models/ElasticRetrieverQueryBuilders.scala @@ -9,37 +9,43 @@ import com.sksamuel.elastic4s.requests.searches.queries.compound.BoolQuery import models.entities.Pagination import play.api.Logging + +/** + * IndexQuery is a case class that represents a query to be executed on an Elasticsearch index. 
+ * @param esIndex the Elasticsearch index to query + * @param kv a map of key-value pairs to form match queries with, where the key is the field name and the value is the match value + * @param filters a sequence of additional filters to apply + * @param pagination the pagination settings + * @param aggs a sequence of aggregations to apply + * @param excludedFields a sequence of fields to exclude from the results + * @tparam V the type of the values in the key-value map + */ +case class IndexQuery[V]( + esIndex: String, + kv: Map[String, V], + filters: Seq[Query] = Seq.empty, + pagination: Pagination, + aggs: Iterable[AbstractAggregation] = Iterable.empty, + excludedFields: Seq[String] = Seq.empty +) + trait ElasticRetrieverQueryBuilders extends QueryApi with Logging { - def IndexQueryMust[A]( - esIndex: String, - kv: Map[String, A], - pagination: Pagination, - aggs: Iterable[AbstractAggregation] = Iterable.empty, - excludedFields: Seq[String] = Seq.empty - ): SearchRequest = - getByIndexQueryBuilder(esIndex, kv, pagination, aggs, excludedFields, must) + def IndexQueryMust[V](indexQuery: IndexQuery[V]): SearchRequest = + getByIndexQueryBuilder(indexQuery, must) - def IndexQueryShould[A]( - esIndex: String, - kv: Map[String, A], - pagination: Pagination, - aggs: Iterable[AbstractAggregation] = Iterable.empty, - excludedFields: Seq[String] = Seq.empty + def IndexQueryShould[V]( + indexQuery: IndexQuery[V] ): SearchRequest = - getByIndexQueryBuilder(esIndex, kv, pagination, aggs, excludedFields, should) + getByIndexQueryBuilder(indexQuery, should) - def getByIndexQueryBuilder[A, V]( - esIndex: String, - kv: Map[String, V], - pagination: Pagination, - aggs: Iterable[AbstractAggregation] = Iterable.empty, - excludedFields: Seq[String] = Seq.empty, + def getByIndexQueryBuilder[V]( + indexQuery: IndexQuery[V], f: Iterable[Query] => BoolQuery ): SearchRequest = { - val limitClause = pagination.toES + val limitClause = indexQuery.pagination.toES val query: Iterable[Query] = { - val querySeq = kv.toSeq + val querySeq = indexQuery.kv.toSeq querySeq.flatMap { it => it._2 match { case a: Iterable[Any] => a.map(iterVal => matchQuery(it._1, iterVal)) @@ -47,16 +53,13 @@ trait ElasticRetrieverQueryBuilders extends QueryApi with Logging { } } } - search(esIndex) - .bool { - f( - query - ) - } + val boolQuery: BoolQuery = f(query).filter(indexQuery.filters) + search(indexQuery.esIndex) + .bool(boolQuery) .start(limitClause._1) .limit(limitClause._2) - .aggs(aggs) + .aggs(indexQuery.aggs) .trackTotalHits(true) - .sourceExclude(excludedFields) + .sourceExclude(indexQuery.excludedFields) } } diff --git a/app/models/entities/Interactions.scala b/app/models/entities/Interactions.scala index fae8c95e..7fe0dfe6 100644 --- a/app/models/entities/Interactions.scala +++ b/app/models/entities/Interactions.scala @@ -1,7 +1,7 @@ package models.entities import com.sksamuel.elastic4s.ElasticApi.valueCountAgg -import com.sksamuel.elastic4s.ElasticDsl.boolQuery +import com.sksamuel.elastic4s.ElasticDsl.{boolQuery, rangeQuery, should, not, existsQuery} import com.sksamuel.elastic4s.requests.searches._ import com.sksamuel.elastic4s.requests.searches.aggs.TermsAggregation import com.sksamuel.elastic4s.requests.searches.sort._ @@ -44,14 +44,23 @@ object Interactions extends Logging { dbName.map("sourceDatabase.keyword" -> _) ).flatten.toMap + val filters = Seq( + should( + rangeQuery("scoring").gt(0.99), + not(existsQuery("scoring")) + ) + ) + + val aggs = Seq( valueCountAgg("rowsCount", "targetA.keyword") ) esRetriever - 
.getByIndexedQueryMust( + .getByIndexedQueryMustWithFilters( cbIndex, kv, + filters, pag, fromJsValue[JsValue], aggs, From d7f0382c148b1504493e30001b552c1a47487630 Mon Sep 17 00:00:00 2001 From: James Hayhurst Date: Fri, 5 Jul 2024 16:03:20 +0000 Subject: [PATCH 06/15] add scoreThreshold argument to endpoint --- app/models/Backend.scala | 2 +- app/models/entities/Interactions.scala | 5 ++--- app/models/gql/Arguments.scala | 5 +++++ app/models/gql/Objects.scala | 4 ++-- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/app/models/Backend.scala b/app/models/Backend.scala index 0c79159f..b1047bce 100644 --- a/app/models/Backend.scala +++ b/app/models/Backend.scala @@ -713,7 +713,7 @@ class Backend @Inject() (implicit logger.debug(s"get target id ${target.approvedSymbol} ACTUALLY DISABLED!") val indirectIDs = if (indirect) { val interactions = - Interactions.find(target.id, None, pagination = Some(Pagination(0, 10000))) map { + Interactions.find(target.id, None, None, pagination = Some(Pagination(0, 10000))) map { case Some(ints) => ints.rows .flatMap(int => (int \ ("targetB")).asOpt[String].filter(_.startsWith("ENSG"))) diff --git a/app/models/entities/Interactions.scala b/app/models/entities/Interactions.scala index 7fe0dfe6..d82d742a 100644 --- a/app/models/entities/Interactions.scala +++ b/app/models/entities/Interactions.scala @@ -26,7 +26,7 @@ object Interactions extends Logging { ) ) - def find(id: String, dbName: Option[String], pagination: Option[Pagination])(implicit + def find(id: String, scoreThreshold: Option[Double], dbName: Option[String], pagination: Option[Pagination])(implicit ec: ExecutionContext, esSettings: ElasticsearchSettings, esRetriever: ElasticRetriever @@ -46,11 +46,10 @@ object Interactions extends Logging { val filters = Seq( should( - rangeQuery("scoring").gt(0.99), + rangeQuery("scoring").gte(scoreThreshold.getOrElse(0.0d)), not(existsQuery("scoring")) ) ) - val aggs = Seq( valueCountAgg("rowsCount", "targetA.keyword") diff --git a/app/models/gql/Arguments.scala b/app/models/gql/Arguments.scala index ffb5a9bb..26b559fc 100644 --- a/app/models/gql/Arguments.scala +++ b/app/models/gql/Arguments.scala @@ -42,6 +42,11 @@ object Arguments { ) val pageSize: Argument[Option[Int]] = Argument("size", OptionInputType(IntType)) val cursor: Argument[Option[String]] = Argument("cursor", OptionInputType(StringType)) + val scoreThreshold: Argument[Option[Double]] = Argument( + "scoreThreshold", + OptionInputType(FloatType), + description = "Threshold similarity between 0 and 1" + ) val databaseName: Argument[Option[String]] = Argument("sourceDatabase", OptionInputType(StringType), description = "Database name") val queryString: Argument[String] = diff --git a/app/models/gql/Objects.scala b/app/models/gql/Objects.scala index bf867602..1c81912f 100644 --- a/app/models/gql/Objects.scala +++ b/app/models/gql/Objects.scala @@ -146,11 +146,11 @@ object Objects extends Logging { "interactions", OptionType(interactions), description = Some("Biological pathway membership from Reactome"), - arguments = databaseName :: pageArg :: Nil, + arguments = scoreThreshold :: databaseName :: pageArg :: Nil, resolve = r => { import r.ctx._ - Interactions.find(r.value.id, r arg databaseName, r arg pageArg) + Interactions.find(r.value.id, r arg scoreThreshold, r arg databaseName, r arg pageArg) } ), Field( From a5fac1c1041ff9d489f8b616dd017eb47881a94d Mon Sep 17 00:00:00 2001 From: James Hayhurst Date: Fri, 5 Jul 2024 16:04:30 +0000 Subject: [PATCH 07/15] formatting --- 
app/models/ElasticRetriever.scala | 39 +++++++++---------- .../ElasticRetrieverQueryBuilders.scala | 4 +- app/models/entities/Interactions.scala | 8 +++- 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/app/models/ElasticRetriever.scala b/app/models/ElasticRetriever.scala index b873b6c6..4e951940 100644 --- a/app/models/ElasticRetriever.scala +++ b/app/models/ElasticRetriever.scala @@ -130,12 +130,12 @@ class ElasticRetriever @Inject() ( excludedFields: Seq[String] = Seq.empty ): Future[(IndexedSeq[A], JsValue)] = { // just log and execute the query - val indexQuery: IndexQuery[V] = IndexQuery( - esIndex = esIndex, - kv = kv, - pagination = pagination, - aggs = aggs, - excludedFields = excludedFields) + val indexQuery: IndexQuery[V] = IndexQuery(esIndex = esIndex, + kv = kv, + pagination = pagination, + aggs = aggs, + excludedFields = excludedFields + ) val searchRequest: SearchRequest = IndexQueryMust(indexQuery) getByIndexedQuery(searchRequest, sortByField, buildF) } @@ -151,18 +151,17 @@ class ElasticRetriever @Inject() ( excludedFields: Seq[String] = Seq.empty ): Future[(IndexedSeq[A], JsValue)] = { // just log and execute the query - val indexQuery: IndexQuery[V] = IndexQuery( - esIndex = esIndex, - kv = kv, - filters = filters, - pagination = pagination, - aggs = aggs, - excludedFields = excludedFields) + val indexQuery: IndexQuery[V] = IndexQuery(esIndex = esIndex, + kv = kv, + filters = filters, + pagination = pagination, + aggs = aggs, + excludedFields = excludedFields + ) val searchRequest: SearchRequest = IndexQueryMust(indexQuery) getByIndexedQuery(searchRequest, sortByField, buildF) } - /** This fn represents a query where each kv from the map is used in * a bool 'should'. Based on the query asked by `getByIndexedQuery` and aggregation is applied */ @@ -175,12 +174,12 @@ class ElasticRetriever @Inject() ( sortByField: Option[sort.FieldSort] = None, excludedFields: Seq[String] = Seq.empty ): Future[(IndexedSeq[A], JsValue)] = { - val indexQuery: IndexQuery[V] = IndexQuery( - esIndex = esIndex, - kv = kv, - pagination = pagination, - aggs = aggs, - excludedFields = excludedFields) + val indexQuery: IndexQuery[V] = IndexQuery(esIndex = esIndex, + kv = kv, + pagination = pagination, + aggs = aggs, + excludedFields = excludedFields + ) val searchRequest: SearchRequest = IndexQueryShould(indexQuery) // log and execute the query diff --git a/app/models/ElasticRetrieverQueryBuilders.scala b/app/models/ElasticRetrieverQueryBuilders.scala index 214b7b4f..7cff3816 100644 --- a/app/models/ElasticRetrieverQueryBuilders.scala +++ b/app/models/ElasticRetrieverQueryBuilders.scala @@ -9,9 +9,7 @@ import com.sksamuel.elastic4s.requests.searches.queries.compound.BoolQuery import models.entities.Pagination import play.api.Logging - -/** - * IndexQuery is a case class that represents a query to be executed on an Elasticsearch index. +/** IndexQuery is a case class that represents a query to be executed on an Elasticsearch index. 
* @param esIndex the Elasticsearch index to query * @param kv a map of key-value pairs to form match queries with, where the key is the field name and the value is the match value * @param filters a sequence of additional filters to apply diff --git a/app/models/entities/Interactions.scala b/app/models/entities/Interactions.scala index d82d742a..4b49478d 100644 --- a/app/models/entities/Interactions.scala +++ b/app/models/entities/Interactions.scala @@ -26,7 +26,11 @@ object Interactions extends Logging { ) ) - def find(id: String, scoreThreshold: Option[Double], dbName: Option[String], pagination: Option[Pagination])(implicit + def find(id: String, + scoreThreshold: Option[Double], + dbName: Option[String], + pagination: Option[Pagination] + )(implicit ec: ExecutionContext, esSettings: ElasticsearchSettings, esRetriever: ElasticRetriever @@ -48,8 +52,8 @@ object Interactions extends Logging { should( rangeQuery("scoring").gte(scoreThreshold.getOrElse(0.0d)), not(existsQuery("scoring")) - ) ) + ) val aggs = Seq( valueCountAgg("rowsCount", "targetA.keyword") From 047357054371313e9d6eb0caf84120b16a373e92 Mon Sep 17 00:00:00 2001 From: James Hayhurst Date: Thu, 11 Jul 2024 15:16:31 +0000 Subject: [PATCH 08/15] customising elastic client with keep alive --- app/models/Backend.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/app/models/Backend.scala b/app/models/Backend.scala index b1047bce..57df6557 100644 --- a/app/models/Backend.scala +++ b/app/models/Backend.scala @@ -22,6 +22,7 @@ import models.entities.MousePhenotypes._ import models.entities.Pharmacogenomics._ import models.entities.SearchFacetsResults._ import models.entities._ +import org.apache.http.impl.nio.reactor.IOReactorConfig import play.api.cache.AsyncCacheApi import play.api.db.slick.DatabaseConfigProvider import play.api.libs.json._ @@ -48,7 +49,11 @@ class Backend @Inject() (implicit /** return meta information loaded from ot.meta settings */ lazy val getMeta: Meta = defaultOTSettings.meta lazy val getESClient: ElasticClient = ElasticClient( - JavaClient(ElasticProperties(s"http://${defaultESSettings.host}:${defaultESSettings.port}")) + JavaClient( + ElasticProperties(s"http://${defaultESSettings.host}:${defaultESSettings.port}"), + httpClientConfigCallback = + _.setDefaultIOReactorConfig(IOReactorConfig.custom.setSoKeepAlive(true).build()) + ) ) val allSearchableIndices: Seq[String] = defaultESSettings.entities .withFilter(_.searchIndex.isDefined) From 52900132041b513de8b84be67c2115b4467d7781 Mon Sep 17 00:00:00 2001 From: ricardo Date: Fri, 2 Aug 2024 11:03:15 +0100 Subject: [PATCH 09/15] rename variant field --- app/models/entities/Evidence.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/models/entities/Evidence.scala b/app/models/entities/Evidence.scala index 5ab917b3..2cbe70ea 100644 --- a/app/models/entities/Evidence.scala +++ b/app/models/entities/Evidence.scala @@ -200,7 +200,7 @@ object Evidence extends Logging { ) ) val biomarkerVariantImp: ObjectType[Backend, JsValue] = ObjectType( - "variant", + "geneticVariation", fields[Backend, JsValue]( Field( "id", @@ -235,10 +235,10 @@ object Evidence extends Logging { resolve = js => (js.value \ "geneExpression").asOpt[Seq[JsValue]] ), Field( - "variant", + "geneticVariation", OptionType(ListType(biomarkerVariantImp)), description = None, - resolve = js => (js.value \ "variant").asOpt[Seq[JsValue]] + resolve = js => (js.value \ "geneticVariation").asOpt[Seq[JsValue]] ) ) ) From 
7a66bcfb27555a6f366d236d7088f5673d9ad6f7 Mon Sep 17 00:00:00 2001 From: James Hayhurst Date: Thu, 15 Aug 2024 10:43:12 +0000 Subject: [PATCH 10/15] added logic for filtering by datasource id --- app/models/db/QAOTF.scala | 122 ++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 72 deletions(-) diff --git a/app/models/db/QAOTF.scala b/app/models/db/QAOTF.scala index 3e70f999..dc1a0c8e 100644 --- a/app/models/db/QAOTF.scala +++ b/app/models/db/QAOTF.scala @@ -51,19 +51,55 @@ case class QAOTF( val maxHS: Column = literal(Harmonic.maxValue(100000, pExponentDefault, 1.0)) .as(Some("max_hs_score")) - val BFilterQ: Option[Column] = BFilter flatMap { case matchStr => - val tokens = matchStr - .split(" ") - .map { s => - F.like(BData.name, F.lower(literal(s"%${s.toLowerCase.trim}%"))) + val filterExpression: Column = { + val BFilterQ: Option[Column] = BFilter flatMap { case matchStr => + val tokens = matchStr + .split(" ") + .map { s => + F.like(BData.name, F.lower(literal(s"%${s.toLowerCase.trim}%"))) + } + .toList + + tokens match { + case h :: Nil => Some(h) + case h1 :: h2 :: rest => Some(F.and(h1, h2, rest: _*)) + case _ => None } - .toList + } + val leftIdsC = F.set((AIDs + AId).map(literal).toSeq) + val nonPP = F.set(nonPropagatedDatasources.map(literal).toSeq) + val expressionLeft = if (nonPropagatedDatasources.nonEmpty) { + F.or( + F.and( + F.in(A, leftIdsC), + F.notIn(DS, nonPP) + ), + F.equals(A, literal(AId)) + ) + } else + F.in(A, leftIdsC) - tokens match { - case h :: Nil => Some(h) - case h1 :: h2 :: rest => Some(F.and(h1, h2, rest: _*)) - case _ => None + // in the case we also want to filter B set + val expressionLeftRight = if (BIDs.nonEmpty) { + val rightIdsC = F.set(BIDs.map(literal).toSeq) + F.and( + expressionLeft, + F.in(B, rightIdsC) + ) + } else { + expressionLeft } + val DSRequired = Set("eva") // TODO: read this from a "required" request parameter + val expressionLeftRighWithFilters = { + val expressionLeftRightWithBFilter = + BFilterQ.map(f => F.and(f, expressionLeftRight)).getOrElse(expressionLeftRight) + if (DSRequired.nonEmpty) { + F.and(expressionLeftRightWithBFilter, F.in(DS, F.set(DSRequired.map(literal).toSeq))) + } else { + expressionLeftRightWithBFilter + } + } + expressionLeftRighWithFilters } val DSScore: Column = F @@ -78,6 +114,7 @@ case class QAOTF( .as(Some("score_datasource")) val DSW: Column = F.ifNull(F.any(column("weight")), literal(1.0)).as(Some("datasource_weight")) + val DTAny = F.any(DT).as(Some(DT.rep)) val queryGroupByDS: Query = { val WC = F @@ -92,41 +129,12 @@ case class QAOTF( Select(DSFieldWC :: WFieldWC :: Nil), OrderBy(DSFieldWC.asc :: Nil) ) - - val leftIdsC = F.set((AIDs + AId).map(literal).toSeq) - - val nonPP = F.set(nonPropagatedDatasources.map(literal).toSeq) - val expressionLeft = if (nonPropagatedDatasources.nonEmpty) { - F.or( - F.and( - F.in(A, leftIdsC), - F.notIn(DS, nonPP) - ), - F.equals(A, literal(AId)) - ) - } else - F.in(A, leftIdsC) - - // in the case we also want to filter B set - val expressionLeftRight = if (BIDs.nonEmpty) { - val rightIdsC = F.set(BIDs.map(literal).toSeq) - - F.and(expressionLeft, F.in(B, rightIdsC)) - } else { - expressionLeft - } - - val expressionLeftRightWithBFilter = - BFilterQ.map(f => F.and(f, expressionLeftRight)).getOrElse(expressionLeftRight) - - val DTAny = F.any(DT).as(Some(DT.rep)) - val withDT = With(DSScore :: DTAny :: DSW :: Nil) val selectDSScores = Select(B :: DSW.name :: DTAny.name :: DS :: DSScore.name :: Nil) val fromT = From(T, Some("l")) val joinWeights = 
Join(q.toColumn(None), Some("LEFT"), Some("OUTER"), false, Some("r"), DS :: Nil) - val preWhereQ = PreWhere(expressionLeftRightWithBFilter) + val preWhereQ = PreWhere(filterExpression) val groupByQ = GroupBy(B :: DS :: Nil) Q( @@ -140,40 +148,10 @@ case class QAOTF( } def simpleQuery(offset: Int, size: Int): Query = { - val leftIdsC = F.set((AIDs + AId).map(literal).toSeq) - - val nonPP = F.set(nonPropagatedDatasources.map(literal).toSeq) - val expressionLeft = if (nonPropagatedDatasources.nonEmpty) { - F.or( - F.and( - F.in(A, leftIdsC), - F.notIn(DS, nonPP) - ), - F.equals(A, literal(AId)) - ) - } else - F.in(A, leftIdsC) - - // in the case we also want to filter B set - val expressionLeftRight = if (BIDs.nonEmpty) { - val rightIdsC = F.set(BIDs.map(literal).toSeq) - F.and( - expressionLeft, - F.in(B, rightIdsC) - ) - } else { - expressionLeft - } - - val expressionLeftRightWithBFilter = - BFilterQ.map(f => F.and(f, expressionLeftRight)).getOrElse(expressionLeftRight) - - val DTAny = F.any(DT).as(Some(DT.rep)) - val withDT = With(DTAny :: Nil) val selectDSScores = Select(B :: DTAny.name :: DS :: Nil) val fromT = From(T, Some("l")) - val preWhereQ = PreWhere(expressionLeftRightWithBFilter) + val preWhereQ = PreWhere(filterExpression) val groupByQ = GroupBy(B :: DS :: Nil) val aggDSQ = Q( @@ -297,7 +275,7 @@ case class QAOTF( val limitC = Limit(offset, size) val rootQ = Q(withScores, selectScores, fromAgg, groupByB, orderBySome, limitC) - logger.debug(rootQ.toString) + logger.info(rootQ.toString) rootQ } From cc6c0f15772fd45de51bcf86acd3191d919bf95a Mon Sep 17 00:00:00 2001 From: James Hayhurst Date: Thu, 22 Aug 2024 15:54:41 +0000 Subject: [PATCH 11/15] add required param to datasource settings --- app/models/Backend.scala | 6 ++++-- app/models/ClickhouseRetriever.scala | 2 ++ app/models/db/QAOTF.scala | 6 +++--- app/models/entities/Configuration.scala | 2 +- conf/application.conf | 18 +++++++++--------- 5 files changed, 19 insertions(+), 15 deletions(-) diff --git a/app/models/Backend.scala b/app/models/Backend.scala index b1047bce..c336342c 100644 --- a/app/models/Backend.scala +++ b/app/models/Backend.scala @@ -471,8 +471,8 @@ class Backend @Inject() (implicit ): Future[Associations] = { val page = pagination.getOrElse(Pagination.mkDefault) val dss = datasources.getOrElse(defaultOTSettings.clickhouse.harmonic.datasources) - val weights = dss.map(s => (s.id, s.weight)) + val mustIncludeDatasources = dss.withFilter(_.required).map(_.id).toSet val dontPropagate = dss.withFilter(!_.propagate).map(_.id).toSet val aotfQ = QAOTF( defaultOTSettings.clickhouse.disease.associations.name, @@ -482,6 +482,7 @@ class Backend @Inject() (implicit filter, orderBy, weights, + mustIncludeDatasources, dontPropagate, page.offset, page.size @@ -694,7 +695,7 @@ class Backend @Inject() (implicit ): Future[Associations] = { val page = pagination.getOrElse(Pagination.mkDefault) val dss = datasources.getOrElse(defaultOTSettings.clickhouse.harmonic.datasources) - + val mustIncludeDatasources = dss.withFilter(_.required).map(_.id).toSet val weights = dss.map(s => (s.id, s.weight)) val dontPropagate = dss.withFilter(!_.propagate).map(_.id).toSet val aotfQ = QAOTF( @@ -705,6 +706,7 @@ class Backend @Inject() (implicit filter, orderBy, weights, + mustIncludeDatasources, dontPropagate, page.offset, page.size diff --git a/app/models/ClickhouseRetriever.scala b/app/models/ClickhouseRetriever.scala index 98086691..41a95b01 100644 --- a/app/models/ClickhouseRetriever.scala +++ b/app/models/ClickhouseRetriever.scala @@ 
-73,6 +73,7 @@ class ClickhouseRetriever(dbConfig: DatabaseConfig[ClickHouseProfile], config: O pagination: Pagination ): Future[Vector[Association]] = { val weights = datasourceSettings.map(s => (s.id, s.weight)) + val mustIncludeDatasources = datasourceSettings.withFilter(_.required).map(_.id).toSet val dontPropagate = datasourceSettings.withFilter(!_.propagate).map(_.id).toSet val aotfQ = QAOTF( tableName, @@ -82,6 +83,7 @@ class ClickhouseRetriever(dbConfig: DatabaseConfig[ClickHouseProfile], config: O BFilter, None, weights, + mustIncludeDatasources, dontPropagate, pagination.offset, pagination.size diff --git a/app/models/db/QAOTF.scala b/app/models/db/QAOTF.scala index dc1a0c8e..ef1d4325 100644 --- a/app/models/db/QAOTF.scala +++ b/app/models/db/QAOTF.scala @@ -34,6 +34,7 @@ case class QAOTF( BFilter: Option[String], orderScoreBy: Option[(String, String)], datasourceWeights: Seq[(String, Double)], + mustIncludeDatasources: Set[String], nonPropagatedDatasources: Set[String], offset: Int, size: Int @@ -89,12 +90,11 @@ case class QAOTF( } else { expressionLeft } - val DSRequired = Set("eva") // TODO: read this from a "required" request parameter val expressionLeftRighWithFilters = { val expressionLeftRightWithBFilter = BFilterQ.map(f => F.and(f, expressionLeftRight)).getOrElse(expressionLeftRight) - if (DSRequired.nonEmpty) { - F.and(expressionLeftRightWithBFilter, F.in(DS, F.set(DSRequired.map(literal).toSeq))) + if (mustIncludeDatasources.nonEmpty) { + F.and(expressionLeftRightWithBFilter, F.in(DS, F.set(mustIncludeDatasources.map(literal).toSeq))) } else { expressionLeftRightWithBFilter } diff --git a/app/models/entities/Configuration.scala b/app/models/entities/Configuration.scala index d685dc24..3b54406a 100644 --- a/app/models/entities/Configuration.scala +++ b/app/models/entities/Configuration.scala @@ -43,7 +43,7 @@ object Configuration { case class DiseaseSettings(associations: DbTableSettings) - case class DatasourceSettings(id: String, weight: Double, propagate: Boolean) + case class DatasourceSettings(id: String, weight: Double, propagate: Boolean, required: Boolean = false) case class HarmonicSettings(pExponent: Int, datasources: Seq[DatasourceSettings]) diff --git a/conf/application.conf b/conf/application.conf index 27d862a6..4c6d1d99 100644 --- a/conf/application.conf +++ b/conf/application.conf @@ -52,15 +52,15 @@ ot { harmonic { pExponent = 2 datasources = [ - {id: "europepmc", weight: 0.2, data-type = "literature", propagate = true}, - {id: "expression_atlas", weight: 0.2, data-type = "rna_expression", propagate = false}, - {id: "impc", weight: 0.2, data-type = "animal_model", propagate = true}, - {id: "progeny", weight: 0.5, data-type = "affected_pathway", propagate = true}, - {id: "slapenrich", weight: 0.5, data-type = "affected_pathway", propagate = true}, - {id: "sysbio", weight: 0.5, data-type = "affected_pathway", propagate = true}, - {id: "cancer_biomarkers", weight: 0.5, data-type = "affected_pathway", propagate = true}, - {id: "ot_crispr", weight: 0.5, data-type = "ot_partner", propagate = true}, - {id: "encore", weight: 0.5, data-type = "ot_partner", propagate = true}, + {id: "europepmc", weight: 0.2, data-type = "literature", propagate = true, required = false}, + {id: "expression_atlas", weight: 0.2, data-type = "rna_expression", propagate = false, required = false}, + {id: "impc", weight: 0.2, data-type = "animal_model", propagate = true, required = false}, + {id: "progeny", weight: 0.5, data-type = "affected_pathway", propagate = true, required = 
false}, + {id: "slapenrich", weight: 0.5, data-type = "affected_pathway", propagate = true, required = false}, + {id: "sysbio", weight: 0.5, data-type = "affected_pathway", propagate = true, required = false}, + {id: "cancer_biomarkers", weight: 0.5, data-type = "affected_pathway", propagate = true, required = false}, + {id: "ot_crispr", weight: 0.5, data-type = "ot_partner", propagate = true, required = false}, + {id: "encore", weight: 0.5, data-type = "ot_partner", propagate = true, required = false}, ] } } From 2094995a35e1eaec46573cd06159760bf8d23ba4 Mon Sep 17 00:00:00 2001 From: James Hayhurst Date: Thu, 22 Aug 2024 15:55:11 +0000 Subject: [PATCH 12/15] formatting --- app/models/db/QAOTF.scala | 4 +++- app/models/entities/Configuration.scala | 6 +++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/app/models/db/QAOTF.scala b/app/models/db/QAOTF.scala index ef1d4325..fef76385 100644 --- a/app/models/db/QAOTF.scala +++ b/app/models/db/QAOTF.scala @@ -94,7 +94,9 @@ case class QAOTF( val expressionLeftRightWithBFilter = BFilterQ.map(f => F.and(f, expressionLeftRight)).getOrElse(expressionLeftRight) if (mustIncludeDatasources.nonEmpty) { - F.and(expressionLeftRightWithBFilter, F.in(DS, F.set(mustIncludeDatasources.map(literal).toSeq))) + F.and(expressionLeftRightWithBFilter, + F.in(DS, F.set(mustIncludeDatasources.map(literal).toSeq)) + ) } else { expressionLeftRightWithBFilter } diff --git a/app/models/entities/Configuration.scala b/app/models/entities/Configuration.scala index 3b54406a..fb78a51f 100644 --- a/app/models/entities/Configuration.scala +++ b/app/models/entities/Configuration.scala @@ -43,7 +43,11 @@ object Configuration { case class DiseaseSettings(associations: DbTableSettings) - case class DatasourceSettings(id: String, weight: Double, propagate: Boolean, required: Boolean = false) + case class DatasourceSettings(id: String, + weight: Double, + propagate: Boolean, + required: Boolean = false + ) case class HarmonicSettings(pExponent: Int, datasources: Seq[DatasourceSettings]) From 4e04b66b45b27d5987d29f3433e1d3c749c3082c Mon Sep 17 00:00:00 2001 From: James Hayhurst Date: Thu, 15 Aug 2024 10:43:12 +0000 Subject: [PATCH 13/15] added logic for filtering by datasource id --- app/models/db/QAOTF.scala | 122 ++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 72 deletions(-) diff --git a/app/models/db/QAOTF.scala b/app/models/db/QAOTF.scala index 3e70f999..dc1a0c8e 100644 --- a/app/models/db/QAOTF.scala +++ b/app/models/db/QAOTF.scala @@ -51,19 +51,55 @@ case class QAOTF( val maxHS: Column = literal(Harmonic.maxValue(100000, pExponentDefault, 1.0)) .as(Some("max_hs_score")) - val BFilterQ: Option[Column] = BFilter flatMap { case matchStr => - val tokens = matchStr - .split(" ") - .map { s => - F.like(BData.name, F.lower(literal(s"%${s.toLowerCase.trim}%"))) + val filterExpression: Column = { + val BFilterQ: Option[Column] = BFilter flatMap { case matchStr => + val tokens = matchStr + .split(" ") + .map { s => + F.like(BData.name, F.lower(literal(s"%${s.toLowerCase.trim}%"))) + } + .toList + + tokens match { + case h :: Nil => Some(h) + case h1 :: h2 :: rest => Some(F.and(h1, h2, rest: _*)) + case _ => None } - .toList + } + val leftIdsC = F.set((AIDs + AId).map(literal).toSeq) + val nonPP = F.set(nonPropagatedDatasources.map(literal).toSeq) + val expressionLeft = if (nonPropagatedDatasources.nonEmpty) { + F.or( + F.and( + F.in(A, leftIdsC), + F.notIn(DS, nonPP) + ), + F.equals(A, literal(AId)) + ) + } else + F.in(A, leftIdsC) - tokens match 
{ - case h :: Nil => Some(h) - case h1 :: h2 :: rest => Some(F.and(h1, h2, rest: _*)) - case _ => None + // in the case we also want to filter B set + val expressionLeftRight = if (BIDs.nonEmpty) { + val rightIdsC = F.set(BIDs.map(literal).toSeq) + F.and( + expressionLeft, + F.in(B, rightIdsC) + ) + } else { + expressionLeft } + val DSRequired = Set("eva") // TODO: read this from a "required" request parameter + val expressionLeftRighWithFilters = { + val expressionLeftRightWithBFilter = + BFilterQ.map(f => F.and(f, expressionLeftRight)).getOrElse(expressionLeftRight) + if (DSRequired.nonEmpty) { + F.and(expressionLeftRightWithBFilter, F.in(DS, F.set(DSRequired.map(literal).toSeq))) + } else { + expressionLeftRightWithBFilter + } + } + expressionLeftRighWithFilters } val DSScore: Column = F @@ -78,6 +114,7 @@ case class QAOTF( .as(Some("score_datasource")) val DSW: Column = F.ifNull(F.any(column("weight")), literal(1.0)).as(Some("datasource_weight")) + val DTAny = F.any(DT).as(Some(DT.rep)) val queryGroupByDS: Query = { val WC = F @@ -92,41 +129,12 @@ case class QAOTF( Select(DSFieldWC :: WFieldWC :: Nil), OrderBy(DSFieldWC.asc :: Nil) ) - - val leftIdsC = F.set((AIDs + AId).map(literal).toSeq) - - val nonPP = F.set(nonPropagatedDatasources.map(literal).toSeq) - val expressionLeft = if (nonPropagatedDatasources.nonEmpty) { - F.or( - F.and( - F.in(A, leftIdsC), - F.notIn(DS, nonPP) - ), - F.equals(A, literal(AId)) - ) - } else - F.in(A, leftIdsC) - - // in the case we also want to filter B set - val expressionLeftRight = if (BIDs.nonEmpty) { - val rightIdsC = F.set(BIDs.map(literal).toSeq) - - F.and(expressionLeft, F.in(B, rightIdsC)) - } else { - expressionLeft - } - - val expressionLeftRightWithBFilter = - BFilterQ.map(f => F.and(f, expressionLeftRight)).getOrElse(expressionLeftRight) - - val DTAny = F.any(DT).as(Some(DT.rep)) - val withDT = With(DSScore :: DTAny :: DSW :: Nil) val selectDSScores = Select(B :: DSW.name :: DTAny.name :: DS :: DSScore.name :: Nil) val fromT = From(T, Some("l")) val joinWeights = Join(q.toColumn(None), Some("LEFT"), Some("OUTER"), false, Some("r"), DS :: Nil) - val preWhereQ = PreWhere(expressionLeftRightWithBFilter) + val preWhereQ = PreWhere(filterExpression) val groupByQ = GroupBy(B :: DS :: Nil) Q( @@ -140,40 +148,10 @@ case class QAOTF( } def simpleQuery(offset: Int, size: Int): Query = { - val leftIdsC = F.set((AIDs + AId).map(literal).toSeq) - - val nonPP = F.set(nonPropagatedDatasources.map(literal).toSeq) - val expressionLeft = if (nonPropagatedDatasources.nonEmpty) { - F.or( - F.and( - F.in(A, leftIdsC), - F.notIn(DS, nonPP) - ), - F.equals(A, literal(AId)) - ) - } else - F.in(A, leftIdsC) - - // in the case we also want to filter B set - val expressionLeftRight = if (BIDs.nonEmpty) { - val rightIdsC = F.set(BIDs.map(literal).toSeq) - F.and( - expressionLeft, - F.in(B, rightIdsC) - ) - } else { - expressionLeft - } - - val expressionLeftRightWithBFilter = - BFilterQ.map(f => F.and(f, expressionLeftRight)).getOrElse(expressionLeftRight) - - val DTAny = F.any(DT).as(Some(DT.rep)) - val withDT = With(DTAny :: Nil) val selectDSScores = Select(B :: DTAny.name :: DS :: Nil) val fromT = From(T, Some("l")) - val preWhereQ = PreWhere(expressionLeftRightWithBFilter) + val preWhereQ = PreWhere(filterExpression) val groupByQ = GroupBy(B :: DS :: Nil) val aggDSQ = Q( @@ -297,7 +275,7 @@ case class QAOTF( val limitC = Limit(offset, size) val rootQ = Q(withScores, selectScores, fromAgg, groupByB, orderBySome, limitC) - logger.debug(rootQ.toString) + 
logger.info(rootQ.toString) rootQ } From 1b039381f24a5c5681415b07e82072a5b4759525 Mon Sep 17 00:00:00 2001 From: James Hayhurst Date: Thu, 22 Aug 2024 15:54:41 +0000 Subject: [PATCH 14/15] add required param to datasource settings --- app/models/Backend.scala | 2 ++ app/models/ClickhouseRetriever.scala | 2 ++ app/models/db/QAOTF.scala | 10 ++++++---- app/models/entities/Configuration.scala | 6 +++++- conf/application.conf | 18 +++++++++--------- 5 files changed, 24 insertions(+), 14 deletions(-) diff --git a/app/models/Backend.scala b/app/models/Backend.scala index 23eff801..f3cbaa42 100644 --- a/app/models/Backend.scala +++ b/app/models/Backend.scala @@ -475,6 +475,7 @@ class Backend @Inject() (implicit val page = pagination.getOrElse(Pagination.mkDefault) val dss = datasources.getOrElse(defaultOTSettings.clickhouse.harmonic.datasources) val weights = dss.map(s => (s.id, s.weight)) + val mustIncludeDatasources = dss.withFilter(_.required).map(_.id).toSet val dontPropagate = dss.withFilter(!_.propagate).map(_.id).toSet val aotfQ = QAOTF( tableName, @@ -484,6 +485,7 @@ class Backend @Inject() (implicit filter, orderBy, weights, + mustIncludeDatasources, dontPropagate, page.offset, page.size diff --git a/app/models/ClickhouseRetriever.scala b/app/models/ClickhouseRetriever.scala index 98086691..41a95b01 100644 --- a/app/models/ClickhouseRetriever.scala +++ b/app/models/ClickhouseRetriever.scala @@ -73,6 +73,7 @@ class ClickhouseRetriever(dbConfig: DatabaseConfig[ClickHouseProfile], config: O pagination: Pagination ): Future[Vector[Association]] = { val weights = datasourceSettings.map(s => (s.id, s.weight)) + val mustIncludeDatasources = datasourceSettings.withFilter(_.required).map(_.id).toSet val dontPropagate = datasourceSettings.withFilter(!_.propagate).map(_.id).toSet val aotfQ = QAOTF( tableName, @@ -82,6 +83,7 @@ class ClickhouseRetriever(dbConfig: DatabaseConfig[ClickHouseProfile], config: O BFilter, None, weights, + mustIncludeDatasources, dontPropagate, pagination.offset, pagination.size diff --git a/app/models/db/QAOTF.scala b/app/models/db/QAOTF.scala index dc1a0c8e..339f1f41 100644 --- a/app/models/db/QAOTF.scala +++ b/app/models/db/QAOTF.scala @@ -34,6 +34,7 @@ case class QAOTF( BFilter: Option[String], orderScoreBy: Option[(String, String)], datasourceWeights: Seq[(String, Double)], + mustIncludeDatasources: Set[String], nonPropagatedDatasources: Set[String], offset: Int, size: Int @@ -89,12 +90,13 @@ case class QAOTF( } else { expressionLeft } - val DSRequired = Set("eva") // TODO: read this from a "required" request parameter val expressionLeftRighWithFilters = { val expressionLeftRightWithBFilter = BFilterQ.map(f => F.and(f, expressionLeftRight)).getOrElse(expressionLeftRight) - if (DSRequired.nonEmpty) { - F.and(expressionLeftRightWithBFilter, F.in(DS, F.set(DSRequired.map(literal).toSeq))) + if (mustIncludeDatasources.nonEmpty) { + F.and(expressionLeftRightWithBFilter, + F.in(DS, F.set(mustIncludeDatasources.map(literal).toSeq)) + ) } else { expressionLeftRightWithBFilter } @@ -275,7 +277,7 @@ case class QAOTF( val limitC = Limit(offset, size) val rootQ = Q(withScores, selectScores, fromAgg, groupByB, orderBySome, limitC) - logger.info(rootQ.toString) + logger.debug(rootQ.toString) rootQ } diff --git a/app/models/entities/Configuration.scala b/app/models/entities/Configuration.scala index d685dc24..fb78a51f 100644 --- a/app/models/entities/Configuration.scala +++ b/app/models/entities/Configuration.scala @@ -43,7 +43,11 @@ object Configuration { case class 
DiseaseSettings(associations: DbTableSettings) - case class DatasourceSettings(id: String, weight: Double, propagate: Boolean) + case class DatasourceSettings(id: String, + weight: Double, + propagate: Boolean, + required: Boolean = false + ) case class HarmonicSettings(pExponent: Int, datasources: Seq[DatasourceSettings]) diff --git a/conf/application.conf b/conf/application.conf index 07320801..8cd21b4f 100644 --- a/conf/application.conf +++ b/conf/application.conf @@ -52,15 +52,15 @@ ot { harmonic { pExponent = 2 datasources = [ - {id: "europepmc", weight: 0.2, data-type = "literature", propagate = true}, - {id: "expression_atlas", weight: 0.2, data-type = "rna_expression", propagate = false}, - {id: "impc", weight: 0.2, data-type = "animal_model", propagate = true}, - {id: "progeny", weight: 0.5, data-type = "affected_pathway", propagate = true}, - {id: "slapenrich", weight: 0.5, data-type = "affected_pathway", propagate = true}, - {id: "sysbio", weight: 0.5, data-type = "affected_pathway", propagate = true}, - {id: "cancer_biomarkers", weight: 0.5, data-type = "affected_pathway", propagate = true}, - {id: "ot_crispr", weight: 0.5, data-type = "ot_partner", propagate = true}, - {id: "encore", weight: 0.5, data-type = "ot_partner", propagate = true}, + {id: "europepmc", weight: 0.2, data-type = "literature", propagate = true, required = false}, + {id: "expression_atlas", weight: 0.2, data-type = "rna_expression", propagate = false, required = false}, + {id: "impc", weight: 0.2, data-type = "animal_model", propagate = true, required = false}, + {id: "progeny", weight: 0.5, data-type = "affected_pathway", propagate = true, required = false}, + {id: "slapenrich", weight: 0.5, data-type = "affected_pathway", propagate = true, required = false}, + {id: "sysbio", weight: 0.5, data-type = "affected_pathway", propagate = true, required = false}, + {id: "cancer_biomarkers", weight: 0.5, data-type = "affected_pathway", propagate = true, required = false}, + {id: "ot_crispr", weight: 0.5, data-type = "ot_partner", propagate = true, required = false}, + {id: "encore", weight: 0.5, data-type = "ot_partner", propagate = true, required = false}, ] } } From f5e1c23e16277300425efcd8218db838b397a2ea Mon Sep 17 00:00:00 2001 From: James Hayhurst Date: Fri, 23 Aug 2024 15:22:41 +0000 Subject: [PATCH 15/15] logger level change --- app/models/db/QAOTF.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/db/QAOTF.scala b/app/models/db/QAOTF.scala index fef76385..339f1f41 100644 --- a/app/models/db/QAOTF.scala +++ b/app/models/db/QAOTF.scala @@ -277,7 +277,7 @@ case class QAOTF( val limitC = Limit(offset, size) val rootQ = Q(withScores, selectScores, fromAgg, groupByB, orderBySome, limitC) - logger.info(rootQ.toString) + logger.debug(rootQ.toString) rootQ }
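
Note on the QAOTF filterExpression refactor (PATCH 10/15, repeated as PATCH 13/15): the change folds the free-text B filter, the A-side propagation rules, the optional B-ID restriction and the required-datasource restriction into a single filterExpression, so queryGroupByDS and simpleQuery share one PreWhere and can no longer drift apart. The sketch below is an illustrative, self-contained rendering of just the token handling; it emits plain SQL text instead of the project's internal column DSL (F.like, F.and, literal), and the names NameFilterSketch and nameFilterSql are invented for the example.

object NameFilterSketch {

  // Splits a free-text filter into whitespace tokens and AND-combines one LIKE
  // predicate per token, mirroring the `tokens match { ... }` cases inside
  // QAOTF.filterExpression; returns None when no usable token remains so the
  // caller can drop the clause entirely.
  def nameFilterSql(columnName: String, filter: Option[String]): Option[String] =
    filter.flatMap { matchStr =>
      val predicates = matchStr
        .split(" ")
        .toList
        .map(_.trim.toLowerCase)
        .filter(_.nonEmpty)
        .map(token => s"$columnName LIKE lower('%$token%')")

      predicates match {
        case Nil           => None
        case single :: Nil => Some(single)
        case many          => Some(many.mkString("(", " AND ", ")"))
      }
    }

  def main(args: Array[String]): Unit =
    // Prints: Some((name LIKE lower('%breast%') AND name LIKE lower('%carcinoma%')))
    println(nameFilterSql("name", Some("breast carcinoma")))
}
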
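
Note on the required-datasource setting (PATCH 11/15 through 15/15): the hard-coded DSRequired = Set("eva") placeholder is replaced by a required flag on DatasourceSettings; Backend and ClickhouseRetriever derive a mustIncludeDatasources set from the settings and QAOTF appends it to the filter as an extra IN clause on the datasource column. The sketch below approximates that wiring with plain SQL strings rather than the project's column DSL (F.and, F.in, F.set, literal); SimpleDatasourceSettings and buildPrewhere are names invented for the example.

object RequiredDatasourceSketch {

  // Mirrors the shape of Configuration.DatasourceSettings after these patches.
  final case class SimpleDatasourceSettings(
      id: String,
      weight: Double,
      propagate: Boolean,
      required: Boolean = false
  )

  private def quote(s: String): String = "'" + s.replace("'", "''") + "'"

  // Derives the set of required datasource ids and, when non-empty, appends an
  // IN restriction to the base PREWHERE expression, following the composition
  // order used in QAOTF.filterExpression.
  def buildPrewhere(base: String, settings: Seq[SimpleDatasourceSettings]): String = {
    val mustInclude = settings.withFilter(_.required).map(_.id).toSet
    if (mustInclude.isEmpty) base
    else s"($base) AND datasource_id IN (${mustInclude.toSeq.sorted.map(quote).mkString(", ")})"
  }

  def main(args: Array[String]): Unit = {
    val dss = Seq(
      SimpleDatasourceSettings("europepmc", 0.2, propagate = true),
      SimpleDatasourceSettings("eva", 1.0, propagate = true, required = true)
    )
    // Prints: (disease_id IN ('EFO_0000319')) AND datasource_id IN ('eva')
    println(buildPrewhere("disease_id IN ('EFO_0000319')", dss))
  }
}

Because the new case class field defaults to required = false, existing configuration entries keep working unchanged; the application.conf edits in these patches only spell the default out explicitly for each datasource.
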