From bd4eb7ba0bcec77670d9fef98022aada101e2d53 Mon Sep 17 00:00:00 2001 From: etj Date: Mon, 15 May 2023 12:14:17 +0200 Subject: [PATCH] [Fixes #10995] Faceting - Some implementations --- geonode/facets/providers/category.py | 83 +++++++++ geonode/facets/providers/thesaurus.py | 127 +++++++++++++ geonode/facets/providers/users.py | 83 +++++++++ geonode/facets/tests.py | 246 ++++++++++++++++++++++++++ geonode/settings.py | 3 + 5 files changed, 542 insertions(+) create mode 100644 geonode/facets/providers/category.py create mode 100644 geonode/facets/providers/thesaurus.py create mode 100644 geonode/facets/providers/users.py create mode 100644 geonode/facets/tests.py diff --git a/geonode/facets/providers/category.py b/geonode/facets/providers/category.py new file mode 100644 index 00000000000..3db78eb4d03 --- /dev/null +++ b/geonode/facets/providers/category.py @@ -0,0 +1,83 @@ +######################################################################### +# +# Copyright (C) 2023 Open Source Geospatial Foundation - all rights reserved +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
# geonode/facets/providers/category.py

import logging

from django.db.models import Count

from geonode.facets.models import FacetProvider, DEFAULT_FACET_PAGE_SIZE, FACET_TYPE_CATEGORY

logger = logging.getLogger(__name__)


class CategoryFacetProvider(FacetProvider):
    """
    Implements faceting for resource's topicCategory
    """

    @property
    def name(self) -> str:
        """Return the identifier under which this facet is registered."""
        return "category"

    def get_info(self, lang="en") -> dict:
        """
        Return the static description of the facet.

        :param lang: accepted for interface compatibility; the label returned
            here is not localized, so the value is not used.
        :return: dict with the facet name, the filter key, the label, the
            facet type and its ordering hints.
        """
        return {
            "name": self.name,
            "key": "filter{category__identifier}",
            "label": "Category",
            "type": FACET_TYPE_CATEGORY,
            "hierarchical": False,
            "order": 2,
        }

    def get_facet_items(
        self,
        queryset=None,
        start: int = 0,
        end: int = DEFAULT_FACET_PAGE_SIZE,
        lang="en",
        topic_contains: str = None,
        **kwargs,
    ) -> (int, list):
        """
        Return one page of category topics together with the total number of topics.

        :param queryset: queryset of the resources to be faceted. Despite the
            ``None`` default it is required: it is dereferenced unconditionally.
        :param start: index of the first topic to return (slice start).
        :param end: index after the last topic to return (slice end).
        :param lang: not used by this provider.
        :param topic_contains: optional text; only the categories whose
            description contains it (case-insensitive) are returned.
        :param kwargs: ignored; accepted so that all the providers can be
            invoked with the same set of keyword arguments.
        :return: tuple ``(total, topics)`` where ``total`` is the number of
            distinct topics and ``topics`` is the list of dicts for the
            requested page, ordered by descending count.
        """
        logger.debug("Retrieving facets for %s", self.name)

        q = queryset.values("category__identifier", "category__gn_description", "category__fa_class")
        if topic_contains:
            # substring match, consistent with the parameter name and with the other providers
            q = q.filter(category__gn_description__icontains=topic_contains)
        # values() + annotate() groups by category; rows are counted through the owner relation
        q = q.annotate(count=Count("owner")).order_by("-count")

        cnt = q.count()

        logger.info("Found %d facets for %s", cnt, self.name)
        logger.debug(" ---> %s\n\n", q.query)
        logger.debug(" ---> %r\n\n", q.all())

        topics = [
            {
                "key": r["category__identifier"],
                "label": r["category__gn_description"],
                "count": r["count"],
                "fa_class": r["category__fa_class"],
            }
            for r in q[start:end].all()
        ]

        return cnt, topics

    @classmethod
    def register(cls, registry, **kwargs) -> None:
        """Register a single instance of this provider into ``registry``."""
        registry.register_facet_provider(CategoryFacetProvider())
# geonode/facets/providers/thesaurus.py

import logging

from django.db.models import Count

from geonode.facets.models import FacetProvider, DEFAULT_FACET_PAGE_SIZE, FACET_TYPE_THESAURUS

logger = logging.getLogger(__name__)


class ThesaurusFacetProvider(FacetProvider):
    """
    Implements faceting for a given Thesaurus
    """

    def __init__(self, identifier, title, order, labels: dict):
        """
        :param identifier: thesaurus identifier; it is also used as the facet name.
        :param title: default (non localized) label of the facet.
        :param order: ordering hint reported by ``get_info``.
        :param labels: localized labels, as a dict ``{lang: label}``.
        """
        self._name = identifier
        self.label = title
        self.order = order
        self.labels = labels

    @property
    def name(self) -> str:
        """Return the facet name, i.e. the thesaurus identifier."""
        return self._name

    def get_info(self, lang="en") -> dict:
        """
        Return the description of the facet.

        :param lang: language of the requested label; when no label exists
            for it, the thesaurus title is used and ``is_localized`` is False.
        :return: dict with name, filter key, label, localization flag, facet
            type and ordering hints.
        """
        return {
            "name": self._name,
            "key": "filter{tkeywords}",
            "label": self.labels.get(lang, self.label),
            "is_localized": self.labels.get(lang, None) is not None,
            "type": FACET_TYPE_THESAURUS,
            "hierarchical": False,
            "order": self.order,
        }

    def get_facet_items(
        self,
        queryset=None,
        start: int = 0,
        end: int = DEFAULT_FACET_PAGE_SIZE,
        lang="en",
        topic_contains: str = None,
        **kwargs,
    ) -> (int, list):
        """
        Return one page of keywords of this thesaurus together with the total number of keywords.

        :param queryset: queryset of the resources to be faceted. Despite the
            ``None`` default it is required: it is dereferenced unconditionally.
        :param start: index of the first topic to return (slice start).
        :param end: index after the last topic to return (slice end).
        :param lang: only the keywords having a label in this language are considered.
        :param topic_contains: optional text; only the keywords whose label
            contains it (case-insensitive) are returned.
        :param kwargs: ignored.
        :return: tuple ``(total, topics)`` where ``total`` is the number of
            distinct keywords and ``topics`` is the list of dicts for the
            requested page, ordered by descending count.
        """
        logger.debug("Retrieving facets for %s", self._name)

        # named "filters" so that the builtin filter() is not shadowed
        filters = {
            "tkeywords__thesaurus__identifier": self._name,
            "tkeywords__keyword__lang": lang,
        }

        if topic_contains:
            filters["tkeywords__keyword__label__icontains"] = topic_contains

        q = (
            queryset.filter(**filters)
            .values("tkeywords", "tkeywords__keyword__label", "tkeywords__alt_label")
            .annotate(count=Count("tkeywords"))
            .order_by("-count")
        )

        cnt = q.count()

        logger.info("Found %d facets for %s", cnt, self._name)
        logger.debug(" ---> %s\n\n", q.query)
        logger.debug(" ---> %r\n\n", q.all())

        topics = [
            {
                "key": r["tkeywords"],
                # fall back to the alternate label when no localized label is available
                "label": r["tkeywords__keyword__label"] or r["tkeywords__alt_label"],
                "is_localized": r["tkeywords__keyword__label"] is not None,
                "count": r["count"],
            }
            for r in q[start:end].all()
        ]

        return cnt, topics

    @classmethod
    def register(cls, registry, **kwargs) -> None:
        """
        Register into ``registry`` one provider for each Thesaurus flagged with ``facet=True``.
        """
        # NOTE(review): imported locally, presumably to avoid loading the models
        # at module import time - confirm before moving it to the top of the file
        from geonode.base.models import Thesaurus

        # this query returns the list of thesauri X the list of localized titles
        q = (
            Thesaurus.objects.filter(facet=True)
            .values("identifier", "title", "order", "rel_thesaurus__label", "rel_thesaurus__lang")
            .order_by("order")
        )

        # coalesce the localized labels: one entry per thesaurus, in query order
        ret = {}
        for r in q.all():
            identifier = r["identifier"]
            t = ret.get(identifier)
            if t is None:
                t = {k: r[k] for k in ("identifier", "title", "order")}
                t["labels"] = {}
                ret[identifier] = t
            if r["rel_thesaurus__lang"] and r["rel_thesaurus__label"]:
                t["labels"][r["rel_thesaurus__lang"]] = r["rel_thesaurus__label"]

        logger.info("Creating providers for %r", ret)
        for t in ret.values():
            registry.register_facet_provider(
                ThesaurusFacetProvider(t["identifier"], t["title"], t["order"], t["labels"])
            )
# geonode/facets/providers/users.py

import logging

from django.db.models import Count

from geonode.facets.models import FacetProvider, DEFAULT_FACET_PAGE_SIZE, FACET_TYPE_USER

logger = logging.getLogger(__name__)


class OwnerFacetProvider(FacetProvider):
    """
    Implements faceting for users owner of the resources
    """

    @property
    def name(self) -> str:
        """Return the identifier under which this facet is registered."""
        return "owner"

    def get_info(self, lang="en") -> dict:
        """
        Return the static description of the facet.

        :param lang: accepted for interface compatibility; the label returned
            here is not localized, so the value is not used.
        :return: dict with the facet name, the filter key, the label, the
            facet type and its ordering hints.
        """
        return {
            "name": self.name,
            "key": "owner",
            "label": "Owner",
            "type": FACET_TYPE_USER,
            "hierarchical": False,
            "order": 5,
        }

    def get_facet_items(
        self,
        queryset=None,
        start: int = 0,
        end: int = DEFAULT_FACET_PAGE_SIZE,
        lang="en",
        topic_contains: str = None,
        **kwargs,
    ) -> (int, list):
        """
        Return one page of resource owners together with the total number of owners.

        :param queryset: queryset of the resources to be faceted. Despite the
            ``None`` default it is required: it is dereferenced unconditionally.
        :param start: index of the first topic to return (slice start).
        :param end: index after the last topic to return (slice end).
        :param lang: not used by this provider.
        :param topic_contains: optional text; only the owners whose username
            contains it (case-insensitive) are returned.
        :param kwargs: ignored; accepted so that all the providers can be
            invoked with the same set of keyword arguments.
        :return: tuple ``(total, topics)`` where ``total`` is the number of
            distinct owners and ``topics`` is the list of dicts for the
            requested page, ordered by descending count.
        """
        logger.debug("Retrieving facets for OWNER")

        q = queryset.values("owner", "owner__username")
        if topic_contains:
            q = q.filter(owner__username__icontains=topic_contains)
        # values() + annotate() groups by owner: count is the number of rows per owner
        q = q.annotate(count=Count("owner")).order_by("-count")

        cnt = q.count()

        logger.info("Found %d facets for %s", cnt, self.name)
        logger.debug(" ---> %s\n\n", q.query)
        logger.debug(" ---> %r\n\n", q.all())

        topics = [
            {
                "key": r["owner"],
                "label": r["owner__username"],
                # usernames are not translated: the localized label is the username itself
                "localized_label": r["owner__username"],
                "count": r["count"],
            }
            for r in q[start:end]
        ]

        return cnt, topics

    @classmethod
    def register(cls, registry, **kwargs) -> None:
        """Register a single instance of this provider into ``registry``."""
        registry.register_facet_provider(OwnerFacetProvider())
# geonode/facets/tests.py

import logging
import json
from tastypie.test import TestApiClient
from uuid import uuid4

from django.contrib.auth import get_user_model
from django.http import JsonResponse
from django.test import RequestFactory
from django.urls import reverse

from geonode.base.models import Thesaurus, ThesaurusLabel, ThesaurusKeyword, ThesaurusKeywordLabel, ResourceBase
from geonode.tests.base import GeoNodeBaseTestSupport
import geonode.facets.views as views


logger = logging.getLogger(__name__)


class TestFacets(GeoNodeBaseTestSupport):
    """
    Tests for the facets listing: 2 thesauri with 10 keywords each and
    20 resources are created once for the whole class.
    """

    @classmethod
    def setUpClass(cls):
        super().setUpClass()

        cls.user = get_user_model().objects.create(username="user_00")
        # NOTE(review): the "admin" user is expected to exist already (presumably loaded by the base test support)
        cls.admin = get_user_model().objects.get(username="admin")

        cls._create_thesauri()
        cls._create_resources()
        cls.rf = RequestFactory()

    def setUp(self):
        super().setUp()

        self.api_client = TestApiClient()

        self.assertEqual(self.admin.username, "admin")
        self.assertEqual(self.admin.is_superuser, True)

    @classmethod
    def _create_thesauri(cls):
        """
        Create thesauri t_0 and t_1, each one with "en" and "it" labels and
        with 10 keywords having "en" and "it" labels as well.

        The thesauri are stored in ``cls.thesauri`` by index; the keywords
        are stored in ``cls.thesauri_k`` using "<thesaurus idx>_<keyword idx>" as key.
        """
        cls.thesauri = {}
        cls.thesauri_k = {}

        for tn in range(2):
            t = Thesaurus.objects.create(identifier=f"t_{tn}", title=f"Thesaurus {tn}")
            cls.thesauri[tn] = t
            for tl in ("en", "it"):
                ThesaurusLabel.objects.create(thesaurus=t, lang=tl, label=f"TLabel {tn} {tl}")

            for tkn in range(10):
                tk = ThesaurusKeyword.objects.create(thesaurus=t, alt_label=f"alt_tkn{tkn}_t{tn}")
                cls.thesauri_k[f"{tn}_{tkn}"] = tk
                for tkl in ("en", "it"):
                    ThesaurusKeywordLabel.objects.create(keyword=tk, lang=tkl, label=f"T{tn}_K{tkn}_{tkl}")

    @classmethod
    def _create_resources(cls):
        """
        Create 20 public resources owned by ``cls.user`` and assign them the thesaurus keywords.
        """
        public_perm_spec = {"users": {"AnonymousUser": ["view_resourcebase"]}, "groups": []}

        for x in range(20):
            d: ResourceBase = ResourceBase.objects.create(
                title=f"dataset_{x:02}",
                uuid=str(uuid4()),
                owner=cls.user,
                abstract=f"Abstract for dataset {x:02}",
                subtype="vector",
                is_approved=True,
                is_published=True,
            )

            # These are the assigned keywords to the Resources

            # RB00 ->            T1K0
            # RB01 -> T0K0       T1K0
            # RB02 ->            T1K0
            # RB03 -> T0K0       T1K0
            # RB04 ->            T1K0
            # RB05 -> T0K0       T1K0
            # RB06 ->            T1K0
            # RB07 -> T0K0       T1K0
            # RB08 ->            T1K0 T1K1
            # RB09 -> T0K0       T1K0 T1K1
            # RB10 ->                 T1K1
            # RB11 -> T0K0 T0K1       T1K1
            # RB12 ->                 T1K1
            # RB13 -> T0K0 T0K1
            # RB14 ->
            # RB15 -> T0K0 T0K1
            # RB16 ->
            # RB17 -> T0K0 T0K1
            # RB18 ->
            # RB19 -> T0K0 T0K1

            assignments = (
                ("0_0", x % 2 == 1),
                ("0_1", x % 2 == 1 and x > 10),
                ("1_0", x < 10),
                ("1_1", 7 < x < 13),
            )
            for tk_key, wanted in assignments:
                if not wanted:
                    continue
                keyword = cls.thesauri_k[tk_key]
                logger.debug("ADDING KEYWORDS %s to RB %s", keyword, d)
                d.tkeywords.add(keyword)
                d.save()

            d.set_permissions(public_perm_spec)

    @staticmethod
    def _facets_to_map(facets):
        """Index a list of facet dicts by their "name"."""
        return {f["name"]: f for f in facets}

    def test_facets_base(self):
        req = self.rf.get(reverse("list_facets"), data={"lang": "en"})
        res: JsonResponse = views.list_facets(req)
        obj = json.loads(res.content)
        self.assertIn("facets", obj)
        facets_list = obj["facets"]
        # The providers are listed in settings.FACET_PROVIDERS (category, owner, thesaurus):
        # the expected facets are category, owner, and one for each thesaurus created in _create_thesauri
        self.assertEqual(4, len(facets_list))
        fmap = self._facets_to_map(facets_list)
        for name in ("category", "owner", "t_0", "t_1"):
            self.assertIn(name, fmap)

    def test_facets_rich(self):
        # make sure the resources are in
        c = ResourceBase.objects.count()
        self.assertEqual(20, c)

        # make sure tkeywords have been assigned by checking a sample resource
        rb = ResourceBase.objects.get(title="dataset_01")
        self.assertEqual(2, rb.tkeywords.count())

        # run the request
        req = self.rf.get(reverse("list_facets"), data={"include_topics": 1, "lang": "en"})
        res: JsonResponse = views.list_facets(req)
        obj = json.loads(res.content)

        facets_list = obj["facets"]
        self.assertEqual(4, len(facets_list))
        fmap = self._facets_to_map(facets_list)
        for expected in (
            {
                "name": "category",
                "topics": {
                    "total": 1,
                },
            },
            {
                "name": "owner",
                "topics": {
                    "total": 1,
                },
            },
            {
                "name": "t_0",
                "topics": {
                    "total": 2,
                    "items": [
                        {"label": "T0_K0_en", "count": 10},
                        {"label": "T0_K1_en", "count": 5},
                    ],
                },
            },
            {
                "name": "t_1",
                "topics": {
                    "total": 2,
                    "items": [
                        {"label": "T1_K0_en", "count": 10},
                    ],
                },
            },
        ):
            name = expected["name"]
            self.assertIn(name, fmap)
            facet = fmap[name]
            expected_topics = expected["topics"]
            for topic_key, expected_value in expected_topics.items():
                if topic_key != "items":
                    self.assertEqual(
                        expected_value, facet["topics"][topic_key], f"Mismatching '{topic_key}' for {name}"
                    )
                    continue

                # only the expected items are checked: the facet may contain further items
                items = facet["topics"]["items"]
                for exp_item in expected_value:
                    exp_label = exp_item["label"]
                    found = next((item for item in items if item["label"] == exp_label), None)

                    # check the lookup result: a missing topic must be reported as such
                    self.assertIsNotNone(found, f"topic not found '{exp_label}'")
                    for exp_field in exp_item:
                        self.assertEqual(exp_item[exp_field], found[exp_field], f"Mismatch item key:{exp_field}")

    def test_bad_lang(self):
        # for thesauri, make sure that by requesting a non-existent language the faceting is still working,
        # using the default labels
        # TODO impl+test
        pass

    def test_user_auth(self):
        # make sure the user authorization pre-filters the visible resources
        # TODO test
        pass

    def test_thesauri_reloading(self):
        # Thesauri facets are cached.
        # Make sure that when Thesauri or ThesauriLabel change the facets cache is invalidated
        # TODO impl+test
        pass