Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Person suggest #1972

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
"program": "${workspaceFolder}/src/manage.py",
"args": [
"runserver",
"[::]:8000"
"[::]:8000",
"--ipv6"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll remove this before merging

],
"env": {
"USE_DEBUG_TOOLBAR": "True"
Expand Down
65 changes: 64 additions & 1 deletion src/search/documents/person.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
full_name = es_fields.TextField(attr="full_name", analyzer=content_analyzer)
person_types = es_fields.KeywordField(attr="person_types_indexing")
headline = es_fields.ObjectField(
attr="headline",
properties={
"title": es_fields.TextField(),
},
Expand All @@ -29,6 +28,11 @@
"name": es_fields.TextField(),
},
)
suggestion_phrases = es_fields.Completion()
user_id = es_fields.IntegerField(attr="user_id")
reputation_hubs = es_fields.KeywordField()
education = es_fields.KeywordField()
created_date = es_fields.DateField(attr="created_date")

class Index:
name = "person"
Expand All @@ -43,3 +47,62 @@

def should_remove_from_index(self, obj):
    """Report whether ``obj`` should be removed from the index.

    This implementation ignores ``obj`` and always returns ``False``.
    """
    # NOTE(review): presumably a hook consulted by the indexing base class
    # when deciding whether to delete a document — confirm against the caller.
    return False

def prepare_headline(self, instance):
    """Return the headline for ``instance`` by calling ``instance.build_headline()``."""
    # NOTE(review): presumably picked up as the value of the ``headline``
    # document field through the ``prepare_<field>`` naming convention — confirm.
    return instance.build_headline()

def prepare_reputation_hubs(self, instance):
    """Return the hub names listed in ``instance.reputation_list``.

    Every entry of ``instance.reputation_list`` is read as
    ``entry["hub"]["name"]``; the names are returned in the same order as the
    entries. An empty list yields an empty result.
    """
    return [rep["hub"]["name"] for rep in instance.reputation_list]

def prepare_education(self, instance):
    """Return the ``"name"`` of every entry in ``instance.education``.

    The names are returned in the same order as the entries. An empty
    ``instance.education`` yields an empty list.
    """
    return [edu["name"] for edu in instance.education]

def prepare_suggestion_phrases(self, instance):
    """Build the weighted inputs for the ``suggestion_phrases`` field.

    Returns a list of ``{"input": str, "weight": int}`` dicts, in this order:

    * the full name, weight 15 when ``instance.user`` is truthy, else 10;
    * the first name and the last name, weight 5 each;
    * for every institution that has a display name: the institution name,
      then ``"<first name> <institution name>"``, weight 3 each.

    Missing or empty values are skipped instead of being concatenated, so an
    author without a first name still gets the remaining suggestions.
    """
    suggestions = []

    if instance.full_name:
        # Authors linked to a user get a higher weight than those without.
        name_weight = 15 if instance.user else 10
        suggestions.append({"input": instance.full_name, "weight": name_weight})

    first_name = instance.first_name
    if first_name:
        suggestions.append({"input": first_name, "weight": 5})
    if instance.last_name:
        suggestions.append({"input": instance.last_name, "weight": 5})

    # Add institution names
    for author_institution in instance.institutions.all():
        institution_name = author_institution.institution.display_name
        if not institution_name:
            continue

        suggestions.append({"input": institution_name, "weight": 3})

        # Account for people typing name + institution. Guarded because
        # concatenating a None first name raises TypeError, and an empty one
        # would produce a phrase with a leading space.
        # NOTE(review): the original comment said "full name + institution"
        # while the code used the first name; the first name is kept here —
        # confirm which was intended.
        if first_name:
            suggestions.append(
                {"input": f"{first_name} {institution_name}", "weight": 3}
            )

    return suggestions

def prepare(self, instance):
    """Prepare the document data for ``instance``.

    Starts from the data produced by the parent ``prepare`` and then sets
    ``data["suggestion_phrases"]`` from ``prepare_suggestion_phrases``. If
    building the suggestions raises, the error is logged with its traceback
    and an empty suggestion list is stored instead.

    Returns:
        The prepared data mapping, always containing ``"suggestion_phrases"``.
    """
    import logging

    # NOTE(review): the parent prepare() may already call
    # prepare_suggestion_phrases through the prepare_<field> convention,
    # outside the try block below — confirm whether this second call and its
    # guard are still needed.
    data = super().prepare(instance)
    try:
        data["suggestion_phrases"] = self.prepare_suggestion_phrases(instance)
    except Exception:
        # Deliberately best-effort: fall back to "no suggestions" for this
        # instance. Log instead of print so the failure and its traceback
        # reach the configured log handlers.
        logging.getLogger(__name__).exception(
            "Error preparing suggestions for %s", instance.id
        )
        data["suggestion_phrases"] = []
    return data
4 changes: 4 additions & 0 deletions src/search/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
)
from search.views.journal import JournalDocumentView
from search.views.journal_suggester import JournalSuggesterDocumentView
from search.views.person_suggester import PersonSuggesterDocumentView

router = DefaultRouter()
person = router.register(r"person", PersonDocumentView, basename="person_document")
Expand All @@ -33,6 +34,9 @@
journal_suggester = router.register(
r"journals", JournalSuggesterDocumentView, basename="journal_suggester_document"
)
people_suggester = router.register(
r"people", PersonSuggesterDocumentView, basename="people_suggester"
)

urlpatterns = [
re_path(r"^", include(router.urls)),
Expand Down
1 change: 1 addition & 0 deletions src/search/views/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .hub_suggester import HubSuggesterDocumentView
from .paper import PaperDocumentView
from .person import PersonDocumentView
from .person_suggester import PersonSuggesterDocumentView
from .post import PostDocumentView
from .thread import ThreadDocumentView
from .user_suggester import UserSuggesterDocumentView
49 changes: 49 additions & 0 deletions src/search/views/person_suggester.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from django_elasticsearch_dsl_drf.filter_backends import (
OrderingFilterBackend,
SuggesterFilterBackend,
)
from django_elasticsearch_dsl_drf.pagination import LimitOffsetPagination
from django_elasticsearch_dsl_drf.viewsets import DocumentViewSet

from search.backends.multi_match_filter import MultiMatchSearchFilterBackend
from search.documents.person import PersonDocument
from search.serializers.person import PersonDocumentSerializer
from utils.permissions import ReadOnly


class PersonSuggesterDocumentView(DocumentViewSet):
    """Document viewset over ``PersonDocument`` that exposes a suggester.

    Combines multi-match search on ``full_name``, a ``completion`` suggester
    on ``suggestion_phrases`` and result ordering, paginated with
    ``LimitOffsetPagination``.
    """

    document = PersonDocument
    permission_classes = [ReadOnly]
    serializer_class = PersonDocumentSerializer
    pagination_class = LimitOffsetPagination
    lookup_field = "id"
    filter_backends = [
        MultiMatchSearchFilterBackend,
        SuggesterFilterBackend,
        OrderingFilterBackend,
    ]

    # Default ordering.
    # NOTE(review): the leading "-" presumably means descending, and
    # ``author_score`` is not among the PersonDocument fields visible in this
    # diff — confirm that it is mapped on the document.
    ordering = ("-author_score",)
    # Names accepted for ordering, mapped to the document fields they sort on.
    ordering_fields = {
        "id": "id",
        "full_name": "full_name",
        "author_score": "author_score",
    }

    # ``full_name`` can be filtered with the ``match`` lookup only.
    filter_fields = {
        "full_name": {"field": "full_name", "lookups": ["match"]},
    }

    # Multi-match search looks at ``full_name`` with a boost of 1.
    multi_match_search_fields = {
        "full_name": {"field": "full_name", "boost": 1},
    }

    # Completion suggester backed by the ``suggestion_phrases`` field.
    # NOTE(review): ``size`` presumably caps the number of suggestions
    # returned per request — confirm against the suggester backend.
    suggester_fields = {
        "suggestion_phrases": {
            "field": "suggestion_phrases",
            "suggesters": ["completion"],
            "options": {
                "size": 25,
            },
        },
    }
33 changes: 33 additions & 0 deletions src/user/related_models/author_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,39 @@
f"{university_city}"
)

def build_headline(self):
    """Return this author's headline, deriving one from paper topics if unset.

    Lives on the model so that it can be shared by callers outside the
    serializer.

    Returns:
        ``self.headline`` when it is truthy. Otherwise
        ``{"title": "Author with expertise in <topic display name>"}`` for
        the topic that occurs most often across the unified documents of the
        author's papers (ties go to the topic encountered first), or ``None``
        when there are no topics or anything fails while collecting them.
    """
    from collections import Counter

    if self.headline:
        return self.headline

    try:
        topic_counts = Counter()
        for paper in self.authored_papers.all():
            topic_counts.update(paper.unified_document.topics.all())

        if not topic_counts:
            return None

        # most_common(1) selects the most frequent topic without sorting every
        # entry; on ties it returns the first one inserted, which matches the
        # stable descending sort used previously.
        top_topic, _ = topic_counts.most_common(1)[0]
        return {"title": "Author with expertise in " + top_topic.display_name}
    except Exception:
        # Deliberately best-effort: any failure while walking papers and
        # topics means "no headline" rather than an error for the caller.
        return None

Check warning on line 151 in src/user/related_models/author_model.py

View check run for this annotation

Codecov / codecov/patch

src/user/related_models/author_model.py#L150-L151

Added lines #L150 - L151 were not covered by tests

@property
def full_name(self):
    """Return the first and last name joined by a single space.

    Name parts that are ``None`` or empty are left out, so a missing part
    neither raises ``TypeError`` nor leaves a stray leading or trailing
    space. Returns an empty string when both parts are missing.
    """
    return " ".join(part for part in (self.first_name, self.last_name) if part)
Expand Down
32 changes: 1 addition & 31 deletions src/user/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1092,37 +1092,7 @@ def get_achievements(self, author):
return author.achievements

def get_headline(self, author):
    """Return the headline for ``author``.

    Delegates to ``author.build_headline()`` so the serializer and any other
    caller share a single implementation of the headline logic.
    """
    return author.build_headline()

def get_user(self, author):
user = author.user
Expand Down