Skip to content

Commit

Permalink
Update comments
Browse files Browse the repository at this point in the history
  • Loading branch information
stijn-uva committed Oct 18, 2023
1 parent f9e3ea9 commit c331feb
Showing 1 changed file with 10 additions and 7 deletions.
17 changes: 10 additions & 7 deletions processors/presets/top-hashtags.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
"""
Find similar words
Find most-used hashtags in a dataset
"""
from nltk.stem.snowball import SnowballStemmer

from backend.lib.preset import ProcessorPreset

from common.lib.helpers import UserInput


class TopHashtags(ProcessorPreset):
"""
Run processor pipeline to find similar words
Run processor pipeline to find top hashtags
"""
type = "preset-top-hashtags" # job type ID
category = "Combined processors" # category. 'Combined processors' are always listed first in the UI.
Expand All @@ -36,6 +33,13 @@ class TopHashtags(ProcessorPreset):

@classmethod
def is_compatible_with(cls, module=None, user=None):
    """
    Check if dataset has a "hashtags" column

    This preset counts values from the "hashtags" column, so it is only
    offered for datasets that expose a column with that exact name.

    :param module: Dataset to check; must provide `get_columns()`
    :param user: User trying to run the processor (not used here)
    :return bool: True if the dataset has a "hashtags" column, False
                  otherwise (including when no columns are reported)
    """
    # Fetch the columns once and reuse the result for the membership test.
    columns = module.get_columns()

    # `columns and ...` on its own would return the falsy operand itself
    # (e.g. None or an empty list); coerce so callers always get a bool.
    return bool(columns and "hashtags" in columns)

Expand All @@ -47,13 +51,12 @@ def get_processor_pipeline(self):
top = self.parameters.get("top")

pipeline = [
# first, tokenise the posts, excluding all common words
{
"type": "attribute-frequencies",
"parameters": {
"columns": ["hashtags"],
"split-comma": True,
"extract": "none",
"extract": "none", # *not* 'hashtags', because they may not start with #
"timeframe": timeframe,
"top": top,
"top-style": "per-item",
Expand Down

0 comments on commit c331feb

Please sign in to comment.