Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Search Tags + Update #38

Merged
merged 20 commits into from
Sep 9, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions discovery-provider/.env
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ audius_ipfs_port=6001
audius_ipfs_gateway_hosts=
audius_discprov_block_processing_window=100
audius_discprov_peer_refresh_interval=10
audius_discprov_identity_service_url=http://localhost:8000
hareeshnagaraj marked this conversation as resolved.
Show resolved Hide resolved
WAIT_HOSTS=db:5432,redis-server:6379,docker.for.mac.localhost:6001
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""track view update

Revision ID: e9a9c6c2e3b7
Revises: 3acec9065c7f
Create Date: 2019-09-06 10:55:19.835973

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = 'e9a9c6c2e3b7'
down_revision = '3acec9065c7f'
branch_labels = None
depends_on = None


def upgrade():
    """Rebuild the track_lexeme_dict materialized view so that track search
    matches on titles only (tags are excluded from the lexeme dictionary).

    Drops and recreates the view and its trigram GIN index in a single
    execute() call on the raw connection.
    """
    connection = op.get_bind()
    # NOTE(review): the GROUP BY below still lists t."tags" even though tags
    # are no longer part of the selected lexemes - harmless but presumably
    # vestigial from the previous view definition; confirm before cleanup.
    connection.execute('''
      --- Update track_lexeme_dict to exclude tags as part of search
      DROP MATERIALIZED VIEW track_lexeme_dict;
      DROP INDEX IF EXISTS track_words_idx;
      CREATE MATERIALIZED VIEW track_lexeme_dict as
      SELECT * FROM (
        SELECT
          t.track_id,
          unnest(tsvector_to_array(to_tsvector('audius_ts_config', replace(COALESCE(t."title", ''), '&', 'and'))))
          as word
        FROM
            "tracks" t
        INNER JOIN "users" u ON t."owner_id" = u."user_id"
        WHERE t."is_current" = true and u."is_ready" = true and u."is_current" = true
        GROUP BY t."track_id", t."title", t."tags"
      ) AS words;

      -- add index on above materialized view
      CREATE INDEX track_words_idx ON track_lexeme_dict USING gin(word gin_trgm_ops);
    ''')
    # ### end Alembic commands ###


def downgrade():
    """No-op downgrade: the previous (tags-inclusive) view definition is not
    restored, so rolling back this revision leaves the titles-only
    track_lexeme_dict in place.
    """
    pass
    # ### end Alembic commands ###
1 change: 1 addition & 0 deletions discovery-provider/default_config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ loglevel_celery = INFO
block_processing_window = 50
blacklist_block_processing_window = 600
peer_refresh_interval = 3000
identity_service_url = https://identityservice.test

[flask]
debug = true
Expand Down
1 change: 0 additions & 1 deletion discovery-provider/src/queries/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ def get_tracks():

whitelist_params = ['created_at', 'create_date', 'release_date', 'blocknumber']
base_query = parse_sort_param(base_query, Track, whitelist_params)

query_results = paginate_query(base_query).all()
tracks = helpers.query_result_to_list(query_results)

Expand Down
53 changes: 53 additions & 0 deletions discovery-provider/src/queries/query_helpers.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import logging # pylint: disable=C0302
import requests
from sqlalchemy import func, desc
from urllib.parse import urljoin

from flask import request

from src import exceptions
from src.queries import response_name_constants
from src.models import Track, Repost, RepostType, Follow, Playlist, Save, SaveType
from src.utils import helpers
from src.utils.config import shared_config

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -501,6 +504,56 @@ def get_save_counts(session, query_by_user_flag, query_save_type_flag, filter_id
return save_counts_query.all()


def get_followee_count_dict(session, user_ids):
    """Map each user id in `user_ids` to the number of users it follows.

    Only current, non-deleted follow rows are counted. Ids with zero
    followees are simply absent from the returned dict, so callers should
    use `.get(user_id, 0)`.
    """
    count_rows = (
        session.query(
            Follow.follower_user_id,
            func.count(Follow.follower_user_id)
        )
        .filter(
            Follow.is_current == True,
            Follow.is_delete == False,
            Follow.follower_user_id.in_(user_ids)
        )
        .group_by(Follow.follower_user_id)
        .all()
    )
    # Rows are (follower_user_id, count) pairs - dict() consumes them directly.
    return dict(count_rows)

def get_track_play_counts(track_ids):
    """Fetch play (listen) counts for the given tracks from the identity service.

    Args:
        track_ids: iterable of track ids to query listen counts for.

    Returns:
        dict mapping track_id -> total listen count. Tracks with no recorded
        listens are absent from the result; callers should use `.get(id, 0)`.

    Raises:
        Exception: if the identity service response does not contain exactly
            one (date) key - should never arise since no date parameter is
            imposed on the query.
    """
    # Nothing to look up - skip the network round trip entirely.
    if not track_ids:
        return {}

    identity_url = shared_config['discprov']['identity_service_url']

    # Generate track listen query dict with format id[0]=x, id[1]=y, etc.
    querystring = {
        "id[{}]".format(index): str(track_id)
        for index, track_id in enumerate(track_ids)
    }

    # Create and query identity service endpoint
    identity_tracks_endpoint = urljoin(identity_url, 'tracks/listens')
    resp = requests.get(identity_tracks_endpoint, params=querystring)
    # Parse the body once and reuse it (previously parsed twice).
    json_resp = resp.json()
    keys = list(json_resp.keys())

    # Scenario should never arise, since we don't impose date parameter on initial query
    if len(keys) != 1:
        raise Exception('Invalid number of keys')

    # Parse listen query results into track listen count dictionary
    date_key = keys[0]
    listen_count_json = json_resp[date_key]
    track_listen_counts = {}
    if 'listenCounts' in listen_count_json:
        for listen_info in listen_count_json['listenCounts']:
            track_listen_counts[listen_info['trackId']] = listen_info['listens']
    return track_listen_counts

def get_pagination_vars():
limit = min(
max(request.args.get("limit", default=defaultLimit, type=int), minLimit),
Expand Down
1 change: 1 addition & 0 deletions discovery-provider/src/queries/response_name_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
has_current_user_reposted = 'has_current_user_reposted' # boolean - has current user reposted given track/playlist
has_current_user_saved = 'has_current_user_saved' # boolean - has current user saved given track/playlist
followee_reposts = 'followee_reposts' # array - followees of current user that have reposted given track/playlist
play_count = 'play_count' # integer - total number of plays for a given track

# user metadata
follower_count = 'follower_count' # integer - total follower count of given user
Expand Down
144 changes: 143 additions & 1 deletion discovery-provider/src/queries/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@
from src import api_helpers, exceptions
from src.models import User, Track, RepostType, Playlist, SaveType
from src.utils import helpers
from src.utils.config import shared_config
from src.utils.db_session import get_db
from src.queries import response_name_constants

from src.queries.query_helpers import get_current_user_id, populate_user_metadata, \
populate_track_metadata, populate_playlist_metadata, get_pagination_vars
populate_track_metadata, populate_playlist_metadata, get_pagination_vars, \
get_followee_count_dict, get_track_play_counts

logger = logging.getLogger(__name__)
bp = Blueprint("search_queries", __name__)
Expand Down Expand Up @@ -40,6 +43,145 @@ def search_full():
def search_autocomplete():
return search(True)

@bp.route("/search/tags", methods=("GET",))
def search_tags():
    """Tags-only search endpoint.

    Query params:
        query (str, required): tag text to search for.
        user_tag_count (str, optional, default "2"): minimum number of tagged
            tracks a user must own to appear in the user results.
        limit / offset: standard pagination (see get_pagination_vars).

    Returns a success response with:
        tracks: tracks whose tags match, sorted by play count (descending).
        users: matching track owners, sorted by followee count (descending).

    Raises:
        exceptions.ArgumentError: if 'query' is missing or empty.
    """
    search_str = request.args.get("query", type=str)
    if not search_str:
        raise exceptions.ArgumentError("Invalid value for parameter 'query'")

    user_tag_count = request.args.get("user_tag_count", type=str)
    if not user_tag_count:
        user_tag_count = "2"

    (limit, offset) = get_pagination_vars()
    like_tags_str = str.format('%{}%', search_str)
    db = get_db()
    with db.scoped_session() as session:
        # Both queries use a cheap LIKE prefilter on the raw tags column
        # before the more expensive tsquery match on the stripped tsvector.
        track_res = sqlalchemy.text(
            """
            select distinct(track_id)
            from (
                select
                    strip(to_tsvector(tracks.tags)) as tagstrip,
                    track_id
                from
                    tracks
                where
                    (tags like :like_tags_query)
                    and (is_current is true)
                    and (is_delete is false)
                order by
                    updated_at desc
            ) as t
            where
                tagstrip @@ to_tsquery(:query);
            """
        )
        user_res = sqlalchemy.text(
            """
            select * from (
                select
                    count(track_id),
                    owner_id
                from (
                    select
                        strip(to_tsvector(tracks.tags)) as tagstrip,
                        track_id,
                        owner_id
                    from
                        tracks
                    where
                        (tags like :like_tags_query)
                        and (is_current is true)
                    order by
                        updated_at desc
                ) as t
                where
                    tagstrip @@ to_tsquery(:query)
                group by
                    owner_id
                order by
                    count desc
            ) as usr
            where
                usr.count > :user_tag_count;
            """
        )
        track_ids = session.execute(
            track_res,
            {
                "query": search_str,
                "like_tags_query": like_tags_str
            }
        ).fetchall()
        user_ids = session.execute(
            user_res,
            {
                "query": search_str,
                "like_tags_query": like_tags_str,
                "user_tag_count": user_tag_count
            }
        ).fetchall()

        # Raw results are lists of row tuples - flatten to 1-D id lists.
        track_ids = [i[0] for i in track_ids]
        user_ids = [i[1] for i in user_ids]

        followee_count_dict = get_followee_count_dict(session, user_ids)

        tracks = (
            session.query(Track)
            .filter(
                Track.is_current == True,
                Track.is_delete == False,
                Track.track_id.in_(track_ids),
            )
            .all()
        )
        tracks = helpers.query_result_to_list(tracks)
        track_play_counts = get_track_play_counts(track_ids)

        users = (
            session.query(User)
            .filter(
                User.is_current == True,
                User.is_ready == True,
                User.user_id.in_(user_ids)
            )
            .all()
        )
        users = helpers.query_result_to_list(users)

        # NOTE(review): get_followee_count_dict counts how many users each
        # user *follows*; surfacing that value as follower_count looks
        # inverted - confirm the intended semantics with the client.
        for user in users:
            user_id = user["user_id"]
            user[response_name_constants.follower_count] = followee_count_dict.get(user_id, 0)

        followee_sorted_users = \
            sorted(users, key=lambda i: i[response_name_constants.follower_count], reverse=True)

        for track in tracks:
            track_id = track["track_id"]
            track[response_name_constants.play_count] = track_play_counts.get(track_id, 0)

        play_count_sorted_tracks = \
            sorted(tracks, key=lambda i: i[response_name_constants.play_count], reverse=True)

        # Apply pagination to both result lists.
        play_count_sorted_tracks = play_count_sorted_tracks[offset:offset + limit]
        followee_sorted_users = followee_sorted_users[offset:offset + limit]

        resp = {
            'tracks': play_count_sorted_tracks,
            'users': followee_sorted_users
        }
        return api_helpers.success_response(resp)

# SEARCH QUERIES
# We chose to use the raw SQL instead of SQLAlchemy because we're pushing SQLAlchemy to it's
# limit to do this query by creating new wrappers for pg functions that do not exist like
Expand Down
16 changes: 16 additions & 0 deletions libs/src/services/discoveryProvider/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,22 @@ class DiscoveryProvider {
return this._makeRequest(req)
}

/**
* Perform a tags-only search. Returns tracks with required tag and users
* that have used a tag greater than a specified number of times
* @param {string} text search query
* @param {number} user_tag_count # of times a user must have used a tag to be returned
* @param {number} limit max # of items to return per list (for pagination)
* @param {number} offset offset into list to return from (for pagination)
*/
async searchTags (text, user_tag_count = 2, limit = 100, offset = 0) {
let req = {
endpoint: 'search/tags',
queryParams: { query: text, user_tag_count, limit: limit, offset: offset }
}
return this._makeRequest(req)
}

/**
* Return saved playlists for current user
* NOTE in returned JSON, SaveType string one of track, playlist, album
Expand Down