Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Search Tags + Update #38

Merged
merged 20 commits into from
Sep 9, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions discovery-provider/.env
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ audius_ipfs_port=6001
audius_ipfs_gateway_hosts=
audius_discprov_block_processing_window=100
audius_discprov_peer_refresh_interval=10
audius_discprov_identity_service_url=http://localhost:8000
hareeshnagaraj marked this conversation as resolved.
Show resolved Hide resolved
WAIT_HOSTS=db:5432,redis-server:6379,docker.for.mac.localhost:6001
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""track view update

Revision ID: e9a9c6c2e3b7
Revises: 3acec9065c7f
Create Date: 2019-09-06 10:55:19.835973

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = 'e9a9c6c2e3b7'
down_revision = '3acec9065c7f'
branch_labels = None
depends_on = None


def upgrade():
    """Rebuild the track_lexeme_dict materialized view so that track search
    matches on titles only (tags are excluded from the lexeme dictionary).

    Drops and recreates the view and its trigram GIN index in a single
    execute() call on the raw connection.
    """
    connection = op.get_bind()
    # NOTE(review): the GROUP BY below still lists t."tags" even though tags
    # are no longer part of the selected lexemes - harmless but presumably
    # vestigial from the previous view definition; confirm before cleanup.
    connection.execute('''
      --- Update track_lexeme_dict to exclude tags as part of search
      DROP MATERIALIZED VIEW track_lexeme_dict;
      DROP INDEX IF EXISTS track_words_idx;
      CREATE MATERIALIZED VIEW track_lexeme_dict as
      SELECT * FROM (
        SELECT
          t.track_id,
          unnest(tsvector_to_array(to_tsvector('audius_ts_config', replace(COALESCE(t."title", ''), '&', 'and'))))
          as word
        FROM
            "tracks" t
        INNER JOIN "users" u ON t."owner_id" = u."user_id"
        WHERE t."is_current" = true and u."is_ready" = true and u."is_current" = true
        GROUP BY t."track_id", t."title", t."tags"
      ) AS words;

      -- add index on above materialized view
      CREATE INDEX track_words_idx ON track_lexeme_dict USING gin(word gin_trgm_ops);
    ''')
    # ### end Alembic commands ###


def downgrade():
    """No-op downgrade: the previous (tags-inclusive) view definition is not
    restored, so rolling back this revision leaves the titles-only
    track_lexeme_dict in place.
    """
    pass
    # ### end Alembic commands ###
1 change: 1 addition & 0 deletions discovery-provider/default_config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ loglevel_celery = INFO
block_processing_window = 50
blacklist_block_processing_window = 600
peer_refresh_interval = 3000
identity_service_url = https://identityservice.test

[flask]
debug = true
Expand Down
1 change: 0 additions & 1 deletion discovery-provider/src/queries/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ def get_tracks():

whitelist_params = ['created_at', 'create_date', 'release_date', 'blocknumber']
base_query = parse_sort_param(base_query, Track, whitelist_params)

query_results = paginate_query(base_query).all()
tracks = helpers.query_result_to_list(query_results)

Expand Down
53 changes: 53 additions & 0 deletions discovery-provider/src/queries/query_helpers.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import logging # pylint: disable=C0302
import requests
from sqlalchemy import func, desc
from urllib.parse import urljoin

from flask import request

from src import exceptions
from src.queries import response_name_constants
from src.models import Track, Repost, RepostType, Follow, Playlist, Save, SaveType
from src.utils import helpers
from src.utils.config import shared_config

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -501,6 +504,56 @@ def get_save_counts(session, query_by_user_flag, query_save_type_flag, filter_id
return save_counts_query.all()


def get_followee_count_dict(session, user_ids):
    """Map each user id in `user_ids` to the number of users it follows.

    Only current, non-deleted follow rows are counted. Ids with zero
    followees are simply absent from the returned dict, so callers should
    use `.get(user_id, 0)`.
    """
    count_rows = (
        session.query(
            Follow.follower_user_id,
            func.count(Follow.follower_user_id)
        )
        .filter(
            Follow.is_current == True,
            Follow.is_delete == False,
            Follow.follower_user_id.in_(user_ids)
        )
        .group_by(Follow.follower_user_id)
        .all()
    )
    # Rows are (follower_user_id, count) pairs - dict() consumes them directly.
    return dict(count_rows)

def get_track_play_counts(track_ids):
    """Fetch play (listen) counts for the given tracks from the identity service.

    Args:
        track_ids: iterable of track ids to query listen counts for.

    Returns:
        dict mapping track_id -> total listen count. Tracks with no recorded
        listens are absent from the result; callers should use `.get(id, 0)`.

    Raises:
        Exception: if the identity service response does not contain exactly
            one (date) key - should never arise since no date parameter is
            imposed on the query.
    """
    # Nothing to look up - skip the network round trip entirely.
    if not track_ids:
        return {}

    identity_url = shared_config['discprov']['identity_service_url']

    # Generate track listen query dict with format id[0]=x, id[1]=y, etc.
    querystring = {
        "id[{}]".format(index): str(track_id)
        for index, track_id in enumerate(track_ids)
    }

    # Create and query identity service endpoint
    identity_tracks_endpoint = urljoin(identity_url, 'tracks/listens')
    resp = requests.get(identity_tracks_endpoint, params=querystring)
    # Parse the body once and reuse it (previously parsed twice).
    json_resp = resp.json()
    keys = list(json_resp.keys())

    # Scenario should never arise, since we don't impose date parameter on initial query
    if len(keys) != 1:
        raise Exception('Invalid number of keys')

    # Parse listen query results into track listen count dictionary
    date_key = keys[0]
    listen_count_json = json_resp[date_key]
    track_listen_counts = {}
    if 'listenCounts' in listen_count_json:
        for listen_info in listen_count_json['listenCounts']:
            track_listen_counts[listen_info['trackId']] = listen_info['listens']
    return track_listen_counts

def get_pagination_vars():
limit = min(
max(request.args.get("limit", default=defaultLimit, type=int), minLimit),
Expand Down
1 change: 1 addition & 0 deletions discovery-provider/src/queries/response_name_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
has_current_user_reposted = 'has_current_user_reposted' # boolean - has current user reposted given track/playlist
has_current_user_saved = 'has_current_user_saved' # boolean - has current user saved given track/playlist
followee_reposts = 'followee_reposts' # array - followees of current user that have reposted given track/playlist
play_count = 'play_count' # integer - total number of plays for a given track

# user metadata
follower_count = 'follower_count' # integer - total follower count of given user
Expand Down
144 changes: 143 additions & 1 deletion discovery-provider/src/queries/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@
from src import api_helpers, exceptions
from src.models import User, Track, RepostType, Playlist, SaveType
from src.utils import helpers
from src.utils.config import shared_config
from src.utils.db_session import get_db
from src.queries import response_name_constants

from src.queries.query_helpers import get_current_user_id, populate_user_metadata, \
populate_track_metadata, populate_playlist_metadata, get_pagination_vars
populate_track_metadata, populate_playlist_metadata, get_pagination_vars, \
get_followee_count_dict, get_track_play_counts

logger = logging.getLogger(__name__)
bp = Blueprint("search_queries", __name__)
Expand Down Expand Up @@ -40,6 +43,145 @@ def search_full():
def search_autocomplete():
return search(True)

@bp.route("/search/tags", methods=("GET",))
def search_tags():
    """Tags-only search endpoint.

    Query params:
        query (str, required): tag text to search for.
        user_tag_count (str, optional, default "2"): minimum number of tagged
            tracks a user must own to appear in the user results.
        limit / offset: standard pagination (see get_pagination_vars).

    Returns a success response with:
        tracks: tracks whose tags match, sorted by play count (descending).
        users: matching track owners, sorted by followee count (descending).

    Raises:
        exceptions.ArgumentError: if 'query' is missing or empty.
    """
    search_str = request.args.get("query", type=str)
    if not search_str:
        raise exceptions.ArgumentError("Invalid value for parameter 'query'")

    user_tag_count = request.args.get("user_tag_count", type=str)
    if not user_tag_count:
        user_tag_count = "2"

    (limit, offset) = get_pagination_vars()
    like_tags_str = str.format('%{}%', search_str)
    db = get_db()
    with db.scoped_session() as session:
        # Both queries use a cheap LIKE prefilter on the raw tags column
        # before the more expensive tsquery match on the stripped tsvector.
        track_res = sqlalchemy.text(
            """
            select distinct(track_id)
            from (
                select
                    strip(to_tsvector(tracks.tags)) as tagstrip,
                    track_id
                from
                    tracks
                where
                    (tags like :like_tags_query)
                    and (is_current is true)
                    and (is_delete is false)
                order by
                    updated_at desc
            ) as t
            where
                tagstrip @@ to_tsquery(:query);
            """
        )
        user_res = sqlalchemy.text(
            """
            select * from (
                select
                    count(track_id),
                    owner_id
                from (
                    select
                        strip(to_tsvector(tracks.tags)) as tagstrip,
                        track_id,
                        owner_id
                    from
                        tracks
                    where
                        (tags like :like_tags_query)
                        and (is_current is true)
                    order by
                        updated_at desc
                ) as t
                where
                    tagstrip @@ to_tsquery(:query)
                group by
                    owner_id
                order by
                    count desc
            ) as usr
            where
                usr.count > :user_tag_count;
            """
        )
        track_ids = session.execute(
            track_res,
            {
                "query": search_str,
                "like_tags_query": like_tags_str
            }
        ).fetchall()
        user_ids = session.execute(
            user_res,
            {
                "query": search_str,
                "like_tags_query": like_tags_str,
                "user_tag_count": user_tag_count
            }
        ).fetchall()

        # Raw results are lists of row tuples - flatten to 1-D id lists.
        track_ids = [i[0] for i in track_ids]
        user_ids = [i[1] for i in user_ids]

        followee_count_dict = get_followee_count_dict(session, user_ids)

        tracks = (
            session.query(Track)
            .filter(
                Track.is_current == True,
                Track.is_delete == False,
                Track.track_id.in_(track_ids),
            )
            .all()
        )
        tracks = helpers.query_result_to_list(tracks)
        track_play_counts = get_track_play_counts(track_ids)

        users = (
            session.query(User)
            .filter(
                User.is_current == True,
                User.is_ready == True,
                User.user_id.in_(user_ids)
            )
            .all()
        )
        users = helpers.query_result_to_list(users)

        # NOTE(review): get_followee_count_dict counts how many users each
        # user *follows*; surfacing that value as follower_count looks
        # inverted - confirm the intended semantics with the client.
        for user in users:
            user_id = user["user_id"]
            user[response_name_constants.follower_count] = followee_count_dict.get(user_id, 0)

        followee_sorted_users = \
            sorted(users, key=lambda i: i[response_name_constants.follower_count], reverse=True)

        for track in tracks:
            track_id = track["track_id"]
            track[response_name_constants.play_count] = track_play_counts.get(track_id, 0)

        play_count_sorted_tracks = \
            sorted(tracks, key=lambda i: i[response_name_constants.play_count], reverse=True)

        # Apply pagination to both result lists.
        play_count_sorted_tracks = play_count_sorted_tracks[offset:offset + limit]
        followee_sorted_users = followee_sorted_users[offset:offset + limit]

        resp = {
            'tracks': play_count_sorted_tracks,
            'users': followee_sorted_users
        }
        return api_helpers.success_response(resp)

# SEARCH QUERIES
# We chose to use the raw SQL instead of SQLAlchemy because we're pushing SQLAlchemy to it's
# limit to do this query by creating new wrappers for pg functions that do not exist like
Expand Down
16 changes: 16 additions & 0 deletions libs/src/services/discoveryProvider/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,22 @@ class DiscoveryProvider {
return this._makeRequest(req)
}

/**
* Perform a tags-only search. Returns tracks with required tag and users
* that have used a tag greater than a specified number of times
* @param {string} text search query
* @param {number} user_tag_count # of times a user must have used a tag to be returned
* @param {number} limit max # of items to return per list (for pagination)
* @param {number} offset offset into list to return from (for pagination)
*/
async searchTags (text, user_tag_count = 2, limit = 100, offset = 0) {
let req = {
endpoint: 'search/tags',
queryParams: { query: text, user_tag_count, limit: limit, offset: offset }
}
return this._makeRequest(req)
}

/**
* Return saved playlists for current user
* NOTE in returned JSON, SaveType string one of track, playlist, album
Expand Down