From cd5d65e8c5b538ac5db6cece5d2cd5430c52f384 Mon Sep 17 00:00:00 2001
From: Shivam Tripathi <shivamtripathi0108@gmail.com>
Date: Thu, 3 May 2018 21:27:17 +0530
Subject: [PATCH 1/4] feat(migration/2018-05-03/up): Add changes to make use of
 Postgres FTS features

---
 migrations/2018-05-03/up.sql | 177 +++++++++++++++++++++++++++++++++++
 1 file changed, 177 insertions(+)
 create mode 100644 migrations/2018-05-03/up.sql

diff --git a/migrations/2018-05-03/up.sql b/migrations/2018-05-03/up.sql
new file mode 100644
index 0000000..d1f734b
--- /dev/null
+++ b/migrations/2018-05-03/up.sql
@@ -0,0 +1,177 @@
+/*
+ * Copyright (C) 2018  Shivam Tripathi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+BEGIN;
+
+/*
+ * This migration makes necessary changes in the database to avail full text search
+ * feature of Postgres on BookBrainz site.
+ */
+
+CREATE EXTENSION pg_trgm;
+CREATE EXTENSION unaccent;
+CREATE EXTENSION btree_gin;
+
+-- Column to cache tokenized alias names
+ALTER TABLE bookbrainz.alias
+	ADD COLUMN IF NOT EXISTS tokens TSVECTOR;
+
+
+-- Cache tokens of all previously added alias names
+UPDATE bookbrainz.alias
+	SET tokens = to_tsvector('pg_catalog.simple', unaccent(name));
+
+
+-- Function to unaccent the name row before tokenizing it
+CREATE FUNCTION bookbrainz.token_creator() RETURNS trigger
+	AS $token_creator$
+	BEGIN
+		new.tokens := to_tsvector('pg_catalog.simple', unaccent(new.name));
+		RETURN new;
+	END
+$token_creator$ LANGUAGE plpgsql;
+
+
+-- Trigger to automatically unaccent and insert tokens of an added alias name
+CREATE TRIGGER alias_token_update BEFORE INSERT OR UPDATE
+	ON bookbrainz.alias
+    FOR EACH ROW
+    EXECUTE PROCEDURE bookbrainz.token_creator();
+
+
+-- View to extract all the master entities
+CREATE VIEW bookbrainz.master_entities AS
+	SELECT bbid as entity_id, default_alias_id as alias_id, 'creator' AS entity_type
+		FROM bookbrainz.creator
+		WHERE master=true
+	UNION
+	SELECT bbid as entity_id, default_alias_id as alias_id, 'edition' AS entity_type
+		FROM bookbrainz.edition
+		WHERE master=true
+	UNION
+	SELECT bbid as entity_id, default_alias_id as alias_id, 'publication' AS entity_type
+		FROM bookbrainz.publication
+		WHERE master=true
+	UNION
+	SELECT bbid as entity_id, default_alias_id as alias_id, 'publisher' AS entity_type
+		FROM bookbrainz.publisher
+		WHERE master=true
+	UNION
+	SELECT bbid as entity_id, default_alias_id as alias_id, 'work' AS entity_type
+		FROM bookbrainz.work
+		WHERE master=true;
+
+
+/*
+ * The primary materialized view which holds all the relevant information about master entities.
+ * Token field is indexed for quick FTS.
+ * A unique index is created to help execute REFRESH CONCURRENTLY command.
+ * This unique indexing can be modified later when imports are added.
+ * Sample query:
+ * SELECT * FROM search_mv
+ *		WHERE tokens @@ to_tsquery('pg_catalog.simple', 'Cha:* | Dar:*')
+ *		ORDER BY ts_rank(search_mv.tokens, to_tsquery('english', 'Cha:* | Dar:*')) DESC;
+ */
+ CREATE MATERIALIZED VIEW bookbrainz.search_mv AS
+	SELECT items.entity_id, alias.name, alias.tokens, items.entity_type
+	FROM bookbrainz.alias AS alias
+	JOIN (
+		SELECT *
+		FROM bookbrainz.master_entities
+	) AS items
+	ON alias.id = items.alias_id;
+
+CREATE INDEX fts_search_idx ON bookbrainz.search_mv USING gin(tokens);
+CREATE UNIQUE INDEX unique_search_idx ON bookbrainz.search_mv (entity_id);
+
+
+/*
+ * This view holds all the master entities with empty tokens, signifying unsupported
+ * alphabet. An index is added on the name field of search_mv to facilitate quicker
+ * ILIKE operations.
+ */
+CREATE VIEW bookbrainz.untokenized_names AS
+	SELECT * FROM bookbrainz.search_mv WHERE tokens = '';
+
+CREATE INDEX untokenized_names_idx ON bookbrainz.search_mv USING gin(name);
+
+
+/*
+ * This materialized view stores all unique tokens, used to suggest the next best query.
+ * Words have been trigram indexed for quick fuzzy search.
+ * A unique index has been created on words to help execute REFRESH CONCURRENTLY command.
+ * Sample query:
+ * select word from search_words_mv ORDER BY similarity(word, 'chales') DESC limit 10;
+ */
+CREATE MATERIALIZED VIEW bookbrainz.search_words_mv AS
+	SELECT word FROM ts_stat($$
+		SELECT alias.tokens
+		FROM bookbrainz.alias AS alias
+		JOIN (
+			SELECT alias_id
+			FROM bookbrainz.master_entities
+		) AS items
+		ON alias.id = items.alias_id;
+	$$);
+
+CREATE INDEX search_words_mv_idx ON search_words_mv USING gin(word gin_trgm_ops);
+CREATE UNIQUE INDEX unique_word_idx ON bookbrainz.search_words_mv (word);
+
+
+-- Refresh search_mv and search_words_mv materialized views
+CREATE FUNCTION bookbrainz.refresh_mv() RETURNS TRIGGER
+	AS $refresh_mv$
+	BEGIN
+		REFRESH MATERIALIZED VIEW CONCURRENTLY bookbrainz.search_mv;
+		REFRESH MATERIALIZED VIEW CONCURRENTLY bookbrainz.search_words_mv;
+		RETURN NULL;
+	END
+$refresh_mv$ LANGUAGE plpgsql;
+
+
+/*
+ * Triggers which call refresh_mv function to refresh all materialized views
+ * upon any update or addition of entities.
+ */
+CREATE TRIGGER mv_creator_update AFTER INSERT OR UPDATE
+	ON bookbrainz.creator_header FOR EACH ROW EXECUTE PROCEDURE
+	bookbrainz.refresh_mv();
+
+CREATE TRIGGER mv_edition_update AFTER INSERT OR UPDATE
+	ON bookbrainz.edition_header FOR EACH ROW EXECUTE PROCEDURE
+	bookbrainz.refresh_mv();
+
+CREATE TRIGGER mv_publication_update AFTER INSERT OR UPDATE
+	ON bookbrainz.publication_header FOR EACH ROW EXECUTE PROCEDURE
+	bookbrainz.refresh_mv();
+
+CREATE TRIGGER mv_publisher_update AFTER INSERT OR UPDATE
+	ON bookbrainz.publisher_header FOR EACH ROW EXECUTE PROCEDURE
+	bookbrainz.refresh_mv();
+
+CREATE TRIGGER mv_work_update AFTER INSERT OR UPDATE
+	ON bookbrainz.work_header FOR EACH ROW EXECUTE PROCEDURE
+	bookbrainz.refresh_mv();
+
+
+ALTER MATERIALIZED VIEW IF EXISTS bookbrainz.search_mv OWNER TO bookbrainz;
+ALTER MATERIALIZED VIEW IF EXISTS bookbrainz.search_words_mv OWNER TO bookbrainz;
+ALTER VIEW IF EXISTS bookbrainz.master_entities OWNER TO bookbrainz;
+ALTER VIEW IF EXISTS bookbrainz.untokenized_names OWNER TO bookbrainz;
+
+END;

From 3b83958051484f0e14db26d5eccd2078dafd9688 Mon Sep 17 00:00:00 2001
From: Shivam Tripathi <shivamtripathi0108@gmail.com>
Date: Fri, 4 May 2018 22:37:50 +0530
Subject: [PATCH 2/4] feat(migration/2018-05-03/down): Bring down features
 added for Postgres FTS feature

---
 migrations/2018-05-03/down.sql | 43 ++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 migrations/2018-05-03/down.sql

diff --git a/migrations/2018-05-03/down.sql b/migrations/2018-05-03/down.sql
new file mode 100644
index 0000000..28af83f
--- /dev/null
+++ b/migrations/2018-05-03/down.sql
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2018  Shivam Tripathi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+BEGIN;
+
+DROP VIEW IF EXISTS bookbrainz.untokenized_names;
+DROP MATERIALIZED VIEW IF EXISTS bookbrainz.search_mv;
+DROP MATERIALIZED VIEW IF EXISTS bookbrainz.search_words_mv;
+DROP VIEW IF EXISTS bookbrainz.master_entities;
+
+DROP TRIGGER IF EXISTS alias_token_update ON bookbrainz.alias;
+
+DROP TRIGGER IF EXISTS mv_publisher_update ON bookbrainz.publisher_header;
+DROP TRIGGER IF EXISTS mv_creator_update ON bookbrainz.creator_header;
+DROP TRIGGER IF EXISTS mv_work_update ON bookbrainz.work_header;
+DROP TRIGGER IF EXISTS mv_edition_update ON bookbrainz.edition_header;
+DROP TRIGGER IF EXISTS mv_publication_update ON bookbrainz.publication_header;
+
+DROP FUNCTION IF EXISTS bookbrainz.token_creator;
+DROP FUNCTION IF EXISTS bookbrainz.refresh_mv cascade;
+
+ALTER TABLE bookbrainz.alias DROP COLUMN tokens;
+
+DROP EXTENSION IF EXISTS pg_trgm;
+DROP EXTENSION IF EXISTS unaccent;
+DROP EXTENSION IF EXISTS btree_gin;
+
+END;

From a77295f9e414a2b5f4e19bd728313cc08ab44b76 Mon Sep 17 00:00:00 2001
From: Shivam Tripathi <shivamtripathi0108@gmail.com>
Date: Fri, 4 May 2018 20:44:02 +0530
Subject: [PATCH 3/4] feat(schema): Make changes to schema to set up Postgres
 FTS

* Add extentions pg_trgm, unaccent, btree_gin repectively for trigram fuzzy
	search, unaccenting strings and using gin index.
* Add column tokens TSVECTOR to alias table to add tokenized names.
* Add view master_entities to extract all the master entities.
* Add materialized view search_mv holding all the search relevant information
	about master entities.
* Add materialized view search_words_mv for all unique tokens (used to suggest
	the next best query).
* Index fts_search_idx on materialized view search_mv column tokens to for quick
	FTS.
* Add view untokenized_names holding all master entities with empty tokens,
	signifying unsupported names by to_tsvector.
* Index untokenized_names_idx on materialized view search_mv column names to aid
	quick view untokenized_names query.
* Unique indexes on search_mv and search_words_mv to aid REFRESH CONCURRENTLY
	command.
---
 schemas/bookbrainz.sql | 68 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/schemas/bookbrainz.sql b/schemas/bookbrainz.sql
index e9a781e..12c0ab8 100644
--- a/schemas/bookbrainz.sql
+++ b/schemas/bookbrainz.sql
@@ -1,5 +1,9 @@
 BEGIN;
 
+CREATE EXTENSION pg_trgm;
+CREATE EXTENSION unaccent;
+CREATE EXTENSION btree_gin;
+
 CREATE TYPE bookbrainz.lang_proficiency AS ENUM (
 	'BASIC',
 	'INTERMEDIATE',
@@ -439,6 +443,7 @@ CREATE TABLE bookbrainz.alias (
 	name TEXT NOT NULL CHECK (name <> ''),
 	sort_name TEXT NOT NULL CHECK (sort_name <> ''),
 	language_id INT,
+	tokens TSVECTOR,
 	"primary" BOOLEAN NOT NULL DEFAULT FALSE
 );
 ALTER TABLE bookbrainz.alias ADD FOREIGN KEY (language_id) REFERENCES musicbrainz.language (id) DEFERRABLE;
@@ -676,4 +681,67 @@ CREATE VIEW bookbrainz.publication AS
 	LEFT JOIN bookbrainz.alias_set als ON pcd.alias_set_id = als.id
 	WHERE e.type = 'Publication';
 
+-- View to extract all the master entities
+CREATE VIEW bookbrainz.master_entities AS
+	SELECT bbid as entity_id, default_alias_id as alias_id, 'creator' AS entity_type
+		FROM bookbrainz.creator
+		WHERE master=true
+	UNION
+	SELECT bbid as entity_id, default_alias_id as alias_id, 'edition' AS entity_type
+		FROM bookbrainz.edition
+		WHERE master=true
+	UNION
+	SELECT bbid as entity_id, default_alias_id as alias_id, 'publication' AS entity_type
+		FROM bookbrainz.publication
+		WHERE master=true
+	UNION
+	SELECT bbid as entity_id, default_alias_id as alias_id, 'publisher' AS entity_type
+		FROM bookbrainz.publisher
+		WHERE master=true
+	UNION
+	SELECT bbid as entity_id, default_alias_id as alias_id, 'work' AS entity_type
+		FROM bookbrainz.work
+		WHERE master=true;
+
+-- Materialized view holding all the search relevant information about master entities
+CREATE MATERIALIZED VIEW bookbrainz.search_mv AS
+	SELECT items.entity_id, alias.name, alias.tokens, items.entity_type
+	FROM bookbrainz.alias AS alias
+	JOIN (
+		SELECT *
+		FROM bookbrainz.master_entities
+	) AS items
+	ON alias.id = items.alias_id;
+
+-- Index on token field of materialized view search_mv for quick FTS
+CREATE INDEX fts_search_idx ON bookbrainz.search_mv USING gin(tokens);
+
+-- Unique index on materialized view search_mv created to help execute REFRESH CONCURRENTLY command
+CREATE UNIQUE INDEX unique_search_idx ON bookbrainz.search_mv (entity_id);
+
+-- Index on name field of materialized view search_mv to facilitate quick matching operations
+CREATE INDEX untokenized_names_idx ON bookbrainz.search_mv USING gin(name);
+
+-- View holding all master entities with empty tokens, signifying unsupported names by to_tsvector
+CREATE VIEW bookbrainz.untokenized_names AS
+	SELECT * FROM bookbrainz.search_mv WHERE tokens = '';
+
+-- Materialized view for all unique tokens (used to suggest the next best query)
+CREATE MATERIALIZED VIEW bookbrainz.search_words_mv AS
+	SELECT word FROM ts_stat($$
+		SELECT alias.tokens
+		FROM bookbrainz.alias AS alias
+		JOIN (
+			SELECT alias_id
+			FROM bookbrainz.master_entities
+		) AS items
+		ON alias.id = items.alias_id;
+	$$);
+
+-- Trigram index on materialized view search_words_mv for quick fuzzy search.
+CREATE INDEX search_words_mv_idx ON search_words_mv USING gin(word gin_trgm_ops);
+
+-- Unique index on materialized view search_words_mv to help execute REFRESH CONCURRENTLY command
+CREATE UNIQUE INDEX unique_word_idx ON bookbrainz.search_words_mv (word);
+
 COMMIT;

From a577d12b8eb86b2c89cec8e82c15052586729421 Mon Sep 17 00:00:00 2001
From: Shivam Tripathi <shivamtripathi0108@gmail.com>
Date: Fri, 4 May 2018 22:03:07 +0530
Subject: [PATCH 4/4] feat(triggers): Add necessary triggers to aid Postgres
 FTS

    * Add function token_creator to tokenized name and enter it as token field in
            alias table
    * Add function refresh_mv which refreshes all materialized views linked with
            FTS - search_mv and search_words_mv
    * Add triggers for insert/update on each entity_header to call the refresh_mv
            function each time a new entity is inserted or updated (master revision).
    * Add trigger for insert on alias to call function token_creator.
---
 scripts/create_triggers.sql | 72 +++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/scripts/create_triggers.sql b/scripts/create_triggers.sql
index 63066bb..4d1a0bf 100644
--- a/scripts/create_triggers.sql
+++ b/scripts/create_triggers.sql
@@ -229,6 +229,26 @@ CREATE OR REPLACE FUNCTION bookbrainz.process_publication() RETURNS TRIGGER
 	END;
 $process_publication$ LANGUAGE plpgsql;
 
+CREATE FUNCTION bookbrainz.token_creator() RETURNS trigger
+	AS $token_creator$
+	BEGIN
+		new.tokens := to_tsvector('pg_catalog.simple', unaccent(new.name));
+		RETURN new;
+	END
+$token_creator$ LANGUAGE plpgsql;
+
+-- Function refreshes search_mv and search_words_mv materialized views
+CREATE FUNCTION bookbrainz.refresh_mv() RETURNS TRIGGER
+	AS $refresh_mv$
+	BEGIN
+		RAISE NOTICE 'Refresh Fires!';
+		REFRESH MATERIALIZED VIEW CONCURRENTLY bookbrainz.search_mv;
+		REFRESH MATERIALIZED VIEW CONCURRENTLY bookbrainz.search_words_mv;
+		RAISE NOTICE 'Refreshing ...';
+		RETURN NULL;
+	END
+$refresh_mv$ LANGUAGE plpgsql;
+
 
 BEGIN;
 
@@ -269,3 +289,55 @@ CREATE TRIGGER process_publication
 	FOR EACH ROW EXECUTE PROCEDURE bookbrainz.process_publication();
 
 COMMIT;
+
+-- Trigger to unaccent and create tokens of alias name
+CREATE TRIGGER alias_token_update BEFORE INSERT OR UPDATE
+	ON bookbrainz.alias FOR EACH ROW EXECUTE PROCEDURE
+	bookbrainz.token_creator();
+
+COMMIT;
+
+BEGIN;
+
+-- Trigger to refresh views upon addition/update of creator
+CREATE TRIGGER mv_creator_update AFTER INSERT OR UPDATE
+	ON bookbrainz.creator_header FOR EACH ROW EXECUTE PROCEDURE
+	bookbrainz.refresh_mv();
+
+COMMIT;
+
+BEGIN;
+
+-- Trigger to refresh views upon addition/update of edition
+CREATE TRIGGER mv_edition_update AFTER INSERT OR UPDATE
+	ON bookbrainz.edition_header FOR EACH ROW EXECUTE PROCEDURE
+	bookbrainz.refresh_mv();
+
+COMMIT;
+
+BEGIN;
+
+-- Trigger to refresh views upon addition/update of publication
+CREATE TRIGGER mv_publication_update AFTER INSERT OR UPDATE
+	ON bookbrainz.publication_header FOR EACH ROW EXECUTE PROCEDURE
+	bookbrainz.refresh_mv();
+
+COMMIT;
+
+BEGIN;
+
+-- Trigger to refresh views upon addition/update of publisher
+CREATE TRIGGER mv_publisher_update AFTER INSERT OR UPDATE
+	ON bookbrainz.publisher_header FOR EACH ROW EXECUTE PROCEDURE
+	bookbrainz.refresh_mv();
+
+COMMIT;
+
+BEGIN;
+
+-- Trigger to refresh views upon addition/update of work
+CREATE TRIGGER mv_work_update AFTER INSERT OR UPDATE
+	ON bookbrainz.work_header FOR EACH ROW EXECUTE PROCEDURE
+	bookbrainz.refresh_mv();
+
+COMMIT;