From 81f0de799cd6b19b0da765e50080fc27ba15d729 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Tue, 20 Aug 2019 14:15:29 +0100 Subject: [PATCH] Hash 3PID lookups (#184) The implementation of matrix-org/matrix-doc#2134 --- sydent/db/hashing_metadata.py | 125 ++++++++++++++++++++ sydent/db/sqlitedb.py | 36 ++++++ sydent/db/threepid_associations.py | 54 +++++++-- sydent/db/threepid_associations.sql | 27 ++++- sydent/http/httpserver.py | 8 ++ sydent/http/servlets/__init__.py | 4 +- sydent/http/servlets/hashdetailsservlet.py | 62 ++++++++++ sydent/http/servlets/lookupservlet.py | 3 + sydent/http/servlets/lookupv2servlet.py | 126 +++++++++++++++++++++ sydent/http/servlets/replication.py | 28 ++++- sydent/http/servlets/v2_servlet.py | 39 +++++++ sydent/replication/peer.py | 13 ++- sydent/sydent.py | 24 ++++ sydent/threepid/__init__.py | 14 ++- sydent/threepid/bind.py | 16 ++- sydent/util/hash.py | 33 ++++++ 16 files changed, 586 insertions(+), 26 deletions(-) create mode 100644 sydent/db/hashing_metadata.py create mode 100644 sydent/http/servlets/hashdetailsservlet.py create mode 100644 sydent/http/servlets/lookupv2servlet.py create mode 100644 sydent/http/servlets/v2_servlet.py create mode 100644 sydent/util/hash.py diff --git a/sydent/db/hashing_metadata.py b/sydent/db/hashing_metadata.py new file mode 100644 index 00000000..515f51b3 --- /dev/null +++ b/sydent/db/hashing_metadata.py @@ -0,0 +1,125 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Actions on the hashing_metadata table which is defined in the migration process in +# sqlitedb.py + +class HashingMetadataStore: + def __init__(self, sydent): + self.sydent = sydent + + def get_lookup_pepper(self): + """Return the value of the current lookup pepper from the db + + :returns a pepper if it exists in the database, or None if one does + not exist + """ + cur = self.sydent.db.cursor() + res = cur.execute("select lookup_pepper from hashing_metadata") + row = res.fetchone() + + if not row: + return None + return row[0] + + def store_lookup_pepper(self, hashing_function, pepper): + """Stores a new lookup pepper in the hashing_metadata db table and rehashes all 3PIDs + + :param hashing_function: A function with single input and output strings + :type hashing_function func(str) -> str + + :param pepper: The pepper to store in the database + :type pepper: str + """ + cur = self.sydent.db.cursor() + + # Create or update lookup_pepper + sql = ( + 'INSERT OR REPLACE INTO hashing_metadata (id, lookup_pepper) ' + 'VALUES (0, ?)' + ) + cur.execute(sql, (pepper,)) + + # Hand the cursor to each rehashing function + # Each function will queue some rehashing db transactions + self._rehash_threepids(cur, hashing_function, pepper, "local_threepid_associations") + self._rehash_threepids(cur, hashing_function, pepper, "global_threepid_associations") + + # Commit the queued db transactions so that adding a new pepper and hashing is atomic + self.sydent.db.commit() + + def _rehash_threepids(self, cur, hashing_function, pepper, table): + """Rehash 3PIDs of a given table using a given hashing_function and pepper + + A database cursor `cur` must be passed to this function. After this function completes, + the calling function should make sure to call self`self.sydent.db.commit()` to commit + the made changes to the database. + + :param cur: Database cursor + :type cur: + + :param hashing_function: A function with single input and output strings + :type hashing_function func(str) -> str + + :param pepper: A pepper to append to the end of the 3PID (after a space) before hashing + :type pepper: str + + :param table: The database table to perform the rehashing on + :type table: str + """ + + # Get count of all 3PID records + # Medium/address combos are marked as UNIQUE in the database + sql = "SELECT COUNT(*) FROM %s" % table + res = cur.execute(sql) + row_count = res.fetchone() + row_count = row_count[0] + + # Iterate through each medium, address combo, hash it, + # and store in the db + batch_size = 500 + count = 0 + while count < row_count: + sql = ( + "SELECT medium, address FROM %s ORDER BY id LIMIT %s OFFSET %s" % + (table, batch_size, count) + ) + res = cur.execute(sql) + rows = res.fetchall() + + for medium, address in rows: + # Skip broken db entry + if not medium or not address: + continue + + # Combine the medium, address and pepper together in the + # following form: "address medium pepper" + # According to MSC2134: https://github.com/matrix-org/matrix-doc/pull/2134 + combo = "%s %s %s" % (address, medium, pepper) + + # Hash the resulting string + result = hashing_function(combo) + + # Save the result to the DB + sql = ( + "UPDATE %s SET lookup_hash = ? " + "WHERE medium = ? AND address = ?" + % table + ) + # Lines up the query to be executed on commit + cur.execute(sql, (result, medium, address)) + + count += len(rows) diff --git a/sydent/db/sqlitedb.py b/sydent/db/sqlitedb.py index 60192c86..16b55641 100644 --- a/sydent/db/sqlitedb.py +++ b/sydent/db/sqlitedb.py @@ -132,6 +132,7 @@ def _upgradeSchema(self): self.db.commit() logger.info("v0 -> v1 schema migration complete") self._setSchemaVersion(1) + if curVer < 2: logger.info("Migrating schema from v1 to v2") cur = self.db.cursor() @@ -140,6 +141,41 @@ def _upgradeSchema(self): logger.info("v1 -> v2 schema migration complete") self._setSchemaVersion(2) + if curVer < 3: + cur = self.db.cursor() + + # Add lookup_hash columns to threepid association tables + cur.execute( + "ALTER TABLE local_threepid_associations " + "ADD COLUMN lookup_hash VARCHAR(256)" + ) + cur.execute( + "CREATE INDEX IF NOT EXISTS lookup_hash_medium " + "on local_threepid_associations " + "(lookup_hash, medium)" + ) + cur.execute( + "ALTER TABLE global_threepid_associations " + "ADD COLUMN lookup_hash VARCHAR(256)" + ) + cur.execute( + "CREATE INDEX IF NOT EXISTS lookup_hash_medium " + "on global_threepid_associations " + "(lookup_hash, medium)" + ) + + # Create hashing_metadata table to store the current lookup_pepper + cur.execute( + "CREATE TABLE IF NOT EXISTS hashing_metadata (" + "id integer primary key, " + "lookup_pepper varchar(256)" + ")" + ) + + self.db.commit() + logger.info("v2 -> v3 schema migration complete") + self._setSchemaVersion(3) + def _getSchemaVersion(self): cur = self.db.cursor() res = cur.execute("PRAGMA user_version"); diff --git a/sydent/db/threepid_associations.py b/sydent/db/threepid_associations.py index 331ca90a..55979c10 100644 --- a/sydent/db/threepid_associations.py +++ b/sydent/db/threepid_associations.py @@ -34,9 +34,9 @@ def addOrUpdateAssociation(self, assoc): # sqlite's support for upserts is atrocious cur.execute("insert or replace into local_threepid_associations " - "('medium', 'address', 'mxid', 'ts', 'notBefore', 'notAfter')" - " values (?, ?, ?, ?, ?, ?)", - (assoc.medium, assoc.address, assoc.mxid, assoc.ts, assoc.not_before, assoc.not_after)) + "('medium', 'address', 'lookup_hash', 'mxid', 'ts', 'notBefore', 'notAfter')" + " values (?, ?, ?, ?, ?, ?, ?)", + (assoc.medium, assoc.address, assoc.lookup_hash, assoc.mxid, assoc.ts, assoc.not_before, assoc.not_after)) self.sydent.db.commit() def getAssociationsAfterId(self, afterId, limit): @@ -45,7 +45,8 @@ def getAssociationsAfterId(self, afterId, limit): if afterId is None: afterId = -1 - q = "select id, medium, address, mxid, ts, notBefore, notAfter from local_threepid_associations " \ + q = "select id, medium, address, lookup_hash, mxid, ts, notBefore, notAfter from " \ + "local_threepid_associations " \ "where id > ? order by id asc" if limit is not None: q += " limit ?" @@ -58,7 +59,7 @@ def getAssociationsAfterId(self, afterId, limit): assocs = {} for row in res.fetchall(): - assoc = ThreepidAssociation(row[1], row[2], row[3], row[4], row[5], row[6]) + assoc = ThreepidAssociation(row[1], row[2], row[3], row[4], row[5], row[6], row[7]) assocs[row[0]] = assoc maxId = row[0] @@ -139,10 +140,20 @@ def getMxid(self, medium, address): return row[0] def getMxids(self, threepid_tuples): + """Given a list of threepid_tuples, return the same list but with + mxids appended to each tuple for which a match was found in the + database for. Output is ordered by medium, address, timestamp DESC + + :param threepid_tuples: List containing (medium, address) tuples + :type threepid_tuples: [(str, str)] + + :returns a list of (medium, address, mxid) tuples + :rtype [(str, str, str)] + """ cur = self.sydent.db.cursor() - cur.execute("CREATE TEMPORARY TABLE tmp_getmxids (medium VARCHAR(16), address VARCHAR(256))"); - cur.execute("CREATE INDEX tmp_getmxids_medium_lower_address ON tmp_getmxids (medium, lower(address))"); + cur.execute("CREATE TEMPORARY TABLE tmp_getmxids (medium VARCHAR(16), address VARCHAR(256))") + cur.execute("CREATE INDEX tmp_getmxids_medium_lower_address ON tmp_getmxids (medium, lower(address))") try: inserted_cap = 0 @@ -181,14 +192,13 @@ def getMxids(self, threepid_tuples): def addAssociation(self, assoc, rawSgAssoc, originServer, originId, commit=True): """ :param assoc: (sydent.threepid.GlobalThreepidAssociation) The association to add as a high level object - :param sgAssoc The original raw bytes of the signed association - :return: + :param sgAssoc: The original raw bytes of the signed association """ cur = self.sydent.db.cursor() res = cur.execute("insert or ignore into global_threepid_associations " - "(medium, address, mxid, ts, notBefore, notAfter, originServer, originId, sgAssoc) values " - "(?, ?, ?, ?, ?, ?, ?, ?, ?)", - (assoc.medium, assoc.address, assoc.mxid, assoc.ts, assoc.not_before, assoc.not_after, + "(medium, address, lookup_hash, mxid, ts, notBefore, notAfter, originServer, originId, sgAssoc) values " + "(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + (assoc.medium, assoc.address, assoc.lookup_hash, assoc.mxid, assoc.ts, assoc.not_before, assoc.not_after, originServer, originId, rawSgAssoc)) if commit: self.sydent.db.commit() @@ -216,3 +226,23 @@ def removeAssociation(self, medium, address): cur.rowcount, medium, address, ) self.sydent.db.commit() + + def retrieveMxidFromHash(self, lookup_hash): + """Returns an mxid from a given lookup_hash value + + :param input_hash: The lookup_hash value to lookup in the database + :type input_hash: str + + :returns the mxid relating to the lookup_hash value if found, + otherwise None + :rtype: str|None + """ + cur = self.sydent.db.cursor() + + res = cur.execute( + "SELECT mxid FROM global_threepid_associations WHERE lookup_hash = ?", (lookup_hash,) + ) + row = res.fetchone() + if not row: + return None + return row[0] diff --git a/sydent/db/threepid_associations.sql b/sydent/db/threepid_associations.sql index 2e17777e..c4869b8b 100644 --- a/sydent/db/threepid_associations.sql +++ b/sydent/db/threepid_associations.sql @@ -14,10 +14,33 @@ See the License for the specific language governing permissions and limitations under the License. */ -CREATE TABLE IF NOT EXISTS local_threepid_associations (id integer primary key, medium varchar(16) not null, address varchar(256) not null, mxid varchar(256) not null, ts integer not null, notBefore bigint not null, notAfter bigint not null); +CREATE TABLE IF NOT EXISTS local_threepid_associations ( + id integer primary key, + medium varchar(16) not null, + address varchar(256) not null, + lookup_hash varchar, + mxid varchar(256) not null, + ts integer not null, + notBefore bigint not null, + notAfter bigint not null +); +CREATE INDEX IF NOT EXISTS lookup_hash_medium on local_threepid_associations (lookup_hash, medium); CREATE UNIQUE INDEX IF NOT EXISTS medium_address on local_threepid_associations(medium, address); -CREATE TABLE IF NOT EXISTS global_threepid_associations (id integer primary key, medium varchar(16) not null, address varchar(256) not null, mxid varchar(256) not null, ts integer not null, notBefore bigint not null, notAfter integer not null, originServer varchar(255) not null, originId integer not null, sgAssoc text not null); +CREATE TABLE IF NOT EXISTS global_threepid_associations ( + id integer primary key, + medium varchar(16) not null, + address varchar(256) not null, + lookup_hash varchar, + mxid varchar(256) not null, + ts integer not null, + notBefore bigint not null, + notAfter integer not null, + originServer varchar(255) not null, + originId integer not null, + sgAssoc text not null +); +CREATE INDEX IF NOT EXISTS lookup_hash_medium on global_threepid_associations (lookup_hash, medium); CREATE INDEX IF NOT EXISTS medium_address on global_threepid_associations (medium, address); CREATE INDEX IF NOT EXISTS medium_lower_address on global_threepid_associations (medium, lower(address)); CREATE UNIQUE INDEX IF NOT EXISTS originServer_originId on global_threepid_associations (originServer, originId); diff --git a/sydent/http/httpserver.py b/sydent/http/httpserver.py index 0317f824..78796550 100644 --- a/sydent/http/httpserver.py +++ b/sydent/http/httpserver.py @@ -38,6 +38,7 @@ def __init__(self, sydent): identity = Resource() api = Resource() v1 = self.sydent.servlets.v1 + v2 = self.sydent.servlets.v2 validate = Resource() email = Resource() @@ -51,6 +52,9 @@ def __init__(self, sydent): lookup = self.sydent.servlets.lookup bulk_lookup = self.sydent.servlets.bulk_lookup + hash_details = self.sydent.servlets.hash_details + lookup_v2 = self.sydent.servlets.lookup_v2 + threepid = Resource() bind = self.sydent.servlets.threepidBind unbind = self.sydent.servlets.threepidUnbind @@ -63,6 +67,7 @@ def __init__(self, sydent): root.putChild('_matrix', matrix) matrix.putChild('identity', identity) identity.putChild('api', api) + identity.putChild('v2', v2) api.putChild('v1', v1) v1.putChild('validate', validate) @@ -93,6 +98,9 @@ def __init__(self, sydent): v1.putChild('sign-ed25519', self.sydent.servlets.blindlySignStuffServlet) + v2.putChild('lookup', lookup_v2) + v2.putChild('hash_details', hash_details) + self.factory = Site(root) self.factory.displayTracebacks = False diff --git a/sydent/http/servlets/__init__.py b/sydent/http/servlets/__init__.py index 9e2eabb4..7b737125 100644 --- a/sydent/http/servlets/__init__.py +++ b/sydent/http/servlets/__init__.py @@ -20,11 +20,11 @@ def get_args(request, required_args): """ - Helper function to get arguments for an HTTP request + Helper function to get arguments for an HTTP request. Currently takes args from the top level keys of a json object or www-form-urlencoded for backwards compatability. Returns a tuple (error, args) where if error is non-null, - the requesat is malformed. Otherwise, args contains the + the request is malformed. Otherwise, args contains the parameters passed. """ args = None diff --git a/sydent/http/servlets/hashdetailsservlet.py b/sydent/http/servlets/hashdetailsservlet.py new file mode 100644 index 00000000..3b741cdd --- /dev/null +++ b/sydent/http/servlets/hashdetailsservlet.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.web.resource import Resource +from sydent.db.threepid_associations import GlobalAssociationStore +from sydent.db.hashing_metadata import HashingMetadataStore + +import logging +import json +import signedjson.sign + +from sydent.http.servlets import get_args, jsonwrap, send_cors + + +logger = logging.getLogger(__name__) + + +class HashDetailsServlet(Resource): + isLeaf = True + known_algorithms = ["sha256", "none"] + + def __init__(self, syd, lookup_pepper): + self.sydent = syd + self.lookup_pepper = lookup_pepper + + def render_GET(self, request): + """ + Return the hashing algorithms and pepper that this IS supports. The + pepper included in the response is stored in the database, or + otherwise generated. + + Returns: An object containing an array of hashing algorithms the + server supports, and a `lookup_pepper` field, which is a + server-defined value that the client should include in the 3PID + information before hashing. + """ + send_cors(request) + + request.setResponseCode(200) + return json.dumps({ + "algorithms": self.known_algorithms, + "lookup_pepper": self.lookup_pepper, + }) + + @jsonwrap + def render_OPTIONS(self, request): + send_cors(request) + request.setResponseCode(200) + return {} diff --git a/sydent/http/servlets/lookupservlet.py b/sydent/http/servlets/lookupservlet.py index 4ff92af7..3a146db9 100644 --- a/sydent/http/servlets/lookupservlet.py +++ b/sydent/http/servlets/lookupservlet.py @@ -36,6 +36,9 @@ def __init__(self, syd): def render_GET(self, request): """ Look up an individual threepid. + + ** DEPRECATED ** + Params: 'medium': the medium of the threepid 'address': the address of the threepid Returns: A signed association if the threepid has a corresponding mxid, otherwise the empty object. diff --git a/sydent/http/servlets/lookupv2servlet.py b/sydent/http/servlets/lookupv2servlet.py new file mode 100644 index 00000000..4105ecb1 --- /dev/null +++ b/sydent/http/servlets/lookupv2servlet.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.web.resource import Resource + +import logging + +from sydent.http.servlets import get_args, jsonwrap, send_cors +from sydent.db.threepid_associations import GlobalAssociationStore +from sydent.http.servlets.hashdetailsservlet import HashDetailsServlet + +logger = logging.getLogger(__name__) + + +class LookupV2Servlet(Resource): + isLeaf = True + + def __init__(self, syd, lookup_pepper): + self.sydent = syd + self.globalAssociationStore = GlobalAssociationStore(self.sydent) + self.lookup_pepper = lookup_pepper + + @jsonwrap + def render_POST(self, request): + """ + Perform lookups with potentially hashed 3PID details. + + Depending on our response to /hash_details, the client will choose a + hash algorithm and pepper, hash the 3PIDs it wants to lookup, and + send them to us, along with the algorithm and pepper it used. + + We first check this algorithm/pepper combo matches what we expect, + then compare the 3PID details to what we have in the database. + + Params: A JSON object containing the following keys: + * 'addresses': List of hashed/plaintext (depending on the + algorithm) 3PID addresses and mediums. + * 'algorithm': The algorithm the client has used to process + the 3PIDs. + * 'pepper': The pepper the client has attached to the 3PIDs. + + Returns: Object with key 'mappings', which is a dictionary of results + where each result is a key/value pair of what the client sent, and + the matching Matrix User ID that claims to own that 3PID. + + User IDs for which no mapping is found are omitted. + """ + send_cors(request) + + err, args = get_args(request, ('addresses', 'algorithm', 'pepper')) + if err: + return err + + addresses = args['addresses'] + if not isinstance(addresses, list): + request.setResponseCode(400) + return {'errcode': 'M_INVALID_PARAM', 'error': 'addresses must be a list'} + + algorithm = str(args['algorithm']) + if algorithm not in HashDetailsServlet.known_algorithms: + request.setResponseCode(400) + return {'errcode': 'M_INVALID_PARAM', 'error': 'algorithm is not supported'} + + pepper = str(args['pepper']) + if pepper != self.lookup_pepper: + request.setResponseCode(400) + return {'errcode': 'M_INVALID_PEPPER', 'error': "pepper does not match '%s'" % + self.lookup_pepper} + + logger.info("Lookup of %d threepid(s) with algorithm %s", len(addresses), algorithm) + if algorithm == "none": + # Lookup without hashing + medium_address_tuples = [] + for address_and_medium in addresses: + # Parse medium, address components + address_medium_split = address_and_medium.split() + + # Forbid addresses that contain a space + if len(address_medium_split) != 2: + request.setResponseCode(400) + return { + 'errcode': 'M_UNKNOWN', + 'error': 'Invalid "address medium" pair: "%s"' % address_and_medium + } + + # Get the mxid for the address/medium combo if known + address, medium = address_medium_split + medium_address_tuples.append((str(medium), str(address))) + + # Lookup the mxids + medium_address_mxid_tuples = self.globalAssociationStore.getMxids(medium_address_tuples) + + # Return a dictionary of lookup_string: mxid values + return {'mappings': {x[1]: x[2] for x in medium_address_mxid_tuples}} + + elif algorithm == "sha256": + # Lookup using SHA256 with URL-safe base64 encoding + mappings = {} + for h in addresses: + mxid = self.globalAssociationStore.retrieveMxidFromHash(h) + if mxid: + mappings[h] = mxid + + return {'mappings': mappings} + + request.setResponseCode(400) + return {'errcode': 'M_INVALID_PARAM', 'error': 'algorithm is not supported'} + + @jsonwrap + def render_OPTIONS(self, request): + send_cors(request) + request.setResponseCode(200) + return {} diff --git a/sydent/http/servlets/replication.py b/sydent/http/servlets/replication.py index 5a5642f5..d59fb901 100644 --- a/sydent/http/servlets/replication.py +++ b/sydent/http/servlets/replication.py @@ -18,6 +18,10 @@ from twisted.web.resource import Resource from sydent.http.servlets import jsonwrap from sydent.threepid import threePidAssocFromDict + +from sydent.util.hash import sha256_and_url_safe_base64 + +from sydent.db.hashing_metadata import HashingMetadataStore from sydent.db.peers import PeerStore from sydent.db.threepid_associations import GlobalAssociationStore @@ -29,6 +33,7 @@ class ReplicationPushServlet(Resource): def __init__(self, sydent): self.sydent = sydent + self.hashing_store = HashingMetadataStore(sydent) @jsonwrap def render_POST(self, request): @@ -66,21 +71,32 @@ def render_POST(self, request): globalAssocsStore = GlobalAssociationStore(self.sydent) - for originId,sgAssoc in inJson['sgAssocs'].items(): + for originId, sgAssoc in inJson['sgAssocs'].items(): try: peer.verifySignedAssociation(sgAssoc) logger.debug("Signed association from %s with origin ID %s verified", peer.servername, originId) - # Don't bother adding if one has already failed: we add all of them or none so we're only going to - # roll back the transaction anyway (but we continue to try & verify the rest so we can give a - # complete list of the ones that don't verify) + # Don't bother adding if one has already failed: we add all of them or none so + # we're only going to roll back the transaction anyway (but we continue to try + # & verify the rest so we can give a complete list of the ones that don't + # verify) if len(failedIds) > 0: continue assocObj = threePidAssocFromDict(sgAssoc) if assocObj.mxid is not None: - globalAssocsStore.addAssociation(assocObj, json.dumps(sgAssoc), peer.servername, originId, commit=False) + # Calculate the lookup hash with our own pepper for this association + str_to_hash = ' '.join( + [assocObj.address, assocObj.medium, + self.hashing_store.get_lookup_pepper()], + ) + assocObj.lookup_hash = sha256_and_url_safe_base64(str_to_hash) + + # Add this association + globalAssocsStore.addAssociation( + assocObj, json.dumps(sgAssoc), peer.servername, originId, commit=False + ) else: logger.info("Incoming deletion: removing associations for %s / %s", assocObj.medium, assocObj.address) globalAssocsStore.removeAssociation(assocObj.medium, assocObj.address) @@ -98,4 +114,4 @@ def render_POST(self, request): 'failed_ids':failedIds} else: self.sydent.db.commit() - return {'success':True} + return {'success': True} diff --git a/sydent/http/servlets/v2_servlet.py b/sydent/http/servlets/v2_servlet.py new file mode 100644 index 00000000..0e6b630d --- /dev/null +++ b/sydent/http/servlets/v2_servlet.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.web.resource import Resource + +from sydent.http.servlets import jsonwrap, send_cors + + +class V2Servlet(Resource): + isLeaf = False + + def __init__(self, syd): + Resource.__init__(self) + self.sydent = syd + + @jsonwrap + def render_GET(self, request): + send_cors(request) + request.setResponseCode(200) + return {} + + @jsonwrap + def render_OPTIONS(self, request): + send_cors(request) + request.setResponseCode(200) + return {} diff --git a/sydent/replication/peer.py b/sydent/replication/peer.py index 1371f9de..f500e982 100644 --- a/sydent/replication/peer.py +++ b/sydent/replication/peer.py @@ -17,8 +17,10 @@ import ConfigParser from sydent.db.threepid_associations import GlobalAssociationStore +from sydent.db.hashing_metadata import HashingMetadataStore from sydent.threepid import threePidAssocFromDict from sydent.config import ConfigError +from sydent.util.hash import sha256_and_url_safe_base64 from unpaddedbase64 import decode_base64 import signedjson.sign @@ -57,6 +59,7 @@ class LocalPeer(Peer): def __init__(self, sydent): super(LocalPeer, self).__init__(sydent.server_name, {}) self.sydent = sydent + self.hashing_store = HashingMetadataStore(sydent) globalAssocStore = GlobalAssociationStore(self.sydent) self.lastId = globalAssocStore.lastIdFromServer(self.servername) @@ -68,8 +71,16 @@ def pushUpdates(self, sgAssocs): for localId in sgAssocs: if localId > self.lastId: assocObj = threePidAssocFromDict(sgAssocs[localId]) + if assocObj.mxid is not None: - # We can probably skip verification for the local peer (although it could be good as a sanity check) + # Assign a lookup_hash to this association + str_to_hash = ' '.join( + [assocObj.address, assocObj.medium, self.hashing_store.get_lookup_pepper()], + ) + assocObj.lookup_hash = sha256_and_url_safe_base64(str_to_hash) + + # We can probably skip verification for the local peer (although it could + # be good as a sanity check) globalAssocStore.addAssociation(assocObj, json.dumps(sgAssocs[localId]), self.sydent.server_name, localId) else: diff --git a/sydent/sydent.py b/sydent/sydent.py index b5081a7f..447c4666 100644 --- a/sydent/sydent.py +++ b/sydent/sydent.py @@ -19,6 +19,7 @@ import logging import logging.handlers import os +import pickle import twisted.internet.reactor from twisted.internet import task @@ -38,12 +39,17 @@ from validators.msisdnvalidator import MsisdnValidator from hs_federation.verifier import Verifier +from util.hash import sha256_and_url_safe_base64 +from util.tokenutils import generateAlphanumericTokenOfLength + from sign.ed25519 import SydentEd25519 from http.servlets.emailservlet import EmailRequestCodeServlet, EmailValidateCodeServlet from http.servlets.msisdnservlet import MsisdnRequestCodeServlet, MsisdnValidateCodeServlet from http.servlets.lookupservlet import LookupServlet from http.servlets.bulklookupservlet import BulkLookupServlet +from http.servlets.lookupv2servlet import LookupV2Servlet +from http.servlets.hashdetailsservlet import HashDetailsServlet from http.servlets.pubkeyservlets import Ed25519Servlet from http.servlets.threepidbindservlet import ThreePidBindServlet from http.servlets.threepidunbindservlet import ThreePidUnbindServlet @@ -51,8 +57,10 @@ from http.servlets.getvalidated3pidservlet import GetValidated3pidServlet from http.servlets.store_invite_servlet import StoreInviteServlet from http.servlets.v1_servlet import V1Servlet +from http.servlets.v2_servlet import V2Servlet from db.valsession import ThreePidValSessionStore +from db.hashing_metadata import HashingMetadataStore from threepid.bind import ThreepidBinder @@ -174,6 +182,19 @@ def sighup(signum, stack): addr=self.cfg.get("general", "prometheus_addr"), ) + # See if a pepper already exists in the database + # Note: This MUST be run before we start serving requests, otherwise lookups for + # 3PID hashes may come in before we've completed generating them + hashing_metadata_store = HashingMetadataStore(self) + lookup_pepper = hashing_metadata_store.get_lookup_pepper() + if not lookup_pepper: + # No pepper defined in the database, generate one + lookup_pepper = generateAlphanumericTokenOfLength(5) + + # Store it in the database and rehash 3PIDs + hashing_metadata_store.store_lookup_pepper(sha256_and_url_safe_base64, + lookup_pepper) + self.validators = Validators() self.validators.email = EmailValidator(self) self.validators.msisdn = MsisdnValidator(self) @@ -186,12 +207,15 @@ def sighup(signum, stack): self.servlets = Servlets() self.servlets.v1 = V1Servlet(self) + self.servlets.v2 = V2Servlet(self) self.servlets.emailRequestCode = EmailRequestCodeServlet(self) self.servlets.emailValidate = EmailValidateCodeServlet(self) self.servlets.msisdnRequestCode = MsisdnRequestCodeServlet(self) self.servlets.msisdnValidate = MsisdnValidateCodeServlet(self) self.servlets.lookup = LookupServlet(self) self.servlets.bulk_lookup = BulkLookupServlet(self) + self.servlets.hash_details = HashDetailsServlet(self, lookup_pepper) + self.servlets.lookup_v2 = LookupV2Servlet(self, lookup_pepper) self.servlets.pubkey_ed25519 = Ed25519Servlet(self) self.servlets.pubkeyIsValid = PubkeyIsValidServlet(self) self.servlets.ephemeralPubkeyIsValid = EphemeralPubkeyIsValidServlet(self) diff --git a/sydent/threepid/__init__.py b/sydent/threepid/__init__.py index 7d225445..2d6d82ee 100644 --- a/sydent/threepid/__init__.py +++ b/sydent/threepid/__init__.py @@ -15,14 +15,23 @@ # limitations under the License. def threePidAssocFromDict(d): - assoc = ThreepidAssociation(d['medium'], d['address'], d['mxid'], d['ts'], d['not_before'], d['not_after']) + assoc = ThreepidAssociation( + d['medium'], + d['address'], + None, # empty lookup_hash digest by default + d['mxid'], + d['ts'], + d['not_before'], + d['not_after'], + ) return assoc class ThreepidAssociation: - def __init__(self, medium, address, mxid, ts, not_before, not_after): + def __init__(self, medium, address, lookup_hash, mxid, ts, not_before, not_after): """ :param medium: The medium of the 3pid (eg. email) :param address: The identifier (eg. email address) + :param lookup_hash: A hash digest of the 3pid. Can be a str or None :param mxid: The matrix ID the 3pid is associated with :param ts: The creation timestamp of this association, ms :param not_before: The timestamp, in ms, at which this association becomes valid @@ -30,6 +39,7 @@ def __init__(self, medium, address, mxid, ts, not_before, not_after): """ self.medium = medium self.address = address + self.lookup_hash = lookup_hash self.mxid = mxid self.ts = ts self.not_before = not_before diff --git a/sydent/threepid/bind.py b/sydent/threepid/bind.py index c40a4cfe..9bb8055b 100644 --- a/sydent/threepid/bind.py +++ b/sydent/threepid/bind.py @@ -25,6 +25,8 @@ from sydent.db.threepid_associations import LocalAssociationStore from sydent.util import time_msec +from sydent.util.hash import sha256_and_url_safe_base64 +from sydent.db.hashing_metadata import HashingMetadataStore from sydent.threepid.signer import Signer from sydent.http.httpclient import FederationHttpClient @@ -48,6 +50,7 @@ class ThreepidBinder: def __init__(self, sydent): self.sydent = sydent + self.hashing_store = HashingMetadataStore(sydent) def addBinding(self, medium, address, mxid): """Binds the given 3pid to the given mxid. @@ -62,9 +65,20 @@ def addBinding(self, medium, address, mxid): """ localAssocStore = LocalAssociationStore(self.sydent) + # Fill out the association details createdAt = time_msec() expires = createdAt + ThreepidBinder.THREEPID_ASSOCIATION_LIFETIME_MS - assoc = ThreepidAssociation(medium, address, mxid, createdAt, createdAt, expires) + + # Hash the medium + address and store that hash for the purposes of + # later lookups + str_to_hash = ' '.join( + [address, medium, self.hashing_store.get_lookup_pepper()], + ) + lookup_hash = sha256_and_url_safe_base64(str_to_hash) + + assoc = ThreepidAssociation( + medium, address, lookup_hash, mxid, createdAt, createdAt, expires, + ) localAssocStore.addOrUpdateAssociation(assoc) diff --git a/sydent/util/hash.py b/sydent/util/hash.py new file mode 100644 index 00000000..aa5d5ae3 --- /dev/null +++ b/sydent/util/hash.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import hashlib +import unpaddedbase64 + + +def sha256_and_url_safe_base64(input_text): + """SHA256 hash an input string, encode the digest as url-safe base64, and + return + + :param input_text: string to hash + :type input_text: str + + :returns a sha256 hashed and url-safe base64 encoded digest + :rtype: str + """ + digest = hashlib.sha256(input_text.encode()).digest() + return unpaddedbase64.encode_base64(digest, urlsafe=True) +