From 00220dcb981912d2e5cbb55fba944a64f7a5bf9f Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Mon, 20 Dec 2021 10:46:16 +0000 Subject: [PATCH 01/81] WIP: 3575 --- proposals/3575-sync.md | 310 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 310 insertions(+) create mode 100644 proposals/3575-sync.md diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md new file mode 100644 index 00000000000..e25a8395cdf --- /dev/null +++ b/proposals/3575-sync.md @@ -0,0 +1,310 @@ +# MSCXXXX: Client syncing with sliding windows (aka Sync v3) + +This MSC outlines a replacement for the CS API endpoint `/sync`. + +The current `/sync` endpoint scales badly as the number of rooms on an account increases. It scales +badly because all rooms are returned to the client, and clients cannot opt-out of a large amount of +extraneous data such as receipts. On large accounts with thousands of rooms, the initial sync +operation can take minutes to perform. This significantly delays the initial login to Matrix clients. + +## Goals + +Any improved `/sync` mechanism had a number of goals: + - Sync time should be independent of the number of rooms you are in. + - Time from launch to confident usability should be as low as possible. + - Time from login on existing accounts to usability should be as low as possible. + - Bandwidth should be minimised. + - Support lazy-loading of things like read receipts (and avoid sending unnecessary data to the client) + - Support informing the client when room state changes from under it, due to state resolution. + - Clients should be able to work correctly without ever syncing in the full set of rooms they’re in. + - Don’t incremental sync rooms you don’t care about. + - Combining uploaded filters with ad-hoc filter parameters (which isn’t possible with sync v2 today) + - Servers should not need to store all past since tokens. If a since token has been discarded we should gracefully degrade to initial sync. + - Ability to filter by space. 
+ +These goals shaped the design of this proposal. + +## Proposal + +At a high level, the proposal introduces a way for clients to filter and sort the rooms they are +joined to and then request a subset of the resulting list of rooms rather than the entire room list. +``` + All joined rooms on user's account +Q W E R T Y U I O P L K J H G F D S A Z X C V B N M +\ / + \ / + \ Subset of rooms matched by filters / + Q W E R T Y U I O P L K J H G F D S A Z X C V + | + A C D E F G H I J K L O P Q R S T U V W X Y Z Rooms sorted by name (or by recency, etc) + |_______| + | + + A C D E F first 5 rooms requested +``` +It also introduces a number of new concepts which are explained in more detail later on: + - Core API: The minimal API to be sync v3 compatible. + - Extensions: Additional APIs which expose more data from the server e.g presence, device messages. + - Sticky Parameters: Clients can specify request parameters once and have the server remember what + they were, without forcing the client to resend the parameter every time. 
+ +### Core +A complete sync request looks like: +`POST /v3/sync?pos=4`: +```js +{ + // Sliding Window API + "lists": [ + { + "rooms": [ [0,99] ], + "sort": [ "by_notification_count", "by_recency", "by_name" ], + "required_state": [ + ["m.room.join_rules", ""], + ["m.room.history_visibility", ""], + ["m.space.child", "*"] + ], + "timeline_limit": 10, + "filters": { + "is_dm": true + } + } + ], + + // Room Subscriptions API + "room_subscriptions": { + "!sub1:bar": { + "required_state": [ ["*","*"] ], + "timeline_limit": 50 + } + }, + "unsubscribe_rooms": [ "!sub3:bar" ] + + // Extensions API + "extensions": {} +} +``` +An entire response looks like: +`HTTP 200 OK` +```js +{ + // Connection and Streaming API + "initial": true, + "pos": "5", + + // Sliding Window API + "ops": [ + { + "list": 0, + "range": [0,99], + "op": "SYNC", + "rooms": [ + { + "room_id": "!foo:bar", + "name": "The calculated room name", + "required_state": [ + {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, + {"sender":"@alice:example.com","type":"m.room.history_visibility", "state_key":"", "content":{"history_visibility":"joined"}}, + {"sender":"@alice:example.com","type":"m.space.child", "state_key":"!foo:example.com", "content":{"via":["example.com"]}}, + {"sender":"@alice:example.com","type":"m.space.child", "state_key":"!bar:example.com", "content":{"via":["example.com"]}}, + {"sender":"@alice:example.com","type":"m.space.child", "state_key":"!baz:example.com", "content":{"via":["example.com"]}} + ], + "timeline": [ + {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"A"}}, + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"B"}}, + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"C"}}, + {"sender":"@alice:example.com","type":"m.room.message", 
"content":{"body":"D"}}, + ], + "notification_count": 54, + "highlight_count": 3 + }, + // ... 99 more items + ], + } + ], + "counts": [1337], + + // Room Subscriptions API + "room_subscriptions": { + "!sub1:bar": { + "name": "Alice and Bob", + "required_state": [ + {"sender":"@alice:example.com","type":"m.room.create", "state_key":"", "content":{"creator":"@alice:example.com"}}, + {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, + {"sender":"@alice:example.com","type":"m.room.history_visibility", "state_key":"", "content":{"history_visibility":"joined"}}, + {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@alice:example.com", "content":{"membership":"join"}} + ], + "timeline": [ + {"sender":"@alice:example.com","type":"m.room.create", "state_key":"", "content":{"creator":"@alice:example.com"}}, + {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, + {"sender":"@alice:example.com","type":"m.room.history_visibility", "state_key":"", "content":{"history_visibility":"joined"}}, + {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@alice:example.com", "content":{"membership":"join"}} + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"A"}}, + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"B"}}, + ], + "notification_count": 1, + "highlight_count": 0 + } + } + + // Extensions API + "extensions": {} +} +``` +These fields and their interactions are explained in the next few sections. This forms the core of +the API. Additional data can be returned via "extensions". + +#### Connections and streaming data + +At a high level, the syncing mechanism creates a "connection" to the server to allow the +bi-directional exchange of JSON objects. This mechanism is ideally suited for WebSockets, but more +difficult to do for HTTP long-polling. 
+ +For the long-polling use case, this proposal includes an opaque token that is very similar to +`/sync` v2's `since` query parameter. This is called `pos` and represents the position in the stream +the client is currently at. Unlike `/sync` v2, this token is ephemeral and can be invalidated at any +time. When a client first connects to the server, no `pos` is specified. Also unlike `/sync` v2, this +token cannot be used with other APIs such as `/messages` or `/keys/changes`. + +In simple servers, the `pos` may be an incrementing integer, but more complex servers may use vector +clocks or contain node identifying information in the token. Clients MUST treat `pos` as an opaque +value and not introspect it. + +When a `pos` is invalidated, the server MUST treat the invalidated `pos` as if it was absent +(in other words that this is an initial sync) and set `initial: true` in the response to inform the +client that the response is now an initial sync. For clarity, `initial: true` MUST also be set when +there is no `pos` value provided. When there is a valid `pos`, this flag MUST be omitted (sending +`initial: false` is wasteful). + +A response for a given `pos` must be idempotent to account for packet loss. For example: +``` +Client Server + | ---------------------> | data=[A,B], pos=2 + | <--data=[A,B], pos=2-- | + | | data=[C], pos=3 (new event arrives) + | -----pos=2-----------> | + | X--data=[C], pos=3-- | Response is lost + | | + | | data=[C,D], pos=4 (another new event arrives) + | -----pos=2-----------> | + | <----data=[C], pos=3-- | Server CANNOT send data=[C,D] pos=4, it MUST send the previous response +``` +Failure to do this will result in duplicate data being sent to the client. + +#### Sticky request parameters + +Request parameters can be "sticky". This means that their value is remembered across multiple requests. +The lifetime of sticky request parameters are tied to a sync connection. 
 When the connection is lost, +the request parameters are lost with it. This feature exists to allow clients to configure the sync +stream in a bandwidth-efficient way. For example, if all keys were sticky: +``` +Client Server + | ------{ "foo": "bar" }------> | {"foo":"bar"} + | <-------HTTP 200 OK---------- | + | ------{ "baz": "quuz" }-----> | {"foo":"bar","baz":"quuz"} + | <-------HTTP 200 OK---------- | +``` +For complex nested data, APIs which include sticky parameters MUST indicate every sticky field to +avoid ambiguity. For example, an ambiguous API may state the following: +```js +{ + "foo": { // sticky + "bar": 1, + "baz": 2 + } +} +``` +When this object is combined with the additional object: +```js +{ + "foo": { + "bar": 3 + } +} +``` +What is the value of `baz`? Both unset and `2` are valid answers. For this reason, `baz` MUST +be marked as sticky if the desired result is `2`, else it will be unset. + +Sticky request parameters SHOULD be set at the start of the connection and kept constant throughout +the lifetime of the connection. It is possible for clients and servers to disagree on the value of +a sticky request parameter in the event of packet loss: +``` + Client Server + | ------{ "foo": "bar" }------> | {"foo":"bar"} +{"foo":"bar"} | <-------HTTP 200 OK---------- | + | ------{ "baz": "quuz" }-----> | {"foo":"bar","baz":"quuz"} + | X--HTTP 200 OK---------- | + | ------{ "baz": "quuz" }-----> | {"foo":"bar","baz":"quuz"} + | X--HTTP 200 OK---------- | + | ------{ "baz": "quuz" }-----> | {"foo":"bar","baz":"quuz"} + | <-------HTTP 200 OK---------- | +``` +For this reason, some request parameters are not suitable to be made "sticky". These include parameters +which are extremely dynamic in nature, such as list ranges. 
+ +#### Sliding Window API +#### Room Subscription API +#### Bandwidth optimisations for persistent clients + +#### Extensions +We anticipate that as more features land in Matrix, different kinds of data will also want to be synced +to clients. Sync v2 did not have any first-class support to opt-in to new data. Sync v3 does have +support for this via "extensions". Extensions also allow this proposal to be broken up into more +manageable sections. Extensions are requested by the client in a dedicated `extensions` block: +```js +{ + "extensions": { + "name_of_extension": { // sticky + "enabled": true, // sticky + "extension_arg": "value", + "extension_arg_2": true + } + } +} +``` +Extensions MUST have an `enabled` flag which defaults to `false`. If a client sends an unknown extension +name, the server MUST ignore it (or else backwards compatibility between servers is broken when a newer +client tries to communicate with an older server). Extension args may or may not be sticky, it +depends on the extension. + +Extensions can leverage the data from the core API, notably which rooms are currently inside sliding +windows as well as which rooms are explicitly subscribed to. + +### Extensions +#### To Device Messaging + - Extension name: `to_device` + - Args: + * `limit` (Sticky): The max number of events to return per sync response. + * `since`: The token returned in the `next_batch` section of this extension, or blank if this is the first time. 
+#### End-to-End Encryption + - Extension name: `e2ee` + +#### Receipts +TODO +#### Typing Notifications +TODO +#### Presence +TODO +#### Account Data +TODO + + + + + +## Potential issues + + +## Alternatives + + +## Security considerations +- room sub auth check +- history visibility for timeline_limit + +## Unstable prefix + + +## Dependencies + +## Appendices From 5d5aabf3d20d3aa7c08c3bf4606b36652a677068 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Mon, 20 Dec 2021 16:08:38 +0000 Subject: [PATCH 02/81] Flesh out room list params and sliding window API --- proposals/3575-sync.md | 224 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 223 insertions(+), 1 deletion(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index e25a8395cdf..7911c631e7d 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -1,4 +1,4 @@ -# MSCXXXX: Client syncing with sliding windows (aka Sync v3) +# MSC3575: Client syncing with sliding windows (aka Sync v3) This MSC outlines a replacement for the CS API endpoint `/sync`. @@ -242,7 +242,229 @@ a sticky request parameter in the event of packet loss: For this reason, some request parameters are not suitable to be made "sticky". These include parameters which are extremely dynamic in nature, such as list ranges. +#### Room List parameters + +One or more room lists can be requested in sync v3 like so: +```js +{ + "lists": [ + { + // Sliding window ranges, see the Sliding Window API for more information. + "rooms": [ [0,99] ], + // Sticky. List sort order. See Sliding Window API for more information. + "sort": [ "by_notification_count", "by_recency", "by_name" ], + + // Sticky. Required state for each room returned. An array of event type and state key tuples. + // Note that elements of this array are NOT sticky so they must be specified in full when they + // are changed. + "required_state": [ + // Request the join rules event. Note that the empty string is required here. 
+ ["m.room.join_rules", ""], + ["m.room.history_visibility", ""], + // Request all `m.room.member` state events. + // The * is a special sentinel value meaning 'all keys'. + // Note that `*` is NOT a generic glob function. You cannot specify `foo*` to pull in keys + // like `food` and `foobar`. In this case, the * is treated as a literal *. + ["m.room.member", "*"] + // Request all state events. + ["*", "*"] + ], + // Sticky. The maximum number of timeline events to return per response. + "timeline_limit": 10, + // Sticky. Filters to apply to the list before sorting. + "filters": { + // All fields below are Sticky. + // All fields are applied with AND operators, hence if is_dm:true and is_encrypted:true + // then only Encrypted DM rooms will be returned. The absence of fields implies no filter + // on that criteria: it does NOT imply 'false'. + + // Flag which only returns rooms present (or not) in the DM section of account data. + // If unset, both DM rooms and non-DM rooms are returned. If false, only non-DM rooms + // are returned. If true, only DM rooms are returned. + "is_dm": true, + // A list of spaces which target rooms must be a part of. For every joined room for this + // user, ensure that there is a parent space event which is in this list. If unset, all + // rooms are included. Servers MUST NOT navigate subspaces. It is up to the client to + // give a complete list of spaces to native. Only rooms directly in these spaces will be + // returned. + "spaces": ["!foo:bar", "!bar:baz"], + // Flag which only returns rooms which have an `m.room.encryption` state event. If unset, + // both encrypted and unencrypted rooms are returned. If false, only unencrypted rooms + // are returned. If true, only encrypted rooms are returned. + "is_encrypted": true, + // Flag which only returns rooms the user is currently invited to. If unset, both invited + // and joined rooms are returned. If false, no invited rooms are returned. 
 If true, only + // invited rooms are returned. + "is_invite": true + } + } + ], +} +``` + +_Rationale: There are use cases for clients requesting multiple lists. Many clients have DMs and +Invites in dedicated sections separate from the joined room list. API support for this is important +to ensure that the initial UI can load quickly. This is why the API allows multiple lists and there +are filters for things like DMs, Invites and Spaces. The timeline limit is very similar to Sync v2's +`room.timeline.limit` filter field and is required to ensure that busy rooms don't send vast amounts +of events. Wildcard matching on `required_state` fields is purposefully restricted to avoid clients +sending complex matching criteria (e.g pathological regular expressions) and in practice there seems +to be very little in-the-wild use of partial key matching like `foo*` as new state events tend to be +namespaced by their event type. Fields in `required_state` are not sticky mainly due to semantics: +expressing deletions becomes hard. 
The inclusion of a dedicated `is_encrypted` filter exists for the +benefit of complex clients: see the E2EE section for more information._ + +The server will then return rooms which have the following fields: + +```js +{ + "room_id": "!foo:bar", + "name": "The calculated room name", + // this is the CURRENT STATE, unlike sync v2 + "required_state": [ + {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, + {"sender":"@alice:example.com","type":"m.room.history_visibility", "state_key":"", "content":{"history_visibility":"joined"}}, + {"sender":"@alice:example.com","type":"m.space.child", "state_key":"!foo:example.com", "content":{"via":["example.com"]}}, + {"sender":"@alice:example.com","type":"m.space.child", "state_key":"!bar:example.com", "content":{"via":["example.com"]}}, + {"sender":"@alice:example.com","type":"m.space.child", "state_key":"!baz:example.com", "content":{"via":["example.com"]}} + ], + // Last event is most recent. Max timeline_limit events. + "timeline": [ + {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"A"}}, + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"B"}}, + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"C"}}, + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"D"}}, + ], + "notification_count": 54, // same as sync v2 + "highlight_count": 3 // same as sync v2 +} +``` + +_Rationale: The room name and counts are required for display on the UI. They are calculated server +side because they are required for sort operations on lists. The `required_state` is controversially +the **current state** which breaks from sync v2 which has the `state` be "the state before the start +of the timeline". 
 The rationale for this was event duplication and the fact that clients would have +to rewind state to work out historical display names. Clients who show historical display names +already need to rewind state by inspecting the `prev_content` of an event to display text like +"@alice changed their name from Alice to Alice2". Event duplication may be +reduced using Event ID -> Event maps in the response, though in practice this duplication does not +happen frequently. The benefit for returning the current state is that servers can cache the latest +state to return the response more quickly, without being forced to rewind this state (as clients will +need to do) or worse, do an expensive database access to request the state before an event._ + #### Sliding Window API + +At a high level, the sliding window API provides a way to synchronise a subslice of a list in a +bandwidth efficient way. It does this by referring to "operations" which must be performed on the +stored client list, such as INSERT, DELETE and UPDATE. Each operation has an index position OR a +range of index positions which tells the client where the operation should be performed. The +possible operations are: + - `SYNC`: Sets a *range* of entries. Clients SHOULD discard what they previously knew about entries in + this range. + - `INSERT`: Sets a *single* entry. If the position is not empty then clients MUST move entries + to the left or the right depending on where the closest empty space is. + - `DELETE`: Remove a *single* entry. Often comes before an `INSERT` to allow entries to swap places. + - `UPDATE`: Update a *single* entry. Updates are cumulative (consisting of deltas only). + - `INVALIDATE`: Remove a *range* of entries. Clients MAY persist the invalidated range for offline + support, but they should be treated as empty when additional operations which concern indexes in + the range arrive from the server. 
+For example: +``` + Client Server + [] | | 0,1,2,3,4,5,6,7,8 index + | | [A,B,C,D,E,F,G,H,I] + | -------- range[0,4] -------> | + [A,B,C,D,E] | <--- SYNC[0,4]=A,B,C,D,E --- | + | | 0,1,2,3,4,5,6,7,8 + | | [H,A,B,C,D,E,F,G,I] H moves to the front + | ----- wait for updates ----> | + [H,A,B,C,D] | <- DELETE[4], INSERT[0]=H--- | + | | [H',A,B,C,D,E,F,G,I] H is updated to H' + | ----- wait for updates ----> | +[H',A,B,C,D] | <------ UPDATE[0]=H'-------- | + | | 0,1,2,3,4,5,6,7,8 + | | [J,K,L,M,N,O,P,Q,R] Entire list is replaced + | ----- wait for updates ----> | + [J,K,L,M,N] | <----INVALIDATE[0,4]-------- | + | SYNC[0,4]=J,K,L,M,N | + | | [J,K,L,N,O,P,Q,R] M is deleted + | ----- wait for updates ----> | + [J,K,L,N,O] | <- DELETE[3], INSERT[4]=O--- | +``` +The sync v3 API exposes this API shape via the following request parameters: +```js +{ + // Multiple lists can be requested + "lists": [ + { + // Multiple sliding windows inside a list can be requested. Integers are _inclusive_. + "rooms": [ [0,9], [20,29] ], + // How the list should be sorted on the server. The first value is applied first, then tiebreaks + // are performed with the 2nd sort order, then the 3rd until there are no more sort orders left. + "sort": [ "by_notification_count", "by_recency", "by_name" ], + // Additional Room List request parameters omitted as they are + // unrelated to the semantics of the sliding window, see previous section. + } + ], +} +``` +Which returns the following response parameters: +```js +{ + "ops": [ + { + // Which list is affected by this operation + "list": 0, + // Which index positions are affected. + "range": [0,9], + // The operation being performed on these index positions. + "op": "SYNC", + // The data to put in these positions. + "rooms": [ + { + "room_id": "!foo:bar", + "name": "The calculated room name", + // Additional response parameters omitted as they are + // unrelated to the semantics of the sliding window. + // See previous section on room list parameters. 
+ }, + // ... 9 more items + ], + } + ], + // The total number of entries in the list. Index positions match up to the lists in the request. + "counts": [1337], +} +``` + +The possible `sort` operations are: + - `by_recency`: Sort by `origin_server_ts` on the most recently _received_ event in the room. Note + that due to clock drift over federation it is possible for rooms to re-order such that the most + recently received event in the entire list does not cause that room to go to index position 0. + - `by_highlight_count`: Sort by the `highlight_count` for this user in this room, which is the + number of unread notifications for this room with the highlight flag set. This value is also present + in sync v2. + - `by_notification_count`: Sort by the `notification_count` for this user in this room, which is the + total number of unread notifications for this room. This value is also present in sync v2. + - `by_name`: Sort by room name lexicographically. This requires servers to implement the + [room name calculation algorithm](https://matrix.org/docs/spec/client_server/latest#calculating-the-display-name-for-a-room). + The server MUST perform the following steps: + * Calculate the room name from this user's perspective. This may vary depending on the user as + DM rooms will have the room name set to the name of the _other user_. This is the value that + will be returned in the `name` field for the room but is NOT the value that the server should + perform sort operations on. See following steps. + * Remove any of the following characters from the beginning/end of the calculated name: `#!()):_@`. + This ensures things like canonical aliases display in roughly the right locations rather than + at the start as it starts with `#`. + * Lower-case the result by unicode. This ensures `Matrix` and `matrix` sort in the same locations. + * Perform sort operations on this 'canonicalised' name. + +_Rationale: The sort operations are restrictive and limited in scope on purpose. 
Alternatives such +as arbitrary or more expansive sort orders like [RFC4790](https://datatracker.ietf.org/doc/html/rfc4790) +were decided against as it would A) force servers to support nonsensical and potentially expensive +operations and B) not produce the best sort order for specific use cases in Matrix such as alias handling._ + #### Room Subscription API #### Bandwidth optimisations for persistent clients From e699035d570043a04d2c99fefb138aaa5b97aef6 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Mon, 20 Dec 2021 16:23:15 +0000 Subject: [PATCH 03/81] Proof read --- proposals/3575-sync.md | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 7911c631e7d..4307921d416 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -285,7 +285,7 @@ One or more room lists can be requested in sync v3 like so: // A list of spaces which target rooms must be a part of. For every joined room for this // user, ensure that there is a parent space event which is in this list. If unset, all // rooms are included. Servers MUST NOT navigate subspaces. It is up to the client to - // give a complete list of spaces to native. Only rooms directly in these spaces will be + // give a complete list of spaces to navigate. Only rooms directly in these spaces will be // returned. "spaces": ["!foo:bar", "!bar:baz"], // Flag which only returns rooms which have an `m.room.encryption` state event. If unset, @@ -369,6 +369,8 @@ possible operations are: - `INVALIDATE`: Remove a *range* of entries. Clients MAY persist the invalidated range for offline support, but they should be treated as empty when additional operations which concern indexes in the range arrive from the server. 
+ + For example: ``` Client Server @@ -466,7 +468,10 @@ were decided against as it would A) force servers to support nonsensical and pot operations and B) not produce the best sort order for specific use cases in Matrix such as alias handling._ #### Room Subscription API +TODO + #### Bandwidth optimisations for persistent clients +TODO #### Extensions We anticipate that as more features land in Matrix, different kinds of data will also want to be synced @@ -515,13 +520,14 @@ TODO ## Potential issues - +TODO ## Alternatives - +TODO ## Security considerations - room sub auth check +- spaces auth check - history visibility for timeline_limit ## Unstable prefix From 02d3273a778d2146dbb7fa6993ac59bde68ad3c0 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Tue, 21 Dec 2021 10:49:02 +0000 Subject: [PATCH 04/81] MORE WORDS --- proposals/3575-sync.md | 262 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 241 insertions(+), 21 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 4307921d416..da609a357cf 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -48,7 +48,7 @@ It also introduces a number of new concepts which are explained in more detail l - Sticky Parameters: Clients can specify request parameters once and have the server remember what they were, without forcing the client to resend the parameter every time. -### Core +## Core A complete sync request looks like: `POST /v3/sync?pos=4`: ```js @@ -154,7 +154,7 @@ An entire response looks like: These fields and their interactions are explained in the next few sections. This forms the core of the API. Additional data can be returned via "extensions". -#### Connections and streaming data +### Connections and streaming data At a high level, the syncing mechanism creates a "connection" to the server to allow the bi-directional exchange of JSON objects. 
This mechanism is ideally suited for WebSockets, but more @@ -191,7 +191,7 @@ Client Server ``` Failure to do this will result in duplicate data being sent to the client. -#### Sticky request parameters +### Sticky request parameters Request parameters can be "sticky". This means that their value is remembered across multiple requests. The lifetime of sticky request parameters are tied to a sync connection. When the connection is lost, @@ -242,7 +242,7 @@ a sticky request parameter in the event of packet loss: For this reason, some request parameters are not suitable to be made "sticky". These include parameters which are extremely dynamic in nature, such as list ranges. -#### Room List parameters +### Room List parameters One or more room lists can be requested in sync v3 like so: ```js @@ -353,7 +353,7 @@ happen frequently. The benefit for returning the current state is that servers c state to return the response more quickly, without being forced to rewind this state (as clients will need to do) or worse, do an expensive database access to request the state before an event._ -#### Sliding Window API +### Sliding Window API At a high level, the sliding window API provides a way to synchronise a subslice of a list in a bandwidth efficient way. It does this by referring to "operations" which must be performed on the @@ -467,13 +467,228 @@ as arbitrary or more expansive sort orders like [RFC4790](https://datatracker.ie were decided against as it would A) force servers to support nonsensical and potentially expensive operations and B) not produce the best sort order for specific use cases in Matrix such as alias handling._ -#### Room Subscription API +The complete API shape for each operation is shown below (note the key names vary on the operation): + +```js +{ + "list": 0, + "index": 3, + "op": "UPDATE", + "room": { ... } +} + +{ + "list": 0, + "op": "DELETE", + "index": 8 +} + +{ + "list": 0, + "op": "INSERT", + "index": 99, + "room": { ... 
} +} + +{ + "list": 0, + "op": "INVALIDATE", + "range": [100,199] +} + +{ + "list": 0, + "range": [100,117], + "op": "SYNC", + "rooms": [ + // ... 18 rooms with complete state ... + ] +} +``` + +Note that clients will NOT be notified of any events or activity in rooms not in the sliding window. +This can be a problem for some use cases: + - Following a permalink to a random room which is not in the window should be possible. + - Receiving a direct @mention in a room not in the window should notify the client. + +For the first of these issues, the sync v3 API exposes a "room subscription" API. For the second issue, +the sync v3 API exposes a "notifications" API. + +### Room Subscription API + +Sometimes clients know exactly which room they want to get information about e.g by following a +permalink or by refreshing a webapp currently viewing a specific room. The sliding window API alone +is insufficient for this use case because there's no way to say "please track this room explicitly". +The room subscription API serves as a way to provide this tracking. At a high level, the client +provides a map of room ID to room list parameters and the server then returns the response in the +same format as the sliding window API, just without the operations/indexes. 
+ +To track a room `!sub1:bar`, the client would send the following request: +```js +{ + "room_subscriptions": { // sticky + "!sub1:bar": { // sticky + "required_state": [ ["*","*"] ], + "timeline_limit": 50 + } + } +} +``` +This would return the following response: +```js +{ + "room_subscriptions": { + "!sub1:bar": { + "name": "Alice and Bob", + "required_state": [ + {"sender":"@alice:example.com","type":"m.room.create", "state_key":"", "content":{"creator":"@alice:example.com"}}, + {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, + {"sender":"@alice:example.com","type":"m.room.history_visibility", "state_key":"", "content":{"history_visibility":"joined"}}, + {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@alice:example.com", "content":{"membership":"join","displayname":"Alice"}}, + {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@bob:example.com", "content":{"membership":"join","displayname":"Bob"}} + ], + "timeline": [ + {"sender":"@alice:example.com","type":"m.room.create", "state_key":"", "content":{"creator":"@alice:example.com"}}, + {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, + {"sender":"@alice:example.com","type":"m.room.history_visibility", "state_key":"", "content":{"history_visibility":"joined"}}, + {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@alice:example.com", "content":{"membership":"join","displayname":"Alice"}}, + {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@bob:example.com", "content":{"membership":"join","displayname":"Bob"}} + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"A"}}, + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"B"}}, + ], + "notification_count": 1, + "highlight_count": 0 + } + } +} +``` + +Any updates in this room would be returned in the same section of the 
sync response: + +```js +{ + "room_subscriptions": { + "!sub1:bar": { + "timeline": [ + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"C"}}, + ] + } + } +} +``` + +Multiple rooms can be subscribed to by specifying additional keys in the room subscription map. If +a room is subscribed to multiple times, the _most recent_ subscription takes effect for the purposes +of `required_state` and `timeline_limit` filtering. + +It is possible for a room to be both directly subscribed to _and_ in the sliding window. In this +case, only the room subscription section of the response will contain the data. The sliding window +will just contain the `room_id` and operation/index/list parameters. + +To unsubscribe from a room, the client needs to send a request with the room ID to unsubscribe from in +the `unsubscribe_rooms` array: +```js +{ + "unsubscribe_rooms": [ "!sub1:bar" ] +} +``` +This will delete that key from the `room_subscriptions` map on the server. It is common for clients +to view one room then swap to another room. This can be modelled as a subscription on the new room +coupled with unsubscribing from the old room. For example, if the client swapped from viewing +`!sub1:bar` to `!sub2:bar`: +```js +{ + "room_subscriptions": { + "!sub2:bar": { + "required_state": [ ["*","*"] ], + "timeline_limit": 50 + } + }, + "unsubscribe_rooms": [ "!sub1:bar" ] +} +``` + +`unsubscribe_rooms` is cleared after every response, it is not sticky. + +_Rationale: By using a map, this supports clients who can show multiple room timelines in the UI e.g +Hydrogen's grid view. The `unsubscribe_rooms` array allows rooms to be efficiently deleted from the +map. 
An alternative would be to specify an empty JSON object in the room subscription but that feels +less explicit than the array form._ + +### Notifications API TODO -#### Bandwidth optimisations for persistent clients +If you are tracking the top 5 rooms and an event arrives in the 6th room, you will be notified about +the event ONLY IF the sort order means the room bumps into the top 5. If for example you sorted +`by_name` then you won't be notified about the event in the 6th room, unless it's an `m.room.name` +event which moves the room into the top 5. In most "recent" sort orders a new event *will result* in +the 6th room bumping to the top of the list. A notable exception is when the rooms are sorted in +*alphabetical order* (`by_name`), which is what some other chat clients do for example. In this case, +you don't care about the event unless the event is a "highlightable" event (e.g direct @mention). + +- Required to show "unread messages" indicators on the room list. +- Unsure how much data to expose (probably index position + notif/highlight counts?). If we do _counts_ +then we are doomed to send a response to a client every time an event is sent in a noisy room, which +seems rather wasteful. Perhaps make it configurable? + +### Bandwidth optimisations for persistent clients TODO -#### Extensions +On re-establishing a sync connection, or re-requesting a page that was previously INVALIDATEd, the server will perform the following operations: + - For this device/session: check the last sent event ID for the room ID in question. Count the number of timeline events from that point to the latest event. Call it `N`. + - For this specific sync request: calculate a reasonable upper-bound for how many events will be returned in a reasonable worst-case scenario. This is simply `timeline_limit + len(required_state)` (ignoring `*` wildcards on state). Call it `M`. 
+ - If N > M then we would probably send more events if we did a delta than just telling the client everything from scratch, so issue a `SYNC` for this room. + - If N < M then we don't have many events since the connection was last established, so just send the delta as an `UPDATE`. + +This approach has numerous benefits: + - In the common case when you scroll a room, you won't get any `SYNC`s for rooms that were invalidated because it's highly unlikely to receive 10+ events during the room scroll (assuming you scroll back up in reasonable time). + - When you reconnect after sleeping your laptop overnight, most rooms will be `UPDATE`s, and busy rooms like Matrix HQ will be `SYNC`ed from fresh rather than sending 100s of events. + +This imposes more restrictions on the server implementation: + - Servers still need the absolute stream ordering for events to work out how many events from `$event_id` to `$latest_event_id`. + - Servers need to remember the last sent event ID for each session for each room. If rooms share a single monotonically increasing stream, then this is a single integer per session (akin to today's sync tokens for PDU events). Servers need to remember _which rooms_ have been sent to the client, along with the stream position when that was sent. So it's basically a `map[string]int64`. + + +We need to make this opt-in to support dumb clients which don't remember history. Similar to `?full_state=` in sync v2. + +### E2EE Handling +TODO + +The server cannot calculate the `highlight_count` in E2EE rooms as it cannot read the message content. +This is a problem when clients want to sort by `highlight_count`. In comparison, the server can +calculate the name, `unread_count`, and work out the most recent timestamp when sorting by those +fields. What should the server do when the client wants to sort by `highlight_count` (which is pretty +typical!)? It can: + - Assume `highlight_count == 1` whenever `unread_count > 0`. 
This ensures that E2EE rooms are always + bumped above unreads in the list, but doesn't allow sorting within the list of highlighted rooms. + - Assume `highlight_count == 0` always. This will always sort E2EE rooms below the highlight list, + even if the E2EE room has a @mention. + - Sort E2EE rooms in their own dedicated list: `{"filters": { "is_encrypted": true }}` + +In all cases, the client needs to do additional work to calculate the `highlight_count`. When the +client is streaming this work is very small as it just concerns a single event. However, when the +client has been offline for a while there could be hundreds or thousands of missed events. There are +3 options here: + - Do no work and immediately red-highlight the room. Risk of false positives. + - Grab the last N messages and see if any of them are highlights. **Current implementations using sync v2 do this.** + - Grab all the missed messages and see if any of them are highlights. Denial of service risk if there are thousands of messages. + +Once the highlight count has been adequately *estimated* (it's only truly calculated if you grab all +messages), this may affect the sort order for this room - it may diverge from that of the server. +More specifically, it may bump the room up or down the list, depending on what the sort +implementation is for E2EE rooms (top of list or below rooms with highlights). + +Clients have two main choices here: + - **Lite**: Keep E2EE rooms in the main list. This means the sort order won't always be strictly + accurate for them but is fast to do. If you are sorting by highlight count then unread count + (which is fairly typical) then E2EE rooms will always be bumped above all the unread count rooms + if the resolution algorithm is set to "Assume `highlight_count == 1` whenever `unread_count > 0`". + - **Heavy**: Sort E2EE rooms into a separate list (higher priority than the main list to + de-duplicate them). 
Manually mix together the E2EE list and the main list depending on highlight + counts. This means the sort order will be more accurate but is slower and more complex to perform. + This is why there is an `is_encrypted` filter on the room list parameters. + +### Extensions We anticipate that as more features land in Matrix, different kinds of data will also want to be synced to clients. Sync v2 did not have any first-class support to opt-in to new data. Sync v3 does have support for this via "extensions". Extensions also allow this proposal to be broken up into more @@ -497,42 +712,47 @@ depends on the extension. Extensions can leverage the data from the core API, notably which rooms are currently inside sliding windows as well as which rooms are explicitly subscribed to. -### Extensions -#### To Device Messaging +## Extensions +### To Device Messaging - Extension name: `to_device` - Args: * `limit` (Sticky): The max number of events to return per sync response. * `since`: The token returned in the `next_batch` section of this extension, or blank if this is the first time. 
-#### End-to-End Encryption +### End-to-End Encryption - Extension name: `e2ee` -#### Receipts +### Receipts TODO -#### Typing Notifications +### Typing Notifications TODO -#### Presence +### Presence TODO -#### Account Data +### Account Data TODO -## Potential issues +# Potential issues TODO -## Alternatives +# Alternatives TODO -## Security considerations +# Security considerations - room sub auth check - spaces auth check - history visibility for timeline_limit -## Unstable prefix +# Unstable prefix +- Annoyingly probably `/v4/sync` +# Dependencies +- None in practice +- Spaces support for spaces filter -## Dependencies +# Appendices +- Examples for server impls +- Examples for client impls -## Appendices From bcfd424620ac3c227e534a945890e16edabca021 Mon Sep 17 00:00:00 2001 From: kegsay Date: Tue, 21 Dec 2021 12:34:36 +0000 Subject: [PATCH 05/81] Apply suggestions from code review Co-authored-by: Matthew Hodgson --- proposals/3575-sync.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index da609a357cf..c9d23f5415d 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -5,7 +5,8 @@ This MSC outlines a replacement for the CS API endpoint `/sync`. The current `/sync` endpoint scales badly as the number of rooms on an account increases. It scales badly because all rooms are returned to the client, and clients cannot opt-out of a large amount of extraneous data such as receipts. On large accounts with thousands of rooms, the initial sync -operation can take minutes to perform. This significantly delays the initial login to Matrix clients. +operation can take minutes to perform. This significantly delays the initial login to Matrix clients, +and also makes incremental sync very heavy when resuming after any significant pause in usage. 
## Goals @@ -43,7 +44,7 @@ Q W E R T Y U I O P L K J H G F D S A Z X C V B N M A C D E F first 5 rooms requested ``` It also introduces a number of new concepts which are explained in more detail later on: - - Core API: The minimumal API to be sync v3 compatible. + - Core API: The minimal API to be sync v3 compatible. - Extensions: Additional APIs which expose more data from the server e.g presence, device messages. - Sticky Parameters: Clients can specify request parameters once and have the server remember what they were, without forcing the client to resend the parameter every time. @@ -258,7 +259,7 @@ One or more room lists can be requested in sync v3 like so: // Note that elements of this array are NOT sticky so they must be specified in full when they // are changed. "required_state": [ - // Request the join rules event. Note that the empty string is required here. + // Request the join rules event. Note that the empty string is required here to match the event's blank state_key. ["m.room.join_rules", ""], ["m.room.history_visibility", ""], // Request all `m.room.member` state events. From b415c1f02dc9eb634fc3f620e5fd3e7b1ce6ed5a Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Tue, 21 Dec 2021 12:50:58 +0000 Subject: [PATCH 06/81] Review comments --- proposals/3575-sync.md | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index c9d23f5415d..1526a0d2882 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -22,6 +22,7 @@ Any improved `/sync` mechanism had a number of goals: - Combining uploaded filters with ad-hoc filter parameters (which isn’t possible with sync v2 today) - Servers should not need to store all past since tokens. If a since token has been discarded we should gracefully degrade to initial sync. - Ability to filter by space. + - Ability to filter by room name. These goals shaped the design of this proposal. 
@@ -195,6 +196,7 @@ Failure to do this will result in duplicate data being sent to the client. ### Sticky request parameters Request parameters can be "sticky". This means that their value is remembered across multiple requests. +Clients cannot choose which parameters are sticky, the API defines which parameters are sticky. The lifetime of sticky request parameters are tied to a sync connection. When the connection is lost, the request parameters are lost with it. This feature exists to allow clients to configure the sync stream in a bandwidth-efficient way. For example, if all keys were sticky: @@ -345,14 +347,18 @@ The server will then return rooms which have the following fields: _Rationale: The room name and counts are required for display on the UI. They are calculated server side because they are required for sort operations on lists. The `required_state` is controversially the **current state** which breaks from sync v2 which has the `state` be "the state before the start -of the timeline". The rationale for this was event duplication and the fact that clients would have +of the timeline". Sync v2's rationale was event duplication (state events can appear in both the +state section and the timeline section if it's the current state) and the fact that clients would have to rewind state to work out historical display names. Clients who show historical display names already need to rewind state by inspecting the `prev_content` of an event to display text like "@alice changed their name from Alice to Alice2". Event duplication may be -reduced using Event ID -> Event maps in the response, though in practice this duplication does not -happen frequently. 
The benefit for returning the current state is that servers can cache the latest -state to return the response more quickly, without being forced to rewind this state (as clients will -need to do) or worse, do an expensive database access to request the state before an event._ +reduced using Event ID -> Event maps in the response should this be a concern. The benefit of +returning the current state is that servers can cache the latest state to return the response more +quickly. If, instead, servers returned the state at the start of a timeline block, servers are forced +to either rewind this state (as clients will need to do) or worse, do an expensive database access to +request the state before an event. As clients can be at different points in the stream for a given +room, this would force servers to cache every possible room state. It's not practical for servers to +cache every single possible earlier state for each room._ ### Sliding Window API From aafcd6ad102549292aa5f79bfcdddee502f013c9 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Tue, 21 Dec 2021 13:07:27 +0000 Subject: [PATCH 07/81] More review comments; add note on lang --- proposals/3575-sync.md | 43 +++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 1526a0d2882..7b5fd6ad88e 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -469,6 +469,11 @@ The possible `sort` operations are: * Lower-case the result by unicode. This ensures `Matrix` and `matrix` sort in the same locations. * Perform sort operations on this 'canonicalised' name. +NOTE: It is known that by forcing servers to calculate the room name there can be problems concerning +multiple languages. "Alice and Bob" in English vs "Alice et Bob" in French for example, which may affect +sort ordering. This can be mitigated by adding a `lang` sticky request parameter to control how +i18n and l10n are done. 
+ _Rationale: The sort operations are restrictive and limited in scope on purpose. Alternatives such as arbitrary or more expansive sort orders like [RFC4790](https://datatracker.ietf.org/doc/html/rfc4790) were decided against as it would A) force servers to support nonsensical and potentially expensive @@ -719,27 +724,21 @@ depends on the extension. Extensions can leverage the data from the core API, notably which rooms are currently inside sliding windows as well as which rooms are explicitly subscribed to. -## Extensions -### To Device Messaging - - Extension name: `to_device` - - Args: - * `limit` (Sticky): The max number of events to return per sync response. - * `since`: The token returned in the `next_batch` section of this extension, or blank if this is the first time. -### End-to-End Encryption - - Extension name: `e2ee` - -### Receipts -TODO -### Typing Notifications -TODO -### Presence -TODO -### Account Data -TODO - - - - +In an effort to reduce the size of this proposal, extensions will be done in separate MSCs. There will +be extensions for: + - To Device Messaging - [spec](https://spec.matrix.org/v1.1/client-server-api/#extensions-to-sync) + - End-to-End Encryption - [spec](https://spec.matrix.org/v1.1/client-server-api/#extensions-to-sync-1) + - Ephemeral Events - typing notifications, receipts: [spec](https://spec.matrix.org/v1.1/client-server-api/#client-behaviour-4) + - Presence - `presence` in sync v2: [spec](https://spec.matrix.org/v1.1/client-server-api/#get_matrixclientv3sync) + - Account Data - `account_data` in sync v2: [spec](https://spec.matrix.org/v1.1/client-server-api/#get_matrixclientv3sync) + +_Rationale: The name 'extensions' is inspired by the spec itself which refers to "Extensions to /sync" +multiple times. These additional bits of data are all generally outside the scope of the core room +graph and room list so are well-placed for being treated separately. 
Furthermore, it is possible to +make a meaningful client which only supports the core API and no extensions, as the core controls +the room list and ability to receive events and state in a room. For clients which don't do E2EE and +don't handle presence/typing/receipts/other metadata, they can simply work with this MSC alone and +in full._ # Potential issues TODO @@ -751,6 +750,8 @@ TODO - room sub auth check - spaces auth check - history visibility for timeline_limit +- incompatibility with encrypted room names, topics and user displaynames - given we need to be able + to sort lexicographically and filter based on room name. # Unstable prefix - Annoyingly probably `/v4/sync` From ce34b4d9ccd7719629ef1de18117b92e8caea38e Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Wed, 22 Dec 2021 12:58:04 +0000 Subject: [PATCH 08/81] More docs; add implementation state section --- proposals/3575-sync.md | 84 +++++++++++++++++++++++++++++++----------- 1 file changed, 63 insertions(+), 21 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 7b5fd6ad88e..100926f37b1 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -147,7 +147,7 @@ An entire response looks like: "notification_count": 1, "highlight_count": 0 } - } + }, // Extensions API "extensions": {} @@ -285,8 +285,8 @@ One or more room lists can be requested in sync v3 like so: // If unset, both DM rooms and non-DM rooms are returned. If false, only non-DM rooms // are returned. If true, only DM rooms are returned. "is_dm": true, - // A list of spaces which target rooms must be a part of. For every joined room for this - // user, ensure that there is a parent space event which is in this list. If unset, all + // A list of spaces which target rooms must be a part of. For every invited/joined room for + // this user, ensure that there is a parent space event which is in this list. If unset, all // rooms are included. Servers MUST NOT navigate subspaces. 
It is up to the client to // give a complete list of spaces to navigate. Only rooms directly in these spaces will be // returned. @@ -620,7 +620,7 @@ coupled with unsubscribing from the old room. For example, if the client swapped } ``` -`unsubscribe_rooms` is cleared after every response, it is not sticky. +`unsubscribe_rooms` is cleared after every response; it is not sticky. _Rationale: By using a map, this supports clients who can show multiple room timelines in the UI e.g Hydrogen's grid view. The `unsubscribe_rooms` array allows rooms to be efficiently deleted from the @@ -628,7 +628,6 @@ map. An alternative would be to specify an empty JSON object in the room subscri less explicit than the array form._ ### Notifications API -TODO If you are tracking the top 5 rooms and an event arrives in the 6th room, you will be notified about the event ONLY IF the sort order means the room bumps into the top 5. If for example you sorted @@ -636,32 +635,51 @@ the event ONLY IF the sort order means the room bumps into the top 5. If for exa event which moves the room into the top 5. In most "recent" sort orders a new event *will result* in the 6th room bumping to the top of the list. A notable exception is when the rooms are sorted in *alphabetical order* (`by_name`), which is what some other chat clients do for example. In this case, -you don't care about the event unless the event is a "highlightable" event (e.g direct @mention). +you don't care about the event unless the event is a "highlightable" event (e.g direct @mention). +The notifications API exists to provide a mechanism for clients to display "unread messages" +indicators on the room list at positions not currently inside a sliding window. -- Required to show "unread messages" indicators on the room list. +TODO: - Unsure how much data to expose (probably index position + notif/highlight counts?). 
If we do _counts_ then we are doomed to send a response to a client every time an event is sent in a noisy room, which seems rather wasteful. Perhaps make it configurable? ### Bandwidth optimisations for persistent clients -TODO -On re-establishing a sync connection, or re-requesting a page that was previously INVALIDATEd, the server will perform the following operations: - - For this device/session: check the last sent event ID for the room ID in question. Count the number of timeline events from that point to the latest event. Call it `N`. - - For this specific sync request: calculate a reasonable upper-bound for how many events will be returned in a reasonable worst-case scenario. This is simply `timeline_limit + len(required_state)` (ignoring `*` wildcards on state). Call it `M`. - - If N > M then we would probably send more events if we did a delta than just telling the client everything from scratch, so issue a `SYNC` for this room. - - If N < M then we don't have many events since the connection was last established, so just send the delta as an `UPDATE`. +The API assumes that room data is deleted on the client when the room falls out of the sliding +window or a window gets invalidated. The API will send the entire `required_state` and `timeline` +again when the room re-appears. This is wasteful if the client remembers the state/timeline and there +have been no changes. This is similar to how Sync v2 behaves with `?full_state=true` set. + +The API exposes an opt-in mechanism for providing efficient delta updates. On re-establishing a sync +connection, or re-requesting a page that was previously INVALIDATEd, the server will perform the +following operations: + - For this device: check the last sent event ID for the room ID in question. Count the number of + timeline events from that point to the latest event. Call it `N`. 
+ - For this specific sync request: calculate a reasonable upper-bound for how many events will be + returned in a reasonable worst-case scenario. This is simply `timeline_limit + len(required_state)` + (ignoring `*` wildcards on state). Call it `M`. + - If N > M then we would probably send more events if we did a delta than just telling the client + everything from scratch, so issue a `SYNC` for this room. + - If N < M then we don't have many events since the connection was last established, so just send + the delta as an `UPDATE`. This approach has numerous benefits: - - In the common case when you scroll a room, you won't get any `SYNC`s for rooms that were invalidated because it's highly unlikely to receive 10+ events during the room scroll (assuming you scroll back up in reasonable time). - - When you reconnect after sleeping your laptop overnight, most rooms will be `UPDATE`s, and busy rooms like Matrix HQ will be `SYNC`ed from fresh rather than sending 100s of events. + - In the common case when you scroll a room, you won't get any `SYNC`s for rooms that were invalidated + because it's highly unlikely to receive 10+ events during the room scroll (assuming you scroll + back up in reasonable time). + - When you reconnect after sleeping your laptop overnight, most rooms will be `UPDATE`s, and busy + rooms like Matrix HQ will be `SYNC`ed from fresh rather than sending 100s of events. This imposes more restrictions on the server implementation: - - Servers still need the absolute stream ordering for events to work out how many events from `$event_id` to `$latest_event_id`. - - Servers need to remember the last sent event ID for each session for each room. If rooms share a single monotonically increasing stream, then this is a single integer per session (akin to today's sync tokens for PDU events). Servers need to remember _which rooms_ have been sent to the client, along with the stream position when that was sent. So it's basically a `map[string]int64`. 
+ - Servers still need the absolute stream ordering for events to work out how many events from + `$event_id` to `$latest_event_id`. + - Servers need to remember the last sent event ID for each session for each room. If rooms share a + single monotonically increasing stream, then this is a single integer per session (akin to today's + sync tokens for PDU events). Servers need to remember _which rooms_ have been sent to the client, + along with the stream position when that was sent. So it's basically a `map[string]int64`. - -We need to make this opt-in to support dumb clients which don't remember history. Similar to `?full_state=` in sync v2. +TODO: Name of opt-in flag? Configurable flags for 'I remember state' vs 'I remember timelines'? ### E2EE Handling TODO @@ -741,10 +759,12 @@ don't handle presence/typing/receipts/other metadata, they can simply work with in full._ # Potential issues -TODO +- Scope. +- Flexibility with sorting/filtering and client demands. # Alternatives -TODO +- Do nothing, keep using sync v2 and try to make it go fast. 
+- Factor out some busy bits of sync v2 e.g receipts but still keep returning all rooms (no paginated sync) # Security considerations - room sub auth check @@ -760,6 +780,28 @@ TODO - None in practice - Spaces support for spaces filter +# Implementation state + +[Proxy server](https://github.com/matrix-org/sync-v3): + - Sliding Window API: + - [x] Operation support + - [ ] Initial flag + - Sorting: + - [x] By recency + - [x] By highlight count + - [x] By notification count + - [x] By name (no locale flag) + - Filtering: + * [x] `is_dm` + * [x] `is_encrypted` + * [ ] `is_invite` + * [ ] `spaces` + - [x] Room Subscription API + - [ ] Notifications API (unspecced) + - Extensions: + - [x] To-device + - [x] E2EE + # Appendices - Examples for server impls - Examples for client impls From ce3184c38057cc7d776d5fe46fa408017d561a35 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Wed, 22 Dec 2021 13:00:14 +0000 Subject: [PATCH 09/81] Add missing sections --- proposals/3575-sync.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 100926f37b1..7b863cd2543 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -798,6 +798,8 @@ in full._ * [ ] `spaces` - [x] Room Subscription API - [ ] Notifications API (unspecced) + - [ ] Bandwidth optimisations + - [ ] E2EE highlight/notification count handling - Extensions: - [x] To-device - [x] E2EE From 2934f21bbe840ad79a2727fcaad4be174f4f381e Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Wed, 22 Dec 2021 13:52:22 +0000 Subject: [PATCH 10/81] Return an error for invalidated positions As @t3chguy mentions, we can't reduce the RTT on this as the server will have forgotten sticky request params. 
--- proposals/3575-sync.md | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 7b863cd2543..c5c55dd20e2 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -172,11 +172,19 @@ In simple servers, the `pos` may be an incrementing integer, but more complex se clocks or contain node identifying information in the token. Clients MUST treat `pos` as an opaque value and not introspect it. -When a `pos` is invalidated, the server MUST treat the invalidated `pos` as if it was absent -(in other words that this is an initial sync) and set `initial: true` in the response to inform the -client that the response is now an initial sync. For clarity, `initial: true` MUST also be set when -there is no `pos` value provided. When there is a valid `pos`, this flag MUST be omitted (sending -`initial: false` is wasteful). +When a `pos` is invalidated and the client attempts to use the `pos`, the server MUST send back a +standard error response as a HTTP 400 containing: +```js +{ + "error": "Unknown position", + "errcode": "M_UNKNOWN_POS" +} +``` +This then allows the client to reset their connection and send an initial request (with all sticky +request parameters) without a `pos` value to restart the connection. + +The flag `initial: true` MUST be set in the response when there is no `pos` value provided. When +there is a valid `pos`, this flag MUST be omitted (sending `initial: false` is wasteful). A response for a given `pos` must be idempotent to account for packet loss. For example: ``` @@ -191,7 +199,7 @@ Client Server | -----pos=2-----------> | | <----data=[C], pos=3-- | Server CANNOT send data=[C,D] pos=4, it MUST send the previous response ``` -Failure to do this will result in duplicate data being sent to the client. +Failure to do this may result in duplicate data being sent to the client. 
### Sticky request parameters From ba636b182031c98ee8596214ffdc6fd04112af06 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Wed, 22 Dec 2021 18:02:50 +0000 Subject: [PATCH 11/81] Flesh out remaining sections; start adding test cases --- proposals/3575-sync.md | 294 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 274 insertions(+), 20 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index c5c55dd20e2..469369e54a5 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -651,6 +651,8 @@ TODO: - Unsure how much data to expose (probably index position + notif/highlight counts?). If we do _counts_ then we are doomed to send a response to a client every time an event is sent in a noisy room, which seems rather wasteful. Perhaps make it configurable? +- This has not been fully specced yet because in practice most clients sort by recency so it's not + urgent to include this. For clients who sort by name though, this is a show stopper. ### Bandwidth optimisations for persistent clients @@ -690,7 +692,6 @@ This imposes more restrictions on the server implementation: TODO: Name of opt-in flag? Configurable flags for 'I remember state' vs 'I remember timelines'? ### E2EE Handling -TODO The server cannot calculate the `highlight_count` in E2EE rooms as it cannot read the message content. This is a problem when clients want to sort by `highlight_count`. In comparison, the server can @@ -726,6 +727,11 @@ Client have two main choices here: counts. This means the sort order will be more accurate but is slower and more complex to perform. This is why there is an `is_encrpyted` filter on the room list parameters. +In the future, it may become impossible for servers to sort by room name due to E2EE. This proposal +has no suggestion on how to handle encrypted room names beyond hoping that homomorphic encryption +will allow sorting based on ciphertext: this is an active area of research in the computer science +field. 
+ ### Extensions We anticipate that as more features land in Matrix, different kinds of data will also want to be synced to clients. Sync v2 did not have any first-class support to opt-in to new data. Sync v3 does have @@ -752,8 +758,8 @@ windows as well as which rooms are explicitly subscribed to. In an effort to reduce the size of this proposal, extensions will be done in separate MSCs. There will be extensions for: - - To Device Messaging - [spec](https://spec.matrix.org/v1.1/client-server-api/#extensions-to-sync) - - End-to-End Encryption - [spec](https://spec.matrix.org/v1.1/client-server-api/#extensions-to-sync-1) + - To Device Messaging - [spec](https://spec.matrix.org/v1.1/client-server-api/#extensions-to-sync) [impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/todevice.go) + - End-to-End Encryption - [spec](https://spec.matrix.org/v1.1/client-server-api/#extensions-to-sync-1) [impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/e2ee.go) - Ephemeral Events - typing notifications, receipts: [spec](https://spec.matrix.org/v1.1/client-server-api/#client-behaviour-4) - Presence - `presence` in sync v2: [spec](https://spec.matrix.org/v1.1/client-server-api/#get_matrixclientv3sync) - Account Data - `account_data` in sync v2: [spec](https://spec.matrix.org/v1.1/client-server-api/#get_matrixclientv3sync) @@ -767,33 +773,99 @@ don't handle presence/typing/receipts/other metadata, they can simply work with in full._ # Potential issues -- Scope. -- Flexibility with sorting/filtering and client demands. + +This is a very large change to the Client-Server API, which affects the core data flows for every +single client implementation. This means it will require a lot of work from client developers to +support this MSC, especially given in practice clients will need to support _both_ v2 and v3 sync. +This work will slow down adoption of sync v3. 
+ +In addition, this API is more restrictive than sync v2 as not all data is returned to the client. It +is possible that some data flows which are possible in sync v2 will not be possible in sync v3 due +to sorting and filtering limitations such as but not limited to: + - More complex sorting operations beyond recency/name/unread counts e.g by number of joined members. + - More complex filtering operations such as showing DMs from users who are currently in the viewed + space, dependent on some flag in user settings. + - More complex display operations such as showing summed total notification counts in spaces. + - More complex space operations such as handling orphaned rooms and traversal of subspaces. + - More complex bot requirements like knowing all rooms which have a certain custom state event in them, + such that the presence of a state event becomes a filter. + +It is expected that some of these use cases **will be supported as this MSC is iterated upon**. However, +it is likely that some of these use cases will not be supported in this MSC, but may be supported via +use of an extension MSC where applicable. Unfortunately, there may be some data flows which are genuinely +impossible to perform due to limitations of server-side operations (e.g if the data is encrypted). In +this case, clients will be forced to pull in all E2EE rooms to perform their data flows, which, whilst +slow, should still perform better than sync v2. + +TODO: It would be nice if sorting/filtering operations could be marked as extensions given the list +here is really endless! It shouldn't be too difficult to make this work, but crucially this needs +community support (clients, bot developers, etc) to ensure that their data flows can be accomodated. # Alternatives -- Do nothing, keep using sync v2 and try to make it go fast.
-- Factor out some busy bits of sync v2 e.g receipts but still keep returning all rooms (no paginated sync) + +There are two main alternatives to this proposal: + - Do nothing and keep using sync v2 in its current shape. Attempt to make it run faster. + - Factor out some obviously expensive bits from sync v2 (e.g receipts) but keep returning all rooms + in the response i.e no paginated sync. + +Both alternatives will still scale based on the number of joined rooms on the user's account. Effective +implementations may _delay_ long sync times but fundamentally won't _prevent_ long sync times, given +a sufficiently large account. The core assumption of this MSC is that user accounts will have 1000s +and 10,000s of rooms per account as metadata rooms continue to be added (VoIP conference rooms, +spaces, profiles-as-rooms, thread-per-room, etc). If this assumption is false and room counts remain +reasonably well bounded then this MSC may not be required. # Security considerations -- room sub auth check -- spaces auth check -- history visibility for timeline_limit -- incompatibility with encrypted room names, topics and user displaynames - given we need to be able - to sort lexicographically and filter based on room name. + +This API presents new ways to request data from the server which need appropriate authentication checks: + - Room subscriptions: ensure the user is joined to the room ID in question. + - Spaces filters: ensure the user is joined to the space room ID in question. + - Timeline limits: ensure the user is allowed to see events as far back as they request (history visibility). + +In addition, this API presents new ways for the server to filter/sort Matrix data, which may become +impossible if they are end-to-end encrypted: + - Room names, user display names, canonical aliases. These events are used to calculate room display names. + - State event types and state keys. 
These are used in `required_state` filters and if they are + encrypted it won't be possible for servers to return those specific events. + - Highlight and notification counts imply the ability to inspect the event on the server. This is not + possible in E2EE rooms. This is covered somewhat in the "E2EE handling" section of this MSC. If a + client decides to work out accurate counts for E2EE rooms then they must fetch _all missed events_ + in the room and decrypt them to work out the content. If there are 1000s of missed events this will + cause a denial of service attack on this client as downloading and decrypting all the events are + expensive operations. + +Furthermore, this API presents new ways for malicious users to modify other clients: + - Specifying bogus `origin_server_ts` values on events will cause those rooms to be moved appropriately + when the sort operation is `by_recency`. A malicious user could hide a room by forcibly sending + events with a low `origin_server_ts` value. Conversely, they could force a room to be always near + the top of the list by forcibly sending events with a high `origin_server_ts`. Servers could mitigate + this by bounding the `origin_server_ts` used to be +/- 5min of their own clock, whilst still sending + the real `origin_server_ts` value in the event. + - Subtly adjusting the events in the room could adjust the calculated room name to be inappropriate. + For example, if the malicious user can engineer which heroes' display names are used when calculating + the room name (say by joining/leaving fake accounts) then it's possible for those names to advertise + spam or spell out offensive words. # Unstable prefix -- Annoyingly probably `/v4/sync` + +Whilst this in MSC review the HTTP path will be `/org.matrix.msc3575/sync` with the intention of this +eventually becoming (confusingly) `/v4/sync`. As this is a brand new endpoint, no other keys or fields +need prefixing.
# Dependencies -- None in practice -- Spaces support for spaces filter + +There are no MSCs required for the core functionality to be implemented. Servers and clients need to +be spaces-aware for spaces filters. Extension MSCs will depend on this MSC for their core functionality. # Implementation state [Proxy server](https://github.com/matrix-org/sync-v3): - Sliding Window API: - [x] Operation support - - [ ] Initial flag + - [x] Required state with wildcards + - [x] Timeline limits + - [x] Calculated room names + - [x] Highlight/notification counts - Sorting: - [x] By recency - [x] By highlight count @@ -809,10 +881,192 @@ in full._ - [ ] Bandwidth optimisations - [ ] E2EE highlight/notification count handling - Extensions: - - [x] To-device - - [x] E2EE + - [x] To-device: [impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/todevice.go) + - [x] E2EE: [impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/e2ee.go) # Appendices -- Examples for server impls -- Examples for client impls +In order to aid implementations, a series of test cases are provided which demonstrate core functionality +of this MSC. The intention of these test cases is to provide a way to automatically verify compliance +with this MSC. As such, they are represented as a sequence of JSON objects. These test cases are not +exhaustive, and don't account for authentication via access tokens or handling multiple user accounts. +For brevity, only fields that concern sync v3 are included in event descriptions. + +## Server Implementation Examples + +The purpose of these examples is to ensure servers respond to client requests with the correct data. +These examples are broken down into: + - `state`: Pre-existing state on the server e.g from a database. These should be executed in the order + of `create_rooms` followed by `send_events`. 
Keys in those objects must be executed in alphabetical + order to ensure sort orders work correctly e.g execute `!a:localhost` before `!b:localhost`. + The user who is making the request to the server is the user who should create/send these events. + The intention behind this is that entries in `create_rooms` maps to `/createRoom` requests and + entries in `send_events` maps to the PUT endpoints for sending normal/state events. This will + configure the server into a certain state ready for sync v3 requests. + - `request`: The sync v3 client request. + - `response`: What the server should respond with. Note that the `pos` value MUST be ignored as + it is implementation dependent. It should be used in subsequent requests where appropriate + (TODO: specify how?). + + +Initial Sync (room avatar only, no timeline): (TODO: write test jig and verify with proxy server) +```json +{ + "state": { + "create_rooms": { + "!a:localhost": {"preset": "public_chat"}, + "!b:localhost": {"preset": "public_chat", "name": "Sync v3 Test Room"}, + "!c:localhost": {"preset": "public_chat", "room_alias_name": "syncv3testalias"}, + "!d:localhost": {"preset": "public_chat"} + }, + "send_events": { + "!a:localhost": [ + {"type":"m.room.avatar","state_key":"","content":{"url":"mxc://foo/bar"}} + ] + } + }, + "request": { + "lists": [{ + "required_state": [["m.room.avatar", ""]], + "rooms": [[0,49]], + "sort": ["by_highlight_count", "by_notification_count", "by_recency"], + "timeline_limit": 0 + }] + }, + "response": { + "pos": 1, + "counts": [4], + "ops": [ + { + "list": 0, + "op": "SYNC", + "range": [0,3], + "rooms": [ + { + "room_id": "!a:localhost", + "name": "Empty Room", + "highlight_count": 0, + "notification_count": 0, + "required_state": [ + {"type":"m.room.avatar","state_key":"","content":{"url":"mxc://foo/bar"}} + ] + }, + { + "room_id": "!d:localhost", + "name": "Empty Room", + "highlight_count": 0, + "notification_count": 0 + }, + { + "room_id": "!c:localhost", + "name": 
"#syncv3testalias:localhost", + "highlight_count": 0, + "notification_count": 0 + }, + { + "room_id": "!b:localhost", + "name": "Sync v3 Test Room", + "highlight_count": 0, + "notification_count": 0 + } + ] + } + ] + } +} +``` + +## Client Implementation Examples + +The purpose of these examples is to ensure that clients can transition from one state to another +state based on responses from a server. These examples are broken down into: + - `state`: Pre-existing state on the client e.g from a database. This contains the map of list index + to room index to room ID which contains the position of the room in a particular room list. It + also contains the state key tuples and timelines for stored rooms. + - `response`: The server response. + - `new_state`: The new state after the response has been processed. + +Initial Sync (room avatar only, no timeline): (TODO: write test jig and verify with proxy server) + +```json +{ + "state": {}, + "response": { + "pos": 1, + "counts": [4], + "ops": [ + { + "list": 0, + "op": "SYNC", + "range": [0,3], + "rooms": [ + { + "room_id": "!a:localhost", + "name": "Empty Room", + "highlight_count": 0, + "notification_count": 0, + "required_state": [ + {"type":"m.room.avatar","state_key":"","content":{"url":"mxc://foo/bar"}} + ] + }, + { + "room_id": "!d:localhost", + "name": "Empty Room", + "highlight_count": 0, + "notification_count": 0 + }, + { + "room_id": "!c:localhost", + "name": "#syncv3testalias:localhost", + "highlight_count": 0, + "notification_count": 0 + }, + { + "room_id": "!b:localhost", + "name": "Sync v3 Test Room", + "highlight_count": 0, + "notification_count": 0 + } + ] + } + ] + }, + "new_state": { + "lists": { + "0": { + "0": "!a:localhost", + "1": "!d:localhost", + "2": "!c:localhost", + "3": "!b:localhost" + } + }, + "rooms": { + "!a:localhost": { + "current_state": { + "m.room.name": {"name":"Empty Room"}, + "m.room.avatar": {"url":"mxc://foo/bar"} + }, + "timeline": [] + }, + "!b:localhost": { + "current_state": 
{ + "m.room.name": {"name":"Sync v3 Test Room"} + }, + "timeline": [] + }, + "!c:localhost": { + "current_state": { + "m.room.name": {"name":"#syncv3testalias:localhost"} + }, + "timeline": [] + }, + "!d:localhost": { + "current_state": { + "m.room.name": {"name":"Empty Room"} + }, + "timeline": [] + } + } + } +} +``` From 5515da134a25e0f35369e487782183f7578c12b2 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Wed, 22 Dec 2021 18:19:18 +0000 Subject: [PATCH 12/81] Update unstable endpoint --- proposals/3575-sync.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 469369e54a5..6601748301f 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -848,9 +848,9 @@ Furthermore, this API presents new ways for malicious users to modify other clie # Unstable prefix -Whilst this in MSC review the HTTP path will be `/org.matrix.msc3575/sync` with the intention of this -eventually becoming (confusingly) `/v4/sync`. As this is a brand new endpoint, no other keys or fields -need prefixing. +Whilst this in MSC review the HTTP path will be `/_matrix/client/unstable/org.matrix.msc3575/sync` +with the intention of this eventually becoming (confusingly) `/_matrix/client/v4/sync`. As this is a +brand new endpoint, no other keys or fields need prefixing. # Dependencies From 6770437322e2c15e7b0e2583a80843b80bf21276 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Thu, 23 Dec 2021 16:35:10 +0000 Subject: [PATCH 13/81] Include filter/sort extensions --- proposals/3575-sync.md | 68 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 8 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 6601748301f..1dc22966fbd 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -263,6 +263,7 @@ One or more room lists can be requested in sync v3 like so: // Sliding window ranges, see the Sliding Window API for more information. 
"rooms": [ [0,99] ], // Sticky. List sort order. See Sliding Window API for more information. + // These fields may be expanded through use of extensions. "sort": [ "by_notification_count", "by_recency", "by_name" ], // Sticky. Required state for each room returned. An array of event type and state key tuples. @@ -288,6 +289,7 @@ One or more room lists can be requested in sync v3 like so: // All fields are applied with AND operators, hence if is_dm:true and is_encrypted:true // then only Encrypted DM rooms will be returned. The absence of fields implies no filter // on that criteria: it does NOT imply 'false'. + // These fields may be expanded through use of extensions. // Flag which only returns rooms present (or not) in the DM section of account data. // If unset, both DM rooms and non-DM rooms are returned. If false, only non-DM rooms @@ -477,6 +479,12 @@ The possible `sort` operations are: * Lower-case the result by unicode. This ensures `Matrix` and `matrix` sort in the same locations. * Perform sort operations on this 'canonicalised' name. +Sorting algorithms MUST be stable and deterministic to avoid needless churn as otherwise identical +rooms keep swapping positions. This can easily be achieved by including a final tiebreak based on the +room ID (e.g lexicographical sort on the room ID) to guarantee stability and determinism. It is currently +_not possible_ to invert the sort order (ASC vs DESC). This may be added to this MSC if there is a +community need for it. + NOTE: It is known that by forcing servers to calculate the room name there can be problems concerning multiple languages. "Alice and Bob" in English vs "Alice et Bob" in French for example, which may affect sort ordering. This can be mitigated by adding a `lang` sticky request parameter to control how @@ -485,7 +493,9 @@ i18n and l10n are done. _Rationale: The sort operations are restrictive and limited in scope on purpose. 
Alternatives such as arbitrary or more expansive sort orders like [RFC4790](https://datatracker.ietf.org/doc/html/rfc4790) were decided against as it would A) force servers to support nonsensical and potentially expensive -operations and B) not produce the best sort order for specific use cases in Matrix such as alias handling._ +operations and B) not produce the best sort order for specific use cases in Matrix such as alias handling. +That being said, having some mechanism to support additional sort operations is useful, see the extensions +section for more information._ The complete API shape for each operation is shown below (note the key names vary on the operation): @@ -722,10 +732,9 @@ Client have two main choices here: accurate for them but is fast to do. If you are sorting by highlight count then unread count (which is fairly typical) then E2EE rooms will always be bumped above all the unread count rooms if the resolution algorithm is set to "Assume `highlight_count == 1` whenever `unread_count > 0`". - - **Heavy**: Sort E2EE rooms into a separate list (higher priority than the main list to - de-duplicate them). Manually mix together the E2EE list and the main list depending on highlight - counts. This means the sort order will be more accurate but is slower and more complex to perform. - This is why there is an `is_encrpyted` filter on the room list parameters. + - **Heavy**: Sort E2EE rooms into a separate list. Manually mix together the E2EE list and the main + list depending on highlight counts. This means the sort order will be more accurate but is slower + and more complex to perform. This is why there is an `is_encrypted` filter on the room list parameters. In the future, it may become impossible for servers to sort by room name due to E2EE. This proposal has no suggestion on how to handle encrypted room names beyond hoping that homomorphic encryption @@ -772,6 +781,44 @@ the room list and ability to receive events and state in a room. 
For clients whi don't handle presence/typing/receipts/other metadata, they can simply work with this MSC alone and in full._ +#### Filter and Sort Extensions + +In addition to extending the sync API by adding more data to the response, the sync API needs to include +additional sorting/filtering options. Clients may want to sort or filter the room list in more ways +than this MSC provides in order to provide a good UI/UX. This is officially supported in the following +way: + - Sorting: Define a sort string (namespaced by MSC number when in the MSC process) and define exactly + how a comparator function should be defined (less, equal, greater than). Explain the room-specific + data that is being operated on. This sort string can then appear in the `sort` array. + - Filtering: Define a JSON object which represents the arguments for the filter. If there is only a + single argument then the JSON object may be a JSON value e.g `true` or `"room search query"`. Define + a filter key name (namespaced by MSC number when in the MSC process). This filter can then appear + in the `filter` object. + +An example MSC for sorting `by_highlight_count` is as follows: +``` +MSCXXXX: MSC3575 extension: Add sort filter by highlight count + +This MSC defines a new sort string called 'by_highlight_count' which sorts rooms based on the +number of unread highlightable events as per the sync v2 API. Rooms which have higher counts sort +before rooms with lower counts. + +During development, this sort string should be called `org.matrix.mscxxxx` in the `sort` array. +``` + +Caveats: It is not possible to specify ascending/descending when specifying a sort option. Furthermore, +it is not possible to include AND/OR/NOT operators in filter operations (they are always ANDed). This +is by design at present in order to restrain the scope and complexity of this MSC. Introducing options +for these will scope creep this MSC into creating an entire query language like SQL or GraphQL.
The +author wishes to see exactly what sorting/filtering extension MSCs are created in order to see if +expanding the scope of the core MSC to include these options is sensible or not. Furthermore, it's not +currently defined how servers should behave if they encounter a filter or sort operation they do not +recognise. If the server rejects the request with an HTTP 400 then that will break backwards +compatibility with new clients vs old servers. However, the client would be otherwise unaware that +only _some_ of the sort/filter operations have taken effect. We may need to include a "warnings" +section to indicate which sort/filter operations are unrecognised, allowing for some form of graceful +degradation of service. + # Potential issues This is a very large change to the Client-Server API, which affects the core data flows for every @@ -797,9 +844,9 @@ impossible to perform due to limitations of server-side operations (e.g if the d this case, clients will be forced to pull in all E2EE rooms to perform their data flows, which, whilst slow, should still perform better than sync v2. -TODO: It would be nice if sorting/filtering operations could be marked as extensions given the list -here is really endless! It shouldn't be too difficult to make this work, but crucially this needs -community support (clients, bot developers, etc) to ensure that their data flows can be accomodated. +This MSC alone won't meet the needs of the entire ecosystem in terms of sorting/filtering/data returned +to the client. Extensions are a crucial part of this MSC to clearly define how the sync API can expand +with changing requirements. # Alternatives @@ -846,6 +893,11 @@ Furthermore, this API presents new ways for malicious users to modify other clie the room name (say by joining/leaving fake accounts) then it's possible for those names to advertise spam or spell out offensive words. 
+This API presents new ways for clients to request complex operations which run the risk of denial +of service attacks: + - Complex or pathological filter/sort options (especially via extensions) may degrade performance + on the server and client. This may affect other users on the server. + # Unstable prefix Whilst this in MSC review the HTTP path will be `/_matrix/client/unstable/org.matrix.msc3575/sync` From 27699ff0fc4461aeb93740feaef5738762f921d8 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Thu, 23 Dec 2021 17:17:18 +0000 Subject: [PATCH 14/81] Proof read --- proposals/3575-sync.md | 60 ++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 34 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 1dc22966fbd..95317556f15 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -46,9 +46,8 @@ Q W E R T Y U I O P L K J H G F D S A Z X C V B N M ``` It also introduces a number of new concepts which are explained in more detail later on: - Core API: The minimal API to be sync v3 compatible. - - Extensions: Additional APIs which expose more data from the server e.g presence, device messages. - - Sticky Parameters: Clients can specify request parameters once and have the server remember what - they were, without forcing the client to resend the parameter every time. + - Extensions: Additional APIs which expose more data from the server e.g presence, device messages + or additional sort/filter operations. ## Core A complete sync request looks like: @@ -166,7 +165,9 @@ For the long-polling use case, this proposal includes an opaque token that is ve `/sync` v2's `since` query parameter. This is called `pos` and represents the position in the stream the client is currently at. Unlike `/sync` v2, this token is ephemeral and can be invalidated at any time. When a client first connects to the server, no `pos` is specified.
Also unlike `/sync` v2, this -token cannot be used with other APIs such as `/messages` or `/keys/changes`. +token cannot be used with other APIs such as `/messages` or `/keys/changes`. Note that the "connection" +formed to the server is _not_ a long-lived TCP connection, it is just an application-level concept +of a connection. In simple servers, the `pos` may be an incrementing integer, but more complex servers may use vector clocks or contain node identifying information in the token. Clients MUST treat `pos` as an opaque @@ -181,10 +182,9 @@ standard error response as a HTTP 400 containing: } ``` This then allows the client to reset their connection and send an initial request (with all sticky -request parameters) without a `pos` value to restart the connection. - -The flag `initial: true` MUST be set in the response when there is no `pos` value provided. When -there is a valid `pos`, this flag MUST be omitted (sending `initial: false` is wasteful). +request parameters) without a `pos` value to restart the connection. The flag `initial: true` MUST +be set in the response when there is no `pos` value provided. When there is a valid `pos`, this flag +MUST be omitted (sending `initial: false` is wasteful). A response for a given `pos` must be idempotent to account for packet loss. For example: ``` @@ -270,7 +270,8 @@ One or more room lists can be requested in sync v3 like so: // Note that elements of this array are NOT sticky so they must be specified in full when they // are changed. "required_state": [ - // Request the join rules event. Note that the empty string is required here to match the event's blank state_key. + // Request the join rules event. Note that the empty string is required here to match + // the event's blank state_key. ["m.room.join_rules", ""], ["m.room.history_visibility", ""], // Request all `m.room.member` state events. 
@@ -435,7 +436,7 @@ Which returns the following response parameters: { // Which list is affected by this operation "list": 0, - // Which index positions are affected. + // Which index positions are affected. These are both inclusive. "range": [0,9], // The operation being performed on these index positions. "op": "SYNC", @@ -474,8 +475,8 @@ The possible `sort` operations are: will be returned in the `name` field for the room but is NOT the value that the server should perform sort operations on. See following steps. * Remove any of the following characters from the beginning/end of the calculated name: `#!()):_@`. - This ensures things like canonical aliases display in roughly the right locations rather than - at the start as it starts with `#`. + This ensures things like canonical aliases display in roughly the right alphabetical locations + rather than all together with all rooms that start with `#`. * Lower-case the result by unicode. This ensures `Matrix` and `matrix` sort in the same locations. * Perform sort operations on this 'canonicalised' name. @@ -491,11 +492,10 @@ sort ordering. This can be mitigated by adding a `lang` sticky request parameter i18n and l10n are done. _Rationale: The sort operations are restrictive and limited in scope on purpose. Alternatives such -as arbitrary or more expansive sort orders like [RFC4790](https://datatracker.ietf.org/doc/html/rfc4790) -were decided against as it would A) force servers to support nonsensical and potentially expensive -operations and B) not produce the best sort order for specific use cases in Matrix such as alias handling. 
-That being said, having some mechanism to support additional sort operations is useful, see the extensions -section for more information._ +as arbitrary or more expansive sort orders were decided against as it would A) force servers to support +nonsensical and potentially expensive operations and B) not produce the best sort order for specific +use cases in Matrix such as alias handling. That being said, having some mechanism to support +additional sort operations is useful, see the extensions section for more information._ The complete API shape for each operation is shown below (note the key names vary on the operation): @@ -694,12 +694,14 @@ This approach has numerous benefits: This imposes more restrictions on the server implementation: - Servers still need the absolute stream ordering for events to work out how many events from `$event_id` to `$latest_event_id`. - - Servers need to remember the last sent event ID for each session for each room. If rooms share a - single monotonically increasing stream, then this is a single integer per session (akin to today's + - Servers need to remember the last sent event ID for each device for each room. If rooms share a + single monotonically increasing stream, then this is a single integer per device (akin to today's sync tokens for PDU events). Servers need to remember _which rooms_ have been sent to the client, along with the stream position when that was sent. So it's basically a `map[string]int64`. -TODO: Name of opt-in flag? Configurable flags for 'I remember state' vs 'I remember timelines'? +TODO: Name of opt-in flag? Configurable flags for 'I remember state' vs 'I remember timelines'? Do +we need to explicitly mark rooms as `delta: true` or `initial: true` or something to make it clear +whether this data should update/replace what the client knows? 
### E2EE Handling @@ -727,7 +729,7 @@ messages), this may affect the sort order for this room - it may diverge from th More specifically, it may bump the room up or down the list, depending on what the sort implementation is for E2EE rooms (top of list or below rooms with highlights). -Client have two main choices here: +Clients have two main choices here: - **Lite**: Keep E2EE rooms in the main list. This means the sort order won't always be strictly accurate for them but is fast to do. If you are sorting by highlight count then unread count (which is fairly typical) then E2EE rooms will always be bumped above all the unread count rooms @@ -779,7 +781,8 @@ graph and room list so are well-placed for being treated separately. Furthermore make a meaningful client which only supports the core API and no extensions, as the core controls the room list and ability to receive events and state in a room. For clients which don't do E2EE and don't handle presence/typing/receipts/other metadata, they can simply work with this MSC alone and -in full._ +in full. This is a good balance because it means this MSC alone is useful: it doesn't **require** +additional extensions in order for a basic Matrix client to be written._ #### Filter and Sort Extensions @@ -795,17 +798,6 @@ way: a filter key name (namespaced by MSC number when in the MSC process). This filter can then appear in the `filter` object. -An example MSC for sorting `by_highlight_count` is as follows: -``` -MSCXXXX: MSC3575 extension: Add sort filter by highlight count - -This MSC defines a new sort string called 'by_highlight_count' which sorts rooms based on the -number of unread highlightable events as per the sync v2 API. Rooms which have higher counts sort -before rooms with lower counts. - -During development, this sort string should be called `org.matrix.mscxxxx` in the `filter` object. -``` - Caveats: It is not possible to specify ascending/descending when specifying a sort option. 
Furthermore, it is not possible to include AND/OR/NOT operators in filter operations (they are always ANDed). This is by design at present in order to restrain the scope and complexity of this MSC. Introducing options @@ -834,7 +826,7 @@ to sorting and filtering limitations such as but not limited to: space, dependent on some flag in user settings. - More complex display operations such as showing summed total notification counts in spaces. - More complex space operations such as handling orphaned rooms and traversal of subspaces. - - More complex bot requirements like knowing all rooms which has a certain custom state event in it, + - More complex bot requirements like knowing all rooms which have a certain custom state event in it, such that the presence of a state event becomes a filter. It is expected that some of these use cases **will be supported as this MSC is iterated upon**. However, From 22a3139c1250d6753f6f20f6bdebbbb1de3310e3 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Tue, 4 Jan 2022 13:30:20 +0000 Subject: [PATCH 15/81] Review comments - Ensure `pos` is always a string - Rename `rooms` to `ranges` - Remove idempotency notes on `pos` - Minor clarifications --- proposals/3575-sync.md | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 95317556f15..0daef0225a3 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -57,7 +57,7 @@ A complete sync request looks like: // Sliding Window API "lists": [ { - "rooms": [ [0,99] ], + "ranges": [ [0,99] ], "sort": [ "by_notification_count", "by_recency", "by_name" ], "required_state": [ ["m.room.join_rules", ""], @@ -184,22 +184,7 @@ standard error response as a HTTP 400 containing: This then allows the client to reset their connection and send an initial request (with all sticky request parameters) without a `pos` value to restart the connection. 
The flag `initial: true` MUST be set in the response when there is no `pos` value provided. When there is a valid `pos`, this flag -MUST be omitted (sending `initial: false` is wasteful). - -A response for a given `pos` must be idempotent to account for packet loss. For example: -``` -Client Server - | ---------------------> | data=[A,B], pos=2 - | <--data=[A,B], pos=2-- | - | | data=[C], pos=3 (new event arrives) - | -----pos=2-----------> | - | X--data=[C], pos=3-- | Response is lost - | | - | | data=[C,D], pos=4 (another new event arrives) - | -----pos=2-----------> | - | <----data=[C], pos=3-- | Server CANNOT send data=[C,D] pos=4, it MUST send the previous response -``` -Failure to do this may result in duplicate data being sent to the client. +MUST be omitted (sending `initial: false` is wasteful). This flag exists to tell clients that the ### Sticky request parameters @@ -261,7 +246,7 @@ One or more room lists can be requested in sync v3 like so: "lists": [ { // Sliding window ranges, see the Sliding Window API for more information. - "rooms": [ [0,99] ], + "ranges": [ [0,99] ], // Sticky. List sort order. See Sliding Window API for more information. // These fields may be expanded through use of extensions. "sort": [ "by_notification_count", "by_recency", "by_name" ], @@ -382,7 +367,7 @@ possible operations are: this range. - `INSERT`: Sets a *single* entry. If the position is not empty then clients MUST move entries to the left or the right depending on where the closest empty space is. - - `DELETE`: Remove a *single* entry. Often comes before an `INSERT` to allow entries to swap places. + - `DELETE`: Remove a *single* entry. Often comes before an `INSERT` to allow entries to move places. - `UPDATE`: Update a *single* entry. Updates are cumulative (consisting of deltas only). - `INVALIDATE`: Remove a *range* of entries. 
Clients MAY persist the invalidated range for offline support, but they should be treated as empty when additional operations which concern indexes in @@ -419,7 +404,7 @@ The sync v3 API exposes this API shape via the following request parameters: "lists": [ { // Multiple sliding windows inside a list can be requested. Integers are _inclusive_. - "rooms": [ [0,9], [20,29] ], + "ranges": [ [0,9], [20,29] ], // How the list should be sorted on the server. The first value is applied first, then tiebreaks // are performed with the 2nd sort order, then the 3rd until there are no more sort orders left. "sort": [ "by_notification_count", "by_recency", "by_name" ], @@ -660,7 +645,8 @@ indicators on the room list at positions not currently inside a sliding window. TODO: - Unsure how much data to expose (probably index position + notif/highlight counts?). If we do _counts_ then we are doomed to send a response to a client every time an event is sent in a noisy room, which -seems rather wasteful. Perhaps make it configurable? +seems rather wasteful. Perhaps make it configurable? @timokoesters mentions having approx counts to +avoid the churn e.g only two digits of precision (21 -> 21, but 1234 -> 1200), this fits UIs very nicely. - This has not been fully specced yet because in practice most clients sort by recency so it's not urgent to include this. For clients who sort by name though, this is a show stopper. 
@@ -972,13 +958,13 @@ Initial Sync (room avatar only, no timeline): (TODO: write test jig and verify w "request": { "lists": [{ "required_state": [["m.room.avatar", ""]], - "rooms": [[0,49]], + "ranges": [[0,49]], "sort": ["by_highlight_count", "by_notification_count", "by_recency"], "timeline_limit": 0 }] }, "response": { - "pos": 1, + "pos": "1", "counts": [4], "ops": [ { @@ -1036,7 +1022,7 @@ Initial Sync (room avatar only, no timeline): (TODO: write test jig and verify w { "state": {}, "response": { - "pos": 1, + "pos": "1", "counts": [4], "ops": [ { From 062cdc170638a4c688585da04b84d072fd7a35e1 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Tue, 4 Jan 2022 15:10:31 +0000 Subject: [PATCH 16/81] More review comments - Clarify sort order ASC/DESC. - Indenting on examples - Document proposals for custom state key filters like LAZY and $current - Document `limited` - Document `timeout` --- proposals/3575-sync.md | 57 +++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 0daef0225a3..64f3e89c522 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -51,7 +51,7 @@ It also introduces a number of new concepts which are explained in more detail l ## Core A complete sync request looks like: -`POST /v3/sync?pos=4`: +`POST /v3/sync?pos=4&timeout=30000`: ```js { // Sliding Window API @@ -184,7 +184,10 @@ standard error response as a HTTP 400 containing: This then allows the client to reset their connection and send an initial request (with all sticky request parameters) without a `pos` value to restart the connection. The flag `initial: true` MUST be set in the response when there is no `pos` value provided. When there is a valid `pos`, this flag -MUST be omitted (sending `initial: false` is wasteful). This flag exists to tell clients that the +MUST be omitted (sending `initial: false` is wasteful). 
+ +The `timeout` query parameter exists for the same purposes of sync v2: to tell the server how many +milliseconds to hold open the connection before returning. ### Sticky request parameters @@ -335,6 +338,7 @@ The server will then return rooms which have the following fields: {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"C"}}, {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"D"}}, ], + "limited": true, // same as sync v2 "notification_count": 54, // same as sync v2 "highlight_count": 3 // same as sync v2 } @@ -356,6 +360,15 @@ request the state before an event. As clients can be at different points in the room, this would force servers to cache every possible room state. It's not practical for servers to cache every single possible earlier state for each room._ +TODO: There is currently no lazy-loaded members support. Various suggestions like a special sentinel +value in required state e.g `["m.room.member", "LAZY"]` could be used. This may need to be specified +in this MSC or possibly an extension MSC (though it's unclear how it would fall under the filtering +extension MSC format). For context, lazy-loaded members refers to the sending of `m.room.member` +events for the _senders_ of the events that are present in `timeline`. Current implementations use +an LRU cache to cut down on sending duplicate events. In addition, we may also want a sentinel value +to indicate "the current user" e.g to always pull out the current user's member event +`["m.room.member", "$current"]`. Are there any other interesting state filters? + ### Sliding Window API At a high level, the sliding window API provides a way to synchronise a subslice of a list in a @@ -447,11 +460,13 @@ The possible `sort` operations are: - `by_recency`: Sort by `origin_server_ts` on the most recently _received_ event in the room.
Note that due to clock drift over federation it is possible for rooms to re-order such that the most recently received event in the entire list does not cause that room to go to index position 0. + The highest `origin_server_ts` value comes first in the list. - `by_highlight_count`: Sort by the `highlight_count` for this user in this room, which is the number of unread notifications for this room with the highlight flag set. This value is also present - in sync v2. + in sync v2. The highest `highlight_count` comes first in the list. - `by_notification_count`: Sort by the `notification_count` for this user in this room, which is the total number of unread notifications for this room. This value is also present in sync v2. + The highest `notification_count` comes first in the list. - `by_name`: Sort by room name lexicographically. This requires servers to implement the [room name calculation algorithm](https://matrix.org/docs/spec/client_server/latest#calculating-the-display-name-for-a-room). The server MUST perform the following steps: @@ -463,7 +478,8 @@ The possible `sort` operations are: This ensures things like canonical aliases display in roughly the right alphabetical locations rather than all together with all rooms that start with `#`. * Lower-case the result by unicode. This ensures `Matrix` and `matrix` sort in the same locations. - * Perform sort operations on this 'canonicalised' name. + * Perform sort operations on this 'canonicalised' name. For clarity, the sort is ascending so + `A` comes before `B`. Sorting algorithms MUST be stable and deterministic to avoid needless churn as otherwise identical rooms keep swapping positions.
This can easily be achieved by including a final tiebreak based on the @@ -556,21 +572,22 @@ This would return the following response: "!sub1:bar": { "name": "Alice and Bob", "required_state": [ - {"sender":"@alice:example.com","type":"m.room.create", "state_key":"", "content":{"creator":"@alice:example.com"}}, - {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, - {"sender":"@alice:example.com","type":"m.room.history_visibility", "state_key":"", "content":{"history_visibility":"joined"}}, - {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@alice:example.com", "content":{"membership":"join","displayname":"Alice"}}, - {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@bob:example.com", "content":{"membership":"join","displayname":"Bob"}} + {"sender":"@alice:example.com","type":"m.room.create", "state_key":"", "content":{"creator":"@alice:example.com"}}, + {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, + {"sender":"@alice:example.com","type":"m.room.history_visibility", "state_key":"", "content":{"history_visibility":"joined"}}, + {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@alice:example.com", "content":{"membership":"join","displayname":"Alice"}}, + {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@bob:example.com", "content":{"membership":"join","displayname":"Bob"}} ], "timeline": [ - {"sender":"@alice:example.com","type":"m.room.create", "state_key":"", "content":{"creator":"@alice:example.com"}}, - {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, - {"sender":"@alice:example.com","type":"m.room.history_visibility", "state_key":"", "content":{"history_visibility":"joined"}}, - {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@alice:example.com", 
"content":{"membership":"join","displayname":"Alice"}}, - {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@bob:example.com", "content":{"membership":"join","displayname":"Bob"}} - {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"A"}}, - {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"B"}}, + {"sender":"@alice:example.com","type":"m.room.create", "state_key":"", "content":{"creator":"@alice:example.com"}}, + {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, + {"sender":"@alice:example.com","type":"m.room.history_visibility", "state_key":"", "content":{"history_visibility":"joined"}}, + {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@alice:example.com", "content":{"membership":"join","displayname":"Alice"}}, + {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@bob:example.com", "content":{"membership":"join","displayname":"Bob"}} + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"A"}}, + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"B"}}, ], + "limited": true, "notification_count": 1, "highlight_count": 0 } @@ -774,8 +791,8 @@ additional extensions in order for a basic Matrix client to be written._ In addition to extending the sync API by adding more data to the response, the sync API needs to include additional sorting/filtering options. Clients may want to sort or filter the room list in more ways -than this MSC provides in order to provide a good UI/UX. This is officially supported in the following -way: +than this MSC provides (e.g include historical rooms, include knocked rooms) in order to provide a +good UI/UX. This is officially supported in the following way: - Sorting: Define a sort string (namespaced by MSC number when in the MSC process) and define exactly how a comparator function should be defined (less, equal, greater than). 
Explain the room-specific data that is being operated on. This sort string can then appear in the `sort` array. @@ -785,9 +802,9 @@ way: in the `filter` object. Caveats: It is not possible to specify ascending/descending when specifying a sort option. Furthermore, -it is not possible to include AND/OR/NOT operators in filter operations (they are always ANDed). This +it is not possible to include AND/OR/NOT operators in filter operations (they are always AND'd). This is by design at present in order to restrain the scope and complexity of this MSC. Introducing options -for these will scope creep this MSC into creating an entire query langauge like SQL or GraphQL. The +for these will scope creep this MSC into creating an entire query language like SQL or GraphQL. The author wishes to see exactly what sorting/filtering extension MSCs are created in order to see if expanding the scope of the core MSC to include these options is sensible or not. Furthermore, it's not currently defined how servers should behave if they encounter a filter or sort operation they do not From f491addeba2b81c7115fb938522d1fd40c00c6c7 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Tue, 4 Jan 2022 15:47:29 +0000 Subject: [PATCH 17/81] Fixup identation more --- proposals/3575-sync.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 64f3e89c522..87c1dce2377 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -130,18 +130,18 @@ An entire response looks like: "!sub1:bar": { "name": "Alice and Bob", "required_state": [ - {"sender":"@alice:example.com","type":"m.room.create", "state_key":"", "content":{"creator":"@alice:example.com"}}, - {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, - {"sender":"@alice:example.com","type":"m.room.history_visibility", "state_key":"", "content":{"history_visibility":"joined"}}, - 
{"sender":"@alice:example.com","type":"m.room.member", "state_key":"@alice:example.com", "content":{"membership":"join"}} + {"sender":"@alice:example.com","type":"m.room.create", "state_key":"", "content":{"creator":"@alice:example.com"}}, + {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, + {"sender":"@alice:example.com","type":"m.room.history_visibility", "state_key":"", "content":{"history_visibility":"joined"}}, + {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@alice:example.com", "content":{"membership":"join"}} ], "timeline": [ - {"sender":"@alice:example.com","type":"m.room.create", "state_key":"", "content":{"creator":"@alice:example.com"}}, - {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, - {"sender":"@alice:example.com","type":"m.room.history_visibility", "state_key":"", "content":{"history_visibility":"joined"}}, - {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@alice:example.com", "content":{"membership":"join"}} - {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"A"}}, - {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"B"}}, + {"sender":"@alice:example.com","type":"m.room.create", "state_key":"", "content":{"creator":"@alice:example.com"}}, + {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, + {"sender":"@alice:example.com","type":"m.room.history_visibility", "state_key":"", "content":{"history_visibility":"joined"}}, + {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@alice:example.com", "content":{"membership":"join"}} + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"A"}}, + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"B"}}, ], "notification_count": 1, "highlight_count": 0 From 
ced5274d7c2d3f7acb67813853e1a75669d1ff2d Mon Sep 17 00:00:00 2001 From: kegsay Date: Fri, 7 Jan 2022 14:27:04 +0000 Subject: [PATCH 18/81] Replace "sync v3" with "sliding sync" Sync v3 is technically the current sync in v1.1 of the Matrix Specification, so rename it for clarity. --- proposals/3575-sync.md | 44 +++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 87c1dce2377..0d06504a73e 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -1,4 +1,4 @@ -# MSC3575: Client syncing with sliding windows (aka Sync v3) +# MSC3575: Sliding Sync (aka Sync v3) This MSC outlines a replacement for the CS API endpoint `/sync`. @@ -45,13 +45,13 @@ Q W E R T Y U I O P L K J H G F D S A Z X C V B N M A C D E F first 5 rooms requested ``` It also introduces a number of new concepts which are explained in more detail later on: - - Core API: The minimal API to be sync v3 compatible. + - Core API: The minimal API to be sliding sync compatible. - Extensions: Additional APIs which expose more data from the server e.g presence, device messages or additional sort/filter operations. ## Core A complete sync request looks like: -`POST /v3/sync?pos=4&timeout=30000`: +`POST /_matrix/client/unstable/org.matrix.msc3575/sync?pos=4&timeout=30000`: ```js { // Sliding Window API @@ -243,7 +243,7 @@ which are extremely dynamic in nature, such as list ranges. 
### Room List parameters -One or more room lists can be requested in sync v3 like so: +One or more room lists can be requested in sliding sync like so: ```js { "lists": [ @@ -410,7 +410,7 @@ For example: | ----- wait for updates ----> | [J,K,L,N,O] | <- DELETE[3], INSERT[4]=O--- | ``` -The sync v3 API exposes this API shape via the following request parameters: +The sliding sync API exposes this API shape via the following request parameters: ```js { // Multiple lists can be requested @@ -542,8 +542,8 @@ This can be a problem for some use cases: - Following a permalink to a random room which is not in the window should be possible. - Receiving a direct @mention in a room not in the window should notify the client. -For the first of these issues, the sync v3 API exposes a "room subscription" API. For the second issue, -the sync v3 API exposes a "notifications" API. +For the first of these issues, the sliding sync API exposes a "room subscription" API. For the second issue, +the sliding sync API exposes a "notifications" API. ### Room Subscription API @@ -748,7 +748,7 @@ field. ### Extensions We anticipate that as more features land in Matrix, different kinds of data will also want to be synced -to clients. Sync v2 did not have any first-class support to opt-in to new data. Sync v3 does have +to clients. Sync v2 did not have any first-class support to opt-in to new data. Sliding Sync does have support for this via "extensions". Extensions also allow this proposal to be broken up into more manageable sections. Extensions are requested by the client in a dedicated `extensions` block: ```js @@ -818,11 +818,11 @@ degradation of service. This is a very large change to the Client-Server API, which affects the core data flows for every single client implementation. This means it will require a lot of work from client developers to -support this MSC, especially given in practice clients will need to support _both_ v2 and v3 sync. 
-This work will slow down adoption of sync v3. +support this MSC, especially given in practice clients will need to support _both_ sliding sync +and `/sync`. This work will slow down adoption of sliding sync. In addition, this API is more restrictive than sync v2 as not all data is returned to the client. It -is possible that some data flows which are possible in sync v2 will not be possible in sync v3 due +is possible that some data flows which are possible in sync v2 will not be possible in sliding sync due to sorting and filtering limitations such as but not limited to: - More complex sorting operations beyond recency/name/unread counts e.g by number of joined members. - More complex filtering operations such as showing DMs from users who are currently in the viewed @@ -937,7 +937,7 @@ In order to aid implementations, a series of test cases are provided which demon of this MSC. The intention of these test cases is to provide a way to automatically verify compliance with this MSC. As such, they are represented as a sequence of JSON objects. These test cases are not exhaustive, and don't account for authentication via access tokens or handling multiple user accounts. -For brevity, only fields that concern sync v3 are included in event descriptions. +For brevity, only fields that concern sliding sync are included in event descriptions. ## Server Implementation Examples @@ -949,8 +949,8 @@ These examples are broken down into: The user who is making the request to the server is the user who should create/send these events. The intention behind this is that entries in `create_rooms` maps to `/createRoom` requests and entries in `send_events` maps to the PUT endpoints for sending normal/state events. This will - configure the server into a certain state ready for sync v3 requests. - - `request`: The sync v3 client request. + configure the server into a certain state ready for sliding sync requests. + - `request`: The sliding sync client request. 
- `response`: What the server should respond with. Note that the `pos` value MUST be ignored as it is implementation dependent. It should be used in subsequent requests where appropriate (TODO: specify how?). @@ -962,8 +962,8 @@ Initial Sync (room avatar only, no timeline): (TODO: write test jig and verify w "state": { "create_rooms": { "!a:localhost": {"preset": "public_chat"}, - "!b:localhost": {"preset": "public_chat", "name": "Sync v3 Test Room"}, - "!c:localhost": {"preset": "public_chat", "room_alias_name": "syncv3testalias"}, + "!b:localhost": {"preset": "public_chat", "name": "Sync Test Room"}, + "!c:localhost": {"preset": "public_chat", "room_alias_name": "synctestalias"}, "!d:localhost": {"preset": "public_chat"} }, "send_events": { @@ -1006,13 +1006,13 @@ Initial Sync (room avatar only, no timeline): (TODO: write test jig and verify w }, { "room_id": "!c:localhost", - "name": "#syncv3testalias:localhost", + "name": "#synctestalias:localhost", "highlight_count": 0, "notification_count": 0 }, { "room_id": "!b:localhost", - "name": "Sync v3 Test Room", + "name": "Sync Test Room", "highlight_count": 0, "notification_count": 0 } @@ -1064,13 +1064,13 @@ Initial Sync (room avatar only, no timeline): (TODO: write test jig and verify w }, { "room_id": "!c:localhost", - "name": "#syncv3testalias:localhost", + "name": "#synctestalias:localhost", "highlight_count": 0, "notification_count": 0 }, { "room_id": "!b:localhost", - "name": "Sync v3 Test Room", + "name": "Sync Test Room", "highlight_count": 0, "notification_count": 0 } @@ -1097,13 +1097,13 @@ Initial Sync (room avatar only, no timeline): (TODO: write test jig and verify w }, "!b:localhost": { "current_state": { - "m.room.name": {"name":"Sync v3 Test Room"} + "m.room.name": {"name":"Sync Test Room"} }, "timeline": [] }, "!c:localhost": { "current_state": { - "m.room.name": {"name":"#syncv3testalias:localhost"} + "m.room.name": {"name":"#synctestalias:localhost"} }, "timeline": [] }, From 
40ff7f3980cf1b9497742262487ab69cb58e98be Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Thu, 24 Feb 2022 16:08:39 +0000 Subject: [PATCH 19/81] Mention room_name_like filter --- proposals/3575-sync.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 0d06504a73e..3eca2c8e709 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -297,7 +297,10 @@ One or more room lists can be requested in sliding sync like so: // Flag which only returns rooms the user is currently invited to. If unset, both invited // and joined rooms are returned. If false, no invited rooms are returned. If true, only // invited rooms are returned. - "is_invite": true + "is_invite": true, + // Filter the room name. Case-insensitive partial matching e.g 'foo' matches 'abFooab'. + // The term 'like' is inspired by SQL 'LIKE', and the text here is similar to '%foo%'. + "room_name_like": "foo" } } ], @@ -923,6 +926,7 @@ be spaces-aware for spaces filters. 
Extension MSCs will depend on this MSC for t * [x] `is_encrypted` * [ ] `is_invite` * [ ] `spaces` + * [x] `room_name_like` - [x] Room Subscription API - [ ] Notifications API (unspecced) - [ ] Bandwidth optimisations From a41b92df264f7b4b79eeaf31d582bc215623dbdb Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Thu, 24 Feb 2022 16:54:50 +0000 Subject: [PATCH 20/81] Move the initial flag to be per-room where it is actually useful --- proposals/3575-sync.md | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 3eca2c8e709..6506df09342 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -89,7 +89,6 @@ An entire response looks like: ```js { // Connection and Streaming API - "initial": true, "pos": "5", // Sliding Window API @@ -102,6 +101,7 @@ An entire response looks like: { "room_id": "!foo:bar", "name": "The calculated room name", + "initial": true, "required_state": [ {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, {"sender":"@alice:example.com","type":"m.room.history_visibility", "state_key":"", "content":{"history_visibility":"joined"}}, @@ -129,6 +129,7 @@ An entire response looks like: "room_subscriptions": { "!sub1:bar": { "name": "Alice and Bob", + "initial": true, "required_state": [ {"sender":"@alice:example.com","type":"m.room.create", "state_key":"", "content":{"creator":"@alice:example.com"}}, {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, @@ -182,9 +183,7 @@ standard error response as a HTTP 400 containing: } ``` This then allows the client to reset their connection and send an initial request (with all sticky -request parameters) without a `pos` value to restart the connection. The flag `initial: true` MUST -be set in the response when there is no `pos` value provided. 
When there is a valid `pos`, this flag -MUST be omitted (sending `initial: false` is wasteful). +request parameters) without a `pos` value to restart the connection. The `timeout` query parameter exists for the same purposes of sync v2: to tell the server how many milliseconds to hold open the connection before returning. @@ -325,6 +324,11 @@ The server will then return rooms which have the following fields: { "room_id": "!foo:bar", "name": "The calculated room name", + // Flag which is set when this is the first time the server is sending this data on this connection. + // Clients can use this flag to replace or update their local state. When there is an update, servers + // MUST omit this flag entirely and NOT send "initial":false as this is wasteful on bandwidth. The + // absence of this flag means 'false'. + "initial": true, // this is the CURRENT STATE, unlike sync v2 "required_state": [ {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, @@ -705,9 +709,6 @@ This imposes more restrictions on the server implementation: sync tokens for PDU events). Servers need to remember _which rooms_ have been sent to the client, along with the stream position when that was sent. So it's basically a `map[string]int64`. -TODO: Name of opt-in flag? Configurable flags for 'I remember state' vs 'I remember timelines'? Do -we need to explicitly mark rooms as `delta: true` or `initial: true` or something to make it clear -whether this data should update/replace what the client knows? 
### E2EE Handling @@ -996,6 +997,7 @@ Initial Sync (room avatar only, no timeline): (TODO: write test jig and verify w { "room_id": "!a:localhost", "name": "Empty Room", + "initial": true, "highlight_count": 0, "notification_count": 0, "required_state": [ @@ -1005,18 +1007,21 @@ Initial Sync (room avatar only, no timeline): (TODO: write test jig and verify w { "room_id": "!d:localhost", "name": "Empty Room", + "initial": true, "highlight_count": 0, "notification_count": 0 }, { "room_id": "!c:localhost", "name": "#synctestalias:localhost", + "initial": true, "highlight_count": 0, "notification_count": 0 }, { "room_id": "!b:localhost", "name": "Sync Test Room", + "initial": true, "highlight_count": 0, "notification_count": 0 } @@ -1054,6 +1059,7 @@ Initial Sync (room avatar only, no timeline): (TODO: write test jig and verify w { "room_id": "!a:localhost", "name": "Empty Room", + "initial": true, "highlight_count": 0, "notification_count": 0, "required_state": [ @@ -1063,18 +1069,21 @@ Initial Sync (room avatar only, no timeline): (TODO: write test jig and verify w { "room_id": "!d:localhost", "name": "Empty Room", + "initial": true, "highlight_count": 0, "notification_count": 0 }, { "room_id": "!c:localhost", "name": "#synctestalias:localhost", + "initial": true, "highlight_count": 0, "notification_count": 0 }, { "room_id": "!b:localhost", "name": "Sync Test Room", + "initial": true, "highlight_count": 0, "notification_count": 0 } From 9587bc03ae109005d7e6db2509712ee8cb158728 Mon Sep 17 00:00:00 2001 From: kegsay Date: Wed, 23 Mar 2022 16:05:45 +0000 Subject: [PATCH 21/81] Add tombstone/room types filters --- proposals/3575-sync.md | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 6506df09342..f39322eb897 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -188,6 +188,10 @@ request parameters) without a `pos` value to restart the connection. 
The `timeout` query parameter exists for the same purposes of sync v2: to tell the server how many milliseconds to hold open the connection before returning. +_TODO[proxy]: If pos is just an incrementing integer, it means 2 browser tabs with the same access token +will step on each other's toes. The act of hitting /sync without a ?pos results in the first tab being +torn down connection wise. We may want to mux in implicit session IDs into the pos?_ + ### Sticky request parameters Request parameters can be "sticky". This means that their value is remembered across multiple requests. @@ -297,6 +301,16 @@ One or more room lists can be requested in sliding sync like so: // and joined rooms are returned. If false, no invited rooms are returned. If true, only // invited rooms are returned. "is_invite": true, + // Flag which only returns rooms which have an `m.room.tombstone` state event. If unset, + // both tombstoned and un-tombstoned rooms are returned. If false, only un-tombstoned rooms + // are returned. If true, only tombstoned rooms are returned. + "is_tombstoned": true, + // If specified, only rooms where the `m.room.create` event has a `type` matching one + // of the strings in this array will be returned. If this field is unset, all rooms are + // returned regardless of type. This can be used to get the initial set of spaces for an account. + "room_types": [ "m.space" ], + // Same as "room_types" but inverted. This can be used to filter out spaces from the room list. + "not_room_types": [ "m.space" ], // Filter the room name. Case-insensitive partial matching e.g 'foo' matches 'abFooab'. // The term 'like' is inspired by SQL 'LIKE', and the text here is similar to '%foo%'. "room_name_like": "foo" @@ -316,7 +330,11 @@ sending complex matching criteria (e.g pathological regular expressions) and in to be very little in-the-wild use of partial key matching like `foo*` as new state events tend to be namespaced by their event type.
Fields in `required_state` are not sticky mainly due to semantics: expressing deletions becomes hard. The inclusion of a dedicated `is_encrypted` filter exists for the -benefit of complex clients: see the E2EE section for more information._ +benefit of complex clients: see the E2EE section for more information. The `room_name_like` field +exists to allow the ability to search by room name which most clients support, and is crucial for +large accounts. The `room_types` filters exist primarily to include/exclude spaces. The +`is_tombstoned` flag exists to remove tombstoned rooms from the sync response: clients would be +filtering these rooms out anyway, so rather save on the bandwidth!_ The server will then return rooms which have the following fields: @@ -925,8 +943,10 @@ be spaces-aware for spaces filters. Extension MSCs will depend on this MSC for t - Filtering: * [x] `is_dm` * [x] `is_encrypted` - * [ ] `is_invite` + * [x] `is_invite` + * [x] `is_tombstoned` * [ ] `spaces` + * [ ] `room_types` and `not_room_types` * [x] `room_name_like` - [x] Room Subscription API - [ ] Notifications API (unspecced) @@ -935,6 +955,9 @@ be spaces-aware for spaces filters. Extension MSCs will depend on this MSC for t - Extensions: - [x] To-device: [impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/todevice.go) - [x] E2EE: [impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/e2ee.go) + - [ ] Account Data + - [ ] Ephemeral Events + - [ ] Presence # Appendices From 615e8f5a7bfe4da813bc2db661ed0bd00bccac20 Mon Sep 17 00:00:00 2001 From: kegsay Date: Fri, 1 Apr 2022 13:59:50 +0100 Subject: [PATCH 22/81] API updates - Add `is_dm` to the room response JSON if the room is a DM room. - Add `invite_state` to the room response JSON if the room is an invite. - Add `prev_batch` to the room response JSON for fetching scrollback via `/messages`.
- Add account data extension implementation --- proposals/3575-sync.md | 209 +++-------------------------------------- 1 file changed, 15 insertions(+), 194 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index f39322eb897..874497fb00d 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -116,6 +116,7 @@ An entire response looks like: {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"C"}}, {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"D"}}, ], + "prev_batch": "t111_222_333", "notification_count": 54, "highlight_count": 3 }, @@ -144,6 +145,7 @@ An entire response looks like: {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"A"}}, {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"B"}}, ], + "prev_batch": "t111_222_333", "notification_count": 1, "highlight_count": 0 } @@ -363,9 +365,12 @@ The server will then return rooms which have the following fields: {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"C"}}, {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"D"}}, ], - "limited": true, // same as sync v2 - "notification_count": 54, // same as sync v2 - "highlight_count": 3 // same as sync v2 + "is_dm": true, // field is absent on non-DM rooms + "invite_state": [ { type: "m.room.member" } ], // stripped state events, same as rooms.invite.$room_id.invite_state in sync v2, absent on joined/left rooms + "prev_batch": "t111_222_333", // same as sync v2 + "limited": true, // same as sync v2 + "notification_count": 54, // same as sync v2 + "highlight_count": 3 // same as sync v2 } ``` @@ -794,11 +799,11 @@ windows as well as which rooms are explicitly subscribed to. In an effort to reduce the size of this proposal, extensions will be done in separate MSCs. 
There will be extensions for: - - To Device Messaging - [spec](https://spec.matrix.org/v1.1/client-server-api/#extensions-to-sync) [impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/todevice.go) - - End-to-End Encryption - [spec](https://spec.matrix.org/v1.1/client-server-api/#extensions-to-sync-1) [impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/e2ee.go) + - To Device Messaging - [spec](https://spec.matrix.org/v1.1/client-server-api/#extensions-to-sync) [impl](https://github.com/matrix-org/sliding-sync/blob/main/sync3/extensions/todevice.go) + - End-to-End Encryption - [spec](https://spec.matrix.org/v1.1/client-server-api/#extensions-to-sync-1) [impl](https://github.com/matrix-org/sliding-sync/blob/main/sync3/extensions/e2ee.go) - Ephemeral Events - typing notifications, receipts: [spec](https://spec.matrix.org/v1.1/client-server-api/#client-behaviour-4) - Presence - `presence` in sync v2: [spec](https://spec.matrix.org/v1.1/client-server-api/#get_matrixclientv3sync) - - Account Data - `account_data` in sync v2: [spec](https://spec.matrix.org/v1.1/client-server-api/#get_matrixclientv3sync) + - Account Data - `account_data` in sync v2: [spec](https://spec.matrix.org/v1.1/client-server-api/#get_matrixclientv3sync) [impl](https://github.com/matrix-org/sliding-sync/blob/main/sync3/extensions/account_data.go) _Rationale: The name 'extensions' is inspired by the spec itself which refers to "Extensions to /sync" multiple times. These additional bits of data are all generally outside the scope of the core room @@ -928,13 +933,14 @@ be spaces-aware for spaces filters. 
Extension MSCs will depend on this MSC for t # Implementation state -[Proxy server](https://github.com/matrix-org/sync-v3): +[Proxy server](https://github.com/matrix-org/sync-v3) (v0.1.0): - Sliding Window API: - [x] Operation support - [x] Required state with wildcards - [x] Timeline limits - [x] Calculated room names - [x] Highlight/notification counts + - [x] Prev batch (partial, needs enough traffic in each room to generate reliably, token will cause duplicate events on /messages) - Sorting: - [x] By recency - [x] By highlight count @@ -955,7 +961,7 @@ be spaces-aware for spaces filters. Extension MSCs will depend on this MSC for t - Extensions: - [x] To-device: [impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/todevice.go) - [x] E2EE: [impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/e2ee.go) - - [ ] Account Data + - [x] Account Data: [impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/account_data.go) - [ ] Ephemeral Events - [ ] Presence @@ -967,189 +973,4 @@ with this MSC. As such, they are represented as a sequence of JSON objects. Thes exhaustive, and don't account for authentication via access tokens or handling multiple user accounts. For brevity, only fields that concern sliding sync are included in event descriptions. -## Server Implementation Examples - -The purpose of these examples is to ensure servers respond to client requests with the correct data. -These examples are broken down into: - - `state`: Pre-existing state on the server e.g from a database. These should be executed in the order - of `create_rooms` followed by `send_events`. Keys in those objects must be executed in alphabetical - order to ensure sort orders work correctly e.g execute `!a:localhost` before `!b:localhost`. - The user who is making the request to the server is the user who should create/send these events. 
- The intention behind this is that entries in `create_rooms` maps to `/createRoom` requests and - entries in `send_events` maps to the PUT endpoints for sending normal/state events. This will - configure the server into a certain state ready for sliding sync requests. - - `request`: The sliding sync client request. - - `response`: What the server should respond with. Note that the `pos` value MUST be ignored as - it is implementation dependent. It should be used in subsequent requests where appropriate - (TODO: specify how?). - - -Initial Sync (room avatar only, no timeline): (TODO: write test jig and verify with proxy server) -```json -{ - "state": { - "create_rooms": { - "!a:localhost": {"preset": "public_chat"}, - "!b:localhost": {"preset": "public_chat", "name": "Sync Test Room"}, - "!c:localhost": {"preset": "public_chat", "room_alias_name": "synctestalias"}, - "!d:localhost": {"preset": "public_chat"} - }, - "send_events": { - "!a:localhost": [ - {"type":"m.room.avatar","state_key":"","content":{"url":"mxc://foo/bar"}} - ] - } - }, - "request": { - "lists": [{ - "required_state": [["m.room.avatar", ""]], - "ranges": [[0,49]], - "sort": ["by_highlight_count", "by_notification_count", "by_recency"], - "timeline_limit": 0 - }] - }, - "response": { - "pos": "1", - "counts": [4], - "ops": [ - { - "list": 0, - "op": "SYNC", - "range": [0,3], - "rooms": [ - { - "room_id": "!a:localhost", - "name": "Empty Room", - "initial": true, - "highlight_count": 0, - "notification_count": 0, - "required_state": [ - {"type":"m.room.avatar","state_key":"","content":{"url":"mxc://foo/bar"}} - ] - }, - { - "room_id": "!d:localhost", - "name": "Empty Room", - "initial": true, - "highlight_count": 0, - "notification_count": 0 - }, - { - "room_id": "!c:localhost", - "name": "#synctestalias:localhost", - "initial": true, - "highlight_count": 0, - "notification_count": 0 - }, - { - "room_id": "!b:localhost", - "name": "Sync Test Room", - "initial": true, - "highlight_count": 0, - 
"notification_count": 0 - } - ] - } - ] - } -} -``` - -## Client Implementation Examples - -The purpose of these examples is to ensure that clients can transition from one state to another -state based on responses from a server. These examples are broken down into: - - `state`: Pre-existing state on the client e.g from a database. This contains the map of list index - to room index to room ID which contains the position of the room in a particular room list. It - also contains the state key tuples and timelines for stored rooms. - - `response`: The server response. - - `new_state`: The new state after the response has been processed. - -Initial Sync (room avatar only, no timeline): (TODO: write test jig and verify with proxy server) - -```json -{ - "state": {}, - "response": { - "pos": "1", - "counts": [4], - "ops": [ - { - "list": 0, - "op": "SYNC", - "range": [0,3], - "rooms": [ - { - "room_id": "!a:localhost", - "name": "Empty Room", - "initial": true, - "highlight_count": 0, - "notification_count": 0, - "required_state": [ - {"type":"m.room.avatar","state_key":"","content":{"url":"mxc://foo/bar"}} - ] - }, - { - "room_id": "!d:localhost", - "name": "Empty Room", - "initial": true, - "highlight_count": 0, - "notification_count": 0 - }, - { - "room_id": "!c:localhost", - "name": "#synctestalias:localhost", - "initial": true, - "highlight_count": 0, - "notification_count": 0 - }, - { - "room_id": "!b:localhost", - "name": "Sync Test Room", - "initial": true, - "highlight_count": 0, - "notification_count": 0 - } - ] - } - ] - }, - "new_state": { - "lists": { - "0": { - "0": "!a:localhost", - "1": "!d:localhost", - "2": "!c:localhost", - "3": "!b:localhost" - } - }, - "rooms": { - "!a:localhost": { - "current_state": { - "m.room.name": {"name":"Empty Room"}, - "m.room.avatar": {"url":"mxc://foo/bar"} - }, - "timeline": [] - }, - "!b:localhost": { - "current_state": { - "m.room.name": {"name":"Sync Test Room"} - }, - "timeline": [] - }, - "!c:localhost": { - 
"current_state": { - "m.room.name": {"name":"#synctestalias:localhost"} - }, - "timeline": [] - }, - "!d:localhost": { - "current_state": { - "m.room.name": {"name":"Empty Room"} - }, - "timeline": [] - } - } - } -} -``` +TODO: once the API has stabilised From a0bf4027bcef278e43a570c6fa94f2b1bef3488b Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Mon, 23 May 2022 17:31:07 +0100 Subject: [PATCH 23/81] BREAKING: Response API shape changes - Moves lists to the top-level, which makes sense given ops and room response JSON is list-scoped. Also at the top-level is room_subscriptions which has its own room response JSON, but no lists at all. - Moves count to be list scoped (rather than the weird index matching you need to do today with counts) - Replaces ops[].rooms with room ID rather than the room response JSON. This adds a bit of needless extra bandwidth but we need these to serve as pointers into.. - Add lists[].rooms which contains the room response JSON for that list. --- proposals/3575-sync.md | 96 +++++++++++++++++++++++++++--------------- 1 file changed, 62 insertions(+), 34 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 874497fb00d..ebc9c80e46d 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -92,14 +92,20 @@ An entire response looks like: "pos": "5", // Sliding Window API - "ops": [ - { - "list": 0, - "range": [0,99], - "op": "SYNC", - "rooms": [ + "lists": { + "0": { + "count": 1337, + "ops": [ { - "room_id": "!foo:bar", + "op": "SYNC", + "range": [0, 99], + "room_ids": [ + "!foo:bar", // ... 99 more room IDs + ] + } + ], + "rooms": { + "!foo:bar": { "name": "The calculated room name", "initial": true, "required_state": [ @@ -121,10 +127,9 @@ An entire response looks like: "highlight_count": 3 }, // ...
99 more items - ], + }, } - ], - "counts": [1337], + }, // Room Subscriptions API "room_subscriptions": { @@ -254,6 +259,7 @@ One or more room lists can be requested in sliding sync like so: "lists": [ { // Sliding window ranges, see the Sliding Window API for more information. + // If this field is missing, no sliding window is used and all rooms are returned in this list. "ranges": [ [0,99] ], // Sticky. List sort order. See Sliding Window API for more information. // These fields may be expanded through use of extensions. @@ -341,8 +347,8 @@ filtering these rooms out anyway, so rather save on the bandwidth!_ The server will then return rooms which have the following fields: ```js -{ - "room_id": "!foo:bar", +// the room ID +"!foo:bar": { "name": "The calculated room name", // Flag which is set when this is the first time the server is sending this data on this connection. // Clients can use this flag to replace or update their local state. When there is an update, servers @@ -447,6 +453,8 @@ The sliding sync API exposes this API shape via the following request parameters "lists": [ { // Multiple sliding windows inside a list can be requested. Integers are _inclusive_. + // If this key is missing entirely, no sliding window is used and all rooms for this list are returned. + // In this particular case, the `ops` field will not be present in the response. "ranges": [ [0,9], [20,29] ], // How the list should be sorted on the server. The first value is applied first, then tiebreaks // are performed with the 2nd sort order, then the 3rd until there are no more sort orders left. @@ -460,32 +468,57 @@ The sliding sync API exposes this API shape via the following request parameters Which returns the following response parameters: ```js { - "ops": [ + // This array is exactly the same length as the `lists` provided in the request. All lists MUST + // return at the very least a `count`, even if there are no changes to the list. 
+ "lists": [ + // List 0 { - // Which list is affected by this operation - "list": 0, - // Which index positions are affected. These are both inclusive. - "range": [0,9], - // The operation being performed on these index positions. - "op": "SYNC", - // The data to put in these positions. - "rooms": [ + // The total number of entries in the list. Always present. + "count": 1337, + // The sliding list operations to perform. This field will be missing if the request specifies + // no ranges. + "ops": [ { - "room_id": "!foo:bar", + // The operation being performed. + "op": "SYNC", + // Which index positions are affected by this operation. These are both inclusive. + "range": [0, 9], + // Which room IDs are affected by this operation. These IDs match up to the positions + // in the `range`, so the last room ID in this list matches the 9th index. The room data + // is held in a separate object. + "room_ids": [ + "!foo:bar", // ... 9 more room IDs + ] + } + ], + // The room data to use for each room ID. This data represents the point in time AFTER all + // ops have been applied. For example, if a room had 2 new events which changed its list position + // then you could see `ops` with DELETE[4,!foo:bar], INSERT[0,!foo:bar], UPDATE[0,!foo:bar] + // then the room !foo:bar in this map MUST contain both events. + // + // This map will only contain rooms which are present in the list `ops` above. If there are no + // `ops` (because there are no `ranges`) then all rooms which match the list filters will be + // present in this list, unordered. This functionality is useful for clients which do not want + // to use sliding list semantics. + "rooms": { + "!foo:bar": { "name": "The calculated room name", // Additional response parameters omitted as they are // unrelated to the semantics of the sliding window. // See previous section on room list parameters. }, // ... 9 more items - ], + }, } ], - // The total number of entries in the list. 
Index positions match up to the lists in the request. - "counts": [1337], } ``` +_Rationale: Prior versions of this MSC more tightly coupled room data and list operations. This +became a problem if you did not want to use sliding windows because the room data will be contained +within list operations you don't care about. Now that this data is split out, it is easy for clients +to opt-out of sliding window semantics entirely (the `ops` key just disappears)._ + The possible `sort` operations are: - `by_recency`: Sort by `origin_server_ts` on the most recently _received_ event in the room. Note that due to clock drift over federation it is possible for rooms to re-order such that the most @@ -532,37 +565,32 @@ The complete API shape for each operation is shown below (note the key names var ```js { - "list": 0, "index": 3, "op": "UPDATE", - "room": { ... } + "room_id": "!foo:bar" } { - "list": 0, "op": "DELETE", "index": 8 } { - "list": 0, "op": "INSERT", "index": 99, - "room": { ... } + "room_id": "!foo:bar" } { - "list": 0, "op": "INVALIDATE", "range": [100,199] } { - "list": 0, "range": [100,117], "op": "SYNC", - "rooms": [ - // ... 18 rooms with complete state ... + "room_ids": [ + // ... 18 room IDs ] } ``` From 02e3706f135472f0b23429ef0258ccf45e781c1f Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Mon, 23 May 2022 17:38:51 +0100 Subject: [PATCH 24/81] Remove the superfluous UPDATE command We previously needed this command because room data HAD to exist inside an `op`, so if a room was updated without changing position then it needed a command to say "don't change its position" which _is_ the UPDATE command. Now that room data sits outside the `ops`, we no longer need the UPDATE command at all. 
--- proposals/3575-sync.md | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index ebc9c80e46d..58fb1dc9cc7 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -409,7 +409,7 @@ to indicate "the current user" e.g to always pull out the current user's member At a high level, the sliding window API provides a way to synchronise a subslice of a list in a bandwidth efficient way. It does this by referring to "operations" which must be performed on the -stored client list, such as INSERT, DELETE and UPDATE. Each operation has an index position OR a +stored client list, such as INSERT, DELETE and SYNC. Each operation has an index position OR a range of index positions which tells the client where the operation should be performed. The possible operations are: - `SYNC`: Sets a *range* of entries. Clients SHOULD discard what they previously knew about entries in @@ -417,7 +417,6 @@ possible operations are: - `INSERT`: Sets a *single* entry. If the position is not empty then clients MUST move entries to the left or the right depending on where the closest empty space is. - `DELETE`: Remove a *single* entry. Often comes before an `INSERT` to allow entries to move places. - - `UPDATE`: Update a *single* entry. Updates are cumulative (consisting of deltas only). - `INVALIDATE`: Remove a *range* of entries. Clients MAY persist the invalidated range for offline support, but they should be treated as empty when additional operations which concern indexes in the range arrive from the server.
@@ -434,9 +433,6 @@ For example: | | [H,A,B,C,D,E,F,G,I] H moves to the front | ----- wait for updates ----> | [H,A,B,C,D] | <- DELETE[4], INSERT[0]=H--- | - | | [H',A,B,C,D,E,F,G,I] H is updated to H' - | ----- wait for updates ----> | -[H',A,B,C,D] | <------ UPDATE[0]=H'-------- | | | 0,1,2,3,4,5,6,7,8 | | [J,K,L,M,N,O,P,Q,R] Entire list is replaced | ----- wait for updates ----> | @@ -493,7 +489,7 @@ Which returns the following response parameters: ], // The room data to use for each room ID. This data represents the point in time AFTER all // ops have been applied. For example, if a room had 2 new events which changed its list position - // then you could see `ops` with DELETE[4,!foo:bar], INSERT[0,!foo:bar], UPDATE[0,!foo:bar] + // then you could see `ops` with DELETE[4,!foo:bar], INSERT[0,!foo:bar], DELETE[0,!foo:bar], INSERT[1,!foo:bar] // then the room !foo:bar in this map MUST contain both events. // // This map will only contain rooms which are present in the list `ops` above. If there are no @@ -564,12 +560,6 @@ additional sort operations is useful, see the extensions section for more inform The complete API shape for each operation is shown below (note the key names vary on the operation): ```js -{ - "index": 3, - "op": "UPDATE", - "room_id": "!foo:bar" -} - { "op": "DELETE", "index": 8 @@ -743,13 +733,13 @@ following operations: - If N > M then we would probably send more events if we did a delta than just telling the client everything from scratch, so issue a `SYNC` for this room. - If N < M then we don't have many events since the connection was last established, so just send - the delta as an `UPDATE`. + the delta as an update. This approach has numerous benefits: - In the common case when you scroll a room, you won't get any `SYNC`s for rooms that were invalidated because it's highly unlikely to receive 10+ events during the room scroll (assuming you scroll back up in reasonable time). 
- - When you reconnect after sleeping your laptop overnight, most rooms will be `UPDATE`s, and busy + - When you reconnect after sleeping your laptop overnight, most rooms will be updates, and busy rooms like Matrix HQ will be `SYNC`ed from fresh rather than sending 100s of events. This imposes more restrictions on the server implementation: From bea7323c0b03dbd46600ce8fe01218d55f0de083 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Tue, 24 May 2022 10:15:53 +0100 Subject: [PATCH 25/81] BREAKING: Bring all rooms responses to the top-level Reduces data duplication. --- proposals/3575-sync.md | 203 +++++++++++++++++++++++------------------ 1 file changed, 115 insertions(+), 88 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 58fb1dc9cc7..ce62644c0b2 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -92,8 +92,8 @@ An entire response looks like: "pos": "5", // Sliding Window API - "lists": { - "0": { + "lists": [ + { "count": 1337, "ops": [ { @@ -103,36 +103,13 @@ An entire response looks like: "!foo:bar", // ... 
99 more room IDs ] } - ], - "rooms": { - "!foo:bar": { - "name": "The calculated room name", - "initial": true, - "required_state": [ - {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, - {"sender":"@alice:example.com","type":"m.room.history_visibility", "state_key":"", "content":{"history_visibility":"joined"}}, - {"sender":"@alice:example.com","type":"m.space.child", "state_key":"!foo:example.com", "content":{"via":["example.com"]}}, - {"sender":"@alice:example.com","type":"m.space.child", "state_key":"!bar:example.com", "content":{"via":["example.com"]}}, - {"sender":"@alice:example.com","type":"m.space.child", "state_key":"!baz:example.com", "content":{"via":["example.com"]}} - ], - "timeline": [ - {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, - {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"A"}}, - {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"B"}}, - {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"C"}}, - {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"D"}}, - ], - "prev_batch": "t111_222_333", - "notification_count": 54, - "highlight_count": 3 - }, - // ... 
99 more items - }, + ] } - }, + ], - // Room Subscriptions API - "room_subscriptions": { + // Aggregated rooms from lists and room subscriptions + "rooms": { + // Room from room subscription "!sub1:bar": { "name": "Alice and Bob", "initial": true, @@ -153,7 +130,30 @@ An entire response looks like: "prev_batch": "t111_222_333", "notification_count": 1, "highlight_count": 0 - } + }, + // rooms from list + "!foo:bar": { + "name": "The calculated room name", + "initial": true, + "required_state": [ + {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, + {"sender":"@alice:example.com","type":"m.room.history_visibility", "state_key":"", "content":{"history_visibility":"joined"}}, + {"sender":"@alice:example.com","type":"m.space.child", "state_key":"!foo:example.com", "content":{"via":["example.com"]}}, + {"sender":"@alice:example.com","type":"m.space.child", "state_key":"!bar:example.com", "content":{"via":["example.com"]}}, + {"sender":"@alice:example.com","type":"m.space.child", "state_key":"!baz:example.com", "content":{"via":["example.com"]}} + ], + "timeline": [ + {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"A"}}, + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"B"}}, + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"C"}}, + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"D"}}, + ], + "prev_batch": "t111_222_333", + "notification_count": 54, + "highlight_count": 3 + }, + // ... 99 more items }, // Extensions API @@ -344,39 +344,43 @@ large accounts. 
The `room_types` filters exist primarily to include/exclude spac `is_tombstone` flag exists to remove tombstoned rooms from the sync response: clients would be filtering these rooms out anyway, so rather save on the bandwidth!_ -The server will then return rooms which have the following fields: +The server will then return a `rooms` key which has the following fields: ```js -// the room ID -"!foo:bar": { - "name": "The calculated room name", - // Flag which is set when this is the first time the server is sending this data on this connection. - // Clients can use this flag to replace or update their local state. When there is an update, servers - // MUST omit this flag entirely and NOT send "initial":false as this is wasteful on bandwidth. The - // absence of this flag means 'false'. - "initial": true, - // this is the CURRENT STATE, unlike sync v2 - "required_state": [ - {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, - {"sender":"@alice:example.com","type":"m.room.history_visibility", "state_key":"", "content":{"history_visibility":"joined"}}, - {"sender":"@alice:example.com","type":"m.space.child", "state_key":"!foo:example.com", "content":{"via":["example.com"]}}, - {"sender":"@alice:example.com","type":"m.space.child", "state_key":"!bar:example.com", "content":{"via":["example.com"]}}, - {"sender":"@alice:example.com","type":"m.space.child", "state_key":"!baz:example.com", "content":{"via":["example.com"]}} - ], - // Last event is most recent. Max timeline_limit events. 
- "timeline": [ - {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, - {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"A"}}, - {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"B"}}, - {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"C"}}, - {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"D"}}, - ], - "is_dm": true, // field is absent on non-DM rooms - "invite_state": [ { type: "m.room.member" } ], // stripped state events, same as rooms.invite.$room_id.invite_state in sync v2, absent on joined/left rooms - "prev_batch": "t111_222_333", // same as sync v2 - "limited": true, // same as sync v2 - "notification_count": 54, // same as sync v2 - "highlight_count": 3 // same as sync v2 +{ + "rooms": { + // the room ID + "!foo:bar": { + "name": "The calculated room name", + // Flag which is set when this is the first time the server is sending this data on this connection. + // Clients can use this flag to replace or update their local state. When there is an update, servers + // MUST omit this flag entirely and NOT send "initial":false as this is wasteful on bandwidth. The + // absence of this flag means 'false'. 
+ "initial": true, + // this is the CURRENT STATE, unlike sync v2 + "required_state": [ + {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, + {"sender":"@alice:example.com","type":"m.room.history_visibility", "state_key":"", "content":{"history_visibility":"joined"}}, + {"sender":"@alice:example.com","type":"m.space.child", "state_key":"!foo:example.com", "content":{"via":["example.com"]}}, + {"sender":"@alice:example.com","type":"m.space.child", "state_key":"!bar:example.com", "content":{"via":["example.com"]}}, + {"sender":"@alice:example.com","type":"m.space.child", "state_key":"!baz:example.com", "content":{"via":["example.com"]}} + ], + // Last event is most recent. Max timeline_limit events. + "timeline": [ + {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"A"}}, + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"B"}}, + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"C"}}, + {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"D"}}, + ], + "is_dm": true, // field is absent on non-DM rooms + "invite_state": [ { type: "m.room.member" } ], // stripped state events, same as rooms.invite.$room_id.invite_state in sync v2, absent on joined/left rooms + "prev_batch": "t111_222_333", // same as sync v2 + "limited": true, // same as sync v2 + "notification_count": 54, // same as sync v2 + "highlight_count": 3 // same as sync v2 + } + } } ``` @@ -486,34 +490,39 @@ Which returns the following response parameters: "!foo:bar", // ... 9 more room IDs ] } - ], - // The room data to use for each room ID. This data represents the point in time AFTER all - // ops have been applied. 
For example, if a room had 2 new events which changed its list position - // then you could see `ops` with DELETE[4,!foo:bar], INSERT[0,!foo:bar], DELETE[0,!foo:bar], INSERT[1,!foo:bar] - // then the room !foo:bar in this map MUST contain both events. - // - // This map will only contain rooms which are present in the list `ops` above. If there are no - // `ops` (because there are no `ranges`) then all rooms which match the list filters will be - // present in this list, unordered. This functionality is useful for clients which do not want - // to use sliding list semantics. - "rooms": { - "!foo:bar": { - "name": "The calculated room name", - // Additional response parameters omitted as they are - // unrelated to the semantics of the sliding window. - // See previous section on room list parameters. - }, - // ... 9 more items - }, + ] } ], + // The room data to use for each room ID. This data represents the point in time AFTER all + // ops have been applied. For example, if a room had 2 new events which changed its list position + // then you could see `ops` with DELETE[4,!foo:bar], INSERT[0,!foo:bar], DELETE[0,!foo:bar], INSERT[1,!foo:bar] + // then the room !foo:bar in this map MUST contain both events. + // + // This map will only contain rooms which are present in the list `ops` above. If there are no + // `ops` (because there are no `ranges`) then all rooms which match the list filters will be + // present in this list, unordered. This functionality is useful for clients which do not want + // to use sliding list semantics. This map is an aggregation of all rooms which can be returned + // over all lists, including room subscriptions. This means if a room appears in 2 lists, only + // 1 entry is present. + "rooms": { + "!foo:bar": { + "name": "The calculated room name", + // Additional response parameters omitted as they are + // unrelated to the semantics of the sliding window. + // See previous section on room list parameters. + }, + // ... 
9 more items + }, } ``` _Rationale: Prior versions of this MSC more tightly coupled room data and list operations. This became a problem if you did not want to use sliding windows because the room data will be contained within list operations you don't care about. Now that this data is split out, it is easy for clients -to opt-out of sliding window semantics entirely (the `ops` key just disappears)._ +to opt-out of sliding window semantics entirely (the `ops` key just disappears). Furthermore, the +`rooms` map was originally split out to be per-list / per-room-subscription but this could cause +needless duplication if a room appeared in >1 list. Each list can have different parameters associated +with them (e.g `required_state`, `timeline_limit`) but these can be aggregated / UNION'd easily._ The possible `sort` operations are: - `by_recency`: Sort by `origin_server_ts` on the most recently _received_ event in the room. Note @@ -616,7 +625,7 @@ To track a room `!sub1:bar`, the client would send the following request: This would return the following response: ```js { - "room_subscriptions": { + "rooms": { "!sub1:bar": { "name": "Alice and Bob", "required_state": [ @@ -647,7 +656,7 @@ Any updates in this room would be returned in the same section of the sync respo ```js { - "room_subscriptions": { + "rooms": { "!sub1:bar": { "timeline": [ {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"C"}}, @@ -661,10 +670,6 @@ Multiple rooms can be subscribed to by specifying additional keys in the room su a room is subscribed to multiple times, the _most recent_ subscription takes effect for the purposes of `required_state` and `timeline_limit` filtering. -It is possible for a room to be both directly subscribed to _and_ in the sliding window. In this -case, only the room subscription section of the response will contain the data. The sliding window -will just contain the `room_id` and operation/index/list parameters. 
- To unsubscribe from a room, the client needs to send a request with the room ID to unsubscribe from in the `unsubscribe_rooms` array: ```js @@ -695,6 +700,28 @@ Hydrogen's grid view. The `unsubscribe_rooms` array allows rooms to be efficient map. An alternative would be to specify an empty JSON object in the room subscription but that feels less explicit than the array form._ +### Commonalities between the Room Subscription API and Sliding Window API + +In the request, both the sliding window API and the room subscription API use the same keys to extract room data. Both +APIs also return that room data in the same part of the response. These keys are: + + - `required_state`: Required state for each room returned. An array of event type and state key tuples. + - `timeline_limit`: The maximum number of timeline events to return per response. + +_All_ room data is returned in a top-level `rooms` key in the response JSON, regardless of whether +this room is being returned due to it being a room subscription or in a list. This de-duplicates data +when a room can be present in more than 1 list. However, multiple lists may have different values for +`required_state` or `timeline_limit`. In this case, these values are combined together according to +the following rules: + + - `required_state`: Combine all arrays and treat it as a single unified array. + - `timeline_limit`: Take the highest value. + + Due to this, clients need to take care to extract only the number of timeline events / state events + they require from the `rooms` response, as it may include more data than they requested in a single + list. 
+ + ### Notifications API If you are tracking the top 5 rooms and an event arrives in the 6th room, you will be notified about From b3132b04dd3f73870a19515d711e22cb613762cb Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Thu, 9 Jun 2022 19:07:23 +0100 Subject: [PATCH 26/81] Add slow_get_all_rooms and remove implicit-return-all-rooms via ranges key omission --- proposals/3575-sync.md | 83 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 4 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index ce62644c0b2..bb093cb13c5 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -453,8 +453,6 @@ The sliding sync API exposes this API shape via the following request parameters "lists": [ { // Multiple sliding windows inside a list can be requested. Integers are _inclusive_. - // If this key is missing entirely, no sliding window is used and all rooms for this list are returned. - // In this particular case, the `ops` field will not be present in the response. "ranges": [ [0,9], [20,29] ], // How the list should be sorted on the server. The first value is applied first, then tiebreaks // are performed with the 2nd sort order, then the 3rd until there are no more sort orders left. @@ -475,8 +473,7 @@ Which returns the following response parameters: { // The total number of entries in the list. Always present. "count": 1337, - // The sliding list operations to perform. This field will be missing if the request specifies - // no ranges. + // The sliding list operations to perform. "ops": [ { // The operation being performed. @@ -602,6 +599,84 @@ This can be a problem for some use cases: For the first of these issues, the sliding sync API exposes a "room subscription" API. For the second issue, the sliding sync API exposes a "notifications" API. +#### Requesting all rooms + +Sometimes clients may not wish to deal with sliding windows, and instead get all rooms on the user's account. 
+For example, if your client is a bot or an application service, having sliding windows just adds extra complexity. +To aid these use cases, any list can omit the `ranges` key and add a new sticky key at the same level: +`slow_get_all_rooms: true`. If this is set, the `ranges` and `sort` keys are ignored and all rooms which match the list +`filters` will be returned. If there are no filters for this list, then all rooms on the user's account will +be returned. This gives additional flexibility as it allows clients to request all E2EE rooms in a separate list +from the sliding windows. When operating in this mode, there will be no movement operations (DELETE followed by INSERT) +as the client has the entire list and can work out whatever sort order they wish. There will still be DELETE +and INSERT operations when rooms are left or joined respectively. In addition, there will be an initial SYNC +operation to let the client know which rooms in the `rooms` object were from this list. + +An example request: +```js +{ + "lists": [ + // list 0 will include all encrypted rooms in one go + { + "slow_get_all_rooms": true, + "filters": { + "is_encrypted": true + } + }, + // list 1 will include the first 20 unencrypted rooms sorted accordingly + { + "ranges": [ [0,19] ], + "sort": [ "by_notification_count", "by_recency", "by_name" ], + "filters": { + "is_encrypted": false + } + } + ], +} +``` +Would return the response: +```js +{ + "lists": [ + { + "count": 1337, + "ops": [ + { + "op": "SYNC", + "range": [0, 1336], + "room_ids": [ + "!encrypted:bar", // ... 1336 more room IDs + ] + } + ] + }, + { + "count": 420, + "ops": [ + { + "op": "SYNC", + "range": [0, 19], + "room_ids": [ + "!unencrypted:bar", // ... 18 more room IDs + ] + } + ] + } + ], + "rooms": { + "!encrypted:bar": { + ... + }, + // ... 1336 more items + "!unencrypted:bar": { + ... + }, + // ... 
18 more items + }, +} +``` + + ### Room Subscription API Sometimes clients know exactly which room they want to get information about e.g by following a From 3b2b3d547b41e4aeebbde2ad6e89606dd684a86c Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Thu, 9 Jun 2022 19:13:05 +0100 Subject: [PATCH 27/81] 19 not 18 --- proposals/3575-sync.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index bb093cb13c5..3c33e80a3bc 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -657,7 +657,7 @@ Would return the response: "op": "SYNC", "range": [0, 19], "room_ids": [ - "!unencrypted:bar", // ... 18 more room IDs + "!unencrypted:bar", // ... 19 more room IDs ] } ] @@ -671,7 +671,7 @@ Would return the response: "!unencrypted:bar": { ... }, - // ... 18 more items + // ... 19 more items }, } ``` From 60be9c939df49c364e542256ebb7acb3388ac9b0 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Wed, 27 Jul 2022 10:58:19 +0100 Subject: [PATCH 28/81] Clarify that "" for a room type is valid --- proposals/3575-sync.md | 1 + 1 file changed, 1 insertion(+) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 3c33e80a3bc..4076f7ee47c 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -316,6 +316,7 @@ One or more room lists can be requested in sliding sync like so: // If specified, only rooms where the `m.room.create` event has a `type` matching one // of the strings in this array will be returned. If this field is unset, all rooms are // returned regardless of type. This can be used to get the initial set of spaces for an account. + // For rooms which do not have a room type, use "" to include them. "room_types": [ "m.space" ], // Same as "room_types" but inverted. This can be used to filter out spaces from the room list. 
"not_room_types": [ "m.space" ], From a475ed36a8fb0ec43c8f89e81c8ef8ab19e0aa0f Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Wed, 27 Jul 2022 11:03:05 +0100 Subject: [PATCH 29/81] null not "" --- proposals/3575-sync.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 4076f7ee47c..c075c8d61ed 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -316,8 +316,8 @@ One or more room lists can be requested in sliding sync like so: // If specified, only rooms where the `m.room.create` event has a `type` matching one // of the strings in this array will be returned. If this field is unset, all rooms are // returned regardless of type. This can be used to get the initial set of spaces for an account. - // For rooms which do not have a room type, use "" to include them. - "room_types": [ "m.space" ], + // For rooms which do not have a room type, use 'null' to include them. + "room_types": [ "m.space", null ], // Same as "room_types" but inverted. This can be used to filter out spaces from the room list. "not_room_types": [ "m.space" ], // Filter the room name. Case-insensitive partial matching e.g 'foo' matches 'abFooab'. From 9dd664ab72b3f8a94b0cb2bbfd20d02eb17ea74a Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Wed, 27 Jul 2022 12:06:02 +0100 Subject: [PATCH 30/81] Clarify not_room_types wins --- proposals/3575-sync.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index c075c8d61ed..3e5825b56cf 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -319,6 +319,8 @@ One or more room lists can be requested in sliding sync like so: // For rooms which do not have a room type, use 'null' to include them. "room_types": [ "m.space", null ], // Same as "room_types" but inverted. This can be used to filter out spaces from the room list. 
+ // If a type is in both room_types and not_room_types, then not_room_types wins and they are + // not included in the result. "not_room_types": [ "m.space" ], // Filter the room name. Case-insensitive partial matching e.g 'foo' matches 'abFooab'. // The term 'like' is inspired by SQL 'LIKE', and the text here is similar to '%foo%'. From b82c1858f83c07e9dccaed0b2d45f14d381cd694 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Wed, 27 Jul 2022 15:12:15 +0100 Subject: [PATCH 31/81] Clarify that the spaces filter checks based on m.space.child state events --- proposals/3575-sync.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 3e5825b56cf..159f0c3fa60 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -295,11 +295,13 @@ One or more room lists can be requested in sliding sync like so: // If unset, both DM rooms and non-DM rooms are returned. If false, only non-DM rooms // are returned. If true, only DM rooms are returned. "is_dm": true, - // A list of spaces which target rooms must be a part of. For every invited/joined room for - // this user, ensure that there is a parent space event which is in this list. If unset, all - // rooms are included. Servers MUST NOT navigate subspaces. It is up to the client to - // give a complete list of spaces to navigate. Only rooms directly in these spaces will be - // returned. + // A list of spaces which target rooms must be a part of, as m.space.child state events. + // The server will inspect the m.space.child state events for the JOINED space room IDs given, + // and filter the room list based on the INVITED/JOINED children room IDs. + // If unset, all rooms are included. Servers MUST NOT navigate subspaces. It is up to the client to + // give a complete list of spaces to navigate. Only rooms directly mentioned as m.space.child + // events in these spaces will be returned. 
Unknown spaces or spaces the user is not joined to + // will be ignored. "spaces": ["!foo:bar", "!bar:baz"], // Flag which only returns rooms which have an `m.room.encryption` state event. If unset, // both encrypted and unencrypted rooms are returned. If false, only unencrypted rooms From 8a82f926790107e9944e583515b1e6d8f36b43a0 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Mon, 1 Aug 2022 12:26:48 +0100 Subject: [PATCH 32/81] Update impl status --- proposals/3575-sync.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 159f0c3fa60..7bda4bec1de 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -1058,7 +1058,7 @@ be spaces-aware for spaces filters. Extension MSCs will depend on this MSC for t # Implementation state -[Proxy server](https://github.com/matrix-org/sync-v3) (v0.1.0): +[Proxy server](https://github.com/matrix-org/sliding-sync) (v0.2.1): - Sliding Window API: - [x] Operation support - [x] Required state with wildcards @@ -1076,8 +1076,8 @@ be spaces-aware for spaces filters. 
Extension MSCs will depend on this MSC for t * [x] `is_encrypted` * [x] `is_invite` * [x] `is_tombstoned` - * [ ] `spaces` - * [ ] `room_types` and `not_room_types` + * [x] `spaces` + * [x] `room_types` and `not_room_types` * [x] `room_name_like` - [x] Room Subscription API - [ ] Notifications API (unspecced) From 61decae837b5448b073fc5c718172f9b4d1e5e18 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Wed, 3 Aug 2022 15:07:57 +0100 Subject: [PATCH 33/81] Add txn_id to client requests --- proposals/3575-sync.md | 56 +++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 7bda4bec1de..bf0dd068ac6 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -54,6 +54,10 @@ A complete sync request looks like: `POST /_matrix/client/unstable/org.matrix.msc3575/sync?pos=4&timeout=30000`: ```js { + // allows clients to know what request params reached the server, + // functionally similar to txn IDs on /send for events. + "txn_id": "client-chosen-string", + // Sliding Window API "lists": [ { @@ -90,6 +94,7 @@ An entire response looks like: { // Connection and Streaming API "pos": "5", + "txn_id": "client-chosen-string", // echo of the txn ID // Sliding Window API "lists": [ @@ -234,22 +239,45 @@ When this object is combined with an the additional object: What is the value of `baz`? Both unset and `2` are valid answers. For this reason, `baz` MUST be marked as sticky if the desired result is `2`, else it will be unset. -Sticky request parameters SHOULD be set at the start of the connection and kept constant throughout -the lifetime of the connection. It is possible for clients and servers to disagree on the value of -a sticky request parameter in the event of packet loss: +In order for servers and clients to agree on the set of sticky parameters, clients MUST send a transaction +ID with each change to their request parameters and servers MUST buffer responses. 
This transaction ID will +be echoed back to the client so it knows that those parameters have been applied. + +The following sequence diagram explains the problem encountered when transaction IDs are not used and buffers are not used: +``` +Updates on 200 OK Updates on receipt of data + Client Server + | ------{ "foo": "bar" }------> | {"foo":"bar"} +{"foo":"bar"} | <-------HTTP 200 OK---------- | + | | + | ------{ "baz": "quuz" }---X | {"foo":"bar"} + | | + | ------{ "other": "value" }--> | {"foo":"bar","other":"value"} + | X--HTTP 200 OK---------- | + | | + | ------{ "k": "v" }----------> | {"foo":"bar","other":"value","k":"v"} +{"foo":"bar","k":"v"} | <-------HTTP 200 OK---------- | +``` +The update for `{ "other": "value" }` was not applied on the client, but was applied on the server. + +With the introduction of buffers and transaction IDs: ``` - Client Server - | ------{ "foo": "bar" }------> | {"foo":"bar"} -{"foo":"bar"} | <-------HTTP 200 OK---------- | - | ------{ "baz": "quuz" }-----> | {"foo":"bar","baz":"quuz"} - | X--HTTP 200 OK---------- | - | ------{ "baz": "quuz" }-----> | {"foo":"bar","baz":"quuz"} - | X--HTTP 200 OK---------- | - | ------{ "baz": "quuz" }-----> | {"foo":"bar","baz":"quuz"} - | <-------HTTP 200 OK---------- | +Updates on receipt of txn ID Updates on receipt of data, every response increments pos by 1, starts at pos=3 + Client Server + | -txn=1-{ "foo": "bar" }-pos=3-> | {"foo":"bar"} txn=1 +{"foo":"bar"} | <--HTTP 200 OK txn=1,pos=4----- | + | | + 2| -txn=2-{"baz":"quuz"}-pos=4--X | {"foo":"bar"} txn=1 + | | + 3| -txn=3-{"other":"val"}-pos=4--> | {"foo":"bar","other":"val"} txn=3 <-- txn=1 wiped as recv pos=4 in request + | X---HTTP 200 OK txn=3,pos=5- | + | | + 4| -txn=4-{ "k": "v" }----pos=4--> | {"foo":"bar","other":"val","k":"v"} txn=3,4 <-- txn=3 not wiped as recv old pos in request +{"foo":"bar","k":"v", | <--HTTP 200 OK txn=3,4,pos=5--- | + "other":"val"} ``` -For this reason, some request parameters are not suitable to be 
made "sticky". These include parameters -which are extremely dynamic in nature, such as list ranges. + +If the request parameters have not been modified, then the `txn_id` does not need to be sent. ### Room List parameters From 9178f1dede6ba484fdd7e2200e614fb5165d2c95 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Thu, 4 Aug 2022 12:53:59 +0100 Subject: [PATCH 34/81] Mention walking over tombstones --- proposals/3575-sync.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index bf0dd068ac6..3e125bf8fdc 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -326,6 +326,9 @@ One or more room lists can be requested in sliding sync like so: // A list of spaces which target rooms must be a part of, as m.space.child state events. // The server will inspect the m.space.child state events for the JOINED space room IDs given, // and filter the room list based on the INVITED/JOINED children room IDs. + // If the child room has a m.room.tombstone event, then the search should recursively navigate + // the room ID in that event to find the latest room and use that room ID instead of the initial + // room ID in the m.space.child event. // If unset, all rooms are included. Servers MUST NOT navigate subspaces. It is up to the client to // give a complete list of spaces to navigate. Only rooms directly mentioned as m.space.child // events in these spaces will be returned. 
Unknown spaces or spaces the user is not joined to From 9db6b9a6d28e04b3dbd0bf259e4736251a2be6ac Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Fri, 5 Aug 2022 15:16:58 +0100 Subject: [PATCH 35/81] Clarify long polling rules --- proposals/3575-sync.md | 114 ++++++++++++++++++++++++++--------------- 1 file changed, 73 insertions(+), 41 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 3e125bf8fdc..6908c0bab86 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -168,11 +168,74 @@ An entire response looks like: These fields and their interactions are explained in the next few sections. This forms the core of the API. Additional data can be returned via "extensions". -### Connections and streaming data +### Connections At a high level, the syncing mechanism creates a "connection" to the server to allow the -bi-directional exchange of JSON objects. This mechanism is ideally suited for WebSockets, but more -difficult to do for HTTP long-polling. +_bi-directional_ exchange of JSON objects. This mechanism is ideally suited for WebSockets, but more +difficult to do for HTTP long-polling. This design was chosen in order to allow for a seamless transition +to a stream-orientated protocol like WebSockets in the future. + +The existing `/sync` implementation in Matrix also creates a stream but it has limitations. It uses +a `since` token to tell clients where in the stream they are and to tell servers which messages the +client has received (in other words it serves as an ACK). Critically, the stream is _not stateful_. The +request must contain the entire set of input parameters, either via a filter ID or in-line filter. This +results in clients using the same set of input parameters most of the time. 
In order for sliding sync +to provide only the data needed to render the UI and nothing more, the set of input parameters needs to +be greatly expanded and they need to be dynamic: adding and removing parameters on-the-fly, without +additional round trips. In order to achieve this, Sliding Sync creates _stateful_ connections to the +server, so clients can simply send the deltas. This means clients and servers need to have a mechanism +to agree on what that stored state is. _This introduces additional rules on client implementations._ + +In a WebSockets implementation, this is easy: the request parameters are sent initially when a connection +is established and then remain active for the lifetime of the connection. Any changes to these parameters +are reliably sent to the server in the order they were submitted. In order for clients to know when these +parameters have been applied, most application-level WebSocket protocols use a "message ID" chosen by the +client which is then echoed back in the ACK message. This is very similar to transaction IDs on the `/send` +endpoint in Matrix. + +However, this proposal does not use WebSockets; it uses HTTP long-polling. Like with `/sync`, this proposal +uses a token to allow servers to know which messages the client has received. Emulating WebSockets over HTTP +long-polling is difficult and has limitations. Servers cannot push new data to the client and must instead +wait for the client to make an HTTP request. In addition, individual HTTP requests can fail, resulting in +ordering problems which simply do not exist in a WebSockets implementation. This can lead to some +counter-intuitive responses from a Sliding Sync enabled server, unless certain rules are followed. + +**Long-polling Rule 1:** do not send multiple concurrent sliding sync requests to the server. If a request is lost in +transit, it can be impossible to know if it has been applied on the server or not. 
This is not an issue for `/sync` +because the request is stateless; there's nothing to lose in the event of packet loss. In this example, +A is applied on both sides, B is not applied on either side, and C is applied on one side only, which +then gets returned in the next successful response by using the position of the client request. The +numbers reflect the position in the stream (similar to a `since` token): +``` +State Client Server State position + | --------A,0------> | 0 + | | A 1 + | <------OK,1------- | A 1 + A,1 | | A 1 + A,1 | --------B,1--/ | A 1 + A,1 | --------C,1------> | A 1 + | | A,C 2 + A,1 | /--------- | A,C 2 + | | A,C 2 + A,1 | --------D,1------> | 1 != 2 -> missed a response + | | A,C,D 3 +A,C,D | <------C,D,3------ | A,C,D 3 +``` +At this point, the client knows that `B` never made it to the server, because C was _sent after_ B, and +the server has ACKed C. If requests were sent in parallel (B and C at the same time), it would be impossible +for clients to know if B was still processing or if B had failed entirely. + +**Long-polling Rule 2:** use transaction IDs if you need to know when a response has been applied. The above example used +A,B,C,D as transaction IDs, but in reality requests/responses are not always obviously tied together. For example, +requesting the first 10 rooms on a user's account may return 0 results or 10: it's not possible to know ahead of time. +Clients need to know this information to know when to stop showing a spinner, for example. For these reasons, clients +SHOULD send a transaction ID when they need to know when the response has been calculated. + +**Long-polling Rule 3:** the HTTP response you receive may not match the HTTP request you sent. In the above example, C,D were +sent in the same response. In practice, the server does not combine multiple responses into a single response. Instead, +it will send the most recent unacknowledged response, in this case C, _even though_ the HTTP request was for D. 
+ +#### Message IDs for clients and servers For the long-polling use case, this proposal includes an opaque token that is very similar to `/sync` v2's `since` query parameter. This is called `pos` and represents the position in the stream @@ -200,11 +263,15 @@ request parameters) without a `pos` value to restart the connection. The `timeout` query parameter exists for the same purposes of sync v2: to tell the server how many milliseconds to hold open the connection before returning. +In addition, clients may send `txn_id` field at the top-level JSON object in the request to serve as +a client message ID. Servers MUST echo this back to the client via the `txn_id` field in the top-level +JSON object in the response when this request has been processed. + _TODO[proxy]: If pos is just an incrementing integer, it means 2 browser tabs with the same access token will step on each other's toes. The act of hitting /sync without a ?pos results in the first tab being torn down connection wise. We may want to mux in implicit session IDs into the pos?_ -### Sticky request parameters +#### Sticky request parameters Request parameters can be "sticky". This means that their value is remembered across multiple requests. Clients cannot choose which parameters are sticky, the API defines which parameters are sticky. @@ -241,43 +308,8 @@ be marked as sticky if the desired result is `2`, else it will be unset. In order for servers and clients to agree on the set of sticky parameters, clients MUST send a transaction ID with each change to their request parameters and servers MUST buffer responses. This transaction ID will -be echoed back to the client so it knows that those parameters have been applied. 
- -The following sequence diagram explains the problem encountered when transaction IDs are not used and buffers are not used: -``` -Updates on 200 OK Updates on receipt of data - Client Server - | ------{ "foo": "bar" }------> | {"foo":"bar"} -{"foo":"bar"} | <-------HTTP 200 OK---------- | - | | - | ------{ "baz": "quuz" }---X | {"foo":"bar"} - | | - | ------{ "other": "value" }--> | {"foo":"bar","other":"value"} - | X--HTTP 200 OK---------- | - | | - | ------{ "k": "v" }----------> | {"foo":"bar","other":"value","k":"v"} -{"foo":"bar","k":"v"} | <-------HTTP 200 OK---------- | -``` -The update for `{ "other": "value" }` was not applied on the client, but was applied on the server. - -With the introduction of buffers and transaction IDs: -``` -Updates on receipt of txn ID Updates on receipt of data, every response increments pos by 1, starts at pos=3 - Client Server - | -txn=1-{ "foo": "bar" }-pos=3-> | {"foo":"bar"} txn=1 -{"foo":"bar"} | <--HTTP 200 OK txn=1,pos=4----- | - | | - 2| -txn=2-{"baz":"quuz"}-pos=4--X | {"foo":"bar"} txn=1 - | | - 3| -txn=3-{"other":"val"}-pos=4--> | {"foo":"bar","other":"val"} txn=3 <-- txn=1 wiped as recv pos=4 in request - | X---HTTP 200 OK txn=3,pos=5- | - | | - 4| -txn=4-{ "k": "v" }----pos=4--> | {"foo":"bar","other":"val","k":"v"} txn=3,4 <-- txn=3 not wiped as recv old pos in request -{"foo":"bar","k":"v", | <--HTTP 200 OK txn=3,4,pos=5--- | - "other":"val"} -``` - -If the request parameters have not been modified, then the `txn_id` does not need to be sent. +be echoed back to the client so it knows that those parameters have been applied. If the request parameters +have not been modified, then the `txn_id` does not need to be sent. 
### Room List parameters From 59c83a857b4cf3cf6aca593c34efb44709b10d17 Mon Sep 17 00:00:00 2001 From: kegsay Date: Mon, 22 Aug 2022 18:25:30 +0100 Subject: [PATCH 36/81] Add support for filtering by room tag --- proposals/3575-sync.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 6908c0bab86..2f7c01a92eb 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -389,7 +389,15 @@ One or more room lists can be requested in sliding sync like so: "not_room_types": [ "m.space" ], // Filter the room name. Case-insensitive partial matching e.g 'foo' matches 'abFooab'. // The term 'like' is inspired by SQL 'LIKE', and the text here is similar to '%foo%'. - "room_name_like": "foo" + "room_name_like": "foo", + // Filter the room based on its room tags. If multiple tags are present, a room can have + // any one of the listed tags (OR'd). + "tags": ["m.favourite"], + // Filter the room based on its room tags. Takes priority over `tags`. For example, a room + // with tags A and B with filters tags:[A] not_tags:[B] would NOT be included because not_tags + // takes priority over `tags`. This filter is useful if your Rooms list does NOT include the + // list of favourite rooms again. 
+ "not_tags": ["m.lowpriority"] } } ], From 3b8175cf32b1b8d6bc841861c5edbcbbd6b029ed Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Tue, 30 Aug 2022 16:29:19 +0100 Subject: [PATCH 37/81] Add resolve_tombstones, joined_count and invited_count --- proposals/3575-sync.md | 46 +++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 2f7c01a92eb..42a0661652f 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -133,6 +133,8 @@ An entire response looks like: {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"B"}}, ], "prev_batch": "t111_222_333", + "joined_count": 41, + "invited_count": 1, "notification_count": 1, "highlight_count": 0 }, @@ -155,6 +157,8 @@ An entire response looks like: {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"D"}}, ], "prev_batch": "t111_222_333", + "joined_count": 4, + "invited_count": 0, "notification_count": 54, "highlight_count": 3 }, @@ -374,10 +378,18 @@ One or more room lists can be requested in sliding sync like so: // and joined rooms are returned. If false, no invited rooms are returned. If true, only // invited rooms are returned. "is_invite": true, - // Flag which only returns rooms which have an `m.room.tombstone` state event. If unset, - // both tombstoned and un-tombstoned rooms are returned. If false, only un-tombstoned rooms - // are returned. If true, only tombstoned rooms are returned. - "is_tombstoned": true, + // Flag which automatically walks over tombstoned rooms to the most recent room. For example, + // if a DM room is upgraded, the old tombstoned room will NOT be returned but the new room will + // be returned instead, regardless of whether the new room is present in the DM section of account + // data or not. 
If there is no newer room (e.g the user hasn't joined it yet) then the tombstoned + // room will be returned as it is the most recent room for that client. Room metadata which affects + // filters pass through transitively. For example, if there are a series of room upgrades A -> B -> C + // and only B is in the DM map, then C is treated as being in the same DM map. The specific metadata + // fields which are considered in this way are fields which are pinned to a static room ID: + // - DM rooms in account data. + // - Space children/parents. + // - Tagged rooms e.g favourites. + "resolve_tombstones": true, // If specified, only rooms where the `m.room.create` event has a `type` matching one // of the strings in this array will be returned. If this field is unset, all rooms are // returned regardless of type. This can be used to get the initial set of spaces for an account. @@ -417,8 +429,11 @@ expressing deletions becomes hard. The inclusion of a dedicated `is_encrypted` f benefit of complex clients: see the E2EE section for more information. The `room_name_like` field exists to allow the ability to search by room name which most clients support, and is crucial for large accounts. The `room_types` filters exist primarily to include/exclude spaces. The -`is_tombstone` flag exists to remove tombstoned rooms from the sync response: clients would be -filtering these rooms out anyway, so rather save on the bandwidth!_ +`resolve_tombstones` flag exists to remove tombstoned rooms from the sync response: clients would be +filtering these rooms out anyway, so rather save on the bandwidth! 
It's slightly more complicated than +that as sometimes we do want to return a tombstoned room to the user if they haven't joined the upgraded +room yet, and this chain of tombstoned rooms has implications on filtering, hence why this is "resolving" +tombstones and not just `is_tombstoned`._ The server will then return a `rooms` key which have the following fields: @@ -453,6 +468,8 @@ The server will then return a `rooms` key which have the following fields: "invite_state": [ { type: "m.room.member" } ], // stripped state events, same as rooms.invite.$room_id.invite_state in sync v2, absent on joined/left rooms "prev_batch": "t111_222_333", // same as sync v2 "limited": true, // same as sync v2 + "joined_count": 41, // same as sync v2 m.joined_member_count + "invited_count": 1, // same as sync v2 m.invited_member_count "notification_count": 54, // same as sync v2 "highlight_count": 3 // same as sync v2 } @@ -461,7 +478,11 @@ The server will then return a `rooms` key which have the following fields: ``` _Rationale: The room name and counts are required for display on the UI. They are calculated server -side because they are required for sort operations on lists. The `required_state` is controversially +side because they are required for sort operations on lists. The joined and invited member counts are +included for the client-side calculation of push rules, specifically `{"kind":"room_member_count","is":"2"}` +which would be impossible to calculate without knowing the total number of users in the room. Failure +to include this field could cause rooms to notify incorrectly, and they need to be calculated client-side +in E2EE rooms. The `required_state` is controversially the **current state** which breaks from sync v2 which has the `state` be "the state before the start of the timeline". 
Sync v2's rationale was event duplication (state events can appear in both the state section and the timeline section if it's the current state) and the fact that clients would have @@ -796,8 +817,7 @@ This would return the following response: {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"B"}}, ], "limited": true, - "notification_count": 1, - "highlight_count": 0 + // ... } } } @@ -1129,14 +1149,15 @@ be spaces-aware for spaces filters. Extension MSCs will depend on this MSC for t # Implementation state -[Proxy server](https://github.com/matrix-org/sliding-sync) (v0.2.1): +[Proxy server](https://github.com/matrix-org/sliding-sync) (v0.4.0): - Sliding Window API: - [x] Operation support - [x] Required state with wildcards - [x] Timeline limits - [x] Calculated room names - [x] Highlight/notification counts - - [x] Prev batch (partial, needs enough traffic in each room to generate reliably, token will cause duplicate events on /messages) + - [ ] Joined and invited member counts + - [x] Prev batch (token will cause duplicate events on /messages) - Sorting: - [x] By recency - [x] By highlight count @@ -1146,10 +1167,11 @@ be spaces-aware for spaces filters. 
Extension MSCs will depend on this MSC for t * [x] `is_dm` * [x] `is_encrypted` * [x] `is_invite` - * [x] `is_tombstoned` + * [ ] `resolve_tombstones` * [x] `spaces` * [x] `room_types` and `not_room_types` * [x] `room_name_like` + * [x] `tags` and `not_tags` - [x] Room Subscription API - [ ] Notifications API (unspecced) - [ ] Bandwidth optimisations From 7c010eafb9e948841fbabf3305f4f8ceeae4ddc7 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Fri, 14 Oct 2022 12:30:36 +0100 Subject: [PATCH 38/81] BREAKING: replace by_highlight_count and by_notification_count with by_notification_level --- proposals/3575-sync.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 42a0661652f..5a2cb983085 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -623,12 +623,11 @@ The possible `sort` operations are: that due to clock drift over federation it is possible for rooms to re-order such that the most recently received event in the entire list does not cause that room to go to index position 0. The highest `origin_server_ts` value comes first in the list. - - `by_highlight_count`: Sort by the `highlight_count` for this user in this room, which is the - number of unread notifications for this room with the highlight flag set. This value is also present - in sync v2. The highest `highlight_count` comes first in the list. - - `by_notification_count`: Sort by the `notification_count` for this user in this room, which is the - total number of unread notifications for this room. This value is also present in sync v2. - The highest `notification_count` comes first in the list. + - `by_notification_level`: Sort based on the presence of non-zero values for `highlight_count` and + `notification_count`. Rooms with a `highlight_count` > 0 come first, followed by rooms with a + `notification_count` > 0, followed by all other rooms. 
Rooms are not sorted within each level: + use an additional sort operation like `by_recency` to sort these groups. TODO: should we include + unread indicator with this? - `by_name`: Sort by room name lexicographically. This requires servers to implement the [room name calculation algorithm](https://matrix.org/docs/spec/client_server/latest#calculating-the-display-name-for-a-room). The server MUST perform the following steps: From 182e66412f14384ea9b81d3579588172406436ff Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Fri, 14 Oct 2022 12:32:42 +0100 Subject: [PATCH 39/81] Update examples --- proposals/3575-sync.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 5a2cb983085..4dcfad866f6 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -62,7 +62,7 @@ A complete sync request looks like: "lists": [ { "ranges": [ [0,99] ], - "sort": [ "by_notification_count", "by_recency", "by_name" ], + "sort": [ "by_notification_level", "by_recency", "by_name" ], "required_state": [ ["m.room.join_rules", ""], ["m.room.history_visibility", ""], @@ -327,7 +327,7 @@ One or more room lists can be requested in sliding sync like so: "ranges": [ [0,99] ], // Sticky. List sort order. See Sliding Window API for more information. // These fields may be expanded through use of extensions. - "sort": [ "by_notification_count", "by_recency", "by_name" ], + "sort": [ "by_notification_level", "by_recency" ], // Sticky. Required state for each room returned. An array of event type and state key tuples. // Note that elements of this array are NOT sticky so they must be specified in full when they @@ -553,7 +553,7 @@ The sliding sync API exposes this API shape via the following request parameters "ranges": [ [0,9], [20,29] ], // How the list should be sorted on the server. 
The first value is applied first, then tiebreaks // are performed with the 2nd sort order, then the 3rd until there are no more sort orders left. - "sort": [ "by_notification_count", "by_recency", "by_name" ], + "sort": [ "by_notification_level", "by_recency", "by_name" ], // Additional Room List request parameters omitted as they are // unrelated to the semantics of the sliding window, see previous section. } @@ -722,7 +722,7 @@ An example request: // list 1 will include the first 20 unencrypted rooms sorted accordingly { "ranges": [ [0,19] ], - "sort": [ "by_notification_count", "by_recency", "by_name" ], + "sort": [ "by_notification_level", "by_recency" ], "filters": { "is_encrypted": false } From e6ad74ec7f3cbdd03db07a7210db38f2a25167d7 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Wed, 19 Oct 2022 11:12:29 +0100 Subject: [PATCH 40/81] Flesh out Lite/Heavy E2EE handling section; tweak by_notification_level algo --- proposals/3575-sync.md | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 4dcfad866f6..b018aebafa1 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -625,7 +625,8 @@ The possible `sort` operations are: The highest `origin_server_ts` value comes first in the list. - `by_notification_level`: Sort based on the presence of non-zero values for `highlight_count` and `notification_count`. Rooms with a `highlight_count` > 0 come first, followed by rooms with a - `notification_count` > 0, followed by all other rooms. Rooms are not sorted within each level: + `notification_count` > 0 which are encrypted, followed by unencrypted rooms with a `notification_count` > 0, + followed by all other rooms. See the "E2EE Handling" section for more information. Rooms are not sorted within each level: use an additional sort operation like `by_recency` to sort these groups. TODO: should we include unread indicator with this? 
- `by_name`: Sort by room name lexicographically. This requires servers to implement the @@ -951,9 +952,9 @@ This imposes more restrictions on the server implementation: ### E2EE Handling The server cannot calculate the `highlight_count` in E2EE rooms as it cannot read the message content. -This is a problem when clients want to sort by `highlight_count`. In comparison, the server can +This is a problem when clients want to sort by the most recent highlight. In comparison, the server can calculate the name, `unread_count`, and work out the most recent timestamp when sorting by those -fields. What should the server do when the client wants to sort by `highlight_count` (which is pretty +fields. What should the server do when the client wants to sort by the most recent highlight (which is pretty typical!)? It can: - Assume `highlight_count == 1` whenever `unread_count > 0`. This ensures that E2EE rooms are always bumped above unreads in the list, but doesn't allow sorting within the list of highlighted rooms. @@ -983,6 +984,22 @@ Clients have two main choices here: list depending on highlight counts. This means the sort order will be more accurate but is slower and more complex to perform. This is why there is an `is_encrypted` filter on the room list parameters. +If you use the sort options `["by_notification_level", "by_recency"]`, this will implement the "Lite" option for you +automatically. This creates the following groups (in priority order): + - Unencrypted rooms with `highlight_count > 0` appear first. (NB: you cannot get encrypted rooms with highlight_count > 0) + - Encrypted rooms with `notification_count > 0` appear next. + - Unencrypted rooms with `notification_count > 0` follow. + - Rooms with `highlight_count == 0 && notification_count == 0` appear last. + +Within each group, the rooms are then sorted by recency (most recent first). 
This has the following negative side-effects: + - An explicit @mention in an encrypted room will not bump the room to the top of the list _if and only if_ + there are highlight counts for unencrypted rooms. It will instead bump the room to directly beneath the last + unencrypted room with a highlight count. + - A newer unread notification for an unencrypted room will sort beneath older unread notifications for encrypted rooms. + +If these trade-offs are unacceptable to a client implementation then they will need to sort encrypted rooms into +their own list and manually mix rooms from each list together as per the "Heavy" description. + In the future, it may become impossible for servers to sort by room name due to E2EE. This proposal has no suggestion on how to handle encrypted room names beyond hoping that homomorphic encryption will allow sorting based on ciphertext: this is an active area of research in the computer science From e58cf135e4cc66bb65f36701623ac08999396651 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Fri, 21 Oct 2022 10:54:53 +0100 Subject: [PATCH 41/81] Remove tombstone handling entirely; it should be an extension --- proposals/3575-sync.md | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index b018aebafa1..1e4705327be 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -378,18 +378,6 @@ One or more room lists can be requested in sliding sync like so: // and joined rooms are returned. If false, no invited rooms are returned. If true, only // invited rooms are returned. "is_invite": true, - // Flag which automatically walks over tombstoned rooms to the most recent room. For example, - // if a DM room is upgraded, the old tombstoned room will NOT be returned but the new room will - // be returned instead, regardless of whether the new room is present in the DM section of account - // data or not. 
If there is no newer room (e.g the user hasn't joined it yet) then the tombstoned - // room will be returned as it is the most recent room for that client. Room metadata which affects - // filters pass through transitively. For example, if there are a series of room upgrades A -> B -> C - // and only B is in the DM map, then C is treated as being in the same DM map. The specific metadata - // fields which are considered in this way are fields which are pinned to a static room ID: - // - DM rooms in account data. - // - Space children/parents. - // - Tagged rooms e.g favourites. - "resolve_tombstones": true, // If specified, only rooms where the `m.room.create` event has a `type` matching one // of the strings in this array will be returned. If this field is unset, all rooms are // returned regardless of type. This can be used to get the initial set of spaces for an account. @@ -428,12 +416,7 @@ namespaced by their event type. Fields in `required_state` are not sticky mainly expressing deletions becomes hard. The inclusion of a dedicated `is_encrypted` filter exists for the benefit of complex clients: see the E2EE section for more information. The `room_name_like` field exists to allow the ability to search by room name which most clients support, and is crucial for -large accounts. The `room_types` filters exist primarily to include/exclude spaces. The -`resolve_tombstones` flag exists to remove tombstoned rooms from the sync response: clients would be -filtering these rooms out anyway, so rather save on the bandwidth! It's slightly more complicated than -that as sometimes we do want to return a tombstoned room to the user if they haven't joined the upgraded -room yet, and this chain of tombstoned rooms has implications on filtering, hence why this is "resolving" -tombstones and not just `is_tombstoned`._ +large accounts. The `room_types` filters exist primarily to include/exclude spaces. 
The server will then return a `rooms` key which have the following fields: @@ -1183,7 +1166,6 @@ be spaces-aware for spaces filters. Extension MSCs will depend on this MSC for t * [x] `is_dm` * [x] `is_encrypted` * [x] `is_invite` - * [ ] `resolve_tombstones` * [x] `spaces` * [x] `room_types` and `not_room_types` * [x] `room_name_like` @@ -1193,9 +1175,11 @@ be spaces-aware for spaces filters. Extension MSCs will depend on this MSC for t - [ ] Bandwidth optimisations - [ ] E2EE highlight/notification count handling - Extensions: - - [x] To-device: [impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/todevice.go) - - [x] E2EE: [impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/e2ee.go) + - [x] To-device: [spec](https://github.com/matrix-org/matrix-spec-proposals/blob/kegan/msc3885/proposals/3885-sliding-sync-to-device.md)[impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/todevice.go) + - [x] E2EE: [spec](https://github.com/matrix-org/matrix-spec-proposals/blob/kegan/msc3884/proposals/3884-sliding-sync-e2ee.md)[impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/e2ee.go) - [x] Account Data: [impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/account_data.go) + - [ ] Tombstoned rooms + - [ ] Receipts - [ ] Ephemeral Events - [ ] Presence From 84c13615ba6004552a67a60856229512d6730eb2 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Fri, 21 Oct 2022 12:38:21 +0100 Subject: [PATCH 42/81] Add `include_old_rooms` support to handle tombstoned rooms --- proposals/3575-sync.md | 89 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 84 insertions(+), 5 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 1e4705327be..127da526836 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -79,7 +79,11 @@ A complete sync request looks like: "room_subscriptions": { "!sub1:bar": { "required_state": [ ["*","*"] ], - "timeline_limit": 50 + 
"timeline_limit": 50, + "include_old_rooms": { + "timeline_limit": 1, + "required_state": [ ["m.room.tombstone", ""], ["m.room.create", ""] ], + } } }, "unsubscribe_rooms": [ "!sub3:bar" ] @@ -347,6 +351,13 @@ One or more room lists can be requested in sliding sync like so: ], // Sticky. The maximum number of timeline events to return per response. "timeline_limit": 10, + + // See the "Tombstones" section for more information. + "include_old_rooms": { //sticky + "timeline_limit": 1, + "required_state": [ ["m.room.tombstone", ""] ] + }, + // Sticky. Filters to apply to the list before sorting. "filters": { // All fields below are Sticky. @@ -772,7 +783,11 @@ To track a room `!sub1:bar`, the client would send the following request: "room_subscriptions": { // sticky "!sub1:bar": { // sticky "required_state": [ ["*","*"] ], - "timeline_limit": 50 + "timeline_limit": 50, + "include_old_rooms": { // See the "Tombstones" section for more information. + "timeline_limit": 1, + "required_state": [ ["m.room.tombstone", ""] ] + }, } } } @@ -861,6 +876,7 @@ APIs also return that room data in the same part of the response. These keys are - `required_state`: Required state for each room returned. An array of event type and state key tuples. - `timeline_limit`: The maximum number of timeline events to return per response. + - `include_old_rooms`: Determines if `predecessor` rooms are included in the `rooms` response. _All_ room data is returned in a top-level `rooms` keys in the response JSON, regardless of whether this room is being returned due to it being a room subscription or in a list. This de-duplicates data @@ -870,11 +886,74 @@ the following rules: - `required_state`: Combine all arrays and treat it as a single unified array. - `timeline_limit`: Take the highest value. + - `include_old_rooms`: Presence of this field in any section turns this on. 
Due to this, clients need to take care to extract only the number of timeline events / state events they require from the `rooms` response, as it may include more data than they requested in a single list. +#### Tombstones + +By default, sliding sync will not return "old" rooms in lists. This is generally the right thing to do, +as many popular rooms have previous versions which would otherwise feature in the room list. This section +details the semantics for how sliding sync does this, and how to opt-out of this behaviour. + +There is no `is_tombstoned` filter in sliding sync. This is by design, as it is almost always not what +clients want. With a simple `is_tombstoned` filter, the moment another user upgrades a room, the room +will disappear from the room list for all other users. Not all tombstoned rooms are equal. If the user +has joined the `replacement_room`, then the previous room is treated as "old". If the user has not joined +the `replacement_room`, then the room is treated as live, and is eligible to be returned in sliding sync +responses. + +If the `include_old_rooms` field is set, the `rooms` field in the response may contain additional rooms. +These rooms are "old" rooms for _every matched room_ for a particular list or a particular room subscription, +depending where `include_old_rooms` was set in the request. The user MUST be joined to old rooms for them +to show up in the response. + +For example, given a list of joined rooms A, B, C, A2, A3 where A2 and A3 are newer versions of room A, +sliding sync will not return rooms A or A2 by default. 
The client may send the following direct room subscription +to include these rooms: +```js +{ + "room_subscriptions": { + "A3": { + "required_state": [ ["*","*"] ], + "timeline_limit": 50, + "include_old_rooms": { + "timeline_limit": 1, + "required_state": [ ["m.room.create", ""] ] + } + } + } +} +``` +This will result in a `rooms` response for A, A2 and A3, where A and A2 use the `timeline_limit: 1` and +`required_state: [ ["m.room.create", ""] ]` values, and A3 uses `timeline_limit: 50` and `required_state: [ ["*","*"] ]`. +If a client explicitly subscribes to an old room, say A2, then `include_old_rooms` works backwards from that point, +including A but not the newer room A3. + +These options work on lists as well: +```js +{ + "lists": [ + { + "include_old_rooms": { + "timeline_limit": 1 + }, + "timeline_limit": 50, + "filters": { + "is_encrypted": true + } + } + ] +} +``` +When applied to lists, old rooms MUST NOT be present in the list. They MUST be present in the `rooms` response only. +The old rooms DO NOT need to meet the filter criteria. That is to say, if A was unencrypted and A2 and A3 were encrypted, +this list would include only A3 (as old rooms must not be present), and would have a `rooms` response for A, A2 and A3: +room A is included even though it is unencrypted, because "oldness" takes precedence. Conversely, if the filter +was `is_encrypted: false`, then no rooms would be returned even though room A is joined and unencrypted, because it is +old and hence ineligible for being returned in a list. ### Notifications API @@ -1148,15 +1227,16 @@ be spaces-aware for spaces filters. 
Extension MSCs will depend on this MSC for t # Implementation state -[Proxy server](https://github.com/matrix-org/sliding-sync) (v0.4.0): +[Proxy server](https://github.com/matrix-org/sliding-sync) (v0.5.1): - Sliding Window API: - [x] Operation support - [x] Required state with wildcards - [x] Timeline limits - [x] Calculated room names - [x] Highlight/notification counts - - [ ] Joined and invited member counts + - [x] Joined and invited member counts - [x] Prev batch (token will cause duplicate events on /messages) + - [ ] `include_old_rooms` - Sorting: - [x] By recency - [x] By highlight count @@ -1178,7 +1258,6 @@ be spaces-aware for spaces filters. Extension MSCs will depend on this MSC for t - [x] To-device: [spec](https://github.com/matrix-org/matrix-spec-proposals/blob/kegan/msc3885/proposals/3885-sliding-sync-to-device.md)[impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/todevice.go) - [x] E2EE: [spec](https://github.com/matrix-org/matrix-spec-proposals/blob/kegan/msc3884/proposals/3884-sliding-sync-e2ee.md)[impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/e2ee.go) - [x] Account Data: [impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/account_data.go) - - [ ] Tombstoned rooms - [ ] Receipts - [ ] Ephemeral Events - [ ] Presence From 4efc165f405ea71a277e4b495347411fdadc66f7 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Fri, 21 Oct 2022 14:58:41 +0100 Subject: [PATCH 43/81] More notes on include_old_rooms --- proposals/3575-sync.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 127da526836..57d16f325a1 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -886,7 +886,10 @@ the following rules: - `required_state`: Combine all arrays and treat it as a single unified array. - `timeline_limit`: Take the highest value. - - `include_old_rooms`: Presence of this field in any section turns this on. 
+ - `include_old_rooms`: Presence of this field in any section turns this on. If there are multiple matches + for the same room ID (e.g explicit subscription and present in a list) then the inner values of + `required_state` and `timeline_limit` are unioned in the same way. + Due to this, clients need to take care to extract only the number of timeline events / state events they require from the `rooms` response, as it may include more data than they requested in a single @@ -910,6 +913,9 @@ These rooms are "old" rooms for _every matched room_ for a particular list or a depending where `include_old_rooms` was set in the request. The user MUST be joined to old rooms for them to show up in the response. +_TODO: we rely on include_old_rooms being set to "enable" this, but we mux together request based on nil-ness +so it's not possible to disable include_old_rooms by omitting it._ + For example, given a list of joined rooms A, B, C, A2, A3 where A2 and A3 are newer versions of room A, sliding sync will not return rooms A or A2 by default. The client may send the following direct room subscription to include these rooms: From 4ec8bfbdbc70e1d56249cf4e8ba1ce78bedb9baa Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Fri, 28 Oct 2022 13:13:34 +0100 Subject: [PATCH 44/81] Add lazy loading --- proposals/3575-sync.md | 85 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 74 insertions(+), 11 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 57d16f325a1..0f63afafef0 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -335,17 +335,32 @@ One or more room lists can be requested in sliding sync like so: // Sticky. Required state for each room returned. An array of event type and state key tuples. // Note that elements of this array are NOT sticky so they must be specified in full when they - // are changed. + // are changed. Elements in this array are ORd together to produce the final set of state events + // to return. 
One unique exception is when you request all state events via ["*", "*"]. When used, + // all state events are returned by default, and additional entries FILTER OUT the returned set + // of state events. These additional entries cannot use '*' themselves. + // For example, ["*", "*"], ["m.room.member", "@alice:example.com"] will _exclude_ every m.room.member + // event _except_ for @alice:example.com, and include every other state event. + // In addition, ["*", "*"], ["m.space.child", "*"] is an error, the m.space.child filter is not + // required as it would have been returned anyway. "required_state": [ // Request the join rules event. Note that the empty string is required here to match // the event's blank state_key. ["m.room.join_rules", ""], ["m.room.history_visibility", ""], - // Request all `m.room.member` state events. + // Request all `m.space.child` state events. // The * is a special sentinel value meaning 'all keys'. // Note that `*` is NOT a generic glob function. You cannot specify `foo*` to pull in keys // like `food` and `foobar`. In this case, the * is treated as a literal *. - ["m.room.member", "*"] + ["m.space.child", "*"], + // Request only the m.room.member events required to render events in the timeline. + // The "$LAZY" value is a special sentinel value meaning "lazy loading" and is only valid for + // the "m.room.member" event type. For more information on the semantics, see "Lazy-Loading Room Members". + ["m.room.member", "$LAZY"], + // Request your own m.room.member event. + // The "$ME" value is a special sentinel value meaning "my user id". It is valid for use on + // any state event, but is typically most useful on the m.room.member event. + ["m.room.member", "$ME"], // Request all state events. ["*", "*"] ], @@ -491,14 +506,6 @@ request the state before an event. As clients can be at different points in the room, this would force servers to cache every possible room state. 
It's not practical for servers to cache every single possible earlier state for each room._ -TODO: There is currently no lazy-loaded members support. Various suggestions like a special sentinel -value in required state e.g `["m.room.member", "LAZY"]` could be used. This may need to be specified -in this MSC or possibly an extension MSC (though it's unclear how it would fall under the filtering -extension MSC format). For context, lazy-loaded members refers to the sending of `m.room.member` -events for the _senders_ of the events that are present in `timeline`. Current implementations use -a LRU cache to cut down on sending duplicate events. In addition, we may also want a sentinel value -to indicate "the current user" e.g to always pull out the current user's member event -`["m.room.member", "$current"]`. Are there any other interesting state filters? ### Sliding Window API @@ -961,6 +968,62 @@ room A is included even though it is unencrypted, because "oldness" takes preced was `is_encrypted: false`, then no rooms would be returned even though room A is joined and unencrypted, because it is old and hence ineligible for being returned in a list. + +#### Lazy-Loading Room Members + +Room members in a room can be lazily-loaded by requesting the special value `$LAZY` as the state key +for the `m.room.member` event type in the `required_state` filter: +```js +{ + "required_state": [ + ["m.room.member", "$LAZY"] // activate lazy loading + ] +} +``` +At a high level, this can be thought of as requesting the `m.room.member` events for a set of unknown +user IDs. Typically, when you view a room, you want to retrieve all state events _except_ for `m.room.member` +events which you want to lazily load. 
To get this behaviour, clients can send the following: +```js +{ + "required_state": [ + ["m.room.member", "$LAZY"], // activate lazy loading + ["*", "*"] // request all state events _except_ for m.room.member events which are lazily loaded + ] +} +``` +Check the description of `required_state` for more information on this behaviour, as it is not specific to lazy-loading. + +The server processes `$LAZY` according to the following rules: + - Calculate the timeline entries that will be returned in this room. + - For each timeline entry, ensure the `m.room.member` event for the `sender` of the timeline event is included exactly once per user ID. + * This means if `timeline_limit: 0` then no `m.room.member` events are returned. + - The required state is always the current state, so if the timeline had [Alice join, msg, msg, Alice leave] then + the leave `m.room.member` event should be returned in `required_state`, even though the state at the time of the messages + was the join event. + * This means that for a timeline like [Alice join, msg, msg, Alice change name to A] the `m.room.member` event will + contain the display name "A" even though the display name was "Alice" at the time the messages were sent. Clients + need to look at the `unsigned.prev_content` section of the "A" event to work out what the display name was at the + time the messages were sent (rolling back state). Clients MUST NOT rely on seeing the correct "state before the event" + value in `required_state`. + - When the client is live streaming events, include the `m.room.member` event for the live events only if they have not been + sent before _during this connection_. This means servers must remember which user IDs it has sent `m.room.member` events for, + for the lifetime of the connection. If the live event is an `m.room.member` event itself, include it in both the timeline and + `required_state` to avoid clients needing to parse the timeline for current state. 
This is particularly important as the + client CANNOT reliably work out the current state from the timeline entries in the face of state resolution. + +Note: It is strongly advised to not lazy-load members in encrypted rooms, as the client needs a complete room member list +in order to determine which devices to encrypt messages for. It is possible to use lazy-loading members in conjunction +with the `/members` endpoint to extract the complete list of joined users, but this is only really useful if this is done +at the point of sending a message, as if you do it when you view a room you might as well just request the complete member +list via Sliding Sync. If you wait until the client sends a message to query `/members`, it will take longer to send the +message as the client will need to retrieve device information for all the users before it can send the event. If instead +the client retrieved this information when the room is initially viewed, the client has more time to pre-emptively fetch +this information to result in a snappier UX. Be careful if using `/members` as clients won't be able to use `?at=` to avoid +race conditions because sliding sync streaming tokens are not compatible with other endpoints. + + + + ### Notifications API If you are tracking the top 5 rooms and an event arrives in the 6th room, you will be notified about From 25eeb82769f2eaf7d7e4a15f8ec399504efa4407 Mon Sep 17 00:00:00 2001 From: kegsay Date: Thu, 17 Nov 2022 14:02:15 +0000 Subject: [PATCH 45/81] Update 3575-sync.md --- proposals/3575-sync.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 0f63afafef0..8c0d156c811 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -1289,6 +1289,18 @@ Whilst this in MSC review the HTTP path will be `/_matrix/client/unstable/org.ma with the intention of this eventually becoming (confusingly) `/_matrix/client/v4/sync`. 
As this is a brand new endpoint, no other keys or fields need prefixing. +Homeservers can advertise support for a sliding sync proxy by adding the following to their +`/.well-known/matrix/client` config: +```json +{ + "org.matrix.msc3575.proxy": { + "url": "https://slidingsync.proxy.url.here" + } +} +``` +This allows servers to declare an "official" trusted proxy, rather than using other URLs which may be +run by malicious actors who want to steal the access token for users. + # Dependencies There are no MSCs required for the core functionality to be implemented. Servers and clients need to From 2538552705487ecef34abf1dd1afb61e25a06f28 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Thu, 1 Dec 2022 15:56:52 +0000 Subject: [PATCH 46/81] Add num_live --- proposals/3575-sync.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 8c0d156c811..cc7d5b90912 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -480,7 +480,14 @@ The server will then return a `rooms` key which have the following fields: "joined_count": 41, // same as sync v2 m.joined_member_count "invited_count": 1, // same as sync v2 m.invited_member_count "notification_count": 54, // same as sync v2 - "highlight_count": 3 // same as sync v2 + "highlight_count": 3, // same as sync v2 + // The number of timeline events which have just occurred and are not historical. + // The last N events are 'live' and should be treated as such. + // This is mostly useful to determine whether a given @mention event should make a noise or not. 
+ // Clients cannot rely solely on the absence of 'initial: true' to determine live events because + // if a room not in the sliding window bumps into the window because of an @mention it will have + // 'initial: true' yet contain a single live event (with potentially other old events in the timeline) + "num_live": 1 } } } From 607ec75cae0e610415374c2129425c1ffdc7389e Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Tue, 20 Dec 2022 14:00:15 +0000 Subject: [PATCH 47/81] BREAKING: Support lists-as-keys --- proposals/3575-sync.md | 94 ++++++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 41 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index cc7d5b90912..2c4ab12d8be 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -59,8 +59,8 @@ A complete sync request looks like: "txn_id": "client-chosen-string", // Sliding Window API - "lists": [ - { + "lists": { + "client_chosen_key": { "ranges": [ [0,99] ], "sort": [ "by_notification_level", "by_recency", "by_name" ], "required_state": [ @@ -73,7 +73,7 @@ A complete sync request looks like: "is_dm": true } } - ], + }, // Room Subscriptions API "room_subscriptions": { @@ -101,8 +101,8 @@ An entire response looks like: "txn_id": "client-chosen-string", // echo of the txn ID // Sliding Window API - "lists": [ - { + "lists": { + "client_chosen_key": { "count": 1337, "ops": [ { @@ -114,7 +114,7 @@ An entire response looks like: } ] } - ], + }, // Aggregated rooms from lists and room subscriptions "rooms": { @@ -324,8 +324,11 @@ have not been modified, then the `txn_id` does not need to be sent. One or more room lists can be requested in sliding sync like so: ```js { - "lists": [ - { + // A map of list key to list information. Max lists: 100. + "lists": { + // an arbitrary string which the client is using to refer to this list for this connection. Keep + // this small as it needs to be sent a lot. Max length: 64 bytes. 
+ "client_chosen_key": { // Sliding window ranges, see the Sliding Window API for more information. // If this field is missing, no sliding window is used and all rooms are returned in this list. "ranges": [ [0,99] ], @@ -426,7 +429,7 @@ One or more room lists can be requested in sliding sync like so: "not_tags": ["m.lowpriority"] } } - ], + }, } ``` @@ -442,7 +445,15 @@ namespaced by their event type. Fields in `required_state` are not sticky mainly expressing deletions becomes hard. The inclusion of a dedicated `is_encrypted` filter exists for the benefit of complex clients: see the E2EE section for more information. The `room_name_like` field exists to allow the ability to search by room name which most clients support, and is crucial for -large accounts. The `room_types` filters exist primarily to include/exclude spaces. +large accounts. The `room_types` filters exist primarily to include/exclude spaces. A previous version +of this MSC expressed multiple lists as an array and not an object with client-chosen keys. This was +changed because using arrays had a few undesirable consequences: you couldn't just edit list #3, you +had to add stub lists in index positions 0,1,2 first, and likewise the response demanded stub responses +which always included the `count` field to pad out earlier lists to get to the list index that was modified. +In addition, it was unclear how to delete a list. Also, some clients would race on startup to create lists, +which would result in different index positions being allocated, which made it hard for client code to +then refer deterministically to specific lists. The workaround basically assigned static names to each +list which then mapped to an index position. 
By using an object, these issues disappear._ The server will then return a `rooms` key which have the following fields: @@ -555,8 +566,8 @@ The sliding sync API exposes this API shape via the following request parameters ```js { // Multiple lists can be requested - "lists": [ - { + "lists": { + "list1": { // Multiple sliding windows inside a list can be requested. Integers are _inclusive_. "ranges": [ [0,9], [20,29] ], // How the list should be sorted on the server. The first value is applied first, then tiebreaks @@ -565,18 +576,16 @@ The sliding sync API exposes this API shape via the following request parameters // Additional Room List request parameters omitted as they are // unrelated to the semantics of the sliding window, see previous section. } - ], + }, } ``` Which returns the following response parameters: ```js { - // This array is exactly the same length as the `lists` provided in the request. All lists MUST - // return at the very least a `count`, even if there are no changes to the list. - "lists": [ - // List 0 - { - // The total number of entries in the list. Always present. + // This object echoes back the list keys provided in the request. + "lists": { + "list1": { + // The total number of entries in the list. Always present if this list is. "count": 1337, // The sliding list operations to perform. "ops": [ @@ -594,7 +603,7 @@ Which returns the following response parameters: } ] } - ], + }, // The room data to use for each room ID. This data represents the point in time AFTER all // ops have been applied. 
For example, if a room had 2 new events which changed its list position // then you could see `ops` with DELETE[4,!foo:bar], INSERT[0,!foo:bar], DELETE[0,!foo:bar], INSERT[1,!foo:bar] @@ -720,30 +729,30 @@ operation to let the client know which rooms in the `rooms` object were from thi An example request: ```js { - "lists": [ - // list 0 will include all encrypted rooms in one go - { + "lists": { + // list will include all encrypted rooms in one go + "list_all_encrypted": { "slow_get_all_rooms": true, "filters": { "is_encrypted": true } }, - // list 1 will include the first 20 unencrypted rooms sorted accordingly - { + // list will include the first 20 unencrypted rooms sorted accordingly + "list_unencrypted": { "ranges": [ [0,19] ], "sort": [ "by_notification_level", "by_recency" ], "filters": { "is_encrypted": false } } - ], + }, } ``` Would return the response: ```js { - "lists": [ - { + "lists": { + "list_all_encrypted": { "count": 1337, "ops": [ { @@ -755,7 +764,7 @@ Would return the response: } ] }, - { + "list_unencrypted": { "count": 420, "ops": [ { @@ -767,7 +776,7 @@ Would return the response: } ] } - ], + }, "rooms": { "!encrypted:bar": { ... @@ -955,8 +964,8 @@ including A but not the newer room A3. These options work on lists as well: ```js { - "lists": [ - { + "lists": { + "a": { "include_old_rooms": { "timeline_limit": 1 }, @@ -965,7 +974,7 @@ These options work on lists as well: "is_encrypted": true } } - ] + } } ``` When applied to lists, old rooms MUST NOT be present in the list. They MUST be present in the `rooms` response only. @@ -1169,11 +1178,12 @@ windows as well as which rooms are explicitly subscribed to. In an effort to reduce the size of this proposal, extensions will be done in separate MSCs. 
There will be extensions for: - - To Device Messaging - [spec](https://spec.matrix.org/v1.1/client-server-api/#extensions-to-sync) [impl](https://github.com/matrix-org/sliding-sync/blob/main/sync3/extensions/todevice.go) - - End-to-End Encryption - [spec](https://spec.matrix.org/v1.1/client-server-api/#extensions-to-sync-1) [impl](https://github.com/matrix-org/sliding-sync/blob/main/sync3/extensions/e2ee.go) - - Ephemeral Events - typing notifications, receipts: [spec](https://spec.matrix.org/v1.1/client-server-api/#client-behaviour-4) + - To Device Messaging - [MSC3885](https://github.com/matrix-org/matrix-spec-proposals/pull/3885) + - End-to-End Encryption - [MSC3884](https://github.com/matrix-org/matrix-spec-proposals/pull/3884) + - Typing Notifications - [impl](https://github.com/matrix-org/sliding-sync/blob/main/sync3/extensions/typing.go) + - Receipts - [impl](https://github.com/matrix-org/sliding-sync/blob/main/sync3/extensions/receipts.go) - Presence - `presence` in sync v2: [spec](https://spec.matrix.org/v1.1/client-server-api/#get_matrixclientv3sync) - - Account Data - `account_data` in sync v2: [spec](https://spec.matrix.org/v1.1/client-server-api/#get_matrixclientv3sync) [impl](https://github.com/matrix-org/sliding-sync/blob/main/sync3/extensions/account_data.go) + - Account Data - `account_data` in sync v2: [impl](https://github.com/matrix-org/sliding-sync/blob/main/sync3/extensions/account_data.go) _Rationale: The name 'extensions' is inspired by the spec itself which refers to "Extensions to /sync" multiple times. These additional bits of data are all generally outside the scope of the core room @@ -1289,6 +1299,7 @@ This API presents new ways for clients to request complex operations which runs of service attacks: - Complex or pathological filter/sort options (especially via extensions) may degrade performance on the server and client. This may affect other users on the server. + - Excessively long lists, list keys, ranges, etc. 
Some limits are specified in this MSC to mitigate against this. # Unstable prefix @@ -1315,7 +1326,8 @@ be spaces-aware for spaces filters. Extension MSCs will depend on this MSC for t # Implementation state -[Proxy server](https://github.com/matrix-org/sliding-sync) (v0.5.1): +[Proxy server](https://github.com/matrix-org/sliding-sync) (v0.98.0): + - **WARNING: This version uses lists-as-arrays, not lists-as-keys that this MSC describes!** - Sliding Window API: - [x] Operation support - [x] Required state with wildcards @@ -1324,7 +1336,7 @@ be spaces-aware for spaces filters. Extension MSCs will depend on this MSC for t - [x] Highlight/notification counts - [x] Joined and invited member counts - [x] Prev batch (token will cause duplicate events on /messages) - - [ ] `include_old_rooms` + - [x] `include_old_rooms` - Sorting: - [x] By recency - [x] By highlight count @@ -1346,8 +1358,8 @@ be spaces-aware for spaces filters. Extension MSCs will depend on this MSC for t - [x] To-device: [spec](https://github.com/matrix-org/matrix-spec-proposals/blob/kegan/msc3885/proposals/3885-sliding-sync-to-device.md)[impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/todevice.go) - [x] E2EE: [spec](https://github.com/matrix-org/matrix-spec-proposals/blob/kegan/msc3884/proposals/3884-sliding-sync-e2ee.md)[impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/e2ee.go) - [x] Account Data: [impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/account_data.go) - - [ ] Receipts - - [ ] Ephemeral Events + - [x] Receipts [impl](https://github.com/matrix-org/sliding-sync/blob/main/sync3/extensions/receipts.go) + - [x] Typing Notifications [impl](https://github.com/matrix-org/sliding-sync/blob/main/sync3/extensions/typing.go) - [ ] Presence # Appendices From 5b2577da54b07c3360777a2b4cc3f538780ff0d3 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Mon, 9 Jan 2023 17:31:36 +0000 Subject: [PATCH 48/81] Update with bandwidth 
optimisations/tokens --- proposals/3575-sync.md | 116 +++++++++++++++++++++++++++++------------ 1 file changed, 83 insertions(+), 33 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 2c4ab12d8be..60ea4e17ca8 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -58,6 +58,10 @@ A complete sync request looks like: // functionally similar to txn IDs on /send for events. "txn_id": "client-chosen-string", + // a bandwidth token to remember information between sessions. + // See "Bandwidth optimisations for persistent clients" for more information. + "bw_token": "opaque-server-provided-string", + // Sliding Window API "lists": { "client_chosen_key": { @@ -170,7 +174,10 @@ An entire response looks like: }, // Extensions API - "extensions": {} + "extensions": {}, + + // Bandwidth optimisations, see "Bandwidth optimisations for persistent clients" + "bw_token": "server-generated-string" // the bandwidth token to use } ``` These fields and their interactions are explained in the next few sections. This forms the core of @@ -1062,39 +1069,82 @@ avoid the churn e.g only two digits of precision (21 -> 21, but 1234 -> 1200), t ### Bandwidth optimisations for persistent clients -The API assumes that room data is deleted on the client when the room falls out of the sliding -window or a window gets invalidated. The API will send the entire `required_state` and `timeline` -again when the room re-appears. This is wasteful if the client remembers the state/timeline and there -have been no changes. This is similar to how Sync v2 behaves with `?full_state=true` set. - -The API exposes an opt-in mechanism for providing efficient delta updates. On re-establishing a sync -connection, or re-requesting a page that was previously INVALIDATEd, the server will perform the -following operations: - - For this device: check the last sent event ID for the room ID in question. Count the number of - timeline events from that point to the latest event. 
Call it `N`. - - For this specific sync request: calculate a reasonable upper-bound for how many events will be - returned in a reasonable worst-case scenario. This is simply `timeline_limit + len(required_state)` - (ignoring `*` wildcards on state). Call it `M`. - - If N > M then we would probably send more events if we did a delta than just telling the client - everything from scratch, so issue a `SYNC` for this room. - - If N < M then we don't have many events since the connection was last established, so just send - the delta as an update. - -This approach has numerous benefits: - - In the common case when you scroll a room, you won't get any `SYNC`s for rooms that were invalidated - because it's highly unlikely to receive 10+ events during the room scroll (assuming you scroll - back up in reasonable time). - - When you reconnect after sleeping your laptop overnight, most rooms will be updates, and busy - rooms like Matrix HQ will be `SYNC`ed from fresh rather than sending 100s of events. - -This imposes more restrictions on the server implementation: - - Servers still need the absolute stream ordering for events to work out how many events from - `$event_id` to `$latest_event_id`. - - Servers need to remember the last sent event ID for each device for each room. If rooms share a - single monotonically increasing stream, then this is a single integer per device (akin to today's - sync tokens for PDU events). Servers need to remember _which rooms_ have been sent to the client, - along with the stream position when that was sent. So it's basically a `map[string]int64`. +The Sliding Sync API assumes that room data is deleted on the client when: + - the room falls out of the sliding window or; + - a window gets invalidated or; + - the session expires and a new session is created. + +The API will send the entire `required_state` and `timeline` again when the room re-appears for the 2nd time. 
+This is wasteful if the client remembers the state/timeline and there have been no changes. + +To resolve this, the API exposes an opt-in mechanism for providing efficient delta updates. This is +encoded into a "bandwidth token" which is an opaque string. If a request is missing a bandwidth token, +no bandwidth optimisations are applied. This token sits at the top-level of the request/response JSON +as `bw_token`. If a bandwidth token is provided, the server SHOULD remove events that have already +been sent *and acknowledged* by the client. The list of fields which this can apply to is not fully +determined, but SHOULD include: + - `required_state` events + - `timeline` events + - Any extensions which return events e.g `account_data`. Extensions which make use of the bandwidth + token MUST state so in its MSC. + +The bandwidth token sits outside the scope of sessions, and hence can be used to remember data between +sessions. The token remembers the following information for every room which has been sent to the client: + - The event ID of the last sent timeline event. + - The event ID of the last sent `required_state` event, keyed off `(type, state_key)`. + +When the client makes a new connection, or when a room re-appears inside a window, the following algorithm +is applied: + - Work out the `required_state` for this room as if there was no bandwidth token. + - Filter `required_state` by looking for event ID matches referenced by the bandwidth token. If there is + a match, remove that state event. + - Work out the `timeline` for this room as if there was no bandwidth token. + - Attempt to find the last sent timeline event referenced by the bandwidth token. If it is found, discard + all events before this event, including the referenced event itself. The remaining timeline events are + sent to the client. + +This can create gaps in the timeline, but this could already happen between sessions for persistent clients. 
+It is up to the client to resolve gaps by querying `/messages`. The `prev_batch` token MUST be updated if +events are filtered out. + +A worked example: + - Client hits `/sync` and accumulates some data for rooms. They are using a bandwidth token. + - The client goes offline for a while, and room data changes. The client's session expires. + - The client reappears and hits `/sync` to start a new session. The bandwidth token takes effect and + returns a much smaller delta. + +``` + Client Server + |------------/sync---------------->| + | timeline_limit=4, | + | required_state=[PL,avatar] | Generate response, store bw_token=X + | | Room1, last_timeline_event=$D, m.room.power_levels=$B, m.room.avatar=$C + |<-----------/sync-----------------| + | Room1,timeline[$A,$B,$C,$D] | + | required_state=[$B,$C],bw_token=X| + | | + ... time passes ... Room1 new events $E,$F, m.room.avatar updates to $F + | | + |------------/sync---------------->| + | bw_token=X,timeline_limit=4 | Generate response: timeline=[$C,$D,$E,$F], PL=$B, avatar=$F + | required_state=[PL,avatar] | Compare with bw_token: last_timeline=$D PL=$B, avatar=$C + | | Diff: timeline=[$E,$F] avatar=$F + |<-----------/sync-----------------| + | Room1,timeline[$E,$F] | + | required_state=[$F],bw_token=Y | +``` +#### Limitations + +The bandwidth token will not work under the following scenarios: + - The timeline is filtered in some way. Currently Sliding Sync provides no filtering mechanism + for timeline events but it will in the future. Any filters need to be the same between sessions + for the bandwidth optimisations to work at all. + - `m.room.member` events are excluded from these calculations. Bandwidth tokens map to a lot of + data server-side. In an effort to bound the growth of this data, `m.room.member` events MAY be + sent redundantly even if the client has been sent it before. 
This also reduces the chances of + missing an `m.room.member` event, which would risk causing E2EE key issues as the client would fail + to encrypt for the target room member. ### E2EE Handling From f5fae29fd709f34b8203371a9f46135f213ab254 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Mon, 9 Jan 2023 17:36:28 +0000 Subject: [PATCH 49/81] Allow bw tokens to expire --- proposals/3575-sync.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 60ea4e17ca8..3f77437d831 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -1145,6 +1145,10 @@ The bandwidth token will not work under the following scenarios: sent redundantly even if the client has been sent it before. This also reduces the chances of missing an `m.room.member` event, which would risk causing E2EE key issues as the client would fail to encrypt for the target room member. + - The server does not need to remember bandwidth tokens and the associated data forever. The server + can expire this data whenever they want, which will result in more redundant information being sent + to the client and a new bandwidth token being generated. This MSC recommends that servers keep bandwidth + tokens valid for at least 7 days. ### E2EE Handling From d17f875968093dc9a220457f5ce54c57096deea8 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Mon, 9 Jan 2023 18:40:21 +0000 Subject: [PATCH 50/81] s/bandwidth_token/delta_token/g --- proposals/3575-sync.md | 44 +++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 3f77437d831..58629194367 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -58,9 +58,9 @@ A complete sync request looks like: // functionally similar to txn IDs on /send for events. "txn_id": "client-chosen-string", - // a bandwidth token to remember information between sessions. 
+ // a delta token to remember information between sessions. // See "Bandwidth optimisations for persistent clients" for more information. - "bw_token": "opaque-server-provided-string", + "delta_token": "opaque-server-provided-string", // Sliding Window API "lists": { @@ -177,7 +177,7 @@ An entire response looks like: "extensions": {}, // Bandwidth optimisations, see "Bandwidth optimisations for persistent clients" - "bw_token": "server-generated-string" // the bandwidth token to use + "delta_token": "server-generated-string" // the delta token to use } ``` These fields and their interactions are explained in the next few sections. This forms the core of @@ -1078,28 +1078,28 @@ The API will send the entire `required_state` and `timeline` again when the room This is wasteful if the client remembers the state/timeline and there have been no changes. To resolve this, the API exposes an opt-in mechanism for providing efficient delta updates. This is -encoded into a "bandwidth token" which is an opaque string. If a request is missing a bandwidth token, +encoded into a "delta token" which is an opaque string. If a request is missing a delta token, no bandwidth optimisations are applied. This token sits at the top-level of the request/response JSON -as `bw_token`. If a bandwidth token is provided, the server SHOULD remove events that have already +as `delta_token`. If a delta token is provided, the server SHOULD remove events that have already been sent *and acknowledged* by the client. The list of fields which this can apply to is not fully determined, but SHOULD include: - `required_state` events - `timeline` events - - Any extensions which return events e.g `account_data`. Extensions which make use of the bandwidth + - Any extensions which return events e.g `account_data`. Extensions which make use of the delta token MUST state so in its MSC. 
-The bandwidth token sits outside the scope of sessions, and hence can be used to remember data between +The delta token sits outside the scope of sessions, and hence can be used to remember data between sessions. The token remembers the following information for every room which has been sent to the client: - The event ID of the last sent timeline event. - The event ID of the last sent `required_state` event, keyed off `(type, state_key)`. When the client makes a new connection, or when a room re-appears inside a window, the following algorithm is applied: - - Work out the `required_state` for this room as if there was no bandwidth token. - - Filter `required_state` by looking for event ID matches referenced by the bandwidth token. If there is + - Work out the `required_state` for this room as if there was no delta token. + - Filter `required_state` by looking for event ID matches referenced by the delta token. If there is a match, remove that state event. - - Work out the `timeline` for this room as if there was no bandwidth token. - - Attempt to find the last sent timeline event referenced by the bandwidth token. If it is found, discard + - Work out the `timeline` for this room as if there was no delta token. + - Attempt to find the last sent timeline event referenced by the delta token. If it is found, discard all events before this event, including the referenced event itself. The remaining timeline events are sent to the client. @@ -1108,46 +1108,46 @@ It is up to the client to resolve gaps by querying `/messages`. The `prev_batch` events are filtered out. A worked example: - - Client hits `/sync` and accumulates some data for rooms. They are using a bandwidth token. + - Client hits `/sync` and accumulates some data for rooms. They are using a delta token. - The client goes offline for a while, and room data changes. The client's session expires. - - The client reappears and hits `/sync` to start a new session. 
The bandwidth token takes effect and + - The client reappears and hits `/sync` to start a new session. The delta token takes effect and returns a much smaller delta. ``` Client Server |------------/sync---------------->| | timeline_limit=4, | - | required_state=[PL,avatar] | Generate response, store bw_token=X + | required_state=[PL,avatar] | Generate response, store delta_token=X | | Room1, last_timeline_event=$D, m.room.power_levels=$B, m.room.avatar=$C |<-----------/sync-----------------| | Room1,timeline[$A,$B,$C,$D] | - | required_state=[$B,$C],bw_token=X| + | req_state=[$B,$C],delta_token=X | | | ... time passes ... Room1 new events $E,$F, m.room.avatar updates to $F | | |------------/sync---------------->| - | bw_token=X,timeline_limit=4 | Generate response: timeline=[$C,$D,$E,$F], PL=$B, avatar=$F - | required_state=[PL,avatar] | Compare with bw_token: last_timeline=$D PL=$B, avatar=$C + | delta_token=X,timeline_limit=4 | Generate response: timeline=[$C,$D,$E,$F], PL=$B, avatar=$F + | required_state=[PL,avatar] | Compare with delta_token: last_timeline=$D PL=$B, avatar=$C | | Diff: timeline=[$E,$F] avatar=$F |<-----------/sync-----------------| | Room1,timeline[$E,$F] | - | required_state=[$F],bw_token=Y | + | req_state=[$F],delta_token=Y | ``` #### Limitations -The bandwidth token will not work under the following scenarios: +The delta token will not work under the following scenarios: - The timeline is filtered in some way. Currently Sliding Sync provides no filtering mechanism for timeline events but it will in the future. Any filters need to be the same between sessions for the bandwidth optimisations to work at all. - - `m.room.member` events are excluded from these calculations. Bandwidth tokens map to a lot of + - `m.room.member` events are excluded from these calculations. Delta tokens map to a lot of data server-side. 
In an effort to bound the growth of this data, `m.room.member` events MAY be sent redundantly even if the client has been sent it before. This also reduces the chances of missing an `m.room.member` event, which would risk causing E2EE key issues as the client would fail to encrypt for the target room member. - - The server does not need to remember bandwidth tokens and the associated data forever. The server + - The server does not need to remember delta tokens and the associated data forever. The server can expire this data whenever they want, which will result in more redundant information being sent - to the client and a new bandwidth token being generated. This MSC recommends that servers keep bandwidth + to the client and a new delta token being generated. This MSC recommends that servers keep bandwidth tokens valid for at least 7 days. ### E2EE Handling From b4b4e7ff306920d2c862c6ff4d245110f6fa5bc7 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Mon, 9 Jan 2023 18:41:08 +0000 Subject: [PATCH 51/81] Missed one --- proposals/3575-sync.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 58629194367..c99f5014dc5 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -1147,7 +1147,7 @@ The delta token will not work under the following scenarios: to encrypt for the target room member. - The server does not need to remember delta tokens and the associated data forever. The server can expire this data whenever they want, which will result in more redundant information being sent - to the client and a new delta token being generated. This MSC recommends that servers keep bandwidth + to the client and a new delta token being generated. This MSC recommends that servers keep delta tokens valid for at least 7 days. 
### E2EE Handling From b5788bd7963531de9988da0c2d593056c4ed56e1 Mon Sep 17 00:00:00 2001 From: kegsay Date: Wed, 8 Feb 2023 10:02:25 +0000 Subject: [PATCH 52/81] Update 3575-sync.md --- proposals/3575-sync.md | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index c99f5014dc5..17f4b3f1f03 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -1380,8 +1380,7 @@ be spaces-aware for spaces filters. Extension MSCs will depend on this MSC for t # Implementation state -[Proxy server](https://github.com/matrix-org/sliding-sync) (v0.98.0): - - **WARNING: This version uses lists-as-arrays, not lists-as-keys that this MSC describes!** +[Proxy server](https://github.com/matrix-org/sliding-sync) (v0.99.0): - Sliding Window API: - [x] Operation support - [x] Required state with wildcards @@ -1409,11 +1408,11 @@ be spaces-aware for spaces filters. Extension MSCs will depend on this MSC for t - [ ] Bandwidth optimisations - [ ] E2EE highlight/notification count handling - Extensions: - - [x] To-device: [spec](https://github.com/matrix-org/matrix-spec-proposals/blob/kegan/msc3885/proposals/3885-sliding-sync-to-device.md)[impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/todevice.go) - - [x] E2EE: [spec](https://github.com/matrix-org/matrix-spec-proposals/blob/kegan/msc3884/proposals/3884-sliding-sync-e2ee.md)[impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/e2ee.go) - - [x] Account Data: [impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/account_data.go) - - [x] Receipts [impl](https://github.com/matrix-org/sliding-sync/blob/main/sync3/extensions/receipts.go) - - [x] Typing Notifications [impl](https://github.com/matrix-org/sliding-sync/blob/main/sync3/extensions/typing.go) + - [x] To-device: [spec](https://github.com/matrix-org/matrix-spec-proposals/blob/kegan/msc3885/proposals/3885-sliding-sync-to-device.md) 
[impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/todevice.go) + - [x] E2EE: [spec](https://github.com/matrix-org/matrix-spec-proposals/blob/kegan/msc3884/proposals/3884-sliding-sync-e2ee.md) [impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/e2ee.go) + - [x] Account Data: [spec](https://github.com/matrix-org/matrix-spec-proposals/blob/kegan/ssext-account-data/proposals/3959-sliding-sync-account-data.md) [impl](https://github.com/matrix-org/sync-v3/blob/main/sync3/extensions/account_data.go) + - [x] Receipts [spec](https://github.com/matrix-org/matrix-spec-proposals/pull/3960) [impl](https://github.com/matrix-org/sliding-sync/blob/main/sync3/extensions/receipts.go) + - [x] Typing Notifications [spec](https://github.com/matrix-org/matrix-spec-proposals/blob/kegan/ssext-typing/proposals/3961-sliding-sync-typing.md) [impl](https://github.com/matrix-org/sliding-sync/blob/main/sync3/extensions/typing.go) - [ ] Presence # Appendices From 89cf0341b5fac4311839bed4b90070faae14de91 Mon Sep 17 00:00:00 2001 From: kegsay Date: Tue, 21 Feb 2023 09:10:25 +0000 Subject: [PATCH 53/81] Update 3575-sync.md --- proposals/3575-sync.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 17f4b3f1f03..c123c734be5 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -1423,4 +1423,4 @@ with this MSC. As such, they are represented as a sequence of JSON objects. Thes exhaustive, and don't account for authentication via access tokens or handling multiple user accounts. For brevity, only fields that concern sliding sync are included in event descriptions. 
-TODO: once the API has stabilised +TODO From 64a6f495e9c6964ac567bf7fdc323cc543e1301b Mon Sep 17 00:00:00 2001 From: kegsay Date: Tue, 28 Feb 2023 12:32:07 +0000 Subject: [PATCH 54/81] Update proposals/3575-sync.md Co-authored-by: Travis Ralston --- proposals/3575-sync.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index c123c734be5..646b48b45f9 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -1,6 +1,6 @@ # MSC3575: Sliding Sync (aka Sync v3) -This MSC outlines a replacement for the CS API endpoint `/sync`. +This MSC outlines a replacement for the [CS API endpoint `/sync`](https://spec.matrix.org/v1.6/client-server-api/#get_matrixclientv3sync). The current `/sync` endpoint scales badly as the number of rooms on an account increases. It scales badly because all rooms are returned to the client, and clients cannot opt-out of a large amount of From 4f5d3bf744bda107955393d9beae043d49c1b6b9 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 14 Mar 2023 16:31:05 +0000 Subject: [PATCH 55/81] Mark samples as `json5` and `jsonl` This makes goland happier. 
Probably supported, because it's part of https://github.com/github/linguist/blob/master/lib/linguist/languages.yml#L3143-L3152 which is mentioned by https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/creating-and-highlighting-code-blocks#syntax-highlighting --- proposals/3575-sync.md | 44 +++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 646b48b45f9..2c5171451af 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -52,7 +52,7 @@ It also introduces a number of new concepts which are explained in more detail l ## Core A complete sync request looks like: `POST /_matrix/client/unstable/org.matrix.msc3575/sync?pos=4&timeout=30000`: -```js +```json5 { // allows clients to know what request params reached the server, // functionally similar to txn IDs on /send for events. @@ -98,7 +98,7 @@ A complete sync request looks like: ``` An entire response looks like: `HTTP 200 OK` -```js +```json5 { // Connection and Streaming API "pos": "5", @@ -266,7 +266,7 @@ value and not introspect it. When a `pos` is invalidated and the client attempts to use the `pos`, the server MUST send back a standard error response as a HTTP 400 containing: -```js +```json5 { "error": "Unknown position", "errcode": "M_UNKNOWN_POS" @@ -302,7 +302,7 @@ Client Server ``` For complex nested data, APIs which include sticky parameters MUST indicate every sticky field to avoid ambiguity. For example, an ambiguous API may state the following: -```js +```json5 { "foo": { // sticky "bar": 1, @@ -311,7 +311,7 @@ avoid ambiguity. For example, an ambiguous API may state the following: } ``` When this object is combined with an the additional object: -```js +```json5 { "foo": { "bar": 3 @@ -329,7 +329,7 @@ have not been modified, then the `txn_id` does not need to be sent.
### Room List parameters One or more room lists can be requested in sliding sync like so: -```js +```json5 { // A map of list key to list information. Max lists: 100. "lists": { @@ -464,7 +464,7 @@ list which then mapped to an index position. By using an object, these issues di The server will then return a `rooms` key which have the following fields: -```js +```json5 { "rooms": { // the room ID @@ -570,7 +570,7 @@ For example: [J,K,L,N,O] | <- DELETE[3], INSERT[4]=O--- | ``` The sliding sync API exposes this API shape via the following request parameters: -```js +```json5 { // Multiple lists can be requested "lists": { @@ -587,7 +587,7 @@ The sliding sync API exposes this API shape via the following request parameters } ``` Which returns the following response parameters: -```js +```json5 { // This object echoes back the list keys provided in the request. "lists": { @@ -686,7 +686,7 @@ additional sort operations is useful, see the extensions section for more inform The complete API shape for each operation is shown below (note the key names vary on the operation): -```js +```jsonl { "op": "DELETE", "index": 8 @@ -734,7 +734,7 @@ and INSERT operations when rooms are left or joined respectively. In addition, t operation to let the client know which rooms in the `rooms` object were from this list. An example request: -```js +```json5 { "lists": { // list will include all encrypted rooms in one go @@ -756,7 +756,7 @@ An example request: } ``` Would return the response: -```js +```json5 { "lists": { "list_all_encrypted": { @@ -808,7 +808,7 @@ provides a map of room ID to room list parameters and the server then returns th same format as the sliding window API, just without the operations/indexes. 
To track a room `!sub1:bar`, the client would send the following request: -```js +```json5 { "room_subscriptions": { // sticky "!sub1:bar": { // sticky @@ -823,7 +823,7 @@ To track a room `!sub1:bar`, the client would send the following request: } ``` This would return the following response: -```js +```json5 { "rooms": { "!sub1:bar": { @@ -853,7 +853,7 @@ This would return the following response: Any updates in this room would be returned in the same section of the sync response: -```js +```json5 { "rooms": { "!sub1:bar": { @@ -871,7 +871,7 @@ of `required_state` and `timeline_limit` filtering. To unsubscribe from a room, the client needs to send a request with the room ID to unsubscribe from in the `unsubscribe_rooms` array: -```js +```json5 { "unsubscribe_rooms": [ "!sub1:bar" ] } @@ -880,7 +880,7 @@ This will delete that key from the `room_subscriptions` map on the server. It is to view one room then swap to another room. This can be modelled as a subscription on the new room coupled with unsubscribing from the old room. For example, if the client swapped from viewing `!sub1:bar` to `!sub2:bar`: -```js +```json5 { "room_subscriptions": { "!sub2:bar": { @@ -949,7 +949,7 @@ so it's not possible to disable include_old_rooms by omitting it._ For example, given a list of joined rooms A, B, C, A2, A3 where A2 and A3 are newer versions of room A, sliding sync will not return rooms A or A2 by default. The client may send the following direct room subscription to include these rooms: -```js +```json5 { "room_subscriptions": { "A3": { @@ -969,7 +969,7 @@ If a client explicitly subscribes to an old room, say A2, then `include_old_room including A but not the newer room A3. These options work on lists as well: -```js +```json5 { "lists": { "a": { @@ -996,7 +996,7 @@ old and hence ineligible for being returned in a list. 
Room members in a room can be lazily-loaded by requesting the special value `$LAZY` as the state key for the `m.room.member` event type in the `required_state` filter: -```js +```json5 { "required_state": [ ["m.room.member", "$LAZY"] // activate lazy loading @@ -1006,7 +1006,7 @@ for the `m.room.member` event type in the `required_state` filter: At a high level, this can be thought of as requesting the `m.room.member` events for a set of unknown user IDs. Typically, when you view a room, you want to retrieve all state events _except_ for `m.room.member` events which you want to lazily load. To get this behaviour, clients can send the following: -```js +```json5 { "required_state": [ ["m.room.member", "$LAZY"], // activate lazy loading @@ -1211,7 +1211,7 @@ We anticipate that as more features land in Matrix, different kinds of data will to clients. Sync v2 did not have any first-class support to opt-in to new data. Sliding Sync does have support for this via "extensions". Extensions also allow this proposal to be broken up into more manageable sections. Extensions are requested by the client in a dedicated `extensions` block: -```js +```json5 { "extensions": { "name_of_extension": { // sticky From 35b79f6ebe80f9a36c0ccd1ddc47ab0ef3f4d78f Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 14 Mar 2023 16:34:28 +0000 Subject: [PATCH 56/81] Stop goland complaining about JSON syntax --- proposals/3575-sync.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 2c5171451af..9e4dbaa5cff 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -90,7 +90,7 @@ A complete sync request looks like: } } }, - "unsubscribe_rooms": [ "!sub3:bar" ] + "unsubscribe_rooms": [ "!sub3:bar" ], // Extensions API "extensions": {} @@ -111,7 +111,7 @@ An entire response looks like: "ops": [ { "op": "SYNC", - "range": [0, 99] + "range": [0, 99], "room_ids": [ "!foo:bar", // ... 
99 more room IDs ] @@ -136,7 +136,7 @@ An entire response looks like: {"sender":"@alice:example.com","type":"m.room.create", "state_key":"", "content":{"creator":"@alice:example.com"}}, {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, {"sender":"@alice:example.com","type":"m.room.history_visibility", "state_key":"", "content":{"history_visibility":"joined"}}, - {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@alice:example.com", "content":{"membership":"join"}} + {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@alice:example.com", "content":{"membership":"join"}}, {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"A"}}, {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"B"}}, ], @@ -786,11 +786,11 @@ Would return the response: }, "rooms": { "!encrypted:bar": { - ... + // ... }, // ... 1336 more items "!unencrypted:bar": { - ... + // ... }, // ... 19 more items }, @@ -840,7 +840,7 @@ This would return the following response: {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, {"sender":"@alice:example.com","type":"m.room.history_visibility", "state_key":"", "content":{"history_visibility":"joined"}}, {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@alice:example.com", "content":{"membership":"join","displayname":"Alice"}}, - {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@bob:example.com", "content":{"membership":"join","displayname":"Bob"}} + {"sender":"@alice:example.com","type":"m.room.member", "state_key":"@bob:example.com", "content":{"membership":"join","displayname":"Bob"}}, {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"A"}}, {"sender":"@alice:example.com","type":"m.room.message", "content":{"body":"B"}}, ], @@ -887,7 +887,7 @@ coupled with unsubscribing from the old room. 
For example, if the client swapped "required_state": [ ["*","*"] ], "timeline_limit": 50 } - } + }, "unsubscribe_rooms": [ "!sub1:bar" ] } ``` From 27d190c27ce256594d154150ce5730f7d632e3e6 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 14 Mar 2023 16:36:22 +0000 Subject: [PATCH 57/81] Introduce `bump_event_types` field so that e.g. profile changes don't reorder the list --- proposals/3575-sync.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 9e4dbaa5cff..92db6463777 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -67,6 +67,7 @@ A complete sync request looks like: "client_chosen_key": { "ranges": [ [0,99] ], "sort": [ "by_notification_level", "by_recency", "by_name" ], + "bump_event_types": [ "m.room.message" ], "required_state": [ ["m.room.join_rules", ""], ["m.room.history_visibility", ""], @@ -342,7 +343,9 @@ One or more room lists can be requested in sliding sync like so: // Sticky. List sort order. See Sliding Window API for more information. // These fields may be expanded through use of extensions. "sort": [ "by_notification_level", "by_recency" ], - + // Sticky. Allowlist of event types which should cause the list of rooms to be reordered. + // Empty lists or omitted lists fall back to the list ["m.room.message", "m.room.encrypted"]. + "bump_event_types": [ "m.room.message", "m.room.encrypted" ], // Sticky. Required state for each room returned. An array of event type and state key tuples. // Note that elements of this array are NOT sticky so they must be specified in full when they // are changed. Elements in this array are ORd together to produce the final set of state events From d9aeefcc541ce1f5aee97781cc120820a3f2c25c Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 15 Mar 2023 15:45:05 +0000 Subject: [PATCH 58/81] Make `bump_event_types` conn-level, not list-level Much easier to implement like this, and might be good enough for now. 
--- proposals/3575-sync.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 92db6463777..3854ab09f1f 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -67,7 +67,6 @@ A complete sync request looks like: "client_chosen_key": { "ranges": [ [0,99] ], "sort": [ "by_notification_level", "by_recency", "by_name" ], - "bump_event_types": [ "m.room.message" ], "required_state": [ ["m.room.join_rules", ""], ["m.room.history_visibility", ""], @@ -79,6 +78,7 @@ A complete sync request looks like: } } }, + "bump_event_types": [ "m.room.message" ], // Room Subscriptions API "room_subscriptions": { @@ -343,9 +343,6 @@ One or more room lists can be requested in sliding sync like so: // Sticky. List sort order. See Sliding Window API for more information. // These fields may be expanded through use of extensions. "sort": [ "by_notification_level", "by_recency" ], - // Sticky. Allowlist of event types which should cause the list of rooms to be reordered. - // Empty lists or omitted lists fall back to the list ["m.room.message", "m.room.encrypted"]. - "bump_event_types": [ "m.room.message", "m.room.encrypted" ], // Sticky. Required state for each room returned. An array of event type and state key tuples. // Note that elements of this array are NOT sticky so they must be specified in full when they // are changed. Elements in this array are ORd together to produce the final set of state events @@ -440,6 +437,12 @@ One or more room lists can be requested in sliding sync like so: } } }, + // Sticky. Allowlist of event types which should cause the lists of rooms to be reordered. + // By omitting types from this list, clients can ensure that uninteresting events (e.g. a + // profile rename) do not cause a room to jump to the top of its list(s). + // Empty or omitted `bump_event_types` fall back to the list ["m.room.message", "m.room.encrypted"]. 
+ // NB: this is currently per-connection, not per-list. + "bump_event_types": [ "m.room.message", "m.room.encrypted" ], } ``` From 560f018cd48a3a8d766895a0115f8d2dd225e39a Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 16 Mar 2023 13:07:46 +0000 Subject: [PATCH 59/81] Include `m.room.encrypted` in both examples --- proposals/3575-sync.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 3854ab09f1f..779013291c7 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -78,7 +78,7 @@ A complete sync request looks like: } } }, - "bump_event_types": [ "m.room.message" ], + "bump_event_types": [ "m.room.message", "m.room.encrypted" ], // Room Subscriptions API "room_subscriptions": { From 5bd13e66df73cfcf4a635094758ed86906b84a73 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 16 Mar 2023 13:14:00 +0000 Subject: [PATCH 60/81] Bump for all events if `bump_event_types` is empty --- proposals/3575-sync.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 779013291c7..416b98ae416 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -437,10 +437,11 @@ One or more room lists can be requested in sliding sync like so: } } }, - // Sticky. Allowlist of event types which should cause the lists of rooms to be reordered. - // By omitting types from this list, clients can ensure that uninteresting events (e.g. a - // profile rename) do not cause a room to jump to the top of its list(s). - // Empty or omitted `bump_event_types` fall back to the list ["m.room.message", "m.room.encrypted"]. + // Sticky. Allowlist of event types which should be considered recent activity + // when sorting `by_recency`. By omitting event types from this field, clients + // can ensure that uninteresting events (e.g. a profile rename) do not cause a + // room to jump to the top of its list(s). 
Empty or omitted `bump_event_types` + // have no effect—all events in a room will be considered recent activity. // NB: this is currently per-connection, not per-list. "bump_event_types": [ "m.room.message", "m.room.encrypted" ], } From 602d342cc4210c015a13a4a2b0fc73929d444b8e Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 28 Mar 2023 13:40:35 +0100 Subject: [PATCH 61/81] Sketch definition of `extensions.*.lists/rooms` --- proposals/3575-sync.md | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 416b98ae416..2df6adecf7b 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -182,7 +182,7 @@ An entire response looks like: } ``` These fields and their interactions are explained in the next few sections. This forms the core of -the API. Additional data can be returned via "extensions". +the API. Additional data can be returned via ["extensions"](#extensions). ### Connections @@ -1223,6 +1223,8 @@ manageable sections. Extensions are requested by the client in a dedicated `exte "extensions": { "name_of_extension": { // sticky "enabled": true, // sticky + "lists": ["rooms", "dms"], // sticky + "rooms": ["!abcd:example.com"], // sticky "extension_arg": "value", "extension_arg_2": true } @@ -1235,7 +1237,26 @@ client tries to communicate with an older server). Extension args may or may not depends on the extension. Extensions can leverage the data from the core API, notably which rooms are currently inside sliding -windows as well as which rooms are explicitly subscribed to. +windows as well as which rooms are explicitly subscribed to. By default, an extension is expected to +be aware of and act on all sliding windows and all room subscriptions. However, this may mean the +extension provides data that the client never uses. 
(For example, clients may be interested in seeing +typing notifications for rooms in a sliding window, but ignore such notifications in a background list +of all rooms.) + +To avoid transferring useless data, the spec reserves a field `lists`, which is a sticky list of strings, +namely the names of lists given to the Sliding Window API. If the `lists` field is omitted or provided +as a JSON `null`, the client requests that the extension should process _all_ sliding windows. If the +field is an empty list, the client requests that the extension processes no sliding windows. Otherwise +the field is a nonempty list, meaning that the client wants the extension to only apply to the given +list of sliding windows. + +Similarly, we reserve a `rooms` field, which is a sticky list of room IDs. It has the same semantics +as `lists`, except it tells the extension which room subscriptions it should process (instead of sliding +windows). + +Extensions SHOULD NOT attach their own semantics to the `lists` and `rooms` fields. +Extensions are otherwise free to define and process their own config fields, which may be sticky. +Such fields are ignored by the Core of sliding sync and transparently forwarded to extensions. In an effort to reduce the size of this proposal, extensions will be done in separate MSCs. There will be extensions for: From 5c8496ec5a786bd7d6fb37f8c18d786e9e0af0dd Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 28 Mar 2023 15:32:41 +0100 Subject: [PATCH 62/81] The extension activiation condition is union-like --- proposals/3575-sync.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 2df6adecf7b..3ab25d1830c 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -1254,6 +1254,11 @@ Similarly, we reserve a `rooms` field, which is a sticky list of room IDs. It ha as `lists`, except it tells the extension which room subscriptions it should process (instead of sliding windows). 
+It's possible that the same room appears in multiple sliding windows, or in both a sliding window and an +explicit room subscription. In this case, the extension should process that room if the extension +is configured to process any of the windows/subscriptions that contains the room. (The logic is a union +of conditions, not an intersection.) + Extensions SHOULD NOT attach their own semantics to the `lists` and `rooms` fields. Extensions are otherwise free to define and process their own config fields, which may be sticky. Such fields are ignored by the Core of sliding sync and transparently forwarded to extensions. From aa9c21b0bd3939fd18ea9387fb30e9682fab4956 Mon Sep 17 00:00:00 2001 From: kegsay Date: Thu, 30 Mar 2023 13:06:18 +0100 Subject: [PATCH 63/81] Update proposals/3575-sync.md Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- proposals/3575-sync.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 3ab25d1830c..c9694915116 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -1232,7 +1232,7 @@ manageable sections. Extensions are requested by the client in a dedicated `exte } ``` Extensions MUST have an `enabled` flag which defaults to `false`. If a client sends an unknown extension -name, the server MUST ignore it (or else backwards compatibility between servers is broken when a newer +name, the server MUST ignore it (or else backwards compatibility between clients and servers is broken when a newer client tries to communicate with an older server). Extension args may or may not be sticky, it depends on the extension. 
From eab643cb3ca63b03537a260fa343e1fb2d1ee284 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 31 Mar 2023 12:56:50 +0100 Subject: [PATCH 64/81] Link to recently created MSCs for extensions --- proposals/3575-sync.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index c9694915116..13c594a6dcf 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -1267,10 +1267,10 @@ In an effort to reduce the size of this proposal, extensions will be done in sep be extensions for: - To Device Messaging - [MSC3885](https://github.com/matrix-org/matrix-spec-proposals/pull/3885) - End-to-End Encryption - [MSC3884](https://github.com/matrix-org/matrix-spec-proposals/pull/3884) - - Typing Notifications - [impl](https://github.com/matrix-org/sliding-sync/blob/main/sync3/extensions/typing.go) - - Receipts - [impl](https://github.com/matrix-org/sliding-sync/blob/main/sync3/extensions/receipts.go) + - Typing Notifications - [MSC3961](https://github.com/matrix-org/matrix-spec-proposals/pull/3961) + - Receipts - [MSC3960](https://github.com/matrix-org/matrix-spec-proposals/pull/3960) - Presence - `presence` in sync v2: [spec](https://spec.matrix.org/v1.1/client-server-api/#get_matrixclientv3sync) - - Account Data - `account_data` in sync v2: [impl](https://github.com/matrix-org/sliding-sync/blob/main/sync3/extensions/account_data.go) + - Account Data - `account_data` in sync v2: [MSC3959](https://github.com/matrix-org/matrix-spec-proposals/pull/3950) _Rationale: The name 'extensions' is inspired by the spec itself which refers to "Extensions to /sync" multiple times. 
These additional bits of data are all generally outside the scope of the core room From 9d53e76ebc1a727d4251ebad9c537c43cb5ec1bb Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 4 Apr 2023 10:50:59 +0100 Subject: [PATCH 65/81] Fix account data extension link MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Kévin Commaille <76261501+zecakeh@users.noreply.github.com> --- proposals/3575-sync.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 13c594a6dcf..b3bf7e8352b 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -1270,7 +1270,7 @@ be extensions for: - Typing Notifications - [MSC3961](https://github.com/matrix-org/matrix-spec-proposals/pull/3961) - Receipts - [MSC3960](https://github.com/matrix-org/matrix-spec-proposals/pull/3960) - Presence - `presence` in sync v2: [spec](https://spec.matrix.org/v1.1/client-server-api/#get_matrixclientv3sync) - - Account Data - `account_data` in sync v2: [MSC3959](https://github.com/matrix-org/matrix-spec-proposals/pull/3950) + - Account Data - `account_data` in sync v2: [MSC3959](https://github.com/matrix-org/matrix-spec-proposals/pull/3959) _Rationale: The name 'extensions' is inspired by the spec itself which refers to "Extensions to /sync" multiple times. These additional bits of data are all generally outside the scope of the core room From 188aeeb028a2bce2e19597320238985e705645cf Mon Sep 17 00:00:00 2001 From: kegsay Date: Fri, 5 May 2023 10:44:47 +0100 Subject: [PATCH 66/81] Update proposals/3575-sync.md Co-authored-by: Benjamin Bouvier --- proposals/3575-sync.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index b3bf7e8352b..ec55da8c95d 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -311,7 +311,7 @@ avoid ambiguity. 
For example, an ambiguous API may state the following: } } ``` -When this object is combined with an the additional object: +When this object is combined with the additional object: ```json5 { "foo": { From 2b92f4b686636dc5cd69b45524217e5acf642511 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Wed, 10 May 2023 15:05:49 +0100 Subject: [PATCH 67/81] Add conn_id as a replacement for session IDs This allows concurrent connections to a SS server. --- proposals/3575-sync.md | 46 +++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index ec55da8c95d..658178c23b8 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -54,14 +54,21 @@ A complete sync request looks like: `POST /_matrix/client/unstable/org.matrix.msc3575/sync?pos=4&timeout=30000`: ```json5 { - // allows clients to know what request params reached the server, + // Optional: allows clients to know what request params reached the server, // functionally similar to txn IDs on /send for events. "txn_id": "client-chosen-string", - // a delta token to remember information between sessions. + // Optional: a delta token to remember information between connections. // See "Bandwidth optimisations for persistent clients" for more information. "delta_token": "opaque-server-provided-string", + // Optional: a unique string to identify this connection to the server. If this + // is missing, only 1 sliding sync connection can be made to the server at any one time. + // Clients need to set this to allow >1 connection concurrently, so the server can distinguish + // between connections. This is NOT STICKY and must be provided with every request, if your client + // needs >1 concurrent connection. Max: 16 chars, due to it being required with every request. + "conn_id": "client-chosen", + // Sliding Window API "lists": { "client_chosen_key": { @@ -216,7 +223,7 @@ wait for the client to make an HTTP request. 
In addition, individual HTTP reques ordering problems which simply do not exist in a WebSockets implementation. This can lead to some counter-intuitive responses from a Sliding Sync enabled server, unless certain rules are followed. -**Long-polling Rule 1:** do not send multiple concurrent sliding sync requests to the server. If a request is lost in +**Long-polling Rule 1:** do not send multiple concurrent sliding sync requests to the server with the same connection ID. If a request is lost in transit, it can be impossible to know if it has been applied on the server or not. This is not an issue for `/sync` because the request is stateless; there's nothing to lose in the event of packet loss. In this example, A is applied on both sides, B is not applied on either side, and C is applied on one side only, which @@ -251,6 +258,21 @@ SHOULD send a transaction ID when they need to know when the response has been c sent in the same response. In practice, the server does not combine multiple responses into a single response. Instead, it will send the most recent unacknowledged response, in this case C, _even though_ the HTTP request was for D. +#### Concurrent connections + +There are three main reasons why a client may want to have >1 connection to the server open concurrently: + - The client is a **browser**, and it should be possible to open the same client in multiple tabs without causing problems. Without concurrent connections, each tab would reset the other tabs' connections due to different `?pos=` values being sent. The number of concurrent connections is technically unbounded. + - The client is a **mobile application**, and it should be possible to have a "push process" connection in addition to the "app connection". Without concurrent connections, it isn't possible to obtain to-device messages in the push process, whilst also obtaining them in the main app. The number of concurrent connections is fixed e.g 2. 
+ - The client wants to do a **one-shot request** for some data, without incurring latency/bandwidth penalties with all the activity on the user's account. Without concurrent connections, it isn't possible to get the response without also potentially getting large amounts of extraneous data. The number of concurrent connections is N+1, where N is the number of active concurrent connections. + +Each distinct connection MUST specify a unique `conn_id` at the top-level of _every sync request_, consistent for that connection for that _device_. For example: + - Each browser tab needs a distinct connection. Each tab uses a unix timestamp when the page was loaded and uses that throughout the tab's lifetime e.g `conn_id: "1683726382973"` + - Process A and B each need a distinct connection. Process A uses `conn_id: "A"` and Process B uses `conn_id: "B"`. + - For unbounded one-shot connections controlled by a single process, a simple monotonically increasing integer can be used as the connection ID e.g `conn_id: "4"`. + It is also possible to re-use one-shot connections by omitting the `?pos=` value, as that will trigger an initial sync. + +_Using concurrent connections may result in data loss if used inappropriately. This can happen when one connection sees some data and then performs some action to delete that data on the server before other concurrent connections have seen this data. Where this is a risk, it will be outlined clearly under a "concurrent connections" subheading. This is particularly important for certain extensions like the to-device and E2EE extensions, which delete data when the client has acknowledged the previous response._ + #### Message IDs for clients and servers For the long-polling use case, this proposal includes an opaque token that is very similar to @@ -283,10 +305,6 @@ In addition, clients may send `txn_id` field at the top-level JSON object in the a client message ID. 
Servers MUST echo this back to the client via the `txn_id` field in the top-level JSON object in the response when this request has been processed. -_TODO[proxy]: If pos is just an incrementing integer, it means 2 browser tabs with the same access token -will step on each other's toes. The act of hitting /sync without a ?pos results in the first tab being -torn down connection wise. We may want to mux in implicit session IDs into the pos?_ - #### Sticky request parameters Request parameters can be "sticky". This means that their value is remembered across multiple requests. @@ -1079,7 +1097,7 @@ avoid the churn e.g only two digits of precision (21 -> 21, but 1234 -> 1200), t The Sliding Sync API assumes that room data is deleted on the client when: - the room falls out of the sliding window or; - a window gets invalidated or; - - the session expires and a new session is created. + - the connection expires and a new connection is created. The API will send the entire `required_state` and `timeline` again when the room re-appears for the 2nd time. This is wasteful if the client remembers the state/timeline and there have been no changes. @@ -1095,8 +1113,8 @@ determined, but SHOULD include: - Any extensions which return events e.g `account_data`. Extensions which make use of the delta token MUST state so in its MSC. -The delta token sits outside the scope of sessions, and hence can be used to remember data between -sessions. The token remembers the following information for every room which has been sent to the client: +The delta token sits outside the scope of connections, and hence can be used to remember data between +connections. The token remembers the following information for every room which has been sent to the client: - The event ID of the last sent timeline event. - The event ID of the last sent `required_state` event, keyed off `(type, state_key)`. @@ -1110,14 +1128,14 @@ is applied: all events before this event, including the referenced event itself. 
The remaining timeline events are sent to the client. -This can create gaps in the timeline, but this could already happen between sessions for persistent clients. +This can create gaps in the timeline, but this could already happen between connections for persistent clients. It is up to the client to resolve gaps by querying `/messages`. The `prev_batch` token MUST be updated if events are filtered out. A worked example: - Client hits `/sync` and accumulates some data for rooms. They are using a delta token. - - The client goes offline for a while, and room data changes. The client's session expires. - - The client reappears and hits `/sync` to start a new session. The delta token takes effect and + - The client goes offline for a while, and room data changes. The client's connection expires. + - The client reappears and hits `/sync` to start a new connection. The delta token takes effect and returns a much smaller delta. ``` @@ -1145,7 +1163,7 @@ A worked example: The delta token will not work under the following scenarios: - The timeline is filtered in some way. Currently Sliding Sync provides no filtering mechanism - for timeline events but it will in the future. Any filters need to be the same between sessions + for timeline events but it will in the future. Any filters need to be the same between connections for the bandwidth optimisations to work at all. - `m.room.member` events are excluded from these calculations. Delta tokens map to a lot of data server-side. 
In an effort to bound the growth of this data, `m.room.member` events MAY be From 9f5ac33a04eee2b4b9fac53fafd8878140cdc549 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Wed, 10 May 2023 15:06:53 +0100 Subject: [PATCH 68/81] Security considerations --- proposals/3575-sync.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 658178c23b8..97628833e17 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -1405,6 +1405,9 @@ of service attacks: - Complex or pathological filter/sort options (especially via extensions) may degrade performance on the server and client. This may affect other users on the server. - Excessively long lists, list keys, ranges, etc. Some limits are specified in this MSC to mitigate against this. + - Excessive amounts of concurrent connections could consume large amounts of memory on the server for a single device. + It is recommended that servers limit the number of concurrent connections to 5, and expire the oldest connection + first. # Unstable prefix From f9954f0dc9a8ce475856432eeea9fca3445d031e Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Wed, 10 May 2023 15:20:36 +0100 Subject: [PATCH 69/81] Add blurb on expiry --- proposals/3575-sync.md | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 97628833e17..fc2eb3e28a2 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -258,6 +258,24 @@ SHOULD send a transaction ID when they need to know when the response has been c sent in the same response. In practice, the server does not combine multiple responses into a single response. Instead, it will send the most recent unacknowledged response, in this case C, _even though_ the HTTP request was for D. +#### Expiry + +Connections can be "expired" by the server at any time and for any reason. 
When a connection is expired, the server +will send back an HTTP 400 containing the response body: +```json5 +{ + "error": "Unknown position", + "errcode": "M_UNKNOWN_POS" +} +``` +Common reasons for expiring a connection include: + - The last request was sent too long ago. + - The server has reached a memory limit for your connection and has expired it to reclaim memory. + - The server which handled your last request is no longer running (e.g it was restarted) and it cannot calculate a response. + +To handle expired connections, clients should send an initial request (with all sticky request parameters) +without a `pos` value to restart the connection. + #### Concurrent connections There are three main reasons why a client may want to have >1 connection to the server open concurrently: @@ -287,17 +305,6 @@ In simple servers, the `pos` may be an incrementing integer, but more complex se clocks or contain node identifying information in the token. Clients MUST treat `pos` as an opaque value and not introspect it. -When a `pos` is invalidated and the client attempts to use the `pos`, the server MUST send back a -standard error response as a HTTP 400 containing: -```json5 -{ - "error": "Unknown position", - "errcode": "M_UNKNOWN_POS" -} -``` -This then allows the client to reset their connection and send an initial request (with all sticky -request parameters) without a `pos` value to restart the connection. - The `timeout` query parameter exists for the same purposes of sync v2: to tell the server how many milliseconds to hold open the connection before returning. 
From 14580cae99a3e69598985c364dbdb1e0dfb177b4 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 23 May 2023 14:59:49 +0100 Subject: [PATCH 70/81] Make bump_event_types per-list --- proposals/3575-sync.md | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index fc2eb3e28a2..749e293773e 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -82,10 +82,10 @@ A complete sync request looks like: "timeline_limit": 10, "filters": { "is_dm": true - } + }, + "bump_event_types": [ "m.room.message", "m.room.encrypted" ], } }, - "bump_event_types": [ "m.room.message", "m.room.encrypted" ], // Room Subscriptions API "room_subscriptions": { @@ -459,16 +459,15 @@ One or more room lists can be requested in sliding sync like so: // takes priority over `tags`. This filter is useful if your Rooms list does NOT include the // list of favourite rooms again. "not_tags": ["m.lowpriority"] - } + }, + // Sticky. Allowlist of event types which should be considered recent activity + // when sorting `by_recency`. By omitting event types from this field, clients + // can ensure that uninteresting events (e.g. a profile rename) do not cause a + // room to jump to the top of its list(s). Empty or omitted `bump_event_types` + // have no effect—all events in a room will be considered recent activity. + "bump_event_types": [ "m.room.message", "m.room.encrypted" ], } }, - // Sticky. Allowlist of event types which should be considered recent activity - // when sorting `by_recency`. By omitting event types from this field, clients - // can ensure that uninteresting events (e.g. a profile rename) do not cause a - // room to jump to the top of its list(s). Empty or omitted `bump_event_types` - // have no effect—all events in a room will be considered recent activity. - // NB: this is currently per-connection, not per-list. 
- "bump_event_types": [ "m.room.message", "m.room.encrypted" ], } ``` From 4103ee768a4a3e1decee80c2987f50f4c6b3d539 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 23 May 2023 15:02:50 +0100 Subject: [PATCH 71/81] Editing bump_event_types has no retroactive effect --- proposals/3575-sync.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 749e293773e..20fbfcfb881 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -465,6 +465,9 @@ One or more room lists can be requested in sliding sync like so: // can ensure that uninteresting events (e.g. a profile rename) do not cause a // room to jump to the top of its list(s). Empty or omitted `bump_event_types` // have no effect—all events in a room will be considered recent activity. + // + // NB. Changes to bump_event_types will NOT cause the room list to be reordered; + // it will only affect the ordering of rooms due to future updates. "bump_event_types": [ "m.room.message", "m.room.encrypted" ], } }, From 30e31c8c50c1e69c7794540ba4090814087a06dc Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 9 Jun 2023 12:37:15 +0100 Subject: [PATCH 72/81] Clarify the meaning of extension scoping config --- proposals/3575-sync.md | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 20fbfcfb881..dac1a199984 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -1277,9 +1277,10 @@ field is an empty list, the client requests that the extension processes no slid the field is a nonempty list, meaning that the client wants the extension to only apply to the given list of sliding windows. -Similarly, we reserve a `rooms` field, which is a sticky list of room IDs. It has the same semantics -as `lists`, except it tells the extension which room subscriptions it should process (instead of sliding -windows). 
+Similarly, we reserve a `rooms` field, which is a sticky list of room IDs that +the extension should process in addition to those in sliding windows. If `null` +or omitted, the `rooms` field falls back to the rooms that the main request +has explicitly subscribed to using the Room Subscription API. It's possible that the same room appears in multiple sliding windows, or in both a sliding window and an explicit room subscription. In this case, the extension should process that room if the extension @@ -1308,6 +1309,39 @@ don't handle presence/typing/receipts/other metadata, they can simply work with in full. This is a good balance because it means this MSC alone is useful: it doesn't **require** additional extensions in order for a basic Matrix client to be written._ +#### Example extension configuration + +```json lines +{ + "enabled": false, // extension completely disabled +} +{ + "enabled": true, // extension enabled for all sliding windows and all room subscriptions +} +{ + "enabled": true, // extension enabled for all sliding windows and all room subscriptions + "lists": null, + "rooms": null, +} +{ + "enabled": true, // extension enabled for all room subscriptions, + "lists": [], // but not enabled for sliding windows +} +{ + "enabled": true, // extension enabled for all room subscriptions, + "lists": ["dms"], // and for the "dms" sliding window +} +{ + "enabled": true, // extension enabled for all sliding windows and one specific room + "rooms": ["!myroom:example.com"], +} +{ + "enabled": true, // extension enabled for the "dms" sliding window and one specific room + "lists": ["dms"], // and for the "dms" sliding window + "rooms": ["!myroom:example.com"], +} +``` + #### Filter and Sort Extensions In addition to extending the sync API by adding more data to the response, the sync API needs to include From 7148c5774e1063c93ea33c082fabe4af5a697452 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 9 Jun 2023 16:36:56 +0100 Subject: [PATCH 73/81] Another 
pass on extension scoping --- proposals/3575-sync.md | 110 +++++++++++++++++++++++++---------------- 1 file changed, 68 insertions(+), 42 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index dac1a199984..2db9a7e6bec 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -1252,8 +1252,8 @@ manageable sections. Extensions are requested by the client in a dedicated `exte "enabled": true, // sticky "lists": ["rooms", "dms"], // sticky "rooms": ["!abcd:example.com"], // sticky - "extension_arg": "value", - "extension_arg_2": true + "extension_arg": "value", // stickiness specified by the extension + "extension_arg_2": true // stickiness specified by the extension } } } @@ -1271,17 +1271,75 @@ typing notifications for rooms in a sliding window, but ignore such notification of all rooms.) To avoid transferring useless data, the spec reserves a field `lists`, which is a sticky list of strings, -namely the names of lists given to the Sliding Window API. If the `lists` field is omitted or provided -as a JSON `null`, the client requests that the extension should process _all_ sliding windows. If the -field is an empty list, the client requests that the extension processes no sliding windows. Otherwise -the field is a nonempty list, meaning that the client wants the extension to only apply to the given -list of sliding windows. +namely the names of lists given to the Sliding Window API. There are four behaviours that the +client can request of the extension: + +```jsonlines +{"lists": []} // Do not process any lists. +{"lists": ["rooms", "dms"]} // Process only a subset of lists. +{"lists": ["*"]} // Process all lists defined in the Sliding Window API. (This is the default.) +{"lists": ["*", "junk", "here"]} // The same: anything whose first entry is `*` means "process all lists". +{"lists": null} // No change, use the `lists` value from the previous request. +{} // field omitted // The same: use the previous sticky value. 
+``` Similarly, we reserve a `rooms` field, which is a sticky list of room IDs that -the extension should process in addition to those in sliding windows. If `null` -or omitted, the `rooms` field falls back to the rooms that the main request -has explicitly subscribed to using the Room Subscription API. +given to the Room Subscription API. Again, there are four behaviours: + +```jsonlines +{"rooms": []} // Do not process any specific rooms. +{"rooms": ["!a:b", "!c:d"]} // Process only a subset of room subscriptions. +{"rooms": ["*"]} // Process all room subscriptions defined in the Room Subscription API. (This is the default.) +{"rooms": ["*", "junk", "here"]} // The same: anything whose first entry is `*` means "process all room subscriptions". +{"rooms": null} // No change, use the `rooms` value from the previous request. +{} // field omitted // The same: use the previous sticky value. +``` + +
+Examples of using lists and rooms + +```json lines +{ + "enabled": false, // extension completely disabled +} +{ + "enabled": true, // extension enabled for all sliding windows and all room subscriptions + "lists": ["*"], + "rooms": ["*"], +} +{ + "enabled": true, // extension enabled for all room subscriptions, + "lists": [], // but not enabled for sliding windows + "rooms": ["*"], +} +{ + "enabled": true, // extension enabled for all room subscriptions, + "lists": ["dms"], // and for the "dms" sliding window + "rooms": ["*"], +} +{ + "enabled": true, // extension enabled for all sliding windows and one specific room + "lists": ["*"], + "rooms": ["!myroom:example.com"], +} +{ + "enabled": true, // extension enabled for the "dms" sliding window and one specific room + "lists": ["dms"], // and for the "dms" sliding window + "rooms": ["!myroom:example.com"], +} +{ + "enabled": true, // extension enabled for the "dms" sliding window and one specific room + "lists": ["dms"], // and for the "dms" sliding window + "rooms": ["!myroom:example.com"], +} +{ + "lists": ["dms"] // use "enabled" and "rooms" from the previous request, + // and only enable the extension for the "dms" sliding window. +} +``` +
+The `lists` and `rooms` keys are independent and can be freely mixed (as in the core sliding sync API.) It's possible that the same room appears in multiple sliding windows, or in both a sliding window and an explicit room subscription. In this case, the extension should process that room if the extension is configured to process any of the windows/subscriptions that contains the room. (The logic is a union @@ -1309,38 +1367,6 @@ don't handle presence/typing/receipts/other metadata, they can simply work with in full. This is a good balance because it means this MSC alone is useful: it doesn't **require** additional extensions in order for a basic Matrix client to be written._ -#### Example extension configuration - -```json lines -{ - "enabled": false, // extension completely disabled -} -{ - "enabled": true, // extension enabled for all sliding windows and all room subscriptions -} -{ - "enabled": true, // extension enabled for all sliding windows and all room subscriptions - "lists": null, - "rooms": null, -} -{ - "enabled": true, // extension enabled for all room subscriptions, - "lists": [], // but not enabled for sliding windows -} -{ - "enabled": true, // extension enabled for all room subscriptions, - "lists": ["dms"], // and for the "dms" sliding window -} -{ - "enabled": true, // extension enabled for all sliding windows and one specific room - "rooms": ["!myroom:example.com"], -} -{ - "enabled": true, // extension enabled for the "dms" sliding window and one specific room - "lists": ["dms"], // and for the "dms" sliding window - "rooms": ["!myroom:example.com"], -} -``` #### Filter and Sort Extensions From 0e3ea5809b30377102a5f2d7b03075f285b17711 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 9 Jun 2023 16:44:23 +0100 Subject: [PATCH 74/81] Tweak extension scoping wording --- proposals/3575-sync.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 
2db9a7e6bec..c52aed5b6f8 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -1274,31 +1274,31 @@ To avoid transferring useless data, the spec reserves a field `lists`, which is namely the names of lists given to the Sliding Window API. There are four behaviours that the client can request of the extension: -```jsonlines +```json5 {"lists": []} // Do not process any lists. {"lists": ["rooms", "dms"]} // Process only a subset of lists. {"lists": ["*"]} // Process all lists defined in the Sliding Window API. (This is the default.) {"lists": ["*", "junk", "here"]} // The same: anything whose first entry is `*` means "process all lists". -{"lists": null} // No change, use the `lists` value from the previous request. +{"lists": null} // No change, use the `lists` sticky value from previous requests. {} // field omitted // The same: use the previous sticky value. ``` -Similarly, we reserve a `rooms` field, which is a sticky list of room IDs that +Similarly, we reserve a `rooms` field, which is a sticky list of room IDs given to the Room Subscription API. Again, there are four behaviours: -```jsonlines +```json5 {"rooms": []} // Do not process any specific rooms. {"rooms": ["!a:b", "!c:d"]} // Process only a subset of room subscriptions. {"rooms": ["*"]} // Process all room subscriptions defined in the Room Subscription API. (This is the default.) {"rooms": ["*", "junk", "here"]} // The same: anything whose first entry is `*` means "process all room subscriptions". -{"rooms": null} // No change, use the `rooms` value from the previous request. +{"rooms": null} // No change, use the `rooms` sticky value from previous requests. {} // field omitted // The same: use the previous sticky value. ```
Examples of using lists and rooms -```json lines +```json5 { "enabled": false, // extension completely disabled } From 50ae2c8d65956fb156751aeeef04518ae4433ef0 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 9 Jun 2023 16:47:46 +0100 Subject: [PATCH 75/81] Hyperlink to the extensions section --- proposals/3575-sync.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index c52aed5b6f8..ebd7118c3e8 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -46,7 +46,7 @@ Q W E R T Y U I O P L K J H G F D S A Z X C V B N M ``` It also introduces a number of new concepts which are explained in more detail later on: - Core API: The minimal API to be sliding sync compatible. - - Extensions: Additional APIs which expose more data from the server e.g presence, device messages + - [Extensions](#extensions): Additional APIs which expose more data from the server e.g presence, device messages or additional sort/filter operations. ## Core @@ -716,7 +716,7 @@ _Rationale: The sort operations are restrictive and limited in scope on purpose. as arbitrary or more expansive sort orders were decided against as it would A) force servers to support nonsensical and potentially expensive operations and B) not produce the best sort order for specific use cases in Matrix such as alias handling. 
That being said, having some mechanism to support -additional sort operations is useful, see the extensions section for more information._ +additional sort operations is useful, see the [extensions](#extensions) section for more information._ The complete API shape for each operation is shown below (note the key names vary on the operation): From 1d38101b626249ca732db54f4179b701de883a1f Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 13 Jul 2023 11:50:22 +0100 Subject: [PATCH 76/81] Define an avatar field in the room response --- proposals/3575-sync.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index ebd7118c3e8..bec9afbbf56 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -133,6 +133,7 @@ An entire response looks like: // Room from room subscription "!sub1:bar": { "name": "Alice and Bob", + "avatar": "mxc://...", "initial": true, "required_state": [ {"sender":"@alice:example.com","type":"m.room.create", "state_key":"", "content":{"creator":"@alice:example.com"}}, @@ -157,6 +158,7 @@ An entire response looks like: // rooms from list "!foo:bar": { "name": "The calculated room name", + "avatar": "mxc://...", "initial": true, "required_state": [ {"sender":"@alice:example.com","type":"m.room.join_rules", "state_key":"", "content":{"join_rule":"invite"}}, @@ -504,6 +506,8 @@ The server will then return a `rooms` key which have the following fields: // the room ID "!foo:bar": { "name": "The calculated room name", + // The MXC URL of the room's avatar, or the string "none" if this room has no prescribed avatar. + "avatar": "mxc://...", // Flag which is set when this is the first time the server is sending this data on this connection. // Clients can use this flag to replace or update their local state. When there is an update, servers // MUST omit this flag entirely and NOT send "initial":false as this is wasteful on bandwidth. 
The From 912621b4d8c5bf4bc7f695acb590c055795c497f Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 14 Jul 2023 11:30:57 +0100 Subject: [PATCH 77/81] Avatar field: use `null` for "no avatar" --- proposals/3575-sync.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index bec9afbbf56..d1cbe8a98a3 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -506,7 +506,8 @@ The server will then return a `rooms` key which have the following fields: // the room ID "!foo:bar": { "name": "The calculated room name", - // The MXC URL of the room's avatar, or the string "none" if this room has no prescribed avatar. + // Optional, nullable string: the MXC URL of the room's avatar. If omitted, + // there is no change to the avatar. If null, the room now has no avatar. "avatar": "mxc://...", // Flag which is set when this is the first time the server is sending this data on this connection. // Clients can use this flag to replace or update their local state. 
When there is an update, servers From 71fb1a2e3780ae07e03850d96ce3bad59305986e Mon Sep 17 00:00:00 2001 From: David Robertson Date: Sat, 29 Jul 2023 00:59:28 +0100 Subject: [PATCH 78/81] Fix indentation of `avatar` field --- proposals/3575-sync.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index d1cbe8a98a3..1d4b3b973ab 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -133,7 +133,7 @@ An entire response looks like: // Room from room subscription "!sub1:bar": { "name": "Alice and Bob", - "avatar": "mxc://...", + "avatar": "mxc://...", "initial": true, "required_state": [ {"sender":"@alice:example.com","type":"m.room.create", "state_key":"", "content":{"creator":"@alice:example.com"}}, From 9450ced7fb9cf5ea9077d029b3adf36aebfa8709 Mon Sep 17 00:00:00 2001 From: Till Faelligen <2353100+S7evinK@users.noreply.github.com> Date: Mon, 18 Sep 2023 16:11:15 +0200 Subject: [PATCH 79/81] Define `include_heroes` flag and `heroes` in room the room response --- proposals/3575-sync.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index 1d4b3b973ab..a33d4c05e31 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -410,6 +410,10 @@ One or more room lists can be requested in sliding sync like so: "required_state": [ ["m.room.tombstone", ""] ] }, + // Sticky. Return a stripped variant of membership events (containing `user_id` and optionally `avatar_url` and `displayname`) + // for the users used to calculate the room name. + "include_heroes": true, + // Sticky. Filters to apply to the list before sorting. "filters": { // All fields below are Sticky. @@ -509,6 +513,11 @@ The server will then return a `rooms` key which have the following fields: // Optional, nullable string: the MXC URL of the room's avatar. If omitted, // there is no change to the avatar. If null, the room now has no avatar. 
"avatar": "mxc://...", + // Optional. If omitted there is no change to the heroes or the `name` was not + // calculated using room heroes. `avatar_url` and `displayname` are optional. + "heroes": [ + {"user_id":"@alice:example.com","displayname":"Alice","avatar_url":"mxc://..."}, + ], // Flag which is set when this is the first time the server is sending this data on this connection. // Clients can use this flag to replace or update their local state. When there is an update, servers // MUST omit this flag entirely and NOT send "initial":false as this is wasteful on bandwidth. The From 8ced9a777f1e78575158fb0cba5df7d6528fc40b Mon Sep 17 00:00:00 2001 From: kegsay Date: Thu, 9 Nov 2023 16:58:39 +0000 Subject: [PATCH 80/81] Update 3575-sync.md --- proposals/3575-sync.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index a33d4c05e31..e3459f62380 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -1452,6 +1452,25 @@ and 10,000s of rooms per account as metadata rooms continue to be added (VoIP co spaces, profiles-as-rooms, thread-per-room, etc). If this assumption is false and room counts remain reasonably well bounded then this MSC may not be required. +## GraphQL + +We could define schemas for querying sync data and expose a GraphQL server on every Matrix homeserver. +This would have numerous benefits: + - flexible query language, + - SDKs exist which interact with GraphQL e.g for automatically handling pagination, streaming, + - more standardised than a custom line protocol i.e if you know GraphQL already, it lowers the barrier to entry (e.g using Subscriptions for real-time updates) + - some would argue this is less complex than designing a custom API. + +This would have the following drawbacks: + - easy to design slow performing queries. 
Denial of Service risk, mitigated via strong rate limits (see [Github v4 API](https://docs.github.com/en/graphql/overview/rate-limits-and-node-limits-for-the-graphql-api), + - higher bandwidth costs than a custom API (both for requests and responses), + - easier to accidentally expose confidential information by not applying sufficient authentication checks, + - some would argue this is more complex than designing a custom API, + - it forces all Matrix developers to become familiar with GraphQL as the queries are crafted client-side, + - it's difficult to cache responses, impacting speed. + +Overall, GraphQL would be suitable for rapid prototyping, but does not meet the [Goals](https://github.com/matrix-org/matrix-spec-proposals/blob/kegan/sync-v3/proposals/3575-sync.md#goals) of this API. + # Security considerations This API presents new ways to request data from the server which need appropriate authentication checks: From 7036c29db2b0ea40dccb0c505d6ef4b6085bf192 Mon Sep 17 00:00:00 2001 From: kegsay Date: Thu, 9 Nov 2023 17:00:13 +0000 Subject: [PATCH 81/81] Update 3575-sync.md --- proposals/3575-sync.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/proposals/3575-sync.md b/proposals/3575-sync.md index e3459f62380..7fb644856a4 100644 --- a/proposals/3575-sync.md +++ b/proposals/3575-sync.md @@ -1462,7 +1462,8 @@ This would have numerous benefits: - some would argue this is less complex than designing a custom API. This would have the following drawbacks: - - easy to design slow performing queries. 
Denial of Service risk, mitigated via strong rate limits (see [Github v4 API](https://docs.github.com/en/graphql/overview/rate-limits-and-node-limits-for-the-graphql-api), + - easy to design slow-performing queries which work well for small accounts but degrade on large accounts, + - Denial of Service risk, mitigated via strong rate limits (see [GitHub v4 API](https://docs.github.com/en/graphql/overview/rate-limits-and-node-limits-for-the-graphql-api)), - higher bandwidth costs than a custom API (both for requests and responses), - easier to accidentally expose confidential information by not applying sufficient authentication checks, - some would argue this is more complex than designing a custom API,