Skip to content

Commit

Permalink
Merge pull request #633 from BrandtHill/master
Browse files Browse the repository at this point in the history
Upgrade voice gateway v8 and support per-channel encryption modes with fallback
  • Loading branch information
jchristgit authored Sep 21, 2024
2 parents 360cdc5 + 2e9a458 commit 1628880
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 52 deletions.
12 changes: 6 additions & 6 deletions guides/functionality/voice.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,8 @@ hardware architecture your bot is running on. If you're interested, keep reading

#### Encryption Mode Configuration Options

This is a runtime configuration option. Some Discord voice servers may not support your
configured encryption mode, and in these cases the fallback mode
`:aead_xchacha20_poly1305_rtpsize` will be selected instead.

```elixir
config :nostrum, :voice_encryption_mode, :aes256_gcm # Default
Expand All @@ -199,10 +199,10 @@ Available configuration options are as follows:
- `:xsalsa20_poly1305`
- `:xsalsa20_poly1305_suffix`
- `:xsalsa20_poly1305_lite`
- `:xsalsa20_poly1305_lite_rtpsize`
- `:aead_xchacha20_poly1305_rtpsize`
- `:aead_aes256_gcm`
- `:aead_aes256_gcm_rtpsize`
- `:xchacha20_poly1305` (alias for `:aead_xchacha20_poly1305_rtpsize`)
- `:aes256_gcm` (alias for `:aead_aes256_gcm_rtpsize`)

Expand Down
1 change: 1 addition & 0 deletions lib/nostrum/struct/voice_state.ex
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ defmodule Nostrum.Struct.VoiceState do
:ip,
:port,
:udp_socket,
:encryption_mode,
:rtp_sequence,
:rtp_timestamp,
:ffmpeg_proc,
Expand Down
18 changes: 14 additions & 4 deletions lib/nostrum/struct/voice_ws_state.ex
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ defmodule Nostrum.Struct.VoiceWSState do
:stream,
:gateway,
:identified,
:seq,
:encryption_mode,
:last_heartbeat_send,
:last_heartbeat_ack,
:heartbeat_ack,
Expand Down Expand Up @@ -49,10 +51,10 @@ defmodule Nostrum.Struct.VoiceWSState do
@type secret_key :: binary() | nil

@typedoc "PID of the `:gun` worker connected to the websocket"
@type conn :: pid
@type conn :: pid()

@typedoc "PID of the connection process"
@type conn_pid :: pid
@type conn_pid :: pid()

@typedoc "Stream reference for `:gun`"
@type stream :: :gun.stream_ref()
Expand All @@ -63,6 +65,12 @@ defmodule Nostrum.Struct.VoiceWSState do
@typedoc "Whether the session has been identified"
@type identified :: boolean()

@typedoc "Sequence number for buffering server-sent events"
@type seq :: integer()

@typedoc "Encryption mode selected for voice channel"
@type encryption_mode :: Nostrum.Voice.Crypto.cipher()

@typedoc """
The time the last heartbeat was sent, if a heartbeat hasn't been sent it
will be the time the websocket process was started
Expand All @@ -76,10 +84,10 @@ defmodule Nostrum.Struct.VoiceWSState do
@type last_heartbeat_ack :: DateTime.t() | nil

@typedoc "Whether or not the last heartbeat sent was ACK'd"
@type heartbeat_ack :: boolean
@type heartbeat_ack :: boolean()

@typedoc "Interval at which heartbeats are sent"
@type heartbeat_interval :: integer | nil
@type heartbeat_interval :: integer() | nil

@typedoc "Time ref for the heartbeat"
@type heartbeat_ref :: :timer.tref() | nil
Expand All @@ -96,6 +104,8 @@ defmodule Nostrum.Struct.VoiceWSState do
stream: stream,
gateway: gateway,
identified: identified,
seq: seq,
encryption_mode: encryption_mode,
last_heartbeat_send: last_heartbeat_send,
last_heartbeat_ack: last_heartbeat_ack,
heartbeat_ack: heartbeat_ack,
Expand Down
38 changes: 22 additions & 16 deletions lib/nostrum/voice/crypto.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ defmodule Nostrum.Voice.Crypto do
@moduledoc false

alias Nostrum.Struct.VoiceState
alias Nostrum.Struct.VoiceWSState
alias Nostrum.Voice.Audio
alias Nostrum.Voice.Crypto.Aes
alias Nostrum.Voice.Crypto.Chacha
Expand All @@ -23,27 +22,34 @@ defmodule Nostrum.Voice.Crypto do

@type cipher :: cipher_non_rtpsize() | cipher_alias() | cipher_rtpsize()

@mode Application.compile_env(:nostrum, :voice_encryption_mode, :aes256_gcm)
@fallback_mode :aead_xchacha20_poly1305_rtpsize

@mode_string Map.get(
%{
xchacha20_poly1305: "aead_xchacha20_poly1305_rtpsize",
aes256_gcm: "aead_aes256_gcm_rtpsize"
},
@mode,
"#{@mode}"
)
@mode_aliases %{
xchacha20_poly1305: :aead_xchacha20_poly1305_rtpsize,
aes256_gcm: :aead_aes256_gcm_rtpsize
}

def encryption_mode, do: @mode_string
# Chooses the encryption mode for a voice session.
#
# Reads `:voice_encryption_mode` from the `:nostrum` application environment at
# call time (default `:aes256_gcm`), expands it through `@mode_aliases`, and
# returns it when its string form appears in `available_modes`. Otherwise
# `@fallback_mode` is returned.
@spec encryption_mode(list(String.t())) :: cipher()
def encryption_mode(available_modes) do
  configured = Application.get_env(:nostrum, :voice_encryption_mode, :aes256_gcm)

  # Values that are not aliases map to themselves.
  mode = Map.get(@mode_aliases, configured, configured)

  # List.wrap/1 turns a missing (nil) mode list into [] so that we select the
  # fallback instead of raising on the membership check.
  if "#{mode}" in List.wrap(available_modes) do
    mode
  else
    @fallback_mode
  end
end

# Encrypts `data` for the given voice state using the encryption mode stored
# in the state's `:encryption_mode` field.
def encrypt(%VoiceState{encryption_mode: mode} = voice, data) do
  header = Audio.rtp_header(voice)

  # Runtime dispatch to this module's `encrypt_<mode>/3` function; the mode is
  # per voice state, so it cannot be resolved at compile time.
  apply(__MODULE__, :"encrypt_#{mode}", [voice, data, header])
end

# Decrypts `data` using the `:secret_key` and `:encryption_mode` fields of the
# given state. Any map or struct carrying both keys is accepted.
def decrypt(%{secret_key: key, encryption_mode: mode}, data) do
  # Runtime dispatch to this module's `decrypt_<mode>/2` function.
  apply(__MODULE__, :"decrypt_#{mode}", [key, data])
end

def encrypt_xsalsa20_poly1305(%VoiceState{secret_key: key}, data, header) do
nonce = header <> <<0::unit(8)-size(12)>>
Expand Down
45 changes: 33 additions & 12 deletions lib/nostrum/voice/event.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,49 @@ defmodule Nostrum.Voice.Event do
@moduledoc false

alias Nostrum.Cache.UserCache
alias Nostrum.Constants
alias Nostrum.Struct.VoiceWSState
alias Nostrum.Voice
alias Nostrum.Voice.{Audio, Payload, Session}
alias Nostrum.Voice.Audio
alias Nostrum.Voice.Crypto
alias Nostrum.Voice.Payload
alias Nostrum.Voice.Session

require Logger

# Entry point for decoded voice gateway payloads.
#
# Stores the payload's "seq" value in the state when one is present, converts
# the payload's "op" value to an event atom via
# `Constants.atom_from_voice_opcode/1`, and dispatches to `handle_event/3`.
@spec handle(map(), VoiceWSState.t()) :: VoiceWSState.t() | {VoiceWSState.t(), iodata()}
def handle(payload, state) do
  state = update_sequence(state, payload)

  payload["op"]
  |> Constants.atom_from_voice_opcode()
  |> handle_event(payload, state)
end

# Copies the payload's "seq" value into the state's `:seq` field when the
# payload carries one; otherwise the state is returned untouched.
defp update_sequence(state, payload) do
  case payload do
    %{"seq" => sequence} -> %{state | seq: sequence}
    _no_sequence -> state
  end
end

# READY: selects an encryption mode from the modes listed in the payload,
# stores the connection details and a freshly opened UDP socket in the voice
# state, performs IP discovery, and returns the updated websocket state
# together with the SELECT_PROTOCOL payload to send back.
defp handle_event(:ready, payload, state) do
  Logger.debug("VOICE READY")

  data = payload["d"]
  mode = Crypto.encryption_mode(data["modes"])

  voice =
    Voice.update_voice(state.guild_id,
      ssrc: data["ssrc"],
      ip: data["ip"],
      port: data["port"],
      encryption_mode: mode,
      udp_socket: Audio.open_udp()
    )

  {my_ip, my_port} = Audio.discover_ip(voice.udp_socket, voice.ip, voice.port, voice.ssrc)

  # The chosen mode is kept on the websocket state as well as the voice state.
  {%{state | encryption_mode: mode}, Payload.select_protocol_payload(my_ip, my_port, mode)}
end

def handle(:session_description, payload, state) do
defp handle_event(:session_description, payload, state) do
Logger.debug("VOICE SESSION DESCRIPTION")

secret_key = payload["d"]["secret_key"] |> :erlang.list_to_binary()
Expand All @@ -39,17 +60,17 @@ defmodule Nostrum.Voice.Event do
%{state | secret_key: secret_key}
end

# HEARTBEAT_ACK: records the current UTC time as the last acknowledgement and
# marks the outstanding heartbeat as acknowledged.
defp handle_event(:heartbeat_ack, _payload, state) do
  Logger.debug("VOICE HEARTBEAT_ACK")
  %{state | last_heartbeat_ack: DateTime.utc_now(), heartbeat_ack: true}
end

# RESUMED: logs the event and leaves the state unchanged.
defp handle_event(:resumed, _payload, state) do
  Logger.info("VOICE RESUMED")
  state
end

def handle(:hello, payload, state) do
defp handle_event(:hello, payload, state) do
state = %{state | heartbeat_interval: payload["d"]["heartbeat_interval"]}

GenServer.cast(state.conn_pid, :heartbeat)
Expand All @@ -63,7 +84,7 @@ defmodule Nostrum.Voice.Event do
end
end

def handle(:client_connect, payload, state) do
defp handle_event(:client_connect, payload, state) do
Logger.debug(fn ->
user_id = payload["d"]["user_id"] |> String.to_integer()

Expand All @@ -76,7 +97,7 @@ defmodule Nostrum.Voice.Event do
state
end

def handle(:client_disconnect, payload, state) do
defp handle_event(:client_disconnect, payload, state) do
Logger.debug(fn ->
user_id = payload["d"]["user_id"] |> String.to_integer()

Expand All @@ -89,16 +110,16 @@ defmodule Nostrum.Voice.Event do
state
end

# CODEC_INFO: ignored; the state is returned unchanged.
defp handle_event(:codec_info, _payload, state), do: state

# SPEAKING: records which user id the announced SSRC belongs to in the
# state's `:ssrc_map`. The user id arrives as a string and is stored as an
# integer.
defp handle_event(:speaking, payload, state) do
  ssrc = payload["d"]["ssrc"]
  user_id = payload["d"]["user_id"] |> String.to_integer()
  ssrc_map = Map.put(state.ssrc_map, ssrc, user_id)
  %{state | ssrc_map: ssrc_map}
end

# Catch-all: logs any event without a dedicated clause and returns the state
# unchanged.
defp handle_event(event, _payload, state) do
  Logger.debug("UNHANDLED VOICE GATEWAY EVENT #{event}")
  state
end
Expand Down
15 changes: 9 additions & 6 deletions lib/nostrum/voice/payload.ex
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@ defmodule Nostrum.Voice.Payload do
# Other functions which return a map with keys `:t` and `:d` are for
# generating voice-related events to be consumed by a Consumer process.

# Builds the HEARTBEAT payload: the current UTC time as a millisecond unix
# timestamp under `t`, and the state's `seq` value under `seq_ack`.
def heartbeat_payload(%VoiceWSState{} = state) do
  %{
    t: DateTime.utc_now() |> DateTime.to_unix(:millisecond),
    seq_ack: state.seq
  }
  |> build_payload("HEARTBEAT")
end

Expand All @@ -35,18 +37,19 @@ defmodule Nostrum.Voice.Payload do
%{
server_id: state.guild_id,
token: state.token,
session_id: state.session
session_id: state.session,
seq_ack: state.seq
}
|> build_payload("RESUME")
end

def select_protocol_payload(ip, port) do
def select_protocol_payload(ip, port, mode) do
%{
protocol: "udp",
data: %{
address: ip,
port: port,
mode: Crypto.encryption_mode()
mode: "#{mode}"
}
}
|> build_payload("SELECT_PROTOCOL")
Expand Down
14 changes: 6 additions & 8 deletions lib/nostrum/voice/session.ex
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ defmodule Nostrum.Voice.Session do

use GenServer

@gateway_qs "/?v=4"
@gateway_qs "/?v=8"

@timeout_connect 10_000

Expand Down Expand Up @@ -58,6 +58,7 @@ defmodule Nostrum.Voice.Session do
session: voice.session,
token: voice.token,
gateway: voice.gateway,
seq: -1,
stream: stream,
last_heartbeat_ack: DateTime.utc_now(),
heartbeat_ack: true
Expand Down Expand Up @@ -85,13 +86,10 @@ defmodule Nostrum.Voice.Session do
end

def handle_info({:gun_ws, _worker, stream, {:text, frame}}, state) do
# Jason.decode calls iodata_to_binary internally
payload = Jason.decode!(frame)

from_handle =
payload["op"]
|> Constants.atom_from_voice_opcode()
|> Event.handle(payload, state)
frame
|> Jason.decode!()
|> Event.handle(state)

case from_handle do
{new_state, reply} ->
Expand Down Expand Up @@ -175,7 +173,7 @@ defmodule Nostrum.Voice.Session do
:heartbeat
])

:ok = :gun.ws_send(state.conn, state.stream, {:text, Payload.heartbeat_payload()})
:ok = :gun.ws_send(state.conn, state.stream, {:text, Payload.heartbeat_payload(state)})

{:noreply,
%{state | heartbeat_ref: ref, heartbeat_ack: false, last_heartbeat_send: DateTime.utc_now()}}
Expand Down

0 comments on commit 1628880

Please sign in to comment.