From aa86a1a19e92dea4849c0ddea8034d9532194285 Mon Sep 17 00:00:00 2001 From: Brandt Hill Date: Thu, 16 May 2024 00:15:05 -0500 Subject: [PATCH 1/8] Add support for aes256_gcm, xchacha20_poly1305, xsalsa20_poly1305 *_rtpsize voice encryption modes --- lib/nostrum/voice/audio.ex | 21 +--- lib/nostrum/voice/crypto.ex | 161 +++++++++++++++++++++++++++++ lib/nostrum/voice/crypto/chacha.ex | 109 +++++++++++++++++++ lib/nostrum/voice/payload.ex | 7 +- 4 files changed, 279 insertions(+), 19 deletions(-) create mode 100644 lib/nostrum/voice/crypto.ex create mode 100644 lib/nostrum/voice/crypto/chacha.ex diff --git a/lib/nostrum/voice/audio.ex b/lib/nostrum/voice/audio.ex index 8171ec5f5..deeaab2e7 100644 --- a/lib/nostrum/voice/audio.ex +++ b/lib/nostrum/voice/audio.ex @@ -7,11 +7,10 @@ defmodule Nostrum.Voice.Audio do alias Nostrum.Struct.VoiceState alias Nostrum.Util alias Nostrum.Voice + alias Nostrum.Voice.Crypto alias Nostrum.Voice.Opus alias Nostrum.Voice.Ports - @encryption_mode "xsalsa20_poly1305" - # Default value @frames_per_burst 10 @@ -20,8 +19,6 @@ defmodule Nostrum.Voice.Audio do @ytdl "youtube-dl" @streamlink "streamlink" - def encryption_mode, do: @encryption_mode - def ffmpeg_executable, do: Application.get_env(:nostrum, :ffmpeg, @ffmpeg) def youtubedl_executable, do: Application.get_env(:nostrum, :youtubedl, @ytdl) def streamlink_executable, do: Application.get_env(:nostrum, :streamlink, @streamlink) @@ -40,13 +37,6 @@ defmodule Nostrum.Voice.Audio do >> end - def encrypt_packet(%VoiceState{} = voice, data) do - header = rtp_header(voice) - # 12 byte header + 12 null bytes - nonce = header <> <<0::8*12>> - header <> Kcl.secretbox(data, nonce, voice.secret_key) - end - def open_udp do {:ok, socket} = :gen_udp.open(0, [ @@ -58,7 +48,7 @@ defmodule Nostrum.Voice.Audio do socket end - def get_rtp_packet(%VoiceState{secret_key: key, udp_socket: socket} = v) do + def get_rtp_packet(%VoiceState{udp_socket: socket} = v) do {:ok, {_ip, _port, payload}} = 
:gen_udp.recv(socket, 1024) case payload do @@ -66,9 +56,8 @@ defmodule Nostrum.Voice.Audio do <<2::2, 0::1, 1::5, 201::8, _rest::binary>> -> get_rtp_packet(v) - <> -> - nonce = header <> <<0::8*12>> - {header, Kcl.secretunbox(data, nonce, key)} + <> = data -> + {header, Crypto.decrypt(v, data)} end end @@ -144,7 +133,7 @@ defmodule Nostrum.Voice.Audio do v.udp_socket, v.ip |> ip_to_tuple(), v.port, - encrypt_packet(v, f) + Crypto.encrypt(v, f) ) %{ diff --git a/lib/nostrum/voice/crypto.ex b/lib/nostrum/voice/crypto.ex new file mode 100644 index 000000000..e682eac9f --- /dev/null +++ b/lib/nostrum/voice/crypto.ex @@ -0,0 +1,161 @@ +defmodule Nostrum.Voice.Crypto do + @moduledoc false + + alias Nostrum.Struct.VoiceState + alias Nostrum.Voice.Audio + alias Nostrum.Voice.Crypto.Chacha + + @type cipher :: :xchacha20_poly1305 | :xsalsa20_poly1305 | :aes256_gcm + + @cipher Application.compile_env(:nostrum, :voice_encryption_mode, :aes256_gcm) + + @encryption_mode Map.get( + %{ + xchacha20_poly1305: "aead_xchacha20_poly1305_rtpsize", + xsalsa20_poly1305: "xsalsa20_poly1305_lite_rtpsize", + aes256_gcm: "aead_aes256_gcm_rtpsize" + }, + @cipher, + "aead_aes256_gcm_rtpsize" + ) + + def encryption_mode, do: @encryption_mode + + def encrypt(voice, data) do + apply(__MODULE__, :"encrypt_#{@cipher}", [voice, data]) + end + + def decrypt(voice, data) do + apply(__MODULE__, :"decrypt_#{@cipher}", [voice, data]) + end + + def encrypt_xchacha20_poly1305(%VoiceState{secret_key: key, rtp_sequence: seq} = voice, data) do + header = Audio.rtp_header(voice) + + unpadded_nonce = <> + + # 24 byte nonce + nonce = unpadded_nonce <> <<0::unit(8)-size(20)>> + + {xchacha_key, xchacha_nonce} = Chacha.xchacha20_key_and_nonce(key, nonce) + + {cipher_text, tag} = + :crypto.crypto_one_time_aead( + :chacha20_poly1305, + xchacha_key, + xchacha_nonce, + data, + _aad = header, + _encrypt = true + ) + + header <> cipher_text <> tag <> unpadded_nonce + end + + def 
decrypt_xchacha20_poly1305(%VoiceState{secret_key: key}, data) do + {header, cipher_text, tag, nonce, ext_len} = decode_packet(data, 24) + {xchacha_key, xchacha_nonce} = Chacha.xchacha20_key_and_nonce(key, nonce) + + <<_exts::unit(32)-size(ext_len), opus::binary>> = + :crypto.crypto_one_time_aead( + :chacha20_poly1305, + xchacha_key, + xchacha_nonce, + cipher_text, + _aad = header, + tag, + _encrypt = false + ) + + opus + end + + def encrypt_xsalsa20_poly1305(%VoiceState{secret_key: key, rtp_sequence: seq} = voice, data) do + header = Audio.rtp_header(voice) + + unpadded_nonce = <> + + # 24 byte nonce + nonce = unpadded_nonce <> <<0::unit(8)-size(20)>> + + header <> Kcl.secretbox(data, nonce, key) <> unpadded_nonce + end + + def decrypt_xsalsa20_poly1305(%VoiceState{secret_key: key}, data) do + {_header, cipher_text, _tag, nonce, ext_len} = decode_packet(data, 24, 0) + <<_exts::unit(32)-size(ext_len), opus::binary>> = Kcl.secretunbox(cipher_text, nonce, key) + opus + end + + def encrypt_aes256_gcm(%VoiceState{secret_key: key, rtp_sequence: seq} = voice, data) do + header = Audio.rtp_header(voice) + + unpadded_nonce = <> + + # 12 byte nonce + nonce = unpadded_nonce <> <<0::unit(8)-size(8)>> + + {cipher_text, tag} = + :crypto.crypto_one_time_aead(:aes_256_gcm, key, nonce, data, _aad = header, _encrypt = true) + + header <> cipher_text <> tag <> unpadded_nonce + end + + def decrypt_aes256_gcm(%VoiceState{secret_key: key}, data) do + {header, cipher_text, tag, nonce, ext_len} = decode_packet(data, 12) + + <<_exts::unit(32)-size(ext_len), opus::binary>> = + :crypto.crypto_one_time_aead( + :aes_256_gcm, + key, + nonce, + cipher_text, + _aad = header, + tag, + _encrypt = false + ) + + opus + end + + @unpadded_nonce_length 4 + + @doc """ + Discord's newer encryption modes ending in '_rtpsize' leave the first 4 bytes of the RTP + header extension in plaintext while encrypting the elements themselves. 
The AAD is the + 12-byte RTP header concatenated with the first 4 bytes of the RTP header extension. + + Much like is done within the function `Nostrum.Voice.Opus.strip_rtp_ext/1`, we pattern match + on the `0xBEDE` constant and the 16-bit big-endian extension length that denotes the length + in 32-bit words of the extension elements. Because the elements are a part of the cipher text, + the extension length is the number of 32-bit words to discard after decryption to obtain + solely the opus packet. + + This function returns a 5-element tuple with + - RTP header + - Fixed 12 byte header concatenated with the first 4 bytes of the extension + - Used as the AAD for AEAD ciphers + - cipher text + - RTP extension elements prepended to the opus packet + - cipher tag (MAC) + - nonce (padded) + - RTP header extension length + - for isolating the opus after decryption + """ + def decode_packet( + <>, + nonce_length \\ 24, + tag_length \\ 16 + ) + when byte_size(rest) - (@unpadded_nonce_length + tag_length) > ext_len * 4 do + header = header <> <<0xBE, 0xDE, ext_len::integer-16>> + cipher_text_len = byte_size(rest) - (tag_length + @unpadded_nonce_length) + + <> = rest + + nonce = unpadded_nonce <> <<0::unit(8)-size(nonce_length - @unpadded_nonce_length)>> + + {header, cipher_text, tag, nonce, ext_len} + end +end diff --git a/lib/nostrum/voice/crypto/chacha.ex b/lib/nostrum/voice/crypto/chacha.ex new file mode 100644 index 000000000..00f33a501 --- /dev/null +++ b/lib/nostrum/voice/crypto/chacha.ex @@ -0,0 +1,109 @@ +defmodule Nostrum.Voice.Crypto.Chacha do + @moduledoc false + + # Erlang's :crypto module supports the chacha20_poly1305 aead stream cipher. + # Analogously to Salsa20 and XSalsa20, XChaCha20 is a way to use 192-bit nonces + # with ChaCha20 by hashing the key and part of the extended nonce to generate a + # sub-key, which is used as the input key for ChaCha20.
+ # + # Even though we've implemented the bulk of what's needed to generate chacha20 key streams + # for encryption and decryption, we're only using this module to generate the inputs to + # use the :crypto module's chacha20_poly1305 functionality in the capacity of xchacha20 + # as is required by Discord with that encryption mode selected. + # + # This is all in service of leveraging the performance benefits of the NIF crypto + # functions, which are necessarily going to be more performant than anything implemented + # in pure elixir/erlang like the `:kcl` package. + # + # References for Salsa family of ciphers + # https://cr.yp.to/snuffle/spec.pdf + # https://cr.yp.to/chacha/chacha-20080128.pdf + # https://cr.yp.to/snuffle/xsalsa-20110204.pdf + # https://datatracker.ietf.org/doc/html/rfc7539 + # https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-xchacha + + import Bitwise + + @chacha_constant "expand 32-byte k" + + defp sum(a, b), do: a + b &&& 0xFFFFFFFF + defp rotl(a, b), do: (a <<< b ||| a >>> (32 - b)) &&& 0xFFFFFFFF + + defp quarter_round(a, b, c, d) do + a = a |> sum(b) + d = d |> bxor(a) |> rotl(16) + + c = c |> sum(d) + b = b |> bxor(c) |> rotl(12) + + a = a |> sum(b) + d = d |> bxor(a) |> rotl(8) + + c = c |> sum(d) + b = b |> bxor(c) |> rotl(7) + + {a, b, c, d} + end + + defp quarter_round_on(tuple, index_a, index_b, index_c, index_d) do + a = elem(tuple, index_a) + b = elem(tuple, index_b) + c = elem(tuple, index_c) + d = elem(tuple, index_d) + + {a, b, c, d} = quarter_round(a, b, c, d) + + tuple + |> put_elem(index_a, a) + |> put_elem(index_b, b) + |> put_elem(index_c, c) + |> put_elem(index_d, d) + end + + # 4 column quarter rounds followed by 4 diagonal quarter rounds + defp double_round(tuple) do + tuple + |> quarter_round_on(0, 4, 8, 12) + |> quarter_round_on(1, 5, 9, 13) + |> quarter_round_on(2, 6, 10, 14) + |> quarter_round_on(3, 7, 11, 15) + |> quarter_round_on(0, 5, 10, 15) + |> quarter_round_on(1, 6, 11, 12) + |> quarter_round_on(2, 
7, 8, 13) + |> quarter_round_on(3, 4, 9, 14) + end + + defp twenty_rounds(block) do + Enum.reduce(1..10, block, fn _, t -> double_round(t) end) + end + + defp hchacha20(<> = _k, <> = _n) do + (@chacha_constant <> key <> first_sixteen) + |> block_binary_to_tuple() + |> twenty_rounds() + |> hchacha20_block_tuple_to_binary() + end + + def xchacha20_key_and_nonce(<> = _k, <> = _n) do + xchacha20_key = hchacha20(key, nonce) + <<_first_sixteen::bytes-16, last_eight::bytes-8>> = nonce + xchacha20_nonce = <<0, 0, 0, 0>> <> last_eight + {xchacha20_key, xchacha20_nonce} + end + + defp block_binary_to_tuple( + <> + ) do + {x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15} + end + + defp hchacha20_block_tuple_to_binary( + {x0, x1, x2, x3, _, _, _, _, _, _, _, _, x12, x13, x14, x15} + ) do + <> + end +end diff --git a/lib/nostrum/voice/payload.ex b/lib/nostrum/voice/payload.ex index d2ae01a0c..541161eba 100644 --- a/lib/nostrum/voice/payload.ex +++ b/lib/nostrum/voice/payload.ex @@ -3,8 +3,9 @@ defmodule Nostrum.Voice.Payload do alias Nostrum.Cache.Me alias Nostrum.Constants - alias Nostrum.Voice.Audio - alias Nostrum.Struct.{VoiceState, VoiceWSState} + alias Nostrum.Struct.VoiceState + alias Nostrum.Struct.VoiceWSState + alias Nostrum.Voice.Crypto require Logger @@ -45,7 +46,7 @@ defmodule Nostrum.Voice.Payload do data: %{ address: ip, port: port, - mode: Audio.encryption_mode() + mode: Crypto.encryption_mode() } } |> build_payload("SELECT_PROTOCOL") From a8571b901d767560b6e4e9f7169d63114a886267 Mon Sep 17 00:00:00 2001 From: Brandt Hill Date: Fri, 17 May 2024 15:24:53 -0500 Subject: [PATCH 2/8] Implement XSalsa20 cipher --- lib/nostrum/voice/crypto.ex | 9 +- lib/nostrum/voice/crypto/chacha.ex | 9 +- lib/nostrum/voice/crypto/salsa.ex | 183 +++++++++++++++++++++++++++++ 3 files changed, 195 insertions(+), 6 deletions(-) create mode 100644 lib/nostrum/voice/crypto/salsa.ex diff --git a/lib/nostrum/voice/crypto.ex b/lib/nostrum/voice/crypto.ex index 
e682eac9f..be8e40f2d 100644 --- a/lib/nostrum/voice/crypto.ex +++ b/lib/nostrum/voice/crypto.ex @@ -4,6 +4,7 @@ defmodule Nostrum.Voice.Crypto do alias Nostrum.Struct.VoiceState alias Nostrum.Voice.Audio alias Nostrum.Voice.Crypto.Chacha + alias Nostrum.Voice.Crypto.Salsa @type cipher :: :xchacha20_poly1305 | :xsalsa20_poly1305 | :aes256_gcm @@ -49,7 +50,7 @@ defmodule Nostrum.Voice.Crypto do _encrypt = true ) - header <> cipher_text <> tag <> unpadded_nonce + [header, cipher_text, tag, unpadded_nonce] end def decrypt_xchacha20_poly1305(%VoiceState{secret_key: key}, data) do @@ -78,12 +79,12 @@ defmodule Nostrum.Voice.Crypto do # 24 byte nonce nonce = unpadded_nonce <> <<0::unit(8)-size(20)>> - header <> Kcl.secretbox(data, nonce, key) <> unpadded_nonce + [header, Salsa.encrypt(data, key, nonce), unpadded_nonce] end def decrypt_xsalsa20_poly1305(%VoiceState{secret_key: key}, data) do {_header, cipher_text, _tag, nonce, ext_len} = decode_packet(data, 24, 0) - <<_exts::unit(32)-size(ext_len), opus::binary>> = Kcl.secretunbox(cipher_text, nonce, key) + <<_exts::unit(32)-size(ext_len), opus::binary>> = Salsa.decrypt(cipher_text, key, nonce) opus end @@ -98,7 +99,7 @@ defmodule Nostrum.Voice.Crypto do {cipher_text, tag} = :crypto.crypto_one_time_aead(:aes_256_gcm, key, nonce, data, _aad = header, _encrypt = true) - header <> cipher_text <> tag <> unpadded_nonce + [header, cipher_text, tag, unpadded_nonce] end def decrypt_aes256_gcm(%VoiceState{secret_key: key}, data) do diff --git a/lib/nostrum/voice/crypto/chacha.ex b/lib/nostrum/voice/crypto/chacha.ex index 00f33a501..747afe0cf 100644 --- a/lib/nostrum/voice/crypto/chacha.ex +++ b/lib/nostrum/voice/crypto/chacha.ex @@ -60,7 +60,7 @@ defmodule Nostrum.Voice.Crypto.Chacha do |> put_elem(index_d, d) end - # 4 column quarter rounds followed by 4 diagonal quarter rounds + # Column round followed by diagonal round defp double_round(tuple) do tuple |> quarter_round_on(0, 4, 8, 12) @@ -77,8 +77,13 @@ defmodule 
Nostrum.Voice.Crypto.Chacha do Enum.reduce(1..10, block, fn _, t -> double_round(t) end) end + defp expand(<> = _k, <> = _n) do + @chacha_constant <> key <> nonce + end + defp hchacha20(<> = _k, <> = _n) do - (@chacha_constant <> key <> first_sixteen) + key + |> expand(first_sixteen) |> block_binary_to_tuple() |> twenty_rounds() |> hchacha20_block_tuple_to_binary() diff --git a/lib/nostrum/voice/crypto/salsa.ex b/lib/nostrum/voice/crypto/salsa.ex new file mode 100644 index 000000000..04a77a775 --- /dev/null +++ b/lib/nostrum/voice/crypto/salsa.ex @@ -0,0 +1,183 @@ +defmodule Nostrum.Voice.Crypto.Salsa do + @moduledoc false + + # To support xsalsa20_poly1305 without a NIF, we have to implement the + # Salsa20 cipher and HSalsa20 hash function to use 192-bit nonces. + # + # Along with leveraging the :crypto module to perform the poly1305 MAC function + # and xor'ing arbitrary-length binaries, by being more thoughtful and explicit + # with our implementation, we should be able to eke out better performance + # than the `:kcl` package provides.
+ # + # References for Salsa family of ciphers + # https://cr.yp.to/snuffle/spec.pdf + # https://cr.yp.to/chacha/chacha-20080128.pdf + # https://cr.yp.to/snuffle/xsalsa-20110204.pdf + # https://datatracker.ietf.org/doc/html/rfc7539 + # https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-xchacha + + import Bitwise + + @salsa_constant "expand 32-byte k" + |> :binary.bin_to_list() + |> Enum.chunk_every(4) + |> Enum.map(&:binary.list_to_bin/1) + + defp sum(a, b), do: a + b &&& 0xFFFFFFFF + defp rotl(a, b), do: (a <<< b ||| a >>> (32 - b)) &&& 0xFFFFFFFF + + def quarter_round(a, b, c, d) do + b = a |> sum(d) |> rotl(7) |> bxor(b) + c = b |> sum(a) |> rotl(9) |> bxor(c) + d = c |> sum(b) |> rotl(13) |> bxor(d) + a = d |> sum(c) |> rotl(18) |> bxor(a) + + {a, b, c, d} + end + + defp quarter_round_on(tuple, index_a, index_b, index_c, index_d) do + a = elem(tuple, index_a) + b = elem(tuple, index_b) + c = elem(tuple, index_c) + d = elem(tuple, index_d) + + {a, b, c, d} = quarter_round(a, b, c, d) + + tuple + |> put_elem(index_a, a) + |> put_elem(index_b, b) + |> put_elem(index_c, c) + |> put_elem(index_d, d) + end + + # Column round followed by row round + defp double_round(tuple) do + tuple + |> quarter_round_on(0, 4, 8, 12) + |> quarter_round_on(5, 9, 13, 1) + |> quarter_round_on(10, 14, 2, 6) + |> quarter_round_on(15, 3, 7, 11) + |> quarter_round_on(0, 1, 2, 3) + |> quarter_round_on(5, 6, 7, 4) + |> quarter_round_on(10, 11, 8, 9) + |> quarter_round_on(15, 12, 13, 14) + end + + def twenty_rounds(block) do + Enum.reduce(1..10, block, fn _, t -> double_round(t) end) + end + + def expand(<>, <>, block_count) when is_integer(block_count) do + # Full input is 64-bit nonce concatenated with little endian block count + input = nonce <> <> + expand(key, input) + end + + defp expand(<> = _key, <>) do + [c0, c1, c2, c3] = @salsa_constant + + c0 <> k0 <> c1 <> input <> c2 <> k1 <> c3 + end + + defp hsalsa20(<> = _k, <> = _n) do + key + |> expand(first_sixteen) + |> 
block_binary_to_tuple() + |> twenty_rounds() + |> hsalsa20_block_tuple_to_binary() + end + + def xsalsa20_key_and_nonce(<> = _k, <> = _n) do + xsalsa20_key = hsalsa20(key, nonce) + <<_first_sixteen::bytes-16, xsalsa20_nonce::bytes-8>> = nonce + {xsalsa20_key, xsalsa20_nonce} + end + + defp block_binary_to_tuple( + <> + ) do + {x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15} + end + + defp hsalsa20_block_tuple_to_binary({x0, _, _, _, _, x5, x6, x7, x8, x9, x10, _, _, _, _, x15}) do + <> + end + + defp sum_blocks( + {x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15}, + {y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12, y13, y14, y15} + ) do + <> + end + + def bxor_block(<>, <>) do + :crypto.exor(keystream, message) + end + + def bxor_block(<>, message) when byte_size(message) < 64 do + keystream + |> binary_part(0, byte_size(message)) + |> :crypto.exor(message) + end + + def keystream_block(key, nonce, block_count) do + block = + key + |> expand(nonce, block_count) + |> block_binary_to_tuple() + + block + |> twenty_rounds() + |> sum_blocks(block) + end + + defp crypt(key, nonce, message, block_count \\ 0, outputs \\ []) + + defp crypt(key, nonce, <>, block_count, outputs) do + output_block = crypt_block(key, nonce, message, block_count) + crypt(key, nonce, rest, block_count + 1, [output_block | outputs]) + end + + defp crypt(key, nonce, <>, block_count, outputs) do + output_block = crypt_block(key, nonce, message, block_count) + outputs = Enum.reverse([output_block | outputs]) + IO.iodata_to_binary(outputs) + end + + defp crypt_block(key, nonce, message, block_count) do + keystream = keystream_block(key, nonce, block_count) + bxor_block(keystream, message) + end + + def encrypt(plain_text, <> = _key, <> = _nonce) do + {xsalsa_key, xsalsa_nonce} = xsalsa20_key_and_nonce(key, nonce) + message = <<0::unit(8)-size(32)>> <> plain_text + <> = crypt(xsalsa_key, xsalsa_nonce, message) + cipher_tag = :crypto.mac(:poly1305, 
mac_otp, cipher_text) + cipher_tag <> cipher_text + end + + def decrypt( + <> = _encrypted_message, + <>, + <> + ) do + {xsalsa_key, xsalsa_nonce} = xsalsa20_key_and_nonce(key, nonce) + message = <<0::unit(8)-size(32)>> <> cipher_text + <> = crypt(xsalsa_key, xsalsa_nonce, message) + + case :crypto.mac(:poly1305, mac_otp, cipher_text) do + ^cipher_tag -> plain_text + _error -> :error + end + end +end From 4947e43100465cb34c9185deb8198fa934a70b8c Mon Sep 17 00:00:00 2001 From: Brandt Hill Date: Fri, 17 May 2024 15:31:03 -0500 Subject: [PATCH 3/8] Remove Kcl dependencies --- guides/advanced/multi_node.md | 4 ++-- mix.exs | 1 - mix.lock | 7 ------- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/guides/advanced/multi_node.md b/guides/advanced/multi_node.md index 4f48d4785..948ee46fe 100644 --- a/guides/advanced/multi_node.md +++ b/guides/advanced/multi_node.md @@ -40,7 +40,7 @@ changing your application definition in `mix.exs` as follows: mod: {MyBot.Application, []}, included_applications: [:nostrum], # You can see this with `mix app.tree nostrum` - extra_applications: [:certifi, :gun, :inets, :jason, :kcl, :mime] + extra_applications: [:certifi, :gun, :inets, :jason, :mime] # ... 
] end @@ -53,7 +53,7 @@ as command frameworks like `:nosedrum`: ```elixir defp deps do [ - {:nostrum, "~> 0.8", runtime: false}, + {:nostrum, "~> 0.9", runtime: false}, # {:nosedrum, "~> 0.6", runtime: false}, ] end diff --git a/mix.exs b/mix.exs index 02fb5dddd..12c4b0fac 100644 --- a/mix.exs +++ b/mix.exs @@ -158,7 +158,6 @@ defmodule Nostrum.Mixfile do {:jason, "~> 1.4"}, {:gun, "~> 2.0"}, {:certifi, "~> 2.13"}, - {:kcl, "~> 1.4"}, {:mime, "~> 1.6 or ~> 2.0"}, {:ezstd, "~> 1.1", optional: true}, {:castle, "~> 0.3.0", runtime: false}, diff --git a/mix.lock b/mix.lock index 416c9e68d..73c0cfd11 100644 --- a/mix.lock +++ b/mix.lock @@ -4,15 +4,11 @@ "bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"}, "castle": {:hex, :castle, "0.3.0", "47b1a550b2348a6d7e60e43ded1df19dca601ed21ef6f267c3dbb1b3a301fbf5", [:mix], [{:forecastle, "~> 0.1.0", [hex: :forecastle, repo: "hexpm", optional: false]}], "hexpm", "dbdc1c171520c4591101938a3d342dec70d36b7f5b102a5c138098581e35fcef"}, "certifi": {:hex, :certifi, "2.13.0", "e52be248590050b2dd33b0bb274b56678f9068e67805dca8aa8b1ccdb016bbf6", [:rebar3], [], "hexpm", "8f3d9533a0f06070afdfd5d596b32e21c6580667a492891851b0e2737bc507a1"}, - "chacha20": {:hex, :chacha20, "1.0.4", "0359d8f9a32269271044c1b471d5cf69660c362a7c61a98f73a05ef0b5d9eb9e", [:mix], [], "hexpm", "2027f5d321ae9903f1f0da7f51b0635ad6b8819bc7fe397837930a2011bc2349"}, "cowlib": {:hex, :cowlib, "2.12.1", "a9fa9a625f1d2025fe6b462cb865881329b5caff8f1854d1cbc9f9533f00e1e1", [:make, :rebar3], [], "hexpm", "163b73f6367a7341b33c794c4e88e7dbfe6498ac42dcd69ef44c5bc5507c8db0"}, "credo": {:hex, :credo, "1.7.5", "643213503b1c766ec0496d828c90c424471ea54da77c8a168c725686377b9545", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, 
{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "f799e9b5cd1891577d8c773d245668aa74a2fcd15eb277f51a0131690ebfb3fd"}, - "curve25519": {:hex, :curve25519, "1.0.5", "f801179424e4012049fcfcfcda74ac04f65d0ffceeb80e7ef1d3352deb09f5bb", [:mix], [], "hexpm", "0fba3ad55bf1154d4d5fc3ae5fb91b912b77b13f0def6ccb3a5d58168ff4192d"}, "deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm", "ce708e5f094b9cd4e8f2be4f00d2f4250c4095be93f8cd6d018c753894885430"}, "dialyxir": {:hex, :dialyxir, "1.3.0", "fd1672f0922b7648ff9ce7b1b26fcf0ef56dda964a459892ad15f6b4410b5284", [:mix], [{:erlex, ">= 0.2.6", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "00b2a4bcd6aa8db9dcb0b38c1225b7277dca9bc370b6438715667071a304696f"}, "earmark_parser": {:hex, :earmark_parser, "1.4.39", "424642f8335b05bb9eb611aa1564c148a8ee35c9c8a8bba6e129d51a3e3c6769", [:mix], [], "hexpm", "06553a88d1f1846da9ef066b87b57c6f605552cfbe40d20bd8d59cc6bde41944"}, - "ed25519": {:hex, :ed25519, "1.4.1", "479fb83c3e31987c9cad780e6aeb8f2015fb5a482618cdf2a825c9aff809afc4", [:mix], [], "hexpm", "0dacb84f3faa3d8148e81019ca35f9d8dcee13232c32c9db5c2fb8ff48c80ec7"}, - "equivalex": {:hex, :equivalex, "1.0.3", "170d9a82ae066e0020dfe1cf7811381669565922eb3359f6c91d7e9a1124ff74", [:mix], [], "hexpm", "46fa311adb855117d36e461b9c0ad2598f72110ad17ad73d7533c78020e045fc"}, "erlex": {:hex, :erlex, "0.2.6", "c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"}, "ex_doc": {:hex, :ex_doc, "0.32.1", "21e40f939515373bcdc9cffe65f3b3543f05015ac6c3d01d991874129d173420", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.1", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", 
[hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "5142c9db521f106d61ff33250f779807ed2a88620e472ac95dc7d59c380113da"}, "ezstd": {:hex, :ezstd, "1.1.0", "d3b483d6acfadfb65dba4015371e6d54526dbf3d9ef0941b5add8bf5890731f4", [:rebar3], [], "hexpm", "28cfa0ed6cc3922095ad5ba0f23392a1664273358b17184baa909868361184e7"}, @@ -20,14 +16,11 @@ "forecastle": {:hex, :forecastle, "0.1.1", "89dcfaccbfffe866cbd8a4c41ade55f62f00f1b5d0528bec787b1e6631004b98", [:mix], [], "hexpm", "f6f4d297224a22ac4387d305249aed7b8b02e85b4a03e83225af4536812c4079"}, "gun": {:hex, :gun, "2.0.1", "160a9a5394800fcba41bc7e6d421295cf9a7894c2252c0678244948e3336ad73", [:make, :rebar3], [{:cowlib, "2.12.1", [hex: :cowlib, repo: "hexpm", optional: false]}], "hexpm", "a10bc8d6096b9502205022334f719cc9a08d9adcfbfc0dbee9ef31b56274a20b"}, "jason": {:hex, :jason, "1.4.1", "af1504e35f629ddcdd6addb3513c3853991f694921b1b9368b0bd32beb9f1b63", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "fbb01ecdfd565b56261302f7e1fcc27c4fb8f32d56eab74db621fc154604a7a1"}, - "kcl": {:hex, :kcl, "1.4.2", "8b73a55a14899dc172fcb05a13a754ac171c8165c14f65043382d567922f44ab", [:mix], [{:curve25519, ">= 1.0.4", [hex: :curve25519, repo: "hexpm", optional: false]}, {:ed25519, "~> 1.3", [hex: :ed25519, repo: "hexpm", optional: false]}, {:poly1305, "~> 1.0", [hex: :poly1305, repo: "hexpm", optional: false]}, {:salsa20, "~> 1.0", [hex: :salsa20, repo: "hexpm", optional: false]}], "hexpm", "9f083dd3844d902df6834b258564a82b21a15eb9f6acdc98e8df0c10feeabf05"}, "makeup": {:hex, :makeup, "1.1.1", "fa0bc768698053b2b3869fa8a62616501ff9d11a562f3ce39580d60860c3a55e", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "5dc62fbdd0de44de194898b6710692490be74baa02d9d108bc29f007783b0b48"}, "makeup_elixir": {:hex, :makeup_elixir, "0.16.2", "627e84b8e8bf22e60a2579dad15067c755531fea049ae26ef1020cad58fe9578", [:mix], [{:makeup, "~> 1.0", 
[hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "41193978704763f6bbe6cc2758b84909e62984c7752b3784bd3c218bb341706b"}, "makeup_erlang": {:hex, :makeup_erlang, "0.1.5", "e0ff5a7c708dda34311f7522a8758e23bfcd7d8d8068dc312b5eb41c6fd76eba", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "94d2e986428585a21516d7d7149781480013c56e30c6a233534bedf38867a59a"}, "mime": {:hex, :mime, "2.0.3", "3676436d3d1f7b81b5a2d2bd8405f412c677558c81b1c92be58c00562bb59095", [:mix], [], "hexpm", "27a30bf0db44d25eecba73755acf4068cbfe26a4372f9eb3e4ea3a45956bff6b"}, "nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"}, - "poly1305": {:hex, :poly1305, "1.0.4", "7cdc8961a0a6e00a764835918cdb8ade868044026df8ef5d718708ea6cc06611", [:mix], [{:chacha20, "~> 1.0", [hex: :chacha20, repo: "hexpm", optional: false]}, {:equivalex, "~> 1.0", [hex: :equivalex, repo: "hexpm", optional: false]}], "hexpm", "e14e684661a5195e149b3139db4a1693579d4659d65bba115a307529c47dbc3b"}, "recon": {:hex, :recon, "2.5.3", "739107b9050ea683c30e96de050bc59248fd27ec147696f79a8797ff9fa17153", [:mix, :rebar3], [], "hexpm", "6c6683f46fd4a1dfd98404b9f78dcabc7fcd8826613a89dcb984727a8c3099d7"}, - "salsa20": {:hex, :salsa20, "1.0.4", "404cbea1fa8e68a41bcc834c0a2571ac175580fec01cc38cc70c0fb9ffc87e9b", [:mix], [], "hexpm", "745ddcd8cfa563ddb0fd61e7ce48d5146279a2cf7834e1da8441b369fdc58ac6"}, "statistex": {:hex, :statistex, "1.0.0", "f3dc93f3c0c6c92e5f291704cf62b99b553253d7969e9a5fa713e5481cd858a5", [:mix], [], "hexpm", "ff9d8bee7035028ab4742ff52fc80a2aa35cece833cf5319009b52f1b5a86c27"}, } From d2f74b450d98eab8cb640370c36e09e22a663cb0 Mon Sep 17 00:00:00 2001 From: Brandt Hill Date: Fri, 17 May 2024 21:25:34 -0500 Subject: [PATCH 4/8] Fully support 
all encryption modes --- guides/functionality/voice.md | 27 ++++ guides/intro/intro.md | 1 + lib/nostrum/voice/crypto.ex | 217 ++++++++++++++++++----------- lib/nostrum/voice/crypto/aes.ex | 27 ++++ lib/nostrum/voice/crypto/chacha.ex | 47 +++++-- lib/nostrum/voice/crypto/salsa.ex | 30 ++-- lib/nostrum/voice/session.ex | 13 +- 7 files changed, 254 insertions(+), 108 deletions(-) create mode 100644 lib/nostrum/voice/crypto/aes.ex diff --git a/guides/functionality/voice.md b/guides/functionality/voice.md index 29c1a455f..38edbe1eb 100644 --- a/guides/functionality/voice.md +++ b/guides/functionality/voice.md @@ -177,3 +177,30 @@ packets returned per invocation and the option to return the raw RTP packet. In likely won't be missed when consuming incoming voice packets asynchronously. Note that the third element in the event is of type `t:Nostrum.Struct.VoiceWSState.t/0` and not `t:Nostrum.Struct.WSState.t/0`. + +## Encryption Modes + +Nostrum supports all of Discord's available encryption modes for voice channels. +The encryption mode is invisible to the user, and you will likely never need to touch it. + +Different encryption modes may have different performance characteristics depending on the +hardware architecture your bot is running on. If you're interested, keep reading. 
+ +#### Encryption Mode Configuration Options + +```elixir +config :nostrum, :voice_encryption_mode, :aes256_gcm # Default +``` + +Available configuration options are as follows: +- `:xsalsa20_poly1305` +- `:xsalsa20_poly1305_suffix` +- `:xsalsa20_poly1305_lite` +- `:xsalsa20_poly1305_lite_rtpsize` (not yet documented by Discord) +- `:aead_xchacha20_poly1305_rtpsize` (not yet documented by Discord) +- `:aead_aes256_gcm` (not yet documented by Discord) +- `:aead_aes256_gcm_rtpsize` (not yet documented by Discord) +- `:xchacha20_poly1305` (alias for `:aead_xchacha20_poly1305_rtpsize`) +- `:aes256_gcm` (alias for `:aead_aes256_gcm_rtpsize`) + +The first seven are Discord's available options, while the last two are shorter aliases. diff --git a/guides/intro/intro.md b/guides/intro/intro.md index 9e53e4651..e043de51e 100644 --- a/guides/intro/intro.md +++ b/guides/intro/intro.md @@ -76,6 +76,7 @@ Apart from the `token` field mentioned above, the following fields are also supp but you still wish to connect to the voice gateway, you can do so manually by calling `Nostrum.Voice.connect_to_gateway/1` after joining a voice channel. Defaults to `true`. 
+- `voice_encryption_mode` - Defaults to `:aes256_gcm` ### Development & debugging diff --git a/lib/nostrum/voice/crypto.ex b/lib/nostrum/voice/crypto.ex index be8e40f2d..245a0c520 100644 --- a/lib/nostrum/voice/crypto.ex +++ b/lib/nostrum/voice/crypto.ex @@ -2,124 +2,164 @@ defmodule Nostrum.Voice.Crypto do @moduledoc false alias Nostrum.Struct.VoiceState + alias Nostrum.Struct.VoiceWSState alias Nostrum.Voice.Audio + alias Nostrum.Voice.Crypto.Aes alias Nostrum.Voice.Crypto.Chacha alias Nostrum.Voice.Crypto.Salsa - @type cipher :: :xchacha20_poly1305 | :xsalsa20_poly1305 | :aes256_gcm + @type cipher_rtpsize :: + :xsalsa20_poly1305_lite_rtpsize + | :aead_xchacha20_poly1305_rtpsize + | :aead_aes256_gcm_rtpsize - @cipher Application.compile_env(:nostrum, :voice_encryption_mode, :aes256_gcm) + @type cipher_alias :: :aes256_gcm | :xchacha20_poly1305 - @encryption_mode Map.get( - %{ - xchacha20_poly1305: "aead_xchacha20_poly1305_rtpsize", - xsalsa20_poly1305: "xsalsa20_poly1305_lite_rtpsize", - aes256_gcm: "aead_aes256_gcm_rtpsize" - }, - @cipher, - "aead_aes256_gcm_rtpsize" - ) + @type cipher_non_rtpsize :: + :xsalsa20_poly1305 + | :xsalsa20_poly1305_suffix + | :xsalsa20_poly1305_lite + | :aead_aes256_gcm - def encryption_mode, do: @encryption_mode + @type cipher :: cipher_non_rtpsize() | cipher_alias() | cipher_rtpsize() + + defp mode, do: Application.get_env(:nostrum, :voice_encryption_mode, :aes256_gcm) + + def encryption_mode do + mode = mode() + + Map.get( + %{ + xchacha20_poly1305: "aead_xchacha20_poly1305_rtpsize", + aes256_gcm: "aead_aes256_gcm_rtpsize" + }, + mode, + "#{mode}" + ) + end def encrypt(voice, data) do - apply(__MODULE__, :"encrypt_#{@cipher}", [voice, data]) + apply(__MODULE__, :"encrypt_#{mode()}", [voice, data]) end - def decrypt(voice, data) do - apply(__MODULE__, :"decrypt_#{@cipher}", [voice, data]) + def decrypt(%VoiceState{secret_key: key}, data), do: decrypt(key, data) + def decrypt(%VoiceWSState{secret_key: key}, data), do: decrypt(key, 
data) + + def decrypt(key, data) do + apply(__MODULE__, :"decrypt_#{mode()}", [key, data]) end - def encrypt_xchacha20_poly1305(%VoiceState{secret_key: key, rtp_sequence: seq} = voice, data) do + def encrypt_xsalsa20_poly1305(%VoiceState{secret_key: key} = voice, data) do header = Audio.rtp_header(voice) - unpadded_nonce = <> + nonce = header <> <<0::unit(8)-size(12)>> - # 24 byte nonce - nonce = unpadded_nonce <> <<0::unit(8)-size(20)>> + [header, Salsa.encrypt(data, key, nonce)] + end - {xchacha_key, xchacha_nonce} = Chacha.xchacha20_key_and_nonce(key, nonce) + def encrypt_xsalsa20_poly1305_suffix(%VoiceState{secret_key: key} = voice, data) do + header = Audio.rtp_header(voice) - {cipher_text, tag} = - :crypto.crypto_one_time_aead( - :chacha20_poly1305, - xchacha_key, - xchacha_nonce, - data, - _aad = header, - _encrypt = true - ) + nonce = :crypto.strong_rand_bytes(24) - [header, cipher_text, tag, unpadded_nonce] + [header, Salsa.encrypt(data, key, nonce), nonce] end - def decrypt_xchacha20_poly1305(%VoiceState{secret_key: key}, data) do - {header, cipher_text, tag, nonce, ext_len} = decode_packet(data, 24) - {xchacha_key, xchacha_nonce} = Chacha.xchacha20_key_and_nonce(key, nonce) + def encrypt_xsalsa20_poly1305_lite(%VoiceState{secret_key: key} = voice, data) do + header = Audio.rtp_header(voice) - <<_exts::unit(32)-size(ext_len), opus::binary>> = - :crypto.crypto_one_time_aead( - :chacha20_poly1305, - xchacha_key, - xchacha_nonce, - cipher_text, - _aad = header, - tag, - _encrypt = false - ) + {unpadded_nonce, nonce} = lite_nonce(voice) - opus + [header, Salsa.encrypt(data, key, nonce), unpadded_nonce] + end + + def encrypt_xsalsa20_poly1305_lite_rtpsize(voice, data), + do: encrypt_xsalsa20_poly1305_lite(voice, data) + + def encrypt_xchacha20_poly1305(voice, data), + do: encrypt_aead_xchacha20_poly1305_rtpsize(voice, data) + + def encrypt_aead_xchacha20_poly1305_rtpsize(%VoiceState{secret_key: key} = voice, data) do + header = Audio.rtp_header(voice) + + 
{unpadded_nonce, nonce} = lite_nonce(voice) + + [header, Chacha.encrypt(data, key, nonce, _aad = header), unpadded_nonce] end - def encrypt_xsalsa20_poly1305(%VoiceState{secret_key: key, rtp_sequence: seq} = voice, data) do + def encrypt_aead_aes256_gcm(voice, data), do: encrypt_aes256_gcm(voice, data) + def encrypt_aead_aes256_gcm_rtpsize(voice, data), do: encrypt_aes256_gcm(voice, data) + + def encrypt_aes256_gcm(%VoiceState{secret_key: key} = voice, data) do header = Audio.rtp_header(voice) - unpadded_nonce = <> + {unpadded_nonce, nonce} = lite_nonce(voice, 12) - # 24 byte nonce - nonce = unpadded_nonce <> <<0::unit(8)-size(20)>> + [header, Aes.encrypt(data, key, nonce, _aad = header), unpadded_nonce] + end - [header, Salsa.encrypt(data, key, nonce), unpadded_nonce] + def decrypt_xsalsa20_poly1305(key, <>) do + nonce = header <> <<0::unit(8)-size(12)>> + + Salsa.decrypt(cipher_text, key, nonce) end - def decrypt_xsalsa20_poly1305(%VoiceState{secret_key: key}, data) do - {_header, cipher_text, _tag, nonce, ext_len} = decode_packet(data, 24, 0) + def decrypt_xsalsa20_poly1305_lite(key, data) do + {_header, cipher_text, _tag = <<>>, nonce} = decode_packet(data, 4, 24, 0) + + Salsa.decrypt(cipher_text, key, nonce) + end + + def decrypt_xsalsa20_poly1305_suffix(key, data) do + {_header, cipher_text, _tag = <<>>, nonce} = decode_packet(data, 24, 24, 0) + + Salsa.decrypt(cipher_text, key, nonce) + end + + def decrypt_xsalsa20_poly1305_lite_rtpsize(key, data) do + {_header, cipher_text, _tag, nonce, ext_len} = decode_packet_rtpsize(data, 24, 0) + <<_exts::unit(32)-size(ext_len), opus::binary>> = Salsa.decrypt(cipher_text, key, nonce) + opus end - def encrypt_aes256_gcm(%VoiceState{secret_key: key, rtp_sequence: seq} = voice, data) do - header = Audio.rtp_header(voice) - - unpadded_nonce = <> + def decrypt_xchacha20_poly1305(key, data), + do: decrypt_aead_xchacha20_poly1305_rtpsize(key, data) - # 12 byte nonce - nonce = unpadded_nonce <> <<0::unit(8)-size(8)>> + def 
decrypt_aead_xchacha20_poly1305_rtpsize(key, data) do + {header, cipher_text, tag, nonce, ext_len} = decode_packet_rtpsize(data, 24) - {cipher_text, tag} = - :crypto.crypto_one_time_aead(:aes_256_gcm, key, nonce, data, _aad = header, _encrypt = true) + <<_exts::unit(32)-size(ext_len), opus::binary>> = + Chacha.decrypt(cipher_text, key, nonce, _aad = header, tag) - [header, cipher_text, tag, unpadded_nonce] + opus end - def decrypt_aes256_gcm(%VoiceState{secret_key: key}, data) do - {header, cipher_text, tag, nonce, ext_len} = decode_packet(data, 12) + def decrypt_aes256_gcm(key, data), do: decrypt_aead_aes256_gcm_rtpsize(key, data) + + def decrypt_aead_aes256_gcm_rtpsize(key, data) do + {header, cipher_text, tag, nonce, ext_len} = decode_packet_rtpsize(data, 12) <<_exts::unit(32)-size(ext_len), opus::binary>> = - :crypto.crypto_one_time_aead( - :aes_256_gcm, - key, - nonce, - cipher_text, - _aad = header, - tag, - _encrypt = false - ) + Aes.decrypt(cipher_text, key, nonce, _aad = header, tag) opus end - @unpadded_nonce_length 4 + def decrypt_aead_aes256_gcm(key, data) do + {header, cipher_text, tag, nonce} = decode_packet(data, 4, 12, 16) + + Aes.decrypt(cipher_text, key, nonce, _aad = header, tag) + end + + @lite_nonce_length 4 + + defp lite_nonce(%VoiceState{rtp_sequence: rtp_sequence}, nonce_length \\ 24) do + unpadded_nonce = <> + nonce = unpadded_nonce <> <<0::unit(8)-size(nonce_length - @lite_nonce_length)>> + {unpadded_nonce, nonce} + end @doc """ Discord's newer encryption modes ending in '_rtpsize' leave the first 4 bytes of the RTP @@ -143,20 +183,41 @@ defmodule Nostrum.Voice.Crypto do - RTP header extension length - for isolating the opus after decryption """ - def decode_packet( + def decode_packet_rtpsize( <>, nonce_length \\ 24, tag_length \\ 16 ) - when byte_size(rest) - (@unpadded_nonce_length + tag_length) > ext_len * 4 do + when byte_size(rest) - (@lite_nonce_length + tag_length) > ext_len * 4 do header = header <> <<0xBE, 0xDE, 
ext_len::integer-16>> - cipher_text_len = byte_size(rest) - (tag_length + @unpadded_nonce_length) - <> = rest + {cipher_text, tag, unpadded_nonce} = split_data(rest, @lite_nonce_length, tag_length) - nonce = unpadded_nonce <> <<0::unit(8)-size(nonce_length - @unpadded_nonce_length)>> + nonce = unpadded_nonce <> <<0::unit(8)-size(nonce_length - @lite_nonce_length)>> {header, cipher_text, tag, nonce, ext_len} end + + # Non "rtpsize" modes where everything is encrypted beyond the 12-byte header + def decode_packet( + <>, + unpadded_nonce_length \\ @lite_nonce_length, + nonce_length \\ 24, + tag_length \\ 16 + ) do + {cipher_text, tag, unpadded_nonce} = split_data(rest, unpadded_nonce_length, tag_length) + + nonce = unpadded_nonce <> <<0::unit(8)-size(nonce_length - unpadded_nonce_length)>> + + {header, cipher_text, tag, nonce} + end + + defp split_data(data, unpadded_nonce_length, tag_length) do + cipher_text_length = byte_size(data) - (unpadded_nonce_length + tag_length) + + <> = data + + {cipher_text, tag, unpadded_nonce} + end end diff --git a/lib/nostrum/voice/crypto/aes.ex b/lib/nostrum/voice/crypto/aes.ex new file mode 100644 index 000000000..a7eff6404 --- /dev/null +++ b/lib/nostrum/voice/crypto/aes.ex @@ -0,0 +1,27 @@ +defmodule Nostrum.Voice.Crypto.Aes do + @moduledoc false + + # AES is fully supported by the erlang crypto module, so this module is + # just to provide a convenient wrapper around it for encryption and decryption + + @spec encrypt(binary(), <<_::256>>, <<_::96>>, binary()) :: iodata() + def encrypt(plain_text, <> = _key, <> = _nonce, aad) do + {cipher_text, tag} = + :crypto.crypto_one_time_aead(:aes_256_gcm, key, nonce, plain_text, aad, _encrypt = true) + + [cipher_text, tag] + end + + @spec decrypt(binary(), <<_::256>>, <<_::96>>, binary(), <<_::128>>) :: binary() | :error + def decrypt(cipher_text, <> = _key, <> = _nonce, aad, tag) do + :crypto.crypto_one_time_aead( + :aes_256_gcm, + key, + nonce, + cipher_text, + aad, + tag, + _encrypt = 
false + ) + end +end diff --git a/lib/nostrum/voice/crypto/chacha.ex b/lib/nostrum/voice/crypto/chacha.ex index 747afe0cf..775e123c8 100644 --- a/lib/nostrum/voice/crypto/chacha.ex +++ b/lib/nostrum/voice/crypto/chacha.ex @@ -11,7 +11,7 @@ defmodule Nostrum.Voice.Crypto.Chacha do # use the :crypto module's chacha20_poly1305 functionality in the capacity of xchacha20 # as is required by Discord with that encryption mode selected. # - # This is to all in service of leveraging the performance benefits of the the NIF crypto + # This is all in service of leveraging the performance benefits of the the NIF crypto # functions, which are necessarily going to be more performant than anything implemented # in pure elixir/erlang like the `:kcl` package. # @@ -24,6 +24,8 @@ defmodule Nostrum.Voice.Crypto.Chacha do import Bitwise + import Nostrum.Voice.Crypto.Salsa, only: [block_binary_to_tuple: 1] + @chacha_constant "expand 32-byte k" defp sum(a, b), do: a + b &&& 0xFFFFFFFF @@ -89,26 +91,49 @@ defmodule Nostrum.Voice.Crypto.Chacha do |> hchacha20_block_tuple_to_binary() end - def xchacha20_key_and_nonce(<> = _k, <> = _n) do + defp xchacha20_key_and_nonce(<> = _k, <> = _n) do xchacha20_key = hchacha20(key, nonce) <<_first_sixteen::bytes-16, last_eight::bytes-8>> = nonce xchacha20_nonce = <<0, 0, 0, 0>> <> last_eight {xchacha20_key, xchacha20_nonce} end - defp block_binary_to_tuple( - <> - ) do - {x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15} - end - defp hchacha20_block_tuple_to_binary( {x0, x1, x2, x3, _, _, _, _, _, _, _, _, x12, x13, x14, x15} ) do <> end + + @spec encrypt(binary(), <<_::256>>, <<_::192>>, binary()) :: iodata() + def encrypt(plain_text, <> = _key, <> = _nonce, aad) do + {xchacha_key, xchacha_nonce} = xchacha20_key_and_nonce(key, nonce) + + {cipher_text, tag} = + :crypto.crypto_one_time_aead( + :chacha20_poly1305, + xchacha_key, + xchacha_nonce, + plain_text, + aad, + _encrypt = true + ) + + [cipher_text, tag] + end + + @spec 
decrypt(binary(), <<_::256>>, <<_::192>>, binary(), <<_::128>>) :: binary() | :error + def decrypt(cipher_text, <>, <>, aad, tag) do + {xchacha_key, xchacha_nonce} = xchacha20_key_and_nonce(key, nonce) + + :crypto.crypto_one_time_aead( + :chacha20_poly1305, + xchacha_key, + xchacha_nonce, + cipher_text, + aad, + tag, + _encrypt = false + ) + end end diff --git a/lib/nostrum/voice/crypto/salsa.ex b/lib/nostrum/voice/crypto/salsa.ex index 04a77a775..6525a0d3e 100644 --- a/lib/nostrum/voice/crypto/salsa.ex +++ b/lib/nostrum/voice/crypto/salsa.ex @@ -26,7 +26,7 @@ defmodule Nostrum.Voice.Crypto.Salsa do defp sum(a, b), do: a + b &&& 0xFFFFFFFF defp rotl(a, b), do: (a <<< b ||| a >>> (32 - b)) &&& 0xFFFFFFFF - def quarter_round(a, b, c, d) do + defp quarter_round(a, b, c, d) do b = a |> sum(d) |> rotl(7) |> bxor(b) c = b |> sum(a) |> rotl(9) |> bxor(c) d = c |> sum(b) |> rotl(13) |> bxor(d) @@ -63,11 +63,11 @@ defmodule Nostrum.Voice.Crypto.Salsa do |> quarter_round_on(15, 12, 13, 14) end - def twenty_rounds(block) do + defp twenty_rounds(block) do Enum.reduce(1..10, block, fn _, t -> double_round(t) end) end - def expand(<>, <>, block_count) when is_integer(block_count) do + defp expand(<>, <>, block_count) when is_integer(block_count) do # Full input is 64-bit nonce concatenated with little endian block count input = nonce <> <> expand(key, input) @@ -87,18 +87,18 @@ defmodule Nostrum.Voice.Crypto.Salsa do |> hsalsa20_block_tuple_to_binary() end - def xsalsa20_key_and_nonce(<> = _k, <> = _n) do + defp xsalsa20_key_and_nonce(<> = _k, <> = _n) do xsalsa20_key = hsalsa20(key, nonce) <<_first_sixteen::bytes-16, xsalsa20_nonce::bytes-8>> = nonce {xsalsa20_key, xsalsa20_nonce} end - defp block_binary_to_tuple( - <> - ) do + def block_binary_to_tuple( + <> + ) do {x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15} end @@ -119,17 +119,17 @@ defmodule Nostrum.Voice.Crypto.Salsa do sum(x15, y15)::little-32>> end - def bxor_block(<>, <>) do + defp 
bxor_block(<>, <>) do :crypto.exor(keystream, message) end - def bxor_block(<>, message) when byte_size(message) < 64 do + defp bxor_block(<>, message) when byte_size(message) < 64 do keystream |> binary_part(0, byte_size(message)) |> :crypto.exor(message) end - def keystream_block(key, nonce, block_count) do + defp keystream_block(key, nonce, block_count) do block = key |> expand(nonce, block_count) @@ -158,14 +158,16 @@ defmodule Nostrum.Voice.Crypto.Salsa do bxor_block(keystream, message) end + @spec encrypt(binary(), <<_::256>>, <<_::192>>) :: iodata() def encrypt(plain_text, <> = _key, <> = _nonce) do {xsalsa_key, xsalsa_nonce} = xsalsa20_key_and_nonce(key, nonce) message = <<0::unit(8)-size(32)>> <> plain_text <> = crypt(xsalsa_key, xsalsa_nonce, message) cipher_tag = :crypto.mac(:poly1305, mac_otp, cipher_text) - cipher_tag <> cipher_text + [cipher_tag, cipher_text] end + @spec decrypt(binary(), <<_::256>>, <<_::192>>) :: binary() | :error def decrypt( <> = _encrypted_message, <>, diff --git a/lib/nostrum/voice/session.ex b/lib/nostrum/voice/session.ex index f9d4fa00d..8ed786ef6 100644 --- a/lib/nostrum/voice/session.ex +++ b/lib/nostrum/voice/session.ex @@ -5,9 +5,13 @@ defmodule Nostrum.Voice.Session do alias Nostrum.Constants alias Nostrum.ConsumerGroup alias Nostrum.Shard.Dispatch - alias Nostrum.Struct.{VoiceState, VoiceWSState} + alias Nostrum.Struct.VoiceState + alias Nostrum.Struct.VoiceWSState alias Nostrum.Voice - alias Nostrum.Voice.{Event, Opus, Payload} + alias Nostrum.Voice.Crypto + alias Nostrum.Voice.Event + alias Nostrum.Voice.Opus + alias Nostrum.Voice.Payload require Logger @@ -138,9 +142,8 @@ defmodule Nostrum.Voice.Session do <<2::2, 0::1, 1::5, 201::8, _rest::binary>> -> :noop - <> -> - nonce = header <> <<0::8*12>> - payload = Kcl.secretunbox(data, nonce, state.secret_key) + <> = data -> + payload = Crypto.decrypt(state, data) <<_::16, seq::integer-16, time::integer-32, ssrc::integer-32>> = header opus = Opus.strip_rtp_ext(payload) 
incoming_packet = Payload.voice_incoming_packet({{seq, time, ssrc}, opus}) From 15e9fa9bc6fc412374a9a99db174f992ac29a638 Mon Sep 17 00:00:00 2001 From: Brandt Hill Date: Fri, 17 May 2024 22:40:42 -0500 Subject: [PATCH 5/8] Add more details in docs, clarify typespecs --- guides/functionality/voice.md | 25 +++++++++++++++++++++++++ lib/nostrum/voice/crypto/aes.ex | 2 +- lib/nostrum/voice/crypto/chacha.ex | 2 +- lib/nostrum/voice/crypto/salsa.ex | 2 +- 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/guides/functionality/voice.md b/guides/functionality/voice.md index 38edbe1eb..224c9e901 100644 --- a/guides/functionality/voice.md +++ b/guides/functionality/voice.md @@ -204,3 +204,28 @@ Available configuration options are as follows: - `:aes256_gcm` (alias for `:aead_aes256_gcm_rtpsize`) The first seven are Discord's available options, while the last two are shorter aliases. + +#### Implementation Details + +Of the seven supported modes, three different ciphers are used. The remaining differences +are variations in how the nonce is determined and where the encrypted portion of the RTP packet begins. + +Erlang's `:crypto` module is leveraged as much as possible as the ciphers are NIFs. + +##### xsalsa20_poly1305 + +The entire Salsa20/XSalsa20 cipher is implemented in elixir. The poly1305 MAC function is handled by the `:crypto` module. +As a result, xsalsa20_poly1305 modes will likely have the slowest performance. + +##### xchacha20_poly1305 + +The `:crypto` module supports the `chacha20_poly1305` AEAD cipher. The only thing implemented in elixir +is the HChaCha20 hash function that generates a sub-key from the key and the longer nonce that XChaCha20 +specifies, which is then passed to the `chacha20_poly1305` cipher. +If your hardware doesn't have AES hardware acceleration, the `:xchacha20_poly1305` option may perform +the best for you. + +##### aes256_gcm + +The `:crypto` module completely supports AES256 in GCM mode requiring no implementation in elixir.
+Many CPUs have hardware acceleration specifically for AES. For these reasons, Nostrum defaults to `aes256_gcm`. diff --git a/lib/nostrum/voice/crypto/aes.ex b/lib/nostrum/voice/crypto/aes.ex index a7eff6404..990a330ae 100644 --- a/lib/nostrum/voice/crypto/aes.ex +++ b/lib/nostrum/voice/crypto/aes.ex @@ -4,7 +4,7 @@ defmodule Nostrum.Voice.Crypto.Aes do # AES is fully supported by the erlang crypto module, so this module is # just to provide a convenient wrapper around it for encryption and decryption - @spec encrypt(binary(), <<_::256>>, <<_::96>>, binary()) :: iodata() + @spec encrypt(binary(), <<_::256>>, <<_::96>>, binary()) :: iolist() def encrypt(plain_text, <> = _key, <> = _nonce, aad) do {cipher_text, tag} = :crypto.crypto_one_time_aead(:aes_256_gcm, key, nonce, plain_text, aad, _encrypt = true) diff --git a/lib/nostrum/voice/crypto/chacha.ex b/lib/nostrum/voice/crypto/chacha.ex index 775e123c8..118bec7d0 100644 --- a/lib/nostrum/voice/crypto/chacha.ex +++ b/lib/nostrum/voice/crypto/chacha.ex @@ -105,7 +105,7 @@ defmodule Nostrum.Voice.Crypto.Chacha do x14::little-32, x15::little-32>> end - @spec encrypt(binary(), <<_::256>>, <<_::192>>, binary()) :: iodata() + @spec encrypt(binary(), <<_::256>>, <<_::192>>, binary()) :: iolist() def encrypt(plain_text, <> = _key, <> = _nonce, aad) do {xchacha_key, xchacha_nonce} = xchacha20_key_and_nonce(key, nonce) diff --git a/lib/nostrum/voice/crypto/salsa.ex b/lib/nostrum/voice/crypto/salsa.ex index 6525a0d3e..75849e828 100644 --- a/lib/nostrum/voice/crypto/salsa.ex +++ b/lib/nostrum/voice/crypto/salsa.ex @@ -158,7 +158,7 @@ defmodule Nostrum.Voice.Crypto.Salsa do bxor_block(keystream, message) end - @spec encrypt(binary(), <<_::256>>, <<_::192>>) :: iodata() + @spec encrypt(binary(), <<_::256>>, <<_::192>>) :: iolist() def encrypt(plain_text, <> = _key, <> = _nonce) do {xsalsa_key, xsalsa_nonce} = xsalsa20_key_and_nonce(key, nonce) message = <<0::unit(8)-size(32)>> <> plain_text From 
e18ede7dabb35610af456abfeaf96bb3a3ebbff4 Mon Sep 17 00:00:00 2001 From: Brandt Hill Date: Sat, 18 May 2024 15:33:39 -0500 Subject: [PATCH 6/8] Further reduce binary copies. Fix doc links. Make voice encryption mode config compile time. --- guides/functionality/voice.md | 13 ++-- guides/intro/intro.md | 11 +-- lib/nostrum/voice/crypto.ex | 111 +++++++++++++---------------- lib/nostrum/voice/crypto/chacha.ex | 2 +- lib/nostrum/voice/crypto/salsa.ex | 25 ++++--- 5 files changed, 83 insertions(+), 79 deletions(-) diff --git a/guides/functionality/voice.md b/guides/functionality/voice.md index 224c9e901..edb768892 100644 --- a/guides/functionality/voice.md +++ b/guides/functionality/voice.md @@ -188,6 +188,9 @@ hardware architecture your bot is running on. If you're interested, keep reading #### Encryption Mode Configuration Options +This is a compile-time configuration option, so should you wish to set it, +do it in `config.exs` or one of its imported config files, *not* `runtime.exs`. + ```elixir config :nostrum, :voice_encryption_mode, :aes256_gcm # Default ``` @@ -196,15 +199,17 @@ Available configuration options are as follows: - `:xsalsa20_poly1305` - `:xsalsa20_poly1305_suffix` - `:xsalsa20_poly1305_lite` -- `:xsalsa20_poly1305_lite_rtpsize` (not yet documented by Discord) -- `:aead_xchacha20_poly1305_rtpsize` (not yet documented by Discord) -- `:aead_aes256_gcm` (not yet documented by Discord) -- `:aead_aes256_gcm_rtpsize` (not yet documented by Discord) +- `:xsalsa20_poly1305_lite_rtpsize` *(not yet documented by Discord)* +- `:aead_xchacha20_poly1305_rtpsize` *(not yet documented by Discord)* +- `:aead_aes256_gcm` *(not yet documented by Discord)* +- `:aead_aes256_gcm_rtpsize` *(not yet documented by Discord)* - `:xchacha20_poly1305` (alias for `:aead_xchacha20_poly1305_rtpsize`) - `:aes256_gcm` (alias for `:aead_aes256_gcm_rtpsize`) The first seven are Discord's available options, while the last two are shorter aliases. 
+The latter four of Discord's seven modes are not yet documented, but [will be soon](https://github.com/discord/discord-api-docs/pull/6801). + #### Implementation Details Of the seven supported modes, three different ciphers are used. The remaining differences diff --git a/guides/intro/intro.md b/guides/intro/intro.md index e043de51e..d96e8e310 100644 --- a/guides/intro/intro.md +++ b/guides/intro/intro.md @@ -5,10 +5,10 @@ nostrum is an Elixir library that can be used to interact with Discord. To see documentation about a specific part of the library, please visit one of the following: -* [API](api.html) - Methods to interact with the RESTful API (and some other goodies). +* [API](api-1.html) - Methods to interact with the RESTful API (and some other goodies). * [State](state.html) - Caches that keep information from Discord fresh at your disposal. * [Events](event_handling.html) - Handling events from Discord as they come in. -* [Voice](voice.html) - Playing audio through Discord voice channels. +* [Voice](voice-2.html) - Playing audio through Discord voice channels. ## Setup @@ -67,16 +67,17 @@ Apart from the `token` field mentioned above, the following fields are also supp livestream audio with streamlink support. Defaults to `"streamlink"`. - `audio_timeout` - Milliseconds that input must begin generating audio by upon invoking `play`. More information about this option can be found in the - [voice](./voice.html) documentation page. Defaults to `20_000` (20s). + [voice](./voice-2.html) documentation page. Defaults to `20_000` (20s). - `audio_frames_per_burst` - Number of opus frames to send at a time while playing audio. More information about this option can be found in the - [voice](./voice.html) documentation page. Defaults to `10`. + [voice](./voice-2.html) documentation page. Defaults to `10`. - `voice_auto_connect` - This will determine if Nostrum automatically connects to voice websockets gateways upon joining voice channels. 
If set to `false` but you still wish to connect to the voice gateway, you can do so manually by calling `Nostrum.Voice.connect_to_gateway/1` after joining a voice channel. Defaults to `true`. -- `voice_encryption_mode` - Defaults to `:aes256_gcm` +- `voice_encryption_mode` - Defaults to `:aes256_gcm`. More information about this + option can be found [here](./voice-2.html#encryption-modes). ### Development & debugging diff --git a/lib/nostrum/voice/crypto.ex b/lib/nostrum/voice/crypto.ex index 245a0c520..3f543207f 100644 --- a/lib/nostrum/voice/crypto.ex +++ b/lib/nostrum/voice/crypto.ex @@ -23,76 +23,67 @@ defmodule Nostrum.Voice.Crypto do @type cipher :: cipher_non_rtpsize() | cipher_alias() | cipher_rtpsize() - defp mode, do: Application.get_env(:nostrum, :voice_encryption_mode, :aes256_gcm) + @mode Application.compile_env(:nostrum, :voice_encryption_mode, :aes256_gcm) def encryption_mode do - mode = mode() - Map.get( %{ xchacha20_poly1305: "aead_xchacha20_poly1305_rtpsize", aes256_gcm: "aead_aes256_gcm_rtpsize" }, - mode, - "#{mode}" + @mode, + "#{@mode}" ) end def encrypt(voice, data) do - apply(__MODULE__, :"encrypt_#{mode()}", [voice, data]) + header = Audio.rtp_header(voice) + apply(__MODULE__, :"encrypt_#{@mode}", [voice, data, header]) end def decrypt(%VoiceState{secret_key: key}, data), do: decrypt(key, data) def decrypt(%VoiceWSState{secret_key: key}, data), do: decrypt(key, data) def decrypt(key, data) do - apply(__MODULE__, :"decrypt_#{mode()}", [key, data]) + apply(__MODULE__, :"decrypt_#{@mode}", [key, data]) end - def encrypt_xsalsa20_poly1305(%VoiceState{secret_key: key} = voice, data) do - header = Audio.rtp_header(voice) - + def encrypt_xsalsa20_poly1305(%VoiceState{secret_key: key}, data, header) do nonce = header <> <<0::unit(8)-size(12)>> [header, Salsa.encrypt(data, key, nonce)] end - def encrypt_xsalsa20_poly1305_suffix(%VoiceState{secret_key: key} = voice, data) do - header = Audio.rtp_header(voice) - + def 
encrypt_xsalsa20_poly1305_suffix(%VoiceState{secret_key: key}, data, header) do nonce = :crypto.strong_rand_bytes(24) [header, Salsa.encrypt(data, key, nonce), nonce] end - def encrypt_xsalsa20_poly1305_lite(%VoiceState{secret_key: key} = voice, data) do - header = Audio.rtp_header(voice) - + def encrypt_xsalsa20_poly1305_lite(%VoiceState{secret_key: key} = voice, data, header) do {unpadded_nonce, nonce} = lite_nonce(voice) [header, Salsa.encrypt(data, key, nonce), unpadded_nonce] end - def encrypt_xsalsa20_poly1305_lite_rtpsize(voice, data), - do: encrypt_xsalsa20_poly1305_lite(voice, data) + def encrypt_xsalsa20_poly1305_lite_rtpsize(voice, data, header), + do: encrypt_xsalsa20_poly1305_lite(voice, data, header) - def encrypt_xchacha20_poly1305(voice, data), - do: encrypt_aead_xchacha20_poly1305_rtpsize(voice, data) - - def encrypt_aead_xchacha20_poly1305_rtpsize(%VoiceState{secret_key: key} = voice, data) do - header = Audio.rtp_header(voice) + def encrypt_xchacha20_poly1305(voice, data, header), + do: encrypt_aead_xchacha20_poly1305_rtpsize(voice, data, header) + def encrypt_aead_xchacha20_poly1305_rtpsize(%VoiceState{secret_key: key} = voice, data, header) do {unpadded_nonce, nonce} = lite_nonce(voice) [header, Chacha.encrypt(data, key, nonce, _aad = header), unpadded_nonce] end - def encrypt_aead_aes256_gcm(voice, data), do: encrypt_aes256_gcm(voice, data) - def encrypt_aead_aes256_gcm_rtpsize(voice, data), do: encrypt_aes256_gcm(voice, data) + def encrypt_aead_aes256_gcm(voice, data, header), do: encrypt_aes256_gcm(voice, data, header) - def encrypt_aes256_gcm(%VoiceState{secret_key: key} = voice, data) do - header = Audio.rtp_header(voice) + def encrypt_aead_aes256_gcm_rtpsize(voice, data, header), + do: encrypt_aes256_gcm(voice, data, header) + def encrypt_aes256_gcm(%VoiceState{secret_key: key} = voice, data, header) do {unpadded_nonce, nonce} = lite_nonce(voice, 12) [header, Aes.encrypt(data, key, nonce, _aad = header), unpadded_nonce] @@ -161,34 +152,32 
@@ defmodule Nostrum.Voice.Crypto do {unpadded_nonce, nonce} end - @doc """ - Discord's newer encryption modes ending in '_rtpsize' leave the first 4 bytes of the RTP - header extension in plaintext while encrypting the elements themselves. The AAD is the - 12-byte RTP header concatenated with the first 4 bytes of the RTP header extension. - - Much like is done within the function `Nostrum.Voice.Opus.strip_rtp_ext/1`, we pattern match - on the `0xBEDE` constant and the 16-bit big-endian extension length that denotes the length - in 32-bit words of the extension elements. Because the elements are a part of the cipher text, - the extension length is the number of 32-bit words to discard after decryption to obtain - solely the opus packet. - - This function returns a 5-element tuple with - - RTP header - - Fixed 12 byte header concatenated with the first 4 bytes of the extension - - Used as the AAD for AEAD ciphers - - cipher text - - RTP extension elements prepended to the opus packet - - cipher tag (MAC) - - nonce (padded) - - RTP header extension length - - for isolating the opus after decryption - """ - def decode_packet_rtpsize( - <>, - nonce_length \\ 24, - tag_length \\ 16 - ) - when byte_size(rest) - (@lite_nonce_length + tag_length) > ext_len * 4 do + # Discord's newer encryption modes ending in '_rtpsize' leave the first 4 bytes of the RTP + # header extension in plaintext while encrypting the elements themselves. The AAD is the + # 12-byte RTP header concatenated with the first 4 bytes of the RTP header extension. + + # Much like is done within the function `Nostrum.Voice.Opus.strip_rtp_ext/1`, we pattern match + # on the `0xBEDE` constant and the 16-bit big-endian extension length that denotes the length + # in 32-bit words of the extension elements. Because the elements are a part of the cipher text, + # the extension length is the number of 32-bit words to discard after decryption to obtain + # solely the opus packet. 
+ + # This function returns a 5-element tuple with + # - RTP header + # - Fixed 12 byte header concatenated with the first 4 bytes of the extension + # - Used as the AAD for AEAD ciphers + # - cipher text + # - RTP extension elements prepended to the opus packet + # - cipher tag (MAC) + # - nonce (padded) + # - RTP header extension length + # - for isolating the opus after decryption + defp decode_packet_rtpsize( + <>, + nonce_length \\ 24, + tag_length \\ 16 + ) + when byte_size(rest) - (@lite_nonce_length + tag_length) > ext_len * 4 do header = header <> <<0xBE, 0xDE, ext_len::integer-16>> {cipher_text, tag, unpadded_nonce} = split_data(rest, @lite_nonce_length, tag_length) @@ -199,12 +188,12 @@ defmodule Nostrum.Voice.Crypto do end # Non "rtpsize" modes where everything is encrypted beyond the 12-byte header - def decode_packet( - <>, - unpadded_nonce_length \\ @lite_nonce_length, - nonce_length \\ 24, - tag_length \\ 16 - ) do + defp decode_packet( + <>, + unpadded_nonce_length \\ @lite_nonce_length, + nonce_length \\ 24, + tag_length \\ 16 + ) do {cipher_text, tag, unpadded_nonce} = split_data(rest, unpadded_nonce_length, tag_length) nonce = unpadded_nonce <> <<0::unit(8)-size(nonce_length - unpadded_nonce_length)>> diff --git a/lib/nostrum/voice/crypto/chacha.ex b/lib/nostrum/voice/crypto/chacha.ex index 118bec7d0..d2b64d926 100644 --- a/lib/nostrum/voice/crypto/chacha.ex +++ b/lib/nostrum/voice/crypto/chacha.ex @@ -123,7 +123,7 @@ defmodule Nostrum.Voice.Crypto.Chacha do end @spec decrypt(binary(), <<_::256>>, <<_::192>>, binary(), <<_::128>>) :: binary() | :error - def decrypt(cipher_text, <>, <>, aad, tag) do + def decrypt(cipher_text, <> = _key, <> = _nonce, aad, tag) do {xchacha_key, xchacha_nonce} = xchacha20_key_and_nonce(key, nonce) :crypto.crypto_one_time_aead( diff --git a/lib/nostrum/voice/crypto/salsa.ex b/lib/nostrum/voice/crypto/salsa.ex index 75849e828..707ab9918 100644 --- a/lib/nostrum/voice/crypto/salsa.ex +++ 
b/lib/nostrum/voice/crypto/salsa.ex @@ -149,8 +149,7 @@ defmodule Nostrum.Voice.Crypto.Salsa do defp crypt(key, nonce, <>, block_count, outputs) do output_block = crypt_block(key, nonce, message, block_count) - outputs = Enum.reverse([output_block | outputs]) - IO.iodata_to_binary(outputs) + _final_outputs = Enum.reverse([output_block | outputs]) end defp crypt_block(key, nonce, message, block_count) do @@ -162,23 +161,33 @@ defmodule Nostrum.Voice.Crypto.Salsa do def encrypt(plain_text, <> = _key, <> = _nonce) do {xsalsa_key, xsalsa_nonce} = xsalsa20_key_and_nonce(key, nonce) message = <<0::unit(8)-size(32)>> <> plain_text - <> = crypt(xsalsa_key, xsalsa_nonce, message) + + # First block is guaranteed to be at least 32 bytes + [<> | cipher_text_tail] = + crypt(xsalsa_key, xsalsa_nonce, message) + + cipher_text = [cipher_text_head | cipher_text_tail] + cipher_tag = :crypto.mac(:poly1305, mac_otp, cipher_text) - [cipher_tag, cipher_text] + + [cipher_tag | cipher_text] end @spec decrypt(binary(), <<_::256>>, <<_::192>>) :: binary() | :error def decrypt( <> = _encrypted_message, - <>, - <> + <> = _key, + <> = _nonce ) do {xsalsa_key, xsalsa_nonce} = xsalsa20_key_and_nonce(key, nonce) message = <<0::unit(8)-size(32)>> <> cipher_text - <> = crypt(xsalsa_key, xsalsa_nonce, message) + + # First block is guaranteed to be at least 32 bytes + [<> | plain_text_tail] = + crypt(xsalsa_key, xsalsa_nonce, message) case :crypto.mac(:poly1305, mac_otp, cipher_text) do - ^cipher_tag -> plain_text + ^cipher_tag -> IO.iodata_to_binary([plain_text_head | plain_text_tail]) _error -> :error end end From 2ace8da25263201de9c573732a7fac9ece7c994a Mon Sep 17 00:00:00 2001 From: Brandt Hill Date: Sun, 19 May 2024 20:33:58 -0500 Subject: [PATCH 7/8] Bind crypto functions at compile time. Document crypto modules for the curious. 
--- lib/nostrum/voice/crypto.ex | 27 +++++------ lib/nostrum/voice/crypto/aes.ex | 33 +++++++++++-- lib/nostrum/voice/crypto/chacha.ex | 73 +++++++++++++++++++--------- lib/nostrum/voice/crypto/salsa.ex | 78 ++++++++++++++++++++++++------ 4 files changed, 155 insertions(+), 56 deletions(-) diff --git a/lib/nostrum/voice/crypto.ex b/lib/nostrum/voice/crypto.ex index 3f543207f..53a032439 100644 --- a/lib/nostrum/voice/crypto.ex +++ b/lib/nostrum/voice/crypto.ex @@ -25,28 +25,25 @@ defmodule Nostrum.Voice.Crypto do @mode Application.compile_env(:nostrum, :voice_encryption_mode, :aes256_gcm) - def encryption_mode do - Map.get( - %{ - xchacha20_poly1305: "aead_xchacha20_poly1305_rtpsize", - aes256_gcm: "aead_aes256_gcm_rtpsize" - }, - @mode, - "#{@mode}" - ) - end + @mode_string Map.get( + %{ + xchacha20_poly1305: "aead_xchacha20_poly1305_rtpsize", + aes256_gcm: "aead_aes256_gcm_rtpsize" + }, + @mode, + "#{@mode}" + ) + + def encryption_mode, do: @mode_string def encrypt(voice, data) do header = Audio.rtp_header(voice) - apply(__MODULE__, :"encrypt_#{@mode}", [voice, data, header]) + unquote(:"encrypt_#{@mode}")(voice, data, header) end def decrypt(%VoiceState{secret_key: key}, data), do: decrypt(key, data) def decrypt(%VoiceWSState{secret_key: key}, data), do: decrypt(key, data) - - def decrypt(key, data) do - apply(__MODULE__, :"decrypt_#{@mode}", [key, data]) - end + def decrypt(key, data), do: unquote(:"decrypt_#{@mode}")(key, data) def encrypt_xsalsa20_poly1305(%VoiceState{secret_key: key}, data, header) do nonce = header <> <<0::unit(8)-size(12)>> diff --git a/lib/nostrum/voice/crypto/aes.ex b/lib/nostrum/voice/crypto/aes.ex index 990a330ae..6006406a6 100644 --- a/lib/nostrum/voice/crypto/aes.ex +++ b/lib/nostrum/voice/crypto/aes.ex @@ -1,8 +1,35 @@ defmodule Nostrum.Voice.Crypto.Aes do - @moduledoc false + @moduledoc """ + Handles encryption and decryption of outgoing and incoming voice data when an AES encryption mode is selected - # AES is fully supported by 
the erlang crypto module, so this module is - # just to provide a convenient wrapper around it for encryption and decryption + > ### Internal module {: .info} + > + > This module is intended for exclusive usage inside of nostrum, and is + > documented for completeness and curious cryptographic cats. + + ### Purpose + + AES is fully supported by the erlang `:crypto` module, so this module + just provides a convenient wrapper around it for encryption and decryption. + + ### Implementation + + Internally the functions `:crypto.crypto_one_time_aead/6` and `:crypto.crypto_one_time_aead/7` are + called with the cipher argument set to `:aes_256_gcm`. Because these functions are implemented as + NIFs with OpenSSL bindings, they are faster than anything written in pure erlang/elixir. + + Because the crypto function returns a tuple with the cipher text and tag (MAC) separately, the return + value is an iolist with them in the order that Discord expects. This is done to reduce binary copies + as the functions ingesting the encrypted messages all support iodata. + + Readers are encouraged to read more about the [erlang crypto module](https://www.erlang.org/doc/man/crypto). + + ### Performance considerations + + AES is a well-established cipher and many modern CPUs have AES instruction sets to accelerate + AES operations. While the Salsa20 and ChaCha20 stream ciphers are typically faster with pure + software implementations, hardware acceleration will typically give AES the leg up for performance. + """ @spec encrypt(binary(), <<_::256>>, <<_::96>>, binary()) :: iolist() def encrypt(plain_text, <> = _key, <> = _nonce, aad) do diff --git a/lib/nostrum/voice/crypto/chacha.ex b/lib/nostrum/voice/crypto/chacha.ex index d2b64d926..9eadbee6b 100644 --- a/lib/nostrum/voice/crypto/chacha.ex +++ b/lib/nostrum/voice/crypto/chacha.ex @@ -1,26 +1,55 @@ defmodule Nostrum.Voice.Crypto.Chacha do - @moduledoc false - - # Erlang's :crypto module supports the chacha20_poly1305 aead stream cipher.
- # Analogously to Salsa20 and XSalsa20, XChaCha20 is a way to use 192-bit nonces - # with ChaCha20 by hashing the key and part of the extended nonce generate a - # sub-key, which is used as the input key for ChaCha20. - # - # Even though we've implemented the bulk of what's needed to generate chacha20 key streams - # for encryption and decryption, we're only using this module to generate the inputs to - # use the :crypto module's chacha20_poly1305 functionality in the capacity of xchacha20 - # as is required by Discord with that encryption mode selected. - # - # This is all in service of leveraging the performance benefits of the the NIF crypto - # functions, which are necessarily going to be more performant than anything implemented - # in pure elixir/erlang like the `:kcl` package. - # - # References for Salsa family of ciphers - # https://cr.yp.to/snuffle/spec.pdf - # https://cr.yp.to/chacha/chacha-20080128.pdf - # https://cr.yp.to/snuffle/xsalsa-20110204.pdf - # https://datatracker.ietf.org/doc/html/rfc7539 - # https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-xchacha + @moduledoc """ + Handles encryption and decryption of outgoing and incoming voice data when a ChaCha encryption mode is selected + + > ### Internal module {: .info} + > + > This module is intended for exclusive usage inside of nostrum, and is + > documented for completeness and curious cryptographic cats. + + ### Purpose + + Erlang's `:crypto` module supports the chacha20_poly1305 AEAD stream cipher. + Analogously to Salsa20 and XSalsa20, XChaCha20 is a way to use 192-bit nonces + with ChaCha20 by hashing the key and part of the extended nonce to generate a + sub-key, which is used as the input key for ChaCha20. + + To leverage the crypto module, we had to implement the HChaCha20 hash function + in elixir to then pass the resulting sub-key to the `crypto_one_time_aead`. 
+ + ### Implementation + + The HChaCha20 function takes the first 16 bytes of the extended 24-byte XChaCha20 nonce, + expands the key and the 16-byte nonce slice into a block in place of the block count and + usual smaller nonce. That block has 20 rounds of mutation, and instead of summing the block + with its starting state as is done with keystream generation, 8 of the 16 words are taken + and used as the sub-key, which is the input key for the standard chacha20 cipher. + + Even though we've implemented the bulk of what's needed to generate chacha20 key streams + for encryption and decryption, we're only using this module to generate the inputs to + use the :crypto module's chacha20_poly1305 functionality in the capacity of xchacha20 + as is required by Discord with that encryption mode selected. + + This is all in service of leveraging the performance benefits of the NIF crypto + functions, which are necessarily going to be more performant than anything implemented + in pure elixir/erlang like the `:kcl` package. + + *ChaCha20 is a variant of the Salsa20 cipher. I will discuss in greater detail the implementation + in the `Nostrum.Voice.Crypto.Salsa` module, where much is applicable here.* + + References for Salsa family of ciphers + - https://cr.yp.to/snuffle/spec.pdf + - https://cr.yp.to/chacha/chacha-20080128.pdf + - https://cr.yp.to/snuffle/xsalsa-20110204.pdf + - https://datatracker.ietf.org/doc/html/rfc7539 + - https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-xchacha + + ### Performance considerations + + After the XChaCha20 sub-key is generated in elixir, the crypto NIF function performs the + heavy lifting. If your bot is running on a machine without AES hardware acceleration, you + may find that using the chacha encryption mode yields better performance.
+ """ import Bitwise diff --git a/lib/nostrum/voice/crypto/salsa.ex b/lib/nostrum/voice/crypto/salsa.ex index 707ab9918..380fc9772 100644 --- a/lib/nostrum/voice/crypto/salsa.ex +++ b/lib/nostrum/voice/crypto/salsa.ex @@ -1,20 +1,66 @@ defmodule Nostrum.Voice.Crypto.Salsa do - @moduledoc false - - # To support xsalsa20_poly1305 without a NIF, we have to implement the - # Salsa20 cipher and HSalsa20 hash function to use 192-bit nonces. - # - # Along with leveraging the :crypto module to perform the poly1305 MAC function - # and xor'ing arbitrary-length binaries, by being more thoughtful and explicit - # with our implementation, we should be able to eek out better performance - # than the `:kcl` package provides. - # - # References for Salsa family of ciphers - # https://cr.yp.to/snuffle/spec.pdf - # https://cr.yp.to/chacha/chacha-20080128.pdf - # https://cr.yp.to/snuffle/xsalsa-20110204.pdf - # https://datatracker.ietf.org/doc/html/rfc7539 - # https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-xchacha + @moduledoc """ + Handles encryption and decryption of outgoing and incoming voice data when a Salsa encryption mode is selected + + > ### Internal module {: .info} + > + > This module is intended for exclusive usage inside of nostrum, and is + > documented for completeness and curious cryptographic cats. + + ### Purpose + + To support xsalsa20_poly1305 without a NIF, we have to implement the + Salsa20 cipher and HSalsa20 hash function to use 192-bit nonces in the capacity + of XSalsa20. + + Along with leveraging the :crypto module to perform the poly1305 MAC function + and xor'ing arbitrary-length binaries, by being more thoughtful and explicit + with our implementation, we should be able to eke out better performance + than the `:kcl` package provides. + + ### Implementation + + The `:kcl` package is an impressive pure-elixir NaCl/libsodium compatible library + that Nostrum previously used for voice encryption.
For our usage with Discord voice, + we only need the ability to encrypt and decrypt with 32-byte keys using 24-byte + XSalsa20 nonces. + + Some of the key differences in our implementation compared to Kcl + - Heavy use of explicit binary pattern matching instead of more traditional implicit enumeration + - Intermediate block state stored in a 16-element tuple that is mutated during the 20-round hot loop instead of lists + - Minimized the number of binary copies, returning iolists when appropriate, instead of concatenating binaries + - XOR whole keystream and message blocks instead of XOR'ing one byte at a time + - Poly1305 MAC handled by crypto module instead of implemented in elixir + - Only support 32-byte keys and 24-byte nonces (XSalsa20) instead of full NaCl/libsodium + + Additionally there appears to be a bug in how the 16-byte block count is serialized during key expansion: + It's supposed to be little endian, and it happens to be for blocks 0-255, but for larger block counts, + Kcl may become incompatible with NaCl/libsodium-type libraries. For Discord's use case of encrypting short + 20 millisecond compressed audio packets, block counts were well below this suspected problem threshold. + + The cipher functions were implemented in the order they're defined in the original Salsa specification, + and though it's using a lot of explicit binary pattern matching, it turned out to be quite legible. + In a single statement of binary pattern matching, the 512-bit initial block state is cast into 16 + little-endian 32-bit words. Standard elixir patterns might have you iterate through the binary until the + end was reached, but matching and casting all sixteen block elements in a single statement then returning + a tuple is explicit, clear, and simple to understand when referenced against the spec. + + Readers interested in cryptography are encouraged to read more about the Salsa20/ChaCha20 ciphers.
+ + References for Salsa family of ciphers + - https://cr.yp.to/snuffle/spec.pdf + - https://cr.yp.to/chacha/chacha-20080128.pdf + - https://cr.yp.to/snuffle/xsalsa-20110204.pdf + - https://datatracker.ietf.org/doc/html/rfc7539 + - https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-xchacha + + ### Performance considerations + + The entire keystream generation and xor'ing the message with the stream is done in elixir, + only performing the Poly1305 MAC function through the crypto module. Although it was implemented + as thoughtfully and explicitly as possible with memory usage and performance in mind, using any + of the Salsa modes will likely be less performant than ChaCha or AES. + """ import Bitwise From 4790582d2b75a43591ace19f540eb097b95b1f5c Mon Sep 17 00:00:00 2001 From: Brandt Hill Date: Sun, 19 May 2024 21:04:16 -0500 Subject: [PATCH 8/8] Add voice crypto tests --- lib/nostrum/voice/crypto.ex | 14 +++--- test/nostrum/voice/crypto_test.exs | 73 ++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 7 deletions(-) create mode 100644 test/nostrum/voice/crypto_test.exs diff --git a/lib/nostrum/voice/crypto.ex b/lib/nostrum/voice/crypto.ex index 53a032439..a13a2da73 100644 --- a/lib/nostrum/voice/crypto.ex +++ b/lib/nostrum/voice/crypto.ex @@ -116,7 +116,7 @@ defmodule Nostrum.Voice.Crypto do do: decrypt_aead_xchacha20_poly1305_rtpsize(key, data) def decrypt_aead_xchacha20_poly1305_rtpsize(key, data) do - {header, cipher_text, tag, nonce, ext_len} = decode_packet_rtpsize(data, 24) + {header, cipher_text, tag, nonce, ext_len} = decode_packet_rtpsize(data, 24, 16) <<_exts::unit(32)-size(ext_len), opus::binary>> = Chacha.decrypt(cipher_text, key, nonce, _aad = header, tag) @@ -127,7 +127,7 @@ defmodule Nostrum.Voice.Crypto do def decrypt_aes256_gcm(key, data), do: decrypt_aead_aes256_gcm_rtpsize(key, data) def decrypt_aead_aes256_gcm_rtpsize(key, data) do - {header, cipher_text, tag, nonce, ext_len} = decode_packet_rtpsize(data, 12) + {header, 
cipher_text, tag, nonce, ext_len} = decode_packet_rtpsize(data, 12, 16) <<_exts::unit(32)-size(ext_len), opus::binary>> = Aes.decrypt(cipher_text, key, nonce, _aad = header, tag) @@ -171,8 +171,8 @@ defmodule Nostrum.Voice.Crypto do # - for isolating the opus after decryption defp decode_packet_rtpsize( <>, - nonce_length \\ 24, - tag_length \\ 16 + nonce_length, + tag_length ) when byte_size(rest) - (@lite_nonce_length + tag_length) > ext_len * 4 do header = header <> <<0xBE, 0xDE, ext_len::integer-16>> @@ -187,9 +187,9 @@ defmodule Nostrum.Voice.Crypto do # Non "rtpsize" modes where everything is encrypted beyond the 12-byte header defp decode_packet( <>, - unpadded_nonce_length \\ @lite_nonce_length, - nonce_length \\ 24, - tag_length \\ 16 + unpadded_nonce_length, + nonce_length, + tag_length ) do {cipher_text, tag, unpadded_nonce} = split_data(rest, unpadded_nonce_length, tag_length) diff --git a/test/nostrum/voice/crypto_test.exs b/test/nostrum/voice/crypto_test.exs new file mode 100644 index 000000000..03e31acb6 --- /dev/null +++ b/test/nostrum/voice/crypto_test.exs @@ -0,0 +1,73 @@ +defmodule Nostrum.Voice.CryptoTest do + alias Nostrum.Voice.Crypto.Aes + alias Nostrum.Voice.Crypto.Chacha + alias Nostrum.Voice.Crypto.Salsa + + use ExUnit.Case + + setup do + key = :crypto.strong_rand_bytes(32) + nonce_12 = :crypto.strong_rand_bytes(12) + nonce_24 = :crypto.strong_rand_bytes(24) + aad = "tacgiarc" + + %{ + key: key, + nonce_12: nonce_12, + nonce_24: nonce_24, + aad: aad + } + end + + describe "AES" do + test "encryption and decryption", %{key: key, nonce_12: nonce, aad: aad} do + plain_text = "Hello, World!" + + [cipher_text, tag] = Aes.encrypt(plain_text, key, nonce, aad) + ^plain_text = Aes.decrypt(cipher_text, key, nonce, aad, tag) + end + + test "decryption fails with altered input", %{key: key, nonce_12: nonce, aad: aad} do + plain_text = "Hello, World!" 
+ + [cipher_text, tag] = Aes.encrypt(plain_text, key, nonce, aad) + :error = Aes.decrypt(cipher_text, key, nonce, aad <> "!", tag) + end + end + + describe "ChaCha" do + test "encryption and decryption", %{key: key, nonce_24: nonce, aad: aad} do + plain_text = "Hello, World!" + + [cipher_text, tag] = Chacha.encrypt(plain_text, key, nonce, aad) + ^plain_text = Chacha.decrypt(cipher_text, key, nonce, aad, tag) + end + + test "decryption fails with altered input", %{key: key, nonce_24: nonce, aad: aad} do + plain_text = "Hello, World!" + + [cipher_text, tag] = Chacha.encrypt(plain_text, key, nonce, aad) + :error = Chacha.decrypt(cipher_text, key, nonce, aad <> "!", tag) + end + end + + describe "Salsa" do + test "encryption and decryption", %{key: key, nonce_24: nonce} do + plain_text = "Hello, World!" + + cipher_text = Salsa.encrypt(plain_text, key, nonce) + cipher_text = IO.iodata_to_binary(cipher_text) + ^plain_text = Salsa.decrypt(cipher_text, key, nonce) + end + + test "decryption fails with altered input", %{key: key, nonce_24: nonce} do + plain_text = "Hello, World!" + + cipher_text = Salsa.encrypt(plain_text, key, nonce) + <> = IO.iodata_to_binary(cipher_text) + altered_cipher_text = <> + + :error = Salsa.decrypt(altered_cipher_text, key, nonce) + end + end +end