From 7fca986c977ebaf3d01de1d4ab6ac219df7ad983 Mon Sep 17 00:00:00 2001 From: Neko-Life Date: Thu, 19 Sep 2024 14:34:38 +0700 Subject: [PATCH 1/9] feat: initial decrypt impl, TODO: test and cleanups --- src/dpp/discordvoiceclient.cpp | 66 ++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 26 deletions(-) diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp index c28ce709b6..8ded0fcc58 100644 --- a/src/dpp/discordvoiceclient.cpp +++ b/src/dpp/discordvoiceclient.cpp @@ -756,44 +756,58 @@ void discord_voice_client::read_ready() /* Nonce is the RTP Header with zero padding */ uint8_t nonce[24] = { 0 }; - std::memcpy(nonce, &buffer[0], header_size); + std::memcpy(nonce, buffer, header_size); /* Get the number of CSRC in header */ const size_t csrc_count = buffer[0] & 0b0000'1111; /* Skip to the encrypted voice data */ const ptrdiff_t offset_to_data = header_size + sizeof(uint32_t) * csrc_count; - uint8_t* encrypted_data = buffer + offset_to_data; - const size_t encrypted_data_len = packet_size - offset_to_data; - - if(crypto_aead_xchacha20poly1305_ietf_decrypt() != 0) - - if (crypto_secretbox_open_easy(encrypted_data, encrypted_data, - encrypted_data_len, nonce, secret_key)) { - /* Invalid Discord RTP payload. */ - return; + uint8_t* ciphertext = buffer + offset_to_data; + const size_t ciphertext_len = packet_size - offset_to_data; + + std::vector decrypted; + decrypted.reserve(ciphertext_len); + unsigned long long decrypted_len = 0; + + if (crypto_aead_chacha20poly1305_ietf_decrypt(decrypted.data(), &decrypted_len, + NULL, + ciphertext, ciphertext_len, + NULL, + NULL, + nonce, secret_key) != 0) { + /* Invalid Discord RTP payload. */ + return; } - const uint8_t* decrypted_data = encrypted_data; - size_t decrypted_data_len = encrypted_data_len - crypto_box_MACBYTES; - if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) { - /* Skip the RTP Extensions */ - size_t ext_len = 0; - { - uint16_t ext_len_in_words; - memcpy(&ext_len_in_words, &decrypted_data[2], sizeof(uint16_t)); - ext_len_in_words = ntohs(ext_len_in_words); - ext_len = sizeof(uint32_t) * ext_len_in_words; - } - constexpr size_t ext_header_len = sizeof(uint16_t) * 2; - decrypted_data += ext_header_len + ext_len; - decrypted_data_len -= ext_header_len + ext_len; - } + // if(crypto_aead_xchacha20poly1305_ietf_decrypt() != 0) + + // if (crypto_secretbox_open_easy(encrypted_data, encrypted_data, + // encrypted_data_len, nonce, secret_key)) { + // /* Invalid Discord RTP payload. */ + // return; + // } + + // const uint8_t* decrypted_data = encrypted_data; + // size_t decrypted_data_len = encrypted_data_len - crypto_box_MACBYTES; + // if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) { + // /* Skip the RTP Extensions */ + // size_t ext_len = 0; + // { + // uint16_t ext_len_in_words; + // memcpy(&ext_len_in_words, &decrypted_data[2], sizeof(uint16_t)); + // ext_len_in_words = ntohs(ext_len_in_words); + // ext_len = sizeof(uint32_t) * ext_len_in_words; + // } + // constexpr size_t ext_header_len = sizeof(uint16_t) * 2; + // decrypted_data += ext_header_len + ext_len; + // decrypted_data_len -= ext_header_len + ext_len; + // } /* * We're left with the decrypted, opus-encoded data. * Park the payload and decode on the voice courier thread. */ - vp.vr->audio_data.assign(decrypted_data, decrypted_data + decrypted_data_len); + vp.vr->audio_data.assign(decrypted.begin(), decrypted.end()); { std::lock_guard lk(voice_courier_shared_state.mtx); From 684d4e9abd0ce43225274d15e33bfc16f71ff168 Mon Sep 17 00:00:00 2001 From: Neko-Life Date: Fri, 20 Sep 2024 03:29:43 +0700 Subject: [PATCH 2/9] feat: progress?? --- include/dpp/discordvoiceclient.h | 11 ++++ src/dpp/discordvoiceclient.cpp | 103 +++++++++++++++++++++++-------- 2 files changed, 89 insertions(+), 25 deletions(-) diff --git a/include/dpp/discordvoiceclient.h b/include/dpp/discordvoiceclient.h index 07e4364483..8ed58107d8 100644 --- a/include/dpp/discordvoiceclient.h +++ b/include/dpp/discordvoiceclient.h @@ -346,6 +346,17 @@ class DPP_EXPORT discord_voice_client : public websocket_client */ uint32_t timestamp; + /** + * @brief Each packet should have a nonce, a 32-bit incremental + * integer value appended to payload. + * + * We should keep track of this value and increment it for each + * packet sent. + * + * Current initial value is hardcoded to 1. + */ + uint32_t packet_nonce; + /** * @brief Last sent packet high-resolution timestamp */ diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp index 8ded0fcc58..e6004ff584 100644 --- a/src/dpp/discordvoiceclient.cpp +++ b/src/dpp/discordvoiceclient.cpp @@ -20,6 +20,7 @@ * ************************************************************************************/ +#include #include #ifdef _WIN32 #include @@ -316,6 +317,7 @@ discord_voice_client::discord_voice_client(dpp::cluster* _cluster, snowflake _ch secret_key(nullptr), sequence(0), timestamp(0), + packet_nonce(1), last_timestamp(std::chrono::high_resolution_clock::now()), sending(false), tracks(0), @@ -593,6 +595,9 @@ bool discord_voice_client::handle_frame(const std::string &data) rdy.voice_channel_id = this->channel_id; creator->on_voice_ready.call(rdy); } + + /* Reset packet_nonce */ + packet_nonce = 1; } break; /* Voice ready */ @@ -754,30 +759,30 @@ void discord_voice_client::read_ready() std::memcpy(&vp.timestamp, &buffer[4], sizeof(rtp_timestamp_t)); vp.timestamp = ntohl(vp.timestamp); - /* Nonce is the RTP Header with zero padding */ - uint8_t nonce[24] = { 0 }; - std::memcpy(nonce, buffer, header_size); + // nonce is 4 byte at the end of payload now + // change accordingly + // /* Nonce is the RTP Header with zero padding */ + // uint8_t nonce[24] = { 0 }; + // std::memcpy(nonce, buffer, header_size); - /* Get the number of CSRC in header */ - const size_t csrc_count = buffer[0] & 0b0000'1111; - /* Skip to the encrypted voice data */ - const ptrdiff_t offset_to_data = header_size + sizeof(uint32_t) * csrc_count; - uint8_t* ciphertext = buffer + offset_to_data; - const size_t ciphertext_len = packet_size - offset_to_data; + // /* Get the number of CSRC in header */ + // const size_t csrc_count = buffer[0] & 0b0000'1111; + // /* Skip to the encrypted voice data */ + // const ptrdiff_t offset_to_data = header_size + sizeof(uint32_t) * csrc_count; + // uint8_t* ciphertext = buffer + offset_to_data; + // const size_t ciphertext_len = packet_size - offset_to_data; - std::vector decrypted; - decrypted.reserve(ciphertext_len); unsigned long long decrypted_len = 0; - if (crypto_aead_chacha20poly1305_ietf_decrypt(decrypted.data(), &decrypted_len, - NULL, - ciphertext, ciphertext_len, - NULL, - NULL, - nonce, secret_key) != 0) { - /* Invalid Discord RTP payload. */ - return; - } + // if (crypto_aead_xchacha20poly1305_ietf_decrypt(buffer, &decrypted_len, + // NULL, + // ciphertext, ciphertext_len, + // NULL, + // NULL, + // nonce, secret_key) != 0) { + // /* Invalid Discord RTP payload. */ + // return; + // } // if(crypto_aead_xchacha20poly1305_ietf_decrypt() != 0) @@ -807,7 +812,7 @@ void discord_voice_client::read_ready() * We're left with the decrypted, opus-encoded data. * Park the payload and decode on the voice courier thread. */ - vp.vr->audio_data.assign(decrypted.begin(), decrypted.end()); + vp.vr->audio_data.assign(buffer, buffer + decrypted_len); { std::lock_guard lk(voice_courier_shared_state.mtx); @@ -1296,20 +1301,68 @@ discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet ++sequence; rtp_header header(sequence, timestamp, (uint32_t)ssrc); - std::vector audioDataPacket(sizeof(header) + encodedAudioLength + crypto_secretbox_MACBYTES); + /* Unencrypted header + encrypted opus packet + encrypted header as additional data + unencrypted 32 bit nonce */ + size_t packet_siz = (sizeof(header) * 2) + (encodedAudioLength + crypto_aead_xchacha20poly1305_IETF_ABYTES) + sizeof(packet_nonce); + std::vector audioDataPacket(packet_siz); std::memcpy(audioDataPacket.data(), &header, sizeof(header)); - unsigned char nonce[crypto_aead_xchacha20poly1305_ietf_NPUBBYTES]; - randombytes_buf(nonce, sizeof nonce); + /* Convert to big-endian */ + uint32_t noncel = htonl(packet_nonce); + + /* 4 byte encrypt nonce padded with 20 byte NULL */ + unsigned char encrypt_nonce[crypto_aead_xchacha20poly1305_ietf_NPUBBYTES] = { NULL }; + memcpy(encrypt_nonce, &packet_nonce, sizeof(packet_nonce)); unsigned long long clen_p; - crypto_aead_xchacha20poly1305_ietf_encrypt(audioDataPacket.data() + sizeof(header), &clen_p, encodedAudioData.data(), encodedAudioLength, NULL, NULL, NULL, (const unsigned char*)nonce, secret_key); + crypto_aead_xchacha20poly1305_ietf_encrypt(audioDataPacket.data() + sizeof(header), &clen_p, encodedAudioData.data(), encodedAudioLength, (const unsigned char *)&header, sizeof(header), NULL, (const unsigned char*)encrypt_nonce, secret_key); + + std::cout << "data[\n"; + + /*::write(STDIN_FILENO, audioDataPacket.data(), audioDataPacket.size());*/ + + std::cout << "\n]\n"; + std::cout << "size("<< audioDataPacket.size() << ")\n"; + std::cout << "clen_p("<< clen_p << ")\n"; + + // uint8_t buffer[65535] = {NULL}; + // unsigned long long decrypted_len = 0; + // if (crypto_aead_xchacha20poly1305_ietf_decrypt(buffer, &decrypted_len, + // NULL, + // audioDataPacket.data() + sizeof(header), audioDataPacket.size() - sizeof(header), + // NULL, + // NULL, + // (const unsigned char*)encrypt_nonce, secret_key) != 0) { + // std::cout << "VERIFICATION FAILED\n"; + // } + // else { + // auto pb = [](unsigned char *bin, size_t siz){ + // for (size_t i = 0; i < siz; i++) { + // printf("%d ", bin[i]); + // } + // }; + + // std::cout << "buffer[\n"; + // pb(encodedAudioData.data(), encodedAudioLength); + // std::cout<<"\n]\n"; + // std::cout << "buffer_len("<< encodedAudioLength <<")\n"; + + // std::cout << "decrypted_buffer[\n"; + // pb(buffer, decrypted_len); + // std::cout <<"\n]\n"; + // std::cout << "decrypted_len("<< decrypted_len <<")\n"; + // } //crypto_secretbox_easy(audioDataPacket.data() + sizeof(header), encodedAudioData.data(), encodedAudioLength, (const unsigned char*)nonce, secret_key); + /* Append the 4 byte nonce to the whole payload */ + std::memcpy(audioDataPacket.data() + audioDataPacket.size() - sizeof(noncel), &noncel, sizeof(noncel)); + this->send((const char*)audioDataPacket.data(), audioDataPacket.size(), duration); timestamp += frameSize; + /* Increment for next packet */ + packet_nonce++; + speak(); #else throw dpp::voice_exception(err_no_voice_support, "Voice support not enabled in this build of D++"); From fe5b0e40dbdd21806663108d9281da20d4b1f03d Mon Sep 17 00:00:00 2001 From: Neko-Life Date: Fri, 20 Sep 2024 04:25:27 +0700 Subject: [PATCH 3/9] feat: working encryption --- src/dpp/discordvoiceclient.cpp | 74 +++++++++++----------------------- 1 file changed, 24 insertions(+), 50 deletions(-) diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp index e6004ff584..f2236aa085 100644 --- a/src/dpp/discordvoiceclient.cpp +++ b/src/dpp/discordvoiceclient.cpp @@ -1301,63 +1301,37 @@ discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet ++sequence; rtp_header header(sequence, timestamp, (uint32_t)ssrc); - /* Unencrypted header + encrypted opus packet + encrypted header as additional data + unencrypted 32 bit nonce */ - size_t packet_siz = (sizeof(header) * 2) + (encodedAudioLength + crypto_aead_xchacha20poly1305_IETF_ABYTES) + sizeof(packet_nonce); + /* Expected payload size is unencrypted header + encrypted opus packet + unencrypted 32 bit nonce */ + size_t packet_siz = sizeof(header) + (encodedAudioLength + crypto_aead_xchacha20poly1305_IETF_ABYTES) + sizeof(packet_nonce); + std::vector audioDataPacket(packet_siz); + + /* Set RTP header */ std::memcpy(audioDataPacket.data(), &header, sizeof(header)); - /* Convert to big-endian */ + /* Convert nonce to big-endian */ uint32_t noncel = htonl(packet_nonce); - /* 4 byte encrypt nonce padded with 20 byte NULL */ - unsigned char encrypt_nonce[crypto_aead_xchacha20poly1305_ietf_NPUBBYTES] = { NULL }; - memcpy(encrypt_nonce, &packet_nonce, sizeof(packet_nonce)); - - unsigned long long clen_p; - crypto_aead_xchacha20poly1305_ietf_encrypt(audioDataPacket.data() + sizeof(header), &clen_p, encodedAudioData.data(), encodedAudioLength, (const unsigned char *)&header, sizeof(header), NULL, (const unsigned char*)encrypt_nonce, secret_key); - - std::cout << "data[\n"; - - /*::write(STDIN_FILENO, audioDataPacket.data(), audioDataPacket.size());*/ - - std::cout << "\n]\n"; - std::cout << "size("<< audioDataPacket.size() << ")\n"; - std::cout << "clen_p("<< clen_p << ")\n"; - - // uint8_t buffer[65535] = {NULL}; - // unsigned long long decrypted_len = 0; - // if (crypto_aead_xchacha20poly1305_ietf_decrypt(buffer, &decrypted_len, - // NULL, - // audioDataPacket.data() + sizeof(header), audioDataPacket.size() - sizeof(header), - // NULL, - // NULL, - // (const unsigned char*)encrypt_nonce, secret_key) != 0) { - // std::cout << "VERIFICATION FAILED\n"; - // } - // else { - // auto pb = [](unsigned char *bin, size_t siz){ - // for (size_t i = 0; i < siz; i++) { - // printf("%d ", bin[i]); - // } - // }; - - // std::cout << "buffer[\n"; - // pb(encodedAudioData.data(), encodedAudioLength); - // std::cout<<"\n]\n"; - // std::cout << "buffer_len("<< encodedAudioLength <<")\n"; - - // std::cout << "decrypted_buffer[\n"; - // pb(buffer, decrypted_len); - // std::cout <<"\n]\n"; - // std::cout << "decrypted_len("<< decrypted_len <<")\n"; - // } - - //crypto_secretbox_easy(audioDataPacket.data() + sizeof(header), encodedAudioData.data(), encodedAudioLength, (const unsigned char*)nonce, secret_key); - - /* Append the 4 byte nonce to the whole payload */ + /* 24 byte is needed for encrypting, discord just want 4 byte so just fill up the rest with null */ + unsigned char encrypt_nonce[crypto_aead_xchacha20poly1305_ietf_NPUBBYTES] = { '\0' }; + memcpy(encrypt_nonce, &noncel, sizeof(noncel)); + + /* Execute */ + crypto_aead_xchacha20poly1305_ietf_encrypt( + audioDataPacket.data() + sizeof(header), + nullptr, + encodedAudioData.data(), + encodedAudioLength, + reinterpret_cast(&header), + sizeof(header), + nullptr, + (const unsigned char*)encrypt_nonce, + secret_key); + + /* Append the 4 byte nonce to the resulting payload */ std::memcpy(audioDataPacket.data() + audioDataPacket.size() - sizeof(noncel), &noncel, sizeof(noncel)); - this->send((const char*)audioDataPacket.data(), audioDataPacket.size(), duration); + this->send(reinterpret_cast(audioDataPacket.data()), audioDataPacket.size(), duration); timestamp += frameSize; /* Increment for next packet */ From b002e89378722a17e11915da0f55f91e42768a58 Mon Sep 17 00:00:00 2001 From: Neko-Life Date: Fri, 20 Sep 2024 06:11:30 +0700 Subject: [PATCH 4/9] fix: static cast this --- src/dpp/discordvoiceclient.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp index f2236aa085..8b0dba0bca 100644 --- a/src/dpp/discordvoiceclient.cpp +++ b/src/dpp/discordvoiceclient.cpp @@ -1325,7 +1325,7 @@ discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet reinterpret_cast(&header), sizeof(header), nullptr, - (const unsigned char*)encrypt_nonce, + static_cast(encrypt_nonce), secret_key); /* Append the 4 byte nonce to the resulting payload */ From d742b71db6242a3a2c93620fd284ff7a900a3fcf Mon Sep 17 00:00:00 2001 From: Neko-Life Date: Sat, 21 Sep 2024 13:22:01 +0700 Subject: [PATCH 5/9] feat: initial receive --- src/dpp/discordvoiceclient.cpp | 247 ++++++++++++++++----------------- 1 file changed, 123 insertions(+), 124 deletions(-) diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp index 8b0dba0bca..63d56be01b 100644 --- a/src/dpp/discordvoiceclient.cpp +++ b/src/dpp/discordvoiceclient.cpp @@ -716,145 +716,144 @@ void discord_voice_client::read_ready() uint8_t buffer[65535]; int packet_size = this->udp_recv((char*)buffer, sizeof(buffer)); - if (packet_size > 0 && (!creator->on_voice_receive.empty() || !creator->on_voice_receive_combined.empty())) { - constexpr size_t header_size = 12; - if (static_cast(packet_size) < header_size) { - /* Invalid RTP payload */ - return; - } + bool receive_handler_is_empty = creator->on_voice_receive.empty() && creator->on_voice_receive_combined.empty(); + if (packet_size <= 0 || receive_handler_is_empty) { + /* Nothing to do */ + return; + } - /* It's a "silence packet" - throw it away. */ - if (packet_size < 44) { - return; - } + constexpr size_t header_size = 12; + if (static_cast(packet_size) < header_size) { + /* Invalid RTP payload */ + return; + } - if (uint8_t payload_type = buffer[1] & 0b0111'1111; - 72 <= payload_type && payload_type <= 76) { - /* - * This is an RTCP payload. Discord is known to send - * RTCP Receiver Reports. - * - * See https://datatracker.ietf.org/doc/html/rfc3551#section-6 - */ - return; - } + /* It's a "silence packet" - throw it away. */ + if (packet_size < 44) { + return; + } - voice_payload vp{0, // seq, populate later - 0, // timestamp, populate later - std::make_unique(nullptr, std::string((char*)buffer, packet_size))}; + if (uint8_t payload_type = buffer[1] & 0b0111'1111; + 72 <= payload_type && payload_type <= 76) { + /* + * This is an RTCP payload. Discord is known to send + * RTCP Receiver Reports. + * + * See https://datatracker.ietf.org/doc/html/rfc3551#section-6 + */ + return; + } - vp.vr->voice_client = this; + voice_payload vp{0, // seq, populate later + 0, // timestamp, populate later + std::make_unique(nullptr, std::string((char*)buffer, packet_size))}; - { /* Get the User ID of the speaker */ - uint32_t speaker_ssrc; - std::memcpy(&speaker_ssrc, &buffer[8], sizeof(uint32_t)); - speaker_ssrc = ntohl(speaker_ssrc); - vp.vr->user_id = ssrc_map[speaker_ssrc]; - } + vp.vr->voice_client = this; - /* Get the sequence number of the voice UDP packet */ - std::memcpy(&vp.seq, &buffer[2], sizeof(rtp_seq_t)); - vp.seq = ntohs(vp.seq); - /* Get the timestamp of the voice UDP packet */ - std::memcpy(&vp.timestamp, &buffer[4], sizeof(rtp_timestamp_t)); - vp.timestamp = ntohl(vp.timestamp); - - // nonce is 4 byte at the end of payload now - // change accordingly - // /* Nonce is the RTP Header with zero padding */ - // uint8_t nonce[24] = { 0 }; - // std::memcpy(nonce, buffer, header_size); - - // /* Get the number of CSRC in header */ - // const size_t csrc_count = buffer[0] & 0b0000'1111; - // /* Skip to the encrypted voice data */ - // const ptrdiff_t offset_to_data = header_size + sizeof(uint32_t) * csrc_count; - // uint8_t* ciphertext = buffer + offset_to_data; - // const size_t ciphertext_len = packet_size - offset_to_data; - - unsigned long long decrypted_len = 0; - - // if (crypto_aead_xchacha20poly1305_ietf_decrypt(buffer, &decrypted_len, - // NULL, - // ciphertext, ciphertext_len, - // NULL, - // NULL, - // nonce, secret_key) != 0) { - // /* Invalid Discord RTP payload. */ - // return; - // } - - // if(crypto_aead_xchacha20poly1305_ietf_decrypt() != 0) - - // if (crypto_secretbox_open_easy(encrypted_data, encrypted_data, - // encrypted_data_len, nonce, secret_key)) { - // /* Invalid Discord RTP payload. */ - // return; - // } - - // const uint8_t* decrypted_data = encrypted_data; - // size_t decrypted_data_len = encrypted_data_len - crypto_box_MACBYTES; - // if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) { - // /* Skip the RTP Extensions */ - // size_t ext_len = 0; - // { - // uint16_t ext_len_in_words; - // memcpy(&ext_len_in_words, &decrypted_data[2], sizeof(uint16_t)); - // ext_len_in_words = ntohs(ext_len_in_words); - // ext_len = sizeof(uint32_t) * ext_len_in_words; - // } - // constexpr size_t ext_header_len = sizeof(uint16_t) * 2; - // decrypted_data += ext_header_len + ext_len; - // decrypted_data_len -= ext_header_len + ext_len; - // } + { /* Get the User ID of the speaker */ + uint32_t speaker_ssrc; + std::memcpy(&speaker_ssrc, &buffer[8], sizeof(uint32_t)); + speaker_ssrc = ntohl(speaker_ssrc); + vp.vr->user_id = ssrc_map[speaker_ssrc]; + } - /* - * We're left with the decrypted, opus-encoded data. - * Park the payload and decode on the voice courier thread. - */ - vp.vr->audio_data.assign(buffer, buffer + decrypted_len); + /* Get the sequence number of the voice UDP packet */ + std::memcpy(&vp.seq, &buffer[2], sizeof(rtp_seq_t)); + vp.seq = ntohs(vp.seq); + /* Get the timestamp of the voice UDP packet */ + std::memcpy(&vp.timestamp, &buffer[4], sizeof(rtp_timestamp_t)); + vp.timestamp = ntohl(vp.timestamp); + + constexpr size_t nonce_size = sizeof(uint32_t); + /* Nonce is 4 byte at the end of payload with zero padding */ + uint8_t nonce[24] = { 0 }; + std::memcpy(nonce, buffer + packet_size - nonce_size, nonce_size); + + /* Get the number of CSRC in header */ + const size_t csrc_count = buffer[0] & 0b0000'1111; + /* Skip to the encrypted voice data */ + const ptrdiff_t offset_to_data = header_size + sizeof(uint32_t) * csrc_count; + uint8_t* ciphertext = buffer + offset_to_data; + const size_t ciphertext_len = packet_size - offset_to_data - nonce_size; + + uint8_t header[header_size] = { 0 }; + memcpy(header, buffer, header_size); + + unsigned long long decrypted_len = 0; + if (crypto_aead_xchacha20poly1305_ietf_decrypt( + buffer, &decrypted_len, + NULL, + ciphertext, ciphertext_len, + header, + header_size, + nonce, secret_key) != 0) { + /* Invalid Discord RTP payload. */ + std::cout << "INVALID PACKET\n"; + return; + } - { - std::lock_guard lk(voice_courier_shared_state.mtx); - auto& [range, payload_queue, pending_decoder_ctls, decoder] = voice_courier_shared_state.parked_voice_payloads[vp.vr->user_id]; + // const uint8_t* decrypted_data = buffer; + // size_t decrypted_data_len = encrypted_data_len - crypto_box_MACBYTES; + // if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) { + // /* Skip the RTP Extensions */ + // size_t ext_len = 0; + // { + // uint16_t ext_len_in_words; + // memcpy(&ext_len_in_words, &decrypted_data[2], sizeof(uint16_t)); + // ext_len_in_words = ntohs(ext_len_in_words); + // ext_len = sizeof(uint32_t) * ext_len_in_words; + // } + // constexpr size_t ext_header_len = sizeof(uint16_t) * 2; + // decrypted_data += ext_header_len + ext_len; + // decrypted_data_len -= ext_header_len + ext_len; + // } + + /* + * We're left with the decrypted, opus-encoded data. + * Park the payload and decode on the voice courier thread. + */ + vp.vr->audio_data.assign(buffer, buffer + decrypted_len); - if (!decoder) { - /* - * Most likely this is the first time we encounter this speaker. - * Do some initialization for not only the decoder but also the range. + { + std::lock_guard lk(voice_courier_shared_state.mtx); + auto& [range, payload_queue, pending_decoder_ctls, decoder] = voice_courier_shared_state.parked_voice_payloads[vp.vr->user_id]; + + if (!decoder) { + /* + * Most likely this is the first time we encounter this speaker. + * Do some initialization for not only the decoder but also the range. + */ + range.min_seq = vp.seq; + range.min_timestamp = vp.timestamp; + + int opus_error = 0; + decoder.reset(opus_decoder_create(opus_sample_rate_hz, opus_channel_count, &opus_error), + &opus_decoder_destroy); + if (opus_error) { + /** + * NOTE: The -10 here makes the opus_error match up with values of exception_error_code, + * which would otherwise conflict as every C library loves to use values from -1 downwards. */ - range.min_seq = vp.seq; - range.min_timestamp = vp.timestamp; - - int opus_error = 0; - decoder.reset(opus_decoder_create(opus_sample_rate_hz, opus_channel_count, &opus_error), - &opus_decoder_destroy); - if (opus_error) { - /** - * NOTE: The -10 here makes the opus_error match up with values of exception_error_code, - * which would otherwise conflict as every C library loves to use values from -1 downwards. - */ - throw dpp::voice_exception((exception_error_code)(opus_error - 10), "discord_voice_client::discord_voice_client; opus_decoder_create() failed"); - } + throw dpp::voice_exception((exception_error_code)(opus_error - 10), "discord_voice_client::discord_voice_client; opus_decoder_create() failed"); } + } - if (vp.seq < range.min_seq && vp.timestamp < range.min_timestamp) { - /* This packet arrived too late. We can only discard it. */ - return; - } - range.max_seq = vp.seq; - range.max_timestamp = vp.timestamp; - payload_queue.push(std::move(vp)); + if (vp.seq < range.min_seq && vp.timestamp < range.min_timestamp) { + /* This packet arrived too late. We can only discard it. */ + return; } + range.max_seq = vp.seq; + range.max_timestamp = vp.timestamp; + payload_queue.push(std::move(vp)); + } - voice_courier_shared_state.signal_iteration.notify_one(); + voice_courier_shared_state.signal_iteration.notify_one(); - if (!voice_courier.joinable()) { - /* Courier thread is not running, start it */ - voice_courier = std::thread(&voice_courier_loop, - std::ref(*this), - std::ref(voice_courier_shared_state)); - } + if (!voice_courier.joinable()) { + /* Courier thread is not running, start it */ + voice_courier = std::thread(&voice_courier_loop, + std::ref(*this), + std::ref(voice_courier_shared_state)); } #else throw dpp::voice_exception(err_no_voice_support, "Voice support not enabled in this build of D++"); From a228019b52d49961102909f5d2d28b823b51022e Mon Sep 17 00:00:00 2001 From: Neko-Life Date: Sat, 21 Sep 2024 23:07:57 +0700 Subject: [PATCH 6/9] feat: working on_voice_receive, need cleanups --- src/dpp/discordvoiceclient.cpp | 70 +++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 22 deletions(-) diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp index 63d56be01b..bc9367a6a0 100644 --- a/src/dpp/discordvoiceclient.cpp +++ b/src/dpp/discordvoiceclient.cpp @@ -710,12 +710,29 @@ void discord_voice_client::send(const char* packet, size_t len, uint64_t duratio outbuf.emplace_back(frame); } +// static FILE *f = NULL; +// +// void init() { +// static bool i = false; +// if (i) return; +// i = true; +// +// f = fopen("report.bin", "wb"); +// } + void discord_voice_client::read_ready() { #ifdef HAVE_VOICE uint8_t buffer[65535]; int packet_size = this->udp_recv((char*)buffer, sizeof(buffer)); + std::cout << "RECEIVED SIZE("<on_voice_receive.empty() && creator->on_voice_receive_combined.empty(); if (packet_size <= 0 || receive_handler_is_empty) { /* Nothing to do */ @@ -750,8 +767,8 @@ void discord_voice_client::read_ready() vp.vr->voice_client = this; + uint32_t speaker_ssrc; { /* Get the User ID of the speaker */ - uint32_t speaker_ssrc; std::memcpy(&speaker_ssrc, &buffer[8], sizeof(uint32_t)); speaker_ssrc = ntohl(speaker_ssrc); vp.vr->user_id = ssrc_map[speaker_ssrc]; @@ -773,46 +790,55 @@ void discord_voice_client::read_ready() const size_t csrc_count = buffer[0] & 0b0000'1111; /* Skip to the encrypted voice data */ const ptrdiff_t offset_to_data = header_size + sizeof(uint32_t) * csrc_count; + size_t total_header_len = offset_to_data; + uint8_t* ciphertext = buffer + offset_to_data; - const size_t ciphertext_len = packet_size - offset_to_data - nonce_size; + size_t ciphertext_len = packet_size - offset_to_data - nonce_size; - uint8_t header[header_size] = { 0 }; - memcpy(header, buffer, header_size); + size_t ext_len = 0; + if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) { + /* Get the RTP Extensions size */ + { + uint16_t ext_len_in_words; + memcpy(&ext_len_in_words, &ciphertext[2], sizeof(uint16_t)); + ext_len_in_words = ntohs(ext_len_in_words); + ext_len = sizeof(uint32_t) * ext_len_in_words; + } + constexpr size_t ext_header_len = sizeof(uint16_t) * 2; + ciphertext += ext_header_len; + ciphertext_len -= ext_header_len; + total_header_len += ext_header_len; + } - unsigned long long decrypted_len = 0; + uint8_t decrypted[65535] = { 0 }; + unsigned long long opus_packet_len = 0; if (crypto_aead_xchacha20poly1305_ietf_decrypt( - buffer, &decrypted_len, + decrypted, &opus_packet_len, NULL, ciphertext, ciphertext_len, - header, - header_size, + buffer, + total_header_len, nonce, secret_key) != 0) { /* Invalid Discord RTP payload. */ std::cout << "INVALID PACKET\n"; return; } + uint8_t *opus_packet = decrypted; + if (ext_len > 0) { + /* Skip RTP Header Extension */ + opus_packet += ext_len; + opus_packet_len -= ext_len; + } + // const uint8_t* decrypted_data = buffer; // size_t decrypted_data_len = encrypted_data_len - crypto_box_MACBYTES; - // if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) { - // /* Skip the RTP Extensions */ - // size_t ext_len = 0; - // { - // uint16_t ext_len_in_words; - // memcpy(&ext_len_in_words, &decrypted_data[2], sizeof(uint16_t)); - // ext_len_in_words = ntohs(ext_len_in_words); - // ext_len = sizeof(uint32_t) * ext_len_in_words; - // } - // constexpr size_t ext_header_len = sizeof(uint16_t) * 2; - // decrypted_data += ext_header_len + ext_len; - // decrypted_data_len -= ext_header_len + ext_len; - // } /* * We're left with the decrypted, opus-encoded data. * Park the payload and decode on the voice courier thread. */ - vp.vr->audio_data.assign(buffer, buffer + decrypted_len); + vp.vr->audio_data.assign(opus_packet, opus_packet + opus_packet_len); { std::lock_guard lk(voice_courier_shared_state.mtx); From 72938834ad119c49d8fef1aaaf197dd71046b041 Mon Sep 17 00:00:00 2001 From: Neko-Life Date: Sat, 21 Sep 2024 23:09:29 +0700 Subject: [PATCH 7/9] feat: cleanups --- src/dpp/discordvoiceclient.cpp | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp index bc9367a6a0..a62cb75b8b 100644 --- a/src/dpp/discordvoiceclient.cpp +++ b/src/dpp/discordvoiceclient.cpp @@ -710,29 +710,12 @@ void discord_voice_client::send(const char* packet, size_t len, uint64_t duratio outbuf.emplace_back(frame); } -// static FILE *f = NULL; -// -// void init() { -// static bool i = false; -// if (i) return; -// i = true; -// -// f = fopen("report.bin", "wb"); -// } - void discord_voice_client::read_ready() { #ifdef HAVE_VOICE uint8_t buffer[65535]; int packet_size = this->udp_recv((char*)buffer, sizeof(buffer)); - std::cout << "RECEIVED SIZE("<on_voice_receive.empty() && creator->on_voice_receive_combined.empty(); if (packet_size <= 0 || receive_handler_is_empty) { /* Nothing to do */ @@ -820,7 +803,6 @@ void discord_voice_client::read_ready() total_header_len, nonce, secret_key) != 0) { /* Invalid Discord RTP payload. */ - std::cout << "INVALID PACKET\n"; return; } @@ -831,9 +813,6 @@ void discord_voice_client::read_ready() opus_packet_len -= ext_len; } - // const uint8_t* decrypted_data = buffer; - // size_t decrypted_data_len = encrypted_data_len - crypto_box_MACBYTES; - /* * We're left with the decrypted, opus-encoded data. * Park the payload and decode on the voice courier thread. From 5c45f94b051dc54951f6ccf766451a3690fa8767 Mon Sep 17 00:00:00 2001 From: Neko-Life Date: Sat, 21 Sep 2024 23:36:31 +0700 Subject: [PATCH 8/9] feat: add some comment --- src/dpp/discordvoiceclient.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp index a62cb75b8b..770391d34e 100644 --- a/src/dpp/discordvoiceclient.cpp +++ b/src/dpp/discordvoiceclient.cpp @@ -780,7 +780,10 @@ void discord_voice_client::read_ready() size_t ext_len = 0; if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) { - /* Get the RTP Extensions size */ + /** + * Get the RTP Extensions size, we only get the size here because + * the extension itself is encrypted along with the opus packet + */ { uint16_t ext_len_in_words; memcpy(&ext_len_in_words, &ciphertext[2], sizeof(uint16_t)); @@ -797,9 +800,14 @@ void discord_voice_client::read_ready() unsigned long long opus_packet_len = 0; if (crypto_aead_xchacha20poly1305_ietf_decrypt( decrypted, &opus_packet_len, - NULL, + nullptr, ciphertext, ciphertext_len, buffer, + /** + * Additional Data: + * The whole header (including csrc list) + + * 4 byte extension header (magic 0xBEDE + 16-bit denoting extension length) + */ total_header_len, nonce, secret_key) != 0) { /* Invalid Discord RTP payload. */ @@ -808,7 +816,7 @@ void discord_voice_client::read_ready() uint8_t *opus_packet = decrypted; if (ext_len > 0) { - /* Skip RTP Header Extension */ + /* Skip previously encrypted RTP Header Extension */ opus_packet += ext_len; opus_packet_len -= ext_len; } @@ -1326,6 +1334,7 @@ discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet nullptr, encodedAudioData.data(), encodedAudioLength, + /* The RTP Header as Additional Data */ reinterpret_cast(&header), sizeof(header), nullptr, From a0f5bc35f53ff8b81976a9cf028c92629e97cfb2 Mon Sep 17 00:00:00 2001 From: Neko-Life Date: Sat, 21 Sep 2024 23:50:01 +0700 Subject: [PATCH 9/9] feat: replace to snake case --- src/dpp/discordvoiceclient.cpp | 42 +++++++++++++++++----------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp index 770391d34e..e46d4258a0 100644 --- a/src/dpp/discordvoiceclient.cpp +++ b/src/dpp/discordvoiceclient.cpp @@ -1275,13 +1275,13 @@ discord_voice_client& discord_voice_client::send_audio_raw(uint16_t* audio_data, return send_audio_raw((uint16_t*)packet.data(), packet.size()); } - opus_int32 encodedAudioMaxLength = (opus_int32)length; - std::vector encodedAudioData(encodedAudioMaxLength); - size_t encodedAudioLength = encodedAudioMaxLength; + opus_int32 encoded_audio_max_length = (opus_int32)length; + std::vector encoded_audio(encoded_audio_max_length); + size_t encoded_audio_length = encoded_audio_max_length; - encodedAudioLength = this->encode((uint8_t*)audio_data, length, encodedAudioData.data(), encodedAudioLength); + encoded_audio_length = this->encode((uint8_t*)audio_data, length, encoded_audio.data(), encoded_audio_length); - send_audio_opus(encodedAudioData.data(), encodedAudioLength); + send_audio_opus(encoded_audio.data(), encoded_audio_length); #else throw dpp::voice_exception(err_no_voice_support, "Voice support not enabled in this build of D++"); #endif @@ -1301,25 +1301,25 @@ discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet, const size_t length, uint64_t duration) { #if HAVE_VOICE - int frameSize = (int)(48 * duration * (timescale / 1000000)); - opus_int32 encodedAudioMaxLength = (opus_int32)length; - std::vector encodedAudioData(encodedAudioMaxLength); - size_t encodedAudioLength = encodedAudioMaxLength; + int frame_size = (int)(48 * duration * (timescale / 1000000)); + opus_int32 encoded_audio_max_length = (opus_int32)length; + std::vector encoded_audio(encoded_audio_max_length); + size_t encoded_audio_length = encoded_audio_max_length; - encodedAudioLength = length; - encodedAudioData.reserve(length); - memcpy(encodedAudioData.data(), opus_packet, length); + encoded_audio_length = length; + encoded_audio.reserve(length); + memcpy(encoded_audio.data(), opus_packet, length); ++sequence; rtp_header header(sequence, timestamp, (uint32_t)ssrc); /* Expected payload size is unencrypted header + encrypted opus packet + unencrypted 32 bit nonce */ - size_t packet_siz = sizeof(header) + (encodedAudioLength + crypto_aead_xchacha20poly1305_IETF_ABYTES) + sizeof(packet_nonce); + size_t packet_siz = sizeof(header) + (encoded_audio_length + crypto_aead_xchacha20poly1305_IETF_ABYTES) + sizeof(packet_nonce); - std::vector audioDataPacket(packet_siz); + std::vector payload(packet_siz); /* Set RTP header */ - std::memcpy(audioDataPacket.data(), &header, sizeof(header)); + std::memcpy(payload.data(), &header, sizeof(header)); /* Convert nonce to big-endian */ uint32_t noncel = htonl(packet_nonce); @@ -1330,10 +1330,10 @@ discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet /* Execute */ crypto_aead_xchacha20poly1305_ietf_encrypt( - audioDataPacket.data() + sizeof(header), + payload.data() + sizeof(header), nullptr, - encodedAudioData.data(), - encodedAudioLength, + encoded_audio.data(), + encoded_audio_length, /* The RTP Header as Additional Data */ reinterpret_cast(&header), sizeof(header), @@ -1342,10 +1342,10 @@ discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet secret_key); /* Append the 4 byte nonce to the resulting payload */ - std::memcpy(audioDataPacket.data() + audioDataPacket.size() - sizeof(noncel), &noncel, sizeof(noncel)); + std::memcpy(payload.data() + payload.size() - sizeof(noncel), &noncel, sizeof(noncel)); - this->send(reinterpret_cast(audioDataPacket.data()), audioDataPacket.size(), duration); - timestamp += frameSize; + this->send(reinterpret_cast(payload.data()), payload.size(), duration); + timestamp += frame_size; /* Increment for next packet */ packet_nonce++;