From 7fca986c977ebaf3d01de1d4ab6ac219df7ad983 Mon Sep 17 00:00:00 2001
From: Neko-Life <nekolife123579@gmail.com>
Date: Thu, 19 Sep 2024 14:34:38 +0700
Subject: [PATCH 1/9] feat: initial decrypt impl, TODO: test and cleanups

---
 src/dpp/discordvoiceclient.cpp | 66 ++++++++++++++++++++--------------
 1 file changed, 40 insertions(+), 26 deletions(-)

diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp
index c28ce709b6..8ded0fcc58 100644
--- a/src/dpp/discordvoiceclient.cpp
+++ b/src/dpp/discordvoiceclient.cpp
@@ -756,44 +756,58 @@ void discord_voice_client::read_ready()
 
 		/* Nonce is the RTP Header with zero padding */
 		uint8_t nonce[24] = { 0 };
-		std::memcpy(nonce, &buffer[0], header_size);
+		std::memcpy(nonce, buffer, header_size);
 
 		/* Get the number of CSRC in header */
 		const size_t csrc_count = buffer[0] & 0b0000'1111;
 		/* Skip to the encrypted voice data */
 		const ptrdiff_t offset_to_data = header_size + sizeof(uint32_t) * csrc_count;
-		uint8_t* encrypted_data = buffer + offset_to_data;
-		const size_t encrypted_data_len = packet_size - offset_to_data;
-
-		if(crypto_aead_xchacha20poly1305_ietf_decrypt() != 0)
-
-		if (crypto_secretbox_open_easy(encrypted_data, encrypted_data,
-		                               encrypted_data_len, nonce, secret_key)) {
-			/* Invalid Discord RTP payload. */
-			return;
+		uint8_t* ciphertext = buffer + offset_to_data;
+		const size_t ciphertext_len = packet_size - offset_to_data;
+
+		std::vector<uint8_t> decrypted;
+		decrypted.reserve(ciphertext_len);
+		unsigned long long decrypted_len = 0;
+
+		if (crypto_aead_chacha20poly1305_ietf_decrypt(decrypted.data(), &decrypted_len,
+								NULL,
+								ciphertext, ciphertext_len,
+								NULL,
+								NULL,
+								nonce, secret_key) != 0) {
+				/* Invalid Discord RTP payload. */
+				return;
 		}
 
-                const uint8_t* decrypted_data = encrypted_data;
-                size_t decrypted_data_len = encrypted_data_len - crypto_box_MACBYTES;
-		if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) {
-			/* Skip the RTP Extensions */
-			size_t ext_len = 0;
-			{
-				uint16_t ext_len_in_words;
-				memcpy(&ext_len_in_words, &decrypted_data[2], sizeof(uint16_t));
-				ext_len_in_words = ntohs(ext_len_in_words);
-				ext_len = sizeof(uint32_t) * ext_len_in_words;
-			}
-			constexpr size_t ext_header_len = sizeof(uint16_t) * 2;
-                        decrypted_data += ext_header_len + ext_len;
-                        decrypted_data_len -= ext_header_len + ext_len;
-		}
+		// if(crypto_aead_xchacha20poly1305_ietf_decrypt() != 0)
+
+		// 		if (crypto_secretbox_open_easy(encrypted_data, encrypted_data,
+		// 								encrypted_data_len, nonce, secret_key)) {
+		// 				/* Invalid Discord RTP payload. */
+		// 				return;
+		// 		}
+
+		// const uint8_t* decrypted_data = encrypted_data;
+		// size_t decrypted_data_len = encrypted_data_len - crypto_box_MACBYTES;
+		// if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) {
+		// 		/* Skip the RTP Extensions */
+		// 		size_t ext_len = 0;
+		// 		{
+		// 				uint16_t ext_len_in_words;
+		// 				memcpy(&ext_len_in_words, &decrypted_data[2], sizeof(uint16_t));
+		// 				ext_len_in_words = ntohs(ext_len_in_words);
+		// 				ext_len = sizeof(uint32_t) * ext_len_in_words;
+		// 		}
+		// 		constexpr size_t ext_header_len = sizeof(uint16_t) * 2;
+		// 		decrypted_data += ext_header_len + ext_len;
+		// 		decrypted_data_len -= ext_header_len + ext_len;
+		// }
 
 		/*
 		 * We're left with the decrypted, opus-encoded data.
 		 * Park the payload and decode on the voice courier thread.
 		 */
-		vp.vr->audio_data.assign(decrypted_data, decrypted_data + decrypted_data_len);
+		vp.vr->audio_data.assign(decrypted.begin(), decrypted.end());
 
 		{
 			std::lock_guard lk(voice_courier_shared_state.mtx);

From 684d4e9abd0ce43225274d15e33bfc16f71ff168 Mon Sep 17 00:00:00 2001
From: Neko-Life <nekolife123579@gmail.com>
Date: Fri, 20 Sep 2024 03:29:43 +0700
Subject: [PATCH 2/9] feat: WIP incremental packet nonce for send_audio_opus encryption

---
 include/dpp/discordvoiceclient.h |  11 ++++
 src/dpp/discordvoiceclient.cpp   | 103 +++++++++++++++++++++++--------
 2 files changed, 89 insertions(+), 25 deletions(-)

diff --git a/include/dpp/discordvoiceclient.h b/include/dpp/discordvoiceclient.h
index 07e4364483..8ed58107d8 100644
--- a/include/dpp/discordvoiceclient.h
+++ b/include/dpp/discordvoiceclient.h
@@ -346,6 +346,17 @@ class DPP_EXPORT discord_voice_client : public websocket_client
 	 */
 	uint32_t timestamp;
 
+	/**
+	 * @brief Each packet should have a nonce, a 32-bit incremental
+	 * integer value appended to payload.
+	 *
+	 * We should keep track of this value and increment it for each
+	 * packet sent.
+	 *
+	 * Current initial value is hardcoded to 1.
+	 */
+	uint32_t packet_nonce;
+
 	/**
 	 * @brief Last sent packet high-resolution timestamp
 	 */
diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp
index 8ded0fcc58..e6004ff584 100644
--- a/src/dpp/discordvoiceclient.cpp
+++ b/src/dpp/discordvoiceclient.cpp
@@ -20,6 +20,7 @@
  *
  ************************************************************************************/
 
+#include <cstdint>
 #include <dpp/export.h>
 #ifdef _WIN32
 	#include <WinSock2.h>
@@ -316,6 +317,7 @@ discord_voice_client::discord_voice_client(dpp::cluster* _cluster, snowflake _ch
 	secret_key(nullptr),
 	sequence(0),
 	timestamp(0),
+	packet_nonce(1),
 	last_timestamp(std::chrono::high_resolution_clock::now()),
 	sending(false),
 	tracks(0),
@@ -593,6 +595,9 @@ bool discord_voice_client::handle_frame(const std::string &data)
 					rdy.voice_channel_id = this->channel_id;
 					creator->on_voice_ready.call(rdy);
 				}
+
+				/* Reset packet_nonce */
+				packet_nonce = 1;
 			}
 			break;
 			/* Voice ready */
@@ -754,30 +759,30 @@ void discord_voice_client::read_ready()
 		std::memcpy(&vp.timestamp, &buffer[4], sizeof(rtp_timestamp_t));
 		vp.timestamp = ntohl(vp.timestamp);
 
-		/* Nonce is the RTP Header with zero padding */
-		uint8_t nonce[24] = { 0 };
-		std::memcpy(nonce, buffer, header_size);
+		// nonce is 4 byte at the end of payload now
+		// change accordingly
+		// /* Nonce is the RTP Header with zero padding */
+		// uint8_t nonce[24] = { 0 };
+		// std::memcpy(nonce, buffer, header_size);
 
-		/* Get the number of CSRC in header */
-		const size_t csrc_count = buffer[0] & 0b0000'1111;
-		/* Skip to the encrypted voice data */
-		const ptrdiff_t offset_to_data = header_size + sizeof(uint32_t) * csrc_count;
-		uint8_t* ciphertext = buffer + offset_to_data;
-		const size_t ciphertext_len = packet_size - offset_to_data;
+		// /* Get the number of CSRC in header */
+		// const size_t csrc_count = buffer[0] & 0b0000'1111;
+		// /* Skip to the encrypted voice data */
+		// const ptrdiff_t offset_to_data = header_size + sizeof(uint32_t) * csrc_count;
+		// uint8_t* ciphertext = buffer + offset_to_data;
+		// const size_t ciphertext_len = packet_size - offset_to_data;
 
-		std::vector<uint8_t> decrypted;
-		decrypted.reserve(ciphertext_len);
 		unsigned long long decrypted_len = 0;
 
-		if (crypto_aead_chacha20poly1305_ietf_decrypt(decrypted.data(), &decrypted_len,
-								NULL,
-								ciphertext, ciphertext_len,
-								NULL,
-								NULL,
-								nonce, secret_key) != 0) {
-				/* Invalid Discord RTP payload. */
-				return;
-		}
+		// if (crypto_aead_xchacha20poly1305_ietf_decrypt(buffer, &decrypted_len,
+		// 						NULL,
+		// 						ciphertext, ciphertext_len,
+		// 						NULL,
+		// 						NULL,
+		// 						nonce, secret_key) != 0) {
+		// 		/* Invalid Discord RTP payload. */
+		// 		return;
+		// }
 
 		// if(crypto_aead_xchacha20poly1305_ietf_decrypt() != 0)
 
@@ -807,7 +812,7 @@ void discord_voice_client::read_ready()
 		 * We're left with the decrypted, opus-encoded data.
 		 * Park the payload and decode on the voice courier thread.
 		 */
-		vp.vr->audio_data.assign(decrypted.begin(), decrypted.end());
+		vp.vr->audio_data.assign(buffer, buffer + decrypted_len);
 
 		{
 			std::lock_guard lk(voice_courier_shared_state.mtx);
@@ -1296,20 +1301,68 @@ discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet
 	++sequence;
 	rtp_header header(sequence, timestamp, (uint32_t)ssrc);
 
-	std::vector<uint8_t> audioDataPacket(sizeof(header) + encodedAudioLength + crypto_secretbox_MACBYTES);
+	/* Unencrypted header + encrypted opus packet + encrypted header as additional data + unencrypted 32 bit nonce */
+	size_t packet_siz = (sizeof(header) * 2) + (encodedAudioLength + crypto_aead_xchacha20poly1305_IETF_ABYTES) + sizeof(packet_nonce);
+	std::vector<uint8_t> audioDataPacket(packet_siz);
 	std::memcpy(audioDataPacket.data(), &header, sizeof(header));
 
-	unsigned char nonce[crypto_aead_xchacha20poly1305_ietf_NPUBBYTES];
-	randombytes_buf(nonce, sizeof nonce);
+	/* Convert to big-endian */
+	uint32_t noncel = htonl(packet_nonce);
+
+	/* 4 byte encrypt nonce padded with 20 byte NULL */
+	unsigned char encrypt_nonce[crypto_aead_xchacha20poly1305_ietf_NPUBBYTES] = { NULL };
+	memcpy(encrypt_nonce, &packet_nonce, sizeof(packet_nonce));
 
 	unsigned long long clen_p;
-	crypto_aead_xchacha20poly1305_ietf_encrypt(audioDataPacket.data() + sizeof(header), &clen_p, encodedAudioData.data(), encodedAudioLength, NULL, NULL, NULL, (const unsigned char*)nonce, secret_key);
+	crypto_aead_xchacha20poly1305_ietf_encrypt(audioDataPacket.data() + sizeof(header), &clen_p, encodedAudioData.data(), encodedAudioLength, (const unsigned char *)&header, sizeof(header), NULL, (const unsigned char*)encrypt_nonce, secret_key);
+
+	std::cout << "data[\n";
+
+	/*::write(STDIN_FILENO, audioDataPacket.data(), audioDataPacket.size());*/
+
+	std::cout << "\n]\n";
+	std::cout << "size("<< audioDataPacket.size() << ")\n";
+	std::cout << "clen_p("<< clen_p << ")\n";
+
+	// uint8_t buffer[65535] = {NULL};
+	// unsigned long long decrypted_len = 0;
+	// if (crypto_aead_xchacha20poly1305_ietf_decrypt(buffer, &decrypted_len,
+	// 						NULL,
+	// 						audioDataPacket.data() + sizeof(header), audioDataPacket.size() - sizeof(header),
+	// 						NULL,
+	// 						NULL,
+	// 						(const unsigned char*)encrypt_nonce, secret_key) != 0) {
+	// 		std::cout << "VERIFICATION FAILED\n";
+	// }
+	// else {
+	// 		auto pb = [](unsigned char *bin, size_t siz){
+	// 				for (size_t i = 0; i < siz; i++) {
+	// 						printf("%d ", bin[i]);
+	// 				}
+	// 		};
+
+	// 		std::cout << "buffer[\n";
+	// 		pb(encodedAudioData.data(), encodedAudioLength);
+	// 		std::cout<<"\n]\n";
+	// 		std::cout << "buffer_len("<< encodedAudioLength <<")\n";
+
+	// 		std::cout << "decrypted_buffer[\n";
+	// 		pb(buffer, decrypted_len);
+	// 		std::cout		<<"\n]\n";
+	// 		std::cout << "decrypted_len("<< decrypted_len <<")\n";
+	// }
 
 	//crypto_secretbox_easy(audioDataPacket.data() + sizeof(header), encodedAudioData.data(), encodedAudioLength, (const unsigned char*)nonce, secret_key);
 
+	/* Append the 4 byte nonce to the whole payload */
+	std::memcpy(audioDataPacket.data() + audioDataPacket.size() - sizeof(noncel), &noncel, sizeof(noncel));
+
 	this->send((const char*)audioDataPacket.data(), audioDataPacket.size(), duration);
 	timestamp += frameSize;
 
+	/* Increment for next packet */
+	packet_nonce++;
+
 	speak();
 #else
 	throw dpp::voice_exception(err_no_voice_support, "Voice support not enabled in this build of D++");

From fe5b0e40dbdd21806663108d9281da20d4b1f03d Mon Sep 17 00:00:00 2001
From: Neko-Life <nekolife123579@gmail.com>
Date: Fri, 20 Sep 2024 04:25:27 +0700
Subject: [PATCH 3/9] feat: working encryption

---
 src/dpp/discordvoiceclient.cpp | 74 +++++++++++-----------------------
 1 file changed, 24 insertions(+), 50 deletions(-)

diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp
index e6004ff584..f2236aa085 100644
--- a/src/dpp/discordvoiceclient.cpp
+++ b/src/dpp/discordvoiceclient.cpp
@@ -1301,63 +1301,37 @@ discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet
 	++sequence;
 	rtp_header header(sequence, timestamp, (uint32_t)ssrc);
 
-	/* Unencrypted header + encrypted opus packet + encrypted header as additional data + unencrypted 32 bit nonce */
-	size_t packet_siz = (sizeof(header) * 2) + (encodedAudioLength + crypto_aead_xchacha20poly1305_IETF_ABYTES) + sizeof(packet_nonce);
+	/* Expected payload size is unencrypted header + encrypted opus packet + unencrypted 32 bit nonce */
+	size_t packet_siz = sizeof(header) + (encodedAudioLength + crypto_aead_xchacha20poly1305_IETF_ABYTES) + sizeof(packet_nonce);
+
 	std::vector<uint8_t> audioDataPacket(packet_siz);
+
+	/* Set RTP header */
 	std::memcpy(audioDataPacket.data(), &header, sizeof(header));
 
-	/* Convert to big-endian */
+	/* Convert nonce to big-endian */
 	uint32_t noncel = htonl(packet_nonce);
 
-	/* 4 byte encrypt nonce padded with 20 byte NULL */
-	unsigned char encrypt_nonce[crypto_aead_xchacha20poly1305_ietf_NPUBBYTES] = { NULL };
-	memcpy(encrypt_nonce, &packet_nonce, sizeof(packet_nonce));
-
-	unsigned long long clen_p;
-	crypto_aead_xchacha20poly1305_ietf_encrypt(audioDataPacket.data() + sizeof(header), &clen_p, encodedAudioData.data(), encodedAudioLength, (const unsigned char *)&header, sizeof(header), NULL, (const unsigned char*)encrypt_nonce, secret_key);
-
-	std::cout << "data[\n";
-
-	/*::write(STDIN_FILENO, audioDataPacket.data(), audioDataPacket.size());*/
-
-	std::cout << "\n]\n";
-	std::cout << "size("<< audioDataPacket.size() << ")\n";
-	std::cout << "clen_p("<< clen_p << ")\n";
-
-	// uint8_t buffer[65535] = {NULL};
-	// unsigned long long decrypted_len = 0;
-	// if (crypto_aead_xchacha20poly1305_ietf_decrypt(buffer, &decrypted_len,
-	// 						NULL,
-	// 						audioDataPacket.data() + sizeof(header), audioDataPacket.size() - sizeof(header),
-	// 						NULL,
-	// 						NULL,
-	// 						(const unsigned char*)encrypt_nonce, secret_key) != 0) {
-	// 		std::cout << "VERIFICATION FAILED\n";
-	// }
-	// else {
-	// 		auto pb = [](unsigned char *bin, size_t siz){
-	// 				for (size_t i = 0; i < siz; i++) {
-	// 						printf("%d ", bin[i]);
-	// 				}
-	// 		};
-
-	// 		std::cout << "buffer[\n";
-	// 		pb(encodedAudioData.data(), encodedAudioLength);
-	// 		std::cout<<"\n]\n";
-	// 		std::cout << "buffer_len("<< encodedAudioLength <<")\n";
-
-	// 		std::cout << "decrypted_buffer[\n";
-	// 		pb(buffer, decrypted_len);
-	// 		std::cout		<<"\n]\n";
-	// 		std::cout << "decrypted_len("<< decrypted_len <<")\n";
-	// }
-
-	//crypto_secretbox_easy(audioDataPacket.data() + sizeof(header), encodedAudioData.data(), encodedAudioLength, (const unsigned char*)nonce, secret_key);
-
-	/* Append the 4 byte nonce to the whole payload */
+	/* 24 byte is needed for encrypting, discord just want 4 byte so just fill up the rest with null */
+	unsigned char encrypt_nonce[crypto_aead_xchacha20poly1305_ietf_NPUBBYTES] = { '\0' };
+	memcpy(encrypt_nonce, &noncel, sizeof(noncel));
+
+	/* Execute */
+	crypto_aead_xchacha20poly1305_ietf_encrypt(
+			audioDataPacket.data() + sizeof(header),
+			nullptr,
+			encodedAudioData.data(),
+			encodedAudioLength,
+			reinterpret_cast<const unsigned char *>(&header),
+			sizeof(header),
+			nullptr,
+			(const unsigned char*)encrypt_nonce,
+			secret_key);
+
+	/* Append the 4 byte nonce to the resulting payload */
 	std::memcpy(audioDataPacket.data() + audioDataPacket.size() - sizeof(noncel), &noncel, sizeof(noncel));
 
-	this->send((const char*)audioDataPacket.data(), audioDataPacket.size(), duration);
+	this->send(reinterpret_cast<const char*>(audioDataPacket.data()), audioDataPacket.size(), duration);
 	timestamp += frameSize;
 
 	/* Increment for next packet */

From b002e89378722a17e11915da0f55f91e42768a58 Mon Sep 17 00:00:00 2001
From: Neko-Life <nekolife123579@gmail.com>
Date: Fri, 20 Sep 2024 06:11:30 +0700
Subject: [PATCH 4/9] fix: use static_cast for the encrypt_nonce argument

---
 src/dpp/discordvoiceclient.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp
index f2236aa085..8b0dba0bca 100644
--- a/src/dpp/discordvoiceclient.cpp
+++ b/src/dpp/discordvoiceclient.cpp
@@ -1325,7 +1325,7 @@ discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet
 			reinterpret_cast<const unsigned char *>(&header),
 			sizeof(header),
 			nullptr,
-			(const unsigned char*)encrypt_nonce,
+			static_cast<const unsigned char*>(encrypt_nonce),
 			secret_key);
 
 	/* Append the 4 byte nonce to the resulting payload */

From d742b71db6242a3a2c93620fd284ff7a900a3fcf Mon Sep 17 00:00:00 2001
From: Neko-Life <nekolife123579@gmail.com>
Date: Sat, 21 Sep 2024 13:22:01 +0700
Subject: [PATCH 5/9] feat: initial receive

---
 src/dpp/discordvoiceclient.cpp | 247 ++++++++++++++++-----------------
 1 file changed, 123 insertions(+), 124 deletions(-)

diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp
index 8b0dba0bca..63d56be01b 100644
--- a/src/dpp/discordvoiceclient.cpp
+++ b/src/dpp/discordvoiceclient.cpp
@@ -716,145 +716,144 @@ void discord_voice_client::read_ready()
 	uint8_t buffer[65535];
 	int packet_size = this->udp_recv((char*)buffer, sizeof(buffer));
 
-	if (packet_size > 0 && (!creator->on_voice_receive.empty() || !creator->on_voice_receive_combined.empty())) {
-		constexpr size_t header_size = 12;
-		if (static_cast<size_t>(packet_size) < header_size) {
-			/* Invalid RTP payload */
-			return;
-		}
+	bool receive_handler_is_empty = creator->on_voice_receive.empty() && creator->on_voice_receive_combined.empty();
+	if (packet_size <= 0 || receive_handler_is_empty) {
+		/* Nothing to do */
+		return;
+	}
 
-		/* It's a "silence packet" - throw it away. */
-		if (packet_size < 44) {
-			return;
-		}
+	constexpr size_t header_size = 12;
+	if (static_cast<size_t>(packet_size) < header_size) {
+		/* Invalid RTP payload */
+		return;
+	}
 
-		if (uint8_t payload_type = buffer[1] & 0b0111'1111;
-		    72 <= payload_type && payload_type <= 76) {
-			/*
-			 * This is an RTCP payload. Discord is known to send
-			 * RTCP Receiver Reports.
-			 *
-			 * See https://datatracker.ietf.org/doc/html/rfc3551#section-6
-			 */
-			return;
-		}
+	/* It's a "silence packet" - throw it away. */
+	if (packet_size < 44) {
+		return;
+	}
 
-		voice_payload vp{0, // seq, populate later
-		                 0, // timestamp, populate later
-		                 std::make_unique<voice_receive_t>(nullptr, std::string((char*)buffer, packet_size))};
+	if (uint8_t payload_type = buffer[1] & 0b0111'1111;
+		72 <= payload_type && payload_type <= 76) {
+		/*
+		 * This is an RTCP payload. Discord is known to send
+		 * RTCP Receiver Reports.
+		 *
+		 * See https://datatracker.ietf.org/doc/html/rfc3551#section-6
+		 */
+		return;
+	}
 
-		vp.vr->voice_client = this;
+	voice_payload vp{0, // seq, populate later
+		0, // timestamp, populate later
+		std::make_unique<voice_receive_t>(nullptr, std::string((char*)buffer, packet_size))};
 
-		{	/* Get the User ID of the speaker */
-			uint32_t speaker_ssrc;
-			std::memcpy(&speaker_ssrc, &buffer[8], sizeof(uint32_t));
-			speaker_ssrc = ntohl(speaker_ssrc);
-			vp.vr->user_id = ssrc_map[speaker_ssrc];
-		}
+	vp.vr->voice_client = this;
 
-		/* Get the sequence number of the voice UDP packet */
-		std::memcpy(&vp.seq, &buffer[2], sizeof(rtp_seq_t));
-		vp.seq = ntohs(vp.seq);
-		/* Get the timestamp of the voice UDP packet */
-		std::memcpy(&vp.timestamp, &buffer[4], sizeof(rtp_timestamp_t));
-		vp.timestamp = ntohl(vp.timestamp);
-
-		// nonce is 4 byte at the end of payload now
-		// change accordingly
-		// /* Nonce is the RTP Header with zero padding */
-		// uint8_t nonce[24] = { 0 };
-		// std::memcpy(nonce, buffer, header_size);
-
-		// /* Get the number of CSRC in header */
-		// const size_t csrc_count = buffer[0] & 0b0000'1111;
-		// /* Skip to the encrypted voice data */
-		// const ptrdiff_t offset_to_data = header_size + sizeof(uint32_t) * csrc_count;
-		// uint8_t* ciphertext = buffer + offset_to_data;
-		// const size_t ciphertext_len = packet_size - offset_to_data;
-
-		unsigned long long decrypted_len = 0;
-
-		// if (crypto_aead_xchacha20poly1305_ietf_decrypt(buffer, &decrypted_len,
-		// 						NULL,
-		// 						ciphertext, ciphertext_len,
-		// 						NULL,
-		// 						NULL,
-		// 						nonce, secret_key) != 0) {
-		// 		/* Invalid Discord RTP payload. */
-		// 		return;
-		// }
-
-		// if(crypto_aead_xchacha20poly1305_ietf_decrypt() != 0)
-
-		// 		if (crypto_secretbox_open_easy(encrypted_data, encrypted_data,
-		// 								encrypted_data_len, nonce, secret_key)) {
-		// 				/* Invalid Discord RTP payload. */
-		// 				return;
-		// 		}
-
-		// const uint8_t* decrypted_data = encrypted_data;
-		// size_t decrypted_data_len = encrypted_data_len - crypto_box_MACBYTES;
-		// if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) {
-		// 		/* Skip the RTP Extensions */
-		// 		size_t ext_len = 0;
-		// 		{
-		// 				uint16_t ext_len_in_words;
-		// 				memcpy(&ext_len_in_words, &decrypted_data[2], sizeof(uint16_t));
-		// 				ext_len_in_words = ntohs(ext_len_in_words);
-		// 				ext_len = sizeof(uint32_t) * ext_len_in_words;
-		// 		}
-		// 		constexpr size_t ext_header_len = sizeof(uint16_t) * 2;
-		// 		decrypted_data += ext_header_len + ext_len;
-		// 		decrypted_data_len -= ext_header_len + ext_len;
-		// }
+	{	/* Get the User ID of the speaker */
+		uint32_t speaker_ssrc;
+		std::memcpy(&speaker_ssrc, &buffer[8], sizeof(uint32_t));
+		speaker_ssrc = ntohl(speaker_ssrc);
+		vp.vr->user_id = ssrc_map[speaker_ssrc];
+	}
 
-		/*
-		 * We're left with the decrypted, opus-encoded data.
-		 * Park the payload and decode on the voice courier thread.
-		 */
-		vp.vr->audio_data.assign(buffer, buffer + decrypted_len);
+	/* Get the sequence number of the voice UDP packet */
+	std::memcpy(&vp.seq, &buffer[2], sizeof(rtp_seq_t));
+	vp.seq = ntohs(vp.seq);
+	/* Get the timestamp of the voice UDP packet */
+	std::memcpy(&vp.timestamp, &buffer[4], sizeof(rtp_timestamp_t));
+	vp.timestamp = ntohl(vp.timestamp);
+
+	constexpr size_t nonce_size = sizeof(uint32_t);
+	/* Nonce is 4 byte at the end of payload with zero padding */
+	uint8_t nonce[24] = { 0 };
+	std::memcpy(nonce, buffer + packet_size - nonce_size, nonce_size);
+
+	/* Get the number of CSRC in header */
+	const size_t csrc_count = buffer[0] & 0b0000'1111;
+	/* Skip to the encrypted voice data */
+	const ptrdiff_t offset_to_data = header_size + sizeof(uint32_t) * csrc_count;
+	uint8_t* ciphertext = buffer + offset_to_data;
+	const size_t ciphertext_len = packet_size - offset_to_data - nonce_size;
+
+	uint8_t header[header_size] = { 0 };
+	memcpy(header, buffer, header_size);
+
+	unsigned long long decrypted_len = 0;
+	if (crypto_aead_xchacha20poly1305_ietf_decrypt(
+		buffer, &decrypted_len,
+		NULL,
+		ciphertext, ciphertext_len,
+		header,
+		header_size,
+		nonce, secret_key) != 0) {
+		/* Invalid Discord RTP payload. */
+		std::cout << "INVALID PACKET\n";
+		return;
+	}
 
-		{
-			std::lock_guard lk(voice_courier_shared_state.mtx);
-			auto& [range, payload_queue, pending_decoder_ctls, decoder] = voice_courier_shared_state.parked_voice_payloads[vp.vr->user_id];
+	// const uint8_t* decrypted_data = buffer;
+	// size_t decrypted_data_len = encrypted_data_len - crypto_box_MACBYTES;
+	// if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) {
+	// 		/* Skip the RTP Extensions */
+	// 		size_t ext_len = 0;
+	// 		{
+	// 				uint16_t ext_len_in_words;
+	// 				memcpy(&ext_len_in_words, &decrypted_data[2], sizeof(uint16_t));
+	// 				ext_len_in_words = ntohs(ext_len_in_words);
+	// 				ext_len = sizeof(uint32_t) * ext_len_in_words;
+	// 		}
+	// 		constexpr size_t ext_header_len = sizeof(uint16_t) * 2;
+	// 		decrypted_data += ext_header_len + ext_len;
+	// 		decrypted_data_len -= ext_header_len + ext_len;
+	// }
+
+	/*
+	 * We're left with the decrypted, opus-encoded data.
+	 * Park the payload and decode on the voice courier thread.
+	 */
+	vp.vr->audio_data.assign(buffer, buffer + decrypted_len);
 
-			if (!decoder) {
-				/*
-				 * Most likely this is the first time we encounter this speaker.
-				 * Do some initialization for not only the decoder but also the range.
+	{
+		std::lock_guard lk(voice_courier_shared_state.mtx);
+		auto& [range, payload_queue, pending_decoder_ctls, decoder] = voice_courier_shared_state.parked_voice_payloads[vp.vr->user_id];
+
+		if (!decoder) {
+			/*
+			 * Most likely this is the first time we encounter this speaker.
+			 * Do some initialization for not only the decoder but also the range.
+			 */
+			range.min_seq = vp.seq;
+			range.min_timestamp = vp.timestamp;
+
+			int opus_error = 0;
+			decoder.reset(opus_decoder_create(opus_sample_rate_hz, opus_channel_count, &opus_error),
+				 &opus_decoder_destroy);
+			if (opus_error) {
+				/**
+				 * NOTE: The -10 here makes the opus_error match up with values of exception_error_code,
+				 * which would otherwise conflict as every C library loves to use values from -1 downwards.
 				 */
-				range.min_seq = vp.seq;
-				range.min_timestamp = vp.timestamp;
-
-				int opus_error = 0;
-				decoder.reset(opus_decoder_create(opus_sample_rate_hz, opus_channel_count, &opus_error),
-				              &opus_decoder_destroy);
-				if (opus_error) {
-					/**
-					 * NOTE: The -10 here makes the opus_error match up with values of exception_error_code,
-					 * which would otherwise conflict as every C library loves to use values from -1 downwards.
-					 */
-					throw dpp::voice_exception((exception_error_code)(opus_error - 10), "discord_voice_client::discord_voice_client; opus_decoder_create() failed");
-				}
+				throw dpp::voice_exception((exception_error_code)(opus_error - 10), "discord_voice_client::discord_voice_client; opus_decoder_create() failed");
 			}
+		}
 
-			if (vp.seq < range.min_seq && vp.timestamp < range.min_timestamp) {
-				/* This packet arrived too late. We can only discard it. */
-				return;
-			}
-			range.max_seq = vp.seq;
-			range.max_timestamp = vp.timestamp;
-			payload_queue.push(std::move(vp));
+		if (vp.seq < range.min_seq && vp.timestamp < range.min_timestamp) {
+			/* This packet arrived too late. We can only discard it. */
+			return;
 		}
+		range.max_seq = vp.seq;
+		range.max_timestamp = vp.timestamp;
+		payload_queue.push(std::move(vp));
+	}
 
-		voice_courier_shared_state.signal_iteration.notify_one();
+	voice_courier_shared_state.signal_iteration.notify_one();
 
-		if (!voice_courier.joinable()) {
-			/* Courier thread is not running, start it */
-			voice_courier = std::thread(&voice_courier_loop,
-			                            std::ref(*this),
-			                            std::ref(voice_courier_shared_state));
-		}
+	if (!voice_courier.joinable()) {
+		/* Courier thread is not running, start it */
+		voice_courier = std::thread(&voice_courier_loop,
+							  std::ref(*this),
+							  std::ref(voice_courier_shared_state));
 	}
 #else
 	throw dpp::voice_exception(err_no_voice_support, "Voice support not enabled in this build of D++");

From a228019b52d49961102909f5d2d28b823b51022e Mon Sep 17 00:00:00 2001
From: Neko-Life <nekolife123579@gmail.com>
Date: Sat, 21 Sep 2024 23:07:57 +0700
Subject: [PATCH 6/9] feat: working on_voice_receive, need cleanups

---
 src/dpp/discordvoiceclient.cpp | 70 +++++++++++++++++++++++-----------
 1 file changed, 48 insertions(+), 22 deletions(-)

diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp
index 63d56be01b..bc9367a6a0 100644
--- a/src/dpp/discordvoiceclient.cpp
+++ b/src/dpp/discordvoiceclient.cpp
@@ -710,12 +710,29 @@ void discord_voice_client::send(const char* packet, size_t len, uint64_t duratio
 	outbuf.emplace_back(frame);
 }
 
+// static FILE *f = NULL;
+// 
+// void init() {
+// 	static bool i = false;
+// 	if (i) return;
+// 	i = true;
+// 
+// 	f = fopen("report.bin", "wb");
+// }
+
 void discord_voice_client::read_ready()
 {
 #ifdef HAVE_VOICE
 	uint8_t buffer[65535];
 	int packet_size = this->udp_recv((char*)buffer, sizeof(buffer));
 
+	std::cout << "RECEIVED SIZE("<<packet_size<<")\n";
+	// init();
+	// fwrite("\n;",1,2,f);
+	// fwrite(secret_key,1,32,f);
+	// fwrite(";\n",1,2,f);
+	// fwrite(buffer, 1, packet_size, f);
+
 	bool receive_handler_is_empty = creator->on_voice_receive.empty() && creator->on_voice_receive_combined.empty();
 	if (packet_size <= 0 || receive_handler_is_empty) {
 		/* Nothing to do */
@@ -750,8 +767,8 @@ void discord_voice_client::read_ready()
 
 	vp.vr->voice_client = this;
 
+	uint32_t speaker_ssrc;
 	{	/* Get the User ID of the speaker */
-		uint32_t speaker_ssrc;
 		std::memcpy(&speaker_ssrc, &buffer[8], sizeof(uint32_t));
 		speaker_ssrc = ntohl(speaker_ssrc);
 		vp.vr->user_id = ssrc_map[speaker_ssrc];
@@ -773,46 +790,55 @@ void discord_voice_client::read_ready()
 	const size_t csrc_count = buffer[0] & 0b0000'1111;
 	/* Skip to the encrypted voice data */
 	const ptrdiff_t offset_to_data = header_size + sizeof(uint32_t) * csrc_count;
+	size_t total_header_len = offset_to_data;
+
 	uint8_t* ciphertext = buffer + offset_to_data;
-	const size_t ciphertext_len = packet_size - offset_to_data - nonce_size;
+	size_t ciphertext_len = packet_size - offset_to_data - nonce_size;
 
-	uint8_t header[header_size] = { 0 };
-	memcpy(header, buffer, header_size);
+	size_t ext_len = 0;
+	if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) {
+		/* Get the RTP Extensions size */
+		{
+			uint16_t ext_len_in_words;
+			memcpy(&ext_len_in_words, &ciphertext[2], sizeof(uint16_t));
+			ext_len_in_words = ntohs(ext_len_in_words);
+			ext_len = sizeof(uint32_t) * ext_len_in_words;
+		}
+		constexpr size_t ext_header_len = sizeof(uint16_t) * 2;
+		ciphertext += ext_header_len;
+		ciphertext_len -= ext_header_len;
+		total_header_len += ext_header_len;
+	}
 
-	unsigned long long decrypted_len = 0;
+	uint8_t decrypted[65535] = { 0 };
+	unsigned long long opus_packet_len  = 0;
 	if (crypto_aead_xchacha20poly1305_ietf_decrypt(
-		buffer, &decrypted_len,
+		decrypted, &opus_packet_len,
 		NULL,
 		ciphertext, ciphertext_len,
-		header,
-		header_size,
+		buffer,
+		total_header_len,
 		nonce, secret_key) != 0) {
 		/* Invalid Discord RTP payload. */
 		std::cout << "INVALID PACKET\n";
 		return;
 	}
 
+	uint8_t *opus_packet = decrypted;
+	if (ext_len > 0) {
+		/* Skip RTP Header Extension */
+		opus_packet += ext_len;
+		opus_packet_len -= ext_len;
+	}
+
 	// const uint8_t* decrypted_data = buffer;
 	// size_t decrypted_data_len = encrypted_data_len - crypto_box_MACBYTES;
-	// if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) {
-	// 		/* Skip the RTP Extensions */
-	// 		size_t ext_len = 0;
-	// 		{
-	// 				uint16_t ext_len_in_words;
-	// 				memcpy(&ext_len_in_words, &decrypted_data[2], sizeof(uint16_t));
-	// 				ext_len_in_words = ntohs(ext_len_in_words);
-	// 				ext_len = sizeof(uint32_t) * ext_len_in_words;
-	// 		}
-	// 		constexpr size_t ext_header_len = sizeof(uint16_t) * 2;
-	// 		decrypted_data += ext_header_len + ext_len;
-	// 		decrypted_data_len -= ext_header_len + ext_len;
-	// }
 
 	/*
 	 * We're left with the decrypted, opus-encoded data.
 	 * Park the payload and decode on the voice courier thread.
 	 */
-	vp.vr->audio_data.assign(buffer, buffer + decrypted_len);
+	vp.vr->audio_data.assign(opus_packet, opus_packet + opus_packet_len);
 
 	{
 		std::lock_guard lk(voice_courier_shared_state.mtx);

From 72938834ad119c49d8fef1aaaf197dd71046b041 Mon Sep 17 00:00:00 2001
From: Neko-Life <nekolife123579@gmail.com>
Date: Sat, 21 Sep 2024 23:09:29 +0700
Subject: [PATCH 7/9] feat: cleanups

---
 src/dpp/discordvoiceclient.cpp | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp
index bc9367a6a0..a62cb75b8b 100644
--- a/src/dpp/discordvoiceclient.cpp
+++ b/src/dpp/discordvoiceclient.cpp
@@ -710,29 +710,12 @@ void discord_voice_client::send(const char* packet, size_t len, uint64_t duratio
 	outbuf.emplace_back(frame);
 }
 
-// static FILE *f = NULL;
-// 
-// void init() {
-// 	static bool i = false;
-// 	if (i) return;
-// 	i = true;
-// 
-// 	f = fopen("report.bin", "wb");
-// }
-
 void discord_voice_client::read_ready()
 {
 #ifdef HAVE_VOICE
 	uint8_t buffer[65535];
 	int packet_size = this->udp_recv((char*)buffer, sizeof(buffer));
 
-	std::cout << "RECEIVED SIZE("<<packet_size<<")\n";
-	// init();
-	// fwrite("\n;",1,2,f);
-	// fwrite(secret_key,1,32,f);
-	// fwrite(";\n",1,2,f);
-	// fwrite(buffer, 1, packet_size, f);
-
 	bool receive_handler_is_empty = creator->on_voice_receive.empty() && creator->on_voice_receive_combined.empty();
 	if (packet_size <= 0 || receive_handler_is_empty) {
 		/* Nothing to do */
@@ -820,7 +803,6 @@ void discord_voice_client::read_ready()
 		total_header_len,
 		nonce, secret_key) != 0) {
 		/* Invalid Discord RTP payload. */
-		std::cout << "INVALID PACKET\n";
 		return;
 	}
 
@@ -831,9 +813,6 @@ void discord_voice_client::read_ready()
 		opus_packet_len -= ext_len;
 	}
 
-	// const uint8_t* decrypted_data = buffer;
-	// size_t decrypted_data_len = encrypted_data_len - crypto_box_MACBYTES;
-
 	/*
 	 * We're left with the decrypted, opus-encoded data.
 	 * Park the payload and decode on the voice courier thread.

From 5c45f94b051dc54951f6ccf766451a3690fa8767 Mon Sep 17 00:00:00 2001
From: Neko-Life <nekolife123579@gmail.com>
Date: Sat, 21 Sep 2024 23:36:31 +0700
Subject: [PATCH 8/9] feat: add some comment

---
 src/dpp/discordvoiceclient.cpp | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp
index a62cb75b8b..770391d34e 100644
--- a/src/dpp/discordvoiceclient.cpp
+++ b/src/dpp/discordvoiceclient.cpp
@@ -780,7 +780,10 @@ void discord_voice_client::read_ready()
 
 	size_t ext_len = 0;
 	if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) {
-		/* Get the RTP Extensions size */
+		/**
+		 * Get the RTP extension size. Only the size is read here because
+		 * the extension itself is encrypted along with the opus packet.
+		 */
 		{
 			uint16_t ext_len_in_words;
 			memcpy(&ext_len_in_words, &ciphertext[2], sizeof(uint16_t));
@@ -797,9 +800,14 @@ void discord_voice_client::read_ready()
 	unsigned long long opus_packet_len  = 0;
 	if (crypto_aead_xchacha20poly1305_ietf_decrypt(
 		decrypted, &opus_packet_len,
-		NULL,
+		nullptr,
 		ciphertext, ciphertext_len,
 		buffer,
+		/**
+		 * Additional Data:
+		 * The whole header (including csrc list) +
+		 * 4 byte extension header (magic 0xBEDE + 16-bit denoting extension length)
+		 */
 		total_header_len,
 		nonce, secret_key) != 0) {
 		/* Invalid Discord RTP payload. */
@@ -808,7 +816,7 @@ void discord_voice_client::read_ready()
 
 	uint8_t *opus_packet = decrypted;
 	if (ext_len > 0) {
-		/* Skip RTP Header Extension */
+		/* Skip previously encrypted RTP Header Extension */
 		opus_packet += ext_len;
 		opus_packet_len -= ext_len;
 	}
@@ -1326,6 +1334,7 @@ discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet
 			nullptr,
 			encodedAudioData.data(),
 			encodedAudioLength,
+			/* The RTP Header as Additional Data */
 			reinterpret_cast<const unsigned char *>(&header),
 			sizeof(header),
 			nullptr,

From a0f5bc35f53ff8b81976a9cf028c92629e97cfb2 Mon Sep 17 00:00:00 2001
From: Neko-Life <nekolife123579@gmail.com>
Date: Sat, 21 Sep 2024 23:50:01 +0700
Subject: [PATCH 9/9] feat: replace to snake case

---
 src/dpp/discordvoiceclient.cpp | 42 +++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp
index 770391d34e..e46d4258a0 100644
--- a/src/dpp/discordvoiceclient.cpp
+++ b/src/dpp/discordvoiceclient.cpp
@@ -1275,13 +1275,13 @@ discord_voice_client& discord_voice_client::send_audio_raw(uint16_t* audio_data,
 		return send_audio_raw((uint16_t*)packet.data(), packet.size());
 	}
 
-	opus_int32 encodedAudioMaxLength = (opus_int32)length;
-	std::vector<uint8_t> encodedAudioData(encodedAudioMaxLength);
-	size_t encodedAudioLength = encodedAudioMaxLength;
+	opus_int32 encoded_audio_max_length = (opus_int32)length;
+	std::vector<uint8_t> encoded_audio(encoded_audio_max_length);
+	size_t encoded_audio_length = encoded_audio_max_length;
 
-	encodedAudioLength = this->encode((uint8_t*)audio_data, length, encodedAudioData.data(), encodedAudioLength);
+	encoded_audio_length = this->encode((uint8_t*)audio_data, length, encoded_audio.data(), encoded_audio_length);
 
-	send_audio_opus(encodedAudioData.data(), encodedAudioLength);
+	send_audio_opus(encoded_audio.data(), encoded_audio_length);
 #else
 	throw dpp::voice_exception(err_no_voice_support, "Voice support not enabled in this build of D++");
 #endif
@@ -1301,25 +1301,25 @@ discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet
 
 discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet, const size_t length, uint64_t duration) {
 #if HAVE_VOICE
-	int frameSize = (int)(48 * duration * (timescale / 1000000));
-	opus_int32 encodedAudioMaxLength = (opus_int32)length;
-	std::vector<uint8_t> encodedAudioData(encodedAudioMaxLength);
-	size_t encodedAudioLength = encodedAudioMaxLength;
+	int frame_size = (int)(48 * duration * (timescale / 1000000));
+	opus_int32 encoded_audio_max_length = (opus_int32)length;
+	std::vector<uint8_t> encoded_audio(encoded_audio_max_length);
+	size_t encoded_audio_length = encoded_audio_max_length;
 
-	encodedAudioLength = length;
-	encodedAudioData.reserve(length);
-	memcpy(encodedAudioData.data(), opus_packet, length);
+	encoded_audio_length = length;
+	encoded_audio.reserve(length);
+	memcpy(encoded_audio.data(), opus_packet, length);
 
 	++sequence;
 	rtp_header header(sequence, timestamp, (uint32_t)ssrc);
 
 	/* Expected payload size is unencrypted header + encrypted opus packet + unencrypted 32 bit nonce */
-	size_t packet_siz = sizeof(header) + (encodedAudioLength + crypto_aead_xchacha20poly1305_IETF_ABYTES) + sizeof(packet_nonce);
+	size_t packet_siz = sizeof(header) + (encoded_audio_length + crypto_aead_xchacha20poly1305_IETF_ABYTES) + sizeof(packet_nonce);
 
-	std::vector<uint8_t> audioDataPacket(packet_siz);
+	std::vector<uint8_t> payload(packet_siz);
 
 	/* Set RTP header */
-	std::memcpy(audioDataPacket.data(), &header, sizeof(header));
+	std::memcpy(payload.data(), &header, sizeof(header));
 
 	/* Convert nonce to big-endian */
 	uint32_t noncel = htonl(packet_nonce);
@@ -1330,10 +1330,10 @@ discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet
 
 	/* Execute */
 	crypto_aead_xchacha20poly1305_ietf_encrypt(
-			audioDataPacket.data() + sizeof(header),
+			payload.data() + sizeof(header),
 			nullptr,
-			encodedAudioData.data(),
-			encodedAudioLength,
+			encoded_audio.data(),
+			encoded_audio_length,
 			/* The RTP Header as Additional Data */
 			reinterpret_cast<const unsigned char *>(&header),
 			sizeof(header),
@@ -1342,10 +1342,10 @@ discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet
 			secret_key);
 
 	/* Append the 4 byte nonce to the resulting payload */
-	std::memcpy(audioDataPacket.data() + audioDataPacket.size() - sizeof(noncel), &noncel, sizeof(noncel));
+	std::memcpy(payload.data() + payload.size() - sizeof(noncel), &noncel, sizeof(noncel));
 
-	this->send(reinterpret_cast<const char*>(audioDataPacket.data()), audioDataPacket.size(), duration);
-	timestamp += frameSize;
+	this->send(reinterpret_cast<const char*>(payload.data()), payload.size(), duration);
+	timestamp += frame_size;
 
 	/* Increment for next packet */
 	packet_nonce++;