Skip to content

Commit

Permalink
Add an heuristic to detect fully encrypted flows (#2058)
Browse files Browse the repository at this point in the history
A fully encrypted session is a flow where every byte of the
payload is encrypted in an attempt to “look like nothing”.
The heuristic needs only the very first packet of the flow.
See: https://www.usenix.org/system/files/sec23fall-prepub-234-wu-mingshi.pdf

A basic, but generic, implementation of the popcount algorithm has been added
  • Loading branch information
IvanNardi authored Jul 26, 2023
1 parent 2b230e2 commit 3326fa2
Show file tree
Hide file tree
Showing 17 changed files with 280 additions and 105 deletions.
6 changes: 6 additions & 0 deletions fuzz/fuzz_alg_crc32_md5.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,17 @@

int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
u_char hash[16];
struct ndpi_popcount popcount;

/* No memory allocations involved */

/* Used for crc32, md5 and popcount algs */

ndpi_crc32(data, size);
ndpi_md5(data, size, hash);

ndpi_popcount_init(&popcount);
ndpi_popcount_count(&popcount, data, size);

return 0;
}
7 changes: 7 additions & 0 deletions src/include/ndpi_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -1843,6 +1843,13 @@ extern "C" {

/* ******************************* */

/* PopCount [count how many bits are set to 1] */

/* Reset both counters of *h to zero. Returns 0 on success, -1 if h is NULL. */
int ndpi_popcount_init(struct ndpi_popcount *h);
/* Add the number of bits set to 1 in the buf_len bytes at buf to h->pop_count,
   and add buf_len to h->tot_bytes_count. Does nothing if h is NULL.
   May be called repeatedly on the same *h to accumulate over several buffers. */
void ndpi_popcount_count(struct ndpi_popcount *h, const u_int8_t *buf, u_int32_t buf_len);

/* ******************************* */

int ndpi_init_bin(struct ndpi_bin *b, enum ndpi_bin_family f, u_int16_t num_bins);
void ndpi_free_bin(struct ndpi_bin *b);
struct ndpi_bin* ndpi_clone_bin(struct ndpi_bin *b);
Expand Down
17 changes: 15 additions & 2 deletions src/include/ndpi_typedefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ typedef enum {
NDPI_HTTP_OBSOLETE_SERVER,
NDPI_PERIODIC_FLOW, /* Set in case a flow repeats at a specific pace [used by apps on top of nDPI] */
NDPI_MINOR_ISSUES, /* Generic packet issues (e.g. DNS with 0 TTL) */
NDPI_TCP_ISSUES, /* TCP issues such as connection failed, probing or scan */
NDPI_TCP_ISSUES, /* 50 */ /* TCP issues such as connection failed, probing or scan */
NDPI_FULLY_ENCRYPTED, /* This (unknown) session is fully encrypted */

/* Leave this as last member */
NDPI_MAX_RISK /* must be <= 63 due to (**) */
Expand Down Expand Up @@ -1323,6 +1324,7 @@ struct ndpi_detection_module_struct {
u_int32_t aggressiveness_ookla;

int tcp_ack_paylod_heuristic;
int fully_encrypted_based_on_first_pkt_heuristic;

u_int16_t ndpi_to_user_proto_id[NDPI_MAX_NUM_CUSTOM_PROTOCOLS]; /* custom protocolId mapping */
ndpi_proto_defaults_t proto_defaults[NDPI_MAX_SUPPORTED_PROTOCOLS+NDPI_MAX_NUM_CUSTOM_PROTOCOLS];
Expand Down Expand Up @@ -1379,7 +1381,8 @@ struct ndpi_flow_struct {
/* init parameter, internal used to set up timestamp,... */
u_int16_t guessed_protocol_id, guessed_protocol_id_by_ip, guessed_category, guessed_header_category;
u_int8_t l4_proto, protocol_id_already_guessed:1, fail_with_unknown:1,
init_finished:1, client_packet_direction:1, packet_direction:1, is_ipv6:1, _pad1: 2;
init_finished:1, client_packet_direction:1, packet_direction:1, is_ipv6:1, first_pkt_fully_encrypted:1, _pad1: 1;

u_int16_t num_dissector_calls;
ndpi_confidence_t confidence; /* ndpi_confidence_t */

Expand Down Expand Up @@ -1753,6 +1756,11 @@ typedef enum {
ndpi_dont_load_crawlers_list = (1 << 18),
ndpi_dont_load_protonvpn_list = (1 << 19),
ndpi_dont_load_gambling_list = (1 << 20),
/* Heuristic to detect fully encrypted sessions, i.e. flows where every byte of
the payload is encrypted in an attempt to “look like nothing”.
This heuristic only analyzes the first packet of the flow.
See: https://www.usenix.org/system/files/sec23fall-prepub-234-wu-mingshi.pdf */
ndpi_disable_fully_encrypted_heuristic = (1 << 21),
} ndpi_prefs;

typedef struct {
Expand Down Expand Up @@ -1912,6 +1920,11 @@ struct ndpi_cm_sketch {
u_int32_t *tables;
};

/* Running totals maintained by ndpi_popcount_init() / ndpi_popcount_count().
   pop_count / tot_bytes_count gives the average number of set bits per byte. */
struct ndpi_popcount {
u_int64_t pop_count; /* Number of bits set to 1 found so far */
u_int64_t tot_bytes_count; /* Total number of bytes processed so far */
};

/* **************************************** */

enum ndpi_bin_family {
Expand Down
5 changes: 5 additions & 0 deletions src/include/ndpi_win32.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,9 @@ typedef unsigned __int64 u_int64_t;
/* https://stackoverflow.com/questions/7993050/multiplatform-atomic-increment */
#define __sync_fetch_and_add(a,b) InterlockedExchangeAdd ((a), b)

/* On Windows builds, route calls to __builtin_popcount to the __popcnt
   intrinsic made available by <intrin.h>.
   NOTE(review): __popcnt presumably maps to the POPCNT CPU instruction and
   takes/returns unsigned int - confirm against the compiler documentation
   and the minimum supported CPU. */
#if defined(WIN32) || defined(WIN64)
#include <intrin.h>
#define __builtin_popcount __popcnt
#endif

#endif /* __NDPI_WIN32_H__ */
39 changes: 39 additions & 0 deletions src/lib/ndpi_analyze.c
Original file line number Diff line number Diff line change
Expand Up @@ -1831,3 +1831,42 @@ void ndpi_cm_sketch_destroy(struct ndpi_cm_sketch *sketch) {
ndpi_free(sketch->tables);
ndpi_free(sketch);
}

/* ********************************************************************************* */
/* ********************************************************************************* */

/* Popcount, short for "population count," is a computer programming term that refers to
the number of set bits (bits with a value of 1) in a binary representation of a given
data word or integer. In other words, it is the count of all the 1s present in the
binary representation of a number.
For example, consider the number 45, which is represented in binary as 101101.
The popcount of 45 would be 4 because there are four 1s in its binary representation.
*/

/* Reset a popcount accumulator so that both of its counters start from zero.
 *
 * h: accumulator to clear; may be NULL.
 * Returns 0 when the accumulator has been cleared, -1 when h is NULL.
 */
int ndpi_popcount_init(struct ndpi_popcount *h)
{
  if(!h)
    return -1;

  memset(h, 0, sizeof(*h));
  return 0;
}

/* ********************************************************************************* */

/* Accumulate the population count of a byte buffer.
 *
 * h:       accumulator previously set up with ndpi_popcount_init(); if NULL
 *          the call is a no-op.
 * buf:     bytes to scan; if NULL the call is a no-op.
 * buf_len: number of bytes available at buf.
 *
 * On return h->pop_count has grown by the number of bits set to 1 in
 * buf[0 .. buf_len-1] and h->tot_bytes_count has grown by buf_len.
 */
void ndpi_popcount_count(struct ndpi_popcount *h, const u_int8_t *buf, u_int32_t buf_len)
{
  u_int32_t i, num_words, tail_start;

  if(!h || !buf)
    return;

  /* Trivial alg. TODO: there are lots of better, more performant algorithms */

  num_words = buf_len / 4;
  tail_start = num_words * 4;

  /* Bulk of the buffer, 4 bytes at a time */
  for(i = 0; i < num_words; i++) {
    u_int32_t word;

    /* buf is a plain byte pointer (typically a packet payload) with no
       alignment guarantee: load the word with memcpy instead of casting to
       u_int32_t *, which would be an unaligned / aliasing-violating read.
       The bit count does not depend on byte order. */
    memcpy(&word, buf + i * 4, sizeof(word));
    h->pop_count += __builtin_popcount(word);
  }

  /* Remaining 0..3 trailing bytes */
  for(i = tail_start; i < buf_len; i++)
    h->pop_count += __builtin_popcount(buf[i]);

  h->tot_bytes_count += buf_len;
}
70 changes: 70 additions & 0 deletions src/lib/ndpi_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ static ndpi_risk_info ndpi_known_risks[] = {
{ NDPI_PERIODIC_FLOW, NDPI_RISK_LOW, CLIENT_LOW_RISK_PERCENTAGE, NDPI_CLIENT_ACCOUNTABLE },
{ NDPI_MINOR_ISSUES, NDPI_RISK_LOW, CLIENT_LOW_RISK_PERCENTAGE, NDPI_BOTH_ACCOUNTABLE },
{ NDPI_TCP_ISSUES, NDPI_RISK_MEDIUM, CLIENT_FAIR_RISK_PERCENTAGE, NDPI_CLIENT_ACCOUNTABLE },
{ NDPI_FULLY_ENCRYPTED, NDPI_RISK_MEDIUM, CLIENT_FAIR_RISK_PERCENTAGE, NDPI_CLIENT_ACCOUNTABLE },

/* Leave this as last member */
{ NDPI_MAX_RISK, NDPI_RISK_LOW, CLIENT_FAIR_RISK_PERCENTAGE, NDPI_NO_ACCOUNTABILITY }
Expand Down Expand Up @@ -3062,6 +3063,9 @@ struct ndpi_detection_module_struct *ndpi_init_detection_module(ndpi_init_prefs
if(prefs & ndpi_enable_tcp_ack_payload_heuristic)
ndpi_str->tcp_ack_paylod_heuristic = 1;

if(!(prefs & ndpi_disable_fully_encrypted_heuristic))
ndpi_str->fully_encrypted_based_on_first_pkt_heuristic = 1;

for(i = 0; i < NUM_CUSTOM_CATEGORIES; i++)
ndpi_snprintf(ndpi_str->custom_category_labels[i], CUSTOM_CATEGORY_LABEL_LEN, "User custom category %u",
(unsigned int) (i + 1));
Expand Down Expand Up @@ -5655,6 +5659,60 @@ static u_int8_t ndpi_is_multi_or_broadcast(struct ndpi_packet_struct *packet) {

/* ************************************************ */

static int fully_enc_heuristic(struct ndpi_detection_module_struct *ndpi_str,
struct ndpi_flow_struct *flow) {
struct ndpi_packet_struct *packet = &ndpi_str->packet;
struct ndpi_popcount popcount;
float ratio;
unsigned int i, len, cnt, cnt_consecutives = 0;

if(flow->l4_proto == IPPROTO_TCP &&
ndpi_seen_flow_beginning(flow)) {
/* See original paper, Algorithm 1, for the reference numbers */

/* Ex1 */
ndpi_popcount_init(&popcount);
ndpi_popcount_count(&popcount, packet->payload, packet->payload_packet_len);
ratio = (float)popcount.pop_count / (float)popcount.tot_bytes_count;
if(ratio <= 3.4 || ratio >= 4.6) {
return 0;
}

/* Ex2 */
len = ndpi_min(6, packet->payload_packet_len);
cnt = 0;
for(i = 0; i < len; i++) {
if(ndpi_isprint(packet->payload[i]))
cnt += 1;
}
if(cnt == len) {
return 0;
}

/* Ex3 */
cnt = 0;
for(i = 0; i < packet->payload_packet_len; i++) {
if(ndpi_isprint(packet->payload[i])) {
cnt += 1;
cnt_consecutives += 1;
if(cnt_consecutives >= 20) { /* Ex4 */
return 0;;
}
} else {
cnt_consecutives = 0;
}
}
if((float)cnt / packet->payload_packet_len > 0.5) {
return 0;
}

return 1;
}
return 0;
}

/* ************************************************ */

static int tcp_ack_padding(struct ndpi_packet_struct *packet) {
const struct ndpi_tcphdr *tcph = packet->tcp;
if(tcph && tcph->ack && !tcph->psh &&
Expand Down Expand Up @@ -6553,6 +6611,12 @@ ndpi_protocol ndpi_detection_giveup(struct ndpi_detection_module_struct *ndpi_st
ret.app_protocol = flow->detected_protocol_stack[0];
}

/* TODO: not sure about the best "order" among fully encrypted logic, classification by-port and classification by-ip...*/
if(ret.app_protocol == NDPI_PROTOCOL_UNKNOWN &&
flow->first_pkt_fully_encrypted == 1) {
ndpi_set_risk(ndpi_str, flow, NDPI_FULLY_ENCRYPTED, NULL);
}

/* Classification by-port */
if(enable_guess && ret.app_protocol == NDPI_PROTOCOL_UNKNOWN) {

Expand Down Expand Up @@ -7229,6 +7293,12 @@ static ndpi_protocol ndpi_internal_detection_process_packet(struct ndpi_detectio
&& (flow->l4_proto == IPPROTO_TCP))
ndpi_add_connection_as_zoom(ndpi_str, flow);

if(ndpi_str->fully_encrypted_based_on_first_pkt_heuristic &&
ret.app_protocol == NDPI_PROTOCOL_UNKNOWN && /* Only for unknown traffic */
flow->packet_counter == 1 && packet->payload_packet_len > 0) {
flow->first_pkt_fully_encrypted = fully_enc_heuristic(ndpi_str, flow);
}

return(ret);
}

Expand Down
3 changes: 3 additions & 0 deletions src/lib/ndpi_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -2045,6 +2045,9 @@ const char* ndpi_risk2str(ndpi_risk_enum risk) {
case NDPI_TCP_ISSUES:
return("TCP Connection Issues");

case NDPI_FULLY_ENCRYPTED:
return("Fully encrypted flow");

default:
ndpi_snprintf(buf, sizeof(buf), "%d", (int)risk);
return(buf);
Expand Down
4 changes: 2 additions & 2 deletions tests/cfgs/caches_cfg/result/ookla.pcap.out
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Automa domain: 3/0 (search/found)
Automa tls cert: 0/0 (search/found)
Automa risk mask: 0/0 (search/found)
Automa common alpns: 4/4 (search/found)
Patricia risk mask: 0/0 (search/found)
Patricia risk mask: 2/0 (search/found)
Patricia risk: 0/0 (search/found)
Patricia protocols: 11/1 (search/found)

Expand All @@ -33,6 +33,6 @@ JA3 Host Stats:
1 TCP 192.168.1.128:35830 <-> 89.96.108.170:8080 [proto: 91/TLS][IP: 0/Unknown][Encrypted][Confidence: DPI][DPI packets: 6][cat: Web/5][21 pkts/21216 bytes <-> 8 pkts/1950 bytes][Goodput ratio: 93/72][0.32 sec][Hostname/SNI: spd-pub-mi-01-01.fastwebnet.it][(Advertised) ALPNs: h2;http/1.1][TLS Supported Versions: TLSv1.3;TLSv1.2][bytes ratio: 0.832 (Upload)][IAT c2s/s2c min/avg/max/stddev: 0/0 17/61 274/280 62/109][Pkt Len c2s/s2c min/avg/max/stddev: 66/66 1010/244 1514/387 612/138][Risk: ** Known Proto on Non Std Port **][Risk Score: 50][Risk Info: Expected on port 443][TLSv1.3][JA3C: c279b0189edb9269da7bc43dea5e0c36][JA3S: fcb2d4d0991292272fcb1e464eedfd43][Firefox][Cipher: TLS_AES_128_GCM_SHA256][Plen Bins: 0,0,4,0,0,0,0,4,9,0,9,0,0,0,0,0,4,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,55,0,0]
2 TCP 192.168.1.128:48854 <-> 104.16.209.12:443 [proto: 91.191/TLS.Ookla][IP: 220/Cloudflare][Encrypted][Confidence: DPI][DPI packets: 6][cat: Network/14][8 pkts/1620 bytes <-> 6 pkts/3818 bytes][Goodput ratio: 67/89][0.06 sec][Hostname/SNI: www.speedtest.net][(Advertised) ALPNs: h2;http/1.1][TLS Supported Versions: TLSv1.3;TLSv1.2][bytes ratio: -0.404 (Download)][IAT c2s/s2c min/avg/max/stddev: 0/0 7/5 18/15 7/6][Pkt Len c2s/s2c min/avg/max/stddev: 66/66 202/636 583/1514 181/646][TLSv1.3][JA3C: 579ccef312d18482fc42e2b822ca2430][JA3S: eb1d94daa7e0344597e756a1fb6e7054][Firefox][Cipher: TLS_AES_128_GCM_SHA256][PLAIN TEXT (@oTAgOeedtest.net)][Plen Bins: 0,0,14,0,0,14,0,0,0,0,14,0,0,0,0,0,28,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28,0,0]
3 TCP 192.168.1.7:51207 <-> 46.44.253.187:80 [proto: 7.191/HTTP.Ookla][IP: 0/Unknown][ClearText][Confidence: DPI][DPI packets: 6][cat: Network/14][12 pkts/2238 bytes <-> 8 pkts/2082 bytes][Goodput ratio: 64/74][5.33 sec][Hostname/SNI: massarosa-1.speedtest.welcomeitalia.it][bytes ratio: 0.036 (Mixed)][IAT c2s/s2c min/avg/max/stddev: 0/4 528/47 5005/84 1493/28][Pkt Len c2s/s2c min/avg/max/stddev: 66/66 186/260 430/523 168/194][URL: massarosa-1.speedtest.welcomeitalia.it/crossdomain.xml][StatusCode: 200][Content-Type: application/xml][Server: Apache/2.2.22 (Ubuntu)][User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/602.4.8 (KHTML, like Gecko) Version/10.0.3 Safari/602.4.8][Risk: ** HTTP Obsolete Server **][Risk Score: 50][Risk Info: Obsolete Apache server 2.2.22][PLAIN TEXT (GET /crossdomain.xml HTTP/1.1)][Plen Bins: 0,0,0,0,0,0,0,0,0,0,12,75,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
4 TCP 192.168.1.192:51156 <-> 89.96.108.170:8080 [proto: 131/HTTP_Proxy][IP: 0/Unknown][ClearText][Confidence: Match by port][DPI packets: 10][cat: Web/5][6 pkts/591 bytes <-> 4 pkts/1784 bytes][Goodput ratio: 32/85][0.05 sec][bytes ratio: -0.502 (Download)][IAT c2s/s2c min/avg/max/stddev: 0/0 9/10 15/20 6/8][Pkt Len c2s/s2c min/avg/max/stddev: 66/66 98/446 143/1514 31/617][PLAIN TEXT (gKRZvA)][Plen Bins: 0,40,40,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20,0,0]
4 TCP 192.168.1.192:51156 <-> 89.96.108.170:8080 [proto: 131/HTTP_Proxy][IP: 0/Unknown][ClearText][Confidence: Match by port][DPI packets: 10][cat: Web/5][6 pkts/591 bytes <-> 4 pkts/1784 bytes][Goodput ratio: 32/85][0.05 sec][bytes ratio: -0.502 (Download)][IAT c2s/s2c min/avg/max/stddev: 0/0 9/10 15/20 6/8][Pkt Len c2s/s2c min/avg/max/stddev: 66/66 98/446 143/1514 31/617][Risk: ** Fully encrypted flow **][Risk Score: 50][PLAIN TEXT (gKRZvA)][Plen Bins: 0,40,40,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20,0,0]
5 TCP 192.168.1.7:51215 <-> 46.44.253.187:8080 [proto: 191/Ookla][IP: 0/Unknown][ClearText][Confidence: DPI][DPI packets: 6][cat: Network/14][19 pkts/1421 bytes <-> 11 pkts/920 bytes][Goodput ratio: 11/20][0.80 sec][bytes ratio: 0.214 (Upload)][IAT c2s/s2c min/avg/max/stddev: 26/0 44/75 103/137 23/41][Pkt Len c2s/s2c min/avg/max/stddev: 66/66 75/84 85/100 9/8][PLAIN TEXT ( 6HELLO 2.4 2016)][Plen Bins: 94,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
6 TCP 192.168.1.192:37790 <-> 185.157.229.246:8080 [proto: 191/Ookla][IP: 0/Unknown][ClearText][Confidence: DPI][DPI packets: 6][cat: Network/14][6 pkts/454 bytes <-> 4 pkts/317 bytes][Goodput ratio: 11/14][0.06 sec][bytes ratio: 0.178 (Mixed)][IAT c2s/s2c min/avg/max/stddev: 0/0 12/5 46/9 17/4][Pkt Len c2s/s2c min/avg/max/stddev: 66/66 76/79 106/108 14/17][PLAIN TEXT (HELLO 2.9 )][Plen Bins: 50,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
Binary file added tests/cfgs/default/pcap/shadowsocks.pcap
Binary file not shown.
Loading

0 comments on commit 3326fa2

Please sign in to comment.