From 1d148860a8825d5f80aef40efd0d6d2812419740 Mon Sep 17 00:00:00 2001 From: Eduardo Silva Date: Wed, 30 Sep 2020 11:47:09 -0600 Subject: [PATCH] pack: on JSON formatting, remove duplicated keys (#1835 #1051) Our internal serialization format allows to have several keys with the same name in a map. When converting to JSON the case is quite dangerous, despite the JSON spec is not mandatory about having unique keys at the same level in a map, we got tons of reports that backend services that receives JSON payload raises exception due to duplicated keys. On this patch now the JSON formatter will check for duplicated keys, if found, it preserves the latest one found. Signed-off-by: Eduardo Silva --- src/flb_pack.c | 69 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 9 deletions(-) diff --git a/src/flb_pack.c b/src/flb_pack.c index eac700db8e5..81c3b44f0a4 100644 --- a/src/flb_pack.c +++ b/src/flb_pack.c @@ -441,12 +441,46 @@ static inline int try_to_write(char *buf, int *off, size_t left, return FLB_TRUE; } + +/* + * Check if a key exists in the map using the 'offset' as an index to define + * which element needs to start looking from + */ +static inline int key_exists_in_map(msgpack_object key, msgpack_object map, int offset) +{ + int i; + msgpack_object p; + + if (key.type != MSGPACK_OBJECT_STR) { + return FLB_FALSE; + } + + for (i = offset; i < map.via.map.size; i++) { + p = map.via.map.ptr[i].key; + if (p.type != MSGPACK_OBJECT_STR) { + continue; + } + + if (key.via.str.size != p.via.str.size) { + continue; + } + + if (memcmp(key.via.str.ptr, p.via.str.ptr, p.via.str.size) == 0) { + return FLB_TRUE; + } + } + + return FLB_FALSE; +} + static int msgpack2json(char *buf, int *off, size_t left, const msgpack_object *o) { - int ret = FLB_FALSE; int i; + int dup; + int ret = FLB_FALSE; int loop; + int packed; switch(o->type) { case MSGPACK_OBJECT_NIL: @@ -558,21 +592,33 @@ static int msgpack2json(char *buf, int *off, size_t left, goto msg2json_end; } if (loop != 0) { + msgpack_object k; msgpack_object_kv *p = o->via.map.ptr; - if (!msgpack2json(buf, off, left, &p->key) || - !try_to_write(buf, off, left, ":", 1) || - !msgpack2json(buf, off, left, &p->val)) { - goto msg2json_end; - } - for (i = 1; i < loop; i++) { + + packed = 0; + dup = FLB_FALSE; + + k = o->via.map.ptr[0].key; + for (i = 0; i < loop; i++) { + k = o->via.map.ptr[i].key; + dup = key_exists_in_map(k, *o, i + 1); + if (dup == FLB_TRUE) { + continue; + } + + if (packed > 0) { + if (!try_to_write(buf, off, left, ",", 1)) { + goto msg2json_end; + } + } + if ( - !try_to_write(buf, off, left, ",", 1) || !msgpack2json(buf, off, left, &(p+i)->key) || !try_to_write(buf, off, left, ":", 1) || !msgpack2json(buf, off, left, &(p+i)->val) ) { goto msg2json_end; - } + packed++; } } @@ -907,6 +953,11 @@ flb_sds_t flb_pack_msgpack_to_json_format(const char *data, uint64_t bytes, msgpack_sbuffer_destroy(&tmp_sbuf); } + if (out_buf && flb_sds_len(out_buf) == 0) { + flb_sds_destroy(out_buf); + return NULL; + } + return out_buf; }