Skip to content

Commit

Permalink
Support quoted relations in collection (#633)
Browse files Browse the repository at this point in the history
# Description
We use the collection as a local knowledge base. Since n3-represented
literals contain embedded double-quotes, it is essential that
serialisation and de-serialisation of collections retain embedded
double-quotes.

This PR ensures that embedded double-quotes are escaped in the internal
representation of relations, which is used by the built-in json
encoder/decoder.

## Type of change
- [x] Bug fix & code cleanup
- [x] New feature
- [ ] Documentation update
- [x] Test update

## Checklist for the reviewer
This checklist should be used as a help for the reviewer.

- [ ] Is the change limited to one issue?
- [ ] Does this PR close the issue?
- [ ] Is the code easy to read and understand?
- [ ] Do all new features have an accompanying new test?
- [ ] Has the documentation been updated as necessary?
  • Loading branch information
jesper-friis authored Sep 21, 2023
2 parents a051aef + b53df57 commit e9c31ac
Show file tree
Hide file tree
Showing 6 changed files with 177 additions and 17 deletions.
31 changes: 29 additions & 2 deletions src/dlite-json.c
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,22 @@ int _dlite_json_sprint(char *dest, size_t size, const DLiteInstance *inst,
if (dlite_instance_get_property((DLiteInstance *)inst->meta, "relations")) {
PRINT1("%s \"relations\": [\n", in);
for (i=0; i < met->_nrelations; i++) {
int m;
DLiteRelation *r = met->_relations + i;
PRINT4("%s [\"%s\", \"%s\", \"%s\"]\n", in, r->s, r->p, r->o);
PRINT1("%s [", in);
m = strquote(dest+n, PDIFF(size, n), r->s);
if (m < 0) goto fail;
n += m;
PRINT(", ");
m = strquote(dest+n, PDIFF(size, n), r->p);
if (m < 0) goto fail;
n += m;
PRINT(", ");
m = strquote(dest+n, PDIFF(size, n), r->o);
if (m < 0) goto fail;
n += m;
PRINT("]\n");

}
PRINT2("%s ]%s\n", in, prop_comma);
}
Expand Down Expand Up @@ -212,8 +226,21 @@ int _dlite_json_sprint(char *dest, size_t size, const DLiteInstance *inst,
if (dlite_instance_get_property((DLiteInstance *)inst->meta, "relations")) {
PRINT1("%s \"relations\": [\n", in);
for (i=0; i < met->_nrelations; i++) {
int m;
DLiteRelation *r = met->_relations + i;
PRINT4("%s [\"%s\", \"%s\", \"%s\"]\n", in, r->s, r->p, r->o);
PRINT1("%s [", in);
m = strquote(dest+n, PDIFF(size, n), r->s);
if (m < 0) goto fail;
n += m;
PRINT(", ");
m = strquote(dest+n, PDIFF(size, n), r->p);
if (m < 0) goto fail;
n += m;
PRINT(", ");
m = strquote(dest+n, PDIFF(size, n), r->o);
if (m < 0) goto fail;
n += m;
PRINT("]\n");
}
PRINT2("%s ]%s\n", in, prop_comma);
}
Expand Down
35 changes: 28 additions & 7 deletions src/dlite-type.c
Original file line number Diff line number Diff line change
Expand Up @@ -862,7 +862,13 @@ int dlite_type_print(char *dest, size_t n, const void *p, DLiteType dtype,
case dliteRelation:
{
DLiteRelation *r = (DLiteRelation *)p;
m = snprintf(dest, n, "[\"%s\", \"%s\", \"%s\"]", r->s, r->p, r->o);
m = snprintf(dest, n, "[");
m += strquote(dest+m, PDIFF(n, m), r->s);
m += snprintf(dest+m, PDIFF(n, m), ", ");
m += strquote(dest+m, PDIFF(n, m), r->p);
m += snprintf(dest+m, PDIFF(n, m), ", ");
m += strquote(dest+m, PDIFF(n, m), r->o);
m += snprintf(dest+m, PDIFF(n, m), "]");
}
break;
}
Expand Down Expand Up @@ -916,6 +922,15 @@ int dlite_type_aprint(char **dest, size_t *n, size_t pos, const void *p,
#define MAX_PROPERTY_TOKENS 64 // this supports at least 50 dimensions...
#define MAX_RELATION_TOKENS 9

/* Macro used by dlite_type_scan() to assign `target` when scanning a
   relation.

   The JSON token `t` (a slice of `src`) is unquoted into the scratch
   buffer `buf` (reallocated as needed; its current size is kept in
   `bufsize`), and a newly allocated copy of the result is assigned to
   `target`.

   If unquoting fails, the scratch buffer is freed and the enclosing
   function returns -1, so the error path does not leak `buf`.

   The body is wrapped in do { ... } while (0) so that the macro expands
   to a single statement and is safe inside unbraced if/else. */
#define SET_RELATION(target, buf, bufsize, t, src)                       \
  do {                                                                   \
    if (strnput_unquote(&(buf), &(bufsize), 0, (src) + (t)->start,       \
                        (t)->end - (t)->start, NULL,                     \
                        strquoteNoQuote) < 0) {                          \
      free(buf);                                                         \
      return -1;                                                         \
    }                                                                    \
    (target) = strndup((buf), (t)->end - (t)->start);                    \
  } while (0)


/*
Scans a value from `src` and write it to memory pointed to by `p`.
Expand Down Expand Up @@ -1197,23 +1212,29 @@ int dlite_type_scan(const char *src, int len, void *p, DLiteType dtype,
return errx(dliteParseError, "relation should have 3 (optionally 4) elements");
m = tokens->end - tokens->start;
if (tokens->type == JSMN_ARRAY) {
size_t bufsize=0;
char *buf=NULL;
if (!(t = jsmn_element(src, tokens, 0))) return -1;
rel->s = strndup(src + t->start, t->end - t->start);
SET_RELATION(rel->s, buf, bufsize, t, src);
if (!(t = jsmn_element(src, tokens, 1))) return -1;
rel->p = strndup(src + t->start, t->end - t->start);
SET_RELATION(rel->p, buf, bufsize, t, src);
if (!(t = jsmn_element(src, tokens, 2))) return -1;
rel->o = strndup(src + t->start, t->end - t->start);
SET_RELATION(rel->o, buf, bufsize, t, src);
if (tokens->size > 3 && (t = jsmn_element(src, tokens, 3)))
rel->id = strndup(src + t->start, t->end - t->start);
free(buf);
} else if (tokens->type == JSMN_OBJECT) {
size_t bufsize=0;
char *buf=NULL;
if (!(t = jsmn_item(src, tokens, "s"))) return -1;
rel->s = strndup(src + t->start, t->end - t->start);
SET_RELATION(rel->s, buf, bufsize, t, src);
if (!(t = jsmn_item(src, tokens, "p"))) return -1;
rel->p = strndup(src + t->start, t->end - t->start);
SET_RELATION(rel->p, buf, bufsize, t, src);
if (!(t = jsmn_item(src, tokens, "o"))) return -1;
rel->o = strndup(src + t->start, t->end - t->start);
SET_RELATION(rel->o, buf, bufsize, t, src);
if ((t = jsmn_item(src, tokens, "id")))
rel->id = strndup(src + t->start, t->end - t->start);
free(buf);
} else {
return errx(dliteValueError, "relation should be a JSON array");
}
Expand Down
14 changes: 13 additions & 1 deletion src/tests/test_json.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ MU_TEST(test_sprint)
//printf("%s\n", buf);
mu_assert_int_eq(1165, m);

//printf("\n========================================================\n");


/* Tests for PR #541 */
Expand All @@ -87,6 +86,19 @@ MU_TEST(test_sprint)
m = dlite_json_sprint(NULL, 0, inst, 4, dliteJsonSingle);
mu_assert_int_eq(404, m);


/* Tests for proper quoting */
DLiteCollection *coll = dlite_collection_create(NULL);
dlite_collection_add_relation(coll, "s", "p", "\"o\"");
m = dlite_json_sprint(buf, sizeof(buf), (DLiteInstance *)coll, 2, 0);
const DLiteRelation *rel = dlite_collection_find_first(coll, "s", "p", NULL);
mu_assert_string_eq("\"o\"", rel->o);
dlite_instance_decref((DLiteInstance *)coll);
//printf("\n--------------------------------------------------------\n");
//printf("%s\n", buf);


//printf("\n========================================================\n");
}


Expand Down
76 changes: 69 additions & 7 deletions src/utils/strutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,9 @@ int strnput_escape(char **destp, size_t *sizep, size_t pos,
Embedded double-quotes are escaped with backslash. At most `size`
characters are written to `dest` (including terminating NUL).
If `s` is NULL, this function will behave similarly to snprintf(),
except for the added quoting marks.
Returns number of characters written to `dest` (excluding
terminating NUL). If the output is truncated, the number of
characters which should have been written is returned.
Expand All @@ -264,24 +267,47 @@ int strnquote(char *dest, size_t size, const char *s, int n,
{
size_t i=0, j=0;
if (!size) dest = NULL;

/* Write initial quote sign to `dest`. */
if (!(flags & strquoteNoQuote)) {
if (size > i) dest[i] = '"';
i++;
}
while (s[j] && (n < 0 || (int)j < n)) {
if (s[j] == '"' && !(flags & strquoteNoEscape)) {
if (size > i) dest[i] = '\\';

if (s) {

/* Loop over each character in source `s` and copy it to `dest`.
If `n` is positive, consume at most `n` characters from `s`. */
while (s[j] && (n < 0 || (int)j < n)) {

/* Add backslash (escape) in front of double-quote characters
when copying to `dest` */
if (s[j] == '"' && !(flags & strquoteNoEscape)) {
if (size > i) dest[i] = '\\';
i++;
}
if (size > i) dest[i] = s[j];
i++;
j++;
}
if (size > i) dest[i] = s[j];
i++;
j++;

} else {

/* If `s` is NULL, use system snprintf() to represent it in a standard
way. */
int m = snprintf(dest+i, PDIFF(size, i), "%s", s);
if (m >= 0) i += j;
}

/* Write final quote sign to `dest`. */
if (!(flags & strquoteNoQuote)) {
if (dest && size > i) dest[i] = '"';
i++;
}

/* Ensure that `dest` is NUL-terminated. */
if (dest) dest[(size > i) ? i : size-1] = '\0';

return i;
}

Expand Down Expand Up @@ -313,7 +339,7 @@ int strunquote(char *dest, size_t size, const char *s,


/*
Like strunquote, but if `n` is non-negative, at most `n` bytes are
Like strunquote(), but if `n` is non-negative, at most `n` bytes are
read from `s`.
This mostly makes sense in combination when `flags & strquoteNoEscape`
Expand Down Expand Up @@ -342,6 +368,42 @@ int strnunquote(char *dest, size_t size, const char *s, int n,
}


/*
  Unquotes `s` into a dynamically grown buffer.

  Works like strnunquote(), except that the output is written to
  `*destp` starting at offset `pos`, and `*destp` is reallocated (with
  `*sizep` updated accordingly) when it is too small to hold the result
  including its terminating NUL.

  The arguments `n`, `consumed` and `flags` are forwarded unchanged to
  strnunquote().

  Returns the value returned by strnunquote().  Negative return values
  from strnunquote() are passed on as-is.  On allocation error, -3 is
  returned.
*/
int strnput_unquote(char **destp, size_t *sizep, size_t pos, const char *s,
                    int n, int *consumed, StrquoteFlags flags)
{
  int len;

  /* Normalise the (pointer, size) pair: a NULL buffer has size zero and
     a zero-sized buffer is treated as NULL. */
  if (*destp == NULL) *sizep = 0;
  if (*sizep == 0) *destp = NULL;

  /* Dry run without a destination to learn how much memory is needed. */
  len = strnunquote(NULL, 0, s, n, consumed, flags);
  if (len < 0) return len;  // pass errors on to the caller

  /* Grow the buffer if the result plus NUL-terminator does not fit. */
  if (len + pos >= *sizep) {
    size_t newsize = len + pos + 1;
    char *grown = realloc(*destp, newsize);
    if (grown == NULL) return -3;
    *destp = grown;
    *sizep = newsize;
  }

  /* Second pass: actually write into the (now large enough) buffer. */
  len = strnunquote(*destp + pos, PDIFF(*sizep, pos), s, n, consumed, flags);
  assert(len >= 0);            // no errors are expected at this point
  assert(len + pos < *sizep);  // the buffer should be large enough
  return len;
}


/********************************************************************
* Hexadecimal encoding/decoding
********************************************************************/
Expand Down
9 changes: 9 additions & 0 deletions src/utils/strutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,15 @@ int strunquote(char *dest, size_t size, const char *s,
int strnunquote(char *dest, size_t size, const char *s, int n,
int *consumed, StrquoteFlags flags);

/**
Like strnunquote(), but reallocates the destination and writes to
position `pos`.
On allocation error, -3 is returned.
*/
int strnput_unquote(char **destp, size_t *sizep, size_t pos, const char *s,
int n, int *consumed, StrquoteFlags flags);


/** @} */
/**
Expand Down
29 changes: 29 additions & 0 deletions src/utils/tests/test_strutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,34 @@ MU_TEST(test_strunquote)
}


MU_TEST(test_strput_unquote)
{
  /* Growable destination buffer; allocated by strnput_unquote() and
     released at the end of the test. */
  size_t capacity = 0;
  char *dest = NULL;
  int nread;
  int len;

  /* Start from an empty buffer and read at most 4 bytes of the source:
     the buffer is allocated to hold "123" plus NUL. */
  len = strnput_unquote(&dest, &capacity, 0, "\"123\"", 4, &nread, 0);
  mu_assert_int_eq(3, len);
  mu_assert_int_eq(4, nread);
  mu_assert_int_eq(4, capacity);
  mu_assert_string_eq("123", dest);

  /* Write at offset 2: the first two characters are kept and the buffer
     grows to fit the new tail. */
  len = strnput_unquote(&dest, &capacity, 2, "\"abc\"", 4, &nread, 0);
  mu_assert_int_eq(3, len);
  mu_assert_int_eq(4, nread);
  mu_assert_int_eq(6, capacity);
  mu_assert_string_eq("12abc", dest);

  /* Unlimited read (n = -1) into a buffer that is already large enough:
     the allocated size is left unchanged. */
  len = strnput_unquote(&dest, &capacity, 0, "  \"123\" + 4 ", -1, &nread, 0);
  mu_assert_int_eq(3, len);
  mu_assert_int_eq(7, nread);
  mu_assert_int_eq(6, capacity);
  mu_assert_string_eq("123", dest);

  free(dest);
}


MU_TEST(test_strhex_encode)
{
unsigned char data[4] = {0x61, 0x62, 0x63, 0x64};
Expand Down Expand Up @@ -428,6 +456,7 @@ MU_TEST_SUITE(test_suite)
MU_RUN_TEST(test_strquote);
MU_RUN_TEST(test_strnquote);
MU_RUN_TEST(test_strunquote);
MU_RUN_TEST(test_strput_unquote);
MU_RUN_TEST(test_strhex_encode);
MU_RUN_TEST(test_strhex_decode);
MU_RUN_TEST(test_strcategory);
Expand Down

0 comments on commit e9c31ac

Please sign in to comment.