diff --git a/markupsafe/_speedups.c b/markupsafe/_speedups.c
index 6f5852ed..1f24fb59 100644
--- a/markupsafe/_speedups.c
+++ b/markupsafe/_speedups.c
@@ -10,15 +10,6 @@
  */
 #include <Python.h>
 
-#if PY_MAJOR_VERSION < 3
-#define ESCAPED_CHARS_TABLE_SIZE 63
-#define UNICHR(x) (PyUnicode_AS_UNICODE((PyUnicodeObject*)PyUnicode_DecodeASCII(x, strlen(x), NULL)));
-
-
-static Py_ssize_t escaped_chars_delta_len[ESCAPED_CHARS_TABLE_SIZE];
-static Py_UNICODE *escaped_chars_repl[ESCAPED_CHARS_TABLE_SIZE];
-#endif
-
 static PyObject* markup;
 
 static int
@@ -26,21 +17,6 @@ init_constants(void)
 {
 	PyObject *module;
 
-#if PY_MAJOR_VERSION < 3
-	/* mapping of characters to replace */
-	escaped_chars_repl['"'] = UNICHR("&#34;");
-	escaped_chars_repl['\''] = UNICHR("&#39;");
-	escaped_chars_repl['&'] = UNICHR("&amp;");
-	escaped_chars_repl['<'] = UNICHR("&lt;");
-	escaped_chars_repl['>'] = UNICHR("&gt;");
-
-	/* lengths of those characters when replaced - 1 */
-	memset(escaped_chars_delta_len, 0, sizeof (escaped_chars_delta_len));
-	escaped_chars_delta_len['"'] = escaped_chars_delta_len['\''] = \
-		escaped_chars_delta_len['&'] = 4;
-	escaped_chars_delta_len['<'] = escaped_chars_delta_len['>'] = 3;
-#endif
-
 	/* import markup type so that we can mark the return value */
 	module = PyImport_ImportModule("markupsafe");
 	if (!module)
@@ -51,82 +27,19 @@ init_constants(void)
 	return 1;
 }
 
-#if PY_MAJOR_VERSION < 3
-static PyObject*
-escape_unicode(PyUnicodeObject *in)
-{
-	PyUnicodeObject *out;
-	Py_UNICODE *inp = PyUnicode_AS_UNICODE(in);
-	const Py_UNICODE *inp_end = PyUnicode_AS_UNICODE(in) + PyUnicode_GET_SIZE(in);
-	Py_UNICODE *next_escp;
-	Py_UNICODE *outp;
-	Py_ssize_t delta=0, erepl=0, delta_len=0;
-
-	/* First we need to figure out how long the escaped string will be */
-	while (*(inp) || inp < inp_end) {
-		if (*inp < ESCAPED_CHARS_TABLE_SIZE) {
-			delta += escaped_chars_delta_len[*inp];
-			erepl += !!escaped_chars_delta_len[*inp];
-		}
-		++inp;
-	}
-
-	/* Do we need to escape anything at all? */
-	if (!erepl) {
-		Py_INCREF(in);
-		return (PyObject*)in;
-	}
-
-	out = (PyUnicodeObject*)PyUnicode_FromUnicode(NULL, PyUnicode_GET_SIZE(in) + delta);
-	if (!out)
-		return NULL;
-
-	outp = PyUnicode_AS_UNICODE(out);
-	inp = PyUnicode_AS_UNICODE(in);
-	while (erepl-- > 0) {
-		/* look for the next substitution */
-		next_escp = inp;
-		while (next_escp < inp_end) {
-			if (*next_escp < ESCAPED_CHARS_TABLE_SIZE &&
-			    (delta_len = escaped_chars_delta_len[*next_escp])) {
-				++delta_len;
-				break;
-			}
-			++next_escp;
-		}
-
-		if (next_escp > inp) {
-			/* copy unescaped chars between inp and next_escp */
-			Py_UNICODE_COPY(outp, inp, next_escp-inp);
-			outp += next_escp - inp;
-		}
-
-		/* escape 'next_escp' */
-		Py_UNICODE_COPY(outp, escaped_chars_repl[*next_escp], delta_len);
-		outp += delta_len;
-
-		inp = next_escp + 1;
-	}
-	if (inp < inp_end)
-		Py_UNICODE_COPY(outp, inp, PyUnicode_GET_SIZE(in) - (inp - PyUnicode_AS_UNICODE(in)));
-
-	return (PyObject*)out;
-}
-#else /* PY_MAJOR_VERSION < 3 */
-
 #define GET_DELTA(inp, inp_end, delta) \
-	while (inp < inp_end) { \
-		switch (*inp++) { \
-		case '"': \
-		case '\'': \
-		case '&': \
-			delta += 4; \
-			break; \
-		case '<': \
-		case '>': \
-			delta += 3; \
-			break; \
-		} \
+	while (inp < inp_end) {     \
+		switch (*inp++) {       \
+		case '"':               \
+		case '\'':              \
+		case '&':               \
+			delta += 4;         \
+			break;              \
+		case '<':               \
+		case '>':               \
+			delta += 3;         \
+			break;              \
+		}                       \
 	}
 
 #define DO_ESCAPE(inp, inp_end, outp) \
@@ -185,6 +98,38 @@ escape_unicode(PyUnicodeObject *in)
 	memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \
 	}
 
+#if PY_MAJOR_VERSION < 3
+
+static PyObject*
+escape_unicode(PyUnicodeObject *in)
+{
+	const Py_UNICODE *inp = PyUnicode_AS_UNICODE(in);
+	const Py_UNICODE *inp_end = inp + PyUnicode_GET_SIZE(in);
+	Py_UNICODE *outp;
+	PyObject *out;
+	Py_ssize_t delta=0;
+
+	/* First we need to figure out how long the escaped string will be */
+	GET_DELTA(inp, inp_end, delta);
+
+	/* Do we need to escape anything at all? */
+	if (!delta) {
+		Py_INCREF(in);
+		return (PyObject*)in;
+	}
+
+	out = PyUnicode_FromUnicode(NULL, PyUnicode_GET_SIZE(in) + delta);
+	if (!out)
+		return NULL;
+
+	inp = PyUnicode_AS_UNICODE(in);
+	outp = PyUnicode_AS_UNICODE(out);
+	DO_ESCAPE(inp, inp_end, outp);
+	return out;
+}
+
+#else /* PY_MAJOR_VERSION < 3 */
+
 static PyObject*
 escape_unicode_kind1(PyUnicodeObject *in)
 {
@@ -201,7 +146,7 @@ escape_unicode_kind1(PyUnicodeObject *in)
 	}
 
 	out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta,
-						PyUnicode_IS_ASCII(in) ? 127 : 255);
+	                    PyUnicode_IS_ASCII(in) ? 127 : 255);
 	if (!out)
 		return NULL;
 