Skip to content

Commit

Permalink
pythongh-124665: Add _PyCodec_UnregisterError and `_codecs._unregister_error` (python#124677)
Browse files Browse the repository at this point in the history
  • Loading branch information
picnixz authored Sep 29, 2024
1 parent 04c837d commit c00964e
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 2 deletions.
11 changes: 11 additions & 0 deletions Include/internal/pycore_codecs.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,17 @@ extern void _PyCodec_Fini(PyInterpreterState *interp);

extern PyObject* _PyCodec_Lookup(const char *encoding);

/*
 * Un-register the error handling callback function registered under
 * the given 'name'. Only custom error handlers can be un-registered.
 *
 * 'name' must be a non-NULL, NUL-terminated C string: it is compared
 * against the built-in error handler names with strcmp().
 *
 * - Return -1 and set an exception if 'name' refers to a built-in
 *   error handling name (e.g., 'strict'), or if an error occurred.
 *   For a built-in name, the exception set is a ValueError.
 * - Return 0 if no custom error handler can be found for 'name'.
 * - Return 1 if the custom error handler was successfully removed.
 */
extern int _PyCodec_UnregisterError(const char *name);

/* Text codec specific encoding and decoding API.
Checks the encoding against a list of codecs which do not
Expand Down
27 changes: 26 additions & 1 deletion Lib/test/test_codeccallbacks.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from _codecs import _unregister_error as _codecs_unregister_error
import codecs
import html.entities
import itertools
Expand Down Expand Up @@ -1210,7 +1211,6 @@ def replace_with_long(exc):
'\ufffd\x00\x00'
)


def test_fake_error_class(self):
handlers = [
codecs.strict_errors,
Expand All @@ -1235,6 +1235,31 @@ class FakeUnicodeError(Exception):
with self.assertRaises((TypeError, FakeUnicodeError)):
handler(FakeUnicodeError())

def test_reject_unregister_builtin_error_handler(self):
    # Built-in error handlers must never be removable: every attempt
    # to un-register one of them is expected to raise ValueError.
    builtin_names = (
        'strict', 'ignore', 'replace', 'backslashreplace', 'namereplace',
        'xmlcharrefreplace', 'surrogateescape', 'surrogatepass',
    )
    for name in builtin_names:
        with self.subTest(name):
            with self.assertRaises(ValueError):
                _codecs_unregister_error(name)

def test_unregister_custom_error_handler(self):
    # A custom handler can be registered, looked up, and then removed;
    # once removed, looking it up fails again with LookupError.
    def custom_handler(exc):
        raise exc

    custom_name = 'test.test_unregister_custom_error_handler'
    self.assertRaises(LookupError, codecs.lookup_error, custom_name)
    codecs.register_error(custom_name, custom_handler)
    # The error registry is per-interpreter state shared with every other
    # test: make sure the handler is removed even if one of the assertions
    # below fails. Un-registering an unknown name simply returns False, so
    # this cleanup is harmless when the test succeeds.
    self.addCleanup(_codecs_unregister_error, custom_name)
    self.assertIs(codecs.lookup_error(custom_name), custom_handler)
    self.assertTrue(_codecs_unregister_error(custom_name))
    self.assertRaises(LookupError, codecs.lookup_error, custom_name)

def test_unregister_custom_unknown_error_handler(self):
    # Un-registering a name that was never registered is not an error:
    # the call reports a false value and the name stays unknown.
    unknown_name = 'test.test_unregister_custom_unknown_error_handler'
    with self.assertRaises(LookupError):
        codecs.lookup_error(unknown_name)
    was_removed = _codecs_unregister_error(unknown_name)
    self.assertFalse(was_removed)
    with self.assertRaises(LookupError):
        codecs.lookup_error(unknown_name)


if __name__ == "__main__":
unittest.main()
25 changes: 25 additions & 0 deletions Modules/_codecsmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,30 @@ _codecs_register_error_impl(PyObject *module, const char *errors,
Py_RETURN_NONE;
}

/*[clinic input]
_codecs._unregister_error -> bool
errors: str
/
Un-register the specified error handler for the error handling `errors'.
Only custom error handlers can be un-registered. An exception is raised
if the error handling is a built-in one (e.g., 'strict'), or if an error
occurs.
Otherwise, this returns True if a custom handler has been successfully
un-registered, and False if no custom handler for the specified error
handling exists.
[clinic start generated code]*/

static int
_codecs__unregister_error_impl(PyObject *module, const char *errors)
/*[clinic end generated code: output=28c22be667465503 input=a63ab9e9ce1686d4]*/
{
/* Delegate to _PyCodec_UnregisterError() and return its int result
 * unchanged; 'module' is not used here.
 * NOTE(review): the clinic input declares a `-> bool` return converter, so
 * the generated wrapper in clinic/_codecsmodule.c.h (not shown here) is
 * presumably what turns this int into True/False or propagates the error
 * on -1 -- confirm against the generated code. */
return _PyCodec_UnregisterError(errors);
}

/*[clinic input]
_codecs.lookup_error
name: str
Expand Down Expand Up @@ -1044,6 +1068,7 @@ static PyMethodDef _codecs_functions[] = {
_CODECS_CODE_PAGE_ENCODE_METHODDEF
_CODECS_CODE_PAGE_DECODE_METHODDEF
_CODECS_REGISTER_ERROR_METHODDEF
_CODECS__UNREGISTER_ERROR_METHODDEF
_CODECS_LOOKUP_ERROR_METHODDEF
{NULL, NULL} /* sentinel */
};
Expand Down
52 changes: 51 additions & 1 deletion Modules/clinic/_codecsmodule.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions Python/codecs.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ Copyright (c) Corporation for National Research Initiatives.
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI

/* Names of the built-in error handlers.
 *
 * _PyCodec_UnregisterError() refuses to un-register any name listed here.
 * The table must be kept in sync with the handlers installed by
 * _PyCodec_InitRegistry(), which asserts that both have the same length.
 *
 * Both the strings and the pointer slots are const: this is a read-only
 * lookup table and nothing should ever re-point one of its entries.
 */
static const char *const codecs_builtin_error_handlers[] = {
    "strict", "ignore", "replace",
    "xmlcharrefreplace", "backslashreplace", "namereplace",
    "surrogatepass", "surrogateescape",
};

const char *Py_hexdigits = "0123456789abcdef";

/* --- Codec Registry ----------------------------------------------------- */
Expand Down Expand Up @@ -618,6 +624,20 @@ int PyCodec_RegisterError(const char *name, PyObject *error)
name, error);
}

/* Remove the custom error handler registered under 'name' from the
   current interpreter's error registry.

   Built-in handler names are rejected up front: a ValueError is set and
   -1 is returned. Otherwise the result of PyDict_PopString() on the
   registry is returned as-is (1 if an entry was removed, 0 if there was
   none, -1 with an exception set on failure). */
int _PyCodec_UnregisterError(const char *name)
{
    const size_t n_builtin = Py_ARRAY_LENGTH(codecs_builtin_error_handlers);
    for (size_t idx = 0; idx < n_builtin; idx++) {
        const char *builtin = codecs_builtin_error_handlers[idx];
        if (strcmp(name, builtin) != 0) {
            continue;
        }
        /* 'name' is one of the built-in handlers: refuse to remove it. */
        PyErr_Format(PyExc_ValueError,
                     "cannot un-register built-in error handler '%s'", name);
        return -1;
    }

    PyInterpreterState *interp = _PyInterpreterState_GET();
    assert(interp->codecs.initialized);
    /* The removed handler itself is not needed, hence the NULL result slot. */
    return PyDict_PopString(interp->codecs.error_registry, name, NULL);
}

/* Lookup the error handling callback function registered under the
name error. As a special case NULL can be passed, in which case
the error handling callback for strict encoding will be returned. */
Expand Down Expand Up @@ -1470,6 +1490,8 @@ _PyCodec_InitRegistry(PyInterpreterState *interp)
}
}
};
// ensure that the built-in error handlers' names are kept in sync
assert(Py_ARRAY_LENGTH(methods) == Py_ARRAY_LENGTH(codecs_builtin_error_handlers));

assert(interp->codecs.initialized == 0);
interp->codecs.search_path = PyList_New(0);
Expand Down
1 change: 1 addition & 0 deletions Tools/c-analyzer/cpython/ignored.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ Python/ast_opt.c fold_unaryop ops -
Python/ceval.c - _PyEval_BinaryOps -
Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS -
Python/codecs.c - Py_hexdigits -
Python/codecs.c - codecs_builtin_error_handlers -
Python/codecs.c - ucnhash_capi -
Python/codecs.c _PyCodec_InitRegistry methods -
Python/compile.c - NO_LOCATION -
Expand Down

0 comments on commit c00964e

Please sign in to comment.