Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-106320: Move _PyUnicodeWriter to the internal C API #106342

Merged
merged 2 commits into from
Jul 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 0 additions & 139 deletions Include/cpython/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -480,131 +480,6 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
Py_ssize_t start,
Py_ssize_t end);

/* --- _PyUnicodeWriter API ----------------------------------------------- */

typedef struct {
PyObject *buffer;
void *data;
int kind;
Py_UCS4 maxchar;
Py_ssize_t size;
Py_ssize_t pos;

/* minimum number of allocated characters (default: 0) */
Py_ssize_t min_length;

/* minimum character (default: 127, ASCII) */
Py_UCS4 min_char;

/* If non-zero, overallocate the buffer (default: 0). */
unsigned char overallocate;

/* If readonly is 1, buffer is a shared string (cannot be modified)
and size is set to 0. */
unsigned char readonly;
} _PyUnicodeWriter ;

/* Initialize a Unicode writer.
*
* By default, the minimum buffer size is 0 character and overallocation is
* disabled. Set min_length, min_char and overallocate attributes to control
* the allocation of the buffer. */
PyAPI_FUNC(void)
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);

/* Prepare the buffer to write 'length' characters
with the specified maximum character.

Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
(((MAXCHAR) <= (WRITER)->maxchar \
&& (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
? 0 \
: (((LENGTH) == 0) \
? 0 \
: _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))

/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
instead. */
PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
Py_ssize_t length, Py_UCS4 maxchar);

/* Prepare the buffer to have at least the kind KIND.
For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
support characters in range U+000-U+FFFF.

Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
((KIND) <= (WRITER)->kind \
? 0 \
: _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))

/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
macro instead. */
PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
int kind);

/* Append a Unicode character.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
Py_UCS4 ch
);

/* Append a Unicode string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
PyObject *str /* Unicode string */
);

/* Append a substring of a Unicode string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
PyObject *str, /* Unicode string */
Py_ssize_t start,
Py_ssize_t end
);

/* Append an ASCII-encoded byte string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
const char *str, /* ASCII-encoded byte string */
Py_ssize_t len /* number of bytes, or -1 if unknown */
);

/* Append a latin1-encoded byte string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
const char *str, /* latin1-encoded byte string */
Py_ssize_t len /* length in bytes */
);

/* Get the value of the writer as a Unicode string. Clear the
buffer of the writer. Raise an exception and return NULL
on error. */
PyAPI_FUNC(PyObject *)
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);

/* Deallocate memory of a writer (clear its internal buffer). */
PyAPI_FUNC(void)
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);


/* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */
PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
_PyUnicodeWriter *writer,
PyObject *obj,
PyObject *format_spec,
Py_ssize_t start,
Py_ssize_t end);

/* --- Manage the default encoding ---------------------------------------- */

/* Returns a pointer to the default encoding (UTF-8) of the
Expand Down Expand Up @@ -774,20 +649,6 @@ PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
PyObject *sepobj
);

/* Using explicit passed-in values, insert the thousands grouping
into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
_PyUnicodeWriter *writer,
Py_ssize_t n_buffer,
PyObject *digits,
Py_ssize_t d_pos,
Py_ssize_t n_digits,
Py_ssize_t min_width,
const char *grouping,
PyObject *thousands_sep,
Py_UCS4 *maxchar);

/* === Characters Type APIs =============================================== */

/* These should not be used directly. Use the Py_UNICODE_IS* and
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_complexobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define"
#endif

#include "pycore_unicodeobject.h" // _PyUnicodeWriter

/* Operations on complex numbers from complexmodule.c */

PyAPI_FUNC(Py_complex) _Py_c_sum(Py_complex, Py_complex);
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_floatobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ extern "C" {
#endif


#include "pycore_unicodeobject.h" // _PyUnicodeWriter

/* runtime lifecycle */

extern void _PyFloat_InitState(PyInterpreterState *);
Expand Down
145 changes: 143 additions & 2 deletions Include/internal/pycore_unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,148 @@ extern "C" {
void _PyUnicode_ExactDealloc(PyObject *op);
Py_ssize_t _PyUnicode_InternedSize(void);

/* runtime lifecycle */
/* --- _PyUnicodeWriter API ----------------------------------------------- */

typedef struct {
PyObject *buffer;
void *data;
int kind;
Py_UCS4 maxchar;
Py_ssize_t size;
Py_ssize_t pos;

/* minimum number of allocated characters (default: 0) */
Py_ssize_t min_length;

/* minimum character (default: 127, ASCII) */
Py_UCS4 min_char;

/* If non-zero, overallocate the buffer (default: 0). */
unsigned char overallocate;

/* If readonly is 1, buffer is a shared string (cannot be modified)
and size is set to 0. */
unsigned char readonly;
} _PyUnicodeWriter ;

/* Initialize a Unicode writer.
*
* By default, the minimum buffer size is 0 character and overallocation is
* disabled. Set min_length, min_char and overallocate attributes to control
* the allocation of the buffer. */
PyAPI_FUNC(void)
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);

/* Prepare the buffer to write 'length' characters
with the specified maximum character.

Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
(((MAXCHAR) <= (WRITER)->maxchar \
&& (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
? 0 \
: (((LENGTH) == 0) \
? 0 \
: _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))

/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
instead. */
PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
Py_ssize_t length, Py_UCS4 maxchar);

/* Prepare the buffer to have at least the kind KIND.
For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
support characters in range U+000-U+FFFF.

Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
((KIND) <= (WRITER)->kind \
? 0 \
: _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))

/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
macro instead. */
PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
int kind);

/* Append a Unicode character.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
Py_UCS4 ch
);

/* Append a Unicode string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
PyObject *str /* Unicode string */
);

/* Append a substring of a Unicode string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
PyObject *str, /* Unicode string */
Py_ssize_t start,
Py_ssize_t end
);

/* Append an ASCII-encoded byte string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
const char *str, /* ASCII-encoded byte string */
Py_ssize_t len /* number of bytes, or -1 if unknown */
);

/* Append a latin1-encoded byte string.
Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
const char *str, /* latin1-encoded byte string */
Py_ssize_t len /* length in bytes */
);

/* Get the value of the writer as a Unicode string. Clear the
buffer of the writer. Raise an exception and return NULL
on error. */
PyAPI_FUNC(PyObject *)
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);

/* Deallocate memory of a writer (clear its internal buffer). */
PyAPI_FUNC(void)
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);


/* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */
PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
_PyUnicodeWriter *writer,
PyObject *obj,
PyObject *format_spec,
Py_ssize_t start,
Py_ssize_t end);

/* --- Methods & Slots ---------------------------------------------------- */

/* Using explicit passed-in values, insert the thousands grouping
into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
_PyUnicodeWriter *writer,
Py_ssize_t n_buffer,
PyObject *digits,
Py_ssize_t d_pos,
Py_ssize_t n_digits,
Py_ssize_t min_width,
const char *grouping,
PyObject *thousands_sep,
Py_UCS4 *maxchar);

/* --- Runtime lifecycle -------------------------------------------------- */

extern void _PyUnicode_InitState(PyInterpreterState *);
extern PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *);
Expand All @@ -24,7 +165,7 @@ extern void _PyUnicode_FiniTypes(PyInterpreterState *);

extern PyTypeObject _PyUnicodeASCIIIter_Type;

/* other API */
/* --- Other API ---------------------------------------------------------- */

struct _Py_unicode_runtime_ids {
PyThread_type_lock lock;
Expand Down
4 changes: 4 additions & 0 deletions Modules/cjkcodecs/cjkcodecs.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
#ifndef _CJKCODECS_H_
#define _CJKCODECS_H_

#ifndef Py_BUILD_CORE_BUILTIN
# define Py_BUILD_CORE_MODULE 1
#endif

#include "Python.h"
#include "multibytecodec.h"

Expand Down
4 changes: 4 additions & 0 deletions Modules/cjkcodecs/multibytecodec.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/

#ifndef Py_BUILD_CORE_BUILTIN
# define Py_BUILD_CORE_MODULE 1
#endif

#include "Python.h"
#include "structmember.h" // PyMemberDef
#include "multibytecodec.h"
Expand Down
2 changes: 2 additions & 0 deletions Modules/cjkcodecs/multibytecodec.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
extern "C" {
#endif

#include "pycore_unicodeobject.h" // _PyUnicodeWriter

#ifdef uint16_t
typedef uint16_t ucs2_t, DBCHAR;
#else
Expand Down
12 changes: 9 additions & 3 deletions Tools/c-analyzer/c_parser/preprocessor/gcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@

from . import common as _common

# Modules/socketmodule.h uses pycore_time.h which needs the Py_BUILD_CORE
# macro. Usually it's defined by the C file which includes it.
# Other header files have a similar issue.
NEED_BUILD_CORE = {
'cjkcodecs.h',
'multibytecodec.h',
'socketmodule.h',
}

TOOL = 'gcc'

Expand Down Expand Up @@ -62,9 +70,7 @@ def preprocess(filename,
filename = _normpath(filename, cwd)

postargs = POST_ARGS
if os.path.basename(filename) == 'socketmodule.h':
# Modules/socketmodule.h uses pycore_time.h which needs Py_BUILD_CORE.
# Usually it's defined by the C file which includes it.
if os.path.basename(filename) in NEED_BUILD_CORE:
postargs += ('-DPy_BUILD_CORE=1',)

text = _common.preprocess(
Expand Down