Skip to content

Commit efc37ba

Browse files
authored
gh-139353: Add Objects/unicode_writer.c file (#139911)
Move the public PyUnicodeWriter API and the private _PyUnicodeWriter API to a new Objects/unicode_writer.c file. Rename a few helper functions, such as resize_compact() and unicode_result(), so that they can be shared between unicodeobject.c and unicode_writer.c.
1 parent 75a1cbd · commit efc37ba

File tree

8 files changed

+717
-638
lines changed

8 files changed

+717
-638
lines changed

Include/internal/pycore_unicodeobject.h

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,46 @@ extern "C" {
17 17

18 18

19 19
extern int _PyUnicode_IsModifiable(PyObject *unicode);
20+
extern void _PyUnicodeWriter_InitWithBuffer(
21+
_PyUnicodeWriter *writer,
22+
PyObject *buffer);
23+
extern PyObject* _PyUnicode_Result(PyObject *unicode);
24+
extern int _PyUnicode_DecodeUTF8Writer(
25+
_PyUnicodeWriter *writer,
26+
const char *s,
27+
Py_ssize_t size,
28+
_Py_error_handler error_handler,
29+
const char *errors,
30+
Py_ssize_t *consumed);
31+
extern PyObject* _PyUnicode_ResizeCompact(
32+
PyObject *unicode,
33+
Py_ssize_t length);
34+
extern PyObject* _PyUnicode_GetEmpty(void);
35+
36+
37+
/* Generic helper macro to convert characters of different types.
38+
from_type and to_type have to be valid type names, begin and end
39+
are pointers to the source characters which should be of type
40+
"from_type *". to is a pointer of type "to_type *" and points to the
41+
buffer where the result characters are written to. */
42+
#define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \
43+
do { \
44+
to_type *_to = (to_type *)(to); \
45+
const from_type *_iter = (const from_type *)(begin);\
46+
const from_type *_end = (const from_type *)(end);\
47+
Py_ssize_t n = (_end) - (_iter); \
48+
const from_type *_unrolled_end = \
49+
_iter + _Py_SIZE_ROUND_DOWN(n, 4); \
50+
while (_iter < (_unrolled_end)) { \
51+
_to[0] = (to_type) _iter[0]; \
52+
_to[1] = (to_type) _iter[1]; \
53+
_to[2] = (to_type) _iter[2]; \
54+
_to[3] = (to_type) _iter[3]; \
55+
_iter += 4; _to += 4; \
56+
} \
57+
while (_iter < (_end)) \
58+
*_to++ = (to_type) *_iter++; \
59+
} while (0)
20 60

21 61

22 62
static inline void

Makefile.pre.in

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -559,6 +559,7 @@ OBJECT_OBJS= \
559 559
Objects/typevarobject.o \
560 560
Objects/unicode_format.o \
561 561
Objects/unicode_formatter.o \
562+
Objects/unicode_writer.o \
562 563
Objects/unicodectype.o \
563 564
Objects/unicodeobject.o \
564 565
Objects/unionobject.o \

0 commit comments

Comments
 (0)