| /* |
| * wchar_t helpers, version CPython >= 3.3. |
| * |
| * CPython 3.3 added support for sys.maxunicode == 0x10FFFF on all |
| * platforms, even ones with wchar_t limited to 2 bytes. As such, |
| * this code here works from the outside like wchar_helper.h in the |
| * case Py_UNICODE_SIZE == 4, but the implementation is very different. |
| */ |
| |
| typedef uint16_t cffi_char16_t; |
| typedef uint32_t cffi_char32_t; |
| |
| |
| static PyObject * |
| _my_PyUnicode_FromChar32(const cffi_char32_t *w, Py_ssize_t size) |
| { |
| return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, w, size); |
| } |
| |
| static PyObject * |
| _my_PyUnicode_FromChar16(const cffi_char16_t *w, Py_ssize_t size) |
| { |
| /* are there any surrogate pairs, and if so, how many? */ |
| Py_ssize_t i, count_surrogates = 0; |
| for (i = 0; i < size - 1; i++) { |
| if (0xD800 <= w[i] && w[i] <= 0xDBFF && |
| 0xDC00 <= w[i+1] && w[i+1] <= 0xDFFF) |
| count_surrogates++; |
| } |
| if (count_surrogates == 0) { |
| /* no, fast path */ |
| return PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, w, size); |
| } |
| else |
| { |
| PyObject *result = PyUnicode_New(size - count_surrogates, 0x10FFFF); |
| Py_UCS4 *data; |
| assert(PyUnicode_KIND(result) == PyUnicode_4BYTE_KIND); |
| data = PyUnicode_4BYTE_DATA(result); |
| |
| for (i = 0; i < size; i++) |
| { |
| cffi_char32_t ch = w[i]; |
| if (0xD800 <= ch && ch <= 0xDBFF && i < size - 1) { |
| cffi_char32_t ch2 = w[i + 1]; |
| if (0xDC00 <= ch2 && ch2 <= 0xDFFF) { |
| ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000; |
| i++; |
| } |
| } |
| *data++ = ch; |
| } |
| return result; |
| } |
| } |
| |
| static int |
| _my_PyUnicode_AsSingleChar16(PyObject *unicode, cffi_char16_t *result, |
| char *err_got) |
| { |
| cffi_char32_t ch; |
| if (PyUnicode_GET_LENGTH(unicode) != 1) { |
| sprintf(err_got, "unicode string of length %zd", |
| PyUnicode_GET_LENGTH(unicode)); |
| return -1; |
| } |
| ch = PyUnicode_READ_CHAR(unicode, 0); |
| |
| if (ch > 0xFFFF) |
| { |
| sprintf(err_got, "larger-than-0xFFFF character"); |
| return -1; |
| } |
| *result = (cffi_char16_t)ch; |
| return 0; |
| } |
| |
| static int |
| _my_PyUnicode_AsSingleChar32(PyObject *unicode, cffi_char32_t *result, |
| char *err_got) |
| { |
| if (PyUnicode_GET_LENGTH(unicode) != 1) { |
| sprintf(err_got, "unicode string of length %zd", |
| PyUnicode_GET_LENGTH(unicode)); |
| return -1; |
| } |
| *result = PyUnicode_READ_CHAR(unicode, 0); |
| return 0; |
| } |
| |
| static Py_ssize_t _my_PyUnicode_SizeAsChar16(PyObject *unicode) |
| { |
| Py_ssize_t length = PyUnicode_GET_LENGTH(unicode); |
| Py_ssize_t result = length; |
| unsigned int kind = PyUnicode_KIND(unicode); |
| |
| if (kind == PyUnicode_4BYTE_KIND) |
| { |
| Py_UCS4 *data = PyUnicode_4BYTE_DATA(unicode); |
| Py_ssize_t i; |
| for (i = 0; i < length; i++) { |
| if (data[i] > 0xFFFF) |
| result++; |
| } |
| } |
| return result; |
| } |
| |
| static Py_ssize_t _my_PyUnicode_SizeAsChar32(PyObject *unicode) |
| { |
| return PyUnicode_GET_LENGTH(unicode); |
| } |
| |
| static int _my_PyUnicode_AsChar16(PyObject *unicode, |
| cffi_char16_t *result, |
| Py_ssize_t resultlen) |
| { |
| Py_ssize_t len = PyUnicode_GET_LENGTH(unicode); |
| unsigned int kind = PyUnicode_KIND(unicode); |
| void *data = PyUnicode_DATA(unicode); |
| Py_ssize_t i; |
| |
| for (i = 0; i < len; i++) { |
| cffi_char32_t ordinal = PyUnicode_READ(kind, data, i); |
| if (ordinal > 0xFFFF) { |
| if (ordinal > 0x10FFFF) { |
| PyErr_Format(PyExc_ValueError, |
| "unicode character out of range for " |
| "conversion to char16_t: 0x%x", (int)ordinal); |
| return -1; |
| } |
| ordinal -= 0x10000; |
| *result++ = 0xD800 | (ordinal >> 10); |
| *result++ = 0xDC00 | (ordinal & 0x3FF); |
| } |
| else |
| *result++ = ordinal; |
| } |
| return 0; |
| } |
| |
| static int _my_PyUnicode_AsChar32(PyObject *unicode, |
| cffi_char32_t *result, |
| Py_ssize_t resultlen) |
| { |
| if (PyUnicode_AsUCS4(unicode, (Py_UCS4 *)result, resultlen, 0) == NULL) |
| return -1; |
| return 0; |
| } |