| /* stringlib: bytes joining implementation */ |
| |
| #if STRINGLIB_IS_UNICODE |
| #error join.h only compatible with byte-wise strings |
| #endif |
| |
| Py_LOCAL_INLINE(PyObject *) |
| STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) |
| { |
| const char *sepstr = STRINGLIB_STR(sep); |
| Py_ssize_t seplen = STRINGLIB_LEN(sep); |
| PyObject *res = NULL; |
| char *p; |
| Py_ssize_t seqlen = 0; |
| Py_ssize_t sz = 0; |
| Py_ssize_t i, nbufs; |
| PyObject *seq, *item; |
| Py_buffer *buffers = NULL; |
| #define NB_STATIC_BUFFERS 10 |
| Py_buffer static_buffers[NB_STATIC_BUFFERS]; |
| #define GIL_THRESHOLD 1048576 |
| int drop_gil = 1; |
| PyThreadState *save = NULL; |
| |
| seq = PySequence_Fast(iterable, "can only join an iterable"); |
| if (seq == NULL) { |
| return NULL; |
| } |
| |
| seqlen = PySequence_Fast_GET_SIZE(seq); |
| if (seqlen == 0) { |
| Py_DECREF(seq); |
| return STRINGLIB_NEW(NULL, 0); |
| } |
| #if !STRINGLIB_MUTABLE |
| if (seqlen == 1) { |
| item = PySequence_Fast_GET_ITEM(seq, 0); |
| if (STRINGLIB_CHECK_EXACT(item)) { |
| Py_INCREF(item); |
| Py_DECREF(seq); |
| return item; |
| } |
| } |
| #endif |
| if (seqlen > NB_STATIC_BUFFERS) { |
| buffers = PyMem_NEW(Py_buffer, seqlen); |
| if (buffers == NULL) { |
| Py_DECREF(seq); |
| PyErr_NoMemory(); |
| return NULL; |
| } |
| } |
| else { |
| buffers = static_buffers; |
| } |
| |
| /* Here is the general case. Do a pre-pass to figure out the total |
| * amount of space we'll need (sz), and see whether all arguments are |
| * bytes-like. |
| */ |
| for (i = 0, nbufs = 0; i < seqlen; i++) { |
| Py_ssize_t itemlen; |
| item = PySequence_Fast_GET_ITEM(seq, i); |
| if (PyBytes_CheckExact(item)) { |
| /* Fast path. */ |
| Py_INCREF(item); |
| buffers[i].obj = item; |
| buffers[i].buf = PyBytes_AS_STRING(item); |
| buffers[i].len = PyBytes_GET_SIZE(item); |
| } |
| else { |
| if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) { |
| PyErr_Format(PyExc_TypeError, |
| "sequence item %zd: expected a bytes-like object, " |
| "%.80s found", |
| i, Py_TYPE(item)->tp_name); |
| goto error; |
| } |
| /* If the backing objects are mutable, then dropping the GIL |
| * opens up race conditions where another thread tries to modify |
| * the object which we hold a buffer on it. Such code has data |
| * races anyway, but this is a conservative approach that avoids |
| * changing the behaviour of that data race. |
| */ |
| drop_gil = 0; |
| } |
| nbufs = i + 1; /* for error cleanup */ |
| itemlen = buffers[i].len; |
| if (itemlen > PY_SSIZE_T_MAX - sz) { |
| PyErr_SetString(PyExc_OverflowError, |
| "join() result is too long"); |
| goto error; |
| } |
| sz += itemlen; |
| if (i != 0) { |
| if (seplen > PY_SSIZE_T_MAX - sz) { |
| PyErr_SetString(PyExc_OverflowError, |
| "join() result is too long"); |
| goto error; |
| } |
| sz += seplen; |
| } |
| if (seqlen != PySequence_Fast_GET_SIZE(seq)) { |
| PyErr_SetString(PyExc_RuntimeError, |
| "sequence changed size during iteration"); |
| goto error; |
| } |
| } |
| |
| /* Allocate result space. */ |
| res = STRINGLIB_NEW(NULL, sz); |
| if (res == NULL) |
| goto error; |
| |
| /* Catenate everything. */ |
| p = STRINGLIB_STR(res); |
| if (sz < GIL_THRESHOLD) { |
| drop_gil = 0; /* Benefits are likely outweighed by the overheads */ |
| } |
| if (drop_gil) { |
| save = PyEval_SaveThread(); |
| } |
| if (!seplen) { |
| /* fast path */ |
| for (i = 0; i < nbufs; i++) { |
| Py_ssize_t n = buffers[i].len; |
| char *q = buffers[i].buf; |
| memcpy(p, q, n); |
| p += n; |
| } |
| } |
| else { |
| for (i = 0; i < nbufs; i++) { |
| Py_ssize_t n; |
| char *q; |
| if (i) { |
| memcpy(p, sepstr, seplen); |
| p += seplen; |
| } |
| n = buffers[i].len; |
| q = buffers[i].buf; |
| memcpy(p, q, n); |
| p += n; |
| } |
| } |
| if (drop_gil) { |
| PyEval_RestoreThread(save); |
| } |
| goto done; |
| |
| error: |
| res = NULL; |
| done: |
| Py_DECREF(seq); |
| for (i = 0; i < nbufs; i++) |
| PyBuffer_Release(&buffers[i]); |
| if (buffers != static_buffers) |
| PyMem_Free(buffers); |
| return res; |
| } |
| |
| #undef NB_STATIC_BUFFERS |
| #undef GIL_THRESHOLD |