| #include "Python.h" |
/*
 * Compatibility shim: when the Python headers report a version below 2.5
 * and have not already supplied PY_SSIZE_T_MIN, provide Py_ssize_t as a
 * plain int together with its limits.
 */
#if !defined(PY_SSIZE_T_MIN) && (PY_VERSION_HEX < 0x02050000)
typedef int Py_ssize_t;
#define PY_SSIZE_T_MIN INT_MIN
#define PY_SSIZE_T_MAX INT_MAX
#endif
| |
| static Py_ssize_t |
| ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); |
| static PyObject * |
| ascii_escape_unicode(PyObject *pystr); |
| static PyObject * |
| ascii_escape_str(PyObject *pystr); |
| static PyObject * |
| py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr); |
| void init_speedups(void); |
| |
/*
 * S_CHAR(c) is true when c may be copied to the output unchanged: it lies
 * in the range ' ' .. '~' and is not a backslash, a slash or a double quote.
 * The argument is parenthesized at every use so that expressions containing
 * low-precedence operators (e.g. a conditional expression) are classified
 * correctly.  Note that the argument is still evaluated several times, so
 * it must not have side effects.
 */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '/' && (c) != '"')

/* Number of output characters in one "\uXXXX" escape. */
#define MIN_EXPANSION 6
/*
 * Largest number of output characters a single input character can expand
 * to: when Py_UNICODE_WIDE is defined a character may be written as two
 * "\uXXXX" escapes (a surrogate pair), otherwise as one.
 */
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * (MIN_EXPANSION))
#else
#define MAX_EXPANSION (MIN_EXPANSION)
#endif
| |
| static Py_ssize_t |
| ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) { |
| Py_UNICODE x; |
| output[chars++] = '\\'; |
| switch (c) { |
| case '/': output[chars++] = (char)c; break; |
| case '\\': output[chars++] = (char)c; break; |
| case '"': output[chars++] = (char)c; break; |
| case '\b': output[chars++] = 'b'; break; |
| case '\f': output[chars++] = 'f'; break; |
| case '\n': output[chars++] = 'n'; break; |
| case '\r': output[chars++] = 'r'; break; |
| case '\t': output[chars++] = 't'; break; |
| default: |
| #ifdef Py_UNICODE_WIDE |
| if (c >= 0x10000) { |
| /* UTF-16 surrogate pair */ |
| Py_UNICODE v = c - 0x10000; |
| c = 0xd800 | ((v >> 10) & 0x3ff); |
| output[chars++] = 'u'; |
| x = (c & 0xf000) >> 12; |
| output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); |
| x = (c & 0x0f00) >> 8; |
| output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); |
| x = (c & 0x00f0) >> 4; |
| output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); |
| x = (c & 0x000f); |
| output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); |
| c = 0xdc00 | (v & 0x3ff); |
| output[chars++] = '\\'; |
| } |
| #endif |
| output[chars++] = 'u'; |
| x = (c & 0xf000) >> 12; |
| output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); |
| x = (c & 0x0f00) >> 8; |
| output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); |
| x = (c & 0x00f0) >> 4; |
| output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); |
| x = (c & 0x000f); |
| output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); |
| } |
| return chars; |
| } |
| |
| static PyObject * |
| ascii_escape_unicode(PyObject *pystr) { |
| Py_ssize_t i; |
| Py_ssize_t input_chars; |
| Py_ssize_t output_size; |
| Py_ssize_t chars; |
| PyObject *rval; |
| char *output; |
| Py_UNICODE *input_unicode; |
| |
| input_chars = PyUnicode_GET_SIZE(pystr); |
| input_unicode = PyUnicode_AS_UNICODE(pystr); |
| /* One char input can be up to 6 chars output, estimate 4 of these */ |
| output_size = 2 + (MIN_EXPANSION * 4) + input_chars; |
| rval = PyString_FromStringAndSize(NULL, output_size); |
| if (rval == NULL) { |
| return NULL; |
| } |
| output = PyString_AS_STRING(rval); |
| chars = 0; |
| output[chars++] = '"'; |
| for (i = 0; i < input_chars; i++) { |
| Py_UNICODE c = input_unicode[i]; |
| if (S_CHAR(c)) { |
| output[chars++] = (char)c; |
| } else { |
| chars = ascii_escape_char(c, output, chars); |
| } |
| if (output_size - chars < (1 + MAX_EXPANSION)) { |
| /* There's more than four, so let's resize by a lot */ |
| output_size *= 2; |
| /* This is an upper bound */ |
| if (output_size > 2 + (input_chars * MAX_EXPANSION)) { |
| output_size = 2 + (input_chars * MAX_EXPANSION); |
| } |
| if (_PyString_Resize(&rval, output_size) == -1) { |
| return NULL; |
| } |
| output = PyString_AS_STRING(rval); |
| } |
| } |
| output[chars++] = '"'; |
| if (_PyString_Resize(&rval, chars) == -1) { |
| return NULL; |
| } |
| return rval; |
| } |
| |
| static PyObject * |
| ascii_escape_str(PyObject *pystr) { |
| Py_ssize_t i; |
| Py_ssize_t input_chars; |
| Py_ssize_t output_size; |
| Py_ssize_t chars; |
| PyObject *rval; |
| char *output; |
| char *input_str; |
| |
| input_chars = PyString_GET_SIZE(pystr); |
| input_str = PyString_AS_STRING(pystr); |
| /* One char input can be up to 6 chars output, estimate 4 of these */ |
| output_size = 2 + (MIN_EXPANSION * 4) + input_chars; |
| rval = PyString_FromStringAndSize(NULL, output_size); |
| if (rval == NULL) { |
| return NULL; |
| } |
| output = PyString_AS_STRING(rval); |
| chars = 0; |
| output[chars++] = '"'; |
| for (i = 0; i < input_chars; i++) { |
| Py_UNICODE c = (Py_UNICODE)input_str[i]; |
| if (S_CHAR(c)) { |
| output[chars++] = (char)c; |
| } else if (c > 0x7F) { |
| /* We hit a non-ASCII character, bail to unicode mode */ |
| PyObject *uni; |
| Py_DECREF(rval); |
| uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); |
| if (uni == NULL) { |
| return NULL; |
| } |
| rval = ascii_escape_unicode(uni); |
| Py_DECREF(uni); |
| return rval; |
| } else { |
| chars = ascii_escape_char(c, output, chars); |
| } |
| /* An ASCII char can't possibly expand to a surrogate! */ |
| if (output_size - chars < (1 + MIN_EXPANSION)) { |
| /* There's more than four, so let's resize by a lot */ |
| output_size *= 2; |
| if (output_size > 2 + (input_chars * MIN_EXPANSION)) { |
| output_size = 2 + (input_chars * MIN_EXPANSION); |
| } |
| if (_PyString_Resize(&rval, output_size) == -1) { |
| return NULL; |
| } |
| output = PyString_AS_STRING(rval); |
| } |
| } |
| output[chars++] = '"'; |
| if (_PyString_Resize(&rval, chars) == -1) { |
| return NULL; |
| } |
| return rval; |
| } |
| |
| PyDoc_STRVAR(pydoc_encode_basestring_ascii, |
| "encode_basestring_ascii(basestring) -> str\n" |
| "\n" |
| "..." |
| ); |
| |
| static PyObject * |
| py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr) { |
| /* METH_O */ |
| if (PyString_Check(pystr)) { |
| return ascii_escape_str(pystr); |
| } else if (PyUnicode_Check(pystr)) { |
| return ascii_escape_unicode(pystr); |
| } |
| PyErr_SetString(PyExc_TypeError, "first argument must be a string"); |
| return NULL; |
| } |
| |
| #define DEFN(n, k) \ |
| { \ |
| #n, \ |
| (PyCFunction)py_ ##n, \ |
| k, \ |
| pydoc_ ##n \ |
| } |
| static PyMethodDef speedups_methods[] = { |
| DEFN(encode_basestring_ascii, METH_O), |
| {} |
| }; |
| #undef DEFN |
| |
| void |
| init_speedups(void) |
| { |
| PyObject *m; |
| m = Py_InitModule4("_speedups", speedups_methods, NULL, NULL, PYTHON_API_VERSION); |
| } |