To: vim_dev@googlegroups.com Subject: Patch 8.2.2178 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 8.2.2178 Problem: Python 3: non-utf8 character cannot be handled. Solution: Change the string decode. (Björn Linse, closes #1053) Files: src/if_py_both.h, src/if_python.c, src/if_python3.c, src/testdir/test_python3.vim, src/testdir/test_python2.vim *** ../vim-8.2.2177/src/if_py_both.h 2020-10-21 21:01:56.070323838 +0200 --- src/if_py_both.h 2020-12-21 16:00:22.495130874 +0100 *************** *** 130,139 **** { PyObject *bytes; ! if (!(bytes = PyUnicode_AsEncodedString(obj, ENC_OPT, NULL))) return NULL; ! if(PyBytes_AsStringAndSize(bytes, (char **) &str, NULL) == -1 || str == NULL) { Py_DECREF(bytes); --- 130,140 ---- { PyObject *bytes; ! if (!(bytes = PyUnicode_AsEncodedString(obj, ENC_OPT, ! ERRORS_ENCODE_ARG))) return NULL; ! if (PyBytes_AsStringAndSize(bytes, (char **) &str, NULL) == -1 || str == NULL) { Py_DECREF(bytes); *************** *** 4243,4249 **** } else if (PyUnicode_Check(obj)) { ! if (!(bytes = PyUnicode_AsEncodedString(obj, ENC_OPT, NULL))) return NULL; if (PyBytes_AsStringAndSize(bytes, &str, &len) == -1 --- 4244,4251 ---- } else if (PyUnicode_Check(obj)) { ! if (!(bytes = PyUnicode_AsEncodedString(obj, ENC_OPT, ! ERRORS_ENCODE_ARG))) return NULL; if (PyBytes_AsStringAndSize(bytes, &str, &len) == -1 *************** *** 6290,6300 **** PyObject *bytes; char_u *str; ! bytes = PyUnicode_AsEncodedString(obj, ENC_OPT, NULL); if (bytes == NULL) return -1; ! if(PyBytes_AsStringAndSize(bytes, (char **) &str, NULL) == -1) return -1; if (str == NULL) return -1; --- 6292,6302 ---- PyObject *bytes; char_u *str; ! bytes = PyUnicode_AsEncodedString(obj, ENC_OPT, ERRORS_ENCODE_ARG); if (bytes == NULL) return -1; ! if (PyBytes_AsStringAndSize(bytes, (char **) &str, NULL) == -1) return -1; if (str == NULL) return -1; *** ../vim-8.2.2177/src/if_python.c 2020-10-21 21:01:56.070323838 +0200 --- src/if_python.c 2020-12-21 16:01:00.862946749 +0100 *************** *** 69,74 **** --- 69,78 ---- # undef PY_SSIZE_T_CLEAN #endif + // these are NULL for Python 2 + #define ERRORS_DECODE_ARG NULL + #define ERRORS_ENCODE_ARG ERRORS_DECODE_ARG + #undef main // Defined in python.h - aargh #undef HAVE_FCNTL_H // Clash with os_win32.h *** ../vim-8.2.2177/src/if_python3.c 2020-10-21 21:01:56.070323838 +0200 --- src/if_python3.c 2020-12-21 15:59:04.891509142 +0100 *************** *** 81,92 **** // Python 3 does not support CObjects, always use Capsules #define PY_USE_CAPSULE #define PyInt Py_ssize_t #ifndef PyString_Check # define PyString_Check(obj) PyUnicode_Check(obj) #endif #define PyString_FromString(repr) \ ! PyUnicode_Decode(repr, STRLEN(repr), ENC_OPT, NULL) #define PyString_FromFormat PyUnicode_FromFormat #ifndef PyInt_Check # define PyInt_Check(obj) PyLong_Check(obj) --- 81,95 ---- // Python 3 does not support CObjects, always use Capsules #define PY_USE_CAPSULE + #define ERRORS_DECODE_ARG CODEC_ERROR_HANDLER + #define ERRORS_ENCODE_ARG ERRORS_DECODE_ARG + #define PyInt Py_ssize_t #ifndef PyString_Check # define PyString_Check(obj) PyUnicode_Check(obj) #endif #define PyString_FromString(repr) \ ! PyUnicode_Decode(repr, STRLEN(repr), ENC_OPT, ERRORS_DECODE_ARG) #define PyString_FromFormat PyUnicode_FromFormat #ifndef PyInt_Check # define PyInt_Check(obj) PyLong_Check(obj) *************** *** 1088,1095 **** // PyRun_SimpleString expects a UTF-8 string. Wrong encoding may cause // SyntaxError (unicode error). cmdstr = PyUnicode_Decode(cmd, strlen(cmd), ! (char *)ENC_OPT, CODEC_ERROR_HANDLER); ! cmdbytes = PyUnicode_AsEncodedString(cmdstr, "utf-8", CODEC_ERROR_HANDLER); Py_XDECREF(cmdstr); run(PyBytes_AsString(cmdbytes), arg, &pygilstate); --- 1091,1098 ---- // PyRun_SimpleString expects a UTF-8 string. Wrong encoding may cause // SyntaxError (unicode error). cmdstr = PyUnicode_Decode(cmd, strlen(cmd), ! (char *)ENC_OPT, ERRORS_DECODE_ARG); ! cmdbytes = PyUnicode_AsEncodedString(cmdstr, "utf-8", ERRORS_ENCODE_ARG); Py_XDECREF(cmdstr); run(PyBytes_AsString(cmdbytes), arg, &pygilstate); *************** *** 1745,1751 **** } *p = '\0'; ! result = PyUnicode_Decode(tmp, len, (char *)ENC_OPT, CODEC_ERROR_HANDLER); vim_free(tmp); return result; --- 1748,1754 ---- } *p = '\0'; ! result = PyUnicode_Decode(tmp, len, (char *)ENC_OPT, ERRORS_DECODE_ARG); vim_free(tmp); return result; *** ../vim-8.2.2177/src/testdir/test_python3.vim 2020-12-18 19:49:52.349571840 +0100 --- src/testdir/test_python3.vim 2020-12-21 15:55:49.472504472 +0100 *************** *** 4008,4011 **** --- 4008,4018 ---- call assert_equal(1, g:options_iter_ref_count_increase) endfunc + func Test_python3_non_utf8_string() + smap @ + py3 vim.command('redir => _tmp_smaps | smap | redir END') + py3 vim.eval('_tmp_smaps').splitlines() + sunmap @ + endfunc + " vim: shiftwidth=2 sts=2 expandtab *** ../vim-8.2.2177/src/testdir/test_python2.vim 2020-12-18 19:49:52.349571840 +0100 --- src/testdir/test_python2.vim 2020-12-21 15:58:55.695554566 +0100 *************** *** 3775,3778 **** --- 3775,3785 ---- close! endfunc + func Test_python_non_utf8_string() + smap @ + python vim.command('redir => _tmp_smaps | smap | redir END') + python vim.eval('_tmp_smaps').splitlines() + sunmap @ + endfunc + " vim: shiftwidth=2 sts=2 expandtab *** ../vim-8.2.2177/src/version.c 2020-12-21 14:54:28.844116987 +0100 --- src/version.c 2020-12-21 16:02:40.610475850 +0100 *************** *** 752,753 **** --- 752,755 ---- { /* Add new patch number below this line */ + /**/ + 2178, /**/ -- For society, it's probably a good thing that engineers value function over appearance. For example, you wouldn't want engineers to build nuclear power plants that only _look_ like they would keep all the radiation inside. (Scott Adams - The Dilbert principle) /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ an exciting new programming language -- http://www.Zimbu.org /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///