最近工作中遇到关于函数类型注释引起的错误,特此记录一下。

起因是公司的项目为了安全和执行速度,在发布时会使用 Cython 转为 C 语言并编译成动态连接库进行调用,但是有个函数在 Python 执行时正常,但是在动态连接库中却执行错误。

错误复现

测试用例 test.py

def ip_str(ips: str):
ips = [x for x in ips]

def ip(ips):
ips = [x for x in ips]

编译 compile.py

from setuptools import setup
from Cython.Build import cythonize

extensions = ["test.py",]

setup(name='test',
ext_modules=cythonize(extensions)
)

运行编译:

python compile.py build

编译之后进入对应 so 文件目录 build/lib-{平台架构},运行对比:

>>> import test
>>> test.ip("123")
>>> test.ip_str("123")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "test.py", line 5, in test.ip_str
TypeError: Expected str, got list

经过调查发现,当函数变量做了类型注释时,不能重新赋值为其他类型,否则会在 Cython 编译后执行时报错。

Cython 可以在编译时推断出部分简单的错误,比如:

def ip1():
ips: str = '1'
ips = ['1']
Error compiling Cython file:
------------------------------------------------------------
...

def ip1():
ips: str = '1'
ips = ['1']
^
------------------------------------------------------------

test.py:10:10: Cannot coerce list to type 'str object'

但如果代码比较复杂,则只能在运行时才会出错。所以上述错误只能在执行的时候才被抛出。

原因

Cython 将 Python 转为 C 代码比较后类型注释与否代码比较:

  • 没有类型注释:

    /* "test.py":2
    *
    * def ip_str(ips: str): # <<<<<<<<<<<<<<
    * ips = [x for x in ips]
    *
    */

    /* Python wrapper */
    static PyObject *__pyx_pw_4test_1ip_str(PyObject *__pyx_self, PyObject *__pyx_v_ips); /*proto*/
    static PyMethodDef __pyx_mdef_4test_1ip_str = {"ip_str", (PyCFunction)__pyx_pw_4test_1ip_str, METH_O, 0};
    static PyObject *__pyx_pw_4test_1ip_str(PyObject *__pyx_self, PyObject *__pyx_v_ips) {
    PyObject *__pyx_r = 0;
    __Pyx_RefNannyDeclarations
    __Pyx_RefNannySetupContext("ip_str (wrapper)", 0);
    if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_ips), (&PyString_Type), 1, "ips", 1))) __PYX_ERR(0, 2, __pyx_L1_error)
    __pyx_r = __pyx_pf_4test_ip_str(__pyx_self, ((PyObject*)__pyx_v_ips));

    /* function exit code */
    goto __pyx_L0;
    __pyx_L1_error:;
    __pyx_r = NULL;
    __pyx_L0:;
    __Pyx_RefNannyFinishContext();
    return __pyx_r;
    }

    static PyObject *__pyx_pf_4test_ip_str(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_ips) {
    PyObject *__pyx_v_x = NULL;
    PyObject *__pyx_r = NULL;
    __Pyx_RefNannyDeclarations
    PyObject *__pyx_t_1 = NULL;
    PyObject *__pyx_t_2 = NULL;
    PyObject *(*__pyx_t_3)(PyObject *);
    PyObject *__pyx_t_4 = NULL;
    __Pyx_RefNannySetupContext("ip_str", 0);
    __Pyx_INCREF(__pyx_v_ips);

    /* "test.py":3
    *
    * def ip_str(ips: str):
    * ips = [x for x in ips] # <<<<<<<<<<<<<<
    *
    * def ip(ips):
    */
    __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 3, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __pyx_t_2 = PyObject_GetIter(__pyx_v_ips); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 3, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_2);
    __pyx_t_3 = Py_TYPE(__pyx_t_2)->tp_iternext; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 3, __pyx_L1_error)
    for (;;) {
    {
    __pyx_t_4 = __pyx_t_3(__pyx_t_2);
    if (unlikely(!__pyx_t_4)) {
    PyObject* exc_type = PyErr_Occurred();
    if (exc_type) {
    if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear();
    else __PYX_ERR(0, 3, __pyx_L1_error)
    }
    break;
    }
    __Pyx_GOTREF(__pyx_t_4);
    }
    __Pyx_XDECREF_SET(__pyx_v_x, __pyx_t_4);
    __pyx_t_4 = 0;
    if (unlikely(__Pyx_ListComp_Append(__pyx_t_1, (PyObject*)__pyx_v_x))) __PYX_ERR(0, 3, __pyx_L1_error)
    }
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
    if (!(likely(PyString_CheckExact(__pyx_t_1))||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "str", Py_TYPE(__pyx_t_1)->tp_name), 0))) __PYX_ERR(0, 3, __pyx_L1_error)
    __Pyx_DECREF_SET(__pyx_v_ips, ((PyObject*)__pyx_t_1));
    __pyx_t_1 = 0;
  • 有类型注释:

    /* "test.py":2
    *
    * def ip_str(ips: str): # <<<<<<<<<<<<<<
    * ips = [x for x in ips]
    *
    */

    /* function exit code */
    __pyx_r = Py_None; __Pyx_INCREF(Py_None);
    goto __pyx_L0;
    __pyx_L1_error:;
    __Pyx_XDECREF(__pyx_t_1);
    __Pyx_XDECREF(__pyx_t_2);
    __Pyx_XDECREF(__pyx_t_4);
    __Pyx_AddTraceback("test.ip_str", __pyx_clineno, __pyx_lineno, __pyx_filename);
    __pyx_r = NULL;
    __pyx_L0:;
    __Pyx_XDECREF(__pyx_v_x);
    __Pyx_XDECREF(__pyx_v_ips);
    __Pyx_XGIVEREF(__pyx_r);
    __Pyx_RefNannyFinishContext();
    return __pyx_r;
    }

    /* "test.py":5
    * ips = [x for x in ips]
    *
    * def ip(ips): # <<<<<<<<<<<<<<
    * ips = [x for x in ips]
    */

    /* Python wrapper */
    static PyObject *__pyx_pw_4test_3ip(PyObject *__pyx_self, PyObject *__pyx_v_ips); /*proto*/
    static PyMethodDef __pyx_mdef_4test_3ip = {"ip", (PyCFunction)__pyx_pw_4test_3ip, METH_O, 0};
    static PyObject *__pyx_pw_4test_3ip(PyObject *__pyx_self, PyObject *__pyx_v_ips) {
    PyObject *__pyx_r = 0;
    __Pyx_RefNannyDeclarations
    __Pyx_RefNannySetupContext("ip (wrapper)", 0);
    __pyx_r = __pyx_pf_4test_2ip(__pyx_self, ((PyObject *)__pyx_v_ips));

    /* function exit code */
    __Pyx_RefNannyFinishContext();
    return __pyx_r;
    }

    static PyObject *__pyx_pf_4test_2ip(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_ips) {
    PyObject *__pyx_v_x = NULL;
    PyObject *__pyx_r = NULL;
    __Pyx_RefNannyDeclarations
    PyObject *__pyx_t_1 = NULL;
    PyObject *__pyx_t_2 = NULL;
    Py_ssize_t __pyx_t_3;
    PyObject *(*__pyx_t_4)(PyObject *);
    PyObject *__pyx_t_5 = NULL;
    __Pyx_RefNannySetupContext("ip", 0);
    __Pyx_INCREF(__pyx_v_ips);

    /* "test.py":6
    *
    * def ip(ips):
    * ips = [x for x in ips] # <<<<<<<<<<<<<<
    */
    __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 6, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    if (likely(PyList_CheckExact(__pyx_v_ips)) || PyTuple_CheckExact(__pyx_v_ips)) {
    __pyx_t_2 = __pyx_v_ips; __Pyx_INCREF(__pyx_t_2); __pyx_t_3 = 0;
    __pyx_t_4 = NULL;
    } else {
    __pyx_t_3 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_v_ips); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 6, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_2);
    __pyx_t_4 = Py_TYPE(__pyx_t_2)->tp_iternext; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 6, __pyx_L1_error)
    }
    for (;;) {
    if (likely(!__pyx_t_4)) {
    if (likely(PyList_CheckExact(__pyx_t_2))) {
    if (__pyx_t_3 >= PyList_GET_SIZE(__pyx_t_2)) break;
    #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
    __pyx_t_5 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_5); __pyx_t_3++; if (unlikely(0 < 0)) __PYX_ERR(0, 6, __pyx_L1_error)
    #else
    __pyx_t_5 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 6, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_5);
    #endif
    } else {
    if (__pyx_t_3 >= PyTuple_GET_SIZE(__pyx_t_2)) break;
    #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
    __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_5); __pyx_t_3++; if (unlikely(0 < 0)) __PYX_ERR(0, 6, __pyx_L1_error)
    #else
    __pyx_t_5 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 6, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_5);
    #endif
    }
    } else {
    __pyx_t_5 = __pyx_t_4(__pyx_t_2);
    if (unlikely(!__pyx_t_5)) {
    PyObject* exc_type = PyErr_Occurred();
    if (exc_type) {
    if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear();
    else __PYX_ERR(0, 6, __pyx_L1_error)
    }
    break;
    }
    __Pyx_GOTREF(__pyx_t_5);
    }
    __Pyx_XDECREF_SET(__pyx_v_x, __pyx_t_5);
    __pyx_t_5 = 0;
    if (unlikely(__Pyx_ListComp_Append(__pyx_t_1, (PyObject*)__pyx_v_x))) __PYX_ERR(0, 6, __pyx_L1_error)
    }
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
    __Pyx_DECREF_SET(__pyx_v_ips, __pyx_t_1);
    __pyx_t_1 = 0;

主要区别在于,类型注释增加了变量检测 __Pyx_ArgTypeTest ,以及之后赋值 ips 时的类型检测 PyString_CheckExact;没有变量类型注释则进行了变量推测,判断是否为List( PyList_CheckExact)或者Tuple ( PyTuple_CheckExact),还是可迭代类型。

结论

  1. 类型注释在编译后会简化处理流程;
  2. 类型注释的变量不能赋值为其他类型。

GitHub repo: qiwihui/blog

Follow me: @qiwihui

Site: QIWIHUI

Comments