Python 函数变量类型注释会导致用 Cython 编译后执行与直接执行结果不一致
最近工作中遇到关于函数类型注释引起的错误,特此记录一下。
起因是公司的项目为了安全和执行速度,在发布时会使用 Cython 转为 C 语言并编译成动态连接库进行调用,但是有个函数在 Python 执行时正常,但是在动态连接库中却执行错误。
错误复现
测试用例 test.py
:
def ip_str(ips: str):
ips = [x for x in ips]
def ip(ips):
ips = [x for x in ips]
编译 compile.py
:
from setuptools import setup
from Cython.Build import cythonize
extensions = ["test.py",]
setup(name='test',
ext_modules=cythonize(extensions)
)
运行编译:
python compile.py build
编译之后进入对应 so 文件目录 build/lib-{平台架构}
,运行对比:
>>> import test
>>> test.ip("123")
>>> test.ip_str("123")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "test.py", line 5, in test.ip_str
TypeError: Expected str, got list
经过调查发现,当函数变量做了类型注释时,不能重新赋值为其他类型,否则会在 Cython 编译后执行时报错。
Cython 可以在编译时推断出部分简单的错误,比如:
def ip1():
ips: str = '1'
ips = ['1']
Error compiling Cython file:
------------------------------------------------------------
...
def ip1():
ips: str = '1'
ips = ['1']
^
------------------------------------------------------------
test.py:10:10: Cannot coerce list to type 'str object'
但如果代码比较复杂,则只能在运行时才会出错。所以上述错误只能在执行的时候才被抛出。
原因
Cython 将 Python 转为 C 代码比较后类型注释与否代码比较:
-
没有类型注释:
/* "test.py":2 * * def ip_str(ips: str): # <<<<<<<<<<<<<< * ips = [x for x in ips] * */ /* Python wrapper */ static PyObject *__pyx_pw_4test_1ip_str(PyObject *__pyx_self, PyObject *__pyx_v_ips); /*proto*/ static PyMethodDef __pyx_mdef_4test_1ip_str = {"ip_str", (PyCFunction)__pyx_pw_4test_1ip_str, METH_O, 0}; static PyObject *__pyx_pw_4test_1ip_str(PyObject *__pyx_self, PyObject *__pyx_v_ips) { PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("ip_str (wrapper)", 0); if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_ips), (&PyString_Type), 1, "ips", 1))) __PYX_ERR(0, 2, __pyx_L1_error) __pyx_r = __pyx_pf_4test_ip_str(__pyx_self, ((PyObject*)__pyx_v_ips)); /* function exit code */ goto __pyx_L0; __pyx_L1_error:; __pyx_r = NULL; __pyx_L0:; __Pyx_RefNannyFinishContext(); return __pyx_r; } static PyObject *__pyx_pf_4test_ip_str(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_ips) { PyObject *__pyx_v_x = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; PyObject *__pyx_t_2 = NULL; PyObject *(*__pyx_t_3)(PyObject *); PyObject *__pyx_t_4 = NULL; __Pyx_RefNannySetupContext("ip_str", 0); __Pyx_INCREF(__pyx_v_ips); /* "test.py":3 * * def ip_str(ips: str): * ips = [x for x in ips] # <<<<<<<<<<<<<< * * def ip(ips): */ __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 3, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_2 = PyObject_GetIter(__pyx_v_ips); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 3, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_3 = Py_TYPE(__pyx_t_2)->tp_iternext; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 3, __pyx_L1_error) for (;;) { { __pyx_t_4 = __pyx_t_3(__pyx_t_2); if (unlikely(!__pyx_t_4)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); else __PYX_ERR(0, 3, __pyx_L1_error) } break; } __Pyx_GOTREF(__pyx_t_4); } __Pyx_XDECREF_SET(__pyx_v_x, __pyx_t_4); __pyx_t_4 = 0; if (unlikely(__Pyx_ListComp_Append(__pyx_t_1, (PyObject*)__pyx_v_x))) __PYX_ERR(0, 3, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; if (!(likely(PyString_CheckExact(__pyx_t_1))||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "str", Py_TYPE(__pyx_t_1)->tp_name), 0))) __PYX_ERR(0, 3, __pyx_L1_error) __Pyx_DECREF_SET(__pyx_v_ips, ((PyObject*)__pyx_t_1)); __pyx_t_1 = 0;
-
有类型注释:
/* "test.py":2 * * def ip_str(ips: str): # <<<<<<<<<<<<<< * ips = [x for x in ips] * */ /* function exit code */ __pyx_r = Py_None; __Pyx_INCREF(Py_None); goto __pyx_L0; __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_2); __Pyx_XDECREF(__pyx_t_4); __Pyx_AddTraceback("test.ip_str", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XDECREF(__pyx_v_x); __Pyx_XDECREF(__pyx_v_ips); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* "test.py":5 * ips = [x for x in ips] * * def ip(ips): # <<<<<<<<<<<<<< * ips = [x for x in ips] */ /* Python wrapper */ static PyObject *__pyx_pw_4test_3ip(PyObject *__pyx_self, PyObject *__pyx_v_ips); /*proto*/ static PyMethodDef __pyx_mdef_4test_3ip = {"ip", (PyCFunction)__pyx_pw_4test_3ip, METH_O, 0}; static PyObject *__pyx_pw_4test_3ip(PyObject *__pyx_self, PyObject *__pyx_v_ips) { PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("ip (wrapper)", 0); __pyx_r = __pyx_pf_4test_2ip(__pyx_self, ((PyObject *)__pyx_v_ips)); /* function exit code */ __Pyx_RefNannyFinishContext(); return __pyx_r; } static PyObject *__pyx_pf_4test_2ip(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_ips) { PyObject *__pyx_v_x = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; PyObject *__pyx_t_2 = NULL; Py_ssize_t __pyx_t_3; PyObject *(*__pyx_t_4)(PyObject *); PyObject *__pyx_t_5 = NULL; __Pyx_RefNannySetupContext("ip", 0); __Pyx_INCREF(__pyx_v_ips); /* "test.py":6 * * def ip(ips): * ips = [x for x in ips] # <<<<<<<<<<<<<< */ __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 6, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (likely(PyList_CheckExact(__pyx_v_ips)) || PyTuple_CheckExact(__pyx_v_ips)) { __pyx_t_2 = __pyx_v_ips; __Pyx_INCREF(__pyx_t_2); __pyx_t_3 = 0; __pyx_t_4 = NULL; } else { __pyx_t_3 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_v_ips); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 6, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_4 = Py_TYPE(__pyx_t_2)->tp_iternext; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 6, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_4)) { if (likely(PyList_CheckExact(__pyx_t_2))) { if (__pyx_t_3 >= PyList_GET_SIZE(__pyx_t_2)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS __pyx_t_5 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_5); __pyx_t_3++; if (unlikely(0 < 0)) __PYX_ERR(0, 6, __pyx_L1_error) #else __pyx_t_5 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 6, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); #endif } else { if (__pyx_t_3 >= PyTuple_GET_SIZE(__pyx_t_2)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_5); __pyx_t_3++; if (unlikely(0 < 0)) __PYX_ERR(0, 6, __pyx_L1_error) #else __pyx_t_5 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 6, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); #endif } } else { __pyx_t_5 = __pyx_t_4(__pyx_t_2); if (unlikely(!__pyx_t_5)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); else __PYX_ERR(0, 6, __pyx_L1_error) } break; } __Pyx_GOTREF(__pyx_t_5); } __Pyx_XDECREF_SET(__pyx_v_x, __pyx_t_5); __pyx_t_5 = 0; if (unlikely(__Pyx_ListComp_Append(__pyx_t_1, (PyObject*)__pyx_v_x))) __PYX_ERR(0, 6, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF_SET(__pyx_v_ips, __pyx_t_1); __pyx_t_1 = 0;
主要区别在于,类型注释增加了变量检测 __Pyx_ArgTypeTest
,以及之后赋值 ips 时的类型检测 PyString_CheckExact
;没有变量类型注释则进行了变量推测,判断是否为List( PyList_CheckExact
)或者Tuple ( PyTuple_CheckExact
),还是可迭代类型。
结论
- 类型注释在编译后会简化处理流程;
- 类型注释的变量不能赋值为其他类型。
GitHub repo: qiwihui/blog
Follow me: @qiwihui
Site: QIWIHUI