类型提示

PEP 484,它提供了一个关于 Python3 中类型系统应该是什么样子的规范,引入了类型提示的概念。此外,为了更好地理解类型提示的设计理念,阅读 PEP 483 至关重要,这将有助于 Python 开发者理解 Python 引入类型系统的原因。这份速查表的主要目标是展示 Python3 中一些关于类型提示的常见用法。

无类型检查

def fib(n):
    a, b = 0, 1
    for _ in range(n):
        yield a
        b, a = a + b, b

print([n for n in fib(3.6)])

输出

# errors will not be detected until runtime

$ python fib.py
Traceback (most recent call last):
  File "fib.py", line 8, in <module>
    print([n for n in fib(3.5)])
  File "fib.py", line 8, in <listcomp>
    print([n for n in fib(3.5)])
  File "fib.py", line 3, in fib
    for _ in range(n):
TypeError: 'float' object cannot be interpreted as an integer

有类型检查

# give a type hint
from typing import Generator

def fib(n: int) -> Generator:
    a: int = 0
    b: int = 1
    for _ in range(n):
        yield a
        b, a = a + b, b

print([n for n in fib(3.6)])

输出

# errors will be detected before running

$ mypy --strict fib.py
fib.py:12: error: Argument 1 to "fib" has incompatible type "float"; expected "int"

基本类型

import io
import re

from collections import deque, namedtuple
from typing import (
    Dict,
    List,
    Tuple,
    Set,
    Deque,
    NamedTuple,
    IO,
    Pattern,
    Match,
    Text,
    Optional,
    Sequence,
    Iterable,
    Mapping,
    MutableMapping,
    Any,
)

# without initializing
x: int

# any type
y: Any
y = 1
y = "1"

# built-in
var_int: int = 1
var_str: str = "Hello Typing"
var_byte: bytes = b"Hello Typing"
var_bool: bool = True
var_float: float = 1.
var_unicode: Text = u'\u2713'

# could be none
var_could_be_none: Optional[int] = None
var_could_be_none = 1

# collections
var_set: Set[int] = {i for i in range(3)}
var_dict: Dict[str, str] = {"foo": "Foo"}
var_list: List[int] = [i for i in range(3)]
var_static_length_Tuple: Tuple[int, int, int] = (1, 2, 3)
var_dynamic_length_Tuple: Tuple[int, ...] = (i for i in range(10, 3))
var_deque: Deque = deque([1, 2, 3])
var_nametuple: NamedTuple = namedtuple('P', ['x', 'y'])

# io
var_io_str: IO[str] = io.StringIO("Hello String")
var_io_byte: IO[bytes] = io.BytesIO(b"Hello Bytes")
var_io_file_str: IO[str] = open(__file__)
var_io_file_byte: IO[bytes] = open(__file__, 'rb')

# re
p: Pattern = re.compile("(https?)://([^/\r\n]+)(/[^\r\n]*)?")
m: Optional[Match] = p.match("https://pythonlang.cn/")

# duck types: list-like
var_seq_list: Sequence[int] = [1, 2, 3]
var_seq_tuple: Sequence[int] = (1, 2, 3)
var_iter_list: Iterable[int] = [1, 2, 3]
var_iter_tuple: Iterable[int] = (1, 2, 3)

# duck types: dict-like
var_map_dict: Mapping[str, str] = {"foo": "Foo"}
var_mutable_dict: MutableMapping[str, str] = {"bar": "Bar"}

函数

from typing import Generator, Callable

# function
def gcd(a: int, b: int) -> int:
    while b:
        a, b = b, a % b
    return a

# callback
def fun(cb: Callable[[int, int], int]) -> int:
    return cb(55, 66)

# lambda
f: Callable[[int], int] = lambda x: x * 2

from typing import ClassVar, Dict, List

class Foo:

    x: int = 1  # instance variable. default = 1
    y: ClassVar[str] = "class var"  # class variable

    def __init__(self) -> None:
        self.i: List[int] = [0]

    def foo(self, a: int, b: str) -> Dict[int, str]:
        return {a: b}

foo = Foo()
foo.x = 123

print(foo.x)
print(foo.i)
print(Foo.y)
print(foo.foo(1, "abc"))

生成器

from typing import Generator

# Generator[YieldType, SendType, ReturnType]
def fib(n: int) -> Generator[int, None, None]:
    a: int = 0
    b: int = 1
    while n > 0:
        yield a
        b, a = a + b, b
        n -= 1

g: Generator = fib(10)
i: Iterator[int] = (x for x in range(3))

异步生成器

import asyncio

from typing import AsyncGenerator, AsyncIterator

async def fib(n: int) -> AsyncGenerator:
    a: int = 0
    b: int = 1
    while n > 0:
        await asyncio.sleep(0.1)
        yield a

        b, a = a + b, b
        n -= 1

async def main() -> None:
    async for f in fib(10):
        print(f)

    ag: AsyncIterator = (f async for f in fib(10))

loop = asyncio.get_event_loop()
loop.run_until_complete(main())

上下文管理器

from typing import ContextManager, Generator, IO
from contextlib import contextmanager

@contextmanager
def open_file(name: str) -> Generator:
    f = open(name)
    yield f
    f.close()

cm: ContextManager[IO] = open_file(__file__)
with cm as f:
    print(f.read())

异步上下文管理器

import asyncio

from typing import AsyncContextManager, AsyncGenerator, IO
from contextlib import asynccontextmanager

# need python 3.7 or above
@asynccontextmanager
async def open_file(name: str) -> AsyncGenerator:
    await asyncio.sleep(0.1)
    f = open(name)
    yield f
    await asyncio.sleep(0.1)
    f.close()

async def main() -> None:
    acm: AsyncContextManager[IO] = open_file(__file__)
    async with acm as f:
        print(f.read())

loop = asyncio.get_event_loop()
loop.run_until_complete(main())

避免访问 None

import re

from typing import Pattern, Dict, Optional

# like c++
# std::regex url("(https?)://([^/\r\n]+)(/[^\r\n]*)?");
# std::regex color("^#?([a-f0-9]{6}|[a-f0-9]{3})$");

url: Pattern = re.compile("(https?)://([^/\r\n]+)(/[^\r\n]*)?")
color: Pattern = re.compile("^#?([a-f0-9]{6}|[a-f0-9]{3})$")

x: Dict[str, Pattern] = {"url": url, "color": color}
y: Optional[Pattern] = x.get("baz", None)

print(y.match("https://pythonlang.cn/"))

输出

$ mypy --strict foo.py
foo.py:15: error: Item "None" of "Optional[Pattern[Any]]" has no attribute "match"

仅限位置参数

# define arguments with names beginning with __

def fib(__n: int) -> int:  # positional only arg
    a, b = 0, 1
    for _ in range(__n):
        b, a = a + b, b
    return a


def gcd(*, a: int, b: int) -> int:  # keyword only arg
    while b:
        a, b = b, a % b
    return a


print(fib(__n=10))  # error
print(gcd(10, 5))   # error

输出

mypy --strict foo.py
foo.py:1: note: "fib" defined here
foo.py:14: error: Unexpected keyword argument "__n" for "fib"
foo.py:15: error: Too many positional arguments for "gcd"

多个返回值

from typing import Tuple, Iterable, Union

def foo(x: int, y: int) -> Tuple[int, int]:
    return x, y

# or

def bar(x: int, y: str) -> Iterable[Union[int, str]]:
    # XXX: not recommend declaring in this way
    return x, y

a: int
b: int
a, b = foo(1, 2)      # ok
c, d = bar(3, "bar")  # ok

Union[Any, None] == Optional[Any]

from typing import List, Union

def first(l: List[Union[int, None]]) -> Union[int, None]:
    return None if len(l) == 0 else l[0]

first([None])

# equal to

from typing import List, Optional

def first(l: List[Optional[int]]) -> Optional[int]:
    return None if len(l) == 0 else l[0]

first([None])

小心使用 Optional

from typing import cast, Optional

def fib(n):
    a, b = 0, 1
    for _ in range(n):
        b, a = a + b, b
    return a

def cal(n: Optional[int]) -> None:
    print(fib(n))

cal(None)

输出

# mypy will not detect errors
$ mypy foo.py

显式声明

from typing import Optional

def fib(n: int) -> int:  # declare n to be int
    a, b = 0, 1
    for _ in range(n):
        b, a = a + b, b
    return a

def cal(n: Optional[int]) -> None:
    print(fib(n))

输出

# mypy can detect errors even we do not check None
$ mypy --strict foo.py
foo.py:11: error: Argument 1 to "fib" has incompatible type "Optional[int]"; expected "int"

小心类型转换

from typing import cast, Optional

def gcd(a: int, b: int) -> int:
    while b:
        a, b = b, a % b
    return a

def cal(a: Optional[int], b: Optional[int]) -> None:
    # XXX: Avoid casting
    ca, cb = cast(int, a), cast(int, b)
    print(gcd(ca, cb))

cal(None, None)

输出

# mypy will not detect type errors
$ mypy --strict foo.py

前向引用

根据 PEP 484,如果我们想在类型声明之前引用一个类型,我们必须使用**字符串字面量**来暗示在文件后面存在一个具有该名称的类型。

from typing import Optional


class Tree:
    def __init__(
        self, data: int,
        left: Optional["Tree"],  # Forward references.
        right: Optional["Tree"]
    ) -> None:
        self.data = data
        self.left = left
        self.right = right

注意

mypy 有一些问题不会对前向引用进行报错。更多信息请参见 Issue#948

class A:
    def __init__(self, a: A) -> None:  # should fail
        self.a = a

输出

$ mypy --strict type.py
$ echo $?
0
$ python type.py   # get runtime fail
Traceback (most recent call last):
  File "type.py", line 1, in <module>
    class A:
  File "type.py", line 2, in A
    def __init__(self, a: A) -> None:  # should fail
NameError: name 'A' is not defined

注解的延迟求值

Python 3.7 新特性

  • PEP 563 - 注解的延迟求值

Python 3.7 之前

>>> class A:
...     def __init__(self, a: A) -> None:
...         self._a = a
...
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "<stdin>", line 2, in A
NameError: name 'A' is not defined

Python 3.7 及之后 (包括 3.7)

>>> from __future__ import annotations
>>> class A:
...     def __init__(self, a: A) -> None:
...         self._a = a
...

注意

注解只能在名称已经存在的作用域内使用。因此,**前向引用**不支持在当前作用域中名称不可用的情况。**注解的延迟求值**将在 Python 4.0 中成为默认行为。

类型别名

类似于 c/c++ 中的 typedefusing

#include <iostream>
#include <string>
#include <regex>
#include <vector>

typedef std::string Url;
template<typename T> using Vector = std::vector<T>;

int main(int argc, char *argv[])
{
    Url url = "https://pythonlang.cn";
    std::regex p("(https?)://([^/\r\n]+)(/[^\r\n]*)?");
    bool m = std::regex_match(url, p);
    Vector<int> v = {1, 2};

    std::cout << m << std::endl;
    for (auto it : v) std::cout << it << std::endl;
    return 0;
}

类型别名通过简单的变量赋值来定义

import re

from typing import Pattern, List

# Like typedef, using in c/c++

# PEP 484 recommend capitalizing alias names
Url = str

url: Url = "https://pythonlang.cn/"

p: Pattern = re.compile("(https?)://([^/\r\n]+)(/[^\r\n]*)?")
m = p.match(url)

Vector = List[int]
v: Vector = [1., 2.]

定义 NewType

与别名不同,NewType 返回一个单独的类型,但在运行时与原始类型相同。

from sqlalchemy import Column, String, Integer
from sqlalchemy.ext.declarative import declarative_base
from typing import NewType, Any

# check mypy #2477
Base: Any = declarative_base()

# create a new type
Id = NewType('Id', int) # not equal alias, it's a 'new type'

class User(Base):
    __tablename__ = 'User'
    id = Column(Integer, primary_key=True)
    age = Column(Integer, nullable=False)
    name = Column(String, nullable=False)

    def __init__(self, id: Id, age: int, name: str) -> None:
        self.id = id
        self.age = age
        self.name = name

# create users
user1 = User(Id(1), 62, "Guido van Rossum") # ok
user2 = User(2, 48, "David M. Beazley")     # error

输出

$ python foo.py
$ mypy --ignore-missing-imports foo.py
foo.py:24: error: Argument 1 to "User" has incompatible type "int"; expected "Id"

进一步阅读

使用 TypeVar 作为模板

类似于 c++ template <typename T>

#include <iostream>

template <typename T>
T add(T x, T y) {
    return x + y;
}

int main(int argc, char *argv[])
{
    std::cout << add(1, 2) << std::endl;
    std::cout << add(1., 2.) << std::endl;
    return 0;
}

Python 使用 TypeVar

from typing import TypeVar

T = TypeVar("T")

def add(x: T, y: T) -> T:
    return x + y

add(1, 2)
add(1., 2.)

使用 TypeVarGeneric 作为类模板

类似于 c++ template <typename T> class

#include <iostream>

template<typename T>
class Foo {
public:
    Foo(T foo) {
        foo_ = foo;
    }
    T Get() {
        return foo_;
    }
private:
    T foo_;
};

int main(int argc, char *argv[])
{
    Foo<int> f(123);
    std::cout << f.Get() << std::endl;
    return 0;
}

在 Python 中定义泛型类

from typing import Generic, TypeVar

T = TypeVar("T")

class Foo(Generic[T]):
    def __init__(self, foo: T) -> None:
        self.foo = foo

    def get(self) -> T:
        return self.foo

f: Foo[str] = Foo("Foo")
v: int = f.get()

输出

$ mypy --strict foo.py
foo.py:13: error: Incompatible types in assignment (expression has type "str", variable has type "int")

TypeVar 的作用域规则

  • 在不同的泛型函数中使用的 TypeVar 将被推断为不同的类型。

from typing import TypeVar

T = TypeVar("T")

def foo(x: T) -> T:
    return x

def bar(y: T) -> T:
    return y

a: int = foo(1)    # ok: T is inferred to be int
b: int = bar("2")  # error: T is inferred to be str

输出

$ mypy --strict foo.py
foo.py:12: error: Incompatible types in assignment (expression has type "str", variable has type "int")
  • 在泛型类中使用的 TypeVar 将被推断为相同的类型。

from typing import TypeVar, Generic

T = TypeVar("T")

class Foo(Generic[T]):

    def foo(self, x: T) -> T:
        return x

    def bar(self, y: T) -> T:
        return y

f: Foo[int] = Foo()
a: int = f.foo(1)    # ok: T is inferred to be int
b: str = f.bar("2")  # error: T is expected to be int

输出

$ mypy --strict foo.py
foo.py:15: error: Incompatible types in assignment (expression has type "int", variable has type "str")
foo.py:15: error: Argument 1 to "bar" of "Foo" has incompatible type "str"; expected "int"
  • 在方法中使用 TypeVar 但与在 Generic 中声明的任何参数都不匹配,可以被推断为不同的类型。

from typing import TypeVar, Generic

T = TypeVar("T")
S = TypeVar("S")

class Foo(Generic[T]):    # S does not match params

    def foo(self, x: T, y: S) -> S:
        return y

    def bar(self, z: S) -> S:
        return z

f: Foo[int] = Foo()
a: str = f.foo(1, "foo")  # S is inferred to be str
b: int = f.bar(12345678)  # S is inferred to be int

输出

$  mypy --strict foo.py
  • 如果 TypeVar 是未绑定类型,则不应出现在方法/函数的主体中。

from typing import TypeVar, Generic

T = TypeVar("T")
S = TypeVar("S")

def foo(x: T) -> None:
    a: T = x    # ok
    b: S = 123  # error: invalid type

输出

$ mypy --strict foo.py
foo.py:8: error: Invalid type "foo.S"

限制为一组固定的可能类型

T = TypeVar('T', ClassA, ...) 表示我们创建一个**具有值限制的类型变量**。

from typing import TypeVar

# restrict T = int or T = float
T = TypeVar("T", int, float)

def add(x: T, y: T) -> T:
    return x + y

add(1, 2)
add(1., 2.)
add("1", 2)
add("hello", "world")

输出

# mypy can detect wrong type
$ mypy --strict foo.py
foo.py:10: error: Value of type variable "T" of "add" cannot be "object"
foo.py:11: error: Value of type variable "T" of "add" cannot be "str"

TypeVar 带有上限

T = TypeVar('T', bound=BaseClass) 表示我们创建一个**具有上限的类型变量**。这个概念类似于 c++ 中的**多态**。

#include <iostream>

class Shape {
public:
    Shape(double width, double height) {
        width_ = width;
        height_ = height;
    };
    virtual double Area() = 0;
protected:
    double width_;
    double height_;
};

class Rectangle: public Shape {
public:
    Rectangle(double width, double height)
    :Shape(width, height)
    {};

    double Area() {
        return width_ * height_;
    };
};

class Triangle: public Shape {
public:
    Triangle(double width, double height)
    :Shape(width, height)
    {};

    double Area() {
        return width_ * height_ / 2;
    };
};

double Area(Shape &s) {
    return s.Area();
}

int main(int argc, char *argv[])
{
    Rectangle r(1., 2.);
    Triangle t(3., 4.);

    std::cout << Area(r) << std::endl;
    std::cout << Area(t) << std::endl;
    return 0;
}

类似于 c++,创建一个基类和绑定到基类的 TypeVar。然后,静态类型检查器将每个子类都视为基类的类型。

from typing import TypeVar


class Shape:
    def __init__(self, width: float, height: float) -> None:
        self.width = width
        self.height = height

    def area(self) -> float:
        return 0


class Rectangle(Shape):
    def area(self) -> float:
        width: float = self.width
        height: float = self.height
        return width * height


class Triangle(Shape):
    def area(self) -> float:
        width: float = self.width
        height: float = self.height
        return width * height / 2


S = TypeVar("S", bound=Shape)


def area(s: S) -> float:
    return s.area()


r: Rectangle = Rectangle(1, 2)
t: Triangle = Triangle(3, 4)
i: int = 5566

print(area(r))
print(area(t))
print(area(i))

输出

$ mypy --strict foo.py
foo.py:40: error: Value of type variable "S" of "area" cannot be "int"

@overload

有时,我们使用 Union 来推断函数的返回值有多个不同的类型。但是,类型检查器无法区分我们想要哪种类型。因此,以下代码片段显示类型检查器无法确定哪种类型是正确的。

from typing import List, Union


class Array(object):
    def __init__(self, arr: List[int]) -> None:
        self.arr = arr

    def __getitem__(self, i: Union[int, str]) -> Union[int, str]:
        if isinstance(i, int):
            return self.arr[i]
        if isinstance(i, str):
            return str(self.arr[int(i)])


arr = Array([1, 2, 3, 4, 5])
x:int = arr[1]
y:str = arr["2"]

输出

$ mypy --strict foo.py
foo.py:16: error: Incompatible types in assignment (expression has type "Union[int, str]", variable has type "int")
foo.py:17: error: Incompatible types in assignment (expression has type "Union[int, str]", variable has type "str")

虽然我们可以使用 cast 来解决这个问题,但它不能避免拼写错误,并且 cast 不安全。

from typing import  List, Union, cast


class Array(object):
    def __init__(self, arr: List[int]) -> None:
        self.arr = arr

    def __getitem__(self, i: Union[int, str]) -> Union[int, str]:
        if isinstance(i, int):
            return self.arr[i]
        if isinstance(i, str):
            return str(self.arr[int(i)])


arr = Array([1, 2, 3, 4, 5])
x: int = cast(int, arr[1])
y: str = cast(str, arr[2])  # typo. we want to assign arr["2"]

输出

$ mypy --strict foo.py
$ echo $?
0

使用 @overload 可以解决这个问题。我们可以显式地声明返回类型。

from typing import Generic, List, Union, overload


class Array(object):
    def __init__(self, arr: List[int]) -> None:
        self.arr = arr

    @overload
    def __getitem__(self, i: str) -> str:
        ...

    @overload
    def __getitem__(self, i: int) -> int:
        ...

    def __getitem__(self, i: Union[int, str]) -> Union[int, str]:
        if isinstance(i, int):
            return self.arr[i]
        if isinstance(i, str):
            return str(self.arr[int(i)])


arr = Array([1, 2, 3, 4, 5])
x: int = arr[1]
y: str = arr["2"]

输出

$ mypy --strict foo.py
$ echo $?
0

警告

根据 PEP 484,@overload 装饰器仅**供类型检查器使用**,它没有像 c++/java 那样实现真正的重载。因此,我们必须实现一个完全非 @overload 的函数。在运行时,调用 @overload 函数将引发 NotImplementedError

from typing import List, Union, overload


class Array(object):
    def __init__(self, arr: List[int]) -> None:
        self.arr = arr

    @overload
    def __getitem__(self, i: Union[int, str]) -> Union[int, str]:
        if isinstance(i, int):
            return self.arr[i]
        if isinstance(i, str):
            return str(self.arr[int(i)])


arr = Array([1, 2, 3, 4, 5])
try:
    x: int = arr[1]
except NotImplementedError as e:
    print("NotImplementedError")

输出

$ python foo.py
NotImplementedError

存根文件

存根文件就像我们通常用于在 c/c++ 中定义接口的头文件一样。在 Python 中,我们可以在同一个模块目录或 export MYPYPATH=${stubs} 中定义我们的接口。

首先,我们需要为模块创建一个存根文件(接口文件)。

$ mkdir fib
$ touch fib/__init__.py fib/__init__.pyi

然后,在 __init__.pyi 中定义函数的接口并实现模块。

# fib/__init__.pyi
def fib(n: int) -> int: ...

# fib/__init__.py

def fib(n):
    a, b = 0, 1
    for _ in range(n):
        b, a = a + b, b
    return a

然后,编写一个 test.py 用于测试 fib 模块。

# touch test.py
import sys

from pathlib import Path

p = Path(__file__).parent / "fib"
sys.path.append(str(p))

from fib import fib

print(fib(10.0))

输出

$ mypy --strict test.py
test.py:10: error: Argument 1 to "fib" has incompatible type "float"; expected "int"