数据存储 models.py
api.py 定义一系列函数作为外部使用的接口,从而可以看出两者各自的应用场景
Requst
该模块最直观的是定义了一些类,作为存储模型,也就是规定了存储各个字段
用户输入了一些列参数: method, url, headers, files, data, params, auth, cookies, hooks, json
参数较多,首先把这些分散的参数汇聚成一个Request对象
每一个参数需要校验格式化,并且与系统默认的参数合并,PreparedRequest为每个参数定义了各自处理方法
PreparedRequest是最终send的合格数据,研究每个处理方法可以加深对HTTP协议的理解,以及处理用户输入的不确定性
# Reqeust对用户输入数据首次封装
# Request也提供了封装数据处理成格式数据的方法 prepare
class Request(RequestHooksMixin):
def __init__(self,
method=None, url=None, headers=None, files=None, data=None,
params=None, auth=None, cookies=None, hooks=None, json=None):
# 定义函数参数为None,内部再用三元表达式处理成可变的数据类型
data = [] if data is None else data
files = [] if files is None else files
headers = {} if headers is None else headers
params = {} if params is None else params
hooks = {} if hooks is None else hooks
self.hooks = default_hooks()
for (k, v) in list(hooks.items()):
self.register_hook(event=k, hook=v)
self.method = method
self.url = url
self.headers = headers
self.files = files
self.data = data
self.json = json
self.params = params
self.auth = auth
self.cookies = cookies
# 执行__init__内部代码时,self这个变量是已经创建好了,可以放心使用
# d = dict() # 可变类型dict
# ret = d # ret作为返回值, d修改时ret也会跟着变动,所以放心使用self
def __repr__(self):
return '<Request [%s]>' % (self.method) # 学着定义更加明确 '<类名[标识如post]>'
def prepare(self):
# 类的使用有时也很简单,p属性与方法的结合体
p = PreparedRequest()
# p.prepare可处理的参数,初始化自带参数,绑定方法定义的参数
# 绑定方法对一些列参数进行处理,可以有直接返回值
# 也可以直接使用p,因为p是可变类型,并且还是属性和方法的结合体
p.prepare(
method=self.method,
url=self.url,
headers=self.headers,
files=self.files,
data=self.data,
json=self.json,
params=self.params,
auth=self.auth,
cookies=self.cookies,
hooks=self.hooks,
)
# pp 很神奇
return p
PreparedRequest
PreparedRequest对象p是最终发送时符合HTTP协议规范的数据集合。parepare接收的10个参数经过处理封装到self属性上,结合HTTP协议,分析每个self.prepare_*绑定方法
class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):
def __init__(self):
self.method = None
self.url = None
self.headers = None
self._cookies = None
self.body = None
self.hooks = default_hooks()
self._body_position = None
def prepare(self,
method=None, url=None, headers=None, files=None, data=None,
params=None, auth=None, cookies=None, hooks=None, json=None)
self.prepare_method(method)
self.prepare_url(url, params)
self.prepare_headers(headers)
self.prepare_cookies(cookies)
self.prepare_body(data, files, json)
self.prepare_auth(auth, url)
self.prepare_hooks(hooks)
prepare_method
def prepare_method(self, method):
# self == p 数据存储容器
self.method = method
# 解决Pyhton 2 3 兼容性问题,以及method输入支持b'post'格式,最终统一处理为'POST'
if self.method is not None:
self.method = to_native_string(self.method.upper())
prepare_url
http://username:password@www.example.com:80/dir/index.html?uid=1#ch1
scheme 协议名 http:或https: 不区分大小写 最后附一个冒号(:)
auth 登录信息(认证)
host 服务器地址
port 服务器端口
path 带层次的文件路径
query 查询字符串
fragment 片段标识符
def prepare_url(self, url, params):
# str bytes unicode 是跨不过去的坎
if isinstance(url, bytes):
url = url.decode('utf8')
else:
url = unicode(url) if is_py2 else str(url)
# 后续处理http开始的url
url = url.lstrip()
if ':' in url and not url.lower().startswith('http'):
self.url = url
return
try:
scheme, auth, host, port, path, query, fragment = parse_url(url)
except LocationParseError as e:
raise InvalidURL(*e.args)
if not scheme:
error = ("Invalid URL {0!r}: No schema supplied. Perhaps you meant http://{0}?")
error = error.format(to_native_string(url, 'utf8'))
raise MissingSchema(error)
if not host:
raise InvalidURL("Invalid URL %r: No host supplied" % url)
# 支持国际化域名
if not unicode_is_ascii(host):
try:
host = self._get_idna_encoded_host(host)
except UnicodeError:
raise InvalidURL('URL has an invalid label.')
elif host.startswith(u'*'):
raise InvalidURL('URL has an invalid label.')
# username:password@www.example.com:80
netloc = auth or ''
if netloc:
netloc += '@'
netloc += host
if port:
netloc += ':' + str(port)
if not path:
path = '/'
# get请求时支持 params参数的原因
enc_params = self._encode_params(params)
if enc_params:
if query:
query = '%s&%s' % (query, enc_params)
else:
query = enc_params
# 最后把处理好的各项重新拼接为url
url = requote_uri(urlunparse([scheme, netloc, path, None, query, fragment]))
# self == p 数据存储容器
self.url = url
prepare_headers
def prepare_headers(self, headers):
# self == p 数据存储容器, key值大小写不敏感
self.headers = CaseInsensitiveDict()
if headers:
for header in headers.items():
check_header_validity(header)
name, value = header
self.headers[to_native_string(name)] = value
prepare_cookies
# Cookie是headers中一表项,所以prepare_cookies放置于prepare_headers之后执行
def prepare_cookies(self, cookies):
# cookielib.CookieJar 数据结构单独分析
if isinstance(cookies, cookielib.CookieJar):
self._cookies = cookies
else:
self._cookies = cookiejar_from_dict(cookies)
cookie_header = get_cookie_header(self._cookies, self)
if cookie_header is not None:
self.headers['Cookie'] = cookie_header
prepare_body
def prepare_body(self, data, files, json=None):
body = None
content_type = None
# json 传入一个dict,会执行dumps(dict)
if not data and json is not None:
content_type = 'application/json'
body = complexjson.dumps(json)
if not isinstance(body, bytes):
body = body.encode('utf-8')
is_stream = all([
hasattr(data, '__iter__'),
not isinstance(data, (basestring, list, tuple, Mapping))
])
try:
length = super_len(data)
except (TypeError, AttributeError, UnsupportedOperation):
length = None
if is_stream:
body = data
if getattr(body, 'tell', None) is not None:
# Record the current file position before reading.
# This will allow us to rewind a file in the event
# of a redirect.
try:
self._body_position = body.tell()
except (IOError, OSError):
# This differentiates from None, allowing us to catch
# a failed `tell()` later when trying to rewind the body
self._body_position = object()
if files:
raise NotImplementedError('Streamed bodies and files are mutually exclusive.')
if length:
self.headers['Content-Length'] = builtin_str(length)
else:
self.headers['Transfer-Encoding'] = 'chunked'
else:
# Multi-part file uploads.
if files:
(body, content_type) = self._encode_files(files, data)
else:
if data:
body = self._encode_params(data)
if isinstance(data, basestring) or hasattr(data, 'read'):
content_type = None
else:
content_type = 'application/x-www-form-urlencoded'
self.prepare_content_length(body)
# Add content-type if it wasn't explicitly provided.
if content_type and ('content-type' not in self.headers):
self.headers['Content-Type'] = content_type
# self == p 数据存储容器
self.body = body
Response
requests会把HTTP返回的信息以对象的形式存储,那类Response就是存储的模型。模型一般是有初始值,在使用的过程中会赋值不同值,满足不同HTTP的返回对象。类Response必然符合HTTP返回信息的相关字段
返回状态码 status_code和描述短语 reason
返回头部字段 headers,以及cookies
网络传输必然涉及到bytes内容的存储 _content和编码信息encoding
整个过程的日志记录信息等 url, history, request, elapsed等
以及其他所需的状态表示和演化而来的property等
从数据流转角度,包括定义模型类,向模型类写数据,从模型类读数据
定义模型类
def __init__(self):pass,可以理解为建立库表字段时,定义哪些字段并附加初始值。类的优势可以根据初始属性字段推导出更符合上层使用接口,可仔细分析它们之间的层次结构
向模型类写数据
代码写的太好了,不忍加注释,整个过程是属性的确定,没有涉及到Response绑定方法
# requets.adapters.HTTPAdapter
def build_response(self, req, resp):
"""
:param req: The :class:`PreparedRequest <PreparedRequest>` object
:param resp: The urllib3 response object
:rtype: requests.Response
"""
response = Response()
response.status_code = getattr(resp, 'status', None)
response.headers = CaseInsensitiveDict(getattr(resp, 'headers', {}))
response.encoding = get_encoding_from_headers(response.headers)
response.raw = resp
response.reason = response.raw.reason
if isinstance(req.url, bytes):
response.url = req.url.decode('utf-8')
else:
response.url = req.url
extract_cookies_to_jar(response.cookies, req, resp)
response.request = req
response.connection = self
return response
从模型类读数据
具体Response的使用方式,参考官方文档
Last updated