今天来看看 Django 的工作机制。首先我们知道 Django 是基于 WSGI(或 ASGI)的,而 WSGI 接收的是一个可调用对象:它可以是一个函数,也可以是实现了 __call__ 方法的类实例,并且接收 environ 和 start_response 两个参数。通过 wsgi.py 文件可以看出,实际被调用的是 get_wsgi_application() 的返回对象:
# file: wsgi.py
import os
from django.core.wsgi import get_wsgi_application
# Default the settings module only when DJANGO_SETTINGS_MODULE is not already
# present in the environment; an existing value is left untouched.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings.settings')
# Module-level WSGI callable, created once when this module is imported.
application = get_wsgi_application()
而 get_wsgi_application 函数返回了一个 WSGIHandler 实例:
# file: django.core.wsgi
def get_wsgi_application():
    """
    The public interface to Django's WSGI support. Return a WSGI callable.

    Avoids making django.core.handlers.WSGIHandler a public API, in case the
    internal WSGI implementation changes or moves in the future.
    """
    # Initialise Django first; the handler constructed below loads the
    # middleware in its __init__.
    django.setup(set_prefix=False)
    return WSGIHandler()
然后我们详细来看 WSGIHandler 类:
class WSGIHandler(base.BaseHandler):
    """WSGI entry point: instances are callables taking (environ, start_response)."""

    request_class = WSGIRequest

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Build the middleware chain once, when the handler is constructed.
        self.load_middleware()

    def __call__(self, environ, start_response):
        """
        Handle a single WSGI request.

        Build a request object from ``environ``, obtain the response through
        ``self.get_response``, report status and headers via
        ``start_response``, and return the response (or a
        ``wsgi.file_wrapper`` around its file) to the caller.
        """
        set_script_prefix(get_script_name(environ))
        signals.request_started.send(sender=self.__class__, environ=environ)
        request = self.request_class(environ)
        response = self.get_response(request)

        response._handler_class = self.__class__

        status = '%d %s' % (response.status_code, response.reason_phrase)
        # The response's own header items come first, followed by one
        # Set-Cookie header per cookie.
        response_headers = [
            *response.items(),
            *(('Set-Cookie', c.output(header='')) for c in response.cookies.values()),
        ]
        start_response(status, response_headers)
        if getattr(response, 'file_to_stream', None) is not None and environ.get('wsgi.file_wrapper'):
            # If `wsgi.file_wrapper` is used the WSGI server does not call
            # .close on the response, but on the file wrapper. Patch it to use
            # response.close instead which takes care of closing all files.
            response.file_to_stream.close = response.close
            response = environ['wsgi.file_wrapper'](response.file_to_stream, response.block_size)
        return response
与预期一致,WSGIHandler 的实例是一个可调用对象,而 __call__ 方法就是请求进入 django 的第一站。
在 WSGIHandler 的 __init__ 方法中调用了 load_middleware 方法,其主要生成了 middleware chain。load_middleware 方法是比较有意思的地方,来看看源码。
class BaseHandler:
    """Base handler: builds the middleware chain and resolves/calls the view."""

    # All four stay None until load_middleware() populates them.
    _view_middleware = None
    _template_response_middleware = None
    _exception_middleware = None
    _middleware_chain = None

    def load_middleware(self, is_async=False):
        """
        Populate middleware lists from settings.MIDDLEWARE.

        Must be called after the environment is fixed (see __call__ in subclasses).
        """
        self._view_middleware = []
        self._template_response_middleware = []
        self._exception_middleware = []

        # The innermost handler is the method that resolves and calls the view.
        get_response = self._get_response_async if is_async else self._get_response
        handler = convert_exception_to_response(get_response)
        handler_is_async = is_async
        # Iterate in reverse: each middleware wraps the handler built so far,
        # so the first entry of settings.MIDDLEWARE ends up outermost.
        for middleware_path in reversed(settings.MIDDLEWARE):
            middleware = import_string(middleware_path)
            middleware_can_sync = getattr(middleware, 'sync_capable', True)
            middleware_can_async = getattr(middleware, 'async_capable', False)
            if not middleware_can_sync and not middleware_can_async:
                raise RuntimeError(
                    'Middleware %s must have at least one of '
                    'sync_capable/async_capable set to True.' % middleware_path
                )
            elif not handler_is_async and middleware_can_sync:
                middleware_is_async = False
            else:
                middleware_is_async = middleware_can_async
            try:
                # Adapt handler, if needed.
                adapted_handler = self.adapt_method_mode(
                    middleware_is_async, handler, handler_is_async,
                    debug=settings.DEBUG, name='middleware %s' % middleware_path,
                )
                mw_instance = middleware(adapted_handler)
            except MiddlewareNotUsed as exc:
                # The middleware opted out: skip it and keep the current
                # handler unchanged for the next iteration.
                if settings.DEBUG:
                    if str(exc):
                        logger.debug('MiddlewareNotUsed(%r): %s', middleware_path, exc)
                    else:
                        logger.debug('MiddlewareNotUsed: %r', middleware_path)
                continue
            else:
                handler = adapted_handler

            if mw_instance is None:
                raise ImproperlyConfigured(
                    'Middleware factory %s returned None.' % middleware_path
                )

            # insert(0, ...) under reversed iteration keeps process_view hooks
            # in settings.MIDDLEWARE order; the append() calls below leave the
            # other two hook lists in reverse settings order.
            if hasattr(mw_instance, 'process_view'):
                self._view_middleware.insert(
                    0,
                    self.adapt_method_mode(is_async, mw_instance.process_view),
                )
            if hasattr(mw_instance, 'process_template_response'):
                self._template_response_middleware.append(
                    self.adapt_method_mode(is_async, mw_instance.process_template_response),
                )
            if hasattr(mw_instance, 'process_exception'):
                # The exception-handling stack is still always synchronous for
                # now, so adapt that way.
                self._exception_middleware.append(
                    self.adapt_method_mode(False, mw_instance.process_exception),
                )

            # The middleware instance becomes the handler that the next
            # (outer) middleware will receive as its get_response.
            handler = convert_exception_to_response(mw_instance)
            handler_is_async = middleware_is_async

        # Adapt the top of the stack, if needed.
        handler = self.adapt_method_mode(is_async, handler, handler_is_async)
        # We only assign to this when initialization is complete as it is used
        # as a flag for initialization being complete.
        self._middleware_chain = handler

    # ... (some methods omitted here)

    def _get_response(self, request):
        """
        Resolve and call the view, then apply view, exception, and
        template_response middleware. This method is everything that happens
        inside the request/response middleware.
        """
        response = None
        callback, callback_args, callback_kwargs = self.resolve_request(request)

        # Apply view middleware
        for middleware_method in self._view_middleware:
            response = middleware_method(request, callback, callback_args, callback_kwargs)
            # A truthy return value short-circuits: the view is not called.
            if response:
                break

        if response is None:
            wrapped_callback = self.make_view_atomic(callback)
            # If it is an asynchronous view, run it in a subthread.
            if asyncio.iscoroutinefunction(wrapped_callback):
                wrapped_callback = async_to_sync(wrapped_callback)
            try:
                response = wrapped_callback(request, *callback_args, **callback_kwargs)
            except Exception as e:
                # Let exception middleware supply a response; re-raise the
                # original exception if none does.
                response = self.process_exception_by_middleware(e, request)
                if response is None:
                    raise

        # Complain if the view returned None (a common error).
        self.check_response(response, callback)

        # If the response supports deferred rendering, apply template
        # response middleware and then render the response
        if hasattr(response, 'render') and callable(response.render):
            for middleware_method in self._template_response_middleware:
                response = middleware_method(request, response)
                # Complain if the template response middleware returned None (a common error).
                self.check_response(
                    response,
                    middleware_method,
                    name='%s.process_template_response' % (
                        middleware_method.__self__.__class__.__name__,
                    )
                )
            try:
                response = response.render()
            except Exception as e:
                response = self.process_exception_by_middleware(e, request)
                if response is None:
                    raise

        return response
# 篇幅问题,后边的方法省略了~
# 完整的 BaseHandler 源码可以看这个链接
# https://github.com/django/django/blob/3.2.13/django/core/handlers/base.py
因为在 django3 中引入了 asgi 异步,源码中有很多用来处理同步/异步的判断,这个不是本文关注的重点,所以我们不多做解释。可以看到在 load_middleware 中 get_response 由 self._get_response 而来。而在 self._get_response 中通过 callback, callback_args, callback_kwargs = self.resolve_request(request) 匹配了 url 将要到达的视图,也就是我们实现的 View。
class BaseHandler:
    """Abbreviated excerpt: only the lines relevant to the discussion are kept."""

    def load_middleware(self, is_async=False):
        # ...
        # The innermost callable of the chain is the _get_response method.
        get_response = self._get_response_async if is_async else self._get_response
        # ...

    def _get_response(self, request):
        # ...
        # Resolve the request to the view callable and its arguments.
        callback, callback_args, callback_kwargs = self.resolve_request(request)
        # ...
已得知 load_middleware 方法中 get_response 获取到的是最终到达 View 函数的调用入口。在继续往下看之前,我们先看一下 django 给我们的自定义中间件文档。我把关键示例代码贴出来,想看文档可以到这个链接:
class SimpleMiddleware:
    """Minimal middleware: wraps ``get_response`` and is itself callable."""

    def __init__(self, get_response):
        # The next callable to invoke for each request.
        self.get_response = get_response
        # One-time configuration and initialization.

    def __call__(self, request):
        # Code to be executed for each request before
        # the view (and later middleware) are called.

        response = self.get_response(request)

        # Code to be executed for each request/response after
        # the view is called.

        return response
中间件在初始化时接收了 get_response,并且其实例为可调用对象(重申一次:可调用对象可以为函数或者实现 __call__ 方法的类实例)。
然后回到前边提到的 load_middleware 方法中。get_response 最终会调用到 View 函数,先通过 get_response 生成 handler,然后逆序遍历所有中间件。在 for 循环体中,通过 handler 来创建中间件(作为 SimpleMiddleware.__init__ 方法的 get_response 参数),并把创建好的中间件(可调用对象)重新赋值给 handler,进入下一次循环。这样就把中间件串了起来。
然后我们再来看中间件的 __call__ 函数,在 response = self.get_response(request) 之前的代码处理请求,之后的代码处理响应。再想,当未进入中间件 for 循环时 handler 为最终的 View 调用对象,第一次循环(由于是逆序遍历,这里对应 settings.MIDDLEWARE 中的最后一个中间件)相当于将 View 调用对象插入到了 response = self.get_response(request) 这个位置执行。第二次循环时将第一次循环生成的中间件可调用对象(源码中为 handler)插入第二个中间件的 response = self.get_response(request) 位置。这样就相当于构建出了一个 V 型插入调用模型。V 的底部为 View 调用对象,插入中间件的 response = self.get_response(request) 位置形成新的 V,然后插入下一个中间件的 response = self.get_response(request) 位置,依次循环。假想底部左边为 response = self.get_response(request) 之前的代码体,右边为 response = self.get_response(request) 之后的代码体。看下图就一目了然了~
缺个图后补。
然后继续往下走,在 for 循环结束后,将串好的中间件 V 模型赋值给 self._middleware_chain,load_middleware 函数到此结束。至此,启动项目时进行的初始化工作结束。然后我们再看接收到请求时 django 是如何处理的。
django 接收到请求后如何处理?
在前面我们已经知道,wsgi.py 中的 application 实际为通过 get_wsgi_application() 函数获取到的 WSGIHandler() 对象(wsgi 协议要求其接收一个函数或者可调用对象(实现__call__方法))。 我们回到 WSGIHandler 类的 __call__ 方法:
class WSGIHandler(base.BaseHandler):
    """WSGI entry point: instances are callables taking (environ, start_response)."""

    request_class = WSGIRequest

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Build the middleware chain once, when the handler is constructed.
        self.load_middleware()

    def __call__(self, environ, start_response):
        """
        Handle a single WSGI request.

        Build a request object from ``environ``, obtain the response through
        ``self.get_response``, report status and headers via
        ``start_response``, and return the response (or a
        ``wsgi.file_wrapper`` around its file) to the caller.
        """
        set_script_prefix(get_script_name(environ))
        signals.request_started.send(sender=self.__class__, environ=environ)
        request = self.request_class(environ)
        # The response is produced by get_response.
        response = self.get_response(request)

        response._handler_class = self.__class__

        status = '%d %s' % (response.status_code, response.reason_phrase)
        # The response's own header items come first, followed by one
        # Set-Cookie header per cookie.
        response_headers = [
            *response.items(),
            *(('Set-Cookie', c.output(header='')) for c in response.cookies.values()),
        ]
        start_response(status, response_headers)
        if getattr(response, 'file_to_stream', None) is not None and environ.get('wsgi.file_wrapper'):
            # If `wsgi.file_wrapper` is used the WSGI server does not call
            # .close on the response, but on the file wrapper. Patch it to use
            # response.close instead which takes care of closing all files.
            response.file_to_stream.close = response.close
            response = environ['wsgi.file_wrapper'](response.file_to_stream, response.block_size)
        return response
# get_response 是父类 BaseHandler 的方法,为了方便查看把它贴在这里
def get_response(self, request):
    """Return an HttpResponse object for the given HttpRequest."""
    # Setup default url resolver for this thread
    set_urlconf(settings.ROOT_URLCONF)
    # Pass the request through the stored middleware chain.
    response = self._middleware_chain(request)
    # Register request.close among the response's resource closers.
    response._resource_closers.append(request.close)
    # Responses with a 4xx/5xx status code are logged.
    if response.status_code >= 400:
        log_response(
            '%s: %s', response.reason_phrase, request.path,
            response=response,
            request=request,
        )
    return response
在 __call__ 方法中可以看到 response 由 response = self.get_response(request) 而来;而在 get_response 中由 response = self._middleware_chain(request) 而来(前边我们已经知道 _middleware_chain 为在启动时准备好的中间件调用串 V 型模型)。request 一层一层进入中间件,最终到达 View 执行业务逻辑,然后再一层一层地退出来完成流程。当然这是在没有异常的情况下;如果某一个中间件抛出了异常,或者自定义校验信息不通过而不再执行 get_response,那么会直接返回响应,后续的中间件以及最终的 View 都不会执行。
这个是欠了很久的作业,终于补上了。不去了解之前一直迷迷糊糊感觉是个谜,了解之后其实原理非常简单。