今天来看看 Django 的工作机制。首先我们知道 django 是基于 WSGI(或 ASGI)的,而 WSGI 服务器接收的是一个可调用对象,它可以是一个函数,也可以是实现了 __call__ 方法的类实例,并接收 environ 和 start_response 两个参数。通过 wsgi.py 文件可以看出,实际被调用的是 get_wsgi_application() 的返回对象:

 # file: wsgi.py
 import os
 ​
 from django.core.wsgi import get_wsgi_application
 ​
 os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings.settings')
 ​
 application = get_wsgi_application()

而 get_wsgi_application 函数返回了一个 WSGIHandler 实例:

 # file: django.core.wsgi
 def get_wsgi_application():
     """
     The public interface to Django's WSGI support. Return a WSGI callable.
 ​
     Avoids making django.core.handlers.WSGIHandler a public API, in case the
     internal WSGI implementation changes or moves in the future.
     """
     django.setup(set_prefix=False)
     return WSGIHandler()

然后我们详细来看 WSGIHandler 类:

 class WSGIHandler(base.BaseHandler):
     request_class = WSGIRequest
 ​
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.load_middleware()
 ​
     def __call__(self, environ, start_response):
         set_script_prefix(get_script_name(environ))
         signals.request_started.send(sender=self.__class__, environ=environ)
         request = self.request_class(environ)
         response = self.get_response(request)
 ​
         response._handler_class = self.__class__
 ​
         status = '%d %s' % (response.status_code, response.reason_phrase)
         response_headers = [
             *response.items(),
             *(('Set-Cookie', c.output(header='')) for c in response.cookies.values()),
         ]
         start_response(status, response_headers)
         if getattr(response, 'file_to_stream', None) is not None and environ.get('wsgi.file_wrapper'):
             # If `wsgi.file_wrapper` is used the WSGI server does not call
             # .close on the response, but on the file wrapper. Patch it to use
             # response.close instead which takes care of closing all files.
             response.file_to_stream.close = response.close
             response = environ['wsgi.file_wrapper'](response.file_to_stream, response.block_size)
         return response

与预期一致,WSGIHandler 是一个实现了 __call__ 方法的类,其实例为可调用对象。而 __call__ 方法就是请求进入 django 的第一站。

在 WSGIHandler 的 __init__ 方法中调用了 load_middleware 方法,其主要生成了 middleware chain。load_middleware 方法是比较有意思的地方,来看看源码。

 class BaseHandler:
     _view_middleware = None
     _template_response_middleware = None
     _exception_middleware = None
     _middleware_chain = None
 ​
     def load_middleware(self, is_async=False):
         """
         Populate middleware lists from settings.MIDDLEWARE.
 ​
         Must be called after the environment is fixed (see __call__ in subclasses).
         """
         self._view_middleware = []
         self._template_response_middleware = []
         self._exception_middleware = []
         
         get_response = self._get_response_async if is_async else self._get_response
         handler = convert_exception_to_response(get_response)
         handler_is_async = is_async
         for middleware_path in reversed(settings.MIDDLEWARE):
             middleware = import_string(middleware_path)
             middleware_can_sync = getattr(middleware, 'sync_capable', True)
             middleware_can_async = getattr(middleware, 'async_capable', False)
             if not middleware_can_sync and not middleware_can_async:
                 raise RuntimeError(
                     'Middleware %s must have at least one of '
                     'sync_capable/async_capable set to True.' % middleware_path
                 )
             elif not handler_is_async and middleware_can_sync:
                 middleware_is_async = False
             else:
                 middleware_is_async = middleware_can_async
             try:
                 # Adapt handler, if needed.
                 adapted_handler = self.adapt_method_mode(
                     middleware_is_async, handler, handler_is_async,
                     debug=settings.DEBUG, name='middleware %s' % middleware_path,
                 )
                 mw_instance = middleware(adapted_handler)
             except MiddlewareNotUsed as exc:
                 if settings.DEBUG:
                     if str(exc):
                         logger.debug('MiddlewareNotUsed(%r): %s', middleware_path, exc)
                     else:
                         logger.debug('MiddlewareNotUsed: %r', middleware_path)
                 continue
             else:
                 handler = adapted_handler
 ​
             if mw_instance is None:
                 raise ImproperlyConfigured(
                     'Middleware factory %s returned None.' % middleware_path
                 )
 ​
             if hasattr(mw_instance, 'process_view'):
                 self._view_middleware.insert(
                     0,
                     self.adapt_method_mode(is_async, mw_instance.process_view),
                 )
             if hasattr(mw_instance, 'process_template_response'):
                 self._template_response_middleware.append(
                     self.adapt_method_mode(is_async, mw_instance.process_template_response),
                 )
             if hasattr(mw_instance, 'process_exception'):
                 # The exception-handling stack is still always synchronous for
                 # now, so adapt that way.
                 self._exception_middleware.append(
                     self.adapt_method_mode(False, mw_instance.process_exception),
                 )
 ​
             handler = convert_exception_to_response(mw_instance)
             handler_is_async = middleware_is_async
 ​
         # Adapt the top of the stack, if needed.
         handler = self.adapt_method_mode(is_async, handler, handler_is_async)
         # We only assign to this when initialization is complete as it is used
         # as a flag for initialization being complete.
         self._middleware_chain = handler
     # ... 省略了一些方法~
     def _get_response(self, request):
         """
         Resolve and call the view, then apply view, exception, and
         template_response middleware. This method is everything that happens
         inside the request/response middleware.
         """
         response = None
         callback, callback_args, callback_kwargs = self.resolve_request(request)
 ​
         # Apply view middleware
         for middleware_method in self._view_middleware:
             response = middleware_method(request, callback, callback_args, callback_kwargs)
             if response:
                 break
 ​
         if response is None:
             wrapped_callback = self.make_view_atomic(callback)
             # If it is an asynchronous view, run it in a subthread.
             if asyncio.iscoroutinefunction(wrapped_callback):
                 wrapped_callback = async_to_sync(wrapped_callback)
             try:
                 response = wrapped_callback(request, *callback_args, **callback_kwargs)
             except Exception as e:
                 response = self.process_exception_by_middleware(e, request)
                 if response is None:
                     raise
 ​
         # Complain if the view returned None (a common error).
         self.check_response(response, callback)
 ​
         # If the response supports deferred rendering, apply template
         # response middleware and then render the response
         if hasattr(response, 'render') and callable(response.render):
             for middleware_method in self._template_response_middleware:
                 response = middleware_method(request, response)
                 # Complain if the template response middleware returned None (a common error).
                 self.check_response(
                     response,
                     middleware_method,
                     name='%s.process_template_response' % (
                         middleware_method.__self__.__class__.__name__,
                     )
                 )
             try:
                 response = response.render()
             except Exception as e:
                 response = self.process_exception_by_middleware(e, request)
                 if response is None:
                     raise
 ​
         return response
 ​
     # 篇幅问题,后边的方法省略了~
     # 完整的 BaseHandler 源码可以看这个链接
     # https://github.com/django/django/blob/3.2.13/django/core/handlers/base.py

因为在 django3 中引入了 asgi 异步,在源码中有很多用来处理同步/异步的判断,这个不是本文关注的重点,所以我们不多做解释。可以看到在 load_middleware 中 get_response 由 self._get_response 而来。而在 self._get_response 中,通过 callback, callback_args, callback_kwargs = self.resolve_request(request) 匹配了 url 将要到达的视图,也就是我们实现的 View。

 class BaseHandler:
     def load_middleware(self, is_async=False):
         # ...
         get_response = self._get_response_async if is_async else self._get_response
         # ...
     def _get_response(self, request):
         # ...
         callback, callback_args, callback_kwargs = self.resolve_request(request)
         # ...

已得知 load_middleware 方法中的 get_response(即 self._get_response)最终会调用到 View 函数。在继续往下看之前,我们先看一下 django 给我们的自定义中间件文档。我把关键示例代码贴出来,想看文档可以到这个链接:

 class SimpleMiddleware:
     def __init__(self, get_response):
         self.get_response = get_response
         # One-time configuration and initialization.
 ​
     def __call__(self, request):
         # Code to be executed for each request before
         # the view (and later middleware) are called.
         response = self.get_response(request)
         # Code to be executed for each request/response after
         # the view is called.
         return response

中间件在初始化时接收了 get_response,并且其实例为可调用对象(重申一次:可调用对象可以为函数或者实现 __call__ 方法的类实例)。

然后回到前边提到的 load_middleware 方法中。get_response 为 View 函数,并且通过 get_response 生成 handler,然后逆序遍历所有中间件。在 for 循环体中,通过 handler 来创建中间件(作为 SimpleMiddleware.__init__ 方法的 get_response 参数),并把创建好的中间件(可调用对象)重新赋值给 handler,进入下一次循环。这样就把中间件串了起来。

然后我们再来看中间件的 __call__ 函数,在 response = self.get_response(request) 之前的代码处理请求,之后的处理响应。再想,当未进入中间件 for 循环时 handler 为最终的 View 调用对象,第一次循环相当于将 View 调用对象插入到了 response = self.get_response(request) 这个位置执行。第二次循环时将第一次循环生成的中间件可调用对象(源码中为 handler)插入第二个中间件的 response = self.get_response(request) 位置。这样就相当于构建出了一个 V 型插入调用模型。V 的底部为 View 调用对象,插入中间件的 response = self.get_response(request) 位置形成新的 V,然后插入下一个中间件的 response = self.get_response(request) 位置,依次循环。假想底部左边为 response = self.get_response(request) 之前的代码体,右边为 response = self.get_response(request) 之后的代码体。看下图就一目了然了~

缺个图后补。

然后继续往下走,在 for 循环结束后,将串好的中间件 V 模型赋值给 self._middleware_chain 以结束 load_middleware 函数。至此在启动项目时进行的初始化工作结束。然后我们再看接收到请求时 django 是如何处理的。

django 接收到请求后如何处理?

在前面我们已经知道,wsgi.py 中的 application 实际为通过 get_wsgi_application() 函数获取到的 WSGIHandler() 对象(wsgi 协议要求其接收一个函数或者可调用对象(实现__call__方法))。 我们回到 WSGIHandler 类的 __call__ 方法:

 class WSGIHandler(base.BaseHandler):
     request_class = WSGIRequest
 ​
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.load_middleware()
 ​
     def __call__(self, environ, start_response):
         set_script_prefix(get_script_name(environ))
         signals.request_started.send(sender=self.__class__, environ=environ)
         request = self.request_class(environ)
         response = self.get_response(request)
 ​
         response._handler_class = self.__class__
 ​
         status = '%d %s' % (response.status_code, response.reason_phrase)
         response_headers = [
             *response.items(),
             *(('Set-Cookie', c.output(header='')) for c in response.cookies.values()),
         ]
         start_response(status, response_headers)
         if getattr(response, 'file_to_stream', None) is not None and environ.get('wsgi.file_wrapper'):
             # If `wsgi.file_wrapper` is used the WSGI server does not call
             # .close on the response, but on the file wrapper. Patch it to use
             # response.close instead which takes care of closing all files.
             response.file_to_stream.close = response.close
             response = environ['wsgi.file_wrapper'](response.file_to_stream, response.block_size)
         return response
 ​
     # get_response 是父类 BaseHandler 的方法,为了方便查看把它贴在这里
     def get_response(self, request):
         """Return an HttpResponse object for the given HttpRequest."""
         # Setup default url resolver for this thread
         set_urlconf(settings.ROOT_URLCONF)
         response = self._middleware_chain(request)
         response._resource_closers.append(request.close)
         if response.status_code >= 400:
             log_response(
                 '%s: %s', response.reason_phrase, request.path,
                 response=response,
                 request=request,
             )
         return response
 ​

在 __call__ 方法中可以看到 response 由 response = self.get_response(request) 而来;而在 get_response 中由 response = self._middleware_chain(request) 而来(前边我们已经知道 _middleware_chain 为在启动时准备好的中间件调用串 V 型模型)。request 一层一层进入中间件,最终到达 View 执行业务逻辑,然后再一层一层地退出来完成流程。当然这是在没有异常的情况下;如果某一个中间件抛出了异常,或者自定义校验不通过而不再执行 get_response,那么会直接返回响应,后续的中间件以及最终的 View 都不会执行。

这个是欠了很久的作业,终于补上了。不去了解之前一直迷迷糊糊感觉是个谜,了解之后其实原理非常简单。