You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
/* Drive the event loop.
 *
 * Each iteration runs, in this order: refresh the cached time, due timers,
 * the pending queue, idle watchers, prepare watchers, the I/O poll, check
 * watchers and closing handles.
 *
 * mode:
 *   UV_RUN_DEFAULT - keep iterating while the loop is alive and not stopped.
 *   UV_RUN_ONCE    - one iteration; the poll may block unless pending
 *                    callbacks were just run. Timers are run once more
 *                    after the poll.
 *   UV_RUN_NOWAIT  - one iteration with a zero poll timeout.
 *
 * Returns the last result of uv__loop_alive(loop). loop->stop_flag is
 * cleared before returning.
 */
int uv_run(uv_loop_t* loop, uv_run_mode mode) {
  int poll_timeout;
  int alive;
  int had_pending;

  /* Per the accompanying article, the loop counts as alive while it has
   * active handles or requests.
   */
  alive = uv__loop_alive(loop);
  if (alive == 0) {
    uv__update_time(loop);
  }

  while (alive != 0 && loop->stop_flag == 0) {
    uv__update_time(loop);
    uv__run_timers(loop);

    /* Run the callbacks queued on the pending queue. */
    had_pending = uv__run_pending(loop);

    /* Idle and prepare watcher queues (original note: these runners are
     * generated by UV_LOOP_WATCHER_DEFINE).
     */
    uv__run_idle(loop);
    uv__run_prepare(loop);

    /* Only allow the poll to block in DEFAULT mode, or in ONCE mode when no
     * pending callbacks were run. Original note: uv_backend_timeout() checks
     * for active handles and yields the time left until the next timer.
     */
    if (mode == UV_RUN_DEFAULT || (mode == UV_RUN_ONCE && !had_pending)) {
      poll_timeout = uv_backend_timeout(loop);
    } else {
      poll_timeout = 0;
    }

    uv__io_poll(loop, poll_timeout);
    uv__run_check(loop);
    uv__run_closing_handles(loop);

    if (mode == UV_RUN_ONCE) {
      /* UV_RUN_ONCE promises forward progress: at least one callback must
       * have run by the time we return. uv__io_poll() may come back without
       * invoking any callback when its timeout expires, and in that case
       * there are timers due that satisfy the promise, so run them now.
       *
       * UV_RUN_NOWAIT makes no progress guarantee and is therefore left out.
       */
      uv__update_time(loop);
      uv__run_timers(loop);
    }

    alive = uv__loop_alive(loop);

    /* Single-shot modes leave after exactly one iteration. */
    if (mode == UV_RUN_ONCE || mode == UV_RUN_NOWAIT) {
      break;
    }
  }

  /* Guarding the store lets gcc emit a conditional store, which avoids
   * dirtying a cache line when the flag is already clear.
   */
  if (loop->stop_flag != 0) {
    loop->stop_flag = 0;
  }

  return alive;
}
/* Poll for I/O on the loop's epoll descriptor and dispatch watcher callbacks.
 *
 * Steps, as visible in this function:
 *   1. Return immediately when loop->nfds == 0.
 *   2. Drain loop->watcher_queue, registering each watcher's fd and pevents
 *      with uv__epoll_ctl() (ADD when w->events == 0, MOD otherwise).
 *   3. Wait with uv__epoll_wait() or uv__epoll_pwait(), switching to the
 *      other call when one reports ENOSYS.
 *   4. For every returned event, look the watcher up in loop->watchers[fd]
 *      and invoke w->cb(loop, w, events). The signal watcher's callback is
 *      deferred until all other callbacks have run.
 *   5. Re-poll or return depending on timeout and how many events were seen.
 *
 * loop    - the event loop; its backend_fd, watchers, watcher_queue, flags
 *           and time fields are read here.
 * timeout - handed to the epoll wait call; must be >= -1 (asserted). In this
 *           code -1 means "keep waiting" (retry on EINTR or when nothing was
 *           dispatched), 0 means a single non-blocking pass, and a positive
 *           value is reduced by the elapsed loop time before each re-poll.
 *
 * Calls abort() on unexpected uv__epoll_ctl() / epoll wait / pthread_sigmask
 * failures.
 */
void uv__io_poll(uv_loop_t* loop, int timeout) {
/* A bug in kernels < 2.6.37 makes timeouts larger than ~30 minutes
* effectively infinite on 32 bits architectures. To avoid blocking
* indefinitely, we cap the timeout and poll again if necessary.
*
* Note that "30 minutes" is a simplification because it depends on
* the value of CONFIG_HZ. The magic constant assumes CONFIG_HZ=1200,
* that being the largest value I have seen in the wild (and only once.)
*/
static const int max_safe_timeout = 1789569;
/* Static flags: set once the corresponding syscall has returned ENOSYS, so
 * later calls go straight to the other one.
 */
static int no_epoll_pwait;
static int no_epoll_wait;
struct uv__epoll_event events[1024];
struct uv__epoll_event* pe;
struct uv__epoll_event e;
int real_timeout;
QUEUE* q;
uv__io_t* w;
sigset_t sigset;
uint64_t sigmask;
uint64_t base;
int have_signals;
int nevents;
int count;
int nfds;
int fd;
int op;
int i;
/* Nothing is being watched. (Original note: uv__io_start() is where
 * loop->watchers[w->fd] = w is set and loop->nfds is incremented.)
 */
if (loop->nfds == 0) {
assert(QUEUE_EMPTY(&loop->watcher_queue));
return;
}
/* Drain the watcher queue and register each watcher's fd with epoll through
 * uv__epoll_ctl().
 */
while (!QUEUE_EMPTY(&loop->watcher_queue)) {
q = QUEUE_HEAD(&loop->watcher_queue);
QUEUE_REMOVE(q);
QUEUE_INIT(q);
/* Original note: QUEUE_DATA works like a container-of macro; presumably it
 * recovers the uv__io_t that embeds this watcher_queue node.
 */
w = QUEUE_DATA(q, uv__io_t, watcher_queue);
assert(w->pevents != 0);
assert(w->fd >= 0);
assert(w->fd < (int) loop->nwatchers);
e.events = w->pevents;
e.data = w->fd;
/* w->events == 0 is treated as "not registered yet", so ADD; otherwise MOD. */
if (w->events == 0)
op = UV__EPOLL_CTL_ADD;
else
op = UV__EPOLL_CTL_MOD;
/* XXX Future optimization: do EPOLL_CTL_MOD lazily if we stop watching
* events, skip the syscall and squelch the events after epoll_wait().
*/
/* Original note: loop->backend_fd is assigned elsewhere as
 *   fd = uv__epoll_create1(UV__EPOLL_CLOEXEC); loop->backend_fd = fd;
 */
if (uv__epoll_ctl(loop->backend_fd, op, w->fd, &e)) {
if (errno != EEXIST)
abort();
assert(op == UV__EPOLL_CTL_ADD);
/* We've reactivated a file descriptor that's been watched before. */
if (uv__epoll_ctl(loop->backend_fd, UV__EPOLL_CTL_MOD, w->fd, &e))
abort();
}
/* Remember what is now registered so later calls can detect changes. */
w->events = w->pevents;
}
/* When the loop asks for SIGPROF to be blocked, build both forms of the
 * mask: a sigset_t for pthread_sigmask() and a bitmask for uv__epoll_pwait().
 */
sigmask = 0;
if (loop->flags & UV_LOOP_BLOCK_SIGPROF) {
sigemptyset(&sigset);
sigaddset(&sigset, SIGPROF);
sigmask |= 1 << (SIGPROF - 1);
}
assert(timeout >= -1);
/* base is the loop time on entry; real_timeout tracks the caller's remaining
 * budget independently of the capped per-call timeout.
 */
base = loop->time;
count = 48; /* Benchmarks suggest this gives the best throughput. */
real_timeout = timeout;
for (;;) {
/* See the comment for max_safe_timeout for an explanation of why
* this is necessary. Executive summary: kernel bug workaround.
*/
/* The cap is only applied where long has the same size as int32_t. */
if (sizeof(int32_t) == sizeof(long) && timeout >= max_safe_timeout)
timeout = max_safe_timeout;
/* A mask is wanted but epoll_pwait is known to be missing: block the signal
 * manually around the wait.
 */
if (sigmask != 0 && no_epoll_pwait != 0)
if (pthread_sigmask(SIG_BLOCK, &sigset, NULL))
abort();
/* Use epoll_pwait when epoll_wait is known to be missing, or when a signal
 * mask is wanted and epoll_pwait has not been found missing.
 */
if (no_epoll_wait != 0 || (sigmask != 0 && no_epoll_pwait == 0)) {
/* nfds receives the number of events ready to be handled (-1 on error). */
nfds = uv__epoll_pwait(loop->backend_fd,
events,
ARRAY_SIZE(events),
timeout,
sigmask);
if (nfds == -1 && errno == ENOSYS)
no_epoll_pwait = 1;
} else {
nfds = uv__epoll_wait(loop->backend_fd,
events,
ARRAY_SIZE(events),
timeout);
if (nfds == -1 && errno == ENOSYS)
no_epoll_wait = 1;
}
/* Undo the manual signal block applied before the wait. */
if (sigmask != 0 && no_epoll_pwait != 0)
if (pthread_sigmask(SIG_UNBLOCK, &sigset, NULL))
abort();
/* Update loop->time unconditionally. It's tempting to skip the update when
* timeout == 0 (i.e. non-blocking poll) but there is no guarantee that the
* operating system didn't reschedule our process while in the syscall.
*/
/* NOTE(review): SAVE_ERRNO presumably preserves errno across the call, since
 * errno is inspected below - confirm against its definition.
 */
SAVE_ERRNO(uv__update_time(loop));
if (nfds == 0) {
assert(timeout != -1);
if (timeout == 0)
return;
/* We may have been inside the system call for longer than |timeout|
* milliseconds so we need to update the timestamp to avoid drift.
*/
/* No events to handle. */
goto update_timeout;
}
if (nfds == -1) {
if (errno == ENOSYS) {
/* epoll_wait() or epoll_pwait() failed, try the other system call. */
assert(no_epoll_wait == 0 || no_epoll_pwait == 0);
continue;
}
if (errno != EINTR)
abort();
if (timeout == -1)
continue;
if (timeout == 0)
return;
/* Interrupted by a signal. Update timeout and poll again. */
goto update_timeout;
}
have_signals = 0;
nevents = 0;
assert(loop->watchers != NULL);
/* Stash the events array and its length in the two slots just past the end
 * of the watchers array while callbacks run; both are reset to NULL after
 * the dispatch loop. Presumably this lets uv__platform_invalidate_fd mark
 * in-flight events as invalid (see the fd == -1 check below) - confirm.
 */
loop->watchers[loop->nwatchers] = (void*) events;
loop->watchers[loop->nwatchers + 1] = (void*) (uintptr_t) nfds;
for (i = 0; i < nfds; i++) {
pe = events + i;
/* i.e. (*pe).data, which holds the fd stored at registration (e.data = w->fd). */
fd = pe->data;
/* Skip invalidated events, see uv__platform_invalidate_fd */
if (fd == -1)
continue;
assert(fd >= 0);
assert((unsigned) fd < loop->nwatchers);
w = loop->watchers[fd];
if (w == NULL) {
/* File descriptor that we've stopped watching, disarm it.
*
* Ignore all errors because we may be racing with another thread
* when the file descriptor is closed.
*/
/* Original note: this removes the fd from epoll's red-black tree. */
uv__epoll_ctl(loop->backend_fd, UV__EPOLL_CTL_DEL, fd, pe);
continue;
}
/* Give users only events they're interested in. Prevents spurious
* callbacks when previous callback invocation in this loop has stopped
* the current watcher. Also, filters out events that users has not
* requested us to watch.
*/
pe->events &= w->pevents | POLLERR | POLLHUP;
/* Work around an epoll quirk where it sometimes reports just the
* EPOLLERR or EPOLLHUP event. In order to force the event loop to
* move forward, we merge in the read/write events that the watcher
* is interested in; uv__read() and uv__write() will then deal with
* the error or hangup in the usual fashion.
*
* Note to self: happens when epoll reports EPOLLIN|EPOLLHUP, the user
* reads the available data, calls uv_read_stop(), then sometime later
* calls uv_read_start() again. By then, libuv has forgotten about the
* hangup and the kernel won't report EPOLLIN again because there's
* nothing left to read. If anything, libuv is to blame here. The
* current hack is just a quick bandaid; to properly fix it, libuv
* needs to remember the error/hangup event. We should get that for
* free when we switch over to edge-triggered I/O.
*/
if (pe->events == POLLERR || pe->events == POLLHUP)
pe->events |= w->pevents & (POLLIN | POLLOUT | UV__POLLPRI);
if (pe->events != 0) {
/* Run signal watchers last. This also affects child process watchers
* because those are implemented in terms of signal watchers.
*/
if (w == &loop->signal_io_watcher)
have_signals = 1;
else
/* Original note: e.g. uv__async_io, registered via uv__io_init in uv__async_start. */
w->cb(loop, w, pe->events);
nevents++;
}
}
/* Deferred signal watcher callback, run after all other callbacks. */
if (have_signals != 0)
loop->signal_io_watcher.cb(loop, &loop->signal_io_watcher, POLLIN);
/* Clear the stashed events pointer and count. */
loop->watchers[loop->nwatchers] = NULL;
loop->watchers[loop->nwatchers + 1] = NULL;
if (have_signals != 0)
return; /* Event loop should cycle now so don't poll again. */
if (nevents != 0) {
/* A full events array suggests more may be ready; re-poll without blocking,
 * bounded by count.
 */
if (nfds == ARRAY_SIZE(events) && --count != 0) {
/* Poll for more events but don't block this time. */
timeout = 0;
continue;
}
return;
}
/* Events were returned but none resulted in a callback. */
if (timeout == 0)
return;
if (timeout == -1)
continue;
update_timeout:
assert(timeout > 0);
/* Deduct the time elapsed since entry (loop->time was refreshed right after
 * the wait) and stop once the caller's budget is used up.
 */
real_timeout -= (loop->time - base);
if (real_timeout <= 0)
return;
timeout = real_timeout;
}
}
/* Start watching `events` on the watcher `w`.
 *
 * loop   - event loop that owns the watchers array and the watcher queue.
 * w      - I/O watcher; w->fd must be a valid descriptor (>= 0, < INT_MAX).
 * events - non-empty subset of POLLIN | POLLOUT | UV__POLLRDHUP | UV__POLLPRI.
 *
 * The requested events are merged into w->pevents, the watchers array is
 * grown to cover w->fd, the watcher is appended to loop->watcher_queue if it
 * is not already queued, and it is recorded in loop->watchers[w->fd]
 * (incrementing loop->nfds on first registration).
 */
void uv__io_start(uv_loop_t* loop, uv__io_t* w, unsigned int events) {
  assert((events & ~(POLLIN | POLLOUT | UV__POLLRDHUP | UV__POLLPRI)) == 0);
  assert(events != 0);
  assert(w->fd >= 0);
  assert(w->fd < INT_MAX);

  w->pevents |= events;
  maybe_resize(loop, w->fd + 1);

#if !defined(__sun)
  /* The event ports backend has to rearm every file descriptor on each tick
   * of the event loop; all other backends can bail out early here when the
   * event mask did not change.
   */
  if (w->pevents == w->events) {
    return;
  }
#endif

  /* Queue the watcher only if it is not already queued. */
  if (QUEUE_EMPTY(&w->watcher_queue)) {
    QUEUE_INSERT_TAIL(&loop->watcher_queue, &w->watcher_queue);
  }

  /* First registration for this fd: record the watcher and count it. */
  if (loop->watchers[w->fd] == NULL) {
    loop->watchers[w->fd] = w;
    loop->nfds++;
  }
}
本文将主要介绍libuv的事件循环,包括了事件循环的流程,而我们也知道libuv是使用poll机制来实现网络I/O,通过线程池来实现文件I/O,当然线程间也是通过poll机制来实现通信的,后面就将介绍线程池与事件循环是如何结合的。
event loop流程
事件循环的流程大致如下图所示:
代码如下所示:
事件循环可以分为以下几个步骤:
事件循环结束的条件有如下几种:
下面挑选重要的几点进行讲解:
判断loop是不是alive
决定loop是否是alive取决于是否有活跃的handle或者req,或者被直接stop掉,代码如下:
uv__run_timers
uv__run_timers代码如下:
我们注意到,存储timer节点的数据结构是一个以handle->timeout为基准的最小堆,函数循环过程中主要做了如下几件事:
uv__run_pending
uv__run_pending主要是将loop->pending_queue中的callback取出执行,代码如下:
后面的uv__run_idle和uv__run_prepare与之类似。
poll I/O
poll I/O是事件循环的重点,它基于IO多路复用的机制,所有网络操作都使用 non-blocking 套接字,并使用各个平台上性能最好的 poll 机制例如 linux 上的 epoll,OSX 的 kqueue 等等;而所有文件I/O基于线程池实现,但线程间通信同样基于相应的poll机制。
下面的uv__io_poll是基于linux上的epoll来实现,其他平台的实现也类似,具体代码如下:
这里主要做了如下几件事:
这里需要注意的有以下几点:
loop->backend_fd
uv__epoll_ctl(loop->backend_fd, op, w->fd, &e),了解epoll的同学都会知道,这里的loop->backend_fd是epoll实例对应的文件描述符,内核通过它找到该epoll实例,以及其中用来管理被监听fd的红黑树。
其在uv__platform_loop_init中被赋值,代码如下:
loop->watchers
epoll通过调用uv__epoll_pwait来获取需要处理事件的数据,参数events用来从内核得到事件的集合,这也是epoll的优势之一(共享内存的方式)。我们从events中取出相应的fd,然后根据fd从loop->watchers中取出handle并执行其callback,那么loop->watchers是如何初始化的呢?
其在uv__io_start中被初始化,loop->watchers是一个数组类型,其index用来表示uv__io_t handle中的fd,这样我们根据fd可以轻松的找出其uv__io_t handle。
uv__io_start在多处被用到,包括uv__async_start中调用uv__io_start来监听线程间通信用到的fd,还有在tcp、udp模块中都有用其监听fd。
我们可以看出,IO事件都会调用 uv__io_start 函数,该函数将需要监听的事件保存到 event loop的watcher_queue队列中
超时
我们发现uv__io_poll其实是阻塞的,为了解决阻塞的问题,在调用的时候加入了timeout参数,timeout参数表示距离下一个timer需要执行(超过了timer的timeout)的时间,当没有要处理的事件时,会根据进入uv__io_poll时的时间(base = loop->time)来计算是否需要break。update_timeout的代码如下:
线程池实现文件异步I/O
Libuv的文件I/O是基于线程池来实现的,大致原理是主线程提交任务到任务队列,发送信号给线程池,线程池中的worker收到信号,从任务队列中取出任务并执行,工作线程执行完任务后,将任务对应uv_async_t handle的pending状态置0,通过fd通知主线程(该 fd 同样由epoll管理),主线程监听该fd,当有epoll事件时,执行非pending的uv_async_t handle对应的回调,然后根据层层回调,最终会调用到用户注册的回调函数
说到线程池,几乎所有线程池的实现都遵循如下模型,也就是任务队列+线程池的模型,libuv的实现也是基于此。
libuv中任务队列基于一个双向链表,其中的任务的struct声明如下:
我们可以看到,其中work代表线程池实际要做的工作,done代表任务执行后的callback,wq数组为两个指针,分别指向任务队列中的前后节点。
下面我们首先看一下主线程如何提交任务到任务队列:
首先在fs.c中有这样一段逻辑,其中所有的文件操作都会调用POST,代码如下:
POST宏中调用了uv__work_submit将任务提交到队列,下面我们看下uv__work_submit的代码:
这里主要做了两件事:
uv__work_submit这块涉及的逻辑如下:
这里需要关注的有以下几点:
我们再来看下工作线程执行完任务后是如何通知主线程的,也就是上述的uv_async_send方法:
这里主要做了如下几件事:
当主线程监听到async_watcher->io_watcher.fd的变化后,通过层层回调,最终调用uv__work的done函数,也就是用户注册的回调。这部分我们首先从前向后看下回调的注册:
这块按照执行顺序做了如下几件事:
现在我们来梳理下当主线程接收到事件后,如何层层回调,最终执行uv__work的done即用户提交的回调函数。
在uv__io_poll方法中,通过uv__epoll_pwait监听到事件后,会从loop->watchers中取出uv__io_start中注册的uv__io_t(也就是上面注册的loop->async_io_watcher),然后执行其注册的回调(uv__async_io)。
uv__async_io代码如下:
这里主要做了两件事:
总结
由于Node.js异步I/O依赖libuv,libuv的核心又是event loop,本文主要介绍了event loop的流程以及线程池的实现。
The text was updated successfully, but these errors were encountered: