Line data Source code
1 : /*
2 : Unix SMB/CIFS implementation.
3 :
4 : main select loop and event handling - epoll implementation
5 :
6 : Copyright (C) Andrew Tridgell 2003-2005
7 : Copyright (C) Stefan Metzmacher 2005-2013
8 : Copyright (C) Jeremy Allison 2013
9 :
10 : ** NOTE! The following LGPL license applies to the tevent
11 : ** library. This does NOT imply that all of Samba is released
12 : ** under the LGPL
13 :
14 : This library is free software; you can redistribute it and/or
15 : modify it under the terms of the GNU Lesser General Public
16 : License as published by the Free Software Foundation; either
17 : version 3 of the License, or (at your option) any later version.
18 :
19 : This library is distributed in the hope that it will be useful,
20 : but WITHOUT ANY WARRANTY; without even the implied warranty of
21 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 : Lesser General Public License for more details.
23 :
24 : You should have received a copy of the GNU Lesser General Public
25 : License along with this library; if not, see <http://www.gnu.org/licenses/>.
26 : */
27 :
28 : #include "replace.h"
29 : #include "system/filesys.h"
30 : #include "system/select.h"
31 : #include "tevent.h"
32 : #include "tevent_internal.h"
33 : #include "tevent_util.h"
34 :
/*
 * Private state of the epoll backend. One instance is allocated per
 * tevent_context and stored in its additional_data pointer.
 */
struct epoll_event_context {
	/* back-pointer to the generic event context this state belongs to */
	struct tevent_context *ev;

	/* descriptor returned by epoll_create(), or -1 when not open */
	int epoll_fd;

	/* pid that opened epoll_fd; compared with getpid() to detect a fork */
	pid_t pid;

	/* when true, epoll_panic() always requests a replay from the fallback */
	bool panic_force_replay;
	/* when non-NULL, epoll_panic() stores true through this pointer */
	bool *panic_state;
	/* handler called by epoll_panic(); with NULL epoll_panic() aborts */
	bool (*panic_fallback)(struct tevent_context *ev, bool replay);
};
48 :
/*
 * Backend private bits kept in tevent_fd->additional_flags.
 *
 * The values stay plain (signed) int shifts on purpose: the code clears
 * them with "&= ~FLAG" and relies on the usual integer conversions.
 */
/* the fde is currently registered with the epoll handle */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT	(1<<0)
/* EPOLLHUP/EPOLLERR must be reported to the handler (fde wants READ) */
#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR	(1<<1)
/* epoll already reported EPOLLHUP/EPOLLERR for this fde */
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR	(1<<2)
/* the fde is paired with a second fde on the same fd */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX	(1<<3)
53 :
#ifdef TEST_PANIC_FALLBACK

/*
 * Test-only fault injection: once a panic fallback is registered,
 * roughly every second epoll_create/epoll_ctl/epoll_wait call is made
 * to fail with EINVAL so that the fallback path gets exercised.
 */

/*
 * Decide whether the next epoll call should be failed artificially.
 * Returns true (with errno set to EINVAL) when the caller must
 * return -1 instead of calling into the kernel.
 */
static bool epoll_test_inject_failure(struct epoll_event_context *epoll_ev)
{
	if (epoll_ev->panic_fallback == NULL) {
		/* nobody could recover, behave normally */
		return false;
	}

	/* 50% of the time, fail... */
	if ((random() % 2) != 0) {
		return false;
	}

	errno = EINVAL;
	return true;
}

/* epoll_create() with optional injected failure */
static int epoll_create_panic_fallback(struct epoll_event_context *epoll_ev,
				       int size)
{
	if (epoll_test_inject_failure(epoll_ev)) {
		return -1;
	}

	return epoll_create(size);
}

/* epoll_ctl() with optional injected failure */
static int epoll_ctl_panic_fallback(struct epoll_event_context *epoll_ev,
				    int epfd, int op, int fd,
				    struct epoll_event *event)
{
	if (epoll_test_inject_failure(epoll_ev)) {
		return -1;
	}

	return epoll_ctl(epfd, op, fd, event);
}

/* epoll_wait() with optional injected failure */
static int epoll_wait_panic_fallback(struct epoll_event_context *epoll_ev,
				     int epfd,
				     struct epoll_event *events,
				     int maxevents,
				     int timeout)
{
	if (epoll_test_inject_failure(epoll_ev)) {
		return -1;
	}

	return epoll_wait(epfd, events, maxevents, timeout);
}

/*
 * From here on every epoll call in this file goes through the wrappers
 * above. The macros expect a variable named epoll_ev in scope.
 */
#define epoll_create(_size) \
	epoll_create_panic_fallback(epoll_ev, _size)
#define epoll_ctl(_epfd, _op, _fd, _event) \
	epoll_ctl_panic_fallback(epoll_ev,_epfd, _op, _fd, _event)
#define epoll_wait(_epfd, _events, _maxevents, _timeout) \
	epoll_wait_panic_fallback(epoll_ev, _epfd, _events, _maxevents, _timeout)
#endif
115 :
116 : /*
117 : called to set the panic fallback function.
118 : */
119 87544476 : _PRIVATE_ void tevent_epoll_set_panic_fallback(struct tevent_context *ev,
120 : bool (*panic_fallback)(struct tevent_context *ev,
121 : bool replay))
122 : {
123 76941460 : struct epoll_event_context *epoll_ev =
124 87544476 : talloc_get_type_abort(ev->additional_data,
125 : struct epoll_event_context);
126 :
127 87544476 : epoll_ev->panic_fallback = panic_fallback;
128 87544476 : }
129 :
130 : /*
131 : called when a epoll call fails
132 : */
133 5 : static void epoll_panic(struct epoll_event_context *epoll_ev,
134 : const char *reason, bool replay)
135 : {
136 5 : struct tevent_context *ev = epoll_ev->ev;
137 : bool (*panic_fallback)(struct tevent_context *ev, bool replay);
138 :
139 5 : panic_fallback = epoll_ev->panic_fallback;
140 :
141 5 : if (epoll_ev->panic_state != NULL) {
142 0 : *epoll_ev->panic_state = true;
143 : }
144 :
145 5 : if (epoll_ev->panic_force_replay) {
146 0 : replay = true;
147 : }
148 :
149 5 : TALLOC_FREE(ev->additional_data);
150 :
151 5 : if (panic_fallback == NULL) {
152 0 : tevent_debug(ev, TEVENT_DEBUG_FATAL,
153 : "%s (%s) replay[%u] - calling abort()\n",
154 0 : reason, strerror(errno), (unsigned)replay);
155 0 : abort();
156 : }
157 :
158 10 : tevent_debug(ev, TEVENT_DEBUG_ERROR,
159 : "%s (%s) replay[%u] - calling panic_fallback\n",
160 5 : reason, strerror(errno), (unsigned)replay);
161 :
162 5 : if (!panic_fallback(ev, replay)) {
163 : /* Fallback failed. */
164 0 : tevent_debug(ev, TEVENT_DEBUG_FATAL,
165 : "%s (%s) replay[%u] - calling abort()\n",
166 0 : reason, strerror(errno), (unsigned)replay);
167 0 : abort();
168 : }
169 5 : }
170 :
171 : /*
172 : map from TEVENT_FD_* to EPOLLIN/EPOLLOUT
173 : */
174 39467780 : static uint32_t epoll_map_flags(uint16_t flags)
175 : {
176 39535967 : uint32_t ret = 0;
177 39535967 : if (flags & TEVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
178 39535967 : if (flags & TEVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
179 39467780 : return ret;
180 : }
181 :
182 : /*
183 : free the epoll fd
184 : */
185 87522295 : static int epoll_ctx_destructor(struct epoll_event_context *epoll_ev)
186 : {
187 87522295 : close(epoll_ev->epoll_fd);
188 87522295 : epoll_ev->epoll_fd = -1;
189 87522295 : return 0;
190 : }
191 :
192 : /*
193 : init the epoll fd
194 : */
195 87544481 : static int epoll_init_ctx(struct epoll_event_context *epoll_ev)
196 : {
197 87544481 : epoll_ev->epoll_fd = epoll_create(64);
198 87544481 : if (epoll_ev->epoll_fd == -1) {
199 0 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
200 : "Failed to create epoll handle.\n");
201 0 : return -1;
202 : }
203 :
204 87544481 : if (!ev_set_close_on_exec(epoll_ev->epoll_fd)) {
205 0 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
206 : "Failed to set close-on-exec, file descriptor may be leaked to children.\n");
207 : }
208 :
209 87544481 : epoll_ev->pid = getpid();
210 87544481 : talloc_set_destructor(epoll_ev, epoll_ctx_destructor);
211 :
212 87544481 : return 0;
213 : }
214 :
/* defined further down, needed by epoll_check_reopen() */
static void epoll_update_event(struct epoll_event_context *epoll_ev,
			       struct tevent_fd *fde);
216 :
217 : /*
218 : reopen the epoll handle when our pid changes
219 : see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
220 : demonstration of why this is needed
221 : */
222 227646047 : static void epoll_check_reopen(struct epoll_event_context *epoll_ev)
223 : {
224 : struct tevent_fd *fde;
225 227646047 : bool *caller_panic_state = epoll_ev->panic_state;
226 227646047 : bool panic_triggered = false;
227 :
228 227646047 : if (epoll_ev->pid == getpid()) {
229 433376873 : return;
230 : }
231 :
232 59045 : close(epoll_ev->epoll_fd);
233 59045 : epoll_ev->epoll_fd = epoll_create(64);
234 59045 : if (epoll_ev->epoll_fd == -1) {
235 0 : epoll_panic(epoll_ev, "epoll_create() failed", false);
236 0 : return;
237 : }
238 :
239 59045 : if (!ev_set_close_on_exec(epoll_ev->epoll_fd)) {
240 0 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
241 : "Failed to set close-on-exec, file descriptor may be leaked to children.\n");
242 : }
243 :
244 59045 : epoll_ev->pid = getpid();
245 59045 : epoll_ev->panic_state = &panic_triggered;
246 873675 : for (fde=epoll_ev->ev->fd_events;fde;fde=fde->next) {
247 814630 : fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
248 814630 : epoll_update_event(epoll_ev, fde);
249 :
250 814630 : if (panic_triggered) {
251 0 : if (caller_panic_state != NULL) {
252 0 : *caller_panic_state = true;
253 : }
254 0 : return;
255 : }
256 : }
257 59045 : epoll_ev->panic_state = NULL;
258 : }
259 :
260 : /*
261 : epoll cannot add the same file descriptor twice, once
262 : with read, once with write which is allowed by the
263 : tevent backend. Multiplex the existing fde, flag it
264 : as such so we can search for the correct fde on
265 : event triggering.
266 : */
267 :
268 36062 : static int epoll_add_multiplex_fd(struct epoll_event_context *epoll_ev,
269 : struct tevent_fd *add_fde)
270 : {
271 : struct epoll_event event;
272 : struct tevent_fd *mpx_fde;
273 : int ret;
274 :
275 : /* Find the existing fde that caused the EEXIST error. */
276 72145 : for (mpx_fde = epoll_ev->ev->fd_events; mpx_fde; mpx_fde = mpx_fde->next) {
277 72145 : if (mpx_fde->fd != add_fde->fd) {
278 21 : continue;
279 : }
280 :
281 72124 : if (mpx_fde == add_fde) {
282 36062 : continue;
283 : }
284 :
285 35431 : break;
286 : }
287 36062 : if (mpx_fde == NULL) {
288 0 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
289 : "can't find multiplex fde for fd[%d]",
290 : add_fde->fd);
291 0 : return -1;
292 : }
293 :
294 36062 : if (mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
295 : /* Logic error. Can't have more than 2 multiplexed fde's. */
296 0 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
297 : "multiplex fde for fd[%d] is already multiplexed\n",
298 : mpx_fde->fd);
299 0 : return -1;
300 : }
301 :
302 : /*
303 : * The multiplex fde must have the same fd, and also
304 : * already have an epoll event attached.
305 : */
306 36062 : if (!(mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) {
307 : /* Logic error. Can't have more than 2 multiplexed fde's. */
308 0 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
309 : "multiplex fde for fd[%d] has no event\n",
310 : mpx_fde->fd);
311 0 : return -1;
312 : }
313 :
314 : /* Modify the mpx_fde to add in the new flags. */
315 36062 : ZERO_STRUCT(event);
316 36693 : event.events = epoll_map_flags(mpx_fde->flags);
317 36693 : event.events |= epoll_map_flags(add_fde->flags);
318 36062 : event.data.ptr = mpx_fde;
319 36062 : ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, mpx_fde->fd, &event);
320 36062 : if (ret != 0 && errno == EBADF) {
321 0 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
322 : "EPOLL_CTL_MOD EBADF for "
323 : "add_fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
324 : add_fde, mpx_fde, add_fde->fd);
325 0 : DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
326 0 : mpx_fde->wrapper = NULL;
327 0 : mpx_fde->event_ctx = NULL;
328 0 : DLIST_REMOVE(epoll_ev->ev->fd_events, add_fde);
329 0 : add_fde->wrapper = NULL;
330 0 : add_fde->event_ctx = NULL;
331 0 : return 0;
332 36062 : } else if (ret != 0) {
333 0 : return ret;
334 : }
335 :
336 : /*
337 : * Make each fde->additional_data pointers point at each other
338 : * so we can look them up from each other. They are now paired.
339 : */
340 36062 : mpx_fde->additional_data = (struct tevent_fd *)add_fde;
341 36062 : add_fde->additional_data = (struct tevent_fd *)mpx_fde;
342 :
343 : /* Now flag both fde's as being multiplexed. */
344 36062 : mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
345 36062 : add_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
346 :
347 : /* we need to keep the GOT_ERROR flag */
348 36062 : if (mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR) {
349 0 : add_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
350 : }
351 :
352 35431 : return 0;
353 : }
354 :
355 : /*
356 : add the epoll event to the given fd_event
357 : */
358 35511031 : static void epoll_add_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
359 : {
360 : struct epoll_event event;
361 : int ret;
362 35511031 : struct tevent_fd *mpx_fde = NULL;
363 :
364 35511031 : fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
365 35511031 : fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
366 :
367 35511031 : if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
368 : /*
369 : * This is a multiplexed fde, we need to include both
370 : * flags in the modified event.
371 : */
372 0 : mpx_fde = talloc_get_type_abort(fde->additional_data,
373 : struct tevent_fd);
374 :
375 0 : mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
376 0 : mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
377 : }
378 :
379 35511031 : ZERO_STRUCT(event);
380 35569464 : event.events = epoll_map_flags(fde->flags);
381 35511031 : if (mpx_fde != NULL) {
382 0 : event.events |= epoll_map_flags(mpx_fde->flags);
383 : }
384 35511031 : event.data.ptr = fde;
385 35511031 : ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event);
386 35511031 : if (ret != 0 && errno == EBADF) {
387 0 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
388 : "EPOLL_CTL_ADD EBADF for "
389 : "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
390 : fde, mpx_fde, fde->fd);
391 0 : DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
392 0 : fde->wrapper = NULL;
393 0 : fde->event_ctx = NULL;
394 0 : if (mpx_fde != NULL) {
395 0 : DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
396 0 : mpx_fde->wrapper = NULL;
397 0 : mpx_fde->event_ctx = NULL;
398 : }
399 35511031 : return;
400 35511031 : } else if (ret != 0 && errno == EEXIST && mpx_fde == NULL) {
401 36062 : ret = epoll_add_multiplex_fd(epoll_ev, fde);
402 64901 : if (ret != 0) {
403 0 : epoll_panic(epoll_ev, "epoll_add_multiplex_fd failed",
404 : false);
405 0 : return;
406 : }
407 35474969 : } else if (ret != 0) {
408 5 : epoll_panic(epoll_ev, "EPOLL_CTL_ADD failed", false);
409 5 : return;
410 : }
411 :
412 35511026 : fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
413 : /* only if we want to read we want to tell the event handler about errors */
414 35511026 : if (fde->flags & TEVENT_FD_READ) {
415 35502350 : fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
416 : }
417 :
418 35511026 : if (mpx_fde == NULL) {
419 35452593 : return;
420 : }
421 :
422 0 : mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
423 : /* only if we want to read we want to tell the event handler about errors */
424 0 : if (mpx_fde->flags & TEVENT_FD_READ) {
425 0 : mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
426 : }
427 : }
428 :
429 : /*
430 : delete the epoll event for given fd_event
431 : */
432 9975983 : static void epoll_del_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
433 : {
434 : struct epoll_event event;
435 : int ret;
436 9975983 : struct tevent_fd *mpx_fde = NULL;
437 :
438 9975983 : fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
439 9975983 : fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
440 :
441 9975983 : if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
442 : /*
443 : * This is a multiplexed fde, we need to modify both events.
444 : */
445 0 : mpx_fde = talloc_get_type_abort(fde->additional_data,
446 : struct tevent_fd);
447 :
448 0 : mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
449 0 : mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
450 : }
451 :
452 9975983 : ZERO_STRUCT(event);
453 9975983 : ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event);
454 9975983 : if (ret != 0 && errno == ENOENT) {
455 : /*
456 : * This can happen after a epoll_check_reopen
457 : * within epoll_event_fd_destructor.
458 : */
459 52270 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_TRACE,
460 : "EPOLL_CTL_DEL ignoring ENOENT for fd[%d]\n",
461 : fde->fd);
462 52270 : return;
463 9923713 : } else if (ret != 0 && errno == EBADF) {
464 0 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
465 : "EPOLL_CTL_DEL EBADF for "
466 : "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
467 : fde, mpx_fde, fde->fd);
468 0 : DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
469 0 : fde->wrapper = NULL;
470 0 : fde->event_ctx = NULL;
471 0 : if (mpx_fde != NULL) {
472 0 : DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
473 0 : mpx_fde->wrapper = NULL;
474 0 : mpx_fde->event_ctx = NULL;
475 : }
476 0 : return;
477 9923713 : } else if (ret != 0) {
478 0 : epoll_panic(epoll_ev, "EPOLL_CTL_DEL failed", false);
479 0 : return;
480 : }
481 : }
482 :
483 : /*
484 : change the epoll event to the given fd_event
485 : */
486 3949572 : static void epoll_mod_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
487 : {
488 3949572 : struct tevent_fd *mpx_fde = NULL;
489 : struct epoll_event event;
490 : int ret;
491 :
492 3949572 : fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
493 3949572 : fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
494 :
495 3949572 : if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
496 : /*
497 : * This is a multiplexed fde, we need to include both
498 : * flags in the modified event.
499 : */
500 3240 : mpx_fde = talloc_get_type_abort(fde->additional_data,
501 : struct tevent_fd);
502 :
503 3240 : mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
504 3240 : mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
505 : }
506 :
507 3949572 : ZERO_STRUCT(event);
508 3958064 : event.events = epoll_map_flags(fde->flags);
509 3949572 : if (mpx_fde != NULL) {
510 3240 : event.events |= epoll_map_flags(mpx_fde->flags);
511 : }
512 3949572 : event.data.ptr = fde;
513 3949572 : ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event);
514 3949572 : if (ret != 0 && errno == EBADF) {
515 0 : tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
516 : "EPOLL_CTL_MOD EBADF for "
517 : "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
518 : fde, mpx_fde, fde->fd);
519 0 : DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
520 0 : fde->wrapper = NULL;
521 0 : fde->event_ctx = NULL;
522 0 : if (mpx_fde != NULL) {
523 0 : DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
524 0 : mpx_fde->wrapper = NULL;
525 0 : mpx_fde->event_ctx = NULL;
526 : }
527 3946332 : return;
528 3949572 : } else if (ret != 0) {
529 0 : epoll_panic(epoll_ev, "EPOLL_CTL_MOD failed", false);
530 0 : return;
531 : }
532 :
533 3949572 : fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
534 : /* only if we want to read we want to tell the event handler about errors */
535 3949572 : if (fde->flags & TEVENT_FD_READ) {
536 3943515 : fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
537 : }
538 :
539 3949572 : if (mpx_fde == NULL) {
540 3937840 : return;
541 : }
542 :
543 3240 : mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
544 : /* only if we want to read we want to tell the event handler about errors */
545 3240 : if (mpx_fde->flags & TEVENT_FD_READ) {
546 3240 : mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
547 : }
548 : }
549 :
550 49717036 : static void epoll_update_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
551 : {
552 49717036 : bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
553 49717036 : bool want_read = (fde->flags & TEVENT_FD_READ);
554 49717036 : bool want_write= (fde->flags & TEVENT_FD_WRITE);
555 49717036 : struct tevent_fd *mpx_fde = NULL;
556 :
557 49717036 : if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
558 : /*
559 : * work out what the multiplexed fde wants.
560 : */
561 3240 : mpx_fde = talloc_get_type_abort(fde->additional_data,
562 : struct tevent_fd);
563 :
564 3240 : if (mpx_fde->flags & TEVENT_FD_READ) {
565 3240 : want_read = true;
566 : }
567 :
568 3240 : if (mpx_fde->flags & TEVENT_FD_WRITE) {
569 0 : want_write = true;
570 : }
571 : }
572 :
573 : /* there's already an event */
574 49717036 : if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
575 13925555 : if (want_read || (want_write && !got_error)) {
576 3949572 : epoll_mod_event(epoll_ev, fde);
577 3949572 : return;
578 : }
579 : /*
580 : * if we want to match the select behavior, we need to remove the epoll_event
581 : * when the caller isn't interested in events.
582 : *
583 : * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
584 : */
585 9975983 : epoll_del_event(epoll_ev, fde);
586 9975983 : return;
587 : }
588 :
589 : /* there's no epoll_event attached to the fde */
590 35791481 : if (want_read || (want_write && !got_error)) {
591 35511031 : epoll_add_event(epoll_ev, fde);
592 35511031 : return;
593 : }
594 : }
595 :
596 : /*
597 : Cope with epoll returning EPOLLHUP|EPOLLERR on an event.
598 : Return true if there's nothing else to do, false if
599 : this event needs further handling.
600 : */
601 11232456 : static bool epoll_handle_hup_or_err(struct epoll_event_context *epoll_ev,
602 : struct tevent_fd *fde)
603 : {
604 11235531 : if (fde == NULL) {
605 : /* Nothing to do if no event. */
606 5616228 : return true;
607 : }
608 :
609 5619303 : fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
610 : /*
611 : * if we only wait for TEVENT_FD_WRITE, we should not tell the
612 : * event handler about it, and remove the epoll_event,
613 : * as we only report errors when waiting for read events,
614 : * to match the select() behavior
615 : */
616 5619303 : if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
617 : /*
618 : * Do the same as the poll backend and
619 : * remove the writeable flag.
620 : */
621 0 : fde->flags &= ~TEVENT_FD_WRITE;
622 0 : return true;
623 : }
624 : /* This has TEVENT_FD_READ set, we're not finished. */
625 5616228 : return false;
626 : }
627 :
628 : /*
629 : event loop handling using epoll
630 : */
631 178779703 : static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval *tvalp)
632 : {
633 : int ret, i;
634 : #define MAXEVENTS 1
635 : struct epoll_event events[MAXEVENTS];
636 178779703 : int timeout = -1;
637 : int wait_errno;
638 :
639 178779703 : if (tvalp) {
640 : /* it's better to trigger timed events a bit later than too early */
641 178779703 : timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
642 : }
643 :
644 292553062 : if (epoll_ev->ev->signal_events &&
645 113773359 : tevent_common_check_signal(epoll_ev->ev)) {
646 0 : return 0;
647 : }
648 :
649 178779703 : tevent_trace_point_callback(epoll_ev->ev, TEVENT_TRACE_BEFORE_WAIT);
650 178779703 : ret = epoll_wait(epoll_ev->epoll_fd, events, MAXEVENTS, timeout);
651 178779703 : wait_errno = errno;
652 178779703 : tevent_trace_point_callback(epoll_ev->ev, TEVENT_TRACE_AFTER_WAIT);
653 :
654 178779703 : if (ret == -1 && wait_errno == EINTR && epoll_ev->ev->signal_events) {
655 23329419 : if (tevent_common_check_signal(epoll_ev->ev)) {
656 23278466 : return 0;
657 : }
658 : }
659 :
660 155450329 : if (ret == -1 && wait_errno != EINTR) {
661 0 : epoll_panic(epoll_ev, "epoll_wait() failed", true);
662 0 : return -1;
663 : }
664 :
665 155450329 : if (ret == 0 && tvalp) {
666 : /* we don't care about a possible delay here */
667 3191675 : tevent_common_loop_timer_delay(epoll_ev->ev);
668 3191663 : return 0;
669 : }
670 :
671 151133628 : for (i=0;i<ret;i++) {
672 152225606 : struct tevent_fd *fde = talloc_get_type(events[i].data.ptr,
673 : struct tevent_fd);
674 152225606 : uint16_t flags = 0;
675 152225606 : struct tevent_fd *mpx_fde = NULL;
676 :
677 152225606 : if (fde == NULL) {
678 0 : epoll_panic(epoll_ev, "epoll_wait() gave bad data", true);
679 0 : return -1;
680 : }
681 152225606 : if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
682 : /*
683 : * Save off the multiplexed event in case we need
684 : * to use it to call the handler function.
685 : */
686 781415 : mpx_fde = talloc_get_type_abort(fde->additional_data,
687 : struct tevent_fd);
688 : }
689 152225606 : if (events[i].events & (EPOLLHUP|EPOLLERR)) {
690 5622378 : bool handled_fde = epoll_handle_hup_or_err(epoll_ev, fde);
691 5622378 : bool handled_mpx = epoll_handle_hup_or_err(epoll_ev, mpx_fde);
692 :
693 5619303 : if (handled_fde && handled_mpx) {
694 0 : epoll_update_event(epoll_ev, fde);
695 0 : continue;
696 : }
697 :
698 5619303 : if (!handled_mpx) {
699 : /*
700 : * If the mpx event was the one that needs
701 : * further handling, it's the TEVENT_FD_READ
702 : * event so switch over and call that handler.
703 : */
704 0 : fde = mpx_fde;
705 0 : mpx_fde = NULL;
706 : }
707 5616228 : flags |= TEVENT_FD_READ;
708 : }
709 152225606 : if (events[i].events & EPOLLIN) flags |= TEVENT_FD_READ;
710 152225606 : if (events[i].events & EPOLLOUT) flags |= TEVENT_FD_WRITE;
711 :
712 152225606 : if (flags & TEVENT_FD_WRITE) {
713 12310003 : if (fde->flags & TEVENT_FD_WRITE) {
714 11672830 : mpx_fde = NULL;
715 : }
716 12036498 : if (mpx_fde && mpx_fde->flags & TEVENT_FD_WRITE) {
717 361631 : fde = mpx_fde;
718 361631 : mpx_fde = NULL;
719 : }
720 : }
721 :
722 152223569 : if (mpx_fde) {
723 : /* Ensure we got the right fde. */
724 139082 : if ((flags & fde->flags) == 0) {
725 5622 : fde = mpx_fde;
726 5622 : mpx_fde = NULL;
727 : }
728 : }
729 :
730 : /*
731 : * make sure we only pass the flags
732 : * the handler is expecting.
733 : */
734 152225606 : flags &= fde->flags;
735 152225606 : if (flags) {
736 152225606 : return tevent_common_invoke_fd_handler(fde, flags, NULL);
737 : }
738 : }
739 :
740 33048 : return 0;
741 : }
742 :
743 : /*
744 : create a epoll_event_context structure.
745 : */
746 87544481 : static int epoll_event_context_init(struct tevent_context *ev)
747 : {
748 : int ret;
749 : struct epoll_event_context *epoll_ev;
750 :
751 : /*
752 : * We might be called during tevent_re_initialise()
753 : * which means we need to free our old additional_data.
754 : */
755 87544481 : TALLOC_FREE(ev->additional_data);
756 :
757 87544481 : epoll_ev = talloc_zero(ev, struct epoll_event_context);
758 87544481 : if (!epoll_ev) return -1;
759 87544481 : epoll_ev->ev = ev;
760 87544481 : epoll_ev->epoll_fd = -1;
761 :
762 87544481 : ret = epoll_init_ctx(epoll_ev);
763 87544481 : if (ret != 0) {
764 0 : talloc_free(epoll_ev);
765 0 : return ret;
766 : }
767 :
768 87544481 : ev->additional_data = epoll_ev;
769 87544481 : return 0;
770 : }
771 :
772 : /*
773 : destroy an fd_event
774 : */
775 41995410 : static int epoll_event_fd_destructor(struct tevent_fd *fde)
776 : {
777 41995410 : struct tevent_context *ev = fde->event_ctx;
778 41995410 : struct epoll_event_context *epoll_ev = NULL;
779 41995410 : bool panic_triggered = false;
780 41995410 : struct tevent_fd *mpx_fde = NULL;
781 41995410 : int flags = fde->flags;
782 :
783 41995410 : if (ev == NULL) {
784 32050630 : return tevent_common_fd_destructor(fde);
785 : }
786 :
787 9944780 : epoll_ev = talloc_get_type_abort(ev->additional_data,
788 : struct epoll_event_context);
789 :
790 : /*
791 : * we must remove the event from the list
792 : * otherwise a panic fallback handler may
793 : * reuse invalid memory
794 : */
795 9944780 : DLIST_REMOVE(ev->fd_events, fde);
796 :
797 9944780 : if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
798 36062 : mpx_fde = talloc_get_type_abort(fde->additional_data,
799 : struct tevent_fd);
800 :
801 36062 : fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
802 36062 : mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
803 :
804 36062 : fde->additional_data = NULL;
805 36062 : mpx_fde->additional_data = NULL;
806 :
807 36062 : fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
808 : }
809 :
810 9944780 : epoll_ev->panic_state = &panic_triggered;
811 9944780 : epoll_check_reopen(epoll_ev);
812 9944780 : if (panic_triggered) {
813 0 : return tevent_common_fd_destructor(fde);
814 : }
815 :
816 9944780 : if (mpx_fde != NULL) {
817 36062 : epoll_update_event(epoll_ev, mpx_fde);
818 36062 : if (panic_triggered) {
819 0 : return tevent_common_fd_destructor(fde);
820 : }
821 : }
822 :
823 9944780 : fde->flags = 0;
824 9944780 : epoll_update_event(epoll_ev, fde);
825 9944780 : fde->flags = flags;
826 9944780 : if (panic_triggered) {
827 0 : return tevent_common_fd_destructor(fde);
828 : }
829 9944780 : epoll_ev->panic_state = NULL;
830 :
831 9944780 : return tevent_common_fd_destructor(fde);
832 : }
833 :
834 : /*
835 : add a fd based event
836 : return NULL on failure (memory allocation error)
837 : */
838 34635940 : static struct tevent_fd *epoll_event_add_fd(struct tevent_context *ev, TALLOC_CTX *mem_ctx,
839 : int fd, uint16_t flags,
840 : tevent_fd_handler_t handler,
841 : void *private_data,
842 : const char *handler_name,
843 : const char *location)
844 : {
845 31762570 : struct epoll_event_context *epoll_ev =
846 34635940 : talloc_get_type_abort(ev->additional_data,
847 : struct epoll_event_context);
848 : struct tevent_fd *fde;
849 34635940 : bool panic_triggered = false;
850 :
851 34635940 : fde = tevent_common_add_fd(ev, mem_ctx, fd, flags,
852 : handler, private_data,
853 : handler_name, location);
854 34635940 : if (!fde) return NULL;
855 :
856 34635940 : talloc_set_destructor(fde, epoll_event_fd_destructor);
857 :
858 34635940 : epoll_ev->panic_state = &panic_triggered;
859 34635940 : epoll_check_reopen(epoll_ev);
860 34635940 : if (panic_triggered) {
861 0 : return fde;
862 : }
863 34635940 : epoll_ev->panic_state = NULL;
864 :
865 34635940 : epoll_update_event(epoll_ev, fde);
866 :
867 34635940 : return fde;
868 : }
869 :
870 : /*
871 : set the fd event flags
872 : */
873 37799222 : static void epoll_event_set_fd_flags(struct tevent_fd *fde, uint16_t flags)
874 : {
875 : struct tevent_context *ev;
876 : struct epoll_event_context *epoll_ev;
877 37799222 : bool panic_triggered = false;
878 :
879 70256629 : if (fde->flags == flags) return;
880 :
881 4285624 : ev = fde->event_ctx;
882 4285624 : epoll_ev = talloc_get_type_abort(ev->additional_data,
883 : struct epoll_event_context);
884 :
885 4285624 : fde->flags = flags;
886 :
887 4285624 : epoll_ev->panic_state = &panic_triggered;
888 4285624 : epoll_check_reopen(epoll_ev);
889 4285624 : if (panic_triggered) {
890 0 : return;
891 : }
892 4285624 : epoll_ev->panic_state = NULL;
893 :
894 4285624 : epoll_update_event(epoll_ev, fde);
895 : }
896 :
897 : /*
898 : do a single event loop using the events defined in ev
899 : */
900 376755652 : static int epoll_event_loop_once(struct tevent_context *ev, const char *location)
901 : {
902 338574359 : struct epoll_event_context *epoll_ev =
903 376755652 : talloc_get_type_abort(ev->additional_data,
904 : struct epoll_event_context);
905 : struct timeval tval;
906 376755652 : bool panic_triggered = false;
907 :
908 515519093 : if (ev->signal_events &&
909 138763451 : tevent_common_check_signal(ev)) {
910 1608641 : return 0;
911 : }
912 :
913 374986975 : if (ev->threaded_contexts != NULL) {
914 4507330 : tevent_common_threaded_activate_immediate(ev);
915 : }
916 :
917 422443103 : if (ev->immediate_events &&
918 47468472 : tevent_common_loop_immediate(ev)) {
919 47232260 : return 0;
920 : }
921 :
922 327518503 : tval = tevent_common_loop_timer_delay(ev);
923 327518491 : if (tevent_timeval_is_zero(&tval)) {
924 145318472 : return 0;
925 : }
926 :
927 178779703 : epoll_ev->panic_state = &panic_triggered;
928 178779703 : epoll_ev->panic_force_replay = true;
929 178779703 : epoll_check_reopen(epoll_ev);
930 178779703 : if (panic_triggered) {
931 0 : errno = EINVAL;
932 0 : return -1;
933 : }
934 178779703 : epoll_ev->panic_force_replay = false;
935 178779703 : epoll_ev->panic_state = NULL;
936 :
937 178779703 : return epoll_event_loop(epoll_ev, &tval);
938 : }
939 :
940 : static const struct tevent_ops epoll_event_ops = {
941 : .context_init = epoll_event_context_init,
942 : .add_fd = epoll_event_add_fd,
943 : .set_fd_close_fn = tevent_common_fd_set_close_fn,
944 : .get_fd_flags = tevent_common_fd_get_flags,
945 : .set_fd_flags = epoll_event_set_fd_flags,
946 : .add_timer = tevent_common_add_timer_v2,
947 : .schedule_immediate = tevent_common_schedule_immediate,
948 : .add_signal = tevent_common_add_signal,
949 : .loop_once = epoll_event_loop_once,
950 : .loop_wait = tevent_common_loop_wait,
951 : };
952 :
953 65046 : _PRIVATE_ bool tevent_epoll_init(void)
954 : {
955 65046 : return tevent_register_backend("epoll", &epoll_event_ops);
956 : }
|