LCOV - code coverage report
Current view: top level - lib/tevent - tevent_epoll.c (source / functions) Hit Total Coverage
Test: coverage report for abartlet/fix-coverage dd10fb34 Lines: 289 384 75.3 %
Date: 2021-09-23 10:06:22 Functions: 20 20 100.0 %

          Line data    Source code
       1             : /* 
       2             :    Unix SMB/CIFS implementation.
       3             : 
       4             :    main select loop and event handling - epoll implementation
       5             : 
       6             :    Copyright (C) Andrew Tridgell        2003-2005
       7             :    Copyright (C) Stefan Metzmacher      2005-2013
       8             :    Copyright (C) Jeremy Allison         2013
       9             : 
      10             :      ** NOTE! The following LGPL license applies to the tevent
      11             :      ** library. This does NOT imply that all of Samba is released
      12             :      ** under the LGPL
      13             : 
      14             :    This library is free software; you can redistribute it and/or
      15             :    modify it under the terms of the GNU Lesser General Public
      16             :    License as published by the Free Software Foundation; either
      17             :    version 3 of the License, or (at your option) any later version.
      18             : 
      19             :    This library is distributed in the hope that it will be useful,
      20             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      21             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      22             :    Lesser General Public License for more details.
      23             : 
      24             :    You should have received a copy of the GNU Lesser General Public
      25             :    License along with this library; if not, see <http://www.gnu.org/licenses/>.
      26             : */
      27             : 
      28             : #include "replace.h"
      29             : #include "system/filesys.h"
      30             : #include "system/select.h"
      31             : #include "tevent.h"
      32             : #include "tevent_internal.h"
      33             : #include "tevent_util.h"
      34             : 
      35             : struct epoll_event_context {
      36             :         /* pointer back to the generic tevent_context that owns this backend state */
      37             :         struct tevent_context *ev;
      38             : 
      39             :         /* handle returned by epoll_create(); the destructor closes it and resets it to -1 */
      40             :         int epoll_fd;
      41             :         /* pid recorded when epoll_fd was created; epoll_check_reopen() compares it with getpid() */
      42             :         pid_t pid;
      43             :         /* panic handling, all consumed by epoll_panic(): */
      44             :         bool panic_force_replay; /* when true, epoll_panic() forces replay = true */
      45             :         bool *panic_state; /* if non-NULL, epoll_panic() stores true here before freeing this struct */
      46             :         bool (*panic_fallback)(struct tevent_context *ev, bool replay); /* NULL means epoll_panic() calls abort() */
      47             : };
      48             : 
      49             : #define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT      (1<<0) /* set once epoll_ctl() ADD/MOD succeeded for the fde */
      50             : #define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR   (1<<1) /* set when the fde asked for TEVENT_FD_READ */
      51             : #define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR      (1<<2) /* kept when fde's get multiplexed; NOTE(review): setter is not in this part of the file */
      52             : #define EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX        (1<<3) /* fde is paired with a second fde on the same fd via additional_data */
      53             : 
      54             : #ifdef TEST_PANIC_FALLBACK
      55             : 
      56             : static int epoll_create_panic_fallback(struct epoll_event_context *epoll_ev,
      57             :                                        int size)
      58             : {
      59             :         if (epoll_ev->panic_fallback == NULL) {
      60             :                 return epoll_create(size);
      61             :         }
      62             : 
      63             :         /* 50% of the time, fail... */
      64             :         if ((random() % 2) == 0) {
      65             :                 errno = EINVAL;
      66             :                 return -1;
      67             :         }
      68             : 
      69             :         return epoll_create(size);
      70             : }
      71             : 
      72             : static int epoll_ctl_panic_fallback(struct epoll_event_context *epoll_ev,
      73             :                                     int epfd, int op, int fd,
      74             :                                     struct epoll_event *event)
      75             : {
      76             :         if (epoll_ev->panic_fallback == NULL) {
      77             :                 return epoll_ctl(epfd, op, fd, event);
      78             :         }
      79             : 
      80             :         /* 50% of the time, fail... */
      81             :         if ((random() % 2) == 0) {
      82             :                 errno = EINVAL;
      83             :                 return -1;
      84             :         }
      85             : 
      86             :         return epoll_ctl(epfd, op, fd, event);
      87             : }
      88             : 
      89             : static int epoll_wait_panic_fallback(struct epoll_event_context *epoll_ev,
      90             :                                      int epfd,
      91             :                                      struct epoll_event *events,
      92             :                                      int maxevents,
      93             :                                      int timeout)
      94             : {
      95             :         if (epoll_ev->panic_fallback == NULL) {
      96             :                 return epoll_wait(epfd, events, maxevents, timeout);
      97             :         }
      98             : 
      99             :         /* 50% of the time, fail... */
     100             :         if ((random() % 2) == 0) {
     101             :                 errno = EINVAL;
     102             :                 return -1;
     103             :         }
     104             : 
     105             :         return epoll_wait(epfd, events, maxevents, timeout);
     106             : }
     107             : 
     108             : #define epoll_create(_size) \
     109             :         epoll_create_panic_fallback(epoll_ev, _size)
     110             : #define epoll_ctl(_epfd, _op, _fd, _event) \
     111             :         epoll_ctl_panic_fallback(epoll_ev,_epfd, _op, _fd, _event)
     112             : #define epoll_wait(_epfd, _events, _maxevents, _timeout) \
     113             :         epoll_wait_panic_fallback(epoll_ev, _epfd, _events, _maxevents, _timeout)
     114             : #endif
     115             : 
     116             : /*
     117             :   called to set the panic fallback function.
     118             : */
     119    87544476 : _PRIVATE_ void tevent_epoll_set_panic_fallback(struct tevent_context *ev,
     120             :                                 bool (*panic_fallback)(struct tevent_context *ev,
     121             :                                                        bool replay))
     122             : {
     123    76941460 :         struct epoll_event_context *epoll_ev =
     124    87544476 :                 talloc_get_type_abort(ev->additional_data,
     125             :                 struct epoll_event_context);
     126             : 
     127    87544476 :         epoll_ev->panic_fallback = panic_fallback;
     128    87544476 : }
     129             : 
     130             : /*
     131             :   called when a epoll call fails
     132             : */
     133           5 : static void epoll_panic(struct epoll_event_context *epoll_ev,
     134             :                         const char *reason, bool replay)
     135             : {
     136           5 :         struct tevent_context *ev = epoll_ev->ev;
     137             :         bool (*panic_fallback)(struct tevent_context *ev, bool replay);
     138             : 
     139           5 :         panic_fallback = epoll_ev->panic_fallback;
     140             : 
     141           5 :         if (epoll_ev->panic_state != NULL) {
     142           0 :                 *epoll_ev->panic_state = true;
     143             :         }
     144             : 
     145           5 :         if (epoll_ev->panic_force_replay) {
     146           0 :                 replay = true;
     147             :         }
     148             : 
     149           5 :         TALLOC_FREE(ev->additional_data);
     150             : 
     151           5 :         if (panic_fallback == NULL) {
     152           0 :                 tevent_debug(ev, TEVENT_DEBUG_FATAL,
     153             :                         "%s (%s) replay[%u] - calling abort()\n",
     154           0 :                         reason, strerror(errno), (unsigned)replay);
     155           0 :                 abort();
     156             :         }
     157             : 
     158          10 :         tevent_debug(ev, TEVENT_DEBUG_ERROR,
     159             :                      "%s (%s) replay[%u] - calling panic_fallback\n",
     160           5 :                      reason, strerror(errno), (unsigned)replay);
     161             : 
     162           5 :         if (!panic_fallback(ev, replay)) {
     163             :                 /* Fallback failed. */
     164           0 :                 tevent_debug(ev, TEVENT_DEBUG_FATAL,
     165             :                         "%s (%s) replay[%u] - calling abort()\n",
     166           0 :                         reason, strerror(errno), (unsigned)replay);
     167           0 :                 abort();
     168             :         }
     169           5 : }
     170             : 
     171             : /*
     172             :   map from TEVENT_FD_* to EPOLLIN/EPOLLOUT
     173             : */
     174    39467780 : static uint32_t epoll_map_flags(uint16_t flags)
     175             : {
     176    39535967 :         uint32_t ret = 0;
     177    39535967 :         if (flags & TEVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
     178    39535967 :         if (flags & TEVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
     179    39467780 :         return ret;
     180             : }
     181             : 
     182             : /*
     183             :  free the epoll fd
     184             : */
     185    87522295 : static int epoll_ctx_destructor(struct epoll_event_context *epoll_ev)
     186             : {
     187    87522295 :         close(epoll_ev->epoll_fd);
     188    87522295 :         epoll_ev->epoll_fd = -1;
     189    87522295 :         return 0;
     190             : }
     191             : 
     192             : /*
     193             :  init the epoll fd
     194             : */
     195    87544481 : static int epoll_init_ctx(struct epoll_event_context *epoll_ev)
     196             : {
     197    87544481 :         epoll_ev->epoll_fd = epoll_create(64);
     198    87544481 :         if (epoll_ev->epoll_fd == -1) {
     199           0 :                 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
     200             :                              "Failed to create epoll handle.\n");
     201           0 :                 return -1;
     202             :         }
     203             : 
     204    87544481 :         if (!ev_set_close_on_exec(epoll_ev->epoll_fd)) {
     205           0 :                 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
     206             :                              "Failed to set close-on-exec, file descriptor may be leaked to children.\n");
     207             :         }
     208             : 
     209    87544481 :         epoll_ev->pid = getpid();
     210    87544481 :         talloc_set_destructor(epoll_ev, epoll_ctx_destructor);
     211             : 
     212    87544481 :         return 0;
     213             : }
     214             : 
     215             : static void epoll_update_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde);
     216             : 
     217             : /*
     218             :   reopen the epoll handle when our pid changes
     219             :   see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
     220             :   demonstration of why this is needed
     221             :  */
     222   227646047 : static void epoll_check_reopen(struct epoll_event_context *epoll_ev)
     223             : {
     224             :         struct tevent_fd *fde;
     225   227646047 :         bool *caller_panic_state = epoll_ev->panic_state;
     226   227646047 :         bool panic_triggered = false;
     227             : 
     228   227646047 :         if (epoll_ev->pid == getpid()) {
     229   433376873 :                 return;
     230             :         }
     231             : 
     232       59045 :         close(epoll_ev->epoll_fd);
     233       59045 :         epoll_ev->epoll_fd = epoll_create(64);
     234       59045 :         if (epoll_ev->epoll_fd == -1) {
     235           0 :                 epoll_panic(epoll_ev, "epoll_create() failed", false);
     236           0 :                 return;
     237             :         }
     238             : 
     239       59045 :         if (!ev_set_close_on_exec(epoll_ev->epoll_fd)) {
     240           0 :                 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
     241             :                              "Failed to set close-on-exec, file descriptor may be leaked to children.\n");
     242             :         }
     243             : 
     244       59045 :         epoll_ev->pid = getpid();
     245       59045 :         epoll_ev->panic_state = &panic_triggered;
     246      873675 :         for (fde=epoll_ev->ev->fd_events;fde;fde=fde->next) {
     247      814630 :                 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
     248      814630 :                 epoll_update_event(epoll_ev, fde);
     249             : 
     250      814630 :                 if (panic_triggered) {
     251           0 :                         if (caller_panic_state != NULL) {
     252           0 :                                 *caller_panic_state = true;
     253             :                         }
     254           0 :                         return;
     255             :                 }
     256             :         }
     257       59045 :         epoll_ev->panic_state = NULL;
     258             : }
     259             : 
     260             : /*
     261             :  epoll cannot add the same file descriptor twice, once
     262             :  with read, once with write which is allowed by the
     263             :  tevent backend. Multiplex the existing fde, flag it
     264             :  as such so we can search for the correct fde on
     265             :  event triggering.
     266             : */
     267             : 
     268       36062 : static int epoll_add_multiplex_fd(struct epoll_event_context *epoll_ev,
     269             :                                   struct tevent_fd *add_fde)
     270             : {
     271             :         struct epoll_event event;
     272             :         struct tevent_fd *mpx_fde;
     273             :         int ret;
     274             : 
     275             :         /* Find the existing fde that caused the EEXIST error. */
     276       72145 :         for (mpx_fde = epoll_ev->ev->fd_events; mpx_fde; mpx_fde = mpx_fde->next) {
     277       72145 :                 if (mpx_fde->fd != add_fde->fd) {
     278          21 :                         continue;
     279             :                 }
     280             : 
     281       72124 :                 if (mpx_fde == add_fde) {
     282       36062 :                         continue;
     283             :                 }
     284             : 
     285       35431 :                 break;
     286             :         }
     287       36062 :         if (mpx_fde == NULL) {
     288           0 :                 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
     289             :                              "can't find multiplex fde for fd[%d]",
     290             :                              add_fde->fd);
     291           0 :                 return -1;
     292             :         }
     293             : 
     294       36062 :         if (mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
     295             :                 /* Logic error. Can't have more than 2 multiplexed fde's. */
     296           0 :                 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
     297             :                              "multiplex fde for fd[%d] is already multiplexed\n",
     298             :                              mpx_fde->fd);
     299           0 :                 return -1;
     300             :         }
     301             : 
     302             :         /*
     303             :          * The multiplex fde must have the same fd, and also
     304             :          * already have an epoll event attached.
     305             :          */
     306       36062 :         if (!(mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) {
     307             :                 /* Logic error. Can't have more than 2 multiplexed fde's. */
     308           0 :                 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
     309             :                              "multiplex fde for fd[%d] has no event\n",
     310             :                              mpx_fde->fd);
     311           0 :                 return -1;
     312             :         }
     313             : 
     314             :         /* Modify the mpx_fde to add in the new flags. */
     315       36062 :         ZERO_STRUCT(event);
     316       36693 :         event.events = epoll_map_flags(mpx_fde->flags);
     317       36693 :         event.events |= epoll_map_flags(add_fde->flags);
     318       36062 :         event.data.ptr = mpx_fde;
     319       36062 :         ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, mpx_fde->fd, &event);
     320       36062 :         if (ret != 0 && errno == EBADF) {
     321           0 :                 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
     322             :                              "EPOLL_CTL_MOD EBADF for "
     323             :                              "add_fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
     324             :                              add_fde, mpx_fde, add_fde->fd);
     325           0 :                 DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
     326           0 :                 mpx_fde->wrapper = NULL;
     327           0 :                 mpx_fde->event_ctx = NULL;
     328           0 :                 DLIST_REMOVE(epoll_ev->ev->fd_events, add_fde);
     329           0 :                 add_fde->wrapper = NULL;
     330           0 :                 add_fde->event_ctx = NULL;
     331           0 :                 return 0;
     332       36062 :         } else if (ret != 0) {
     333           0 :                 return ret;
     334             :         }
     335             : 
     336             :         /*
     337             :          * Make each fde->additional_data pointers point at each other
     338             :          * so we can look them up from each other. They are now paired.
     339             :          */
     340       36062 :         mpx_fde->additional_data = (struct tevent_fd *)add_fde;
     341       36062 :         add_fde->additional_data = (struct tevent_fd *)mpx_fde;
     342             : 
     343             :         /* Now flag both fde's as being multiplexed. */
     344       36062 :         mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
     345       36062 :         add_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
     346             : 
     347             :         /* we need to keep the GOT_ERROR flag */
     348       36062 :         if (mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR) {
     349           0 :                 add_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
     350             :         }
     351             : 
     352       35431 :         return 0;
     353             : }
     354             : 
     355             : /*
     356             :  add the epoll event to the given fd_event
     357             : */
     358    35511031 : static void epoll_add_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
     359             : {
     360             :         struct epoll_event event;
     361             :         int ret;
     362    35511031 :         struct tevent_fd *mpx_fde = NULL;
     363             : 
     364    35511031 :         fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
     365    35511031 :         fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
     366             : 
     367    35511031 :         if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
     368             :                 /*
     369             :                  * This is a multiplexed fde, we need to include both
     370             :                  * flags in the modified event.
     371             :                  */
     372           0 :                 mpx_fde = talloc_get_type_abort(fde->additional_data,
     373             :                                                 struct tevent_fd);
     374             : 
     375           0 :                 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
     376           0 :                 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
     377             :         }
     378             : 
     379    35511031 :         ZERO_STRUCT(event);
     380    35569464 :         event.events = epoll_map_flags(fde->flags);
     381    35511031 :         if (mpx_fde != NULL) {
     382           0 :                 event.events |= epoll_map_flags(mpx_fde->flags);
     383             :         }
     384    35511031 :         event.data.ptr = fde;
     385    35511031 :         ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event);
     386    35511031 :         if (ret != 0 && errno == EBADF) {
     387           0 :                 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
     388             :                              "EPOLL_CTL_ADD EBADF for "
     389             :                              "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
     390             :                              fde, mpx_fde, fde->fd);
     391           0 :                 DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
     392           0 :                 fde->wrapper = NULL;
     393           0 :                 fde->event_ctx = NULL;
     394           0 :                 if (mpx_fde != NULL) {
     395           0 :                         DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
     396           0 :                         mpx_fde->wrapper = NULL;
     397           0 :                         mpx_fde->event_ctx = NULL;
     398             :                 }
     399    35511031 :                 return;
     400    35511031 :         } else if (ret != 0 && errno == EEXIST && mpx_fde == NULL) {
     401       36062 :                 ret = epoll_add_multiplex_fd(epoll_ev, fde);
     402       64901 :                 if (ret != 0) {
     403           0 :                         epoll_panic(epoll_ev, "epoll_add_multiplex_fd failed",
     404             :                                     false);
     405           0 :                         return;
     406             :                 }
     407    35474969 :         } else if (ret != 0) {
     408           5 :                 epoll_panic(epoll_ev, "EPOLL_CTL_ADD failed", false);
     409           5 :                 return;
     410             :         }
     411             : 
     412    35511026 :         fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
     413             :         /* only if we want to read we want to tell the event handler about errors */
     414    35511026 :         if (fde->flags & TEVENT_FD_READ) {
     415    35502350 :                 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
     416             :         }
     417             : 
     418    35511026 :         if (mpx_fde == NULL) {
     419    35452593 :                 return;
     420             :         }
     421             : 
     422           0 :         mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
     423             :         /* only if we want to read we want to tell the event handler about errors */
     424           0 :         if (mpx_fde->flags & TEVENT_FD_READ) {
     425           0 :                 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
     426             :         }
     427             : }
     428             : 
     429             : /*
     430             :  delete the epoll event for given fd_event
     431             : */
     432     9975983 : static void epoll_del_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
     433             : {
     434             :         struct epoll_event event;
     435             :         int ret;
     436     9975983 :         struct tevent_fd *mpx_fde = NULL;
     437             : 
     438     9975983 :         fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
     439     9975983 :         fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
     440             : 
     441     9975983 :         if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
     442             :                 /*
     443             :                  * This is a multiplexed fde, we need to modify both events.
     444             :                  */
     445           0 :                 mpx_fde = talloc_get_type_abort(fde->additional_data,
     446             :                                                 struct tevent_fd);
     447             : 
     448           0 :                 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
     449           0 :                 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
     450             :         }
     451             : 
     452     9975983 :         ZERO_STRUCT(event);
     453     9975983 :         ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event);
     454     9975983 :         if (ret != 0 && errno == ENOENT) {
     455             :                 /*
     456             :                  * This can happen after a epoll_check_reopen
     457             :                  * within epoll_event_fd_destructor.
     458             :                  */
     459       52270 :                 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_TRACE,
     460             :                              "EPOLL_CTL_DEL ignoring ENOENT for fd[%d]\n",
     461             :                              fde->fd);
     462       52270 :                 return;
     463     9923713 :         } else if (ret != 0 && errno == EBADF) {
     464           0 :                 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
     465             :                              "EPOLL_CTL_DEL EBADF for "
     466             :                              "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
     467             :                              fde, mpx_fde, fde->fd);
     468           0 :                 DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
     469           0 :                 fde->wrapper = NULL;
     470           0 :                 fde->event_ctx = NULL;
     471           0 :                 if (mpx_fde != NULL) {
     472           0 :                         DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
     473           0 :                         mpx_fde->wrapper = NULL;
     474           0 :                         mpx_fde->event_ctx = NULL;
     475             :                 }
     476           0 :                 return;
     477     9923713 :         } else if (ret != 0) {
     478           0 :                 epoll_panic(epoll_ev, "EPOLL_CTL_DEL failed", false);
     479           0 :                 return;
     480             :         }
     481             : }
     482             : 
     483             : /*
     484             :  change the epoll event to the given fd_event
     485             : */
     486     3949572 : static void epoll_mod_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
     487             : {
     488     3949572 :         struct tevent_fd *mpx_fde = NULL;
     489             :         struct epoll_event event;
     490             :         int ret;
     491             : 
     492     3949572 :         fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
     493     3949572 :         fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
     494             : 
     495     3949572 :         if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
     496             :                 /*
     497             :                  * This is a multiplexed fde, we need to include both
     498             :                  * flags in the modified event.
     499             :                  */
     500        3240 :                 mpx_fde = talloc_get_type_abort(fde->additional_data,
     501             :                                                 struct tevent_fd);
     502             : 
     503        3240 :                 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
     504        3240 :                 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
     505             :         }
     506             : 
     507     3949572 :         ZERO_STRUCT(event);
     508     3958064 :         event.events = epoll_map_flags(fde->flags);
     509     3949572 :         if (mpx_fde != NULL) {
     510        3240 :                 event.events |= epoll_map_flags(mpx_fde->flags);
     511             :         }
     512     3949572 :         event.data.ptr = fde;
     513     3949572 :         ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event);
     514     3949572 :         if (ret != 0 && errno == EBADF) {
     515           0 :                 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
     516             :                              "EPOLL_CTL_MOD EBADF for "
     517             :                              "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
     518             :                              fde, mpx_fde, fde->fd);
     519           0 :                 DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
     520           0 :                 fde->wrapper = NULL;
     521           0 :                 fde->event_ctx = NULL;
     522           0 :                 if (mpx_fde != NULL) {
     523           0 :                         DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
     524           0 :                         mpx_fde->wrapper = NULL;
     525           0 :                         mpx_fde->event_ctx = NULL;
     526             :                 }
     527     3946332 :                 return;
     528     3949572 :         } else if (ret != 0) {
     529           0 :                 epoll_panic(epoll_ev, "EPOLL_CTL_MOD failed", false);
     530           0 :                 return;
     531             :         }
     532             : 
     533     3949572 :         fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
     534             :         /* only if we want to read we want to tell the event handler about errors */
     535     3949572 :         if (fde->flags & TEVENT_FD_READ) {
     536     3943515 :                 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
     537             :         }
     538             : 
     539     3949572 :         if (mpx_fde == NULL) {
     540     3937840 :                 return;
     541             :         }
     542             : 
     543        3240 :         mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
     544             :         /* only if we want to read we want to tell the event handler about errors */
     545        3240 :         if (mpx_fde->flags & TEVENT_FD_READ) {
     546        3240 :                 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
     547             :         }
     548             : }
     549             : 
     550    49717036 : static void epoll_update_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
     551             : {
                               /*
                                * Bring the epoll registration for fde in line with the
                                * flags currently set on fde and, if present, on its
                                * multiplexed partner: depending on the state this calls
                                * epoll_mod_event(), epoll_del_event(), epoll_add_event()
                                * or does nothing.
                                *
                                * Note that got_error is taken from fde only, while
                                * want_read/want_write also include the partner's flags.
                                */
     552    49717036 :         bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
     553    49717036 :         bool want_read = (fde->flags & TEVENT_FD_READ);
     554    49717036 :         bool want_write= (fde->flags & TEVENT_FD_WRITE);
     555    49717036 :         struct tevent_fd *mpx_fde = NULL;
     556             : 
     557    49717036 :         if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
     558             :                 /*
     559             :                  * work out what the multiplexed fde wants.
     560             :                  */
     561        3240 :                 mpx_fde = talloc_get_type_abort(fde->additional_data,
     562             :                                                 struct tevent_fd);
     563             : 
     564        3240 :                 if (mpx_fde->flags & TEVENT_FD_READ) {
     565        3240 :                         want_read = true;
     566             :                 }
     567             : 
     568        3240 :                 if (mpx_fde->flags & TEVENT_FD_WRITE) {
     569           0 :                         want_write = true;
     570             :                 }
     571             :         }
     572             : 
     573             :         /* there's already an event */
     574    49717036 :         if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
                                       /*
                                        * Write interest alone only counts while no
                                        * error has been recorded on fde.
                                        */
     575    13925555 :                 if (want_read || (want_write && !got_error)) {
     576     3949572 :                         epoll_mod_event(epoll_ev, fde);
     577     3949572 :                         return;
     578             :                 }
     579             :                 /* 
     580             :                  * if we want to match the select behavior, we need to remove the epoll_event
     581             :                  * when the caller isn't interested in events.
     582             :                  *
     583             :                  * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
     584             :                  */
     585     9975983 :                 epoll_del_event(epoll_ev, fde);
     586     9975983 :                 return;
     587             :         }
     588             : 
     589             :         /* there's no epoll_event attached to the fde */
     590    35791481 :         if (want_read || (want_write && !got_error)) {
     591    35511031 :                 epoll_add_event(epoll_ev, fde);
     592    35511031 :                 return;
     593             :         }
                               /* no event is attached and none is wanted: nothing to do */
     594             : }
     595             : 
     596             : /*
     597             :   Cope with epoll returning EPOLLHUP|EPOLLERR on an event.
     598             :   Return true if there's nothing else to do, false if
     599             :   this event needs further handling.

                         fde may be NULL (the event loop also calls this for the
                         multiplexed partner, which need not exist); a NULL fde
                         counts as handled.

                         For a non-NULL fde, EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR is
                         always recorded in fde->additional_flags. If
                         EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR is not set,
                         TEVENT_FD_WRITE is additionally cleared from fde->flags and
                         true is returned.

                         epoll_ev is not used in the body of this function.
     600             : */
     601    11232456 : static bool epoll_handle_hup_or_err(struct epoll_event_context *epoll_ev,
     602             :                                 struct tevent_fd *fde)
     603             : {
     604    11235531 :         if (fde == NULL) {
     605             :                 /* Nothing to do if no event. */
     606     5616228 :                 return true;
     607             :         }
     608             : 
     609     5619303 :         fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
     610             :         /*
     611             :          * if we only wait for TEVENT_FD_WRITE, we should not tell the
     612             :          * event handler about it, and remove the epoll_event,
     613             :          * as we only report errors when waiting for read events,
     614             :          * to match the select() behavior
     615             :          */
     616     5619303 :         if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
     617             :                 /*
     618             :                  * Do the same as the poll backend and
     619             :                  * remove the writeable flag.
     620             :                  */
     621           0 :                 fde->flags &= ~TEVENT_FD_WRITE;
     622           0 :                 return true;
     623             :         }
     624             :         /* This has TEVENT_FD_READ set, we're not finished. */
     625     5616228 :         return false;
     626             : }
     627             : 
     628             : /*
     629             :   event loop handling using epoll

                         Waits with epoll_wait() for at most MAXEVENTS (1) event and
                         dispatches it to the matching fd handler.

                         tvalp: maximum time to wait, rounded up to whole
                                milliseconds; if NULL the timeout passed to
                                epoll_wait() is -1.

                         Returns 0 if a signal check returned non-zero, if the wait
                         timed out or was interrupted, or if no handler had to be
                         called; -1 after epoll_panic() was called; otherwise the
                         return value of tevent_common_invoke_fd_handler().
     630             : */
     631   178779703 : static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval *tvalp)
     632             : {
     633             :         int ret, i;
     634             : #define MAXEVENTS 1
     635             :         struct epoll_event events[MAXEVENTS];
     636   178779703 :         int timeout = -1;
     637             :         int wait_errno;
     638             : 
     639   178779703 :         if (tvalp) {
     640             :                 /* it's better to trigger timed events a bit later than too early */
     641   178779703 :                 timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
     642             :         }
     643             : 
     644   292553062 :         if (epoll_ev->ev->signal_events &&
     645   113773359 :             tevent_common_check_signal(epoll_ev->ev)) {
     646           0 :                 return 0;
     647             :         }
     648             : 
     649   178779703 :         tevent_trace_point_callback(epoll_ev->ev, TEVENT_TRACE_BEFORE_WAIT);
     650   178779703 :         ret = epoll_wait(epoll_ev->epoll_fd, events, MAXEVENTS, timeout);
                               /*
                                * keep the errno of epoll_wait(); the trace callback
                                * below runs before the value is inspected
                                */
     651   178779703 :         wait_errno = errno;
     652   178779703 :         tevent_trace_point_callback(epoll_ev->ev, TEVENT_TRACE_AFTER_WAIT);
     653             : 
                               /*
                                * Interrupted wait: if the signal check does not return
                                * non-zero we fall through; ret is still -1, so none of
                                * the branches below match, the loop is skipped and 0 is
                                * returned.
                                */
     654   178779703 :         if (ret == -1 && wait_errno == EINTR && epoll_ev->ev->signal_events) {
     655    23329419 :                 if (tevent_common_check_signal(epoll_ev->ev)) {
     656    23278466 :                         return 0;
     657             :                 }
     658             :         }
     659             : 
     660   155450329 :         if (ret == -1 && wait_errno != EINTR) {
     661           0 :                 epoll_panic(epoll_ev, "epoll_wait() failed", true);
     662           0 :                 return -1;
     663             :         }
     664             : 
     665   155450329 :         if (ret == 0 && tvalp) {
     666             :                 /* we don't care about a possible delay here */
     667     3191675 :                 tevent_common_loop_timer_delay(epoll_ev->ev);
     668     3191663 :                 return 0;
     669             :         }
     670             : 
     671   151133628 :         for (i=0;i<ret;i++) {
     672   152225606 :                 struct tevent_fd *fde = talloc_get_type(events[i].data.ptr,
     673             :                                                        struct tevent_fd);
     674   152225606 :                 uint16_t flags = 0;
     675   152225606 :                 struct tevent_fd *mpx_fde = NULL;
     676             : 
     677   152225606 :                 if (fde == NULL) {
     678           0 :                         epoll_panic(epoll_ev, "epoll_wait() gave bad data", true);
     679           0 :                         return -1;
     680             :                 }
     681   152225606 :                 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
     682             :                         /*
     683             :                          * Save off the multiplexed event in case we need
     684             :                          * to use it to call the handler function.
     685             :                          */
     686      781415 :                         mpx_fde = talloc_get_type_abort(fde->additional_data,
     687             :                                                         struct tevent_fd);
     688             :                 }
     689   152225606 :                 if (events[i].events & (EPOLLHUP|EPOLLERR)) {
     690     5622378 :                         bool handled_fde = epoll_handle_hup_or_err(epoll_ev, fde);
     691     5622378 :                         bool handled_mpx = epoll_handle_hup_or_err(epoll_ev, mpx_fde);
     692             : 
                                               /*
                                                * nobody wants to hear about the error:
                                                * just refresh the registration
                                                */
     693     5619303 :                         if (handled_fde && handled_mpx) {
     694           0 :                                 epoll_update_event(epoll_ev, fde);
     695           0 :                                 continue;
     696             :                         }
     697             : 
     698     5619303 :                         if (!handled_mpx) {
     699             :                                 /*
     700             :                                  * If the mpx event was the one that needs
     701             :                                  * further handling, it's the TEVENT_FD_READ
     702             :                                  * event so switch over and call that handler.
     703             :                                  */
     704           0 :                                 fde = mpx_fde;
     705           0 :                                 mpx_fde = NULL;
     706             :                         }
                                               /* the error is reported as a read event */
     707     5616228 :                         flags |= TEVENT_FD_READ;
     708             :                 }
     709   152225606 :                 if (events[i].events & EPOLLIN) flags |= TEVENT_FD_READ;
     710   152225606 :                 if (events[i].events & EPOLLOUT) flags |= TEVENT_FD_WRITE;
     711             : 
                                       /*
                                        * Writable: choose whichever of the pair asked for
                                        * TEVENT_FD_WRITE, fde taking precedence over mpx_fde.
                                        */
     712   152225606 :                 if (flags & TEVENT_FD_WRITE) {
     713    12310003 :                         if (fde->flags & TEVENT_FD_WRITE) {
     714    11672830 :                                 mpx_fde = NULL;
     715             :                         }
     716    12036498 :                         if (mpx_fde && mpx_fde->flags & TEVENT_FD_WRITE) {
     717      361631 :                                 fde = mpx_fde;
     718      361631 :                                 mpx_fde = NULL;
     719             :                         }
     720             :                 }
     721             : 
     722   152223569 :                 if (mpx_fde) {
     723             :                         /* Ensure we got the right fde. */
     724      139082 :                         if ((flags & fde->flags) == 0) {
     725        5622 :                                 fde = mpx_fde;
     726        5622 :                                 mpx_fde = NULL;
     727             :                         }
     728             :                 }
     729             : 
     730             :                 /*
     731             :                  * make sure we only pass the flags
     732             :                  * the handler is expecting.
     733             :                  */
     734   152225606 :                 flags &= fde->flags;
     735   152225606 :                 if (flags) {
                                               /*
                                                * returning here means at most one handler
                                                * is invoked per call of this function
                                                */
     736   152225606 :                         return tevent_common_invoke_fd_handler(fde, flags, NULL);
     737             :                 }
     738             :         }
     739             : 
     740       33048 :         return 0;
     741             : }
     742             : 
     743             : /*
     744             :   create an epoll_event_context structure.

                         The new context is allocated as a talloc child of ev and
                         stored in ev->additional_data once epoll_init_ctx()
                         succeeded.

                         Returns 0 on success, -1 if the allocation fails, otherwise
                         the non-zero result of epoll_init_ctx() (the partially set
                         up context is freed in that case).
     745             : */
     746    87544481 : static int epoll_event_context_init(struct tevent_context *ev)
     747             : {
     748             :         int ret;
     749             :         struct epoll_event_context *epoll_ev;
     750             : 
     751             :         /*
     752             :          * We might be called during tevent_re_initialise()
     753             :          * which means we need to free our old additional_data.
     754             :          */
     755    87544481 :         TALLOC_FREE(ev->additional_data);
     756             : 
     757    87544481 :         epoll_ev = talloc_zero(ev, struct epoll_event_context);
     758    87544481 :         if (!epoll_ev) return -1;
     759    87544481 :         epoll_ev->ev = ev;
                               /* no epoll descriptor yet; epoll_init_ctx() is called next */
     760    87544481 :         epoll_ev->epoll_fd = -1;
     761             : 
     762    87544481 :         ret = epoll_init_ctx(epoll_ev);
     763    87544481 :         if (ret != 0) {
     764           0 :                 talloc_free(epoll_ev);
     765           0 :                 return ret;
     766             :         }
     767             : 
     768    87544481 :         ev->additional_data = epoll_ev;
     769    87544481 :         return 0;
     770             : }
     771             : 
     772             : /*
     773             :   destroy an fd_event

                         Destructor for a tevent_fd (installed with
                         talloc_set_destructor() in epoll_event_add_fd()). It
                         detaches fde from the context's fd_events list and from a
                         multiplexed partner, updates the epoll registration and
                         always finishes with tevent_common_fd_destructor(fde),
                         whose result is returned.

                         panic_triggered is a local flag published through
                         epoll_ev->panic_state; it is checked after every call that
                         may panic. NOTE(review): the code that sets it (presumably
                         epoll_panic()) is outside this chunk. On the panic paths
                         epoll_ev is not touched again.
     774             : */
     775    41995410 : static int epoll_event_fd_destructor(struct tevent_fd *fde)
     776             : {
     777    41995410 :         struct tevent_context *ev = fde->event_ctx;
     778    41995410 :         struct epoll_event_context *epoll_ev = NULL;
     779    41995410 :         bool panic_triggered = false;
     780    41995410 :         struct tevent_fd *mpx_fde = NULL;
     781    41995410 :         int flags = fde->flags;
     782             : 
                               /* fde is no longer attached to a context: nothing epoll specific to do */
     783    41995410 :         if (ev == NULL) {
     784    32050630 :                 return tevent_common_fd_destructor(fde);
     785             :         }
     786             : 
     787     9944780 :         epoll_ev = talloc_get_type_abort(ev->additional_data,
     788             :                                          struct epoll_event_context);
     789             : 
     790             :         /*
     791             :          * we must remove the event from the list
     792             :          * otherwise a panic fallback handler may
     793             :          * reuse invalid memory
     794             :          */
     795     9944780 :         DLIST_REMOVE(ev->fd_events, fde);
     796             : 
     797     9944780 :         if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
     798       36062 :                 mpx_fde = talloc_get_type_abort(fde->additional_data,
     799             :                                                 struct tevent_fd);
     800             : 
                                       /* break the link between the two fdes in both directions */
     801       36062 :                 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
     802       36062 :                 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
     803             : 
     804       36062 :                 fde->additional_data = NULL;
     805       36062 :                 mpx_fde->additional_data = NULL;
     806             : 
                                       /*
                                        * With HAS_EVENT cleared and flags forced to 0 below,
                                        * the later epoll_update_event(epoll_ev, fde) neither
                                        * adds nor deletes anything; the registration is
                                        * settled by the update of mpx_fde instead.
                                        */
     807       36062 :                 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
     808             :         }
     809             : 
     810     9944780 :         epoll_ev->panic_state = &panic_triggered;
     811     9944780 :         epoll_check_reopen(epoll_ev);
     812     9944780 :         if (panic_triggered) {
     813           0 :                 return tevent_common_fd_destructor(fde);
     814             :         }
     815             : 
     816     9944780 :         if (mpx_fde != NULL) {
     817       36062 :                 epoll_update_event(epoll_ev, mpx_fde);
     818       36062 :                 if (panic_triggered) {
     819           0 :                         return tevent_common_fd_destructor(fde);
     820             :                 }
     821             :         }
     822             : 
                               /*
                                * Run the update with no flags requested, then restore
                                * the saved flags on fde.
                                */
     823     9944780 :         fde->flags = 0;
     824     9944780 :         epoll_update_event(epoll_ev, fde);
     825     9944780 :         fde->flags = flags;
     826     9944780 :         if (panic_triggered) {
     827           0 :                 return tevent_common_fd_destructor(fde);
     828             :         }
     829     9944780 :         epoll_ev->panic_state = NULL;
     830             : 
     831     9944780 :         return tevent_common_fd_destructor(fde);
     832             : }
     833             : 
     834             : /*
     835             :   add a fd based event
     836             :   return NULL on failure (memory allocation error)

                         The tevent_fd itself is created by tevent_common_add_fd();
                         this function installs epoll_event_fd_destructor() on it
                         and calls epoll_update_event() for it.

                         If a panic is flagged during epoll_check_reopen() the fde
                         is returned as is, without calling epoll_update_event().
                         panic_state is reset to NULL before epoll_update_event()
                         runs, so a panic inside that call is not observed here.
     837             : */
     838    34635940 : static struct tevent_fd *epoll_event_add_fd(struct tevent_context *ev, TALLOC_CTX *mem_ctx,
     839             :                                             int fd, uint16_t flags,
     840             :                                             tevent_fd_handler_t handler,
     841             :                                             void *private_data,
     842             :                                             const char *handler_name,
     843             :                                             const char *location)
     844             : {
     845    31762570 :         struct epoll_event_context *epoll_ev =
     846    34635940 :                 talloc_get_type_abort(ev->additional_data,
     847             :                 struct epoll_event_context);
     848             :         struct tevent_fd *fde;
     849    34635940 :         bool panic_triggered = false;
     850             : 
     851    34635940 :         fde = tevent_common_add_fd(ev, mem_ctx, fd, flags,
     852             :                                    handler, private_data,
     853             :                                    handler_name, location);
     854    34635940 :         if (!fde) return NULL;
     855             : 
     856    34635940 :         talloc_set_destructor(fde, epoll_event_fd_destructor);
     857             : 
     858    34635940 :         epoll_ev->panic_state = &panic_triggered;
     859    34635940 :         epoll_check_reopen(epoll_ev);
     860    34635940 :         if (panic_triggered) {
     861           0 :                 return fde;
     862             :         }
     863    34635940 :         epoll_ev->panic_state = NULL;
     864             : 
     865    34635940 :         epoll_update_event(epoll_ev, fde);
     866             : 
     867    34635940 :         return fde;
     868             : }
     869             : 
     870             : /*
     871             :   set the fd event flags

                         Does nothing if fde->flags already equals flags. Otherwise
                         the new flags are stored in fde->flags and, unless a panic
                         is flagged during epoll_check_reopen(), epoll_update_event()
                         is called for fde.

                         fde->event_ctx is dereferenced without a NULL check here.
                         NOTE(review): presumably callers guarantee it is set -
                         confirm against the caller.
     872             : */
     873    37799222 : static void epoll_event_set_fd_flags(struct tevent_fd *fde, uint16_t flags)
     874             : {
     875             :         struct tevent_context *ev;
     876             :         struct epoll_event_context *epoll_ev;
     877    37799222 :         bool panic_triggered = false;
     878             : 
     879    70256629 :         if (fde->flags == flags) return;
     880             : 
     881     4285624 :         ev = fde->event_ctx;
     882     4285624 :         epoll_ev = talloc_get_type_abort(ev->additional_data,
     883             :                                          struct epoll_event_context);
     884             : 
     885     4285624 :         fde->flags = flags;
     886             : 
     887     4285624 :         epoll_ev->panic_state = &panic_triggered;
     888     4285624 :         epoll_check_reopen(epoll_ev);
     889     4285624 :         if (panic_triggered) {
     890           0 :                 return;
     891             :         }
     892     4285624 :         epoll_ev->panic_state = NULL;
     893             : 
     894     4285624 :         epoll_update_event(epoll_ev, fde);
     895             : }
     896             : 
     897             : /*
     898             :   do a single event loop using the events defined in ev

                         Order of work: signal check, activation of threaded
                         immediates (only if ev->threaded_contexts is set),
                         immediate events, timers, and only then the epoll wait in
                         epoll_event_loop(). The function returns 0 as soon as one
                         of the earlier steps reports work (non-zero result, or a
                         zero timer delay).

                         Returns -1 with errno set to EINVAL if a panic is flagged
                         during epoll_check_reopen(); otherwise the result of
                         epoll_event_loop().

                         The location argument is not used in the body.
     899             : */
     900   376755652 : static int epoll_event_loop_once(struct tevent_context *ev, const char *location)
     901             : {
     902   338574359 :         struct epoll_event_context *epoll_ev =
     903   376755652 :                 talloc_get_type_abort(ev->additional_data,
     904             :                 struct epoll_event_context);
     905             :         struct timeval tval;
     906   376755652 :         bool panic_triggered = false;
     907             : 
     908   515519093 :         if (ev->signal_events &&
     909   138763451 :             tevent_common_check_signal(ev)) {
     910     1608641 :                 return 0;
     911             :         }
     912             : 
     913   374986975 :         if (ev->threaded_contexts != NULL) {
     914     4507330 :                 tevent_common_threaded_activate_immediate(ev);
     915             :         }
     916             : 
     917   422443103 :         if (ev->immediate_events &&
     918    47468472 :             tevent_common_loop_immediate(ev)) {
     919    47232260 :                 return 0;
     920             :         }
     921             : 
                               /* tval is the delay later handed to epoll_event_loop() as its timeout */
     922   327518503 :         tval = tevent_common_loop_timer_delay(ev);
     923   327518491 :         if (tevent_timeval_is_zero(&tval)) {
     924   145318472 :                 return 0;
     925             :         }
     926             : 
                               /*
                                * panic_force_replay is only set for the duration of
                                * epoll_check_reopen(). NOTE(review): its consumer is
                                * outside this chunk - presumably the panic path.
                                */
     927   178779703 :         epoll_ev->panic_state = &panic_triggered;
     928   178779703 :         epoll_ev->panic_force_replay = true;
     929   178779703 :         epoll_check_reopen(epoll_ev);
     930   178779703 :         if (panic_triggered) {
     931           0 :                 errno = EINVAL;
     932           0 :                 return -1;
     933             :         }
     934   178779703 :         epoll_ev->panic_force_replay = false;
     935   178779703 :         epoll_ev->panic_state = NULL;
     936             : 
     937   178779703 :         return epoll_event_loop(epoll_ev, &tval);
     938             : }
     939             : 
     940             : static const struct tevent_ops epoll_event_ops = {
                               /*
                                * Operations table registered by tevent_epoll_init().
                                * context_init, add_fd, set_fd_flags and loop_once are the
                                * epoll specific functions of this file; all other
                                * entries point at tevent_common_* functions.
                                */
     941             :         .context_init           = epoll_event_context_init,
     942             :         .add_fd                 = epoll_event_add_fd,
     943             :         .set_fd_close_fn        = tevent_common_fd_set_close_fn,
     944             :         .get_fd_flags           = tevent_common_fd_get_flags,
     945             :         .set_fd_flags           = epoll_event_set_fd_flags,
     946             :         .add_timer              = tevent_common_add_timer_v2,
     947             :         .schedule_immediate     = tevent_common_schedule_immediate,
     948             :         .add_signal             = tevent_common_add_signal,
     949             :         .loop_once              = epoll_event_loop_once,
     950             :         .loop_wait              = tevent_common_loop_wait,
     951             : };
     952             : 
     953       65046 : _PRIVATE_ bool tevent_epoll_init(void)
     954             : {
                               /*
                                * Register epoll_event_ops under the backend name "epoll"
                                * and pass on the result of tevent_register_backend().
                                */
     955       65046 :         return tevent_register_backend("epoll", &epoll_event_ops);
     956             : }

Generated by: LCOV version 1.13