LCOV - code coverage report
Current view: top level - fs - select.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-djwx @ Mon Jul 31 20:08:22 PDT 2023 Lines: 407 650 62.6 %
Date: 2023-07-31 20:08:22 Functions: 28 52 53.8 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * This file contains the procedures for the handling of select and poll
       4             :  *
       5             :  * Created for Linux based loosely upon Mathius Lattner's minix
       6             :  * patches by Peter MacDonald. Heavily edited by Linus.
       7             :  *
       8             :  *  4 February 1994
       9             :  *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
      10             :  *     flag set in its personality we do *not* modify the given timeout
      11             :  *     parameter to reflect time remaining.
      12             :  *
      13             :  *  24 January 2000
      14             :  *     Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation 
      15             :  *     of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian).
      16             :  */
      17             : 
      18             : #include <linux/compat.h>
      19             : #include <linux/kernel.h>
      20             : #include <linux/sched/signal.h>
      21             : #include <linux/sched/rt.h>
      22             : #include <linux/syscalls.h>
      23             : #include <linux/export.h>
      24             : #include <linux/slab.h>
      25             : #include <linux/poll.h>
      26             : #include <linux/personality.h> /* for STICKY_TIMEOUTS */
      27             : #include <linux/file.h>
      28             : #include <linux/fdtable.h>
      29             : #include <linux/fs.h>
      30             : #include <linux/rcupdate.h>
      31             : #include <linux/hrtimer.h>
      32             : #include <linux/freezer.h>
      33             : #include <net/busy_poll.h>
      34             : #include <linux/vmalloc.h>
      35             : 
      36             : #include <linux/uaccess.h>
      37             : 
      38             : 
      39             : /*
      40             :  * Estimate expected accuracy in ns from a timespec64.
      41             :  *
      42             :  * After quite a bit of churning around, we've settled on
      43             :  * a simple thing of taking 0.1% of the timeout as the
      44             :  * slack, with a cap of 100 msec.
      45             :  * "nice" tasks get a 0.5% slack instead.
      46             :  *
      47             :  * Consider this comment an open invitation to come up with even
      48             :  * better solutions..
      49             :  */
      50             : 
      51             : #define MAX_SLACK       (100 * NSEC_PER_MSEC)
      52             : 
      53     3672336 : static long __estimate_accuracy(struct timespec64 *tv)
      54             : {
      55     3672336 :         long slack;
      56     3672336 :         int divfactor = 1000;
      57             : 
      58     3672336 :         if (tv->tv_sec < 0)
      59             :                 return 0;
      60             : 
      61     3672336 :         if (task_nice(current) > 0)
      62           0 :                 divfactor = divfactor / 5;
      63             : 
      64     3672336 :         if (tv->tv_sec > MAX_SLACK / (NSEC_PER_SEC/divfactor))
      65             :                 return MAX_SLACK;
      66             : 
      67     3559682 :         slack = tv->tv_nsec / divfactor;
      68     3559682 :         slack += tv->tv_sec * (NSEC_PER_SEC/divfactor);
      69             : 
      70     3559682 :         if (slack > MAX_SLACK)
      71             :                 return MAX_SLACK;
      72             : 
      73             :         return slack;
      74             : }
      75             : 
      76     3672337 : u64 select_estimate_accuracy(struct timespec64 *tv)
      77             : {
      78     3672337 :         u64 ret;
      79     3672337 :         struct timespec64 now;
      80             : 
      81             :         /*
      82             :          * Realtime tasks get a slack of 0 for obvious reasons.
      83             :          */
      84             : 
      85     3672337 :         if (rt_task(current))
      86             :                 return 0;
      87             : 
      88     3672337 :         ktime_get_ts64(&now);
      89     3672370 :         now = timespec64_sub(*tv, now);
      90     3672381 :         ret = __estimate_accuracy(&now);
      91     3672381 :         if (ret < current->timer_slack_ns)
      92      155426 :                 return current->timer_slack_ns;
      93             :         return ret;
      94             : }
      95             : 
      96             : 
      97             : 
      98             : struct poll_table_page {
      99             :         struct poll_table_page * next;
     100             :         struct poll_table_entry * entry;
     101             :         struct poll_table_entry entries[];
     102             : };
     103             : 
     104             : #define POLL_TABLE_FULL(table) \
     105             :         ((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table))
     106             : 
     107             : /*
     108             :  * Ok, Peter made a complicated, but straightforward multiple_wait() function.
     109             :  * I have rewritten this, taking some shortcuts: This code may not be easy to
     110             :  * follow, but it should be free of race-conditions, and it's practical. If you
     111             :  * understand what I'm doing here, then you understand how the linux
     112             :  * sleep/wakeup mechanism works.
     113             :  *
     114             :  * Two very simple procedures, poll_wait() and poll_freewait() do all the
     115             :  * work.  poll_wait() is an inline-function defined in <linux/poll.h>,
     116             :  * as all select/poll functions have to call it to add an entry to the
     117             :  * poll table.
     118             :  */
     119             : static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
     120             :                        poll_table *p);
     121             : 
     122           0 : void poll_initwait(struct poll_wqueues *pwq)
     123             : {
     124     8471117 :         init_poll_funcptr(&pwq->pt, __pollwait);
     125     8471117 :         pwq->polling_task = current;
     126     8471117 :         pwq->triggered = 0;
     127     8471117 :         pwq->error = 0;
     128     8471117 :         pwq->table = NULL;
     129     8471117 :         pwq->inline_index = 0;
     130           0 : }
     131             : EXPORT_SYMBOL(poll_initwait);
     132             : 
     133     7806250 : static void free_poll_entry(struct poll_table_entry *entry)
     134             : {
     135     7806250 :         remove_wait_queue(entry->wait_address, &entry->wait);
     136     7806595 :         fput(entry->filp);
     137     7806665 : }
     138             : 
     139     8470806 : void poll_freewait(struct poll_wqueues *pwq)
     140             : {
     141     8470806 :         struct poll_table_page * p = pwq->table;
     142     8470806 :         int i;
     143    16138851 :         for (i = 0; i < pwq->inline_index; i++)
     144     7667624 :                 free_poll_entry(pwq->inline_entries + i);
     145     8609811 :         while (p) {
     146      138641 :                 struct poll_table_entry * entry;
     147      138641 :                 struct poll_table_page *old;
     148             : 
     149      138641 :                 entry = p->entry;
     150      138641 :                 do {
     151      138641 :                         entry--;
     152      138641 :                         free_poll_entry(entry);
     153      138641 :                 } while (entry > p->entries);
     154      138641 :                 old = p;
     155      138641 :                 p = p->next;
     156      138641 :                 free_page((unsigned long) old);
     157             :         }
     158     8471170 : }
     159             : EXPORT_SYMBOL(poll_freewait);
     160             : 
     161     7806590 : static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
     162             : {
     163     7806590 :         struct poll_table_page *table = p->table;
     164             : 
     165     7806590 :         if (p->inline_index < N_INLINE_POLL_ENTRIES)
     166     7667949 :                 return p->inline_entries + p->inline_index++;
     167             : 
     168      138641 :         if (!table || POLL_TABLE_FULL(table)) {
     169      138641 :                 struct poll_table_page *new_table;
     170             : 
     171      138641 :                 new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
     172      138641 :                 if (!new_table) {
     173           0 :                         p->error = -ENOMEM;
     174           0 :                         return NULL;
     175             :                 }
     176      138641 :                 new_table->entry = new_table->entries;
     177      138641 :                 new_table->next = table;
     178      138641 :                 p->table = new_table;
     179      138641 :                 table = new_table;
     180             :         }
     181             : 
     182      138641 :         return table->entry++;
     183             : }
     184             : 
     185     5919010 : static int __pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
     186             : {
     187     5919010 :         struct poll_wqueues *pwq = wait->private;
     188     5919010 :         DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);
     189             : 
     190             :         /*
     191             :          * Although this function is called under waitqueue lock, LOCK
     192             :          * doesn't imply write barrier and the users expect write
     193             :          * barrier semantics on wakeup functions.  The following
     194             :          * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
     195             :          * and is paired with smp_store_mb() in poll_schedule_timeout.
     196             :          */
     197     5919010 :         smp_wmb();
     198     5919009 :         pwq->triggered = 1;
     199             : 
     200             :         /*
     201             :          * Perform the default wake up operation using a dummy
     202             :          * waitqueue.
     203             :          *
     204             :          * TODO: This is hacky but there currently is no interface to
     205             :          * pass in @sync.  @sync is scheduled to be removed and once
     206             :          * that happens, wake_up_process() can be used directly.
     207             :          */
     208     5919009 :         return default_wake_function(&dummy_wait, mode, sync, key);
     209             : }
     210             : 
     211     7386328 : static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
     212             : {
     213     7386328 :         struct poll_table_entry *entry;
     214             : 
     215     7386328 :         entry = container_of(wait, struct poll_table_entry, wait);
     216     7386328 :         if (key && !(key_to_poll(key) & entry->key))
     217             :                 return 0;
     218     5919087 :         return __pollwake(wait, mode, sync, key);
     219             : }
     220             : 
     221             : /* Add a new entry */
     222     7806635 : static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
     223             :                                 poll_table *p)
     224             : {
     225     7806635 :         struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt);
     226     7806635 :         struct poll_table_entry *entry = poll_get_entry(pwq);
     227     7806569 :         if (!entry)
     228             :                 return;
     229     7806569 :         entry->filp = get_file(filp);
     230     7806880 :         entry->wait_address = wait_address;
     231     7806880 :         entry->key = p->_key;
     232     7806880 :         init_waitqueue_func_entry(&entry->wait, pollwake);
     233     7806880 :         entry->wait.private = pwq;
     234     7806880 :         add_wait_queue(wait_address, &entry->wait);
     235             : }
     236             : 
     237     5356745 : static int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
     238             :                           ktime_t *expires, unsigned long slack)
     239             : {
     240     5356745 :         int rc = -EINTR;
     241             : 
     242     5356745 :         set_current_state(state);
     243     5357062 :         if (!pwq->triggered)
     244     5356530 :                 rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS);
     245     5356916 :         __set_current_state(TASK_RUNNING);
     246             : 
     247             :         /*
     248             :          * Prepare for the next iteration.
     249             :          *
     250             :          * The following smp_store_mb() serves two purposes.  First, it's
     251             :  * the counterpart rmb of the wmb in __pollwake() such that data
     252             :          * written before wake up is always visible after wake up.
     253             :          * Second, the full barrier guarantees that triggered clearing
     254             :          * doesn't pass event check of the next iteration.  Note that
     255             :          * this problem doesn't exist for the first iteration as
     256             :          * add_wait_queue() has full barrier semantics.
     257             :          */
     258     5356916 :         smp_store_mb(pwq->triggered, 0);
     259             : 
     260     5356952 :         return rc;
     261             : }
     262             : 
     263             : /**
     264             :  * poll_select_set_timeout - helper function to setup the timeout value
     265             :  * @to:         pointer to timespec64 variable for the final timeout
     266             :  * @sec:        seconds (from user space)
     267             :  * @nsec:       nanoseconds (from user space)
     268             :  *
     269             :  * Note, we do not use a timespec for the user space value here. That
     270             :  * way we can use the function for timeval and compat interfaces as well.
     271             :  *
     272             :  * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0.
     273             :  */
     274     6371244 : int poll_select_set_timeout(struct timespec64 *to, time64_t sec, long nsec)
     275             : {
     276     6371244 :         struct timespec64 ts = {.tv_sec = sec, .tv_nsec = nsec};
     277             : 
     278     6371244 :         if (!timespec64_valid(&ts))
     279             :                 return -EINVAL;
     280             : 
     281             :         /* Optimize for the zero timeout value here */
     282     6371244 :         if (!sec && !nsec) {
     283     2905759 :                 to->tv_sec = to->tv_nsec = 0;
     284             :         } else {
     285     3465485 :                 ktime_get_ts64(to);
     286     3465502 :                 *to = timespec64_add_safe(*to, ts);
     287             :         }
     288             :         return 0;
     289             : }
     290             : 
     291             : enum poll_time_type {
     292             :         PT_TIMEVAL = 0,
     293             :         PT_OLD_TIMEVAL = 1,
     294             :         PT_TIMESPEC = 2,
     295             :         PT_OLD_TIMESPEC = 3,
     296             : };
     297             : 
     298     4950430 : static int poll_select_finish(struct timespec64 *end_time,
     299             :                               void __user *p,
     300             :                               enum poll_time_type pt_type, int ret)
     301             : {
     302     4950430 :         struct timespec64 rts;
     303             : 
     304     4950430 :         restore_saved_sigmask_unless(ret == -ERESTARTNOHAND);
     305             : 
     306     4950423 :         if (!p)
     307             :                 return ret;
     308             : 
     309     3418174 :         if (current->personality & STICKY_TIMEOUTS)
     310           0 :                 goto sticky;
     311             : 
     312             :         /* No update for zero timeout */
     313     3418174 :         if (!end_time->tv_sec && !end_time->tv_nsec)
     314             :                 return ret;
     315             : 
     316     3213946 :         ktime_get_ts64(&rts);
     317     3213985 :         rts = timespec64_sub(*end_time, rts);
     318     3213961 :         if (rts.tv_sec < 0)
     319      116193 :                 rts.tv_sec = rts.tv_nsec = 0;
     320             : 
     321             : 
     322     3213961 :         switch (pt_type) {
     323             :         case PT_TIMEVAL:
     324             :                 {
     325           0 :                         struct __kernel_old_timeval rtv;
     326             : 
     327           0 :                         if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec))
     328             :                                 memset(&rtv, 0, sizeof(rtv));
     329           0 :                         rtv.tv_sec = rts.tv_sec;
     330           0 :                         rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
     331           0 :                         if (!copy_to_user(p, &rtv, sizeof(rtv)))
     332           0 :                                 return ret;
     333             :                 }
     334           0 :                 break;
     335           0 :         case PT_OLD_TIMEVAL:
     336             :                 {
     337           0 :                         struct old_timeval32 rtv;
     338             : 
     339           0 :                         rtv.tv_sec = rts.tv_sec;
     340           0 :                         rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
     341           0 :                         if (!copy_to_user(p, &rtv, sizeof(rtv)))
     342           0 :                                 return ret;
     343             :                 }
     344           0 :                 break;
     345     3213961 :         case PT_TIMESPEC:
     346     3213961 :                 if (!put_timespec64(&rts, p))
     347             :                         return ret;
     348             :                 break;
     349           0 :         case PT_OLD_TIMESPEC:
     350           0 :                 if (!put_old_timespec32(&rts, p))
     351             :                         return ret;
     352             :                 break;
     353           0 :         default:
     354           0 :                 BUG();
     355             :         }
     356             :         /*
     357             :          * If an application puts its timeval in read-only memory, we
     358             :          * don't want the Linux-specific update to the timeval to
     359             :          * cause a fault after the select has completed
     360             :          * successfully. However, because we're not updating the
     361             :          * timeval, we can't restart the system call.
     362             :          */
     363             : 
     364           5 : sticky:
     365           5 :         if (ret == -ERESTARTNOHAND)
     366           0 :                 ret = -EINTR;
     367             :         return ret;
     368             : }
     369             : 
     370             : /*
     371             :  * Scalable version of the fd_set.
     372             :  */
     373             : 
     374             : typedef struct {
     375             :         unsigned long *in, *out, *ex;
     376             :         unsigned long *res_in, *res_out, *res_ex;
     377             : } fd_set_bits;
     378             : 
     379             : /*
     380             :  * How many longwords for "nr" bits?
     381             :  */
     382             : #define FDS_BITPERLONG  (8*sizeof(long))
     383             : #define FDS_LONGS(nr)   (((nr)+FDS_BITPERLONG-1)/FDS_BITPERLONG)
     384             : #define FDS_BYTES(nr)   (FDS_LONGS(nr)*sizeof(long))
     385             : 
     386             : /*
     387             :  * Use "unsigned long" accesses to let user-mode fd_set's be long-aligned.
     388             :  */
     389             : static inline
     390      345441 : int get_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
     391             : {
     392      345441 :         nr = FDS_BYTES(nr);
     393      345441 :         if (ufdset)
     394      280130 :                 return copy_from_user(fdset, ufdset, nr) ? -EFAULT : 0;
     395             : 
     396      205376 :         memset(fdset, 0, nr);
     397      205376 :         return 0;
     398             : }
     399             : 
     400             : static inline unsigned long __must_check
     401      345429 : set_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
     402             : {
     403      345429 :         if (ufdset)
     404      140061 :                 return __copy_to_user(ufdset, fdset, FDS_BYTES(nr));
     405             :         return 0;
     406             : }
     407             : 
     408             : static inline
     409      345441 : void zero_fd_set(unsigned long nr, unsigned long *fdset)
     410             : {
     411      345441 :         memset(fdset, 0, FDS_BYTES(nr));
     412      345441 : }
     413             : 
     414             : #define FDS_IN(fds, n)          (fds->in + n)
     415             : #define FDS_OUT(fds, n)         (fds->out + n)
     416             : #define FDS_EX(fds, n)          (fds->ex + n)
     417             : 
     418             : #define BITS(fds, n)    (*FDS_IN(fds, n)|*FDS_OUT(fds, n)|*FDS_EX(fds, n))
     419             : 
     420      115147 : static int max_select_fd(unsigned long n, fd_set_bits *fds)
     421             : {
     422      115147 :         unsigned long *open_fds;
     423      115147 :         unsigned long set;
     424      115147 :         int max;
     425      115147 :         struct fdtable *fdt;
     426             : 
     427             :         /* handle last incomplete long-word first */
     428      115147 :         set = ~(~0UL << (n & (BITS_PER_LONG-1)));
     429      115147 :         n /= BITS_PER_LONG;
     430      115147 :         fdt = files_fdtable(current->files);
     431      115147 :         open_fds = fdt->open_fds + n;
     432      115147 :         max = 0;
     433      115147 :         if (set) {
     434      114482 :                 set &= BITS(fds, n);
     435      114482 :                 if (set) {
     436      114482 :                         if (!(set & ~*open_fds))
     437      114482 :                                 goto get_max;
     438             :                         return -EBADF;
     439             :                 }
     440             :         }
     441      115147 :         while (n) {
     442           0 :                 open_fds--;
     443           0 :                 n--;
     444           0 :                 set = BITS(fds, n);
     445           0 :                 if (!set)
     446           0 :                         continue;
     447           0 :                 if (set & ~*open_fds)
     448             :                         return -EBADF;
     449           0 :                 if (max)
     450           0 :                         continue;
     451           0 : get_max:
     452      657080 :                 do {
     453      657080 :                         max++;
     454      657080 :                         set >>= 1;
     455      657080 :                 } while (set);
     456      114482 :                 max += n * BITS_PER_LONG;
     457             :         }
     458             : 
     459             :         return max;
     460             : }
     461             : 
     462             : #define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR |\
     463             :                         EPOLLNVAL)
     464             : #define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR |\
     465             :                          EPOLLNVAL)
     466             : #define POLLEX_SET (EPOLLPRI | EPOLLNVAL)
     467             : 
     468             : static inline void wait_key_set(poll_table *wait, unsigned long in,
     469             :                                 unsigned long out, unsigned long bit,
     470             :                                 __poll_t ll_flag)
     471             : {
     472      274876 :         wait->_key = POLLEX_SET | ll_flag;
     473      274876 :         if (in & bit)
     474      272388 :                 wait->_key |= POLLIN_SET;
     475      274876 :         if (out & bit)
     476        2488 :                 wait->_key |= POLLOUT_SET;
     477             : }
     478             : 
     479      115147 : static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
     480             : {
     481      115147 :         ktime_t expire, *to = NULL;
     482      115147 :         struct poll_wqueues table;
     483      115147 :         poll_table *wait;
     484      115147 :         int retval, i, timed_out = 0;
     485      115147 :         u64 slack = 0;
     486      115147 :         __poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
     487      115147 :         unsigned long busy_start = 0;
     488             : 
     489      115147 :         rcu_read_lock();
     490      115147 :         retval = max_select_fd(n, fds);
     491      115147 :         rcu_read_unlock();
     492             : 
     493      115147 :         if (retval < 0)
     494             :                 return retval;
     495      115147 :         n = retval;
     496             : 
     497      115147 :         poll_initwait(&table);
     498      115147 :         wait = &table.pt;
     499      115147 :         if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
     500           0 :                 wait->_qproc = NULL;
     501           0 :                 timed_out = 1;
     502             :         }
     503             : 
     504      115147 :         if (end_time && !timed_out)
     505       87076 :                 slack = select_estimate_accuracy(end_time);
     506             : 
     507      115147 :         retval = 0;
     508      225040 :         for (;;) {
     509      225040 :                 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
     510      225040 :                 bool can_busy_loop = false;
     511             : 
     512      225040 :                 inp = fds->in; outp = fds->out; exp = fds->ex;
     513      225040 :                 rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;
     514             : 
     515      448749 :                 for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
     516      223710 :                         unsigned long in, out, ex, all_bits, bit = 1, j;
     517      223710 :                         unsigned long res_in = 0, res_out = 0, res_ex = 0;
     518      223710 :                         __poll_t mask;
     519             : 
     520      223710 :                         in = *inp++; out = *outp++; ex = *exp++;
     521      223710 :                         all_bits = in | out | ex;
     522      223710 :                         if (all_bits == 0) {
     523           0 :                                 i += BITS_PER_LONG;
     524           0 :                                 continue;
     525             :                         }
     526             : 
     527     1505893 :                         for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) {
     528     1505893 :                                 struct fd f;
     529     1505893 :                                 if (i >= n)
     530             :                                         break;
     531     1282184 :                                 if (!(bit & all_bits))
     532     1007307 :                                         continue;
     533      274877 :                                 mask = EPOLLNVAL;
     534      274877 :                                 f = fdget(i);
     535      274876 :                                 if (f.file) {
     536      274876 :                                         wait_key_set(wait, in, out, bit,
     537             :                                                      busy_flag);
     538      274876 :                                         mask = vfs_poll(f.file, wait);
     539             : 
     540      274876 :                                         fdput(f);
     541             :                                 }
     542      274876 :                                 if ((mask & POLLIN_SET) && (in & bit)) {
     543       30561 :                                         res_in |= bit;
     544       30561 :                                         retval++;
     545       30561 :                                         wait->_qproc = NULL;
     546             :                                 }
     547      274876 :                                 if ((mask & POLLOUT_SET) && (out & bit)) {
     548        2488 :                                         res_out |= bit;
     549        2488 :                                         retval++;
     550        2488 :                                         wait->_qproc = NULL;
     551             :                                 }
     552      274876 :                                 if ((mask & POLLEX_SET) && (ex & bit)) {
     553           0 :                                         res_ex |= bit;
     554           0 :                                         retval++;
     555           0 :                                         wait->_qproc = NULL;
     556             :                                 }
     557             :                                 /* got something, stop busy polling */
     558      274876 :                                 if (retval) {
     559             :                                         can_busy_loop = false;
     560             :                                         busy_flag = 0;
     561             : 
     562             :                                 /*
     563             :                                  * only remember a returned
     564             :                                  * POLL_BUSY_LOOP if we asked for it
     565             :                                  */
     566      241772 :                                 } else if (busy_flag & mask)
     567           0 :                                         can_busy_loop = true;
     568             : 
     569             :                         }
     570      223709 :                         if (res_in)
     571       30559 :                                 *rinp = res_in;
     572      223709 :                         if (res_out)
     573        2488 :                                 *routp = res_out;
     574      223709 :                         if (res_ex)
     575           0 :                                 *rexp = res_ex;
     576      223709 :                         cond_resched();
     577             :                 }
     578      225039 :                 wait->_qproc = NULL;
     579      225039 :                 if (retval || timed_out || signal_pending(current))
     580             :                         break;
     581      109896 :                 if (table.error) {
     582             :                         retval = table.error;
     583             :                         break;
     584             :                 }
     585             : 
     586             :                 /* only if found POLL_BUSY_LOOP sockets && not out of time */
     587      109896 :                 if (can_busy_loop && !need_resched()) {
     588           0 :                         if (!busy_start) {
     589           0 :                                 busy_start = busy_loop_current_time();
     590           0 :                                 continue;
     591             :                         }
     592           0 :                         if (!busy_loop_timeout(busy_start))
     593           0 :                                 continue;
     594             :                 }
     595      109896 :                 busy_flag = 0;
     596             : 
     597             :                 /*
     598             :                  * If this is the first loop and we have a timeout
     599             :                  * given, then we convert to ktime_t and set the to
     600             :                  * pointer to the expiry value.
     601             :                  */
     602      109896 :                 if (end_time && !to) {
     603       84313 :                         expire = timespec64_to_ktime(*end_time);
     604       84313 :                         to = &expire;
     605             :                 }
     606             : 
     607      109896 :                 if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE,
     608             :                                            to, slack))
     609       82096 :                         timed_out = 1;
     610             :         }
     611             : 
     612      115143 :         poll_freewait(&table);
     613             : 
     614      115143 :         return retval;
     615             : }
     616             : 
     617             : /*
     618             :  * We can actually return ERESTARTSYS instead of EINTR, but I'd
     619             :  * like to be certain this leads to no problems. So I return
     620             :  * EINTR just for safety.
     621             :  *
     622             :  * Update: ERESTARTSYS breaks at least the xview clock binary, so
     623             :  * I'm trying ERESTARTNOHAND which restart only when you want to.
                     :  *
                     :  * core_sys_select() clamps n to the current fdtable's max_fds, copies
                     :  * the three user fd sets into kernel bitmaps with get_fd_set(), runs
                     :  * do_select() and writes the three result bitmaps back with
                     :  * set_fd_set(). The six bitmaps of FDS_BYTES(n) bytes each live in the
                     :  * on-stack stack_fds array when they fit, otherwise in a kvmalloc()
                     :  * buffer that is released with kvfree() before returning.
                     :  *
                     :  * Returns do_select()'s result, or -EINVAL for n < 0, -ENOMEM if the
                     :  * bitmap buffer cannot be sized or allocated, the get_fd_set() error,
                     :  * -ERESTARTNOHAND when nothing was ready and a signal is pending, or
                     :  * -EFAULT if a result set cannot be written back.
     624             :  */
     625      115147 : int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
     626             :                            fd_set __user *exp, struct timespec64 *end_time)
     627             : {
     628      115147 :         fd_set_bits fds;
     629      115147 :         void *bits;
     630      115147 :         int ret, max_fds;
     631      115147 :         size_t size, alloc_size;
     632      115147 :         struct fdtable *fdt;
     633             :         /* Allocate small arguments on the stack to save memory and be faster */
     634      115147 :         long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
     635             : 
     636      115147 :         ret = -EINVAL;
     637      115147 :         if (n < 0)
     638           0 :                 goto out_nofds;
     639             : 
     640             :         /* max_fds can increase, so grab it once to avoid race */
     641      115147 :         rcu_read_lock();
     642      115147 :         fdt = files_fdtable(current->files);
     643      115147 :         max_fds = fdt->max_fds;
     644      115147 :         rcu_read_unlock();
     645      115147 :         if (n > max_fds)
     646             :                 n = max_fds;
     647             : 
     648             :         /*
     649             :          * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
     650             :          * since we used fdset we need to allocate memory in units of
     651             :          * long-words. 
     652             :          */
     653      115147 :         size = FDS_BYTES(n);
     654      115147 :         bits = stack_fds;
     655      115147 :         if (size > sizeof(stack_fds) / 6) {
     656             :                 /* Not enough space in on-stack array; must use kmalloc */
     657           0 :                 ret = -ENOMEM;
     658           0 :                 if (size > (SIZE_MAX / 6))
     659             :                         goto out_nofds;
     660             : 
     661           0 :                 alloc_size = 6 * size;
     662           0 :                 bits = kvmalloc(alloc_size, GFP_KERNEL);
     663           0 :                 if (!bits)
     664           0 :                         goto out_nofds;
     665             :         }
     666      115147 :         fds.in      = bits;
     667      115147 :         fds.out     = bits +   size;
     668      115147 :         fds.ex      = bits + 2*size;
     669      115147 :         fds.res_in  = bits + 3*size;
     670      115147 :         fds.res_out = bits + 4*size;
     671      115147 :         fds.res_ex  = bits + 5*size;
     672             : 
     673      230294 :         if ((ret = get_fd_set(n, inp, fds.in)) ||
     674      230294 :             (ret = get_fd_set(n, outp, fds.out)) ||
     675      115147 :             (ret = get_fd_set(n, exp, fds.ex)))
     676           0 :                 goto out;
     677      115147 :         zero_fd_set(n, fds.res_in);
     678      115147 :         zero_fd_set(n, fds.res_out);
     679      115147 :         zero_fd_set(n, fds.res_ex);
     680             : 
     681      115147 :         ret = do_select(n, &fds, end_time);
     682             : 
     683      115143 :         if (ret < 0)
     684           0 :                 goto out;
     685      115143 :         if (!ret) {
     686       82096 :                 ret = -ERESTARTNOHAND;
     687       82096 :                 if (signal_pending(current))
     688           0 :                         goto out;
     689             :                 ret = 0;
     690             :         }
     691             : 
     692      230286 :         if (set_fd_set(n, inp, fds.res_in) ||
     693      230286 :             set_fd_set(n, outp, fds.res_out) ||
     694      115143 :             set_fd_set(n, exp, fds.res_ex))
     695             :                 ret = -EFAULT;
     696             : 
     697      115143 : out:
     698      115143 :         if (bits != stack_fds)
     699           0 :                 kvfree(bits);
     700      115143 : out_nofds:
     701      115143 :         return ret;
     702             : }
     703             : /*
                     :  * kern_select - common body of the select-style syscalls that take an
                     :  * old-style struct __kernel_old_timeval timeout.
                     :  *
                     :  * A non-NULL tvp is copied in and converted to an end time with
                     :  * poll_select_set_timeout(); tv_usec values of a second or more are
                     :  * folded into the seconds argument. A NULL tvp passes a NULL timeout
                     :  * to core_sys_select(). The result is handed to poll_select_finish()
                     :  * together with tvp and PT_TIMEVAL.
                     :  *
                     :  * Returns -EFAULT if tvp cannot be read, -EINVAL if the timeout is
                     :  * rejected, otherwise whatever poll_select_finish() returns.
                     :  */
     704           0 : static int kern_select(int n, fd_set __user *inp, fd_set __user *outp,
     705             :                        fd_set __user *exp, struct __kernel_old_timeval __user *tvp)
     706             : {
     707           0 :         struct timespec64 end_time, *to = NULL;
     708           0 :         struct __kernel_old_timeval tv;
     709           0 :         int ret;
     710             : 
     711           0 :         if (tvp) {
     712           0 :                 if (copy_from_user(&tv, tvp, sizeof(tv)))
     713             :                         return -EFAULT;
     714             : 
     715           0 :                 to = &end_time;
     716           0 :                 if (poll_select_set_timeout(to,
     717           0 :                                 tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
     718           0 :                                 (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
     719             :                         return -EINVAL;
     720             :         }
     721             : 
     722           0 :         ret = core_sys_select(n, inp, outp, exp, to);
     723           0 :         return poll_select_finish(&end_time, tvp, PT_TIMEVAL, ret);
     724             : }
     725             : /*
                     :  * select(2) entry point: passes its five arguments straight through to
                     :  * kern_select() and returns its result.
                     :  */
     726           0 : SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
     727             :                 fd_set __user *, exp, struct __kernel_old_timeval __user *, tvp)
     728             : {
     729           0 :         return kern_select(n, inp, outp, exp, tvp);
     730             : }
     731             : /*
                     :  * do_pselect - shared body of the pselect6 syscall variants.
                     :  *
                     :  * type selects how a non-NULL tsp is read: PT_TIMESPEC uses
                     :  * get_timespec64(), PT_OLD_TIMESPEC uses get_old_timespec32(), and any
                     :  * other value hits BUG(). The timeout is turned into an end time with
                     :  * poll_select_set_timeout(); a NULL tsp passes a NULL timeout on.
                     :  * set_user_sigmask() is applied to sigmask/sigsetsize before
                     :  * core_sys_select() runs, and the result goes through
                     :  * poll_select_finish() together with tsp and type.
                     :  *
                     :  * Returns -EFAULT if tsp cannot be read, -EINVAL if the timeout is
                     :  * rejected, a non-zero set_user_sigmask() result as is, and otherwise
                     :  * the value returned by poll_select_finish().
                     :  */
     732      115147 : static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
     733             :                        fd_set __user *exp, void __user *tsp,
     734             :                        const sigset_t __user *sigmask, size_t sigsetsize,
     735             :                        enum poll_time_type type)
     736             : {
     737      115147 :         struct timespec64 ts, end_time, *to = NULL;
     738      115147 :         int ret;
     739             : 
     740      115147 :         if (tsp) {
     741       87076 :                 switch (type) {
     742       87076 :                 case PT_TIMESPEC:
     743       87076 :                         if (get_timespec64(&ts, tsp))
     744             :                                 return -EFAULT;
     745             :                         break;
     746           0 :                 case PT_OLD_TIMESPEC:
     747           0 :                         if (get_old_timespec32(&ts, tsp))
     748             :                                 return -EFAULT;
     749             :                         break;
     750           0 :                 default:
     751           0 :                         BUG();
     752             :                 }
     753             : 
     754       87076 :                 to = &end_time;
     755       87076 :                 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
     756             :                         return -EINVAL;
     757             :         }
     758             : 
     759      115147 :         ret = set_user_sigmask(sigmask, sigsetsize);
     760      115147 :         if (ret)
     761           0 :                 return ret;
     762             : 
     763      115147 :         ret = core_sys_select(n, inp, outp, exp, to);
     764      115143 :         return poll_select_finish(&end_time, tsp, type, ret);
     765             : }
     766             : 
     767             : /*
     768             :  * Most architectures can't handle 7-argument syscalls. So we provide a
     769             :  * 6-argument version where the sixth argument is a pointer to a structure
     770             :  * which has a pointer to the sigset_t itself followed by a size_t containing
     771             :  * the sigset size.
                     :  *
                     :  * get_sigset_argpack() fills this in from user memory; the pselect6
                     :  * entry points then pass p and size to do_pselect() as its sigmask and
                     :  * sigsetsize arguments.
     772             :  */
     773             : struct sigset_argpack {
     774             :         sigset_t __user *p;
     775             :         size_t size;
     776             : };
     777             : /*
                     :  * get_sigset_argpack - copy a struct sigset_argpack in from user space.
                     :  * to:   kernel copy to fill in; left untouched when from is NULL.
                     :  * from: user pointer to the pack, may be NULL.
                     :  *
                     :  * Returns 0 on success (including from == NULL) and -EFAULT if the
                     :  * user memory cannot be accessed.
                     :  *
                     :  * The access window is opened with user_read_access_begin(), so every
                     :  * exit from it, including the Efault path, must close it with the
                     :  * matching user_read_access_end().
                     :  */
     778      115147 : static inline int get_sigset_argpack(struct sigset_argpack *to,
     779             :                                      struct sigset_argpack __user *from)
     780             : {
     781             :         // the path is hot enough for overhead of copy_from_user() to matter
     782      115147 :         if (from) {
     783         665 :                 if (!user_read_access_begin(from, sizeof(*from)))
     784             :                         return -EFAULT;
     785         665 :                 unsafe_get_user(to->p, &from->p, Efault);
     786         665 :                 unsafe_get_user(to->size, &from->size, Efault);
     787         665 :                 user_read_access_end();
     788             :         }
     789             :         return 0;
     790           0 : Efault:
     791           0 :         user_read_access_end();
     792           0 :         return -EFAULT;
     793             : }
     794             : /*
                     :  * pselect6(2) entry point. sig, when non-NULL, points to a
                     :  * struct sigset_argpack; a NULL sig leaves the pack at {NULL, 0}.
                     :  * Returns -EFAULT if the pack cannot be read, otherwise the result of
                     :  * do_pselect() called with PT_TIMESPEC.
                     :  */
     795      230294 : SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp,
     796             :                 fd_set __user *, exp, struct __kernel_timespec __user *, tsp,
     797             :                 void __user *, sig)
     798             : {
     799      115147 :         struct sigset_argpack x = {NULL, 0};
     800             : 
     801      115147 :         if (get_sigset_argpack(&x, sig))
     802             :                 return -EFAULT;
     803             : 
     804      115147 :         return do_pselect(n, inp, outp, exp, tsp, x.p, x.size, PT_TIMESPEC);
     805             : }
     806             : 
     807             : #if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)
     808             : /*
                     :  * pselect6_time32 entry point, built only when CONFIG_COMPAT_32BIT_TIME
                     :  * is set and CONFIG_64BIT is not. Same flow as pselect6, except that
                     :  * tsp is a struct old_timespec32 and do_pselect() is called with
                     :  * PT_OLD_TIMESPEC. Returns -EFAULT if the sigset pack cannot be read.
                     :  */
     809             : SYSCALL_DEFINE6(pselect6_time32, int, n, fd_set __user *, inp, fd_set __user *, outp,
     810             :                 fd_set __user *, exp, struct old_timespec32 __user *, tsp,
     811             :                 void __user *, sig)
     812             : {
     813             :         struct sigset_argpack x = {NULL, 0};
     814             : 
     815             :         if (get_sigset_argpack(&x, sig))
     816             :                 return -EFAULT;
     817             : 
     818             :         return do_pselect(n, inp, outp, exp, tsp, x.p, x.size, PT_OLD_TIMESPEC);
     819             : }
     820             : 
     821             : #endif
     822             : 
     823             : #ifdef __ARCH_WANT_SYS_OLD_SELECT
     824             : struct sel_arg_struct {
     825             :         unsigned long n;
     826             :         fd_set __user *inp, *outp, *exp;
     827             :         struct __kernel_old_timeval __user *tvp;
     828             : };
     829             : /*
                     :  * old_select entry point: all select arguments arrive packed in one
                     :  * user-space struct sel_arg_struct. The struct is copied in with
                     :  * copy_from_user() and its members n, inp, outp, exp and tvp are
                     :  * passed, in that order, to kern_select().
                     :  *
                     :  * Returns -EFAULT if the argument block cannot be read, otherwise
                     :  * kern_select()'s result.
                     :  */
     830             : SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg)
     831             : {
     832             :         struct sel_arg_struct a;
     833             : 
     834             :         if (copy_from_user(&a, arg, sizeof(a)))
     835             :                 return -EFAULT;
     836             :         return kern_select(a.n, a.inp, a.outp, a.exp, a.tvp);
     837             : }
     838             : #endif
     839             : /*
                     :  * One chunk of a chain of struct pollfd arrays. do_sys_poll() builds
                     :  * the chain from the user's array and do_poll() walks it.
                     :  *   next    - following chunk, NULL on the last one
                     :  *   len     - number of elements used in entries[]
                     :  *   entries - the pollfd records of this chunk
                     :  */
     840             : struct poll_list {
     841             :         struct poll_list *next;
     842             :         int len;
     843             :         struct pollfd entries[];
     844             : };
     845             : /*
                     :  * Number of struct pollfd entries that fit in PAGE_SIZE bytes after a
                     :  * struct poll_list header. do_sys_poll() caps each kmalloc()ed chunk
                     :  * at this many entries.
                     :  */
     846             : #define POLLFD_PER_PAGE  ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))
     847             : 
     848             : /*
     849             :  * Fish for pollable events on the pollfd->fd file descriptor. We're only
     850             :  * interested in events matching the pollfd->events mask, and the result
     851             :  * matching that mask is both recorded in pollfd->revents and returned. The
     852             :  * pwait poll_table will be used by the fd-provided poll handler for waiting,
     853             :  * if pwait->_qproc is non-NULL.
                     :  *
                     :  * A negative fd is skipped: revents becomes the mangled form of an empty
                     :  * mask and 0 is returned. If fdget() finds no file, the result is
                     :  * EPOLLNVAL. Otherwise the file is polled with a key made of the
                     :  * requested events plus EPOLLERR, EPOLLHUP and busy_flag;
                     :  * *can_busy_poll is set when the reported mask contains busy_flag, and
                     :  * the mask is then reduced to the requested events plus EPOLLERR and
                     :  * EPOLLHUP before it is stored and returned.
     854             :  */
     855    18737245 : static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait,
     856             :                                      bool *can_busy_poll,
     857             :                                      __poll_t busy_flag)
     858             : {
     859    18737245 :         int fd = pollfd->fd;
     860    18737245 :         __poll_t mask = 0, filter;
     861    18737245 :         struct fd f;
     862             : 
     863    18737245 :         if (fd < 0)
     864       33590 :                 goto out;
     865    18703655 :         mask = EPOLLNVAL;
     866    18703655 :         f = fdget(fd);
     867    18702662 :         if (!f.file)
     868        3465 :                 goto out;
     869             : 
     870             :         /* userland u16 ->events contains POLL... bitmap */
     871    18699197 :         filter = demangle_poll(pollfd->events) | EPOLLERR | EPOLLHUP;
     872    18699197 :         pwait->_key = filter | busy_flag;
     873    18699197 :         mask = vfs_poll(f.file, pwait);
     874    18700462 :         if (mask & busy_flag)
     875           0 :                 *can_busy_poll = true;
     876    18700462 :         mask &= filter;             /* Mask out unneeded events. */
     877    18700462 :         fdput(f);
     878             : 
     879    18737517 : out:
     880             :         /* ... and so does ->revents */
     881    18737517 :         pollfd->revents = mangle_poll(mask);
     882    18737517 :         return mask;
     883             : }
     884             : /*
                     :  * do_poll - scan a poll_list chain until something is ready.
                     :  * list:     chain of pollfd chunks to scan
                     :  * wait:     poll_wqueues whose poll_table is handed to do_pollfd()
                     :  * end_time: timeout end, or NULL for no timeout; an all-zero value
                     :  *           means a single scan with pt->_qproc cleared up front
                     :  *
                     :  * Each pass calls do_pollfd() on every entry and counts those that
                     :  * return a non-zero mask. With nothing ready, the pass result is
                     :  * wait->error, or -ERESTARTNOHAND if a signal is pending. The loop
                     :  * stops on a non-zero result or once timed_out is set; otherwise it
                     :  * either rescans immediately (busy polling) or sleeps in
                     :  * poll_schedule_timeout() and scans again.
                     :  *
                     :  * Returns the number of entries that reported events, or else the
                     :  * wait->error / -ERESTARTNOHAND value described above.
                     :  */
     885     8356001 : static int do_poll(struct poll_list *list, struct poll_wqueues *wait,
     886             :                    struct timespec64 *end_time)
     887             : {
     888     8356001 :         poll_table* pt = &wait->pt;
     889     8356001 :         ktime_t expire, *to = NULL;
     890     8356001 :         int timed_out = 0, count = 0;
     891     8356001 :         u64 slack = 0;
     892     8356001 :         __poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
     893     8356001 :         unsigned long busy_start = 0;
     894             : 
     895             :         /* Optimise the no-wait case */
     896     8356001 :         if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
     897     2905759 :                 pt->_qproc = NULL;
     898     2905759 :                 timed_out = 1;
     899             :         }
     900             : 
     901     8356001 :         if (end_time && !timed_out)
     902     3378370 :                 slack = select_estimate_accuracy(end_time);
     903             : 
     904    13603017 :         for (;;) {
     905    13603017 :                 struct poll_list *walk;
     906    13603017 :                 bool can_busy_loop = false;
     907             : 
     908    27206108 :                 for (walk = list; walk != NULL; walk = walk->next) {
     909    13603282 :                         struct pollfd * pfd, * pfd_end;
     910             : 
     911    13603282 :                         pfd = walk->entries;
     912    13603282 :                         pfd_end = pfd + walk->len;
     913    32340292 :                         for (; pfd != pfd_end; pfd++) {
     914             :                                 /*
     915             :                                  * Fish for events. If we found one, record it
     916             :                                  * and kill poll_table->_qproc, so we don't
     917             :                                  * needlessly register any other waiters after
     918             :                                  * this. They'll get immediately deregistered
     919             :                                  * when we break out and return.
     920             :                                  */
     921    18737201 :                                 if (do_pollfd(pfd, pt, &can_busy_loop,
     922             :                                               busy_flag)) {
     923     5381590 :                                         count++;
     924     5381590 :                                         pt->_qproc = NULL;
     925             :                                         /* found something, stop busy polling */
     926     5381590 :                                         busy_flag = 0;
     927     5381590 :                                         can_busy_loop = false;
     928             :                                 }
     929             :                         }
     930             :                 }
     931             :                 /*
     932             :                  * All waiters have already been registered, so don't provide
     933             :                  * a poll_table->_qproc to them on the next loop iteration.
     934             :                  */
     935    13602826 :                 pt->_qproc = NULL;
     936    13602826 :                 if (!count) {
     937     8232270 :                         count = wait->error;
     938     8232270 :                         if (signal_pending(current))
     939        1188 :                                 count = -ERESTARTNOHAND;
     940             :                 }
     941    13602706 :                 if (count || timed_out)
     942             :                         break;
     943             : 
     944             :                 /* only if found POLL_BUSY_LOOP sockets && not out of time */
     945     5247005 :                 if (can_busy_loop && !need_resched()) {
     946           0 :                         if (!busy_start) {
     947           0 :                                 busy_start = busy_loop_current_time();
     948           0 :                                 continue;
     949             :                         }
     950           0 :                         if (!busy_loop_timeout(busy_start))
     951           0 :                                 continue;
     952             :                 }
     953     5247005 :                 busy_flag = 0;
     954             : 
     955             :                 /*
     956             :                  * If this is the first loop and we have a timeout
     957             :                  * given, then we convert to ktime_t and set the to
     958             :                  * pointer to the expiry value.
     959             :                  */
     960     5247005 :                 if (end_time && !to) {
     961     3312303 :                         expire = timespec64_to_ktime(*end_time);
     962     3312303 :                         to = &expire;
     963             :                 }
     964             : 
     965     5247005 :                 if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack))
     966      169883 :                         timed_out = 1;
     967             :         }
     968     8355701 :         return count;
     969             : }
     970             : /*
                     :  * Number of struct pollfd entries that fit in stack_pps behind the
                     :  * struct poll_list header. The macro names stack_pps directly, so it
                     :  * only works where such a variable is in scope, as in do_sys_poll().
                     :  */
     971             : #define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list))  / \
     972             :                         sizeof(struct pollfd))
     973             : /*
                     :  * do_sys_poll - common implementation behind poll(), ppoll() and
                     :  * do_restart_poll().
                     :  * ufds:     user array of struct pollfd
                     :  * nfds:     number of elements in ufds
                     :  * end_time: timeout end passed to do_poll(), or NULL
                     :  *
                     :  * The user array is copied into a poll_list chain: the first chunk
                     :  * lives in the on-stack stack_pps buffer (at most N_STACK_PPS entries)
                     :  * and each further chunk is kmalloc()ed with at most POLLFD_PER_PAGE
                     :  * entries. After do_poll() has run, only the revents field of each
                     :  * entry is written back to user space. The kmalloc()ed chunks are
                     :  * freed at out_fds, which both the success and the error paths reach.
                     :  *
                     :  * Returns do_poll()'s result, -EINVAL if nfds exceeds
                     :  * rlimit(RLIMIT_NOFILE), -ENOMEM if a chunk cannot be allocated, or
                     :  * -EFAULT if the user array cannot be read or written.
                     :  */
     974     8355968 : static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
     975             :                 struct timespec64 *end_time)
     976             : {
     977     8355968 :         struct poll_wqueues table;
     978     8355968 :         int err = -EFAULT, fdcount, len;
     979             :         /* Allocate small arguments on the stack to save memory and be
     980             :            faster - use long to make sure the buffer is aligned properly
     981             :            on 64 bit archs to avoid unaligned access */
     982     8355968 :         long stack_pps[POLL_STACK_ALLOC/sizeof(long)];
     983     8355968 :         struct poll_list *const head = (struct poll_list *)stack_pps;
     984     8355968 :         struct poll_list *walk = head;
     985     8355968 :         unsigned long todo = nfds;
     986             : 
     987     8355968 :         if (nfds > rlimit(RLIMIT_NOFILE))
     988             :                 return -EINVAL;
     989             : 
     990     8355968 :         len = min_t(unsigned int, nfds, N_STACK_PPS);
     991     8356249 :         for (;;) {
     992     8356249 :                 walk->next = NULL;
     993     8356249 :                 walk->len = len;
     994     8356249 :                 if (!len)
     995             :                         break;
     996             : 
     997     8355108 :                 if (copy_from_user(walk->entries, ufds + nfds-todo,
     998     8355103 :                                         sizeof(struct pollfd) * walk->len))
     999           0 :                         goto out_fds;
    1000             : 
    1001     8355108 :                 todo -= walk->len;
    1002     8355108 :                 if (!todo)
    1003             :                         break;
    1004             : 
    1005         284 :                 len = min(todo, POLLFD_PER_PAGE);
    1006         284 :                 walk = walk->next = kmalloc(struct_size(walk, entries, len),
    1007             :                                             GFP_KERNEL);
    1008         290 :                 if (!walk) {
    1009           0 :                         err = -ENOMEM;
    1010           0 :                         goto out_fds;
    1011             :                 }
    1012             :         }
    1013             : 
    1014     8355970 :         poll_initwait(&table);
    1015     8355970 :         fdcount = do_poll(head, &table, end_time);
    1016     8355684 :         poll_freewait(&table);
    1017             : 
    1018     8355970 :         if (!user_write_access_begin(ufds, nfds * sizeof(*ufds)))
    1019           0 :                 goto out_fds;
    1020             : 
    1021    16711770 :         for (walk = head; walk; walk = walk->next) {
    1022     8356296 :                 struct pollfd *fds = walk->entries;
    1023     8356296 :                 int j;
    1024             : 
    1025    19950623 :                 for (j = walk->len; j; fds++, ufds++, j--)
    1026    11594869 :                         unsafe_put_user(fds->revents, &ufds->revents, Efault);
    1027             :         }
    1028     8355474 :         user_write_access_end();
    1029             : 
    1030     8355474 :         err = fdcount;
    1031     8356099 : out_fds:
    1032     8356099 :         walk = head->next;
    1033     8356360 :         while (walk) {
    1034         284 :                 struct poll_list *pos = walk;
    1035         284 :                 walk = walk->next;
    1036         284 :                 kfree(pos);
    1037             :         }
    1038             : 
    1039             :         return err;
    1040             : 
    1041           0 : Efault:
    1042           0 :         user_write_access_end();
    1043           0 :         err = -EFAULT;
    1044           0 :         goto out_fds;
    1045             : }
    1046             : /*
                     :  * do_restart_poll - restart handler installed by the poll() syscall.
                     :  *
                     :  * Rebuilds the do_sys_poll() arguments from restart_block->poll (ufds,
                     :  * nfds and, if has_timeout is set, the saved tv_sec/tv_nsec end time)
                     :  * and runs it again. If that returns -ERESTARTNOHAND once more, the
                     :  * handler re-arms itself with set_restart_fn() and returns its result.
                     :  */
    1047           0 : static long do_restart_poll(struct restart_block *restart_block)
    1048             : {
    1049           0 :         struct pollfd __user *ufds = restart_block->poll.ufds;
    1050           0 :         int nfds = restart_block->poll.nfds;
    1051           0 :         struct timespec64 *to = NULL, end_time;
    1052           0 :         int ret;
    1053             : 
    1054           0 :         if (restart_block->poll.has_timeout) {
    1055           0 :                 end_time.tv_sec = restart_block->poll.tv_sec;
    1056           0 :                 end_time.tv_nsec = restart_block->poll.tv_nsec;
    1057           0 :                 to = &end_time;
    1058             :         }
    1059             : 
    1060           0 :         ret = do_sys_poll(ufds, nfds, to);
    1061             : 
    1062           0 :         if (ret == -ERESTARTNOHAND)
    1063           0 :                 ret = set_restart_fn(restart_block, do_restart_poll);
    1064             : 
    1065           0 :         return ret;
    1066             : }
    1067             : /*
                     :  * poll(2) entry point. A non-negative timeout_msecs is split into
                     :  * seconds and nanoseconds for poll_select_set_timeout(); a negative
                     :  * one means no timeout (NULL end time). When do_sys_poll() returns
                     :  * -ERESTARTNOHAND, ufds, nfds and the end time (if any) are saved in
                     :  * current->restart_block and do_restart_poll() is set as the restart
                     :  * function; the syscall then returns set_restart_fn()'s result.
                     :  */
    1068     7041475 : SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
    1069             :                 int, timeout_msecs)
    1070             : {
    1071     3520735 :         struct timespec64 end_time, *to = NULL;
    1072     3520735 :         int ret;
    1073             : 
    1074     3520735 :         if (timeout_msecs >= 0) {
    1075     2952981 :                 to = &end_time;
    1076     2952981 :                 poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC,
    1077     2952981 :                         NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
    1078             :         }
    1079             : 
    1080     3520735 :         ret = do_sys_poll(ufds, nfds, to);
    1081             : 
    1082     3520732 :         if (ret == -ERESTARTNOHAND) {
    1083        1120 :                 struct restart_block *restart_block;
    1084             : 
    1085        1120 :                 restart_block = &current->restart_block;
    1086        1120 :                 restart_block->poll.ufds = ufds;
    1087        1120 :                 restart_block->poll.nfds = nfds;
    1088             : 
    1089        1120 :                 if (timeout_msecs >= 0) {
    1090        1120 :                         restart_block->poll.tv_sec = end_time.tv_sec;
    1091        1120 :                         restart_block->poll.tv_nsec = end_time.tv_nsec;
    1092        1120 :                         restart_block->poll.has_timeout = 1;
    1093             :                 } else
    1094           0 :                         restart_block->poll.has_timeout = 0;
    1095             : 
    1096        1120 :                 ret = set_restart_fn(restart_block, do_restart_poll);
    1097             :         }
    1098     3520732 :         return ret;
    1099             : }
    1100             : /*
                     :  * ppoll(2) entry point. A non-NULL tsp is read with get_timespec64()
                     :  * and converted to an end time; set_user_sigmask() is applied to
                     :  * sigmask/sigsetsize before do_sys_poll() runs, and the result goes
                     :  * through poll_select_finish() with PT_TIMESPEC.
                     :  *
                     :  * Returns -EFAULT if tsp cannot be read, -EINVAL if the timeout is
                     :  * rejected, a non-zero set_user_sigmask() result as is, and otherwise
                     :  * the value returned by poll_select_finish().
                     :  */
    1101     9670753 : SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
    1102             :                 struct __kernel_timespec __user *, tsp, const sigset_t __user *, sigmask,
    1103             :                 size_t, sigsetsize)
    1104             : {
    1105     4835370 :         struct timespec64 ts, end_time, *to = NULL;
    1106     4835370 :         int ret;
    1107             : 
    1108     4835370 :         if (tsp) {
    1109     3331192 :                 if (get_timespec64(&ts, tsp))
    1110             :                         return -EFAULT;
    1111             : 
    1112     3331191 :                 to = &end_time;
    1113     3331191 :                 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
    1114             :                         return -EINVAL;
    1115             :         }
    1116             : 
    1117     4835366 :         ret = set_user_sigmask(sigmask, sigsetsize);
    1118     4835330 :         if (ret)
    1119           0 :                 return ret;
    1120             : 
    1121     4835330 :         ret = do_sys_poll(ufds, nfds, to);
    1122     4835329 :         return poll_select_finish(&end_time, tsp, PT_TIMESPEC, ret);
    1123             : }
    1124             : 
    1125             : #if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)
    1126             : /*
                     :  * ppoll_time32 entry point, built only when CONFIG_COMPAT_32BIT_TIME
                     :  * is set and CONFIG_64BIT is not. Same flow as ppoll, except that tsp
                     :  * is a struct old_timespec32 read with get_old_timespec32() and
                     :  * poll_select_finish() is called with PT_OLD_TIMESPEC.
                     :  *
                     :  * Returns -EFAULT if tsp cannot be read, -EINVAL if the timeout is
                     :  * rejected, a non-zero set_user_sigmask() result as is, and otherwise
                     :  * the value returned by poll_select_finish().
                     :  */
    1127             : SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds,
    1128             :                 struct old_timespec32 __user *, tsp, const sigset_t __user *, sigmask,
    1129             :                 size_t, sigsetsize)
    1130             : {
    1131             :         struct timespec64 ts, end_time, *to = NULL;
    1132             :         int ret;
    1133             : 
    1134             :         if (tsp) {
    1135             :                 if (get_old_timespec32(&ts, tsp))
    1136             :                         return -EFAULT;
    1137             : 
    1138             :                 to = &end_time;
    1139             :                 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
    1140             :                         return -EINVAL;
    1141             :         }
    1142             : 
    1143             :         ret = set_user_sigmask(sigmask, sigsetsize);
    1144             :         if (ret)
    1145             :                 return ret;
    1146             : 
    1147             :         ret = do_sys_poll(ufds, nfds, to);
    1148             :         return poll_select_finish(&end_time, tsp, PT_OLD_TIMESPEC, ret);
    1149             : }
    1150             : #endif
    1151             : 
    1152             : #ifdef CONFIG_COMPAT
    1153             : #define __COMPAT_NFDBITS       (8 * sizeof(compat_ulong_t)) /* bit count of one compat_ulong_t, taking 8 bits per byte */
    1154             : 
    1155             : /*
    1156             :  * Ooo, nasty.  We need here to frob 32-bit unsigned longs to
    1157             :  * 64-bit unsigned longs.  compat_get_fd_set() fills fdset through compat_get_bitmap(), or zeroes it when ufdset is NULL.
    1158             :  */
    1159             : static
    1160           0 : int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
    1161             :                         unsigned long *fdset)
    1162             : {
    1163           0 :         if (ufdset) {
    1164           0 :                 return compat_get_bitmap(fdset, ufdset, nr); /* helper's result is returned unchanged */
    1165             :         } else {
    1166           0 :                 zero_fd_set(nr, fdset); /* no user set supplied: zero_fd_set() the kernel-side set instead */
    1167           0 :                 return 0;
    1168             :         }
    1169             : }
    1170             : /* Counterpart of compat_get_fd_set(): hands fdset to compat_put_bitmap() for ufdset; a NULL ufdset is a successful no-op. */
    1171             : static
    1172             : int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
    1173             :                       unsigned long *fdset)
    1174             : {
    1175           0 :         if (!ufdset)
    1176             :                 return 0;
    1177           0 :         return compat_put_bitmap(ufdset, fdset, nr); /* helper's result is returned unchanged */
    1178             : }
    1179             : 
    1180             : 
    1181             : /*
    1182             :  * This is a virtual copy of the native sys_select code elsewhere in this
    1183             :  * file (fs/select.c) and probably should be compared to it from time to time.
    1184             :  */
    1185             : 
    1186             : /*
    1187             :  * Core of the compat select calls: fetch the compat fd_sets, run do_select(), store the result sets.
    1188             :  *
    1189             :  * We can actually return ERESTARTSYS instead of EINTR, but I'd like to be certain this leads to no problems. So I return EINTR just for safety.
    1190             :  *
    1191             :  * Update: ERESTARTSYS breaks at least the xview clock binary, so
    1192             :  * I'm trying ERESTARTNOHAND which restarts only when you want to.
    1193             :  */
    1194           0 : static int compat_core_sys_select(int n, compat_ulong_t __user *inp,
    1195             :         compat_ulong_t __user *outp, compat_ulong_t __user *exp,
    1196             :         struct timespec64 *end_time)
    1197             : {
    1198           0 :         fd_set_bits fds;
    1199           0 :         void *bits;
    1200           0 :         int size, max_fds, ret = -EINVAL;
    1201           0 :         struct fdtable *fdt;
    1202           0 :         long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
    1203             :         /* A negative descriptor count fails with the initial value of ret, -EINVAL. */
    1204           0 :         if (n < 0)
    1205           0 :                 goto out_nofds;
    1206             : 
    1207             :         /* max_fds can increase, so grab it once to avoid race */
    1208           0 :         rcu_read_lock();
    1209           0 :         fdt = files_fdtable(current->files);
    1210           0 :         max_fds = fdt->max_fds;
    1211           0 :         rcu_read_unlock();
    1212           0 :         if (n > max_fds)
    1213             :                 n = max_fds;
    1214             : 
    1215             :         /*
    1216             :          * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
    1217             :          * since we used fdset we need to allocate memory in units of
    1218             :          * long-words.
    1219             :          */
    1220           0 :         size = FDS_BYTES(n);
    1221           0 :         bits = stack_fds;
    1222           0 :         if (size > sizeof(stack_fds) / 6) { /* six bitmaps of this size do not fit in stack_fds */
    1223           0 :                 bits = kmalloc_array(6, size, GFP_KERNEL);
    1224           0 :                 ret = -ENOMEM;
    1225           0 :                 if (!bits)
    1226           0 :                         goto out_nofds;
    1227             :         }
    1228           0 :         fds.in      = (unsigned long *)  bits;
    1229           0 :         fds.out     = (unsigned long *) (bits +   size);
    1230           0 :         fds.ex      = (unsigned long *) (bits + 2*size);
    1231           0 :         fds.res_in  = (unsigned long *) (bits + 3*size);
    1232           0 :         fds.res_out = (unsigned long *) (bits + 4*size);
    1233           0 :         fds.res_ex  = (unsigned long *) (bits + 5*size);
    1234             :         /* Fetch the three request sets; the first non-zero result becomes the return value. */
    1235           0 :         if ((ret = compat_get_fd_set(n, inp, fds.in)) ||
    1236           0 :             (ret = compat_get_fd_set(n, outp, fds.out)) ||
    1237           0 :             (ret = compat_get_fd_set(n, exp, fds.ex)))
    1238           0 :                 goto out;
    1239           0 :         zero_fd_set(n, fds.res_in);
    1240           0 :         zero_fd_set(n, fds.res_out);
    1241           0 :         zero_fd_set(n, fds.res_ex);
    1242             : 
    1243           0 :         ret = do_select(n, &fds, end_time);
    1244             :         /* Negative results pass straight through; a zero result with a signal pending becomes -ERESTARTNOHAND. */
    1245           0 :         if (ret < 0)
    1246           0 :                 goto out;
    1247           0 :         if (!ret) {
    1248           0 :                 ret = -ERESTARTNOHAND;
    1249           0 :                 if (signal_pending(current))
    1250           0 :                         goto out;
    1251             :                 ret = 0;
    1252             :         }
    1253             :         /* Store the three result sets; any failure replaces the result with -EFAULT. */
    1254           0 :         if (compat_set_fd_set(n, inp, fds.res_in) ||
    1255           0 :             compat_set_fd_set(n, outp, fds.res_out) ||
    1256           0 :             compat_set_fd_set(n, exp, fds.res_ex))
    1257             :                 ret = -EFAULT;
    1258           0 : out:
    1259           0 :         if (bits != stack_fds) /* only a kmalloc_array() buffer is freed, never the on-stack one */
    1260           0 :                 kfree(bits);
    1261           0 : out_nofds:
    1262           0 :         return ret;
    1263             : }
    1264             : /* Shared body of the compat select and old_select syscalls: optional old_timeval32 timeout, then compat_core_sys_select(). */
    1265           0 : static int do_compat_select(int n, compat_ulong_t __user *inp,
    1266             :         compat_ulong_t __user *outp, compat_ulong_t __user *exp,
    1267             :         struct old_timeval32 __user *tvp)
    1268             : {
    1269           0 :         struct timespec64 end_time, *to = NULL;
    1270           0 :         struct old_timeval32 tv;
    1271           0 :         int ret;
    1272             : 
    1273           0 :         if (tvp) {
    1274           0 :                 if (copy_from_user(&tv, tvp, sizeof(tv)))
    1275             :                         return -EFAULT;
    1276             :                 /* Whole seconds in tv_usec are folded into the seconds argument; the remainder is scaled from usec to nsec. */
    1277           0 :                 to = &end_time;
    1278           0 :                 if (poll_select_set_timeout(to,
    1279           0 :                                 tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
    1280           0 :                                 (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
    1281             :                         return -EINVAL;
    1282             :         }
    1283             :         /* With a NULL tvp, "to" is still NULL here and no end time is handed to the select core. */
    1284           0 :         ret = compat_core_sys_select(n, inp, outp, exp, to);
    1285           0 :         return poll_select_finish(&end_time, tvp, PT_OLD_TIMEVAL, ret);
    1286             : }
    1287             : /* Compat select syscall: forwards all five arguments to do_compat_select() unchanged. */
    1288           0 : COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
    1289             :         compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
    1290             :         struct old_timeval32 __user *, tvp)
    1291             : {
    1292           0 :         return do_compat_select(n, inp, outp, exp, tvp);
    1293             : }
    1294             : /* Argument block that the compat old_select syscall copies in from user space in one piece. */
    1295             : struct compat_sel_arg_struct {
    1296             :         compat_ulong_t n; /* passed on as do_compat_select()'s n */
    1297             :         compat_uptr_t inp; /* the four pointers below go through compat_ptr() before use */
    1298             :         compat_uptr_t outp;
    1299             :         compat_uptr_t exp;
    1300             :         compat_uptr_t tvp;
    1301             : };
    1302             : /* Compat old_select syscall: takes a single pointer to a compat_sel_arg_struct instead of five separate arguments. */
    1303           0 : COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg)
    1304             : {
    1305           0 :         struct compat_sel_arg_struct a;
    1306             :         /* Copy the whole argument block first; a failed copy is reported as -EFAULT. */
    1307           0 :         if (copy_from_user(&a, arg, sizeof(a)))
    1308             :                 return -EFAULT;
    1309           0 :         return do_compat_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp),
    1310           0 :                                 compat_ptr(a.exp), compat_ptr(a.tvp));
    1311             : }
    1312             : /* Shared body of the compat pselect6 syscalls; "type" selects how the untyped tsp pointer is read. */
    1313           0 : static long do_compat_pselect(int n, compat_ulong_t __user *inp,
    1314             :         compat_ulong_t __user *outp, compat_ulong_t __user *exp,
    1315             :         void __user *tsp, compat_sigset_t __user *sigmask,
    1316             :         compat_size_t sigsetsize, enum poll_time_type type)
    1317             : {
    1318           0 :         struct timespec64 ts, end_time, *to = NULL;
    1319           0 :         int ret;
    1320             :         /* Only PT_OLD_TIMESPEC and PT_TIMESPEC are handled; any other type reaches BUG(). */
    1321           0 :         if (tsp) {
    1322           0 :                 switch (type) {
    1323           0 :                 case PT_OLD_TIMESPEC:
    1324           0 :                         if (get_old_timespec32(&ts, tsp))
    1325             :                                 return -EFAULT;
    1326             :                         break;
    1327           0 :                 case PT_TIMESPEC:
    1328           0 :                         if (get_timespec64(&ts, tsp))
    1329             :                                 return -EFAULT;
    1330             :                         break;
    1331           0 :                 default:
    1332           0 :                         BUG();
    1333             :                 }
    1334             : 
    1335           0 :                 to = &end_time;
    1336           0 :                 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
    1337             :                         return -EINVAL;
    1338             :         }
    1339             :         /* A non-zero result from set_compat_user_sigmask() is returned to the caller unchanged. */
    1340           0 :         ret = set_compat_user_sigmask(sigmask, sigsetsize);
    1341           0 :         if (ret)
    1342           0 :                 return ret;
    1343             :         /* Run the select core, then let poll_select_finish() supply the return value using the same time type. */
    1344           0 :         ret = compat_core_sys_select(n, inp, outp, exp, to);
    1345           0 :         return poll_select_finish(&end_time, tsp, type, ret);
    1346             : }
    1347             : /* Pointer/size pair that get_compat_sigset_argpack() reads from user space for the compat pselect6 syscalls. */
    1348             : struct compat_sigset_argpack {
    1349             :         compat_uptr_t p; /* converted with compat_ptr() and passed on as the sigmask argument */
    1350             :         compat_size_t size; /* passed on as the sigsetsize argument */
    1351             : };
    1352           0 : static inline int get_compat_sigset_argpack(struct compat_sigset_argpack *to,
    1353             :                                             struct compat_sigset_argpack __user *from)
    1354             : { /* Reads both fields of *from into *to; a NULL from leaves *to untouched. Returns 0 or -EFAULT. */
    1355           0 :         if (from) {
    1356           0 :                 if (!user_read_access_begin(from, sizeof(*from)))
    1357             :                         return -EFAULT;
    1358           0 :                 unsafe_get_user(to->p, &from->p, Efault); /* jumps to Efault if the read faults */
    1359           0 :                 unsafe_get_user(to->size, &from->size, Efault);
    1360           0 :                 user_read_access_end();
    1361             :         }
    1362             :         return 0;
    1363           0 : Efault:
    1364           0 :         user_read_access_end(); /* must pair with user_read_access_begin() above, not the generic user_access_end() */
    1365           0 :         return -EFAULT;
    1366             : }
    1367             : /* Compat pselect6 with a struct __kernel_timespec timeout; sig points at a compat_sigset_argpack (or is NULL). */
    1368           0 : COMPAT_SYSCALL_DEFINE6(pselect6_time64, int, n, compat_ulong_t __user *, inp,
    1369             :         compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
    1370             :         struct __kernel_timespec __user *, tsp, void __user *, sig)
    1371             : {
    1372           0 :         struct compat_sigset_argpack x = {0, 0};
    1373             :         /* x keeps its {0, 0} initialiser when sig is NULL, since get_compat_sigset_argpack() only reads a non-NULL pointer. */
    1374           0 :         if (get_compat_sigset_argpack(&x, sig))
    1375             :                 return -EFAULT;
    1376             : 
    1377           0 :         return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(x.p),
    1378             :                                  x.size, PT_TIMESPEC);
    1379             : }
    1380             : 
    1381             : #if defined(CONFIG_COMPAT_32BIT_TIME)
    1382             : /* Compat pselect6 with a struct old_timespec32 timeout; only built when CONFIG_COMPAT_32BIT_TIME is defined. */
    1383           0 : COMPAT_SYSCALL_DEFINE6(pselect6_time32, int, n, compat_ulong_t __user *, inp,
    1384             :         compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
    1385             :         struct old_timespec32 __user *, tsp, void __user *, sig)
    1386             : {
    1387           0 :         struct compat_sigset_argpack x = {0, 0};
    1388             :         /* x keeps its {0, 0} initialiser when sig is NULL, since get_compat_sigset_argpack() only reads a non-NULL pointer. */
    1389           0 :         if (get_compat_sigset_argpack(&x, sig))
    1390             :                 return -EFAULT;
    1391             : 
    1392           0 :         return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(x.p),
    1393             :                                  x.size, PT_OLD_TIMESPEC);
    1394             : }
    1395             : 
    1396             : #endif
    1397             : 
    1398             : #if defined(CONFIG_COMPAT_32BIT_TIME)
    1399           0 : COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds,
    1400             :         unsigned int,  nfds, struct old_timespec32 __user *, tsp,
    1401             :         const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
    1402             : { /* Compat ppoll with a struct old_timespec32 timeout and a compat_sigset_t signal mask. */
    1403           0 :         struct timespec64 ts, end_time, *to = NULL;
    1404           0 :         int ret;
    1405             :         /* "to" stays NULL when no timeout pointer was passed, so do_sys_poll() then gets no end time. */
    1406           0 :         if (tsp) {
    1407           0 :                 if (get_old_timespec32(&ts, tsp))
    1408             :                         return -EFAULT;
    1409             : 
    1410           0 :                 to = &end_time;
    1411           0 :                 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
    1412             :                         return -EINVAL;
    1413             :         }
    1414             :         /* A non-zero result from set_compat_user_sigmask() is returned to the caller unchanged. */
    1415           0 :         ret = set_compat_user_sigmask(sigmask, sigsetsize);
    1416           0 :         if (ret)
    1417           0 :                 return ret;
    1418             :         /* The poll result is passed through poll_select_finish(), which supplies the final return value. */
    1419           0 :         ret = do_sys_poll(ufds, nfds, to);
    1420           0 :         return poll_select_finish(&end_time, tsp, PT_OLD_TIMESPEC, ret);
    1421             : }
    1422             : #endif
    1423             : 
    1424             : /* New compat syscall for 64-bit time_t: ppoll with a struct __kernel_timespec timeout and a compat_sigset_t signal mask. */
    1425           0 : COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds,
    1426             :         unsigned int,  nfds, struct __kernel_timespec __user *, tsp,
    1427             :         const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
    1428             : {
    1429           0 :         struct timespec64 ts, end_time, *to = NULL;
    1430           0 :         int ret;
    1431             :         /* "to" stays NULL when no timeout pointer was passed, so do_sys_poll() then gets no end time. */
    1432           0 :         if (tsp) {
    1433           0 :                 if (get_timespec64(&ts, tsp))
    1434             :                         return -EFAULT;
    1435             : 
    1436           0 :                 to = &end_time;
    1437           0 :                 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
    1438             :                         return -EINVAL;
    1439             :         }
    1440             :         /* A non-zero result from set_compat_user_sigmask() is returned to the caller unchanged. */
    1441           0 :         ret = set_compat_user_sigmask(sigmask, sigsetsize);
    1442           0 :         if (ret)
    1443           0 :                 return ret;
    1444             :         /* The poll result is passed through poll_select_finish(), which supplies the final return value. */
    1445           0 :         ret = do_sys_poll(ufds, nfds, to);
    1446           0 :         return poll_select_finish(&end_time, tsp, PT_TIMESPEC, ret);
    1447             : }
    1448             : 
    1449             : #endif

Generated by: LCOV version 1.14