LCOV - code coverage report
Current view: top level - fs - select.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-acha @ Mon Jul 31 20:08:06 PDT 2023 Lines: 386 510 75.7 %
Date: 2023-07-31 20:08:07 Functions: 26 32 81.2 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * This file contains the procedures for the handling of select and poll
       4             :  *
       5             :  * Created for Linux based loosely upon Mathius Lattner's minix
       6             :  * patches by Peter MacDonald. Heavily edited by Linus.
       7             :  *
       8             :  *  4 February 1994
       9             :  *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
      10             :  *     flag set in its personality we do *not* modify the given timeout
      11             :  *     parameter to reflect time remaining.
      12             :  *
      13             :  *  24 January 2000
      14             :  *     Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation 
      15             :  *     of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian).
      16             :  */
      17             : 
      18             : #include <linux/compat.h>
      19             : #include <linux/kernel.h>
      20             : #include <linux/sched/signal.h>
      21             : #include <linux/sched/rt.h>
      22             : #include <linux/syscalls.h>
      23             : #include <linux/export.h>
      24             : #include <linux/slab.h>
      25             : #include <linux/poll.h>
      26             : #include <linux/personality.h> /* for STICKY_TIMEOUTS */
      27             : #include <linux/file.h>
      28             : #include <linux/fdtable.h>
      29             : #include <linux/fs.h>
      30             : #include <linux/rcupdate.h>
      31             : #include <linux/hrtimer.h>
      32             : #include <linux/freezer.h>
      33             : #include <net/busy_poll.h>
      34             : #include <linux/vmalloc.h>
      35             : 
      36             : #include <linux/uaccess.h>
      37             : 
      38             : 
      39             : /*
      40             :  * Estimate expected accuracy in ns from a timeval.
      41             :  *
      42             :  * After quite a bit of churning around, we've settled on
      43             :  * a simple thing of taking 0.1% of the timeout as the
      44             :  * slack, with a cap of 100 msec.
      45             :  * "nice" tasks get a 0.5% slack instead.
      46             :  *
      47             :  * Consider this comment an open invitation to come up with even
      48             :  * better solutions..
      49             :  */
      50             : 
      51             : #define MAX_SLACK       (100 * NSEC_PER_MSEC)
      52             : 
      53      589334 : static long __estimate_accuracy(struct timespec64 *tv)
      54             : {
      55      589334 :         long slack;
      56      589334 :         int divfactor = 1000;
      57             : 
      58      589334 :         if (tv->tv_sec < 0)
      59             :                 return 0;
      60             : 
      61      589334 :         if (task_nice(current) > 0)
      62         222 :                 divfactor = divfactor / 5;
      63             : 
      64      589334 :         if (tv->tv_sec > MAX_SLACK / (NSEC_PER_SEC/divfactor))
      65             :                 return MAX_SLACK;
      66             : 
      67      574746 :         slack = tv->tv_nsec / divfactor;
      68      574746 :         slack += tv->tv_sec * (NSEC_PER_SEC/divfactor);
      69             : 
      70      574746 :         if (slack > MAX_SLACK)
      71             :                 return MAX_SLACK;
      72             : 
      73             :         return slack;
      74             : }
      75             : 
      76      589352 : u64 select_estimate_accuracy(struct timespec64 *tv)
      77             : {
      78      589352 :         u64 ret;
      79      589352 :         struct timespec64 now;
      80             : 
      81             :         /*
      82             :          * Realtime tasks get a slack of 0 for obvious reasons.
      83             :          */
      84             : 
      85      589352 :         if (rt_task(current))
      86             :                 return 0;
      87             : 
      88      589352 :         ktime_get_ts64(&now);
      89      589333 :         now = timespec64_sub(*tv, now);
      90      589353 :         ret = __estimate_accuracy(&now);
      91      589353 :         if (ret < current->timer_slack_ns)
      92       24630 :                 return current->timer_slack_ns;
      93             :         return ret;
      94             : }
      95             : 
      96             : 
      97             : 
      98             : struct poll_table_page {
      99             :         struct poll_table_page * next;
     100             :         struct poll_table_entry * entry;
     101             :         struct poll_table_entry entries[];
     102             : };
     103             : 
     104             : #define POLL_TABLE_FULL(table) \
     105             :         ((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table))
     106             : 
     107             : /*
     108             :  * Ok, Peter made a complicated, but straightforward multiple_wait() function.
     109             :  * I have rewritten this, taking some shortcuts: This code may not be easy to
     110             :  * follow, but it should be free of race-conditions, and it's practical. If you
     111             :  * understand what I'm doing here, then you understand how the linux
     112             :  * sleep/wakeup mechanism works.
     113             :  *
     114             :  * Two very simple procedures, poll_wait() and poll_freewait() make all the
     115             :  * work.  poll_wait() is an inline-function defined in <linux/poll.h>,
     116             :  * as all select/poll functions have to call it to add an entry to the
     117             :  * poll table.
     118             :  */
     119             : static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
     120             :                        poll_table *p);
     121             : 
     122           0 : void poll_initwait(struct poll_wqueues *pwq)
     123             : {
     124     3842433 :         init_poll_funcptr(&pwq->pt, __pollwait);
     125     3842433 :         pwq->polling_task = current;
     126     3842433 :         pwq->triggered = 0;
     127     3842433 :         pwq->error = 0;
     128     3842433 :         pwq->table = NULL;
     129     3842433 :         pwq->inline_index = 0;
     130           0 : }
     131             : EXPORT_SYMBOL(poll_initwait);
     132             : 
     133     1343176 : static void free_poll_entry(struct poll_table_entry *entry)
     134             : {
     135     1343176 :         remove_wait_queue(entry->wait_address, &entry->wait);
     136     1343175 :         fput(entry->filp);
     137     1343180 : }
     138             : 
     139     3842505 : void poll_freewait(struct poll_wqueues *pwq)
     140             : {
     141     3842505 :         struct poll_table_page * p = pwq->table;
     142     3842505 :         int i;
     143     5162893 :         for (i = 0; i < pwq->inline_index; i++)
     144     1320400 :                 free_poll_entry(pwq->inline_entries + i);
     145     3865253 :         while (p) {
     146       22794 :                 struct poll_table_entry * entry;
     147       22794 :                 struct poll_table_page *old;
     148             : 
     149       22794 :                 entry = p->entry;
     150       22794 :                 do {
     151       22794 :                         entry--;
     152       22794 :                         free_poll_entry(entry);
     153       22794 :                 } while (entry > p->entries);
     154       22794 :                 old = p;
     155       22794 :                 p = p->next;
     156       22794 :                 free_page((unsigned long) old);
     157             :         }
     158     3842459 : }
     159             : EXPORT_SYMBOL(poll_freewait);
     160             : 
     161     1343170 : static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
     162             : {
     163     1343170 :         struct poll_table_page *table = p->table;
     164             : 
     165     1343170 :         if (p->inline_index < N_INLINE_POLL_ENTRIES)
     166     1320376 :                 return p->inline_entries + p->inline_index++;
     167             : 
     168       22794 :         if (!table || POLL_TABLE_FULL(table)) {
     169       22794 :                 struct poll_table_page *new_table;
     170             : 
     171       22794 :                 new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
     172       22794 :                 if (!new_table) {
     173           0 :                         p->error = -ENOMEM;
     174           0 :                         return NULL;
     175             :                 }
     176       22794 :                 new_table->entry = new_table->entries;
     177       22794 :                 new_table->next = table;
     178       22794 :                 p->table = new_table;
     179       22794 :                 table = new_table;
     180             :         }
     181             : 
     182       22794 :         return table->entry++;
     183             : }
     184             : 
     185      894623 : static int __pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
     186             : {
     187      894623 :         struct poll_wqueues *pwq = wait->private;
     188      894623 :         DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);
     189             : 
     190             :         /*
     191             :          * Although this function is called under waitqueue lock, LOCK
     192             :          * doesn't imply write barrier and the users expect write
     193             :          * barrier semantics on wakeup functions.  The following
     194             :          * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
     195             :          * and is paired with smp_store_mb() in poll_schedule_timeout.
     196             :          */
     197      894623 :         smp_wmb();
     198      894621 :         pwq->triggered = 1;
     199             : 
     200             :         /*
     201             :          * Perform the default wake up operation using a dummy
     202             :          * waitqueue.
     203             :          *
     204             :          * TODO: This is hacky but there currently is no interface to
     205             :          * pass in @sync.  @sync is scheduled to be removed and once
     206             :          * that happens, wake_up_process() can be used directly.
     207             :          */
     208      894621 :         return default_wake_function(&dummy_wait, mode, sync, key);
     209             : }
     210             : 
     211     1152551 : static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
     212             : {
     213     1152551 :         struct poll_table_entry *entry;
     214             : 
     215     1152551 :         entry = container_of(wait, struct poll_table_entry, wait);
     216     1152551 :         if (key && !(key_to_poll(key) & entry->key))
     217             :                 return 0;
     218      894621 :         return __pollwake(wait, mode, sync, key);
     219             : }
     220             : 
     221             : /* Add a new entry */
     222     1343156 : static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
     223             :                                 poll_table *p)
     224             : {
     225     1343156 :         struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt);
     226     1343156 :         struct poll_table_entry *entry = poll_get_entry(pwq);
     227     1343155 :         if (!entry)
     228             :                 return;
     229     1343155 :         entry->filp = get_file(filp);
     230     1343150 :         entry->wait_address = wait_address;
     231     1343150 :         entry->key = p->_key;
     232     1343150 :         init_waitqueue_func_entry(&entry->wait, pollwake);
     233     1343150 :         entry->wait.private = pwq;
     234     1343150 :         add_wait_queue(wait_address, &entry->wait);
     235             : }
     236             : 
     237     2020515 : static int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
     238             :                           ktime_t *expires, unsigned long slack)
     239             : {
     240     2020515 :         int rc = -EINTR;
     241             : 
     242     2020515 :         set_current_state(state);
     243     2020521 :         if (!pwq->triggered)
     244     2020519 :                 rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS);
     245     2020537 :         __set_current_state(TASK_RUNNING);
     246             : 
     247             :         /*
     248             :          * Prepare for the next iteration.
     249             :          *
     250             :          * The following smp_store_mb() serves two purposes.  First, it's
     251             :          * the counterpart rmb of the wmb in pollwake() such that data
     252             :          * written before wake up is always visible after wake up.
     253             :          * Second, the full barrier guarantees that triggered clearing
     254             :          * doesn't pass event check of the next iteration.  Note that
     255             :          * this problem doesn't exist for the first iteration as
     256             :          * add_wait_queue() has full barrier semantics.
     257             :          */
     258     2020537 :         smp_store_mb(pwq->triggered, 0);
     259             : 
     260     2020519 :         return rc;
     261             : }
     262             : 
     263             : /**
     264             :  * poll_select_set_timeout - helper function to setup the timeout value
     265             :  * @to:         pointer to timespec64 variable for the final timeout
     266             :  * @sec:        seconds (from user space)
     267             :  * @nsec:       nanoseconds (from user space)
     268             :  *
     269             :  * Note, we do not use a timespec for the user space value here, That
     270             :  * way we can use the function for timeval and compat interfaces as well.
     271             :  *
     272             :  * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0.
     273             :  */
     274     2349783 : int poll_select_set_timeout(struct timespec64 *to, time64_t sec, long nsec)
     275             : {
     276     2349783 :         struct timespec64 ts = {.tv_sec = sec, .tv_nsec = nsec};
     277             : 
     278     2349783 :         if (!timespec64_valid(&ts))
     279             :                 return -EINVAL;
     280             : 
     281             :         /* Optimize for the zero timeout value here */
     282     2349783 :         if (!sec && !nsec) {
     283     1794226 :                 to->tv_sec = to->tv_nsec = 0;
     284             :         } else {
     285      555557 :                 ktime_get_ts64(to);
     286      555544 :                 *to = timespec64_add_safe(*to, ts);
     287             :         }
     288             :         return 0;
     289             : }
     290             : 
     291             : enum poll_time_type {
     292             :         PT_TIMEVAL = 0,
     293             :         PT_OLD_TIMEVAL = 1,
     294             :         PT_TIMESPEC = 2,
     295             :         PT_OLD_TIMESPEC = 3,
     296             : };
     297             : 
     298     3842434 : static int poll_select_finish(struct timespec64 *end_time,
     299             :                               void __user *p,
     300             :                               enum poll_time_type pt_type, int ret)
     301             : {
     302     3842434 :         struct timespec64 rts;
     303             : 
     304     3842434 :         restore_saved_sigmask_unless(ret == -ERESTARTNOHAND);
     305             : 
     306     3842457 :         if (!p)
     307             :                 return ret;
     308             : 
     309     2349787 :         if (current->personality & STICKY_TIMEOUTS)
     310           0 :                 goto sticky;
     311             : 
     312             :         /* No update for zero timeout */
     313     2349787 :         if (!end_time->tv_sec && !end_time->tv_nsec)
     314             :                 return ret;
     315             : 
     316      555561 :         ktime_get_ts64(&rts);
     317      555556 :         rts = timespec64_sub(*end_time, rts);
     318      555552 :         if (rts.tv_sec < 0)
     319       44235 :                 rts.tv_sec = rts.tv_nsec = 0;
     320             : 
     321             : 
     322      555552 :         switch (pt_type) {
     323             :         case PT_TIMEVAL:
     324             :                 {
     325           0 :                         struct __kernel_old_timeval rtv;
     326             : 
     327           0 :                         if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec))
     328             :                                 memset(&rtv, 0, sizeof(rtv));
     329           0 :                         rtv.tv_sec = rts.tv_sec;
     330           0 :                         rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
     331           0 :                         if (!copy_to_user(p, &rtv, sizeof(rtv)))
     332           0 :                                 return ret;
     333             :                 }
     334           0 :                 break;
     335           0 :         case PT_OLD_TIMEVAL:
     336             :                 {
     337           0 :                         struct old_timeval32 rtv;
     338             : 
     339           0 :                         rtv.tv_sec = rts.tv_sec;
     340           0 :                         rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
     341           0 :                         if (!copy_to_user(p, &rtv, sizeof(rtv)))
     342           0 :                                 return ret;
     343             :                 }
     344           0 :                 break;
     345      555552 :         case PT_TIMESPEC:
     346      555552 :                 if (!put_timespec64(&rts, p))
     347             :                         return ret;
     348             :                 break;
     349           0 :         case PT_OLD_TIMESPEC:
     350           0 :                 if (!put_old_timespec32(&rts, p))
     351             :                         return ret;
     352             :                 break;
     353           0 :         default:
     354           0 :                 BUG();
     355             :         }
     356             :         /*
     357             :          * If an application puts its timeval in read-only memory, we
     358             :          * don't want the Linux-specific update to the timeval to
     359             :          * cause a fault after the select has completed
     360             :          * successfully. However, because we're not updating the
     361             :          * timeval, we can't restart the system call.
     362             :          */
     363             : 
     364           4 : sticky:
     365           4 :         if (ret == -ERESTARTNOHAND)
     366           0 :                 ret = -EINTR;
     367             :         return ret;
     368             : }
     369             : 
     370             : /*
     371             :  * Scalable version of the fd_set.
     372             :  */
     373             : 
     374             : typedef struct {
     375             :         unsigned long *in, *out, *ex;
     376             :         unsigned long *res_in, *res_out, *res_ex;
     377             : } fd_set_bits;
     378             : 
     379             : /*
     380             :  * How many longwords for "nr" bits?
     381             :  */
     382             : #define FDS_BITPERLONG  (8*sizeof(long))
     383             : #define FDS_LONGS(nr)   (((nr)+FDS_BITPERLONG-1)/FDS_BITPERLONG)
     384             : #define FDS_BYTES(nr)   (FDS_LONGS(nr)*sizeof(long))
     385             : 
     386             : /*
     387             :  * Use "unsigned long" accesses to let user-mode fd_set's be long-aligned.
     388             :  */
     389             : static inline
     390       53211 : int get_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
     391             : {
     392       53211 :         nr = FDS_BYTES(nr);
     393       53211 :         if (ufdset)
     394       40702 :                 return copy_from_user(fdset, ufdset, nr) ? -EFAULT : 0;
     395             : 
     396       32860 :         memset(fdset, 0, nr);
     397       32860 :         return 0;
     398             : }
     399             : 
     400             : static inline unsigned long __must_check
     401       53211 : set_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset)
     402             : {
     403       53211 :         if (ufdset)
     404       20351 :                 return __copy_to_user(ufdset, fdset, FDS_BYTES(nr));
     405             :         return 0;
     406             : }
     407             : 
     408             : static inline
     409       53211 : void zero_fd_set(unsigned long nr, unsigned long *fdset)
     410             : {
     411       53211 :         memset(fdset, 0, FDS_BYTES(nr));
     412       53211 : }
     413             : 
     414             : #define FDS_IN(fds, n)          (fds->in + n)
     415             : #define FDS_OUT(fds, n)         (fds->out + n)
     416             : #define FDS_EX(fds, n)          (fds->ex + n)
     417             : 
     418             : #define BITS(fds, n)    (*FDS_IN(fds, n)|*FDS_OUT(fds, n)|*FDS_EX(fds, n))
     419             : 
     420       17737 : static int max_select_fd(unsigned long n, fd_set_bits *fds)
     421             : {
     422       17737 :         unsigned long *open_fds;
     423       17737 :         unsigned long set;
     424       17737 :         int max;
     425       17737 :         struct fdtable *fdt;
     426             : 
     427             :         /* handle last in-complete long-word first */
     428       17737 :         set = ~(~0UL << (n & (BITS_PER_LONG-1)));
     429       17737 :         n /= BITS_PER_LONG;
     430       17737 :         fdt = files_fdtable(current->files);
     431       17737 :         open_fds = fdt->open_fds + n;
     432       17737 :         max = 0;
     433       17737 :         if (set) {
     434       17323 :                 set &= BITS(fds, n);
     435       17323 :                 if (set) {
     436       17323 :                         if (!(set & ~*open_fds))
     437       17323 :                                 goto get_max;
     438             :                         return -EBADF;
     439             :                 }
     440             :         }
     441       17737 :         while (n) {
     442           0 :                 open_fds--;
     443           0 :                 n--;
     444           0 :                 set = BITS(fds, n);
     445           0 :                 if (!set)
     446           0 :                         continue;
     447           0 :                 if (set & ~*open_fds)
     448             :                         return -EBADF;
     449           0 :                 if (max)
     450           0 :                         continue;
     451           0 : get_max:
     452       96923 :                 do {
     453       96923 :                         max++;
     454       96923 :                         set >>= 1;
     455       96923 :                 } while (set);
     456       17323 :                 max += n * BITS_PER_LONG;
     457             :         }
     458             : 
     459             :         return max;
     460             : }
     461             : 
     462             : #define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR |\
     463             :                         EPOLLNVAL)
     464             : #define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR |\
     465             :                          EPOLLNVAL)
     466             : #define POLLEX_SET (EPOLLPRI | EPOLLNVAL)
     467             : 
     468             : static inline void wait_key_set(poll_table *wait, unsigned long in,
     469             :                                 unsigned long out, unsigned long bit,
     470             :                                 __poll_t ll_flag)
     471             : {
     472       39946 :         wait->_key = POLLEX_SET | ll_flag;
     473       39946 :         if (in & bit)
     474       39581 :                 wait->_key |= POLLIN_SET;
     475       39946 :         if (out & bit)
     476         364 :                 wait->_key |= POLLOUT_SET;
     477             : }
     478             : 
     479       17737 : static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
     480             : {
     481       17737 :         ktime_t expire, *to = NULL;
     482       17737 :         struct poll_wqueues table;
     483       17737 :         poll_table *wait;
     484       17737 :         int retval, i, timed_out = 0;
     485       17737 :         u64 slack = 0;
     486       17737 :         __poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
     487       17737 :         unsigned long busy_start = 0;
     488             : 
     489       17737 :         rcu_read_lock();
     490       17737 :         retval = max_select_fd(n, fds);
     491       17737 :         rcu_read_unlock();
     492             : 
     493       17737 :         if (retval < 0)
     494             :                 return retval;
     495       17737 :         n = retval;
     496             : 
     497       17737 :         poll_initwait(&table);
     498       17737 :         wait = &table.pt;
     499       17737 :         if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
     500           0 :                 wait->_qproc = NULL;
     501           0 :                 timed_out = 1;
     502             :         }
     503             : 
     504       17737 :         if (end_time && !timed_out)
     505       14345 :                 slack = select_estimate_accuracy(end_time);
     506             : 
     507       17737 :         retval = 0;
     508       34718 :         for (;;) {
     509       34718 :                 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
     510       34718 :                 bool can_busy_loop = false;
     511             : 
     512       34718 :                 inp = fds->in; outp = fds->out; exp = fds->ex;
     513       34718 :                 rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;
     514             : 
     515       68608 :                 for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
     516       33890 :                         unsigned long in, out, ex, all_bits, bit = 1, j;
     517       33890 :                         unsigned long res_in = 0, res_out = 0, res_ex = 0;
     518       33890 :                         __poll_t mask;
     519             : 
     520       33890 :                         in = *inp++; out = *outp++; ex = *exp++;
     521       33890 :                         all_bits = in | out | ex;
     522       33890 :                         if (all_bits == 0) {
     523           0 :                                 i += BITS_PER_LONG;
     524           0 :                                 continue;
     525             :                         }
     526             : 
     527      223175 :                         for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) {
     528      223175 :                                 struct fd f;
     529      223175 :                                 if (i >= n)
     530             :                                         break;
     531      189287 :                                 if (!(bit & all_bits))
     532      149341 :                                         continue;
     533       39946 :                                 mask = EPOLLNVAL;
     534       39946 :                                 f = fdget(i);
     535       39946 :                                 if (f.file) {
     536       39946 :                                         wait_key_set(wait, in, out, bit,
     537             :                                                      busy_flag);
     538       39946 :                                         mask = vfs_poll(f.file, wait);
     539             : 
     540       39944 :                                         fdput(f);
     541             :                                 }
     542       39944 :                                 if ((mask & POLLIN_SET) && (in & bit)) {
     543        3758 :                                         res_in |= bit;
     544        3758 :                                         retval++;
     545        3758 :                                         wait->_qproc = NULL;
     546             :                                 }
     547       39944 :                                 if ((mask & POLLOUT_SET) && (out & bit)) {
     548         364 :                                         res_out |= bit;
     549         364 :                                         retval++;
     550         364 :                                         wait->_qproc = NULL;
     551             :                                 }
     552       39944 :                                 if ((mask & POLLEX_SET) && (ex & bit)) {
     553           0 :                                         res_ex |= bit;
     554           0 :                                         retval++;
     555           0 :                                         wait->_qproc = NULL;
     556             :                                 }
     557             :                                 /* got something, stop busy polling */
     558       39944 :                                 if (retval) {
     559             :                                         can_busy_loop = false;
     560             :                                         busy_flag = 0;
     561             : 
     562             :                                 /*
     563             :                                  * only remember a returned
     564             :                                  * POLL_BUSY_LOOP if we asked for it
     565             :                                  */
     566       35815 :                                 } else if (busy_flag & mask)
     567           0 :                                         can_busy_loop = true;
     568             : 
     569             :                         }
     570       33888 :                         if (res_in)
     571        3756 :                                 *rinp = res_in;
     572       33888 :                         if (res_out)
     573         364 :                                 *routp = res_out;
     574       33888 :                         if (res_ex)
     575           0 :                                 *rexp = res_ex;
     576       33888 :                         cond_resched();
     577             :                 }
     578       34718 :                 wait->_qproc = NULL;
     579       34718 :                 if (retval || timed_out || signal_pending(current))
     580             :                         break;
     581       16981 :                 if (table.error) {
     582             :                         retval = table.error;
     583             :                         break;
     584             :                 }
     585             : 
     586             :                 /* only if found POLL_BUSY_LOOP sockets && not out of time */
     587       16981 :                 if (can_busy_loop && !need_resched()) {
     588           0 :                         if (!busy_start) {
     589           0 :                                 busy_start = busy_loop_current_time();
     590           0 :                                 continue;
     591             :                         }
     592           0 :                         if (!busy_loop_timeout(busy_start))
     593           0 :                                 continue;
     594             :                 }
     595       16981 :                 busy_flag = 0;
     596             : 
     597             :                 /*
     598             :                  * If this is the first loop and we have a timeout
     599             :                  * given, then we convert to ktime_t and set the to
     600             :                  * pointer to the expiry value.
     601             :                  */
     602       16981 :                 if (end_time && !to) {
     603       13953 :                         expire = timespec64_to_ktime(*end_time);
     604       13953 :                         to = &expire;
     605             :                 }
     606             : 
     607       16981 :                 if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE,
     608             :                                            to, slack))
     609       13617 :                         timed_out = 1;
     610             :         }
     611             : 
     612       17737 :         poll_freewait(&table);
     613             : 
     614       17737 :         return retval;
     615             : }
     616             : 
     617             : /*
     618             :  * We can actually return ERESTARTSYS instead of EINTR, but I'd
     619             :  * like to be certain this leads to no problems. So I return
     620             :  * EINTR just for safety.
     621             :  *
     622             :  * Update: ERESTARTSYS breaks at least the xview clock binary, so
     623             :  * I'm trying ERESTARTNOHAND which restart only when you want to.
     624             :  */
     625       17737 : int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
     626             :                            fd_set __user *exp, struct timespec64 *end_time)
     627             : {
     628       17737 :         fd_set_bits fds;
     629       17737 :         void *bits;
     630       17737 :         int ret, max_fds;
     631       17737 :         size_t size, alloc_size;
     632       17737 :         struct fdtable *fdt;
     633             :         /* Allocate small arguments on the stack to save memory and be faster */
     634       17737 :         long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
     635             : 
     636       17737 :         ret = -EINVAL;
     637       17737 :         if (n < 0)
     638           0 :                 goto out_nofds;
     639             : 
     640             :         /* max_fds can increase, so grab it once to avoid race */
     641       17737 :         rcu_read_lock();
     642       17737 :         fdt = files_fdtable(current->files);
     643       17737 :         max_fds = fdt->max_fds;
     644       17737 :         rcu_read_unlock();
     645       17737 :         if (n > max_fds)
     646             :                 n = max_fds;
     647             : 
     648             :         /*
     649             :          * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
     650             :          * since we used fdset we need to allocate memory in units of
     651             :          * long-words. 
     652             :          */
     653       17737 :         size = FDS_BYTES(n);
     654       17737 :         bits = stack_fds;
     655       17737 :         if (size > sizeof(stack_fds) / 6) {
     656             :                 /* Not enough space in on-stack array; must use kmalloc */
     657           0 :                 ret = -ENOMEM;
     658           0 :                 if (size > (SIZE_MAX / 6))
     659             :                         goto out_nofds;
     660             : 
     661           0 :                 alloc_size = 6 * size;
     662           0 :                 bits = kvmalloc(alloc_size, GFP_KERNEL);
     663           0 :                 if (!bits)
     664           0 :                         goto out_nofds;
     665             :         }
     666       17737 :         fds.in      = bits;
     667       17737 :         fds.out     = bits +   size;
     668       17737 :         fds.ex      = bits + 2*size;
     669       17737 :         fds.res_in  = bits + 3*size;
     670       17737 :         fds.res_out = bits + 4*size;
     671       17737 :         fds.res_ex  = bits + 5*size;
     672             : 
     673       35474 :         if ((ret = get_fd_set(n, inp, fds.in)) ||
     674       35474 :             (ret = get_fd_set(n, outp, fds.out)) ||
     675       17737 :             (ret = get_fd_set(n, exp, fds.ex)))
     676           0 :                 goto out;
     677       17737 :         zero_fd_set(n, fds.res_in);
     678       17737 :         zero_fd_set(n, fds.res_out);
     679       17737 :         zero_fd_set(n, fds.res_ex);
     680             : 
     681       17737 :         ret = do_select(n, &fds, end_time);
     682             : 
     683       17737 :         if (ret < 0)
     684           0 :                 goto out;
     685       17737 :         if (!ret) {
     686       13617 :                 ret = -ERESTARTNOHAND;
     687       13617 :                 if (signal_pending(current))
     688           0 :                         goto out;
     689             :                 ret = 0;
     690             :         }
     691             : 
     692       35474 :         if (set_fd_set(n, inp, fds.res_in) ||
     693       35474 :             set_fd_set(n, outp, fds.res_out) ||
     694       17737 :             set_fd_set(n, exp, fds.res_ex))
     695             :                 ret = -EFAULT;
     696             : 
     697       17737 : out:
     698       17737 :         if (bits != stack_fds)
     699           0 :                 kvfree(bits);
     700       17737 : out_nofds:
     701       17737 :         return ret;
     702             : }
     703             : 
     704           0 : static int kern_select(int n, fd_set __user *inp, fd_set __user *outp,
     705             :                        fd_set __user *exp, struct __kernel_old_timeval __user *tvp)
     706             : {
     707           0 :         struct timespec64 end_time, *to = NULL;
     708           0 :         struct __kernel_old_timeval tv;
     709           0 :         int ret;
     710             : 
     711           0 :         if (tvp) {
     712           0 :                 if (copy_from_user(&tv, tvp, sizeof(tv)))
     713             :                         return -EFAULT;
     714             : 
     715           0 :                 to = &end_time;
     716           0 :                 if (poll_select_set_timeout(to,
     717           0 :                                 tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),
     718           0 :                                 (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC))
     719             :                         return -EINVAL;
     720             :         }
     721             : 
     722           0 :         ret = core_sys_select(n, inp, outp, exp, to);
     723           0 :         return poll_select_finish(&end_time, tvp, PT_TIMEVAL, ret);
     724             : }
     725             : 
     726           0 : SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
     727             :                 fd_set __user *, exp, struct __kernel_old_timeval __user *, tvp)
     728             : {
     729           0 :         return kern_select(n, inp, outp, exp, tvp);
     730             : }
     731             : 
     732       17737 : static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
     733             :                        fd_set __user *exp, void __user *tsp,
     734             :                        const sigset_t __user *sigmask, size_t sigsetsize,
     735             :                        enum poll_time_type type)
     736             : {
     737       17737 :         struct timespec64 ts, end_time, *to = NULL;
     738       17737 :         int ret;
     739             : 
     740       17737 :         if (tsp) {
     741       14345 :                 switch (type) {
     742       14345 :                 case PT_TIMESPEC:
     743       14345 :                         if (get_timespec64(&ts, tsp))
     744             :                                 return -EFAULT;
     745             :                         break;
     746           0 :                 case PT_OLD_TIMESPEC:
     747           0 :                         if (get_old_timespec32(&ts, tsp))
     748             :                                 return -EFAULT;
     749             :                         break;
     750           0 :                 default:
     751           0 :                         BUG();
     752             :                 }
     753             : 
     754       14345 :                 to = &end_time;
     755       14345 :                 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
     756             :                         return -EINVAL;
     757             :         }
     758             : 
     759       17737 :         ret = set_user_sigmask(sigmask, sigsetsize);
     760       17737 :         if (ret)
     761           0 :                 return ret;
     762             : 
     763       17737 :         ret = core_sys_select(n, inp, outp, exp, to);
     764       17737 :         return poll_select_finish(&end_time, tsp, type, ret);
     765             : }
     766             : 
     767             : /*
     768             :  * Most architectures can't handle 7-argument syscalls. So we provide a
     769             :  * 6-argument version where the sixth argument is a pointer to a structure
     770             :  * which has a pointer to the sigset_t itself followed by a size_t containing
     771             :  * the sigset size.
     772             :  */
     773             : struct sigset_argpack {
     774             :         sigset_t __user *p;
     775             :         size_t size;
     776             : };
     777             : 
     778       17737 : static inline int get_sigset_argpack(struct sigset_argpack *to,
     779             :                                      struct sigset_argpack __user *from)
     780             : {
     781             :         // the path is hot enough for overhead of copy_from_user() to matter
     782       17737 :         if (from) {
     783         378 :                 if (!user_read_access_begin(from, sizeof(*from)))
     784             :                         return -EFAULT;
     785         378 :                 unsafe_get_user(to->p, &from->p, Efault);
     786         378 :                 unsafe_get_user(to->size, &from->size, Efault);
     787             :                 user_read_access_end();
     788             :         }
     789             :         return 0;
     790             : Efault:
     791             :         user_access_end();
     792             :         return -EFAULT;
     793             : }
     794             : 
     795       35474 : SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp,
     796             :                 fd_set __user *, exp, struct __kernel_timespec __user *, tsp,
     797             :                 void __user *, sig)
     798             : {
     799       17737 :         struct sigset_argpack x = {NULL, 0};
     800             : 
     801       17737 :         if (get_sigset_argpack(&x, sig))
     802             :                 return -EFAULT;
     803             : 
     804       17737 :         return do_pselect(n, inp, outp, exp, tsp, x.p, x.size, PT_TIMESPEC);
     805             : }
     806             : 
     807             : #if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)
     808             : 
     809             : SYSCALL_DEFINE6(pselect6_time32, int, n, fd_set __user *, inp, fd_set __user *, outp,
     810             :                 fd_set __user *, exp, struct old_timespec32 __user *, tsp,
     811             :                 void __user *, sig)
     812             : {
     813             :         struct sigset_argpack x = {NULL, 0};
     814             : 
     815             :         if (get_sigset_argpack(&x, sig))
     816             :                 return -EFAULT;
     817             : 
     818             :         return do_pselect(n, inp, outp, exp, tsp, x.p, x.size, PT_OLD_TIMESPEC);
     819             : }
     820             : 
     821             : #endif
     822             : 
     823             : #ifdef __ARCH_WANT_SYS_OLD_SELECT
     824             : struct sel_arg_struct {
     825             :         unsigned long n;
     826             :         fd_set __user *inp, *outp, *exp;
     827             :         struct __kernel_old_timeval __user *tvp;
     828             : };
     829             : 
     830             : SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg)
     831             : {
     832             :         struct sel_arg_struct a;
     833             : 
     834             :         if (copy_from_user(&a, arg, sizeof(a)))
     835             :                 return -EFAULT;
     836             :         return kern_select(a.n, a.inp, a.outp, a.exp, a.tvp);
     837             : }
     838             : #endif
     839             : 
     840             : struct poll_list {
     841             :         struct poll_list *next;
     842             :         int len;
     843             :         struct pollfd entries[];
     844             : };
     845             : 
     846             : #define POLLFD_PER_PAGE  ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))
     847             : 
     848             : /*
     849             :  * Fish for pollable events on the pollfd->fd file descriptor. We're only
     850             :  * interested in events matching the pollfd->events mask, and the result
     851             :  * matching that mask is both recorded in pollfd->revents and returned. The
     852             :  * pwait poll_table will be used by the fd-provided poll handler for waiting,
     853             :  * if pwait->_qproc is non-NULL.
     854             :  */
     855     4490746 : static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait,
     856             :                                      bool *can_busy_poll,
     857             :                                      __poll_t busy_flag)
     858             : {
     859     4490746 :         int fd = pollfd->fd;
     860     4490746 :         __poll_t mask = 0, filter;
     861     4490746 :         struct fd f;
     862             : 
     863     4490746 :         if (fd < 0)
     864       19156 :                 goto out;
     865     4471590 :         mask = EPOLLNVAL;
     866     4471590 :         f = fdget(fd);
     867     4471301 :         if (!f.file)
     868         578 :                 goto out;
     869             : 
     870             :         /* userland u16 ->events contains POLL... bitmap */
     871     4470723 :         filter = demangle_poll(pollfd->events) | EPOLLERR | EPOLLHUP;
     872     4470723 :         pwait->_key = filter | busy_flag;
     873     4470723 :         mask = vfs_poll(f.file, pwait);
     874     4471026 :         if (mask & busy_flag)
     875           0 :                 *can_busy_poll = true;
     876     4471026 :         mask &= filter;             /* Mask out unneeded events. */
     877     4471026 :         fdput(f);
     878             : 
     879     4490761 : out:
     880             :         /* ... and so does ->revents */
     881     4490761 :         pollfd->revents = mangle_poll(mask);
     882     4490761 :         return mask;
     883             : }
     884             : 
     885     3824729 : static int do_poll(struct poll_list *list, struct poll_wqueues *wait,
     886             :                    struct timespec64 *end_time)
     887             : {
     888     3824729 :         poll_table* pt = &wait->pt;
     889     3824729 :         ktime_t expire, *to = NULL;
     890     3824729 :         int timed_out = 0, count = 0;
     891     3824729 :         u64 slack = 0;
     892     3824729 :         __poll_t busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
     893     3824729 :         unsigned long busy_start = 0;
     894             : 
     895             :         /* Optimise the no-wait case */
     896     3824729 :         if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
     897     1794226 :                 pt->_qproc = NULL;
     898     1794226 :                 timed_out = 1;
     899             :         }
     900             : 
     901     3824729 :         if (end_time && !timed_out)
     902      541199 :                 slack = select_estimate_accuracy(end_time);
     903             : 
     904     5828249 :         for (;;) {
     905     5828249 :                 struct poll_list *walk;
     906     5828249 :                 bool can_busy_loop = false;
     907             : 
     908    11656607 :                 for (walk = list; walk != NULL; walk = walk->next) {
     909     5828396 :                         struct pollfd * pfd, * pfd_end;
     910             : 
     911     5828396 :                         pfd = walk->entries;
     912     5828396 :                         pfd_end = pfd + walk->len;
     913    10319157 :                         for (; pfd != pfd_end; pfd++) {
     914             :                                 /*
     915             :                                  * Fish for events. If we found one, record it
     916             :                                  * and kill poll_table->_qproc, so we don't
     917             :                                  * needlessly register any other waiters after
     918             :                                  * this. They'll get immediately deregistered
     919             :                                  * when we break out and return.
     920             :                                  */
     921     4490799 :                                 if (do_pollfd(pfd, pt, &can_busy_loop,
     922             :                                               busy_flag)) {
     923      932408 :                                         count++;
     924      932408 :                                         pt->_qproc = NULL;
     925             :                                         /* found something, stop busy polling */
     926      932408 :                                         busy_flag = 0;
     927      932408 :                                         can_busy_loop = false;
     928             :                                 }
     929             :                         }
     930             :                 }
     931             :                 /*
     932             :                  * All waiters have already been registered, so don't provide
     933             :                  * a poll_table->_qproc to them on the next loop iteration.
     934             :                  */
     935     5828211 :                 pt->_qproc = NULL;
     936     5828211 :                 if (!count) {
     937     4897670 :                         count = wait->error;
     938     4897670 :                         if (signal_pending(current))
     939     1179613 :                                 count = -ERESTARTNOHAND;
     940             :                 }
     941     5828150 :                 if (count || timed_out)
     942             :                         break;
     943             : 
     944             :                 /* only if found POLL_BUSY_LOOP sockets && not out of time */
     945     2003414 :                 if (can_busy_loop && !need_resched()) {
     946           0 :                         if (!busy_start) {
     947           0 :                                 busy_start = busy_loop_current_time();
     948           0 :                                 continue;
     949             :                         }
     950           0 :                         if (!busy_loop_timeout(busy_start))
     951           0 :                                 continue;
     952             :                 }
     953     2003414 :                 busy_flag = 0;
     954             : 
     955             :                 /*
     956             :                  * If this is the first loop and we have a timeout
     957             :                  * given, then we convert to ktime_t and set the to
     958             :                  * pointer to the expiry value.
     959             :                  */
     960     2003414 :                 if (end_time && !to) {
     961      536860 :                         expire = timespec64_to_ktime(*end_time);
     962      536860 :                         to = &expire;
     963             :                 }
     964             : 
     965     2003414 :                 if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack))
     966       30521 :                         timed_out = 1;
     967             :         }
     968     3824736 :         return count;
     969             : }
     970             : 
     971             : #define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list))  / \
     972             :                         sizeof(struct pollfd))
     973             : 
     974     3824757 : static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
     975             :                 struct timespec64 *end_time)
     976             : {
     977     3824757 :         struct poll_wqueues table;
     978     3824757 :         int err = -EFAULT, fdcount, len;
     979             :         /* Allocate small arguments on the stack to save memory and be
     980             :            faster - use long to make sure the buffer is aligned properly
     981             :            on 64 bit archs to avoid unaligned access */
     982     3824757 :         long stack_pps[POLL_STACK_ALLOC/sizeof(long)];
     983     3824757 :         struct poll_list *const head = (struct poll_list *)stack_pps;
     984     3824757 :         struct poll_list *walk = head;
     985     3824757 :         unsigned long todo = nfds;
     986             : 
     987     3824757 :         if (nfds > rlimit(RLIMIT_NOFILE))
     988             :                 return -EINVAL;
     989             : 
     990     3824757 :         len = min_t(unsigned int, nfds, N_STACK_PPS);
     991     3824853 :         for (;;) {
     992     3824853 :                 walk->next = NULL;
     993     3824853 :                 walk->len = len;
     994     3824853 :                 if (!len)
     995             :                         break;
     996             : 
     997     2645311 :                 if (copy_from_user(walk->entries, ufds + nfds-todo,
     998     2645307 :                                         sizeof(struct pollfd) * walk->len))
     999           0 :                         goto out_fds;
    1000             : 
    1001     2645311 :                 todo -= walk->len;
    1002     2645311 :                 if (!todo)
    1003             :                         break;
    1004             : 
    1005         161 :                 len = min(todo, POLLFD_PER_PAGE);
    1006         161 :                 walk = walk->next = kmalloc(struct_size(walk, entries, len),
    1007             :                                             GFP_KERNEL);
    1008         161 :                 if (!walk) {
    1009           0 :                         err = -ENOMEM;
    1010           0 :                         goto out_fds;
    1011             :                 }
    1012             :         }
    1013             : 
    1014     3824696 :         poll_initwait(&table);
    1015     3824696 :         fdcount = do_poll(head, &table, end_time);
    1016     3824760 :         poll_freewait(&table);
    1017             : 
    1018     3824718 :         if (!user_write_access_begin(ufds, nfds * sizeof(*ufds)))
    1019           0 :                 goto out_fds;
    1020             : 
    1021     7649575 :         for (walk = head; walk; walk = walk->next) {
    1022     3824876 :                 struct pollfd *fds = walk->entries;
    1023     3824876 :                 int j;
    1024             : 
    1025     7090345 :                 for (j = walk->len; j; fds++, ufds++, j--)
    1026     3265484 :                         unsafe_put_user(fds->revents, &ufds->revents, Efault);
    1027             :         }
    1028             :         user_write_access_end();
    1029             : 
    1030             :         err = fdcount;
    1031     3824699 : out_fds:
    1032     3824699 :         walk = head->next;
    1033     3824750 :         while (walk) {
    1034         161 :                 struct poll_list *pos = walk;
    1035         161 :                 walk = walk->next;
    1036         161 :                 kfree(pos);
    1037             :         }
    1038             : 
    1039             :         return err;
    1040             : 
    1041             : Efault:
    1042           0 :         user_write_access_end();
    1043           0 :         err = -EFAULT;
    1044           0 :         goto out_fds;
    1045             : }
    1046             : 
    1047           0 : static long do_restart_poll(struct restart_block *restart_block)
    1048             : {
    1049           0 :         struct pollfd __user *ufds = restart_block->poll.ufds;
    1050           0 :         int nfds = restart_block->poll.nfds;
    1051           0 :         struct timespec64 *to = NULL, end_time;
    1052           0 :         int ret;
    1053             : 
    1054           0 :         if (restart_block->poll.has_timeout) {
    1055           0 :                 end_time.tv_sec = restart_block->poll.tv_sec;
    1056           0 :                 end_time.tv_nsec = restart_block->poll.tv_nsec;
    1057           0 :                 to = &end_time;
    1058             :         }
    1059             : 
    1060           0 :         ret = do_sys_poll(ufds, nfds, to);
    1061             : 
    1062           0 :         if (ret == -ERESTARTNOHAND)
    1063           0 :                 ret = set_restart_fn(restart_block, do_restart_poll);
    1064             : 
    1065           0 :         return ret;
    1066             : }
    1067             : 
    1068           0 : SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
    1069             :                 int, timeout_msecs)
    1070             : {
    1071           0 :         struct timespec64 end_time, *to = NULL;
    1072           0 :         int ret;
    1073             : 
    1074           0 :         if (timeout_msecs >= 0) {
    1075           0 :                 to = &end_time;
    1076           0 :                 poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC,
    1077           0 :                         NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
    1078             :         }
    1079             : 
    1080           0 :         ret = do_sys_poll(ufds, nfds, to);
    1081             : 
    1082           0 :         if (ret == -ERESTARTNOHAND) {
    1083           0 :                 struct restart_block *restart_block;
    1084             : 
    1085           0 :                 restart_block = &current->restart_block;
    1086           0 :                 restart_block->poll.ufds = ufds;
    1087           0 :                 restart_block->poll.nfds = nfds;
    1088             : 
    1089           0 :                 if (timeout_msecs >= 0) {
    1090           0 :                         restart_block->poll.tv_sec = end_time.tv_sec;
    1091           0 :                         restart_block->poll.tv_nsec = end_time.tv_nsec;
    1092           0 :                         restart_block->poll.has_timeout = 1;
    1093             :                 } else
    1094           0 :                         restart_block->poll.has_timeout = 0;
    1095             : 
    1096           0 :                 ret = set_restart_fn(restart_block, do_restart_poll);
    1097             :         }
    1098           0 :         return ret;
    1099             : }
    1100             : 
    1101     7649306 : SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
    1102             :                 struct __kernel_timespec __user *, tsp, const sigset_t __user *, sigmask,
    1103             :                 size_t, sigsetsize)
    1104             : {
    1105     3824646 :         struct timespec64 ts, end_time, *to = NULL;
    1106     3824646 :         int ret;
    1107             : 
    1108     3824646 :         if (tsp) {
    1109     2335413 :                 if (get_timespec64(&ts, tsp))
    1110             :                         return -EFAULT;
    1111             : 
    1112     2335393 :                 to = &end_time;
    1113     2335393 :                 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
    1114             :                         return -EINVAL;
    1115             :         }
    1116             : 
    1117     3824634 :         ret = set_user_sigmask(sigmask, sigsetsize);
    1118     3824683 :         if (ret)
    1119           0 :                 return ret;
    1120             : 
    1121     3824683 :         ret = do_sys_poll(ufds, nfds, to);
    1122     3824620 :         return poll_select_finish(&end_time, tsp, PT_TIMESPEC, ret);
    1123             : }
    1124             : 
    1125             : #if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT)
    1126             : 
    1127             : SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds,
    1128             :                 struct old_timespec32 __user *, tsp, const sigset_t __user *, sigmask,
    1129             :                 size_t, sigsetsize)
    1130             : { /* ppoll variant whose timeout is read as a struct old_timespec32 */
    1131             :         struct timespec64 ts, end_time, *to = NULL;
    1132             :         int ret;
    1133             : 
    1134             :         if (tsp) { /* with a NULL tsp, to stays NULL and end_time is never written here */
    1135             :                 if (get_old_timespec32(&ts, tsp))
    1136             :                         return -EFAULT;
    1137             : 
    1138             :                 to = &end_time;
    1139             :                 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
    1140             :                         return -EINVAL;
    1141             :         }
    1142             : 
    1143             :         ret = set_user_sigmask(sigmask, sigsetsize);
    1144             :         if (ret) /* any non-zero result is returned to the caller unchanged */
    1145             :                 return ret;
    1146             : 
    1147             :         ret = do_sys_poll(ufds, nfds, to);
    1148             :         return poll_select_finish(&end_time, tsp, PT_OLD_TIMESPEC, ret); /* NOTE(review): end_time is uninitialised when tsp is NULL; presumably poll_select_finish() does not read it then - confirm */
    1149             : }
    1150             : #endif
    1151             : 
    1152             : #ifdef CONFIG_COMPAT
    1153             : #define __COMPAT_NFDBITS       (8 * sizeof(compat_ulong_t)) /* eight times the byte size of compat_ulong_t, i.e. its width in bits for 8-bit bytes */
    1154             : 
    1155             : /*
    1156             :  * Ooo, nasty.  We need here to frob 32-bit unsigned longs to
    1157             :  * 64-bit unsigned longs.
    1158             :  */
    1159             : static
    1160             : int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
    1161             :                         unsigned long *fdset)
    1162             : { /* Fill fdset from the user bitmap ufdset, or clear it when ufdset is NULL. */
    1163             :         if (ufdset) {
    1164             :                 return compat_get_bitmap(fdset, ufdset, nr); /* result of compat_get_bitmap() is returned as-is */
    1165             :         } else {
    1166             :                 zero_fd_set(nr, fdset); /* no user set supplied: behave as if it were empty */
    1167             :                 return 0;
    1168             :         }
    1169             : }
    1170             : 
    1171             : static
    1172             : int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
    1173             :                       unsigned long *fdset)
    1174             : { /* Write fdset out to the user bitmap ufdset; a NULL ufdset is skipped. */
    1175             :         if (!ufdset)
    1176             :                 return 0; /* nothing to write back, reported as success */
    1177             :         return compat_put_bitmap(ufdset, fdset, nr); /* result of compat_put_bitmap() is returned as-is */
    1178             : }
    1179             : 
    1180             : 
    1181             : /*
    1182             :  * This is a virtual copy of the native sys_select code earlier in this
    1183             :  * file (fs/select.c) and probably should be compared to it from time to time.
    1184             :  */
    1185             : 
    1186             : /*
    1187             :  * We can actually return ERESTARTSYS instead of EINTR, but I'd
    1188             :  * like to be certain this leads to no problems. So I return
    1189             :  * EINTR just for safety.
    1190             :  *
    1191             :  * Update: ERESTARTSYS breaks at least the xview clock binary, so
    1192             :  * I'm trying ERESTARTNOHAND which restart only when you want to.
    1193             :  */
    1194             : static int compat_core_sys_select(int n, compat_ulong_t __user *inp,
    1195             :         compat_ulong_t __user *outp, compat_ulong_t __user *exp,
    1196             :         struct timespec64 *end_time)
    1197             : { /* Read the three user bitmaps, run do_select() on them and write the three result bitmaps back. */
    1198             :         fd_set_bits fds;
    1199             :         void *bits;
    1200             :         int size, max_fds, ret = -EINVAL; /* -EINVAL is what a negative n returns */
    1201             :         struct fdtable *fdt;
    1202             :         long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
    1203             : 
    1204             :         if (n < 0)
    1205             :                 goto out_nofds;
    1206             : 
    1207             :         /* max_fds can increase, so grab it once to avoid race */
    1208             :         rcu_read_lock();
    1209             :         fdt = files_fdtable(current->files);
    1210             :         max_fds = fdt->max_fds;
    1211             :         rcu_read_unlock();
    1212             :         if (n > max_fds) /* clamp n to the value sampled above */
    1213             :                 n = max_fds;
    1214             : 
    1215             :         /*
    1216             :          * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
    1217             :          * since we used fdset we need to allocate memory in units of
    1218             :          * long-words.
    1219             :          */
    1220             :         size = FDS_BYTES(n);
    1221             :         bits = stack_fds;
    1222             :         if (size > sizeof(stack_fds) / 6) { /* six bitmaps of this size do not fit in stack_fds */
    1223             :                 bits = kmalloc_array(6, size, GFP_KERNEL);
    1224             :                 ret = -ENOMEM; /* returned only if the allocation above failed */
    1225             :                 if (!bits)
    1226             :                         goto out_nofds;
    1227             :         }
    1228             :         fds.in      = (unsigned long *)  bits; /* the six bitmaps sit back to back, size bytes apart */
    1229             :         fds.out     = (unsigned long *) (bits +   size);
    1230             :         fds.ex      = (unsigned long *) (bits + 2*size);
    1231             :         fds.res_in  = (unsigned long *) (bits + 3*size);
    1232             :         fds.res_out = (unsigned long *) (bits + 4*size);
    1233             :         fds.res_ex  = (unsigned long *) (bits + 5*size);
    1234             : 
    1235             :         if ((ret = compat_get_fd_set(n, inp, fds.in)) || /* stop at the first set that fails to load; ret keeps that error */
    1236             :             (ret = compat_get_fd_set(n, outp, fds.out)) ||
    1237             :             (ret = compat_get_fd_set(n, exp, fds.ex)))
    1238             :                 goto out;
    1239             :         zero_fd_set(n, fds.res_in);
    1240             :         zero_fd_set(n, fds.res_out);
    1241             :         zero_fd_set(n, fds.res_ex);
    1242             : 
    1243             :         ret = do_select(n, &fds, end_time);
    1244             : 
    1245             :         if (ret < 0) /* on error the user bitmaps are not written back */
    1246             :                 goto out;
    1247             :         if (!ret) { /* do_select() returned 0: -ERESTARTNOHAND if a signal is pending, otherwise 0 */
    1248             :                 ret = -ERESTARTNOHAND;
    1249             :                 if (signal_pending(current))
    1250             :                         goto out;
    1251             :                 ret = 0;
    1252             :         }
    1253             : 
    1254             :         if (compat_set_fd_set(n, inp, fds.res_in) || /* any failed write-back turns the result into -EFAULT */
    1255             :             compat_set_fd_set(n, outp, fds.res_out) ||
    1256             :             compat_set_fd_set(n, exp, fds.res_ex))
    1257             :                 ret = -EFAULT;
    1258             : out:
    1259             :         if (bits != stack_fds) /* free only what came from kmalloc_array() */
    1260             :                 kfree(bits);
    1261             : out_nofds:
    1262             :         return ret;
    1263             : }
    1264             : 
    1265             : static int do_compat_select(int n, compat_ulong_t __user *inp,
    1266             :         compat_ulong_t __user *outp, compat_ulong_t __user *exp,
    1267             :         struct old_timeval32 __user *tvp)
    1268             : { /* Shared body of the compat select and old_select syscalls; the timeout is a struct old_timeval32. */
    1269             :         struct timespec64 end_time, *to = NULL;
    1270             :         struct old_timeval32 tv;
    1271             :         int ret;
    1272             : 
    1273             :         if (tvp) { /* with a NULL tvp, to stays NULL and end_time is never written here */
    1274             :                 if (copy_from_user(&tv, tvp, sizeof(tv)))
    1275             :                         return -EFAULT;
    1276             : 
    1277             :                 to = &end_time;
    1278             :                 if (poll_select_set_timeout(to,
    1279             :                                 tv.tv_sec + (tv.tv_usec / USEC_PER_SEC), /* whole seconds held in tv_usec are added to tv_sec */
    1280             :                                 (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC)) /* remaining microseconds, scaled to nanoseconds */
    1281             :                         return -EINVAL;
    1282             :         }
    1283             : 
    1284             :         ret = compat_core_sys_select(n, inp, outp, exp, to);
    1285             :         return poll_select_finish(&end_time, tvp, PT_OLD_TIMEVAL, ret); /* NOTE(review): end_time is uninitialised when tvp is NULL; presumably poll_select_finish() does not read it then - confirm */
    1286             : }
    1287             : 
    1288             : COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
    1289             :         compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
    1290             :         struct old_timeval32 __user *, tvp)
    1291             : { /* Compat select entry point: passes its five arguments straight to do_compat_select(). */
    1292             :         return do_compat_select(n, inp, outp, exp, tvp);
    1293             : }
    1294             : 
    1295             : struct compat_sel_arg_struct { /* argument block that the old_select compat syscall copies from user space */
    1296             :         compat_ulong_t n; /* passed on as the int n argument of do_compat_select() */
    1297             :         compat_uptr_t inp; /* this and the pointers below go through compat_ptr() before use */
    1298             :         compat_uptr_t outp;
    1299             :         compat_uptr_t exp;
    1300             :         compat_uptr_t tvp;
    1301             : };
    1302             : 
    1303             : COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg)
    1304             : { /* select variant whose five arguments arrive packed in one user-space struct */
    1305             :         struct compat_sel_arg_struct a;
    1306             : 
    1307             :         if (copy_from_user(&a, arg, sizeof(a)))
    1308             :                 return -EFAULT;
    1309             :         return do_compat_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp), /* a.n is a compat_ulong_t converted to the callee's int n */
    1310             :                                 compat_ptr(a.exp), compat_ptr(a.tvp));
    1311             : }
    1312             : 
    1313             : static long do_compat_pselect(int n, compat_ulong_t __user *inp,
    1314             :         compat_ulong_t __user *outp, compat_ulong_t __user *exp,
    1315             :         void __user *tsp, compat_sigset_t __user *sigmask,
    1316             :         compat_size_t sigsetsize, enum poll_time_type type)
    1317             : { /* Shared body of the compat pselect6 syscalls; type says which timespec layout tsp points to. */
    1318             :         struct timespec64 ts, end_time, *to = NULL;
    1319             :         int ret;
    1320             : 
    1321             :         if (tsp) { /* with a NULL tsp, to stays NULL and end_time is never written here */
    1322             :                 switch (type) {
    1323             :                 case PT_OLD_TIMESPEC:
    1324             :                         if (get_old_timespec32(&ts, tsp))
    1325             :                                 return -EFAULT;
    1326             :                         break;
    1327             :                 case PT_TIMESPEC:
    1328             :                         if (get_timespec64(&ts, tsp))
    1329             :                                 return -EFAULT;
    1330             :                         break;
    1331             :                 default: /* only the two layouts above are handled */
    1332             :                         BUG();
    1333             :                 }
    1334             : 
    1335             :                 to = &end_time;
    1336             :                 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
    1337             :                         return -EINVAL;
    1338             :         }
    1339             : 
    1340             :         ret = set_compat_user_sigmask(sigmask, sigsetsize);
    1341             :         if (ret) /* any non-zero result is returned to the caller unchanged */
    1342             :                 return ret;
    1343             : 
    1344             :         ret = compat_core_sys_select(n, inp, outp, exp, to);
    1345             :         return poll_select_finish(&end_time, tsp, type, ret); /* the same type value tells poll_select_finish() how to treat tsp */
    1346             : }
    1347             : 
    1348             : struct compat_sigset_argpack { /* pair read from the sig argument of the compat pselect6 syscalls */
    1349             :         compat_uptr_t p; /* callers pass compat_ptr(p) as the sigmask pointer */
    1350             :         compat_size_t size; /* callers pass this as sigsetsize */
    1351             : };
    1352             : static inline int get_compat_sigset_argpack(struct compat_sigset_argpack *to,
    1353             :                                             struct compat_sigset_argpack __user *from)
    1354             : { /* Read the {p, size} pair at from into to; returns 0, or -EFAULT if the user read fails. */
    1355             :         if (from) { /* NULL from: to is left exactly as the caller initialised it */
    1356             :                 if (!user_read_access_begin(from, sizeof(*from)))
    1357             :                         return -EFAULT;
    1358             :                 unsafe_get_user(to->p, &from->p, Efault);
    1359             :                 unsafe_get_user(to->size, &from->size, Efault);
    1360             :                 user_read_access_end();
    1361             :         }
    1362             :         return 0;
    1363             : Efault: /* target of the unsafe_get_user() calls, so the read-access window is still open here */
    1364             :         user_read_access_end(); /* must pair with user_read_access_begin() above, not the generic user_access_end() */
    1365             :         return -EFAULT;
    1366             : }
    1367             : 
    1368             : COMPAT_SYSCALL_DEFINE6(pselect6_time64, int, n, compat_ulong_t __user *, inp,
    1369             :         compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
    1370             :         struct __kernel_timespec __user *, tsp, void __user *, sig)
    1371             : { /* Compat pselect6 whose timeout is a struct __kernel_timespec (PT_TIMESPEC). */
    1372             :         struct compat_sigset_argpack x = {0, 0}; /* stays all-zero when sig is NULL, since the getter then writes nothing */
    1373             : 
    1374             :         if (get_compat_sigset_argpack(&x, sig))
    1375             :                 return -EFAULT;
    1376             : 
    1377             :         return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(x.p),
    1378             :                                  x.size, PT_TIMESPEC);
    1379             : }
    1380             : 
    1381             : #if defined(CONFIG_COMPAT_32BIT_TIME)
    1382             : 
    1383             : COMPAT_SYSCALL_DEFINE6(pselect6_time32, int, n, compat_ulong_t __user *, inp,
    1384             :         compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
    1385             :         struct old_timespec32 __user *, tsp, void __user *, sig)
    1386             : { /* Compat pselect6 whose timeout is a struct old_timespec32 (PT_OLD_TIMESPEC). */
    1387             :         struct compat_sigset_argpack x = {0, 0}; /* stays all-zero when sig is NULL, since the getter then writes nothing */
    1388             : 
    1389             :         if (get_compat_sigset_argpack(&x, sig))
    1390             :                 return -EFAULT;
    1391             : 
    1392             :         return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(x.p),
    1393             :                                  x.size, PT_OLD_TIMESPEC);
    1394             : }
    1395             : 
    1396             : #endif
    1397             : 
    1398             : #if defined(CONFIG_COMPAT_32BIT_TIME)
    1399             : COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds,
    1400             :         unsigned int,  nfds, struct old_timespec32 __user *, tsp,
    1401             :         const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
    1402             : { /* Compat ppoll whose timeout is a struct old_timespec32 and whose signal mask is a compat_sigset_t. */
    1403             :         struct timespec64 ts, end_time, *to = NULL;
    1404             :         int ret;
    1405             : 
    1406             :         if (tsp) { /* with a NULL tsp, to stays NULL and end_time is never written here */
    1407             :                 if (get_old_timespec32(&ts, tsp))
    1408             :                         return -EFAULT;
    1409             : 
    1410             :                 to = &end_time;
    1411             :                 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
    1412             :                         return -EINVAL;
    1413             :         }
    1414             : 
    1415             :         ret = set_compat_user_sigmask(sigmask, sigsetsize);
    1416             :         if (ret) /* any non-zero result is returned to the caller unchanged */
    1417             :                 return ret;
    1418             : 
    1419             :         ret = do_sys_poll(ufds, nfds, to);
    1420             :         return poll_select_finish(&end_time, tsp, PT_OLD_TIMESPEC, ret);
    1421             : }
    1422             : #endif
    1423             : 
    1424             : /* New compat syscall for 64-bit time_t */
    1425             : COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds,
    1426             :         unsigned int,  nfds, struct __kernel_timespec __user *, tsp,
    1427             :         const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
    1428             : { /* Compat ppoll whose timeout is a struct __kernel_timespec and whose signal mask is a compat_sigset_t. */
    1429             :         struct timespec64 ts, end_time, *to = NULL;
    1430             :         int ret;
    1431             : 
    1432             :         if (tsp) { /* with a NULL tsp, to stays NULL and end_time is never written here */
    1433             :                 if (get_timespec64(&ts, tsp))
    1434             :                         return -EFAULT;
    1435             : 
    1436             :                 to = &end_time;
    1437             :                 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
    1438             :                         return -EINVAL;
    1439             :         }
    1440             : 
    1441             :         ret = set_compat_user_sigmask(sigmask, sigsetsize);
    1442             :         if (ret) /* any non-zero result is returned to the caller unchanged */
    1443             :                 return ret;
    1444             : 
    1445             :         ret = do_sys_poll(ufds, nfds, to);
    1446             :         return poll_select_finish(&end_time, tsp, PT_TIMESPEC, ret);
    1447             : }
    1448             : 
    1449             : #endif

Generated by: LCOV version 1.14