LCOV - code coverage report
Current view: top level - fs - splice.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-acha @ Mon Jul 31 20:08:06 PDT 2023 Lines: 522 830 62.9 %
Date: 2023-07-31 20:08:07 Functions: 30 44 68.2 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-only
       2             : /*
       3             :  * "splice": joining two ropes together by interweaving their strands.
       4             :  *
       5             :  * This is the "extended pipe" functionality, where a pipe is used as
       6             :  * an arbitrary in-memory buffer. Think of a pipe as a small kernel
       7             :  * buffer that you can use to transfer data from one end to the other.
       8             :  *
       9             :  * The traditional unix read/write is extended with a "splice()" operation
      10             :  * that transfers data buffers to or from a pipe buffer.
      11             :  *
      12             :  * Named by Larry McVoy, original implementation from Linus, extended by
      13             :  * Jens to support splicing to files, network, direct splicing, etc., and
      14             :  * fixing lots of bugs.
      15             :  *
      16             :  * Copyright (C) 2005-2006 Jens Axboe <axboe@kernel.dk>
      17             :  * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org>
      18             :  * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu>
      19             :  *
      20             :  */
      21             : #include <linux/bvec.h>
      22             : #include <linux/fs.h>
      23             : #include <linux/file.h>
      24             : #include <linux/pagemap.h>
      25             : #include <linux/splice.h>
      26             : #include <linux/memcontrol.h>
      27             : #include <linux/mm_inline.h>
      28             : #include <linux/swap.h>
      29             : #include <linux/writeback.h>
      30             : #include <linux/export.h>
      31             : #include <linux/syscalls.h>
      32             : #include <linux/uio.h>
      33             : #include <linux/fsnotify.h>
      34             : #include <linux/security.h>
      35             : #include <linux/gfp.h>
      36             : #include <linux/net.h>
      37             : #include <linux/socket.h>
      38             : #include <linux/sched/signal.h>
      39             : 
      40             : #include "internal.h"
      41             : 
      42             : /*
      43             :  * Splice doesn't support FMODE_NOWAIT. Since pipes may set this flag to
      44             :  * indicate they support non-blocking reads or writes, we must clear it
      45             :  * here if set to avoid blocking other users of this pipe if splice is
      46             :  * being done on it.
      47             :  */
      48     1987295 : static noinline void noinline pipe_clear_nowait(struct file *file)
      49             : {
      50     1987295 :         fmode_t fmode = READ_ONCE(file->f_mode);
      51             : 
      52     1987295 :         do {
      53     1987295 :                 if (!(fmode & FMODE_NOWAIT))
      54             :                         break;
      55     1874572 :         } while (!try_cmpxchg(&file->f_mode, &fmode, fmode & ~FMODE_NOWAIT));
      56     1987311 : }
      57             : 
      58             : /*
      59             :  * Attempt to steal a page from a pipe buffer. This should perhaps go into
      60             :  * a vm helper function, it's already simplified quite a bit by the
      61             :  * addition of remove_mapping(). If success is returned, the caller may
      62             :  * attempt to reuse this page for another destination.
      63             :  */
      64           0 : static bool page_cache_pipe_buf_try_steal(struct pipe_inode_info *pipe,
      65             :                 struct pipe_buffer *buf)
      66             : {
      67           0 :         struct folio *folio = page_folio(buf->page);
      68           0 :         struct address_space *mapping;
      69             : 
      70           0 :         folio_lock(folio);
      71             : 
      72           0 :         mapping = folio_mapping(folio);
      73           0 :         if (mapping) {
      74           0 :                 WARN_ON(!folio_test_uptodate(folio));
      75             : 
      76             :                 /*
      77             :                  * At least for ext2 with nobh option, we need to wait on
      78             :                  * writeback completing on this folio, since we'll remove it
      79             :                  * from the pagecache.  Otherwise truncate won't wait on the
      80             :                  * folio, allowing the disk blocks to be reused by someone else
      81             :                  * before we actually wrote our data to them. fs corruption
      82             :                  * ensues.
      83             :                  */
      84           0 :                 folio_wait_writeback(folio);
      85             : 
      86           0 :                 if (folio_has_private(folio) &&
      87           0 :                     !filemap_release_folio(folio, GFP_KERNEL))
      88           0 :                         goto out_unlock;
      89             : 
      90             :                 /*
      91             :                  * If we succeeded in removing the mapping, set LRU flag
      92             :                  * and return good.
      93             :                  */
      94           0 :                 if (remove_mapping(mapping, folio)) {
      95           0 :                         buf->flags |= PIPE_BUF_FLAG_LRU;
      96           0 :                         return true;
      97             :                 }
      98             :         }
      99             : 
     100             :         /*
     101             :          * Raced with truncate or failed to remove folio from current
     102             :          * address space, unlock and return failure.
     103             :          */
     104           0 : out_unlock:
     105           0 :         folio_unlock(folio);
     106           0 :         return false;
     107             : }
     108             : 
     109     9697422 : static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
     110             :                                         struct pipe_buffer *buf)
     111             : {
     112     9697422 :         put_page(buf->page);
     113     9697463 :         buf->flags &= ~PIPE_BUF_FLAG_LRU;
     114     9697463 : }
     115             : 
     116             : /*
     117             :  * Check whether the contents of buf are OK to access. Since the content
     118             :  * is a page cache page, IO may be in flight.
     119             :  */
     120     9695227 : static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
     121             :                                        struct pipe_buffer *buf)
     122             : {
     123     9695227 :         struct page *page = buf->page;
     124     9695227 :         int err;
     125             : 
     126     9695227 :         if (!PageUptodate(page)) {
     127           0 :                 lock_page(page);
     128             : 
     129             :                 /*
     130             :                  * Page got truncated/unhashed. This will cause a 0-byte
     131             :                  * splice, if this is the first page.
     132             :                  */
     133           0 :                 if (!page->mapping) {
     134           0 :                         err = -ENODATA;
     135           0 :                         goto error;
     136             :                 }
     137             : 
     138             :                 /*
     139             :                  * Uh oh, read-error from disk.
     140             :                  */
     141           0 :                 if (!PageUptodate(page)) {
     142           0 :                         err = -EIO;
     143           0 :                         goto error;
     144             :                 }
     145             : 
     146             :                 /*
     147             :                  * Page is OK after all; we are done.
     148             :                  */
     149           0 :                 unlock_page(page);
     150             :         }
     151             : 
     152             :         return 0;
     153           0 : error:
     154           0 :         unlock_page(page);
     155           0 :         return err;
     156             : }
     157             : 
     158             : const struct pipe_buf_operations page_cache_pipe_buf_ops = {
     159             :         .confirm        = page_cache_pipe_buf_confirm,
     160             :         .release        = page_cache_pipe_buf_release,
     161             :         .try_steal      = page_cache_pipe_buf_try_steal,
     162             :         .get            = generic_pipe_buf_get,
     163             : };
     164             : 
     165           0 : static bool user_page_pipe_buf_try_steal(struct pipe_inode_info *pipe,
     166             :                 struct pipe_buffer *buf)
     167             : {
     168           0 :         if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
     169             :                 return false;
     170             : 
     171           0 :         buf->flags |= PIPE_BUF_FLAG_LRU;
     172           0 :         return generic_pipe_buf_try_steal(pipe, buf);
     173             : }
     174             : 
     175             : static const struct pipe_buf_operations user_page_pipe_buf_ops = {
     176             :         .release        = page_cache_pipe_buf_release,
     177             :         .try_steal      = user_page_pipe_buf_try_steal,
     178             :         .get            = generic_pipe_buf_get,
     179             : };
     180             : 
     181      946146 : static void wakeup_pipe_readers(struct pipe_inode_info *pipe)
     182             : {
     183      946146 :         smp_mb();
     184      946150 :         if (waitqueue_active(&pipe->rd_wait))
     185           0 :                 wake_up_interruptible(&pipe->rd_wait);
     186      946150 :         kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
     187      946149 : }
     188             : 
     189             : /**
     190             :  * splice_to_pipe - fill passed data into a pipe
     191             :  * @pipe:       pipe to fill
     192             :  * @spd:        data to fill
     193             :  *
     194             :  * Description:
     195             :  *    @spd contains a map of pages and len/offset tuples, along with
     196             :  *    the struct pipe_buf_operations associated with these pages. This
     197             :  *    function will link that data to the pipe.
     198             :  *
     199             :  */
     200       58978 : ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
     201             :                        struct splice_pipe_desc *spd)
     202             : {
     203       58978 :         unsigned int spd_pages = spd->nr_pages;
     204       58978 :         unsigned int tail = pipe->tail;
     205       58978 :         unsigned int head = pipe->head;
     206       58978 :         unsigned int mask = pipe->ring_size - 1;
     207       58978 :         int ret = 0, page_nr = 0;
     208             : 
     209       58978 :         if (!spd_pages)
     210             :                 return 0;
     211             : 
     212       58978 :         if (unlikely(!pipe->readers)) {
     213           0 :                 send_sig(SIGPIPE, current, 0);
     214           0 :                 ret = -EPIPE;
     215           0 :                 goto out;
     216             :         }
     217             : 
     218       64626 :         while (!pipe_full(head, tail, pipe->max_usage)) {
     219       64624 :                 struct pipe_buffer *buf = &pipe->bufs[head & mask];
     220             : 
     221       64624 :                 buf->page = spd->pages[page_nr];
     222       64624 :                 buf->offset = spd->partial[page_nr].offset;
     223       64624 :                 buf->len = spd->partial[page_nr].len;
     224       64624 :                 buf->private = spd->partial[page_nr].private;
     225       64624 :                 buf->ops = spd->ops;
     226       64624 :                 buf->flags = 0;
     227             : 
     228       64624 :                 head++;
     229       64624 :                 pipe->head = head;
     230       64624 :                 page_nr++;
     231       64624 :                 ret += buf->len;
     232             : 
     233       64624 :                 if (!--spd->nr_pages)
     234             :                         break;
     235             :         }
     236             : 
     237       58978 :         if (!ret)
     238           1 :                 ret = -EAGAIN;
     239             : 
     240       58978 : out:
     241       58981 :         while (page_nr < spd_pages)
     242           3 :                 spd->spd_release(spd, page_nr++);
     243             : 
     244       58978 :         return ret;
     245             : }
     246             : EXPORT_SYMBOL_GPL(splice_to_pipe);
     247             : 
     248        2557 : ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
     249             : {
     250        2557 :         unsigned int head = pipe->head;
     251        2557 :         unsigned int tail = pipe->tail;
     252        2557 :         unsigned int mask = pipe->ring_size - 1;
     253        2557 :         int ret;
     254             : 
     255        2557 :         if (unlikely(!pipe->readers)) {
     256           0 :                 send_sig(SIGPIPE, current, 0);
     257           0 :                 ret = -EPIPE;
     258        2557 :         } else if (pipe_full(head, tail, pipe->max_usage)) {
     259             :                 ret = -EAGAIN;
     260             :         } else {
     261        2559 :                 pipe->bufs[head & mask] = *buf;
     262        2559 :                 pipe->head = head + 1;
     263        2559 :                 return buf->len;
     264             :         }
     265           0 :         pipe_buf_release(pipe, buf);
     266           0 :         return ret;
     267             : }
     268             : EXPORT_SYMBOL(add_to_pipe);
     269             : 
     270             : /*
     271             :  * Check if we need to grow the arrays holding pages and partial page
     272             :  * descriptions.
     273             :  */
     274           0 : int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
     275             : {
     276           0 :         unsigned int max_usage = READ_ONCE(pipe->max_usage);
     277             : 
     278           0 :         spd->nr_pages_max = max_usage;
     279           0 :         if (max_usage <= PIPE_DEF_BUFFERS)
     280             :                 return 0;
     281             : 
     282           0 :         spd->pages = kmalloc_array(max_usage, sizeof(struct page *), GFP_KERNEL);
     283           0 :         spd->partial = kmalloc_array(max_usage, sizeof(struct partial_page),
     284             :                                      GFP_KERNEL);
     285             : 
     286           0 :         if (spd->pages && spd->partial)
     287             :                 return 0;
     288             : 
     289           0 :         kfree(spd->pages);
     290           0 :         kfree(spd->partial);
     291           0 :         return -ENOMEM;
     292             : }
     293             : 
     294           0 : void splice_shrink_spd(struct splice_pipe_desc *spd)
     295             : {
     296           0 :         if (spd->nr_pages_max <= PIPE_DEF_BUFFERS)
     297             :                 return;
     298             : 
     299           0 :         kfree(spd->pages);
     300           0 :         kfree(spd->partial);
     301             : }
     302             : 
     303             : /**
     304             :  * copy_splice_read -  Copy data from a file and splice the copy into a pipe
     305             :  * @in: The file to read from
     306             :  * @ppos: Pointer to the file position to read from
     307             :  * @pipe: The pipe to splice into
     308             :  * @len: The amount to splice
     309             :  * @flags: The SPLICE_F_* flags
     310             :  *
     311             :  * This function allocates a bunch of pages sufficient to hold the requested
     312             :  * amount of data (but limited by the remaining pipe capacity), passes it to
     313             :  * the file's ->read_iter() to read into and then splices the used pages into
     314             :  * the pipe.
     315             :  *
     316             :  * Return: On success, the number of bytes read will be returned and *@ppos
     317             :  * will be updated if appropriate; 0 will be returned if there is no more data
     318             :  * to be read; -EAGAIN will be returned if the pipe had no space, and some
     319             :  * other negative error code will be returned on error.  A short read may occur
     320             :  * if the pipe has insufficient space, we reach the end of the data or we hit a
     321             :  * hole.
     322             :  */
     323         918 : ssize_t copy_splice_read(struct file *in, loff_t *ppos,
     324             :                          struct pipe_inode_info *pipe,
     325             :                          size_t len, unsigned int flags)
     326             : {
     327         918 :         struct iov_iter to;
     328         918 :         struct bio_vec *bv;
     329         918 :         struct kiocb kiocb;
     330         918 :         struct page **pages;
     331         918 :         ssize_t ret;
     332         918 :         size_t used, npages, chunk, remain, keep = 0;
     333         918 :         int i;
     334             : 
     335             :         /* Work out how much data we can actually add into the pipe */
     336         918 :         used = pipe_occupancy(pipe->head, pipe->tail);
     337         918 :         npages = max_t(ssize_t, pipe->max_usage - used, 0);
     338         918 :         len = min_t(size_t, len, npages * PAGE_SIZE);
     339         918 :         npages = DIV_ROUND_UP(len, PAGE_SIZE);
     340             : 
     341         918 :         bv = kzalloc(array_size(npages, sizeof(bv[0])) +
     342             :                      array_size(npages, sizeof(struct page *)), GFP_KERNEL);
     343         918 :         if (!bv)
     344             :                 return -ENOMEM;
     345             : 
     346         918 :         pages = (struct page **)(bv + npages);
     347         918 :         npages = alloc_pages_bulk_array(GFP_USER, npages, pages);
     348         918 :         if (!npages) {
     349           0 :                 kfree(bv);
     350           0 :                 return -ENOMEM;
     351             :         }
     352             : 
     353         918 :         remain = len = min_t(size_t, len, npages * PAGE_SIZE);
     354             : 
     355        1840 :         for (i = 0; i < npages; i++) {
     356         922 :                 chunk = min_t(size_t, PAGE_SIZE, remain);
     357         922 :                 bv[i].bv_page = pages[i];
     358         922 :                 bv[i].bv_offset = 0;
     359         922 :                 bv[i].bv_len = chunk;
     360         922 :                 remain -= chunk;
     361             :         }
     362             : 
     363             :         /* Do the I/O */
     364         918 :         iov_iter_bvec(&to, ITER_DEST, bv, npages, len);
     365         918 :         init_sync_kiocb(&kiocb, in);
     366         918 :         kiocb.ki_pos = *ppos;
     367         918 :         ret = call_read_iter(in, &kiocb, &to);
     368             : 
     369         918 :         if (ret > 0) {
     370         918 :                 keep = DIV_ROUND_UP(ret, PAGE_SIZE);
     371         918 :                 *ppos = kiocb.ki_pos;
     372             :         }
     373             : 
     374             :         /*
     375             :          * Callers of ->splice_read() expect -EAGAIN on "can't put anything in
     376             :          * there", rather than -EFAULT.
     377             :          */
     378         918 :         if (ret == -EFAULT)
     379           0 :                 ret = -EAGAIN;
     380             : 
     381             :         /* Free any pages that didn't get touched at all. */
     382         918 :         if (keep < npages)
     383           0 :                 release_pages(pages + keep, npages - keep);
     384             : 
     385             :         /* Push the remaining pages into the pipe. */
     386         918 :         remain = ret;
     387        1840 :         for (i = 0; i < keep; i++) {
     388         922 :                 struct pipe_buffer *buf = pipe_head_buf(pipe);
     389             : 
     390         922 :                 chunk = min_t(size_t, remain, PAGE_SIZE);
     391         922 :                 *buf = (struct pipe_buffer) {
     392             :                         .ops    = &default_pipe_buf_ops,
     393         922 :                         .page   = bv[i].bv_page,
     394             :                         .offset = 0,
     395             :                         .len    = chunk,
     396             :                 };
     397         922 :                 pipe->head++;
     398         922 :                 remain -= chunk;
     399             :         }
     400             : 
     401         918 :         kfree(bv);
     402         918 :         return ret;
     403             : }
     404             : EXPORT_SYMBOL(copy_splice_read);
     405             : 
     406             : const struct pipe_buf_operations default_pipe_buf_ops = {
     407             :         .release        = generic_pipe_buf_release,
     408             :         .try_steal      = generic_pipe_buf_try_steal,
     409             :         .get            = generic_pipe_buf_get,
     410             : };
     411             : 
     412             : /* Pipe buffer operations for a socket and similar. */
     413             : const struct pipe_buf_operations nosteal_pipe_buf_ops = {
     414             :         .release        = generic_pipe_buf_release,
     415             :         .get            = generic_pipe_buf_get,
     416             : };
     417             : EXPORT_SYMBOL(nosteal_pipe_buf_ops);
     418             : 
     419      931492 : static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
     420             : {
     421      931492 :         smp_mb();
     422      931492 :         if (waitqueue_active(&pipe->wr_wait))
     423           0 :                 wake_up_interruptible(&pipe->wr_wait);
     424      931492 :         kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
     425      931491 : }
     426             : 
     427             : /**
     428             :  * splice_from_pipe_feed - feed available data from a pipe to a file
     429             :  * @pipe:       pipe to splice from
     430             :  * @sd:         information to @actor
     431             :  * @actor:      handler that splices the data
     432             :  *
     433             :  * Description:
     434             :  *    This function loops over the pipe and calls @actor to do the
     435             :  *    actual moving of a single struct pipe_buffer to the desired
     436             :  *    destination.  It returns when there are no more buffers left in
     437             :  *    the pipe or if the requested number of bytes (@sd->total_len)
     438             :  *    have been copied.  It returns a positive number (one) if the
     439             :  *    pipe needs to be filled with more data, zero if the required
     440             :  *    number of bytes have been copied and -errno on error.
     441             :  *
     442             :  *    This, together with splice_from_pipe_{begin,end,next}, may be
     443             :  *    used to implement the functionality of __splice_from_pipe() when
     444             :  *    locking is required around copying the pipe buffers to the
     445             :  *    destination.
     446             :  */
     447         181 : static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
     448             :                           splice_actor *actor)
     449             : {
     450         181 :         unsigned int head = pipe->head;
     451         181 :         unsigned int tail = pipe->tail;
     452         181 :         unsigned int mask = pipe->ring_size - 1;
     453         181 :         int ret;
     454             : 
     455         362 :         while (!pipe_empty(head, tail)) {
     456         181 :                 struct pipe_buffer *buf = &pipe->bufs[tail & mask];
     457             : 
     458         181 :                 sd->len = buf->len;
     459         181 :                 if (sd->len > sd->total_len)
     460           0 :                         sd->len = sd->total_len;
     461             : 
     462         181 :                 ret = pipe_buf_confirm(pipe, buf);
     463         181 :                 if (unlikely(ret)) {
     464           0 :                         if (ret == -ENODATA)
     465           0 :                                 ret = 0;
     466           0 :                         return ret;
     467             :                 }
     468             : 
     469         181 :                 ret = actor(pipe, buf, sd);
     470         181 :                 if (ret <= 0)
     471           0 :                         return ret;
     472             : 
     473         181 :                 buf->offset += ret;
     474         181 :                 buf->len -= ret;
     475             : 
     476         181 :                 sd->num_spliced += ret;
     477         181 :                 sd->len -= ret;
     478         181 :                 sd->pos += ret;
     479         181 :                 sd->total_len -= ret;
     480             : 
     481         181 :                 if (!buf->len) {
     482         181 :                         pipe_buf_release(pipe, buf);
     483         181 :                         tail++;
     484         181 :                         pipe->tail = tail;
     485         181 :                         if (pipe->files)
     486         181 :                                 sd->need_wakeup = true;
     487             :                 }
     488             : 
     489         181 :                 if (!sd->total_len)
     490             :                         return 0;
     491             :         }
     492             : 
     493             :         return 1;
     494             : }
     495             : 
     496             : /* We know we have a pipe buffer, but maybe it's empty? */
     497     4921089 : static inline bool eat_empty_buffer(struct pipe_inode_info *pipe)
     498             : {
     499     4921089 :         unsigned int tail = pipe->tail;
     500     4921089 :         unsigned int mask = pipe->ring_size - 1;
     501     4921089 :         struct pipe_buffer *buf = &pipe->bufs[tail & mask];
     502             : 
     503     4921089 :         if (unlikely(!buf->len)) {
     504           0 :                 pipe_buf_release(pipe, buf);
     505           0 :                 pipe->tail = tail+1;
     506           0 :                 return true;
     507             :         }
     508             : 
     509             :         return false;
     510             : }
     511             : 
     512             : /**
     513             :  * splice_from_pipe_next - wait for some data to splice from
     514             :  * @pipe:       pipe to splice from
     515             :  * @sd:         information about the splice operation
     516             :  *
     517             :  * Description:
     518             :  *    This function will wait for some data and return a positive
     519             :  *    value (one) if pipe buffers are available.  It will return zero
     520             :  *    or -errno if no more data needs to be spliced.
     521             :  */
     522     5005967 : static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
     523             : {
     524             :         /*
     525             :          * Check for signal early to make process killable when there are
     526             :          * always buffers available
     527             :          */
     528     5005967 :         if (signal_pending(current))
     529             :                 return -ERESTARTSYS;
     530             : 
     531     5005967 : repeat:
     532     5045996 :         while (pipe_empty(pipe->head, pipe->tail)) {
     533      124851 :                 if (!pipe->writers)
     534             :                         return 0;
     535             : 
     536       40113 :                 if (sd->num_spliced)
     537             :                         return 0;
     538             : 
     539       39989 :                 if (sd->flags & SPLICE_F_NONBLOCK)
     540             :                         return -EAGAIN;
     541             : 
     542       39989 :                 if (signal_pending(current))
     543             :                         return -ERESTARTSYS;
     544             : 
     545       39989 :                 if (sd->need_wakeup) {
     546           0 :                         wakeup_pipe_writers(pipe);
     547           0 :                         sd->need_wakeup = false;
     548             :                 }
     549             : 
     550       39989 :                 pipe_wait_readable(pipe);
     551             :         }
     552             : 
     553     4921145 :         if (eat_empty_buffer(pipe))
     554           0 :                 goto repeat;
     555             : 
     556             :         return 1;
     557             : }
     558             : 
     559             : /**
     560             :  * splice_from_pipe_begin - start splicing from pipe
     561             :  * @sd:         information about the splice operation
     562             :  *
     563             :  * Description:
     564             :  *    This function should be called before a loop containing
     565             :  *    splice_from_pipe_next() and splice_from_pipe_feed() to
     566             :  *    initialize the necessary fields of @sd.
     567             :  */
     568             : static void splice_from_pipe_begin(struct splice_desc *sd)
     569             : {
     570     5005604 :         sd->num_spliced = 0;
     571     5005604 :         sd->need_wakeup = false;
     572     5005604 : }
     573             : 
     574             : /**
     575             :  * splice_from_pipe_end - finish splicing from pipe
     576             :  * @pipe:       pipe to splice from
     577             :  * @sd:         information about the splice operation
     578             :  *
     579             :  * Description:
     580             :  *    Calls wakeup_pipe_writers() on @pipe when @sd->need_wakeup is set,
     581             :  *    and does nothing otherwise.  It should be called after a loop
     582             :  *    containing splice_from_pipe_next() and splice_from_pipe_feed().
     583             :  */
     584             : static void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
     585             : {
     586     5005644 :         if (sd->need_wakeup)
     587      872734 :                 wakeup_pipe_writers(pipe);
     588             : }
     589             : 
     590             : /**
     591             :  * __splice_from_pipe - splice data from a pipe to given actor
     592             :  * @pipe:       pipe to splice from
     593             :  * @sd:         information to @actor
     594             :  * @actor:      handler that splices the data
     595             :  *
     596             :  * Description:
     597             :  *    This function does little more than loop over the pipe and call
     598             :  *    @actor to do the actual moving of a single struct pipe_buffer to
     599             :  *    the desired destination. See pipe_to_file, pipe_to_sendmsg, or
     600             :  *    pipe_to_user. The loop ends when either helper returns <= 0.
     601             :  *    Returns @sd->num_spliced if non-zero, else that last helper result.
     602             :  */
     603       84865 : ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
     604             :                            splice_actor *actor)
     605             : {
     606       84865 :         int ret;
     607             :         /* no locking is done here; splice_from_pipe() is the variant that takes pipe_lock() */
     608       84865 :         splice_from_pipe_begin(sd);
     609       85046 :         do {
     610       85046 :                 cond_resched();
     611       85044 :                 ret = splice_from_pipe_next(pipe, sd);
     612       85044 :                 if (ret > 0)
     613         181 :                         ret = splice_from_pipe_feed(pipe, sd, actor);
     614       85044 :         } while (ret > 0);
     615       84863 :         splice_from_pipe_end(pipe, sd);
     616             : 
     617       84863 :         return sd->num_spliced ? sd->num_spliced : ret;
     618             : }
     619             : EXPORT_SYMBOL(__splice_from_pipe);
     620             : 
     621             : /**
     622             :  * splice_from_pipe - splice data from a pipe to a file
     623             :  * @pipe:       pipe to splice from
     624             :  * @out:        file to splice to
     625             :  * @ppos:       position in @out (read once to seed sd.pos; never written back here)
     626             :  * @len:        how many bytes to splice
     627             :  * @flags:      splice modifier flags
     628             :  * @actor:      handler that splices the data
     629             :  *
     630             :  * Description:
     631             :  *    See __splice_from_pipe. This function takes pipe_lock() around the
     632             :  *    call, otherwise it's identical to __splice_from_pipe().
     633             :  *    Returns whatever __splice_from_pipe() returned.
     634             :  */
     635       84872 : ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
     636             :                          loff_t *ppos, size_t len, unsigned int flags,
     637             :                          splice_actor *actor)
     638             : {
     639       84872 :         ssize_t ret;
     640       84872 :         struct splice_desc sd = {
     641             :                 .total_len = len,
     642             :                 .flags = flags,
     643       84872 :                 .pos = *ppos,
     644             :                 .u.file = out,
     645             :         };
     646             : 
     647       84872 :         pipe_lock(pipe);
     648       84869 :         ret = __splice_from_pipe(pipe, &sd, actor);
     649       84864 :         pipe_unlock(pipe);
     650             : 
     651       84860 :         return ret;
     652             : }
     653             : 
     654             : /**
     655             :  * iter_file_splice_write - splice data from a pipe to a file
     656             :  * @pipe:       pipe info
     657             :  * @out:        file to write to
     658             :  * @ppos:       position in @out (updated after every successful write)
     659             :  * @len:        number of bytes to splice
     660             :  * @flags:      splice modifier flags
     661             :  *
     662             :  * Description:
     663             :  *    Will either move or copy pages (determined by @flags options) from
     664             :  *    the given pipe inode to the given file.
     665             :  *    This one is ->write_iter-based.
     666             :  *    Returns the bytes written if non-zero, otherwise 0 or a negative error.
     667             :  */
     668             : ssize_t
     669     4920779 : iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
     670             :                           loff_t *ppos, size_t len, unsigned int flags)
     671             : {
     672     4920779 :         struct splice_desc sd = {
     673             :                 .total_len = len,
     674             :                 .flags = flags,
     675     4920779 :                 .pos = *ppos,
     676             :                 .u.file = out,
     677             :         };
     678     4920779 :         int nbufs = pipe->max_usage;
     679     4920779 :         struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec),
     680             :                                         GFP_KERNEL);
     681     4920734 :         ssize_t ret = 0;
     682             :         /* ret starts at 0: with @len == 0 the loop below never runs and ret is returned as-is */
     683     4920734 :         if (unlikely(!array))
     684             :                 return -ENOMEM;
     685             : 
     686     4920734 :         pipe_lock(pipe);
     687             : 
     688     4920739 :         splice_from_pipe_begin(&sd);
     689     9767148 :         while (sd.total_len) {
     690     4920947 :                 struct iov_iter from;
     691     4920947 :                 unsigned int head, tail, mask;
     692     4920947 :                 size_t left;
     693     4920947 :                 int n;
     694             : 
     695     4920947 :                 ret = splice_from_pipe_next(pipe, &sd);
     696     4920924 :                 if (ret <= 0)
     697             :                         break;
     698             :                 /* array holds fewer entries than the current pipe->max_usage: reallocate it */
     699     4920916 :                 if (unlikely(nbufs < pipe->max_usage)) {
     700           0 :                         kfree(array);
     701           0 :                         nbufs = pipe->max_usage;
     702           0 :                         array = kcalloc(nbufs, sizeof(struct bio_vec),
     703             :                                         GFP_KERNEL);
     704           0 :                         if (!array) {
     705             :                                 ret = -ENOMEM;
     706             :                                 break;
     707             :                         }
     708             :                 }
     709             : 
     710     4920916 :                 head = pipe->head;
     711     4920916 :                 tail = pipe->tail;
     712     4920916 :                 mask = pipe->ring_size - 1;
     713             : 
     714             :                 /* build the vector */
     715     4920916 :                 left = sd.total_len;
     716    14619628 :                 for (n = 0; !pipe_empty(head, tail) && left && n < nbufs; tail++) {
     717     9698677 :                         struct pipe_buffer *buf = &pipe->bufs[tail & mask];
     718     9698677 :                         size_t this_len = buf->len;
     719             : 
     720             :                         /* zero-length bvecs are not supported, skip them */
     721     9698677 :                         if (!this_len)
     722           0 :                                 continue;
     723     9698677 :                         this_len = min(this_len, left);
     724             : 
     725     9698677 :                         ret = pipe_buf_confirm(pipe, buf);
     726     9698712 :                         if (unlikely(ret)) {
     727           0 :                                 if (ret == -ENODATA)
     728           0 :                                         ret = 0;
     729           0 :                                 goto done;
     730             :                         }
     731             : 
     732     9698712 :                         bvec_set_page(&array[n], buf->page, this_len,
     733             :                                       buf->offset);
     734     9698712 :                         left -= this_len;
     735     9698712 :                         n++;
     736             :                 }
     737             :                 /* write the n gathered segments (sd.total_len - left bytes) to @out at sd.pos */
     738     4920951 :                 iov_iter_bvec(&from, ITER_SOURCE, array, n, sd.total_len - left);
     739     4920922 :                 ret = vfs_iter_write(out, &from, &sd.pos, 0);
     740     4920956 :                 if (ret <= 0)
     741             :                         break;
     742             :                 /* ret > 0 bytes were written: account them and publish the new position */
     743     4846396 :                 sd.num_spliced += ret;
     744     4846396 :                 sd.total_len -= ret;
     745     4846396 :                 *ppos = sd.pos;
     746             : 
     747             :                 /* dismiss the fully eaten buffers, adjust the partial one */
     748     4846396 :                 tail = pipe->tail;
     749    14386785 :                 while (ret) {
     750     9540584 :                         struct pipe_buffer *buf = &pipe->bufs[tail & mask];
     751     9540584 :                         if (ret >= buf->len) {
     752     9540376 :                                 ret -= buf->len;
     753     9540376 :                                 buf->len = 0;
     754     9540376 :                                 pipe_buf_release(pipe, buf);
     755     9540389 :                                 tail++;
     756     9540389 :                                 pipe->tail = tail;
     757     9540389 :                                 if (pipe->files)
     758     1734317 :                                         sd.need_wakeup = true;
     759             :                         } else {
     760         208 :                                 buf->offset += ret;
     761         208 :                                 buf->len -= ret;
     762         208 :                                 ret = 0;
     763             :                         }
     764             :                 }
     765             :         }
     766     4846201 : done:
     767     4920769 :         kfree(array);
     768     4920781 :         splice_from_pipe_end(pipe, &sd);
     769             : 
     770     4920780 :         pipe_unlock(pipe);
     771             :         /* bytes already written take precedence over a later error or 0 */
     772     4920750 :         if (sd.num_spliced)
     773     4846361 :                 ret = sd.num_spliced;
     774             : 
     775             :         return ret;
     776             : }
     777             : 
     778             : EXPORT_SYMBOL(iter_file_splice_write);
     779             : 
     780             : #ifdef CONFIG_NET
     781             : /**
     782             :  * splice_to_socket - splice data from a pipe to a socket
     783             :  * @pipe:       pipe to splice from
     784             :  * @out:        socket to write to
     785             :  * @ppos:       position in @out (not used by this function)
     786             :  * @len:        number of bytes to splice
     787             :  * @flags:      splice modifier flags
     788             :  *
     789             :  * Description:
     790             :  *    Will send @len bytes from the pipe to a network socket. No data copying
     791             :  *    is involved. If @out is O_NONBLOCK the send is issued with MSG_DONTWAIT.
     792             :  *    Returns the bytes sent if non-zero, otherwise 0 or a negative error.
     793             :  */
     794       58757 : ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out,
     795             :                          loff_t *ppos, size_t len, unsigned int flags)
     796             : {
     797       58757 :         struct socket *sock = sock_from_file(out);
     798       58757 :         struct bio_vec bvec[16];
     799       58757 :         struct msghdr msg = {};
     800       58757 :         ssize_t ret = 0;
     801       58757 :         size_t spliced = 0;
     802       58757 :         bool need_wakeup = false;
     803             : 
     804       58757 :         pipe_lock(pipe);
     805             : 
     806      117514 :         while (len > 0) {
     807       58757 :                 unsigned int head, tail, mask, bc = 0;
     808       58757 :                 size_t remain = len;
     809             : 
     810             :                 /*
     811             :                  * Check for signal early to make process killable when there
     812             :                  * are always buffers available
     813             :                  */
     814       58757 :                 ret = -ERESTARTSYS;
     815       58757 :                 if (signal_pending(current))
     816             :                         break;
     817             :                 /* wait for data; each exit below sets ret first, then jumps to out */
     818       58757 :                 while (pipe_empty(pipe->head, pipe->tail)) {
     819           0 :                         ret = 0;
     820           0 :                         if (!pipe->writers)
     821           0 :                                 goto out;
     822             : 
     823           0 :                         if (spliced)
     824           0 :                                 goto out;
     825             : 
     826           0 :                         ret = -EAGAIN;
     827           0 :                         if (flags & SPLICE_F_NONBLOCK)
     828           0 :                                 goto out;
     829             : 
     830           0 :                         ret = -ERESTARTSYS;
     831           0 :                         if (signal_pending(current))
     832           0 :                                 goto out;
     833             : 
     834           0 :                         if (need_wakeup) {
     835           0 :                                 wakeup_pipe_writers(pipe);
     836           0 :                                 need_wakeup = false;
     837             :                         }
     838             : 
     839           0 :                         pipe_wait_readable(pipe);
     840             :                 }
     841             : 
     842       58757 :                 head = pipe->head;
     843       58757 :                 tail = pipe->tail;
     844       58757 :                 mask = pipe->ring_size - 1;
     845             :                 /* gather up to ARRAY_SIZE(bvec) non-empty buffers, at most remain bytes, into bvec[] */
     846       64624 :                 while (!pipe_empty(head, tail)) {
     847       64624 :                         struct pipe_buffer *buf = &pipe->bufs[tail & mask];
     848       64624 :                         size_t seg;
     849             : 
     850       64624 :                         if (!buf->len) {
     851           0 :                                 tail++;
     852           0 :                                 continue;
     853             :                         }
     854             : 
     855       64624 :                         seg = min_t(size_t, remain, buf->len);
     856             : 
     857       64624 :                         ret = pipe_buf_confirm(pipe, buf);
     858       64624 :                         if (unlikely(ret)) {
     859           0 :                                 if (ret == -ENODATA)
     860           0 :                                         ret = 0;
     861             :                                 break;
     862             :                         }
     863             : 
     864       64624 :                         bvec_set_page(&bvec[bc++], buf->page, seg, buf->offset);
     865       64624 :                         remain -= seg;
     866       64624 :                         if (remain == 0 || bc >= ARRAY_SIZE(bvec))
     867             :                                 break;
     868        5867 :                         tail++;
     869             :                 }
     870             : 
     871       58757 :                 if (!bc)
     872             :                         break;
     873             : 
     874       58757 :                 msg.msg_flags = MSG_SPLICE_PAGES;
     875       58757 :                 if (flags & SPLICE_F_MORE)
     876           0 :                         msg.msg_flags |= MSG_MORE;
     877       58757 :                 if (remain && pipe_occupancy(pipe->head, tail) > 0)
     878           0 :                         msg.msg_flags |= MSG_MORE;
                     :                 /* honour O_NONBLOCK on the socket: fail with -EAGAIN rather than block in sendmsg */
                     :                 if (out->f_flags & O_NONBLOCK)
                     :                         msg.msg_flags |= MSG_DONTWAIT;
     879             : 
     880       58757 :                 iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, bvec, bc,
     881             :                               len - remain);
     882       58757 :                 ret = sock_sendmsg(sock, &msg);
     883       58757 :                 if (ret <= 0)
     884             :                         break;
     885             :                 /* consume the ret bytes the socket accepted, starting at the pipe's tail */
     886       58757 :                 spliced += ret;
     887       58757 :                 len -= ret;
     888       58757 :                 tail = pipe->tail;
     889      123381 :                 while (ret > 0) {
     890       64624 :                         struct pipe_buffer *buf = &pipe->bufs[tail & mask];
     891       64624 :                         size_t seg = min_t(size_t, ret, buf->len);
     892             : 
     893       64624 :                         buf->offset += seg;
     894       64624 :                         buf->len -= seg;
     895       64624 :                         ret -= seg;
     896             : 
     897       64624 :                         if (!buf->len) {
     898       64624 :                                 pipe_buf_release(pipe, buf);
     899       64624 :                                 tail++;
     900             :                         }
     901             :                 }
     902             :                 /* publish the advanced tail; request a writer wakeup when pipe->files is non-zero */
     903       58757 :                 if (tail != pipe->tail) {
     904       58757 :                         pipe->tail = tail;
     905       58757 :                         if (pipe->files)
     906       58757 :                                 need_wakeup = true;
     907             :                 }
     908             :         }
     909             : 
     910       58757 : out:
     911       58757 :         pipe_unlock(pipe);
     912       58757 :         if (need_wakeup)
     913       58757 :                 wakeup_pipe_writers(pipe);
     914       58757 :         return spliced ?: ret;
     915             : }
     916             : #endif
     917             : /* Log via pr_debug_ratelimited() that splice @op is unsupported on @file; always returns -EINVAL. */
     918             : static int warn_unsupported(struct file *file, const char *op)
     919             : {
     920             :         pr_debug_ratelimited(
     921             :                 "splice %s not supported for file %pD4 (pid: %d comm: %.20s)\n",
     922             :                 op, file, current->pid, current->comm);
     923             :         return -EINVAL;
     924             : }
     925             : 
     926             : /*
     927             :  * Attempt to initiate a splice from pipe to file by calling out->f_op->splice_write.
     928             :  * Returns that method's result, or -EINVAL (after a debug log) if @out has no such method.
     929             :  */
     930             : static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
     931             :                            loff_t *ppos, size_t len, unsigned int flags)
     932             : {
     933     5064359 :         if (unlikely(!out->f_op->splice_write))
     934             :                 return warn_unsupported(out, "write");
     935     5064359 :         return out->f_op->splice_write(pipe, out, ppos, len, flags);
     936             : }
     936             : 
     937             : /*
     938             :  * Indicate to the caller that there was a premature EOF when reading from the
     939             :  * source and the caller didn't indicate they would be sending more data after
     940             :  * this.  Invokes the optional @sd->splice_eof callback; a no-op when it is NULL.
     941             :  */
     942             : static void do_splice_eof(struct splice_desc *sd)
     943             : {
     944           0 :         if (sd->splice_eof)
     945           0 :                 sd->splice_eof(sd);
     946             : }
     947             : 
     948             : /**
     949             :  * vfs_splice_read - Read data from a file and splice it into a pipe
     950             :  * @in:         File to splice from
     951             :  * @ppos:       Input file offset
     952             :  * @pipe:       Pipe to splice to
     953             :  * @len:        Number of bytes to splice (clamped to the free pipe space and MAX_RW_COUNT)
     954             :  * @flags:      Splice modifier flags (SPLICE_F_*)
     955             :  *
     956             :  * Splice the requested amount of data from the input file to the pipe.  This
     957             :  * is synchronous as the caller must hold the pipe lock across the entire
     958             :  * operation.
     959             :  *
     960             :  * If successful, it returns the amount of data spliced, 0 if @len is 0 or it hit
     961             :  * the EOF or a hole, and a negative error code otherwise (-EBADF without FMODE_READ).
     962             :  */
     963     4987132 : long vfs_splice_read(struct file *in, loff_t *ppos,
     964             :                      struct pipe_inode_info *pipe, size_t len,
     965             :                      unsigned int flags)
     966             : {
     967     4987132 :         unsigned int p_space;
     968     4987132 :         int ret;
     969             : 
     970     4987132 :         if (unlikely(!(in->f_mode & FMODE_READ)))
     971             :                 return -EBADF;
     972     4987132 :         if (!len)
     973             :                 return 0;
     974             : 
     975             :         /* Don't try to read more than the pipe has space for. */
     976     4987103 :         p_space = pipe->max_usage - pipe_occupancy(pipe->head, pipe->tail);
     977     4987103 :         len = min_t(size_t, len, p_space << PAGE_SHIFT);
     978             : 
     979     4987103 :         ret = rw_verify_area(READ, in, ppos, len);
     980     4987091 :         if (unlikely(ret < 0))
     981           0 :                 return ret;
     982             : 
     983     4987091 :         if (unlikely(len > MAX_RW_COUNT))
     984           0 :                 len = MAX_RW_COUNT;
     985             :         /* no ->splice_read method: log at debug level and fail with -EINVAL */
     986     4987091 :         if (unlikely(!in->f_op->splice_read))
     987             :                 return warn_unsupported(in, "read");
     988             :         /*
     989             :          * O_DIRECT and DAX don't deal with the pagecache, so we allocate a
     990             :          * buffer, copy into it and splice that into the pipe.
     991             :          */
     992     4987091 :         if ((in->f_flags & O_DIRECT) || IS_DAX(in->f_mapping->host))
     993         918 :                 return copy_splice_read(in, ppos, pipe, len, flags);
     994     4986173 :         return in->f_op->splice_read(in, ppos, pipe, len, flags);
     995             : }
     996             : EXPORT_SYMBOL_GPL(vfs_splice_read);
     997             : 
     998             : /**
     999             :  * splice_direct_to_actor - splices data directly between two non-pipes
    1000             :  * @in:         file to splice from
    1001             :  * @sd:         actor information on where to splice to (->flags, ->total_len and ->pos are modified in place)
    1002             :  * @actor:      handles the data splicing
    1003             :  *
    1004             :  * Description:
    1005             :  *    This is a special case helper to splice directly between two
    1006             :  *    points, without requiring an explicit pipe. Internally an allocated
    1007             :  *    pipe is cached in the process, and reused during the lifetime of
    1008             :  *    that process.
    1009             :  *    Returns the bytes @actor consumed if non-zero, else the last read/actor result (0 or negative), -EINVAL or -ENOMEM.
    1010             :  */
    1011     4033357 : ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
    1012             :                                splice_direct_actor *actor)
    1013             : {
    1014     4033357 :         struct pipe_inode_info *pipe;
    1015     4033357 :         long ret, bytes;
    1016     4033357 :         size_t len;
    1017     4033357 :         int i, flags, more;
    1018             : 
    1019             :         /*
    1020             :          * We require the input to be seekable, as we don't want to randomly
    1021             :          * drop data for eg socket -> socket splicing. Use the piped splicing
    1022             :          * for that!
    1023             :          */
    1024     4033357 :         if (unlikely(!(in->f_mode & FMODE_LSEEK)))
    1025             :                 return -EINVAL;
    1026             : 
    1027             :         /*
    1028             :          * neither in nor out is a pipe, setup an internal pipe attached to
    1029             :          * 'out' and transfer the wanted data from 'in' to 'out' through that
    1030             :          */
    1031     4033357 :         pipe = current->splice_pipe;
    1032     4033357 :         if (unlikely(!pipe)) {
    1033       31389 :                 pipe = alloc_pipe_info();
    1034       31389 :                 if (!pipe)
    1035             :                         return -ENOMEM;
    1036             : 
    1037             :                 /*
    1038             :                  * We don't have an immediate reader, but we'll read the stuff
    1039             :                  * out of the pipe right after the splice_to_pipe(). So set
    1040             :                  * PIPE_READERS appropriately.
    1041             :                  */
    1042       31389 :                 pipe->readers = 1;
    1043             : 
    1044       31389 :                 current->splice_pipe = pipe;
    1045             :         }
    1046             : 
    1047             :         /*
    1048             :          * Do the splice.
    1049             :          */
    1050     4033357 :         bytes = 0;
    1051     4033357 :         len = sd->total_len;
    1052             : 
    1053             :         /* Don't block on output, we have to drain the direct pipe. */
    1054     4033357 :         flags = sd->flags;
    1055     4033357 :         sd->flags &= ~SPLICE_F_NONBLOCK;
    1056             : 
    1057             :         /*
    1058             :          * We signal MORE until we've read sufficient data to fulfill the
    1059             :          * request and we keep signalling it if the caller set it.
    1060             :          */
    1061     4033357 :         more = sd->flags & SPLICE_F_MORE;
    1062     4033357 :         sd->flags |= SPLICE_F_MORE;
    1063             : 
    1064     4033357 :         WARN_ON_ONCE(!pipe_empty(pipe->head, pipe->tail));
    1065             : 
    1066     8007027 :         while (len) {
    1067     4033443 :                 size_t read_len;
    1068     4033443 :                 loff_t pos = sd->pos, prev_pos = pos;
    1069             :                 /* the read side gets the caller's original flags, saved before sd->flags was edited */
    1070     4033443 :                 ret = vfs_splice_read(in, &pos, pipe, len, flags);
    1071     4033476 :                 if (unlikely(ret <= 0))
    1072         107 :                         goto read_failure;
    1073             : 
    1074     4033369 :                 read_len = ret;
    1075     4033369 :                 sd->total_len = read_len;
    1076             : 
    1077             :                 /*
    1078             :                  * If we now have sufficient data to fulfill the request then
    1079             :                  * we clear SPLICE_F_MORE if it was not set initially.
    1080             :                  */
    1081     4033369 :                 if (read_len >= len && !more)
    1082     4033263 :                         sd->flags &= ~SPLICE_F_MORE;
    1083             : 
    1084             :                 /*
    1085             :                  * NOTE: nonblocking mode only applies to the input. We
    1086             :                  * must not do the output in nonblocking mode as then we
    1087             :                  * could get stuck data in the internal pipe:
    1088             :                  */
    1089     4033369 :                 ret = actor(pipe, sd);
    1090     4033355 :                 if (unlikely(ret <= 0)) {
    1091       59553 :                         sd->pos = prev_pos;
    1092       59685 :                         goto out_release;
    1093             :                 }
    1094             : 
    1095     3973802 :                 bytes += ret;
    1096     3973802 :                 len -= ret;
    1097     3973802 :                 sd->pos = pos;
    1098             :                 /* short output: only advance sd->pos by what the actor consumed, then bail out */
    1099     3973802 :                 if (ret < read_len) {
    1100         132 :                         sd->pos = prev_pos + ret;
    1101         132 :                         goto out_release;
    1102             :                 }
    1103             :         }
    1104             : 
    1105     3973584 : done:
    1106     4033391 :         pipe->tail = pipe->head = 0;
    1107     4033391 :         file_accessed(in);
    1108     4033391 :         return bytes;
    1109             : 
    1110             : read_failure:
    1111             :         /*
    1112             :          * If the user did *not* set SPLICE_F_MORE *and* we didn't hit that
    1113             :          * "use all of len" case that cleared SPLICE_F_MORE, *and* we did a
    1114             :          * "vfs_splice_read()" that returned EOF (ie zero) *and* we have sent at
    1115             :          * least 1 byte *then* we will also do the ->splice_eof() call.
    1116             :          */
    1117         107 :         if (ret == 0 && !more && len > 0 && bytes)
    1118           0 :                 do_splice_eof(sd);
    1119         107 : out_release:
    1120             :         /*
    1121             :          * If we did an incomplete transfer we must release
    1122             :          * the pipe buffers in question:
    1123             :          */
    1124     1016726 :         for (i = 0; i < pipe->ring_size; i++) {
    1125      956919 :                 struct pipe_buffer *buf = &pipe->bufs[i];
    1126             : 
    1127      956919 :                 if (buf->ops)
    1128      126444 :                         pipe_buf_release(pipe, buf);
    1129             :         }
    1130             :         /* nothing was transferred at all: report the failing call's result instead of 0 bytes */
    1131       59807 :         if (!bytes)
    1132       59652 :                 bytes = ret;
    1133             : 
    1134       59807 :         goto done;
    1135             : }
    1136             : EXPORT_SYMBOL(splice_direct_to_actor);
    1137             : 
    1138     4033335 : static int direct_splice_actor(struct pipe_inode_info *pipe,
    1139             :                                struct splice_desc *sd)
    1140             : {
    1141     4033335 :         struct file *file = sd->u.file;
    1142             :         /* splice sd->total_len bytes from @pipe into the output file at sd->opos */
    1143     4033335 :         return do_splice_from(pipe, file, sd->opos, sd->total_len,
    1144             :                               sd->flags);
    1145             : }
    1146             : 
    1147           0 : static void direct_file_splice_eof(struct splice_desc *sd)
    1148             : {
    1149           0 :         struct file *file = sd->u.file;
    1150             :         /* forward the EOF notification to the output file's ->splice_eof, if it has one */
    1151           0 :         if (file->f_op->splice_eof)
    1152           0 :                 file->f_op->splice_eof(file);
    1153           0 : }
    1154             : 
    1155             : /**
    1156             :  * do_splice_direct - splices data directly between two files
    1157             :  * @in:         file to splice from
    1158             :  * @ppos:       input file offset
    1159             :  * @out:        file to splice to
    1160             :  * @opos:       output file offset
    1161             :  * @len:        number of bytes to splice
    1162             :  * @flags:      splice modifier flags
    1163             :  *
    1164             :  * Description:
    1165             :  *    For use by do_sendfile(). splice can easily emulate sendfile, but
    1166             :  *    doing it in the application would incur an extra system call
    1167             :  *    (splice in + splice out, as compared to just sendfile()). So this helper
    1168             :  *    can splice directly through a process-private pipe.
    1169             :  *
    1170             :  */
    1171     4033373 : long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
    1172             :                       loff_t *opos, size_t len, unsigned int flags)
    1173             : {
    1174     4033373 :         struct splice_desc sd = {
    1175             :                 .len            = len,
    1176             :                 .total_len      = len,
    1177             :                 .flags          = flags,
    1178     4033373 :                 .pos            = *ppos,
    1179             :                 .u.file         = out,
    1180             :                 .splice_eof     = direct_file_splice_eof,
    1181             :                 .opos           = opos,
    1182             :         };
    1183     4033373 :         long ret;
    1184             : 
    1185     4033373 :         if (unlikely(!(out->f_mode & FMODE_WRITE)))
    1186             :                 return -EBADF;
    1187             : 
    1188     4033373 :         if (unlikely(out->f_flags & O_APPEND))
    1189             :                 return -EINVAL;
    1190             : 
    1191     4033373 :         ret = rw_verify_area(WRITE, out, opos, len);
    1192     4033372 :         if (unlikely(ret < 0))
    1193             :                 return ret;
    1194             : 
    1195     4033372 :         ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
    1196     4033372 :         if (ret > 0)
    1197     3973732 :                 *ppos = sd.pos;
    1198             : 
    1199             :         return ret;
    1200             : }
    1201             : EXPORT_SYMBOL(do_splice_direct);
    1202             : 
    1203      956270 : static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
    1204             : {
    1205      956270 :         for (;;) {
    1206      956270 :                 if (unlikely(!pipe->readers)) {
    1207           0 :                         send_sig(SIGPIPE, current, 0);
    1208           0 :                         return -EPIPE;
    1209             :                 }
    1210      956270 :                 if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage))
    1211             :                         return 0;
    1212          12 :                 if (flags & SPLICE_F_NONBLOCK)
    1213             :                         return -EAGAIN;
    1214          12 :                 if (signal_pending(current))
    1215             :                         return -ERESTARTSYS;
    1216           0 :                 pipe_wait_writable(pipe);
    1217             :         }
    1218             : }
    1219             : 
    1220             : static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
    1221             :                                struct pipe_inode_info *opipe,
    1222             :                                size_t len, unsigned int flags);
    1223             : 
    1224      953709 : long splice_file_to_pipe(struct file *in,
    1225             :                          struct pipe_inode_info *opipe,
    1226             :                          loff_t *offset,
    1227             :                          size_t len, unsigned int flags)
    1228             : {
    1229      953709 :         long ret;
    1230             : 
    1231      953709 :         pipe_lock(opipe);
    1232      953700 :         ret = wait_for_space(opipe, flags);
    1233      953690 :         if (!ret)
    1234      953689 :                 ret = vfs_splice_read(in, offset, opipe, len, flags);
    1235      953716 :         pipe_unlock(opipe);
    1236      953714 :         if (ret > 0)
    1237      943591 :                 wakeup_pipe_readers(opipe);
    1238      953716 :         return ret;
    1239             : }
    1240             : 
    1241             : /*
    1242             :  * Determine where to splice to/from.
    1243             :  */
    1244     1984751 : long do_splice(struct file *in, loff_t *off_in, struct file *out,
    1245             :                loff_t *off_out, size_t len, unsigned int flags)
    1246             : {
    1247     1984751 :         struct pipe_inode_info *ipipe;
    1248     1984751 :         struct pipe_inode_info *opipe;
    1249     1984751 :         loff_t offset;
    1250     1984751 :         long ret;
    1251             : 
    1252     1984751 :         if (unlikely(!(in->f_mode & FMODE_READ) ||
    1253             :                      !(out->f_mode & FMODE_WRITE)))
    1254             :                 return -EBADF;
    1255             : 
    1256     1984751 :         ipipe = get_pipe_info(in, true);
    1257     1984717 :         opipe = get_pipe_info(out, true);
    1258             : 
    1259     1984722 :         if (ipipe && opipe) {
    1260           0 :                 if (off_in || off_out)
    1261             :                         return -ESPIPE;
    1262             : 
    1263             :                 /* Splicing to self would be fun, but... */
    1264           0 :                 if (ipipe == opipe)
    1265             :                         return -EINVAL;
    1266             : 
    1267           0 :                 if ((in->f_flags | out->f_flags) & O_NONBLOCK)
    1268           0 :                         flags |= SPLICE_F_NONBLOCK;
    1269             : 
    1270           0 :                 return splice_pipe_to_pipe(ipipe, opipe, len, flags);
    1271             :         }
    1272             : 
    1273     1984722 :         if (ipipe) {
    1274     1031030 :                 if (off_in)
    1275             :                         return -ESPIPE;
    1276     1031030 :                 if (off_out) {
    1277      887413 :                         if (!(out->f_mode & FMODE_PWRITE))
    1278             :                                 return -EINVAL;
    1279      887413 :                         offset = *off_out;
    1280             :                 } else {
    1281      143617 :                         offset = out->f_pos;
    1282             :                 }
    1283             : 
    1284     1031030 :                 if (unlikely(out->f_flags & O_APPEND))
    1285             :                         return -EINVAL;
    1286             : 
    1287     1031030 :                 ret = rw_verify_area(WRITE, out, &offset, len);
    1288     1031032 :                 if (unlikely(ret < 0))
    1289             :                         return ret;
    1290             : 
    1291     1031032 :                 if (in->f_flags & O_NONBLOCK)
    1292       58757 :                         flags |= SPLICE_F_NONBLOCK;
    1293             : 
    1294     1031032 :                 file_start_write(out);
    1295     1031024 :                 ret = do_splice_from(ipipe, out, &offset, len, flags);
    1296     1031034 :                 file_end_write(out);
    1297             : 
    1298     1031035 :                 if (ret > 0)
    1299      931505 :                         fsnotify_modify(out);
    1300             : 
    1301     1031035 :                 if (!off_out)
    1302      143618 :                         out->f_pos = offset;
    1303             :                 else
    1304      887417 :                         *off_out = offset;
    1305             : 
    1306     1031035 :                 return ret;
    1307             :         }
    1308             : 
    1309      953692 :         if (opipe) {
    1310      953692 :                 if (off_out)
    1311             :                         return -ESPIPE;
    1312      953692 :                 if (off_in) {
    1313      884835 :                         if (!(in->f_mode & FMODE_PREAD))
    1314             :                                 return -EINVAL;
    1315      884835 :                         offset = *off_in;
    1316             :                 } else {
    1317       68857 :                         offset = in->f_pos;
    1318             :                 }
    1319             : 
    1320      953692 :                 if (out->f_flags & O_NONBLOCK)
    1321       68849 :                         flags |= SPLICE_F_NONBLOCK;
    1322             : 
    1323      953692 :                 ret = splice_file_to_pipe(in, opipe, &offset, len, flags);
    1324             : 
    1325      953708 :                 if (ret > 0)
    1326      943591 :                         fsnotify_access(in);
    1327             : 
    1328      953709 :                 if (!off_in)
    1329       68857 :                         in->f_pos = offset;
    1330             :                 else
    1331      884852 :                         *off_in = offset;
    1332             : 
    1333      953709 :                 return ret;
    1334             :         }
    1335             : 
    1336             :         return -EINVAL;
    1337             : }
    1338             : 
    1339     1984746 : static long __do_splice(struct file *in, loff_t __user *off_in,
    1340             :                         struct file *out, loff_t __user *off_out,
    1341             :                         size_t len, unsigned int flags)
    1342             : {
    1343     1984746 :         struct pipe_inode_info *ipipe;
    1344     1984746 :         struct pipe_inode_info *opipe;
    1345     1984746 :         loff_t offset, *__off_in = NULL, *__off_out = NULL;
    1346     1984746 :         long ret;
    1347             : 
    1348     1984746 :         ipipe = get_pipe_info(in, true);
    1349     1984727 :         opipe = get_pipe_info(out, true);
    1350             : 
    1351     1984734 :         if (ipipe) {
    1352     1031033 :                 if (off_in)
    1353             :                         return -ESPIPE;
    1354     1031033 :                 pipe_clear_nowait(in);
    1355             :         }
    1356     1984736 :         if (opipe) {
    1357      953697 :                 if (off_out)
    1358             :                         return -ESPIPE;
    1359      953697 :                 pipe_clear_nowait(out);
    1360             :         }
    1361             : 
    1362     1984756 :         if (off_out) {
    1363      887415 :                 if (copy_from_user(&offset, off_out, sizeof(loff_t)))
    1364             :                         return -EFAULT;
    1365             :                 __off_out = &offset;
    1366             :         }
    1367     1984757 :         if (off_in) {
    1368      884856 :                 if (copy_from_user(&offset, off_in, sizeof(loff_t)))
    1369             :                         return -EFAULT;
    1370             :                 __off_in = &offset;
    1371             :         }
    1372             : 
    1373     1984743 :         ret = do_splice(in, __off_in, out, __off_out, len, flags);
    1374     1984746 :         if (ret < 0)
    1375             :                 return ret;
    1376             : 
    1377     2842376 :         if (__off_out && copy_to_user(off_out, __off_out, sizeof(loff_t)))
    1378           0 :                 return -EFAULT;
    1379     2854634 :         if (__off_in && copy_to_user(off_in, __off_in, sizeof(loff_t)))
    1380           0 :                 return -EFAULT;
    1381             : 
    1382             :         return ret;
    1383             : }
    1384             : 
    1385        2558 : static int iter_to_pipe(struct iov_iter *from,
    1386             :                         struct pipe_inode_info *pipe,
    1387             :                         unsigned flags)
    1388             : {
    1389        2558 :         struct pipe_buffer buf = {
    1390             :                 .ops = &user_page_pipe_buf_ops,
    1391             :                 .flags = flags
    1392             :         };
    1393        2558 :         size_t total = 0;
    1394        2558 :         int ret = 0;
    1395             : 
    1396        5113 :         while (iov_iter_count(from)) {
    1397        2558 :                 struct page *pages[16];
    1398        2558 :                 ssize_t left;
    1399        2558 :                 size_t start;
    1400        2558 :                 int i, n;
    1401             : 
    1402        2558 :                 left = iov_iter_get_pages2(from, pages, ~0UL, 16, &start);
    1403        2558 :                 if (left <= 0) {
    1404           0 :                         ret = left;
    1405           0 :                         break;
    1406             :                 }
    1407             : 
    1408        2558 :                 n = DIV_ROUND_UP(left + start, PAGE_SIZE);
    1409        5116 :                 for (i = 0; i < n; i++) {
    1410        2561 :                         int size = min_t(int, left, PAGE_SIZE - start);
    1411             : 
    1412        2561 :                         buf.page = pages[i];
    1413        2561 :                         buf.offset = start;
    1414        2561 :                         buf.len = size;
    1415        2561 :                         ret = add_to_pipe(pipe, &buf);
    1416        2558 :                         if (unlikely(ret < 0)) {
    1417           0 :                                 iov_iter_revert(from, left);
    1418             :                                 // this one got dropped by add_to_pipe()
    1419           0 :                                 while (++i < n)
    1420           0 :                                         put_page(pages[i]);
    1421           0 :                                 goto out;
    1422             :                         }
    1423        2558 :                         total += ret;
    1424        2558 :                         left -= size;
    1425        2558 :                         start = 0;
    1426             :                 }
    1427             :         }
    1428        2555 : out:
    1429        2555 :         return total ? total : ret;
    1430             : }
    1431             : 
    1432           0 : static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
    1433             :                         struct splice_desc *sd)
    1434             : {
    1435           0 :         int n = copy_page_to_iter(buf->page, buf->offset, sd->len, sd->u.data);
    1436           0 :         return n == sd->len ? n : -EFAULT;
    1437             : }
    1438             : 
    1439             : /*
    1440             :  * For lack of a better implementation, implement vmsplice() to userspace
    1441             :  * as a simple copy of the pipes pages to the user iov.
    1442             :  */
    1443           0 : static long vmsplice_to_user(struct file *file, struct iov_iter *iter,
    1444             :                              unsigned int flags)
    1445             : {
    1446           0 :         struct pipe_inode_info *pipe = get_pipe_info(file, true);
    1447           0 :         struct splice_desc sd = {
    1448             :                 .total_len = iov_iter_count(iter),
    1449             :                 .flags = flags,
    1450             :                 .u.data = iter
    1451             :         };
    1452           0 :         long ret = 0;
    1453             : 
    1454           0 :         if (!pipe)
    1455             :                 return -EBADF;
    1456             : 
    1457           0 :         pipe_clear_nowait(file);
    1458             : 
    1459           0 :         if (sd.total_len) {
    1460           0 :                 pipe_lock(pipe);
    1461           0 :                 ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
    1462           0 :                 pipe_unlock(pipe);
    1463             :         }
    1464             : 
    1465             :         return ret;
    1466             : }
    1467             : 
    1468             : /*
    1469             :  * vmsplice splices a user address range into a pipe. It can be thought of
    1470             :  * as splice-from-memory, where the regular splice is splice-from-file (or
    1471             :  * to file). In both cases the output is a pipe, naturally.
    1472             :  */
    1473        2556 : static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
    1474             :                              unsigned int flags)
    1475             : {
    1476        2556 :         struct pipe_inode_info *pipe;
    1477        2556 :         long ret = 0;
    1478        2556 :         unsigned buf_flag = 0;
    1479             : 
    1480        2556 :         if (flags & SPLICE_F_GIFT)
    1481           0 :                 buf_flag = PIPE_BUF_FLAG_GIFT;
    1482             : 
    1483        2556 :         pipe = get_pipe_info(file, true);
    1484        2557 :         if (!pipe)
    1485             :                 return -EBADF;
    1486             : 
    1487        2557 :         pipe_clear_nowait(file);
    1488             : 
    1489        2557 :         pipe_lock(pipe);
    1490        2559 :         ret = wait_for_space(pipe, flags);
    1491        2559 :         if (!ret)
    1492        2557 :                 ret = iter_to_pipe(iter, pipe, buf_flag);
    1493        2558 :         pipe_unlock(pipe);
    1494        2555 :         if (ret > 0)
    1495        2555 :                 wakeup_pipe_readers(pipe);
    1496             :         return ret;
    1497             : }
    1498             : 
    1499        2558 : static int vmsplice_type(struct fd f, int *type)
    1500             : {
    1501        2558 :         if (!f.file)
    1502             :                 return -EBADF;
    1503        2558 :         if (f.file->f_mode & FMODE_WRITE) {
    1504        2558 :                 *type = ITER_SOURCE;
    1505           0 :         } else if (f.file->f_mode & FMODE_READ) {
    1506           0 :                 *type = ITER_DEST;
    1507             :         } else {
    1508           0 :                 fdput(f);
    1509           0 :                 return -EBADF;
    1510             :         }
    1511             :         return 0;
    1512             : }
    1513             : 
    1514             : /*
    1515             :  * Note that vmsplice only really supports true splicing _from_ user memory
    1516             :  * to a pipe, not the other way around. Splicing from user memory is a simple
    1517             :  * operation that can be supported without any funky alignment restrictions
    1518             :  * or nasty vm tricks. We simply map in the user memory and fill them into
    1519             :  * a pipe. The reverse isn't quite as easy, though. There are two possible
    1520             :  * solutions for that:
    1521             :  *
    1522             :  *      - memcpy() the data internally, at which point we might as well just
    1523             :  *        do a regular read() on the buffer anyway.
    1524             :  *      - Lots of nasty vm tricks, that are neither fast nor flexible (it
    1525             :  *        has restriction limitations on both ends of the pipe).
    1526             :  *
    1527             :  * Currently we punt and implement it as a normal copy, see pipe_to_user().
    1528             :  *
    1529             :  */
    1530        5115 : SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
    1531             :                 unsigned long, nr_segs, unsigned int, flags)
    1532             : {
    1533        2558 :         struct iovec iovstack[UIO_FASTIOV];
    1534        2558 :         struct iovec *iov = iovstack;
    1535        2558 :         struct iov_iter iter;
    1536        2558 :         ssize_t error;
    1537        2558 :         struct fd f;
    1538        2558 :         int type;
    1539             : 
    1540        2558 :         if (unlikely(flags & ~SPLICE_F_ALL))
    1541             :                 return -EINVAL;
    1542             : 
    1543        2558 :         f = fdget(fd);
    1544        2557 :         error = vmsplice_type(f, &type);
    1545        2558 :         if (error)
    1546             :                 return error;
    1547             : 
    1548        2558 :         error = import_iovec(type, uiov, nr_segs,
    1549             :                              ARRAY_SIZE(iovstack), &iov, &iter);
    1550        2558 :         if (error < 0)
    1551           0 :                 goto out_fdput;
    1552             : 
    1553        2558 :         if (!iov_iter_count(&iter))
    1554             :                 error = 0;
    1555        2556 :         else if (type == ITER_SOURCE)
    1556        2556 :                 error = vmsplice_to_pipe(f.file, &iter, flags);
    1557             :         else
    1558           0 :                 error = vmsplice_to_user(f.file, &iter, flags);
    1559             : 
    1560        2557 :         kfree(iov);
    1561        2555 : out_fdput:
    1562        2555 :         fdput(f);
    1563        2555 :         return error;
    1564             : }
    1565             : 
    1566     3969470 : SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
    1567             :                 int, fd_out, loff_t __user *, off_out,
    1568             :                 size_t, len, unsigned int, flags)
    1569             : {
    1570     1984736 :         struct fd in, out;
    1571     1984736 :         long error;
    1572             : 
    1573     1984736 :         if (unlikely(!len))
    1574             :                 return 0;
    1575             : 
    1576     1984736 :         if (unlikely(flags & ~SPLICE_F_ALL))
    1577             :                 return -EINVAL;
    1578             : 
    1579     1984736 :         error = -EBADF;
    1580     1984736 :         in = fdget(fd_in);
    1581     1984724 :         if (in.file) {
    1582     1984724 :                 out = fdget(fd_out);
    1583     1984741 :                 if (out.file) {
    1584     1984741 :                         error = __do_splice(in.file, off_in, out.file, off_out,
    1585             :                                                 len, flags);
    1586     1984741 :                         fdput(out);
    1587             :                 }
    1588     1984741 :                 fdput(in);
    1589             :         }
    1590             :         return error;
    1591             : }
    1592             : 
    1593             : /*
    1594             :  * Make sure there's data to read. Wait for input if we can, otherwise
    1595             :  * return an appropriate error.
    1596             :  */
    1597           0 : static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
    1598             : {
    1599           0 :         int ret;
    1600             : 
    1601             :         /*
    1602             :          * Check the pipe occupancy without the inode lock first. This function
    1603             :          * is speculative anyways, so missing one is ok.
    1604             :          */
    1605           0 :         if (!pipe_empty(pipe->head, pipe->tail))
    1606             :                 return 0;
    1607             : 
    1608           0 :         ret = 0;
    1609           0 :         pipe_lock(pipe);
    1610             : 
    1611           0 :         while (pipe_empty(pipe->head, pipe->tail)) {
    1612           0 :                 if (signal_pending(current)) {
    1613             :                         ret = -ERESTARTSYS;
    1614             :                         break;
    1615             :                 }
    1616           0 :                 if (!pipe->writers)
    1617             :                         break;
    1618           0 :                 if (flags & SPLICE_F_NONBLOCK) {
    1619             :                         ret = -EAGAIN;
    1620             :                         break;
    1621             :                 }
    1622           0 :                 pipe_wait_readable(pipe);
    1623             :         }
    1624             : 
    1625           0 :         pipe_unlock(pipe);
    1626           0 :         return ret;
    1627             : }
    1628             : 
    1629             : /*
    1630             :  * Make sure there's writeable room. Wait for room if we can, otherwise
    1631             :  * return an appropriate error.
    1632             :  */
    1633           0 : static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
    1634             : {
    1635           0 :         int ret;
    1636             : 
    1637             :         /*
    1638             :          * Check pipe occupancy without the inode lock first. This function
    1639             :          * is speculative anyways, so missing one is ok.
    1640             :          */
    1641           0 :         if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage))
    1642             :                 return 0;
    1643             : 
    1644           0 :         ret = 0;
    1645           0 :         pipe_lock(pipe);
    1646             : 
    1647           0 :         while (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) {
    1648           0 :                 if (!pipe->readers) {
    1649           0 :                         send_sig(SIGPIPE, current, 0);
    1650           0 :                         ret = -EPIPE;
    1651           0 :                         break;
    1652             :                 }
    1653           0 :                 if (flags & SPLICE_F_NONBLOCK) {
    1654             :                         ret = -EAGAIN;
    1655             :                         break;
    1656             :                 }
    1657           0 :                 if (signal_pending(current)) {
    1658             :                         ret = -ERESTARTSYS;
    1659             :                         break;
    1660             :                 }
    1661           0 :                 pipe_wait_writable(pipe);
    1662             :         }
    1663             : 
    1664           0 :         pipe_unlock(pipe);
    1665           0 :         return ret;
    1666             : }
    1667             : 
    1668             : /*
    1669             :  * Splice contents of ipipe to opipe.
    1670             :  */
    1671           0 : static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
    1672             :                                struct pipe_inode_info *opipe,
    1673             :                                size_t len, unsigned int flags)
    1674             : {
    1675           0 :         struct pipe_buffer *ibuf, *obuf;
    1676           0 :         unsigned int i_head, o_head;
    1677           0 :         unsigned int i_tail, o_tail;
    1678           0 :         unsigned int i_mask, o_mask;
    1679           0 :         int ret = 0;
    1680           0 :         bool input_wakeup = false;
    1681             : 
    1682             : 
    1683           0 : retry:
    1684           0 :         ret = ipipe_prep(ipipe, flags);
    1685           0 :         if (ret)
    1686           0 :                 return ret;
    1687             : 
    1688           0 :         ret = opipe_prep(opipe, flags);
    1689           0 :         if (ret)
    1690           0 :                 return ret;
    1691             : 
    1692             :         /*
    1693             :          * Potential ABBA deadlock, work around it by ordering lock
    1694             :          * grabbing by pipe info address. Otherwise two different processes
    1695             :          * could deadlock (one doing tee from A -> B, the other from B -> A).
    1696             :          */
    1697           0 :         pipe_double_lock(ipipe, opipe);
    1698             : 
    1699           0 :         i_tail = ipipe->tail;
    1700           0 :         i_mask = ipipe->ring_size - 1;
    1701           0 :         o_head = opipe->head;
    1702           0 :         o_mask = opipe->ring_size - 1;
    1703             : 
    1704           0 :         do {
    1705           0 :                 size_t o_len;
    1706             : 
    1707           0 :                 if (!opipe->readers) {
    1708           0 :                         send_sig(SIGPIPE, current, 0);
    1709           0 :                         if (!ret)
    1710           0 :                                 ret = -EPIPE;
    1711             :                         break;
    1712             :                 }
    1713             : 
    1714           0 :                 i_head = ipipe->head;
    1715           0 :                 o_tail = opipe->tail;
    1716             : 
    1717           0 :                 if (pipe_empty(i_head, i_tail) && !ipipe->writers)
    1718             :                         break;
    1719             : 
    1720             :                 /*
    1721             :                  * Cannot make any progress, because either the input
    1722             :                  * pipe is empty or the output pipe is full.
    1723             :                  */
    1724           0 :                 if (pipe_empty(i_head, i_tail) ||
    1725           0 :                     pipe_full(o_head, o_tail, opipe->max_usage)) {
    1726             :                         /* Already processed some buffers, break */
    1727           0 :                         if (ret)
    1728             :                                 break;
    1729             : 
    1730           0 :                         if (flags & SPLICE_F_NONBLOCK) {
    1731             :                                 ret = -EAGAIN;
    1732             :                                 break;
    1733             :                         }
    1734             : 
    1735             :                         /*
    1736             :                          * We raced with another reader/writer and haven't
    1737             :                          * managed to process any buffers.  A zero return
    1738             :                          * value means EOF, so retry instead.
    1739             :                          */
    1740           0 :                         pipe_unlock(ipipe);
    1741           0 :                         pipe_unlock(opipe);
    1742           0 :                         goto retry;
    1743             :                 }
    1744             : 
    1745           0 :                 ibuf = &ipipe->bufs[i_tail & i_mask];
    1746           0 :                 obuf = &opipe->bufs[o_head & o_mask];
    1747             : 
    1748           0 :                 if (len >= ibuf->len) {
    1749             :                         /*
    1750             :                          * Simply move the whole buffer from ipipe to opipe
    1751             :                          */
    1752           0 :                         *obuf = *ibuf;
    1753           0 :                         ibuf->ops = NULL;
    1754           0 :                         i_tail++;
    1755           0 :                         ipipe->tail = i_tail;
    1756           0 :                         input_wakeup = true;
    1757           0 :                         o_len = obuf->len;
    1758           0 :                         o_head++;
    1759           0 :                         opipe->head = o_head;
    1760             :                 } else {
    1761             :                         /*
    1762             :                          * Get a reference to this pipe buffer,
    1763             :                          * so we can copy the contents over.
    1764             :                          */
    1765           0 :                         if (!pipe_buf_get(ipipe, ibuf)) {
    1766           0 :                                 if (ret == 0)
    1767           0 :                                         ret = -EFAULT;
    1768             :                                 break;
    1769             :                         }
    1770           0 :                         *obuf = *ibuf;
    1771             : 
    1772             :                         /*
    1773             :                          * Don't inherit the gift and merge flags, we need to
    1774             :                          * prevent multiple steals of this page.
    1775             :                          */
    1776           0 :                         obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
    1777           0 :                         obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE;
    1778             : 
    1779           0 :                         obuf->len = len;
    1780           0 :                         ibuf->offset += len;
    1781           0 :                         ibuf->len -= len;
    1782           0 :                         o_len = len;
    1783           0 :                         o_head++;
    1784           0 :                         opipe->head = o_head;
    1785             :                 }
    1786           0 :                 ret += o_len;
    1787           0 :                 len -= o_len;
    1788           0 :         } while (len);
    1789             : 
    1790           0 :         pipe_unlock(ipipe);
    1791           0 :         pipe_unlock(opipe);
    1792             : 
    1793             :         /*
    1794             :          * If we put data in the output pipe, wakeup any potential readers.
    1795             :          */
    1796           0 :         if (ret > 0)
    1797           0 :                 wakeup_pipe_readers(opipe);
    1798             : 
    1799           0 :         if (input_wakeup)
    1800           0 :                 wakeup_pipe_writers(ipipe);
    1801             : 
    1802             :         return ret;
    1803             : }
    1804             : 
    1805             : /*
    1806             :  * Link contents of ipipe to opipe.
    1807             :  */
    1808           0 : static int link_pipe(struct pipe_inode_info *ipipe,
    1809             :                      struct pipe_inode_info *opipe,
    1810             :                      size_t len, unsigned int flags)
    1811             : {
    1812           0 :         struct pipe_buffer *ibuf, *obuf;
    1813           0 :         unsigned int i_head, o_head;
    1814           0 :         unsigned int i_tail, o_tail;
    1815           0 :         unsigned int i_mask, o_mask;
    1816           0 :         int ret = 0;
    1817             : 
    1818             :         /*
    1819             :          * Potential ABBA deadlock, work around it by ordering lock
    1820             :          * grabbing by pipe info address. Otherwise two different processes
    1821             :          * could deadlock (one doing tee from A -> B, the other from B -> A).
    1822             :          */
    1823           0 :         pipe_double_lock(ipipe, opipe);
    1824             : 
    1825           0 :         i_tail = ipipe->tail;
    1826           0 :         i_mask = ipipe->ring_size - 1;
    1827           0 :         o_head = opipe->head;
    1828           0 :         o_mask = opipe->ring_size - 1;
    1829             : 
    1830           0 :         do {
    1831           0 :                 if (!opipe->readers) {
    1832           0 :                         send_sig(SIGPIPE, current, 0);
    1833           0 :                         if (!ret)
    1834           0 :                                 ret = -EPIPE;
    1835             :                         break;
    1836             :                 }
    1837             : 
    1838           0 :                 i_head = ipipe->head;
    1839           0 :                 o_tail = opipe->tail;
    1840             : 
    1841             :                 /*
    1842             :                  * If we have iterated all input buffers or run out of
    1843             :                  * output room, break.
    1844             :                  */
    1845           0 :                 if (pipe_empty(i_head, i_tail) ||
    1846           0 :                     pipe_full(o_head, o_tail, opipe->max_usage))
    1847             :                         break;
    1848             : 
    1849           0 :                 ibuf = &ipipe->bufs[i_tail & i_mask];
    1850           0 :                 obuf = &opipe->bufs[o_head & o_mask];
    1851             : 
    1852             :                 /*
    1853             :                  * Get a reference to this pipe buffer,
    1854             :                  * so we can copy the contents over.
    1855             :                  */
    1856           0 :                 if (!pipe_buf_get(ipipe, ibuf)) {
    1857           0 :                         if (ret == 0)
    1858           0 :                                 ret = -EFAULT;
    1859             :                         break;
    1860             :                 }
    1861             : 
    1862           0 :                 *obuf = *ibuf;
    1863             : 
    1864             :                 /*
    1865             :                  * Don't inherit the gift and merge flag, we need to prevent
    1866             :                  * multiple steals of this page.
    1867             :                  */
    1868           0 :                 obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
    1869           0 :                 obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE;
    1870             : 
    1871           0 :                 if (obuf->len > len)
    1872           0 :                         obuf->len = len;
    1873           0 :                 ret += obuf->len;
    1874           0 :                 len -= obuf->len;
    1875             : 
    1876           0 :                 o_head++;
    1877           0 :                 opipe->head = o_head;
    1878           0 :                 i_tail++;
    1879           0 :         } while (len);
    1880             : 
    1881           0 :         pipe_unlock(ipipe);
    1882           0 :         pipe_unlock(opipe);
    1883             : 
    1884             :         /*
    1885             :          * If we put data in the output pipe, wakeup any potential readers.
    1886             :          */
    1887           0 :         if (ret > 0)
    1888           0 :                 wakeup_pipe_readers(opipe);
    1889             : 
    1890           0 :         return ret;
    1891             : }
    1892             : 
    1893             : /*
    1894             :  * This is a tee(1) implementation that works on pipes. It doesn't copy
    1895             :  * any data, it simply references the 'in' pages on the 'out' pipe.
    1896             :  * The 'flags' used are the SPLICE_F_* variants, currently the only
    1897             :  * applicable one is SPLICE_F_NONBLOCK.
    1898             :  */
    1899           0 : long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags)
    1900             : {
    1901           0 :         struct pipe_inode_info *ipipe = get_pipe_info(in, true);
    1902           0 :         struct pipe_inode_info *opipe = get_pipe_info(out, true);
    1903           0 :         int ret = -EINVAL;
    1904             : 
    1905           0 :         if (unlikely(!(in->f_mode & FMODE_READ) ||
    1906             :                      !(out->f_mode & FMODE_WRITE)))
    1907             :                 return -EBADF;
    1908             : 
    1909             :         /*
    1910             :          * Duplicate the contents of ipipe to opipe without actually
    1911             :          * copying the data.
    1912             :          */
    1913           0 :         if (ipipe && opipe && ipipe != opipe) {
    1914           0 :                 if ((in->f_flags | out->f_flags) & O_NONBLOCK)
    1915           0 :                         flags |= SPLICE_F_NONBLOCK;
    1916             : 
    1917             :                 /*
    1918             :                  * Keep going, unless we encounter an error. The ipipe/opipe
    1919             :                  * ordering doesn't really matter.
    1920             :                  */
    1921           0 :                 ret = ipipe_prep(ipipe, flags);
    1922           0 :                 if (!ret) {
    1923           0 :                         ret = opipe_prep(opipe, flags);
    1924           0 :                         if (!ret)
    1925           0 :                                 ret = link_pipe(ipipe, opipe, len, flags);
    1926             :                 }
    1927             :         }
    1928             : 
    1929           0 :         return ret;
    1930             : }
    1931             : 
    1932           0 : SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
    1933             : {
    1934           0 :         struct fd in, out;
    1935           0 :         int error;
    1936             : 
    1937           0 :         if (unlikely(flags & ~SPLICE_F_ALL))
    1938             :                 return -EINVAL;
    1939             : 
    1940           0 :         if (unlikely(!len))
    1941             :                 return 0;
    1942             : 
    1943           0 :         error = -EBADF;
    1944           0 :         in = fdget(fdin);
    1945           0 :         if (in.file) {
    1946           0 :                 out = fdget(fdout);
    1947           0 :                 if (out.file) {
    1948           0 :                         error = do_tee(in.file, out.file, len, flags);
    1949           0 :                         fdput(out);
    1950             :                 }
    1951           0 :                 fdput(in);
    1952             :         }
    1953             : 
    1954           0 :         return error;
    1955             : }

Generated by: LCOV version 1.14