LCOV - code coverage report
Current view: top level - fs - splice.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc4-xfsa @ Mon Jul 31 20:08:27 PDT 2023 Lines: 525 832 63.1 %
Date: 2023-07-31 20:08:27 Functions: 30 44 68.2 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-only
       2             : /*
       3             :  * "splice": joining two ropes together by interweaving their strands.
       4             :  *
       5             :  * This is the "extended pipe" functionality, where a pipe is used as
       6             :  * an arbitrary in-memory buffer. Think of a pipe as a small kernel
       7             :  * buffer that you can use to transfer data from one end to the other.
       8             :  *
       9             :  * The traditional unix read/write is extended with a "splice()" operation
      10             :  * that transfers data buffers to or from a pipe buffer.
      11             :  *
      12             :  * Named by Larry McVoy, original implementation from Linus, extended by
      13             :  * Jens to support splicing to files, network, direct splicing, etc and
      14             :  * fixing lots of bugs.
      15             :  *
      16             :  * Copyright (C) 2005-2006 Jens Axboe <axboe@kernel.dk>
      17             :  * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org>
      18             :  * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu>
      19             :  *
      20             :  */
      21             : #include <linux/bvec.h>
      22             : #include <linux/fs.h>
      23             : #include <linux/file.h>
      24             : #include <linux/pagemap.h>
      25             : #include <linux/splice.h>
      26             : #include <linux/memcontrol.h>
      27             : #include <linux/mm_inline.h>
      28             : #include <linux/swap.h>
      29             : #include <linux/writeback.h>
      30             : #include <linux/export.h>
      31             : #include <linux/syscalls.h>
      32             : #include <linux/uio.h>
      33             : #include <linux/fsnotify.h>
      34             : #include <linux/security.h>
      35             : #include <linux/gfp.h>
      36             : #include <linux/net.h>
      37             : #include <linux/socket.h>
      38             : #include <linux/sched/signal.h>
      39             : 
      40             : #include "internal.h"
      41             : 
      42             : /*
      43             :  * Splice doesn't support FMODE_NOWAIT. Since pipes may set this flag to
      44             :  * indicate they support non-blocking reads or writes, we must clear it
      45             :  * here if set to avoid blocking other users of this pipe if splice is
      46             :  * being done on it.
      47             :  */
      48     2567300 : static noinline void noinline pipe_clear_nowait(struct file *file)
      49             : {
      50     2567300 :         fmode_t fmode = READ_ONCE(file->f_mode);
      51             : 
      52     2567300 :         do {
      53     2567300 :                 if (!(fmode & FMODE_NOWAIT))
      54             :                         break;
      55     2443298 :         } while (!try_cmpxchg(&file->f_mode, &fmode, fmode & ~FMODE_NOWAIT));
      56     2567301 : }
      57             : 
      58             : /*
      59             :  * Attempt to steal a page from a pipe buffer. This should perhaps go into
      60             :  * a vm helper function, it's already simplified quite a bit by the
      61             :  * addition of remove_mapping(). If success is returned, the caller may
      62             :  * attempt to reuse this page for another destination.
      63             :  */
      64           0 : static bool page_cache_pipe_buf_try_steal(struct pipe_inode_info *pipe,
      65             :                 struct pipe_buffer *buf)
      66             : {
      67           0 :         struct folio *folio = page_folio(buf->page);
      68           0 :         struct address_space *mapping;
      69             : 
      70           0 :         folio_lock(folio);
      71             : 
      72           0 :         mapping = folio_mapping(folio);
      73           0 :         if (mapping) {
      74           0 :                 WARN_ON(!folio_test_uptodate(folio));
      75             : 
      76             :                 /*
      77             :                  * At least for ext2 with nobh option, we need to wait on
      78             :                  * writeback completing on this folio, since we'll remove it
      79             :                  * from the pagecache.  Otherwise truncate won't wait on the
      80             :                  * folio, allowing the disk blocks to be reused by someone else
      81             :                  * before we actually wrote our data to them. fs corruption
      82             :                  * ensues.
      83             :                  */
      84           0 :                 folio_wait_writeback(folio);
      85             : 
      86           0 :                 if (folio_has_private(folio) &&
      87           0 :                     !filemap_release_folio(folio, GFP_KERNEL))
      88           0 :                         goto out_unlock;
      89             : 
      90             :                 /*
      91             :                  * If we succeeded in removing the mapping, set LRU flag
      92             :                  * and return good.
      93             :                  */
      94           0 :                 if (remove_mapping(mapping, folio)) {
      95           0 :                         buf->flags |= PIPE_BUF_FLAG_LRU;
      96           0 :                         return true;
      97             :                 }
      98             :         }
      99             : 
     100             :         /*
     101             :          * Raced with truncate or failed to remove folio from current
     102             :          * address space, unlock and return failure.
     103             :          */
     104           0 : out_unlock:
     105           0 :         folio_unlock(folio);
     106           0 :         return false;
     107             : }
     108             : 
     109    12548559 : static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
     110             :                                         struct pipe_buffer *buf)
     111             : {
     112    12548559 :         put_page(buf->page);
     113    12548609 :         buf->flags &= ~PIPE_BUF_FLAG_LRU;
     114    12548609 : }
     115             : 
     116             : /*
     117             :  * Check whether the contents of buf are OK to access. Since the content
     118             :  * is a page cache page, IO may be in flight.
     119             :  */
     120    12546465 : static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
     121             :                                        struct pipe_buffer *buf)
     122             : {
     123    12546465 :         struct page *page = buf->page;
     124    12546465 :         int err;
     125             : 
     126    12546465 :         if (!PageUptodate(page)) {
     127           0 :                 lock_page(page);
     128             : 
     129             :                 /*
     130             :                  * Page got truncated/unhashed. This will cause a 0-byte
     131             :                  * splice, if this is the first page.
     132             :                  */
     133           0 :                 if (!page->mapping) {
     134           0 :                         err = -ENODATA;
     135           0 :                         goto error;
     136             :                 }
     137             : 
     138             :                 /*
     139             :                  * Uh oh, read-error from disk.
     140             :                  */
     141           0 :                 if (!PageUptodate(page)) {
     142           0 :                         err = -EIO;
     143           0 :                         goto error;
     144             :                 }
     145             : 
     146             :                 /*
     147             :                  * Page is ok after all, we are done.
     148             :                  */
     149           0 :                 unlock_page(page);
     150             :         }
     151             : 
     152             :         return 0;
     153           0 : error:
     154           0 :         unlock_page(page);
     155           0 :         return err;
     156             : }
     157             : 
     158             : const struct pipe_buf_operations page_cache_pipe_buf_ops = {
     159             :         .confirm        = page_cache_pipe_buf_confirm,
     160             :         .release        = page_cache_pipe_buf_release,
     161             :         .try_steal      = page_cache_pipe_buf_try_steal,
     162             :         .get            = generic_pipe_buf_get,
     163             : };
     164             : 
     165           0 : static bool user_page_pipe_buf_try_steal(struct pipe_inode_info *pipe,
     166             :                 struct pipe_buffer *buf)
     167             : {
     168           0 :         if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
     169             :                 return false;
     170             : 
     171           0 :         buf->flags |= PIPE_BUF_FLAG_LRU;
     172           0 :         return generic_pipe_buf_try_steal(pipe, buf);
     173             : }
     174             : 
     175             : static const struct pipe_buf_operations user_page_pipe_buf_ops = {
     176             :         .release        = page_cache_pipe_buf_release,
     177             :         .try_steal      = user_page_pipe_buf_try_steal,
     178             :         .get            = generic_pipe_buf_get,
     179             : };
     180             : 
     181     1238626 : static void wakeup_pipe_readers(struct pipe_inode_info *pipe)
     182             : {
     183     1238626 :         smp_mb();
     184     1238626 :         if (waitqueue_active(&pipe->rd_wait))
     185           0 :                 wake_up_interruptible(&pipe->rd_wait);
     186     1238626 :         kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
     187     1238626 : }
     188             : 
     189             : /**
     190             :  * splice_to_pipe - fill passed data into a pipe
     191             :  * @pipe:       pipe to fill
     192             :  * @spd:        data to fill
     193             :  *
     194             :  * Description:
     195             :  *    @spd contains a map of pages and len/offset tuples, along with
     196             :  *    the struct pipe_buf_operations associated with these pages. This
     197             :  *    function will link that data to the pipe.
     198             :  *
     199             :  */
     200       64624 : ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
     201             :                        struct splice_pipe_desc *spd)
     202             : {
     203       64624 :         unsigned int spd_pages = spd->nr_pages;
     204       64624 :         unsigned int tail = pipe->tail;
     205       64624 :         unsigned int head = pipe->head;
     206       64624 :         unsigned int mask = pipe->ring_size - 1;
     207       64624 :         int ret = 0, page_nr = 0;
     208             : 
     209       64624 :         if (!spd_pages)
     210             :                 return 0;
     211             : 
     212       64624 :         if (unlikely(!pipe->readers)) {
     213           0 :                 send_sig(SIGPIPE, current, 0);
     214           0 :                 ret = -EPIPE;
     215           0 :                 goto out;
     216             :         }
     217             : 
     218       71436 :         while (!pipe_full(head, tail, pipe->max_usage)) {
     219       71434 :                 struct pipe_buffer *buf = &pipe->bufs[head & mask];
     220             : 
     221       71434 :                 buf->page = spd->pages[page_nr];
     222       71434 :                 buf->offset = spd->partial[page_nr].offset;
     223       71434 :                 buf->len = spd->partial[page_nr].len;
     224       71434 :                 buf->private = spd->partial[page_nr].private;
     225       71434 :                 buf->ops = spd->ops;
     226       71434 :                 buf->flags = 0;
     227             : 
     228       71434 :                 head++;
     229       71434 :                 pipe->head = head;
     230       71434 :                 page_nr++;
     231       71434 :                 ret += buf->len;
     232             : 
     233       71434 :                 if (!--spd->nr_pages)
     234             :                         break;
     235             :         }
     236             : 
     237       64624 :         if (!ret)
     238           1 :                 ret = -EAGAIN;
     239             : 
     240       64624 : out:
     241       64626 :         while (page_nr < spd_pages)
     242           2 :                 spd->spd_release(spd, page_nr++);
     243             : 
     244       64624 :         return ret;
     245             : }
     246             : EXPORT_SYMBOL_GPL(splice_to_pipe);
     247             : 
     248        2560 : ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
     249             : {
     250        2560 :         unsigned int head = pipe->head;
     251        2560 :         unsigned int tail = pipe->tail;
     252        2560 :         unsigned int mask = pipe->ring_size - 1;
     253        2560 :         int ret;
     254             : 
     255        2560 :         if (unlikely(!pipe->readers)) {
     256           0 :                 send_sig(SIGPIPE, current, 0);
     257           0 :                 ret = -EPIPE;
     258        2560 :         } else if (pipe_full(head, tail, pipe->max_usage)) {
     259             :                 ret = -EAGAIN;
     260             :         } else {
     261        2559 :                 pipe->bufs[head & mask] = *buf;
     262        2559 :                 pipe->head = head + 1;
     263        2559 :                 return buf->len;
     264             :         }
     265           1 :         pipe_buf_release(pipe, buf);
     266           0 :         return ret;
     267             : }
     268             : EXPORT_SYMBOL(add_to_pipe);
     269             : 
     270             : /*
     271             :  * Check if we need to grow the arrays holding pages and partial page
     272             :  * descriptions.
     273             :  */
     274           0 : int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
     275             : {
     276           0 :         unsigned int max_usage = READ_ONCE(pipe->max_usage);
     277             : 
     278           0 :         spd->nr_pages_max = max_usage;
     279           0 :         if (max_usage <= PIPE_DEF_BUFFERS)
     280             :                 return 0;
     281             : 
     282           0 :         spd->pages = kmalloc_array(max_usage, sizeof(struct page *), GFP_KERNEL);
     283           0 :         spd->partial = kmalloc_array(max_usage, sizeof(struct partial_page),
     284             :                                      GFP_KERNEL);
     285             : 
     286           0 :         if (spd->pages && spd->partial)
     287             :                 return 0;
     288             : 
     289           0 :         kfree(spd->pages);
     290           0 :         kfree(spd->partial);
     291           0 :         return -ENOMEM;
     292             : }
     293             : 
     294           0 : void splice_shrink_spd(struct splice_pipe_desc *spd)
     295             : {
     296           0 :         if (spd->nr_pages_max <= PIPE_DEF_BUFFERS)
     297             :                 return;
     298             : 
     299           0 :         kfree(spd->pages);
     300           0 :         kfree(spd->partial);
     301             : }
     302             : 
     303             : /**
     304             :  * copy_splice_read -  Copy data from a file and splice the copy into a pipe
     305             :  * @in: The file to read from
     306             :  * @ppos: Pointer to the file position to read from
     307             :  * @pipe: The pipe to splice into
     308             :  * @len: The amount to splice
     309             :  * @flags: The SPLICE_F_* flags
     310             :  *
     311             :  * This function allocates a bunch of pages sufficient to hold the requested
     312             :  * amount of data (but limited by the remaining pipe capacity), passes it to
     313             :  * the file's ->read_iter() to read into and then splices the used pages into
     314             :  * the pipe.
     315             :  *
     316             :  * Return: On success, the number of bytes read will be returned and *@ppos
     317             :  * will be updated if appropriate; 0 will be returned if there is no more data
     318             :  * to be read; -EAGAIN will be returned if the pipe had no space, and some
     319             :  * other negative error code will be returned on error.  A short read may occur
     320             :  * if the pipe has insufficient space, we reach the end of the data or we hit a
     321             :  * hole.
     322             :  */
     323         918 : ssize_t copy_splice_read(struct file *in, loff_t *ppos,
     324             :                          struct pipe_inode_info *pipe,
     325             :                          size_t len, unsigned int flags)
     326             : {
     327         918 :         struct iov_iter to;
     328         918 :         struct bio_vec *bv;
     329         918 :         struct kiocb kiocb;
     330         918 :         struct page **pages;
     331         918 :         ssize_t ret;
     332         918 :         size_t used, npages, chunk, remain, keep = 0;
     333         918 :         int i;
     334             : 
     335             :         /* Work out how much data we can actually add into the pipe */
     336         918 :         used = pipe_occupancy(pipe->head, pipe->tail);
     337         918 :         npages = max_t(ssize_t, pipe->max_usage - used, 0);
     338         918 :         len = min_t(size_t, len, npages * PAGE_SIZE);
     339         918 :         npages = DIV_ROUND_UP(len, PAGE_SIZE);
     340             : 
     341         918 :         bv = kzalloc(array_size(npages, sizeof(bv[0])) +
     342             :                      array_size(npages, sizeof(struct page *)), GFP_KERNEL);
     343         918 :         if (!bv)
     344             :                 return -ENOMEM;
     345             : 
     346         918 :         pages = (struct page **)(bv + npages);
     347         918 :         npages = alloc_pages_bulk_array(GFP_USER, npages, pages);
     348         918 :         if (!npages) {
     349           0 :                 kfree(bv);
     350           0 :                 return -ENOMEM;
     351             :         }
     352             : 
     353         918 :         remain = len = min_t(size_t, len, npages * PAGE_SIZE);
     354             : 
     355        1840 :         for (i = 0; i < npages; i++) {
     356         922 :                 chunk = min_t(size_t, PAGE_SIZE, remain);
     357         922 :                 bv[i].bv_page = pages[i];
     358         922 :                 bv[i].bv_offset = 0;
     359         922 :                 bv[i].bv_len = chunk;
     360         922 :                 remain -= chunk;
     361             :         }
     362             : 
     363             :         /* Do the I/O */
     364         918 :         iov_iter_bvec(&to, ITER_DEST, bv, npages, len);
     365         918 :         init_sync_kiocb(&kiocb, in);
     366         918 :         kiocb.ki_pos = *ppos;
     367         918 :         ret = call_read_iter(in, &kiocb, &to);
     368             : 
     369         918 :         if (ret > 0) {
     370         918 :                 keep = DIV_ROUND_UP(ret, PAGE_SIZE);
     371         918 :                 *ppos = kiocb.ki_pos;
     372             :         }
     373             : 
     374             :         /*
     375             :          * Callers of ->splice_read() expect -EAGAIN on "can't put anything in
     376             :          * there", rather than -EFAULT.
     377             :          */
     378         918 :         if (ret == -EFAULT)
     379           0 :                 ret = -EAGAIN;
     380             : 
     381             :         /* Free any pages that didn't get touched at all. */
     382         918 :         if (keep < npages)
     383           0 :                 release_pages(pages + keep, npages - keep);
     384             : 
     385             :         /* Push the remaining pages into the pipe. */
     386         918 :         remain = ret;
     387        1840 :         for (i = 0; i < keep; i++) {
     388         922 :                 struct pipe_buffer *buf = pipe_head_buf(pipe);
     389             : 
     390         922 :                 chunk = min_t(size_t, remain, PAGE_SIZE);
     391         922 :                 *buf = (struct pipe_buffer) {
     392             :                         .ops    = &default_pipe_buf_ops,
     393         922 :                         .page   = bv[i].bv_page,
     394             :                         .offset = 0,
     395             :                         .len    = chunk,
     396             :                 };
     397         922 :                 pipe->head++;
     398         922 :                 remain -= chunk;
     399             :         }
     400             : 
     401         918 :         kfree(bv);
     402         918 :         return ret;
     403             : }
     404             : EXPORT_SYMBOL(copy_splice_read);
     405             : 
     406             : const struct pipe_buf_operations default_pipe_buf_ops = {
     407             :         .release        = generic_pipe_buf_release,
     408             :         .try_steal      = generic_pipe_buf_try_steal,
     409             :         .get            = generic_pipe_buf_get,
     410             : };
     411             : 
     412             : /* Pipe buffer operations for a socket and similar. */
     413             : const struct pipe_buf_operations nosteal_pipe_buf_ops = {
     414             :         .release        = generic_pipe_buf_release,
     415             :         .get            = generic_pipe_buf_get,
     416             : };
     417             : EXPORT_SYMBOL(nosteal_pipe_buf_ops);
     418             : 
     419     1218379 : static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
     420             : {
     421     1218379 :         smp_mb();
     422     1218379 :         if (waitqueue_active(&pipe->wr_wait))
     423           0 :                 wake_up_interruptible(&pipe->wr_wait);
     424     1218379 :         kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
     425     1218379 : }
     426             : 
     427             : /**
     428             :  * splice_from_pipe_feed - feed available data from a pipe to a file
     429             :  * @pipe:       pipe to splice from
     430             :  * @sd:         information to @actor
     431             :  * @actor:      handler that splices the data
     432             :  *
     433             :  * Description:
     434             :  *    This function loops over the pipe and calls @actor to do the
     435             :  *    actual moving of a single struct pipe_buffer to the desired
     436             :  *    destination.  It returns when there are no more buffers left in
     437             :  *    the pipe or if the requested number of bytes (@sd->total_len)
     438             :  *    have been copied.  It returns a positive number (one) if the
     439             :  *    pipe needs to be filled with more data, zero if the required
     440             :  *    number of bytes have been copied and -errno on error.
     441             :  *
     442             :  *    This, together with splice_from_pipe_{begin,end,next}, may be
     443             :  *    used to implement the functionality of __splice_from_pipe() when
     444             :  *    locking is required around copying the pipe buffers to the
     445             :  *    destination.
     446             :  */
     447         397 : static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
     448             :                           splice_actor *actor)
     449             : {
     450         397 :         unsigned int head = pipe->head;
     451         397 :         unsigned int tail = pipe->tail;
     452         397 :         unsigned int mask = pipe->ring_size - 1;
     453         397 :         int ret;
     454             : 
     455         794 :         while (!pipe_empty(head, tail)) {
     456         397 :                 struct pipe_buffer *buf = &pipe->bufs[tail & mask];
     457             : 
     458         397 :                 sd->len = buf->len;
     459         397 :                 if (sd->len > sd->total_len)
     460           0 :                         sd->len = sd->total_len;
     461             : 
     462         397 :                 ret = pipe_buf_confirm(pipe, buf);
     463         397 :                 if (unlikely(ret)) {
     464           0 :                         if (ret == -ENODATA)
     465           0 :                                 ret = 0;
     466           0 :                         return ret;
     467             :                 }
     468             : 
     469         397 :                 ret = actor(pipe, buf, sd);
     470         397 :                 if (ret <= 0)
     471           0 :                         return ret;
     472             : 
     473         397 :                 buf->offset += ret;
     474         397 :                 buf->len -= ret;
     475             : 
     476         397 :                 sd->num_spliced += ret;
     477         397 :                 sd->len -= ret;
     478         397 :                 sd->pos += ret;
     479         397 :                 sd->total_len -= ret;
     480             : 
     481         397 :                 if (!buf->len) {
     482         397 :                         pipe_buf_release(pipe, buf);
     483         397 :                         tail++;
     484         397 :                         pipe->tail = tail;
     485         397 :                         if (pipe->files)
     486         397 :                                 sd->need_wakeup = true;
     487             :                 }
     488             : 
     489         397 :                 if (!sd->total_len)
     490             :                         return 0;
     491             :         }
     492             : 
     493             :         return 1;
     494             : }
     495             : 
     496             : /* We know we have a pipe buffer, but maybe it's empty? */
     497     6349755 : static inline bool eat_empty_buffer(struct pipe_inode_info *pipe)
     498             : {
     499     6349755 :         unsigned int tail = pipe->tail;
     500     6349755 :         unsigned int mask = pipe->ring_size - 1;
     501     6349755 :         struct pipe_buffer *buf = &pipe->bufs[tail & mask];
     502             : 
     503     6349755 :         if (unlikely(!buf->len)) {
     504           0 :                 pipe_buf_release(pipe, buf);
     505           0 :                 pipe->tail = tail+1;
     506           0 :                 return true;
     507             :         }
     508             : 
     509             :         return false;
     510             : }
     511             : 
     512             : /**
     513             :  * splice_from_pipe_next - wait for some data to splice from
     514             :  * @pipe:       pipe to splice from
     515             :  * @sd:         information about the splice operation
     516             :  *
     517             :  * Description:
     518             :  *    This function will wait for some data and return a positive
     519             :  *    value (one) if pipe buffers are available.  It will return zero
     520             :  *    or -errno if no more data needs to be spliced.
     521             :  */
     522     6427142 : static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
     523             : {
     524             :         /*
     525             :          * Check for signal early to make process killable when there are
     526             :          * always buffers available
     527             :          */
     528     6427142 :         if (signal_pending(current))
     529             :                 return -ERESTARTSYS;
     530             : 
     531     6427183 : repeat:
     532     6462813 :         while (pipe_empty(pipe->head, pipe->tail)) {
     533      113057 :                 if (!pipe->writers)
     534             :                         return 0;
     535             : 
     536       35979 :                 if (sd->num_spliced)
     537             :                         return 0;
     538             : 
     539       35631 :                 if (sd->flags & SPLICE_F_NONBLOCK)
     540             :                         return -EAGAIN;
     541             : 
     542       35631 :                 if (signal_pending(current))
     543             :                         return -ERESTARTSYS;
     544             : 
     545       35631 :                 if (sd->need_wakeup) {
     546           0 :                         wakeup_pipe_writers(pipe);
     547           0 :                         sd->need_wakeup = false;
     548             :                 }
     549             : 
     550       35631 :                 pipe_wait_readable(pipe);
     551             :         }
     552             : 
     553     6349756 :         if (eat_empty_buffer(pipe))
     554           0 :                 goto repeat;
     555             : 
     556             :         return 1;
     557             : }
     558             : 
     559             : /**
     560             :  * splice_from_pipe_begin - start splicing from pipe
     561             :  * @sd:         information about the splice operation
     562             :  *
     563             :  * Description:
     564             :  *    This function should be called before a loop containing
     565             :  *    splice_from_pipe_next() and splice_from_pipe_feed() to
     566             :  *    initialize the necessary fields of @sd.
     567             :  */
     568             : static void splice_from_pipe_begin(struct splice_desc *sd)
     569             : {
     570     6426504 :         sd->num_spliced = 0;
     571     6426504 :         sd->need_wakeup = false;
     572     6426504 : }
     573             : 
     574             : /**
     575             :  * splice_from_pipe_end - finish splicing from pipe
     576             :  * @pipe:       pipe to splice from
     577             :  * @sd:         information about the splice operation
     578             :  *
     579             :  * Description:
     580             :  *    This function will wake up pipe writers if necessary.  It should
     581             :  *    be called after a loop containing splice_from_pipe_next() and
     582             :  *    splice_from_pipe_feed().
     583             :  */
     584             : static void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
     585             : {
     586     6426527 :         if (sd->need_wakeup)
     587     1154002 :                 wakeup_pipe_writers(pipe);
     588             : }
     589             : 
     590             : /**
     591             :  * __splice_from_pipe - splice data from a pipe to given actor
     592             :  * @pipe:       pipe to splice from
     593             :  * @sd:         information to @actor
     594             :  * @actor:      handler that splices the data
     595             :  *
     596             :  * Description:
     597             :  *    This function does little more than loop over the pipe and call
     598             :  *    @actor to do the actual moving of a single struct pipe_buffer to
     599             :  *    the desired destination. See pipe_to_file, pipe_to_sendmsg, or
     600             :  *    pipe_to_user.
     601             :  *
     602             :  */
     603       77433 : ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
     604             :                            splice_actor *actor)
     605             : {
     606       77433 :         int ret;
     607             : 
     608       77433 :         splice_from_pipe_begin(sd);
     609       77830 :         do {
     610       77830 :                 cond_resched();
     611       77830 :                 ret = splice_from_pipe_next(pipe, sd);
     612       77824 :                 if (ret > 0)
     613         397 :                         ret = splice_from_pipe_feed(pipe, sd, actor);
     614       77824 :         } while (ret > 0);
     615       77427 :         splice_from_pipe_end(pipe, sd);
     616             : 
     617       77427 :         return sd->num_spliced ? sd->num_spliced : ret;
     618             : }
     619             : EXPORT_SYMBOL(__splice_from_pipe);
     620             : 
     621             : /**
     622             :  * splice_from_pipe - splice data from a pipe to a file
     623             :  * @pipe:       pipe to splice from
     624             :  * @out:        file to splice to
     625             :  * @ppos:       position in @out
     626             :  * @len:        how many bytes to splice
     627             :  * @flags:      splice modifier flags
     628             :  * @actor:      handler that splices the data
     629             :  *
     630             :  * Description:
     631             :  *    See __splice_from_pipe. This function locks the pipe inode,
     632             :  *    otherwise it's identical to __splice_from_pipe().
     633             :  *
     634             :  */
     635       77432 : ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
     636             :                          loff_t *ppos, size_t len, unsigned int flags,
     637             :                          splice_actor *actor)
     638             : {
     639       77432 :         ssize_t ret;
     640       77432 :         struct splice_desc sd = {
     641             :                 .total_len = len,
     642             :                 .flags = flags,
     643       77432 :                 .pos = *ppos,
     644             :                 .u.file = out,
     645             :         };
     646             : 
     647       77432 :         pipe_lock(pipe);
     648       77431 :         ret = __splice_from_pipe(pipe, &sd, actor);
     649       77423 :         pipe_unlock(pipe);
     650             : 
     651       77424 :         return ret;
     652             : }
     653             : 
     654             : /**
     655             :  * iter_file_splice_write - splice data from a pipe to a file
     656             :  * @pipe:       pipe info
     657             :  * @out:        file to write to
     658             :  * @ppos:       position in @out
     659             :  * @len:        number of bytes to splice
     660             :  * @flags:      splice modifier flags
     661             :  *
     662             :  * Description:
     663             :  *    Will either move or copy pages (determined by @flags options) from
     664             :  *    the given pipe inode to the given file.
     665             :  *    This one is ->write_iter-based.
     666             :  *
     667             :  */
     668             : ssize_t
     669     6349082 : iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
     670             :                           loff_t *ppos, size_t len, unsigned int flags)
     671             : {
     672     6349082 :         struct splice_desc sd = {
     673             :                 .total_len = len,
     674             :                 .flags = flags,
     675     6349082 :                 .pos = *ppos,
     676             :                 .u.file = out,
     677             :         };
     678     6349082 :         int nbufs = pipe->max_usage;
     679     6349082 :         struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec),
     680             :                                         GFP_KERNEL);
     681     6349069 :         ssize_t ret;
     682             : 
     683     6349069 :         if (unlikely(!array))
     684             :                 return -ENOMEM;
     685             : 
     686     6349069 :         pipe_lock(pipe);
     687             : 
     688     6349071 :         splice_from_pipe_begin(&sd);
     689    12594149 :         while (sd.total_len) {
     690     6349345 :                 struct iov_iter from;
     691     6349345 :                 unsigned int head, tail, mask;
     692     6349345 :                 size_t left;
     693     6349345 :                 int n;
     694             : 
     695     6349345 :                 ret = splice_from_pipe_next(pipe, &sd);
     696     6349340 :                 if (ret <= 0)
     697             :                         break;
     698             : 
     699     6349334 :                 if (unlikely(nbufs < pipe->max_usage)) {
     700           0 :                         kfree(array);
     701           0 :                         nbufs = pipe->max_usage;
     702           0 :                         array = kcalloc(nbufs, sizeof(struct bio_vec),
     703             :                                         GFP_KERNEL);
     704           0 :                         if (!array) {
     705             :                                 ret = -ENOMEM;
     706             :                                 break;
     707             :                         }
     708             :                 }
     709             : 
     710     6349334 :                 head = pipe->head;
     711     6349334 :                 tail = pipe->tail;
     712     6349334 :                 mask = pipe->ring_size - 1;
     713             : 
     714             :                 /* build the vector */
     715     6349334 :                 left = sd.total_len;
     716    18899274 :                 for (n = 0; !pipe_empty(head, tail) && left && n < nbufs; tail++) {
     717    12549942 :                         struct pipe_buffer *buf = &pipe->bufs[tail & mask];
     718    12549942 :                         size_t this_len = buf->len;
     719             : 
     720             :                         /* zero-length bvecs are not supported, skip them */
     721    12549942 :                         if (!this_len)
     722           0 :                                 continue;
     723    12549942 :                         this_len = min(this_len, left);
     724             : 
     725    12549942 :                         ret = pipe_buf_confirm(pipe, buf);
     726    12549940 :                         if (unlikely(ret)) {
     727           0 :                                 if (ret == -ENODATA)
     728           0 :                                         ret = 0;
     729           0 :                                 goto done;
     730             :                         }
     731             : 
     732    12549940 :                         bvec_set_page(&array[n], buf->page, this_len,
     733             :                                       buf->offset);
     734    12549940 :                         left -= this_len;
     735    12549940 :                         n++;
     736             :                 }
     737             : 
     738     6349332 :                 iov_iter_bvec(&from, ITER_SOURCE, array, n, sd.total_len - left);
     739     6349322 :                 ret = vfs_iter_write(out, &from, &sd.pos, 0);
     740     6349348 :                 if (ret <= 0)
     741             :                         break;
     742             : 
     743     6245062 :                 sd.num_spliced += ret;
     744     6245062 :                 sd.total_len -= ret;
     745     6245062 :                 *ppos = sd.pos;
     746             : 
     747             :                 /* dismiss the fully eaten buffers, adjust the partial one */
     748     6245062 :                 tail = pipe->tail;
     749    18574009 :                 while (ret) {
     750    12329206 :                         struct pipe_buffer *buf = &pipe->bufs[tail & mask];
     751    12329206 :                         if (ret >= buf->len) {
     752    12328931 :                                 ret -= buf->len;
     753    12328931 :                                 buf->len = 0;
     754    12328931 :                                 pipe_buf_release(pipe, buf);
     755    12328947 :                                 tail++;
     756    12328947 :                                 pipe->tail = tail;
     757    12328947 :                                 if (pipe->files)
     758     2295503 :                                         sd.need_wakeup = true;
     759             :                         } else {
     760         275 :                                 buf->offset += ret;
     761         275 :                                 buf->len -= ret;
     762         275 :                                 ret = 0;
     763             :                         }
     764             :                 }
     765             :         }
     766     6244804 : done:
     767     6349096 :         kfree(array);
     768     6349100 :         splice_from_pipe_end(pipe, &sd);
     769             : 
     770     6349100 :         pipe_unlock(pipe);
     771             : 
     772     6349099 :         if (sd.num_spliced)
     773     6245029 :                 ret = sd.num_spliced;
     774             : 
     775             :         return ret;
     776             : }
     777             : 
     778             : EXPORT_SYMBOL(iter_file_splice_write);
     779             : 
     780             : #ifdef CONFIG_NET
     781             : /**
     782             :  * splice_to_socket - splice data from a pipe to a socket
     783             :  * @pipe:       pipe to splice from
     784             :  * @out:        socket to write to
     785             :  * @ppos:       position in @out
     786             :  * @len:        number of bytes to splice
     787             :  * @flags:      splice modifier flags
     788             :  *
     789             :  * Description:
     790             :  *    Will send @len bytes from the pipe to a network socket. No data copying
     791             :  *    is involved.
     792             :  *
     793             :  */
     794       65536 : ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out,
     795             :                          loff_t *ppos, size_t len, unsigned int flags)
     796             : {
     797       65536 :         struct socket *sock = sock_from_file(out);
     798       65536 :         struct bio_vec bvec[16];
     799       65536 :         struct msghdr msg = {};
     800       65536 :         ssize_t ret = 0;
     801       65536 :         size_t spliced = 0;
     802       65536 :         bool need_wakeup = false;
     803             : 
     804       65536 :         pipe_lock(pipe);
     805             : 
     806      131072 :         while (len > 0) {
     807       66703 :                 unsigned int head, tail, mask, bc = 0;
     808       66703 :                 size_t remain = len;
     809             : 
     810             :                 /*
     811             :                  * Check for signal early to make process killable when there
     812             :                  * are always buffers available
     813             :                  */
     814       66703 :                 ret = -ERESTARTSYS;
     815       66703 :                 if (signal_pending(current))
     816             :                         break;
     817             : 
     818       66703 :                 while (pipe_empty(pipe->head, pipe->tail)) {
     819           0 :                         ret = 0;
     820           0 :                         if (!pipe->writers)
     821           0 :                                 goto out;
     822             : 
     823           0 :                         if (spliced)
     824           0 :                                 goto out;
     825             : 
     826           0 :                         ret = -EAGAIN;
     827           0 :                         if (flags & SPLICE_F_NONBLOCK)
     828           0 :                                 goto out;
     829             : 
     830           0 :                         ret = -ERESTARTSYS;
     831           0 :                         if (signal_pending(current))
     832           0 :                                 goto out;
     833             : 
     834           0 :                         if (need_wakeup) {
     835           0 :                                 wakeup_pipe_writers(pipe);
     836           0 :                                 need_wakeup = false;
     837             :                         }
     838             : 
     839           0 :                         pipe_wait_readable(pipe);
     840             :                 }
     841             : 
     842       66703 :                 head = pipe->head;
     843       66703 :                 tail = pipe->tail;
     844       66703 :                 mask = pipe->ring_size - 1;
     845             : 
     846       74686 :                 while (!pipe_empty(head, tail)) {
     847       74686 :                         struct pipe_buffer *buf = &pipe->bufs[tail & mask];
     848       74686 :                         size_t seg;
     849             : 
     850       74686 :                         if (!buf->len) {
     851           0 :                                 tail++;
     852           0 :                                 continue;
     853             :                         }
     854             : 
     855       74686 :                         seg = min_t(size_t, remain, buf->len);
     856             : 
     857       74686 :                         ret = pipe_buf_confirm(pipe, buf);
     858       74686 :                         if (unlikely(ret)) {
     859           0 :                                 if (ret == -ENODATA)
     860           0 :                                         ret = 0;
     861             :                                 break;
     862             :                         }
     863             : 
     864       74686 :                         bvec_set_page(&bvec[bc++], buf->page, seg, buf->offset);
     865       74686 :                         remain -= seg;
     866       74686 :                         if (remain == 0 || bc >= ARRAY_SIZE(bvec))
     867             :                                 break;
     868        7983 :                         tail++;
     869             :                 }
     870             : 
     871       66703 :                 if (!bc)
     872             :                         break;
     873             : 
     874       66703 :                 msg.msg_flags = MSG_SPLICE_PAGES;
     875       66703 :                 if (flags & SPLICE_F_MORE)
     876           0 :                         msg.msg_flags |= MSG_MORE;
     877       66703 :                 if (remain && pipe_occupancy(pipe->head, tail) > 0)
     878           0 :                         msg.msg_flags |= MSG_MORE;
     879       66703 :                 if (out->f_flags & O_NONBLOCK)
     880       66703 :                         msg.msg_flags |= MSG_DONTWAIT;
     881             : 
     882       66703 :                 iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, bvec, bc,
     883             :                               len - remain);
     884       66703 :                 ret = sock_sendmsg(sock, &msg);
     885       66703 :                 if (ret <= 0)
     886             :                         break;
     887             : 
     888       65536 :                 spliced += ret;
     889       65536 :                 len -= ret;
     890       65536 :                 tail = pipe->tail;
     891      138137 :                 while (ret > 0) {
     892       72601 :                         struct pipe_buffer *buf = &pipe->bufs[tail & mask];
     893       72601 :                         size_t seg = min_t(size_t, ret, buf->len);
     894             : 
     895       72601 :                         buf->offset += seg;
     896       72601 :                         buf->len -= seg;
     897       72601 :                         ret -= seg;
     898             : 
     899       72601 :                         if (!buf->len) {
     900       71434 :                                 pipe_buf_release(pipe, buf);
     901       71434 :                                 tail++;
     902             :                         }
     903             :                 }
     904             : 
     905       65536 :                 if (tail != pipe->tail) {
     906       64377 :                         pipe->tail = tail;
     907       64377 :                         if (pipe->files)
     908       64377 :                                 need_wakeup = true;
     909             :                 }
     910             :         }
     911             : 
     912       65536 : out:
     913       65536 :         pipe_unlock(pipe);
     914       65536 :         if (need_wakeup)
     915       64377 :                 wakeup_pipe_writers(pipe);
     916       65536 :         return spliced ?: ret;
     917             : }
     918             : #endif
     919             : 
     920             : static int warn_unsupported(struct file *file, const char *op)
     921             : {
     922             :         pr_debug_ratelimited(
     923             :                 "splice %s not supported for file %pD4 (pid: %d comm: %.20s)\n",
     924             :                 op, file, current->pid, current->comm);
     925             :         return -EINVAL;
     926             : }
     927             : 
     928             : /*
     929             :  * Attempt to initiate a splice from pipe to file.
     930             :  */
     931             : static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
     932             :                            loff_t *ppos, size_t len, unsigned int flags)
     933             : {
     934     6492031 :         if (unlikely(!out->f_op->splice_write))
     935             :                 return warn_unsupported(out, "write");
     936     6492031 :         return out->f_op->splice_write(pipe, out, ppos, len, flags);
     937             : }
     938             : 
     939             : /*
     940             :  * Indicate to the caller that there was a premature EOF when reading from the
     941             :  * source and the caller didn't indicate they would be sending more data after
     942             :  * this.
     943             :  */
     944             : static void do_splice_eof(struct splice_desc *sd)
     945             : {
     946           0 :         if (sd->splice_eof)
     947           0 :                 sd->splice_eof(sd);
     948             : }
     949             : 
     950             : /**
     951             :  * vfs_splice_read - Read data from a file and splice it into a pipe
     952             :  * @in:         File to splice from
     953             :  * @ppos:       Input file offset
     954             :  * @pipe:       Pipe to splice to
     955             :  * @len:        Number of bytes to splice
     956             :  * @flags:      Splice modifier flags (SPLICE_F_*)
     957             :  *
     958             :  * Splice the requested amount of data from the input file to the pipe.  This
     959             :  * is synchronous as the caller must hold the pipe lock across the entire
     960             :  * operation.
     961             :  *
     962             :  * If successful, it returns the amount of data spliced, 0 if it hit the EOF or
     963             :  * a hole and a negative error code otherwise.
     964             :  */
     965     6422408 : long vfs_splice_read(struct file *in, loff_t *ppos,
     966             :                      struct pipe_inode_info *pipe, size_t len,
     967             :                      unsigned int flags)
     968             : {
     969     6422408 :         unsigned int p_space;
     970     6422408 :         int ret;
     971             : 
     972     6422408 :         if (unlikely(!(in->f_mode & FMODE_READ)))
     973             :                 return -EBADF;
     974     6422408 :         if (!len)
     975             :                 return 0;
     976             : 
     977             :         /* Don't try to read more than the pipe has space for. */
     978     6422410 :         p_space = pipe->max_usage - pipe_occupancy(pipe->head, pipe->tail);
     979     6422410 :         len = min_t(size_t, len, p_space << PAGE_SHIFT);
     980             : 
     981     6422410 :         ret = rw_verify_area(READ, in, ppos, len);
     982     6422372 :         if (unlikely(ret < 0))
     983           0 :                 return ret;
     984             : 
     985     6422372 :         if (unlikely(len > MAX_RW_COUNT))
     986           0 :                 len = MAX_RW_COUNT;
     987             : 
     988     6422372 :         if (unlikely(!in->f_op->splice_read))
     989             :                 return warn_unsupported(in, "read");
     990             :         /*
     991             :          * O_DIRECT and DAX don't deal with the pagecache, so we allocate a
     992             :          * buffer, copy into it and splice that into the pipe.
     993             :          */
     994     6422372 :         if ((in->f_flags & O_DIRECT) || IS_DAX(in->f_mapping->host))
     995         918 :                 return copy_splice_read(in, ppos, pipe, len, flags);
     996     6421454 :         return in->f_op->splice_read(in, ppos, pipe, len, flags);
     997             : }
     998             : EXPORT_SYMBOL_GPL(vfs_splice_read);
     999             : 
    1000             : /**
    1001             :  * splice_direct_to_actor - splices data directly between two non-pipes
    1002             :  * @in:         file to splice from
    1003             :  * @sd:         actor information on where to splice to
    1004             :  * @actor:      handles the data splicing
    1005             :  *
    1006             :  * Description:
    1007             :  *    This is a special case helper to splice directly between two
    1008             :  *    points, without requiring an explicit pipe. Internally an allocated
    1009             :  *    pipe is cached in the process, and reused during the lifetime of
    1010             :  *    that process.
    1011             :  *
    1012             :  */
    1013     5174825 : ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
    1014             :                                splice_direct_actor *actor)
    1015             : {
    1016     5174825 :         struct pipe_inode_info *pipe;
    1017     5174825 :         long ret, bytes;
    1018     5174825 :         size_t len;
    1019     5174825 :         int i, flags, more;
    1020             : 
    1021             :         /*
    1022             :          * We require the input to be seekable, as we don't want to randomly
    1023             :          * drop data for eg socket -> socket splicing. Use the piped splicing
    1024             :          * for that!
    1025             :          */
    1026     5174825 :         if (unlikely(!(in->f_mode & FMODE_LSEEK)))
    1027             :                 return -EINVAL;
    1028             : 
    1029             :         /*
    1030             :          * neither in nor out is a pipe, setup an internal pipe attached to
    1031             :          * 'out' and transfer the wanted data from 'in' to 'out' through that
    1032             :          */
    1033     5174825 :         pipe = current->splice_pipe;
    1034     5174825 :         if (unlikely(!pipe)) {
    1035       31848 :                 pipe = alloc_pipe_info();
    1036       31848 :                 if (!pipe)
    1037             :                         return -ENOMEM;
    1038             : 
    1039             :                 /*
    1040             :                  * We don't have an immediate reader, but we'll read the stuff
    1041             :                  * out of the pipe right after the splice_to_pipe(). So set
    1042             :                  * pipe->readers appropriately.
    1043             :                  */
    1044       31848 :                 pipe->readers = 1;
    1045             : 
    1046       31848 :                 current->splice_pipe = pipe;
    1047             :         }
    1048             : 
    1049             :         /*
    1050             :          * Do the splice.
    1051             :          */
    1052     5174825 :         bytes = 0;
    1053     5174825 :         len = sd->total_len;
    1054             : 
    1055             :         /* Don't block on output, we have to drain the direct pipe. */
    1056     5174825 :         flags = sd->flags;
    1057     5174825 :         sd->flags &= ~SPLICE_F_NONBLOCK;
    1058             : 
    1059             :         /*
    1060             :          * We signal MORE until we've read sufficient data to fulfill the
    1061             :          * request and we keep signalling it if the caller set it.
    1062             :          */
    1063     5174825 :         more = sd->flags & SPLICE_F_MORE;
    1064     5174825 :         sd->flags |= SPLICE_F_MORE;
    1065             : 
    1066     5174825 :         WARN_ON_ONCE(!pipe_empty(pipe->head, pipe->tail));
    1067             : 
    1068    10266044 :         while (len) {
    1069     5174920 :                 size_t read_len;
    1070     5174920 :                 loff_t pos = sd->pos, prev_pos = pos;
    1071             : 
    1072     5174920 :                 ret = vfs_splice_read(in, &pos, pipe, len, flags);
    1073     5174938 :                 if (unlikely(ret <= 0))
    1074         117 :                         goto read_failure;
    1075             : 
    1076     5174821 :                 read_len = ret;
    1077     5174821 :                 sd->total_len = read_len;
    1078             : 
    1079             :                 /*
    1080             :                  * If we now have sufficient data to fulfill the request then
    1081             :                  * we clear SPLICE_F_MORE if it was not set initially.
    1082             :                  */
    1083     5174821 :                 if (read_len >= len && !more)
    1084     5174725 :                         sd->flags &= ~SPLICE_F_MORE;
    1085             : 
    1086             :                 /*
    1087             :                  * NOTE: nonblocking mode only applies to the input. We
    1088             :                  * must not do the output in nonblocking mode as then we
    1089             :                  * could get stuck data in the internal pipe:
    1090             :                  */
    1091     5174821 :                 ret = actor(pipe, sd);
    1092     5174825 :                 if (unlikely(ret <= 0)) {
    1093       83416 :                         sd->pos = prev_pos;
    1094       83606 :                         goto out_release;
    1095             :                 }
    1096             : 
    1097     5091409 :                 bytes += ret;
    1098     5091409 :                 len -= ret;
    1099     5091409 :                 sd->pos = pos;
    1100             : 
    1101     5091409 :                 if (ret < read_len) {
    1102         190 :                         sd->pos = prev_pos + ret;
    1103         190 :                         goto out_release;
    1104             :                 }
    1105             :         }
    1106             : 
    1107     5091124 : done:
    1108     5174857 :         pipe->tail = pipe->head = 0;
    1109     5174857 :         file_accessed(in);
    1110     5174857 :         return bytes;
    1111             : 
    1112             : read_failure:
    1113             :         /*
    1114             :          * If the user did *not* set SPLICE_F_MORE *and* we didn't hit that
    1115             :          * "use all of len" case that cleared SPLICE_F_MORE, *and* we did a
    1116             :          * "->splice_read()" that returned EOF (ie zero) *and* we have sent at
    1117             :          * least 1 byte *then* we will also do the ->splice_eof() call.
    1118             :          */
    1119         117 :         if (ret == 0 && !more && len > 0 && bytes)
    1120           0 :                 do_splice_eof(sd);
    1121         117 : out_release:
    1122             :         /*
    1123             :          * If we did an incomplete transfer we must release
    1124             :          * the pipe buffers in question:
    1125             :          */
    1126     1423447 :         for (i = 0; i < pipe->ring_size; i++) {
    1127     1339714 :                 struct pipe_buffer *buf = &pipe->bufs[i];
    1128             : 
    1129     1339714 :                 if (buf->ops)
    1130      176828 :                         pipe_buf_release(pipe, buf);
    1131             :         }
    1132             : 
    1133       83733 :         if (!bytes)
    1134       83513 :                 bytes = ret;
    1135             : 
    1136       83733 :         goto done;
    1137             : }
    1138             : EXPORT_SYMBOL(splice_direct_to_actor);
    1139             : 
    1140     5174804 : static int direct_splice_actor(struct pipe_inode_info *pipe,
    1141             :                                struct splice_desc *sd)
    1142             : {
    1143     5174804 :         struct file *file = sd->u.file;
    1144             : 
    1145     5174804 :         return do_splice_from(pipe, file, sd->opos, sd->total_len,
    1146             :                               sd->flags);
    1147             : }
    1148             : 
    1149           0 : static void direct_file_splice_eof(struct splice_desc *sd)
    1150             : {
    1151           0 :         struct file *file = sd->u.file;
    1152             : 
    1153           0 :         if (file->f_op->splice_eof)
    1154           0 :                 file->f_op->splice_eof(file);
    1155           0 : }
    1156             : 
    1157             : /**
    1158             :  * do_splice_direct - splices data directly between two files
    1159             :  * @in:         file to splice from
    1160             :  * @ppos:       input file offset
    1161             :  * @out:        file to splice to
    1162             :  * @opos:       output file offset
    1163             :  * @len:        number of bytes to splice
    1164             :  * @flags:      splice modifier flags
    1165             :  *
    1166             :  * Description:
    1167             :  *    For use by do_sendfile(). splice can easily emulate sendfile, but
    1168             :  *    doing it in the application would incur an extra system call
    1169             :  *    (splice in + splice out, as compared to just sendfile()). So this helper
    1170             :  *    can splice directly through a process-private pipe.
    1171             :  *
    1172             :  */
    1173     5174854 : long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
    1174             :                       loff_t *opos, size_t len, unsigned int flags)
    1175             : {
    1176     5174854 :         struct splice_desc sd = {
    1177             :                 .len            = len,
    1178             :                 .total_len      = len,
    1179             :                 .flags          = flags,
    1180     5174854 :                 .pos            = *ppos,
    1181             :                 .u.file         = out,
    1182             :                 .splice_eof     = direct_file_splice_eof,
    1183             :                 .opos           = opos,
    1184             :         };
    1185     5174854 :         long ret;
    1186             : 
    1187     5174854 :         if (unlikely(!(out->f_mode & FMODE_WRITE)))
    1188             :                 return -EBADF;
    1189             : 
    1190     5174854 :         if (unlikely(out->f_flags & O_APPEND))
    1191             :                 return -EINVAL;
    1192             : 
    1193     5174854 :         ret = rw_verify_area(WRITE, out, opos, len);
    1194     5174847 :         if (unlikely(ret < 0))
    1195             :                 return ret;
    1196             : 
    1197     5174847 :         ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
    1198     5174847 :         if (ret > 0)
    1199     5091337 :                 *ppos = sd.pos;
    1200             : 
    1201             :         return ret;
    1202             : }
    1203             : EXPORT_SYMBOL(do_splice_direct);
    1204             : 
    1205     1250057 : static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
    1206             : {
    1207     1250057 :         for (;;) {
    1208     1250057 :                 if (unlikely(!pipe->readers)) {
    1209           0 :                         send_sig(SIGPIPE, current, 0);
    1210           0 :                         return -EPIPE;
    1211             :                 }
    1212     1250057 :                 if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage))
    1213             :                         return 0;
    1214           2 :                 if (flags & SPLICE_F_NONBLOCK)
    1215             :                         return -EAGAIN;
    1216           2 :                 if (signal_pending(current))
    1217             :                         return -ERESTARTSYS;
    1218           0 :                 pipe_wait_writable(pipe);
    1219             :         }
    1220             : }
    1221             : 
    1222             : static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
    1223             :                                struct pipe_inode_info *opipe,
    1224             :                                size_t len, unsigned int flags);
    1225             : 
    1226     1247498 : long splice_file_to_pipe(struct file *in,
    1227             :                          struct pipe_inode_info *opipe,
    1228             :                          loff_t *offset,
    1229             :                          size_t len, unsigned int flags)
    1230             : {
    1231     1247498 :         long ret;
    1232             : 
    1233     1247498 :         pipe_lock(opipe);
    1234     1247503 :         ret = wait_for_space(opipe, flags);
    1235     1247495 :         if (!ret)
    1236     1247496 :                 ret = vfs_splice_read(in, offset, opipe, len, flags);
    1237     1247503 :         pipe_unlock(opipe);
    1238     1247503 :         if (ret > 0)
    1239     1236065 :                 wakeup_pipe_readers(opipe);
    1240     1247504 :         return ret;
    1241             : }
    1242             : 
    1243             : /*
    1244             :  * Determine where to splice to/from.
    1245             :  */
    1246     2564746 : long do_splice(struct file *in, loff_t *off_in, struct file *out,
    1247             :                loff_t *off_out, size_t len, unsigned int flags)
    1248             : {
    1249     2564746 :         struct pipe_inode_info *ipipe;
    1250     2564746 :         struct pipe_inode_info *opipe;
    1251     2564746 :         loff_t offset;
    1252     2564746 :         long ret;
    1253             : 
    1254     2564746 :         if (unlikely(!(in->f_mode & FMODE_READ) ||
    1255             :                      !(out->f_mode & FMODE_WRITE)))
    1256             :                 return -EBADF;
    1257             : 
    1258     2564746 :         ipipe = get_pipe_info(in, true);
    1259     2564734 :         opipe = get_pipe_info(out, true);
    1260             : 
    1261     2564735 :         if (ipipe && opipe) {
    1262           0 :                 if (off_in || off_out)
    1263             :                         return -ESPIPE;
    1264             : 
    1265             :                 /* Splicing to self would be fun, but... */
    1266           0 :                 if (ipipe == opipe)
    1267             :                         return -EINVAL;
    1268             : 
    1269           0 :                 if ((in->f_flags | out->f_flags) & O_NONBLOCK)
    1270           0 :                         flags |= SPLICE_F_NONBLOCK;
    1271             : 
    1272           0 :                 return splice_pipe_to_pipe(ipipe, opipe, len, flags);
    1273             :         }
    1274             : 
    1275     2564735 :         if (ipipe) {
    1276     1317235 :                 if (off_in)
    1277             :                         return -ESPIPE;
    1278     1317235 :                 if (off_out) {
    1279     1174276 :                         if (!(out->f_mode & FMODE_PWRITE))
    1280             :                                 return -EINVAL;
    1281     1174276 :                         offset = *off_out;
    1282             :                 } else {
    1283      142959 :                         offset = out->f_pos;
    1284             :                 }
    1285             : 
    1286     1317235 :                 if (unlikely(out->f_flags & O_APPEND))
    1287             :                         return -EINVAL;
    1288             : 
    1289     1317235 :                 ret = rw_verify_area(WRITE, out, &offset, len);
    1290     1317232 :                 if (unlikely(ret < 0))
    1291             :                         return ret;
    1292             : 
    1293     1317232 :                 if (in->f_flags & O_NONBLOCK)
    1294       65536 :                         flags |= SPLICE_F_NONBLOCK;
    1295             : 
    1296     1317232 :                 file_start_write(out);
    1297     1317227 :                 ret = do_splice_from(ipipe, out, &offset, len, flags);
    1298     1317238 :                 file_end_write(out);
    1299             : 
    1300     1317239 :                 if (ret > 0)
    1301     1219556 :                         fsnotify_modify(out);
    1302             : 
    1303     1317238 :                 if (!off_out)
    1304      142960 :                         out->f_pos = offset;
    1305             :                 else
    1306     1174278 :                         *off_out = offset;
    1307             : 
    1308     1317238 :                 return ret;
    1309             :         }
    1310             : 
    1311     1247500 :         if (opipe) {
    1312     1247500 :                 if (off_out)
    1313             :                         return -ESPIPE;
    1314     1247500 :                 if (off_in) {
    1315     1171706 :                         if (!(in->f_mode & FMODE_PREAD))
    1316             :                                 return -EINVAL;
    1317     1171706 :                         offset = *off_in;
    1318             :                 } else {
    1319       75794 :                         offset = in->f_pos;
    1320             :                 }
    1321             : 
    1322     1247500 :                 if (out->f_flags & O_NONBLOCK)
    1323       75786 :                         flags |= SPLICE_F_NONBLOCK;
    1324             : 
    1325     1247500 :                 ret = splice_file_to_pipe(in, opipe, &offset, len, flags);
    1326             : 
    1327     1247501 :                 if (ret > 0)
    1328     1236067 :                         fsnotify_access(in);
    1329             : 
    1330     1247501 :                 if (!off_in)
    1331       75794 :                         in->f_pos = offset;
    1332             :                 else
    1333     1171707 :                         *off_in = offset;
    1334             : 
    1335     1247501 :                 return ret;
    1336             :         }
    1337             : 
    1338             :         return -EINVAL;
    1339             : }
    1340             : 
    1341     2564742 : static long __do_splice(struct file *in, loff_t __user *off_in,
    1342             :                         struct file *out, loff_t __user *off_out,
    1343             :                         size_t len, unsigned int flags)
    1344             : {
    1345     2564742 :         struct pipe_inode_info *ipipe;
    1346     2564742 :         struct pipe_inode_info *opipe;
    1347     2564742 :         loff_t offset, *__off_in = NULL, *__off_out = NULL;
    1348     2564742 :         long ret;
    1349             : 
    1350     2564742 :         ipipe = get_pipe_info(in, true);
    1351     2564733 :         opipe = get_pipe_info(out, true);
    1352             : 
    1353     2564734 :         if (ipipe) {
    1354     1317235 :                 if (off_in)
    1355             :                         return -ESPIPE;
    1356     1317235 :                 pipe_clear_nowait(in);
    1357             :         }
    1358     2564743 :         if (opipe) {
    1359     1247502 :                 if (off_out)
    1360             :                         return -ESPIPE;
    1361     1247502 :                 pipe_clear_nowait(out);
    1362             :         }
    1363             : 
    1364     2564746 :         if (off_out) {
    1365     1174274 :                 if (copy_from_user(&offset, off_out, sizeof(loff_t)))
    1366             :                         return -EFAULT;
    1367             :                 __off_out = &offset;
    1368             :         }
    1369     2564746 :         if (off_in) {
    1370     1171707 :                 if (copy_from_user(&offset, off_in, sizeof(loff_t)))
    1371             :                         return -EFAULT;
    1372             :                 __off_in = &offset;
    1373             :         }
    1374             : 
    1375     2564746 :         ret = do_splice(in, __off_in, out, __off_out, len, flags);
    1376     2564738 :         if (ret < 0)
    1377             :                 return ret;
    1378             : 
    1379     3697615 :         if (__off_out && copy_to_user(off_out, __off_out, sizeof(loff_t)))
    1380           0 :                 return -EFAULT;
    1381     3715678 :         if (__off_in && copy_to_user(off_in, __off_in, sizeof(loff_t)))
    1382           0 :                 return -EFAULT;
    1383             : 
    1384             :         return ret;
    1385             : }
    1386             : 
    1387        2557 : static int iter_to_pipe(struct iov_iter *from,
    1388             :                         struct pipe_inode_info *pipe,
    1389             :                         unsigned flags)
    1390             : {
    1391        2557 :         struct pipe_buffer buf = {
    1392             :                 .ops = &user_page_pipe_buf_ops,
    1393             :                 .flags = flags
    1394             :         };
    1395        2557 :         size_t total = 0;
    1396        2557 :         int ret = 0;
    1397             : 
    1398        5117 :         while (iov_iter_count(from)) {
    1399        2557 :                 struct page *pages[16];
    1400        2557 :                 ssize_t left;
    1401        2557 :                 size_t start;
    1402        2557 :                 int i, n;
    1403             : 
    1404        2557 :                 left = iov_iter_get_pages2(from, pages, ~0UL, 16, &start);
    1405        2560 :                 if (left <= 0) {
    1406           0 :                         ret = left;
    1407           0 :                         break;
    1408             :                 }
    1409             : 
    1410        2560 :                 n = DIV_ROUND_UP(left + start, PAGE_SIZE);
    1411        5118 :                 for (i = 0; i < n; i++) {
    1412        2558 :                         int size = min_t(int, left, PAGE_SIZE - start);
    1413             : 
    1414        2558 :                         buf.page = pages[i];
    1415        2558 :                         buf.offset = start;
    1416        2558 :                         buf.len = size;
    1417        2558 :                         ret = add_to_pipe(pipe, &buf);
    1418        2558 :                         if (unlikely(ret < 0)) {
    1419           0 :                                 iov_iter_revert(from, left);
    1420             :                                 // this one got dropped by add_to_pipe()
    1421           0 :                                 while (++i < n)
    1422           0 :                                         put_page(pages[i]);
    1423           0 :                                 goto out;
    1424             :                         }
    1425        2558 :                         total += ret;
    1426        2558 :                         left -= size;
    1427        2558 :                         start = 0;
    1428             :                 }
    1429             :         }
    1430        2560 : out:
    1431        2560 :         return total ? total : ret;
    1432             : }
    1433             : 
    1434           0 : static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
    1435             :                         struct splice_desc *sd)
    1436             : {
    1437           0 :         int n = copy_page_to_iter(buf->page, buf->offset, sd->len, sd->u.data);
    1438           0 :         return n == sd->len ? n : -EFAULT;
    1439             : }
    1440             : 
    1441             : /*
    1442             :  * For lack of a better implementation, implement vmsplice() to userspace
    1443             :  * as a simple copy of the pipes pages to the user iov.
    1444             :  */
    1445           0 : static long vmsplice_to_user(struct file *file, struct iov_iter *iter,
    1446             :                              unsigned int flags)
    1447             : {
    1448           0 :         struct pipe_inode_info *pipe = get_pipe_info(file, true);
    1449           0 :         struct splice_desc sd = {
    1450             :                 .total_len = iov_iter_count(iter),
    1451             :                 .flags = flags,
    1452             :                 .u.data = iter
    1453             :         };
    1454           0 :         long ret = 0;
    1455             : 
    1456           0 :         if (!pipe)
    1457             :                 return -EBADF;
    1458             : 
    1459           0 :         pipe_clear_nowait(file);
    1460             : 
    1461           0 :         if (sd.total_len) {
    1462           0 :                 pipe_lock(pipe);
    1463           0 :                 ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
    1464           0 :                 pipe_unlock(pipe);
    1465             :         }
    1466             : 
    1467             :         return ret;
    1468             : }
    1469             : 
    1470             : /*
    1471             :  * vmsplice splices a user address range into a pipe. It can be thought of
    1472             :  * as splice-from-memory, where the regular splice is splice-from-file (or
    1473             :  * to file). In both cases the output is a pipe, naturally.
    1474             :  */
    1475        2559 : static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
    1476             :                              unsigned int flags)
    1477             : {
    1478        2559 :         struct pipe_inode_info *pipe;
    1479        2559 :         long ret = 0;
    1480        2559 :         unsigned buf_flag = 0;
    1481             : 
    1482        2559 :         if (flags & SPLICE_F_GIFT)
    1483           0 :                 buf_flag = PIPE_BUF_FLAG_GIFT;
    1484             : 
    1485        2559 :         pipe = get_pipe_info(file, true);
    1486        2557 :         if (!pipe)
    1487             :                 return -EBADF;
    1488             : 
    1489        2557 :         pipe_clear_nowait(file);
    1490             : 
    1491        2557 :         pipe_lock(pipe);
    1492        2557 :         ret = wait_for_space(pipe, flags);
    1493        2560 :         if (!ret)
    1494        2557 :                 ret = iter_to_pipe(iter, pipe, buf_flag);
    1495        2562 :         pipe_unlock(pipe);
    1496        2558 :         if (ret > 0)
    1497        2558 :                 wakeup_pipe_readers(pipe);
    1498             :         return ret;
    1499             : }
    1500             : 
    1501        2559 : static int vmsplice_type(struct fd f, int *type)
    1502             : {
    1503        2559 :         if (!f.file)
    1504             :                 return -EBADF;
    1505        2559 :         if (f.file->f_mode & FMODE_WRITE) {
    1506        2559 :                 *type = ITER_SOURCE;
    1507           0 :         } else if (f.file->f_mode & FMODE_READ) {
    1508           0 :                 *type = ITER_DEST;
    1509             :         } else {
    1510           0 :                 fdput(f);
    1511           0 :                 return -EBADF;
    1512             :         }
    1513             :         return 0;
    1514             : }
    1515             : 
    1516             : /*
    1517             :  * Note that vmsplice only really supports true splicing _from_ user memory
    1518             :  * to a pipe, not the other way around. Splicing from user memory is a simple
    1519             :  * operation that can be supported without any funky alignment restrictions
    1520             :  * or nasty vm tricks. We simply map in the user memory and fill them into
    1521             :  * a pipe. The reverse isn't quite as easy, though. There are two possible
    1522             :  * solutions for that:
    1523             :  *
    1524             :  *      - memcpy() the data internally, at which point we might as well just
    1525             :  *        do a regular read() on the buffer anyway.
    1526             :  *      - Lots of nasty vm tricks, that are neither fast nor flexible (it
    1527             :  *        has restriction limitations on both ends of the pipe).
    1528             :  *
    1529             :  * Currently we punt and implement it as a normal copy, see pipe_to_user().
    1530             :  *
    1531             :  */
    1532        5117 : SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
    1533             :                 unsigned long, nr_segs, unsigned int, flags)
    1534             : {
    1535        2560 :         struct iovec iovstack[UIO_FASTIOV];
    1536        2560 :         struct iovec *iov = iovstack;
    1537        2560 :         struct iov_iter iter;
    1538        2560 :         ssize_t error;
    1539        2560 :         struct fd f;
    1540        2560 :         int type;
    1541             : 
    1542        2560 :         if (unlikely(flags & ~SPLICE_F_ALL))
    1543             :                 return -EINVAL;
    1544             : 
    1545        2560 :         f = fdget(fd);
    1546        2560 :         error = vmsplice_type(f, &type);
    1547        2558 :         if (error)
    1548             :                 return error;
    1549             : 
    1550        2558 :         error = import_iovec(type, uiov, nr_segs,
    1551             :                              ARRAY_SIZE(iovstack), &iov, &iter);
    1552        2557 :         if (error < 0)
    1553           0 :                 goto out_fdput;
    1554             : 
    1555        2557 :         if (!iov_iter_count(&iter))
    1556             :                 error = 0;
    1557        2557 :         else if (type == ITER_SOURCE)
    1558        2557 :                 error = vmsplice_to_pipe(f.file, &iter, flags);
    1559             :         else
    1560           0 :                 error = vmsplice_to_user(f.file, &iter, flags);
    1561             : 
    1562        2558 :         kfree(iov);
    1563        2558 : out_fdput:
    1564        2558 :         fdput(f);
    1565        2558 :         return error;
    1566             : }
    1567             : 
    1568     5129484 : SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
    1569             :                 int, fd_out, loff_t __user *, off_out,
    1570             :                 size_t, len, unsigned int, flags)
    1571             : {
    1572     2564744 :         struct fd in, out;
    1573     2564744 :         long error;
    1574             : 
    1575     2564744 :         if (unlikely(!len))
    1576             :                 return 0;
    1577             : 
    1578     2564744 :         if (unlikely(flags & ~SPLICE_F_ALL))
    1579             :                 return -EINVAL;
    1580             : 
    1581     2564744 :         error = -EBADF;
    1582     2564744 :         in = fdget(fd_in);
    1583     2564744 :         if (in.file) {
    1584     2564744 :                 out = fdget(fd_out);
    1585     2564727 :                 if (out.file) {
    1586     2564727 :                         error = __do_splice(in.file, off_in, out.file, off_out,
    1587             :                                                 len, flags);
    1588     2564736 :                         fdput(out);
    1589             :                 }
    1590     2564736 :                 fdput(in);
    1591             :         }
    1592             :         return error;
    1593             : }
    1594             : 
    1595             : /*
    1596             :  * Make sure there's data to read. Wait for input if we can, otherwise
    1597             :  * return an appropriate error.
    1598             :  */
    1599           0 : static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
    1600             : {
    1601           0 :         int ret;
    1602             : 
    1603             :         /*
    1604             :          * Check the pipe occupancy without the inode lock first. This function
    1605             :          * is speculative anyways, so missing one is ok.
    1606             :          */
    1607           0 :         if (!pipe_empty(pipe->head, pipe->tail))
    1608             :                 return 0;
    1609             : 
    1610           0 :         ret = 0;
    1611           0 :         pipe_lock(pipe);
    1612             : 
    1613           0 :         while (pipe_empty(pipe->head, pipe->tail)) {
    1614           0 :                 if (signal_pending(current)) {
    1615             :                         ret = -ERESTARTSYS;
    1616             :                         break;
    1617             :                 }
    1618           0 :                 if (!pipe->writers)
    1619             :                         break;
    1620           0 :                 if (flags & SPLICE_F_NONBLOCK) {
    1621             :                         ret = -EAGAIN;
    1622             :                         break;
    1623             :                 }
    1624           0 :                 pipe_wait_readable(pipe);
    1625             :         }
    1626             : 
    1627           0 :         pipe_unlock(pipe);
    1628           0 :         return ret;
    1629             : }
    1630             : 
    1631             : /*
    1632             :  * Make sure there's writeable room. Wait for room if we can, otherwise
    1633             :  * return an appropriate error.
    1634             :  */
    1635           0 : static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
    1636             : {
    1637           0 :         int ret;
    1638             : 
    1639             :         /*
    1640             :          * Check pipe occupancy without the inode lock first. This function
    1641             :          * is speculative anyways, so missing one is ok.
    1642             :          */
    1643           0 :         if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage))
    1644             :                 return 0;
    1645             : 
    1646           0 :         ret = 0;
    1647           0 :         pipe_lock(pipe);
    1648             : 
    1649           0 :         while (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) {
    1650           0 :                 if (!pipe->readers) {
    1651           0 :                         send_sig(SIGPIPE, current, 0);
    1652           0 :                         ret = -EPIPE;
    1653           0 :                         break;
    1654             :                 }
    1655           0 :                 if (flags & SPLICE_F_NONBLOCK) {
    1656             :                         ret = -EAGAIN;
    1657             :                         break;
    1658             :                 }
    1659           0 :                 if (signal_pending(current)) {
    1660             :                         ret = -ERESTARTSYS;
    1661             :                         break;
    1662             :                 }
    1663           0 :                 pipe_wait_writable(pipe);
    1664             :         }
    1665             : 
    1666           0 :         pipe_unlock(pipe);
    1667           0 :         return ret;
    1668             : }
    1669             : 
    1670             : /*
    1671             :  * Splice contents of ipipe to opipe.
    1672             :  */
    1673           0 : static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
    1674             :                                struct pipe_inode_info *opipe,
    1675             :                                size_t len, unsigned int flags)
    1676             : {
    1677           0 :         struct pipe_buffer *ibuf, *obuf;
    1678           0 :         unsigned int i_head, o_head;
    1679           0 :         unsigned int i_tail, o_tail;
    1680           0 :         unsigned int i_mask, o_mask;
    1681           0 :         int ret = 0;
    1682           0 :         bool input_wakeup = false;
    1683             : 
    1684             : 
    1685           0 : retry:
    1686           0 :         ret = ipipe_prep(ipipe, flags);
    1687           0 :         if (ret)
    1688           0 :                 return ret;
    1689             : 
    1690           0 :         ret = opipe_prep(opipe, flags);
    1691           0 :         if (ret)
    1692           0 :                 return ret;
    1693             : 
    1694             :         /*
    1695             :          * Potential ABBA deadlock, work around it by ordering lock
    1696             :          * grabbing by pipe info address. Otherwise two different processes
    1697             :          * could deadlock (one doing tee from A -> B, the other from B -> A).
    1698             :          */
    1699           0 :         pipe_double_lock(ipipe, opipe);
    1700             : 
    1701           0 :         i_tail = ipipe->tail;
    1702           0 :         i_mask = ipipe->ring_size - 1;
    1703           0 :         o_head = opipe->head;
    1704           0 :         o_mask = opipe->ring_size - 1;
    1705             : 
    1706           0 :         do {
    1707           0 :                 size_t o_len;
    1708             : 
    1709           0 :                 if (!opipe->readers) {
    1710           0 :                         send_sig(SIGPIPE, current, 0);
    1711           0 :                         if (!ret)
    1712           0 :                                 ret = -EPIPE;
    1713             :                         break;
    1714             :                 }
    1715             : 
    1716           0 :                 i_head = ipipe->head;
    1717           0 :                 o_tail = opipe->tail;
    1718             : 
    1719           0 :                 if (pipe_empty(i_head, i_tail) && !ipipe->writers)
    1720             :                         break;
    1721             : 
    1722             :                 /*
    1723             :                  * Cannot make any progress, because either the input
    1724             :                  * pipe is empty or the output pipe is full.
    1725             :                  */
    1726           0 :                 if (pipe_empty(i_head, i_tail) ||
    1727           0 :                     pipe_full(o_head, o_tail, opipe->max_usage)) {
    1728             :                         /* Already processed some buffers, break */
    1729           0 :                         if (ret)
    1730             :                                 break;
    1731             : 
    1732           0 :                         if (flags & SPLICE_F_NONBLOCK) {
    1733             :                                 ret = -EAGAIN;
    1734             :                                 break;
    1735             :                         }
    1736             : 
    1737             :                         /*
    1738             :                          * We raced with another reader/writer and haven't
    1739             :                          * managed to process any buffers.  A zero return
    1740             :                          * value means EOF, so retry instead.
    1741             :                          */
    1742           0 :                         pipe_unlock(ipipe);
    1743           0 :                         pipe_unlock(opipe);
    1744           0 :                         goto retry;
    1745             :                 }
    1746             : 
    1747           0 :                 ibuf = &ipipe->bufs[i_tail & i_mask];
    1748           0 :                 obuf = &opipe->bufs[o_head & o_mask];
    1749             : 
    1750           0 :                 if (len >= ibuf->len) {
    1751             :                         /*
    1752             :                          * Simply move the whole buffer from ipipe to opipe
    1753             :                          */
    1754           0 :                         *obuf = *ibuf;
    1755           0 :                         ibuf->ops = NULL;
    1756           0 :                         i_tail++;
    1757           0 :                         ipipe->tail = i_tail;
    1758           0 :                         input_wakeup = true;
    1759           0 :                         o_len = obuf->len;
    1760           0 :                         o_head++;
    1761           0 :                         opipe->head = o_head;
    1762             :                 } else {
    1763             :                         /*
    1764             :                          * Get a reference to this pipe buffer,
    1765             :                          * so we can copy the contents over.
    1766             :                          */
    1767           0 :                         if (!pipe_buf_get(ipipe, ibuf)) {
    1768           0 :                                 if (ret == 0)
    1769           0 :                                         ret = -EFAULT;
    1770             :                                 break;
    1771             :                         }
    1772           0 :                         *obuf = *ibuf;
    1773             : 
    1774             :                         /*
    1775             :                          * Don't inherit the gift and merge flags, we need to
    1776             :                          * prevent multiple steals of this page.
    1777             :                          */
    1778           0 :                         obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
    1779           0 :                         obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE;
    1780             : 
    1781           0 :                         obuf->len = len;
    1782           0 :                         ibuf->offset += len;
    1783           0 :                         ibuf->len -= len;
    1784           0 :                         o_len = len;
    1785           0 :                         o_head++;
    1786           0 :                         opipe->head = o_head;
    1787             :                 }
    1788           0 :                 ret += o_len;
    1789           0 :                 len -= o_len;
    1790           0 :         } while (len);
    1791             : 
    1792           0 :         pipe_unlock(ipipe);
    1793           0 :         pipe_unlock(opipe);
    1794             : 
    1795             :         /*
    1796             :          * If we put data in the output pipe, wakeup any potential readers.
    1797             :          */
    1798           0 :         if (ret > 0)
    1799           0 :                 wakeup_pipe_readers(opipe);
    1800             : 
    1801           0 :         if (input_wakeup)
    1802           0 :                 wakeup_pipe_writers(ipipe);
    1803             : 
    1804             :         return ret;
    1805             : }
    1806             : 
    1807             : /*
    1808             :  * Link contents of ipipe to opipe.
                        *
                        * With both pipes locked, walks ipipe's buffers starting at ipipe->tail.
                        * For each one it takes a reference with pipe_buf_get() and copies the
                        * pipe_buffer descriptor into the next opipe slot, publishing it by
                        * advancing opipe->head.  ipipe->tail is not modified by this function;
                        * only the local cursor i_tail moves.
                        *
                        * At most 'len' bytes are linked in total: the buffer that would exceed
                        * 'len' has its length clamped on the opipe side.  'flags' is accepted
                        * but not consulted in this function.
                        *
                        * Returns the number of bytes linked, which is 0 when ipipe is empty or
                        * opipe is full on entry.  If nothing was linked before the failure,
                        * returns -EPIPE when opipe has no readers (SIGPIPE is sent to current
                        * either way) or -EFAULT when pipe_buf_get() fails.
    1809             :  */
    1810           0 : static int link_pipe(struct pipe_inode_info *ipipe,
    1811             :                      struct pipe_inode_info *opipe,
    1812             :                      size_t len, unsigned int flags)
    1813             : {
    1814           0 :         struct pipe_buffer *ibuf, *obuf;
    1815           0 :         unsigned int i_head, o_head;
    1816           0 :         unsigned int i_tail, o_tail;
    1817           0 :         unsigned int i_mask, o_mask;
    1818           0 :         int ret = 0;
    1819             : 
    1820             :         /*
    1821             :          * Potential ABBA deadlock, work around it by ordering lock
    1822             :          * grabbing by pipe info address. Otherwise two different processes
    1823             :          * could deadlock (one doing tee from A -> B, the other from B -> A).
    1824             :          */
    1825           0 :         pipe_double_lock(ipipe, opipe);
    1826             : 
                               /*
                                * Local cursors for the loop.  Slots are addressed below as
                                * (position & mask), with mask = ring_size - 1.
                                */
    1827           0 :         i_tail = ipipe->tail;
    1828           0 :         i_mask = ipipe->ring_size - 1;
    1829           0 :         o_head = opipe->head;
    1830           0 :         o_mask = opipe->ring_size - 1;
    1831             : 
    1832           0 :         do {
    1833           0 :                 if (!opipe->readers) {
                                               /* Signal is sent even if some bytes were already linked. */
    1834           0 :                         send_sig(SIGPIPE, current, 0);
    1835           0 :                         if (!ret)
    1836           0 :                                 ret = -EPIPE;
    1837             :                         break;
    1838             :                 }
    1839             : 
                                       /* Re-read the opposite ends of both rings on every pass. */
    1840           0 :                 i_head = ipipe->head;
    1841           0 :                 o_tail = opipe->tail;
    1842             : 
    1843             :                 /*
    1844             :                  * If we have iterated all input buffers or run out of
    1845             :                  * output room, break.
    1846             :                  */
    1847           0 :                 if (pipe_empty(i_head, i_tail) ||
    1848           0 :                     pipe_full(o_head, o_tail, opipe->max_usage))
    1849             :                         break;
    1850             : 
    1851           0 :                 ibuf = &ipipe->bufs[i_tail & i_mask];
    1852           0 :                 obuf = &opipe->bufs[o_head & o_mask];
    1853             : 
    1854             :                 /*
    1855             :                  * Get a reference to this pipe buffer,
    1856             :                  * so we can copy the contents over.
    1857             :                  */
    1858           0 :                 if (!pipe_buf_get(ipipe, ibuf)) {
                                               /* Keep a partial byte count if earlier buffers were linked. */
    1859           0 :                         if (ret == 0)
    1860           0 :                                 ret = -EFAULT;
    1861             :                         break;
    1862             :                 }
    1863             : 
                                       /* Struct copy of the descriptor. */
    1864           0 :                 *obuf = *ibuf;
    1865             : 
    1866             :                 /*
    1867             :                  * Don't inherit the gift and merge flag, we need to prevent
    1868             :                  * multiple steals of this page.
    1869             :                  */
    1870           0 :                 obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
    1871           0 :                 obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE;
    1872             : 
                                       /*
                                        * Clamp the output-side length so the running total never
                                        * exceeds the requested len; after a clamp len reaches 0 and
                                        * the loop terminates.
                                        */
    1873           0 :                 if (obuf->len > len)
    1874           0 :                         obuf->len = len;
    1875           0 :                 ret += obuf->len;
    1876           0 :                 len -= obuf->len;
    1877             : 
                                       /* Publish the new output slot; the input side only moves locally. */
    1878           0 :                 o_head++;
    1879           0 :                 opipe->head = o_head;
    1880           0 :                 i_tail++;
    1881           0 :         } while (len);
    1882             : 
    1883           0 :         pipe_unlock(ipipe);
    1884           0 :         pipe_unlock(opipe);
    1885             : 
    1886             :         /*
    1887             :          * If we put data in the output pipe, wakeup any potential readers.
    1888             :          */
    1889           0 :         if (ret > 0)
    1890           0 :                 wakeup_pipe_readers(opipe);
    1891             : 
    1892           0 :         return ret;
    1893             : }
    1894             : 
    1895             : /*
    1896             :  * This is a tee(1) implementation that works on pipes. It doesn't copy
    1897             :  * any data, it simply references the 'in' pages on the 'out' pipe.
    1898             :  * The 'flags' used are the SPLICE_F_* variants, currently the only
    1899             :  * applicable one is SPLICE_F_NONBLOCK.
                        *
                        * Returns -EBADF if 'in' is not open for reading or 'out' is not open
                        * for writing; -EINVAL if get_pipe_info() yields no pipe for either file
                        * or both files resolve to the same pipe; otherwise the first non-zero
                        * result of ipipe_prep()/opipe_prep(), or else the result of link_pipe().
    1900             :  */
    1901           0 : long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags)
    1902             : {
                               /* Either lookup may come back NULL; both are checked before use. */
    1903           0 :         struct pipe_inode_info *ipipe = get_pipe_info(in, true);
    1904           0 :         struct pipe_inode_info *opipe = get_pipe_info(out, true);
                               /* Result when the pipe checks below fail. */
    1905           0 :         int ret = -EINVAL;
    1906             : 
    1907           0 :         if (unlikely(!(in->f_mode & FMODE_READ) ||
    1908             :                      !(out->f_mode & FMODE_WRITE)))
    1909             :                 return -EBADF;
    1910             : 
    1911             :         /*
    1912             :          * Duplicate the contents of ipipe to opipe without actually
    1913             :          * copying the data.
    1914             :          */
    1915           0 :         if (ipipe && opipe && ipipe != opipe) {
                                       /* O_NONBLOCK on either file turns on SPLICE_F_NONBLOCK for this call. */
    1916           0 :                 if ((in->f_flags | out->f_flags) & O_NONBLOCK)
    1917           0 :                         flags |= SPLICE_F_NONBLOCK;
    1918             : 
    1919             :                 /*
    1920             :                  * Keep going, unless we encounter an error. The ipipe/opipe
    1921             :                  * ordering doesn't really matter.
    1922             :                  */
    1923           0 :                 ret = ipipe_prep(ipipe, flags);
    1924           0 :                 if (!ret) {
    1925           0 :                         ret = opipe_prep(opipe, flags);
    1926           0 :                         if (!ret)
    1927           0 :                                 ret = link_pipe(ipipe, opipe, len, flags);
    1928             :                 }
    1929             :         }
    1930             : 
    1931           0 :         return ret;
    1932             : }
    1933             : 
    1934           0 : SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
    1935             : {
                               /*
                                * tee system call entry point: validates 'flags' and 'len',
                                * resolves both descriptors with fdget() and hands the files to
                                * do_tee().
                                *
                                * Returns -EINVAL if 'flags' has bits outside SPLICE_F_ALL, 0 if
                                * 'len' is zero, -EBADF if either descriptor does not resolve to
                                * a file, otherwise whatever do_tee() returns.
                                */
    1936           0 :         struct fd in, out;
    1937           0 :         int error;
    1938             : 
    1939           0 :         if (unlikely(flags & ~SPLICE_F_ALL))
    1940             :                 return -EINVAL;
    1941             : 
                               /* A zero length returns 0 before the descriptors are even looked up. */
    1942           0 :         if (unlikely(!len))
    1943             :                 return 0;
    1944             : 
                               /* Stays -EBADF unless both fdget() calls produce a file. */
    1945           0 :         error = -EBADF;
    1946           0 :         in = fdget(fdin);
    1947           0 :         if (in.file) {
    1948           0 :                 out = fdget(fdout);
    1949           0 :                 if (out.file) {
    1950           0 :                         error = do_tee(in.file, out.file, len, flags);
                                               /* Each successful fdget() is paired with an fdput(). */
    1951           0 :                         fdput(out);
    1952             :                 }
    1953           0 :                 fdput(in);
    1954             :         }
    1955             : 
    1956           0 :         return error;
    1957             : }

Generated by: LCOV version 1.14