LCOV - code coverage report
Current view: top level - fs/btrfs - send.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-achx @ Mon Jul 31 20:08:12 PDT 2023 Lines: 3344 4068 82.2 %
Date: 2023-07-31 20:08:12 Functions: 147 153 96.1 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (C) 2012 Alexander Block.  All rights reserved.
       4             :  */
       5             : 
       6             : #include <linux/bsearch.h>
       7             : #include <linux/fs.h>
       8             : #include <linux/file.h>
       9             : #include <linux/sort.h>
      10             : #include <linux/mount.h>
      11             : #include <linux/xattr.h>
      12             : #include <linux/posix_acl_xattr.h>
      13             : #include <linux/radix-tree.h>
      14             : #include <linux/vmalloc.h>
      15             : #include <linux/string.h>
      16             : #include <linux/compat.h>
      17             : #include <linux/crc32c.h>
      18             : #include <linux/fsverity.h>
      19             : 
      20             : #include "send.h"
      21             : #include "ctree.h"
      22             : #include "backref.h"
      23             : #include "locking.h"
      24             : #include "disk-io.h"
      25             : #include "btrfs_inode.h"
      26             : #include "transaction.h"
      27             : #include "compression.h"
      28             : #include "xattr.h"
      29             : #include "print-tree.h"
      30             : #include "accessors.h"
      31             : #include "dir-item.h"
      32             : #include "file-item.h"
      33             : #include "ioctl.h"
      34             : #include "verity.h"
      35             : #include "lru_cache.h"
      36             : 
      37             : /*
      38             :  * Maximum number of references an extent can have in order for us to attempt to
      39             :  * issue clone operations instead of write operations. This currently exists to
      40             :  * avoid hitting limitations of the backreference walking code (taking a lot of
      41             :  * time and using too much memory for extents with large number of references).
      42             :  */
      43             : #define SEND_MAX_EXTENT_REFS    1024
      44             : 
      45             : /*
      46             :  * A fs_path is a helper to dynamically build path names with unknown size.
      47             :  * It reallocates the internal buffer on demand.
      48             :  * It allows fast adding of path elements on the right side (normal path) and
      49             :  * fast adding to the left side (reversed path). A reversed path can also be
      50             :  * unreversed if needed.
      51             :  */
      52             : struct fs_path {
      53             :         union {
      54             :                 struct {
      55             :                         char *start;
      56             :                         char *end;
      57             : 
      58             :                         char *buf;
      59             :                         unsigned short buf_len:15;
      60             :                         unsigned short reversed:1;
      61             :                         char inline_buf[];
      62             :                 };
      63             :                 /*
      64             :                  * Average path length does not exceed 200 bytes, we'll have
      65             :                  * better packing in the slab and higher chance to satisfy
      66             :                  * an allocation later during send.
      67             :                  */
      68             :                 char pad[256];
      69             :         };
      70             : };
      71             : #define FS_PATH_INLINE_SIZE \
      72             :         (sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))
      73             : 
      74             : 
      75             : /* reused for each extent */
      76             : struct clone_root {
      77             :         struct btrfs_root *root;
      78             :         u64 ino;
      79             :         u64 offset;
      80             :         u64 num_bytes;
      81             :         bool found_ref;
      82             : };
      83             : 
      84             : #define SEND_MAX_NAME_CACHE_SIZE                        256
      85             : 
      86             : /*
      87             :  * Limit the root_ids array of struct backref_cache_entry to 17 elements.
      88             :  * This makes the size of a cache entry to be exactly 192 bytes on x86_64, which
      89             :  * can be satisfied from the kmalloc-192 slab, without wasting any space.
      90             :  * The most common case is to have a single root for cloning, which corresponds
      91             :  * to the send root. Having the user specify more than 16 clone roots is not
      92             :  * common, and in such rare cases we simply don't use caching if the number of
      93             :  * cloning roots that lead down to a leaf is more than 17.
      94             :  */
      95             : #define SEND_MAX_BACKREF_CACHE_ROOTS                    17
      96             : 
      97             : /*
      98             :  * Max number of entries in the cache.
      99             :  * With SEND_MAX_BACKREF_CACHE_ROOTS as 17, the size in bytes, excluding
     100             :  * maple tree's internal nodes, is 24K.
     101             :  */
     102             : #define SEND_MAX_BACKREF_CACHE_SIZE 128
     103             : 
     104             : /*
     105             :  * A backref cache entry maps a leaf to a list of IDs of roots from which the
     106             :  * leaf is accessible and we can use for clone operations.
     107             :  * With SEND_MAX_BACKREF_CACHE_ROOTS as 17, each cache entry is 192 bytes (on
     108             :  * x86_64).
     109             :  */
     110             : struct backref_cache_entry {
     111             :         struct btrfs_lru_cache_entry entry;
     112             :         u64 root_ids[SEND_MAX_BACKREF_CACHE_ROOTS];
     113             :         /* Number of valid elements in the root_ids array. */
     114             :         int num_roots;
     115             : };
     116             : 
     117             : /* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */
     118             : static_assert(offsetof(struct backref_cache_entry, entry) == 0);
     119             : 
     120             : /*
     121             :  * Max number of entries in the cache that stores directories that were already
     122             :  * created. The cache uses raw struct btrfs_lru_cache_entry entries, so it uses
     123             :  * at most 4096 bytes - sizeof(struct btrfs_lru_cache_entry) is 48 bytes, but
     124             :  * the kmalloc-64 slab is used, so we get 4096 bytes (64 bytes * 64).
     125             :  */
     126             : #define SEND_MAX_DIR_CREATED_CACHE_SIZE                 64
     127             : 
     128             : /*
     129             :  * Max number of entries in the directory utimes cache (dir_utimes_cache in
     130             :  * struct send_ctx). It uses raw struct btrfs_lru_cache_entry entries, so it uses
     131             :  * at most 4096 bytes - sizeof(struct btrfs_lru_cache_entry) is 48 bytes, but
     132             :  * the kmalloc-64 slab is used, so we get 4096 bytes (64 bytes * 64).
     133             :  */
     134             : #define SEND_MAX_DIR_UTIMES_CACHE_SIZE                  64
     135             : 
     136             : struct send_ctx {
     137             :         struct file *send_filp;
     138             :         loff_t send_off;
     139             :         char *send_buf;
     140             :         u32 send_size;
     141             :         u32 send_max_size;
     142             :         /*
     143             :          * Whether BTRFS_SEND_A_DATA attribute was already added to current
     144             :          * command (since protocol v2, data must be the last attribute).
     145             :          */
     146             :         bool put_data;
     147             :         struct page **send_buf_pages;
     148             :         u64 flags;      /* 'flags' member of btrfs_ioctl_send_args is u64 */
     149             :         /* Protocol version compatibility requested */
     150             :         u32 proto;
     151             : 
     152             :         struct btrfs_root *send_root;
     153             :         struct btrfs_root *parent_root;
     154             :         struct clone_root *clone_roots;
     155             :         int clone_roots_cnt;
     156             : 
     157             :         /* current state of the compare_tree call */
     158             :         struct btrfs_path *left_path;
     159             :         struct btrfs_path *right_path;
     160             :         struct btrfs_key *cmp_key;
     161             : 
     162             :         /*
     163             :          * Keep track of the generation of the last transaction that was used
     164             :          * for relocating a block group. This is periodically checked in order
     165             :          * to detect if a relocation happened since the last check, so that we
     166             :          * don't operate on stale extent buffers for nodes (level >= 1) or on
     167             :          * stale disk_bytenr values of file extent items.
     168             :          */
     169             :         u64 last_reloc_trans;
     170             : 
     171             :         /*
     172             :          * Information about the currently processed inode. In case of deleted inodes,
     173             :          * these are the values from the deleted inode.
     174             :          */
     175             :         u64 cur_ino;
     176             :         u64 cur_inode_gen;
     177             :         u64 cur_inode_size;
     178             :         u64 cur_inode_mode;
     179             :         u64 cur_inode_rdev;
     180             :         u64 cur_inode_last_extent;
     181             :         u64 cur_inode_next_write_offset;
     182             :         bool cur_inode_new;
     183             :         bool cur_inode_new_gen;
     184             :         bool cur_inode_deleted;
     185             :         bool ignore_cur_inode;
     186             :         bool cur_inode_needs_verity;
     187             :         void *verity_descriptor;
     188             : 
     189             :         u64 send_progress;
     190             : 
     191             :         struct list_head new_refs;
     192             :         struct list_head deleted_refs;
     193             : 
     194             :         struct btrfs_lru_cache name_cache;
     195             : 
     196             :         /*
     197             :          * The inode we are currently processing. It's non-NULL only when we
     198             :          * need to issue write commands for data extents from this inode.
     199             :          */
     200             :         struct inode *cur_inode;
     201             :         struct file_ra_state ra;
     202             :         u64 page_cache_clear_start;
     203             :         bool clean_page_cache;
     204             : 
     205             :         /*
     206             :          * We process inodes by their increasing order, so if before an
     207             :          * incremental send we reverse the parent/child relationship of
     208             :          * directories such that a directory with a lower inode number was
     209             :          * the parent of a directory with a higher inode number, and the one
     210             :          * becoming the new parent got renamed too, we can't rename/move the
     211             :          * directory with lower inode number when we finish processing it - we
     212             :          * must process the directory with higher inode number first, then
     213             :          * rename/move it and then rename/move the directory with lower inode
     214             :          * number. Example follows.
     215             :          *
     216             :          * Tree state when the first send was performed:
     217             :          *
     218             :          * .
     219             :          * |-- a                   (ino 257)
     220             :          *     |-- b               (ino 258)
     221             :          *         |
     222             :          *         |
     223             :          *         |-- c           (ino 259)
     224             :          *         |   |-- d       (ino 260)
     225             :          *         |
     226             :          *         |-- c2          (ino 261)
     227             :          *
     228             :          * Tree state when the second (incremental) send is performed:
     229             :          *
     230             :          * .
     231             :          * |-- a                   (ino 257)
     232             :          *     |-- b               (ino 258)
     233             :          *         |-- c2          (ino 261)
     234             :          *             |-- d2      (ino 260)
     235             :          *                 |-- cc  (ino 259)
     236             :          *
     237             :          * The sequence of steps that lead to the second state was:
     238             :          *
     239             :          * mv /a/b/c/d /a/b/c2/d2
     240             :          * mv /a/b/c /a/b/c2/d2/cc
     241             :          *
     242             :          * "c" has lower inode number, but we can't move it (2nd mv operation)
     243             :          * before we move "d", which has higher inode number.
     244             :          *
     245             :          * So we just memorize which move/rename operations must be performed
     246             :          * later when their respective parent is processed and moved/renamed.
     247             :          */
     248             : 
     249             :         /* Indexed by parent directory inode number. */
     250             :         struct rb_root pending_dir_moves;
     251             : 
     252             :         /*
     253             :          * Reverse index, indexed by the inode number of a directory that
     254             :          * is waiting for the move/rename of its immediate parent before its
     255             :          * own move/rename can be performed.
     256             :          */
     257             :         struct rb_root waiting_dir_moves;
     258             : 
     259             :         /*
     260             :          * A directory that is going to be rm'ed might have a child directory
     261             :          * which is in the pending directory moves index above. In this case,
     262             :          * the directory can only be removed after the move/rename of its child
     263             :          * is performed. Example:
     264             :          *
     265             :          * Parent snapshot:
     266             :          *
     267             :          * .                        (ino 256)
     268             :          * |-- a/                   (ino 257)
     269             :          *     |-- b/               (ino 258)
     270             :          *         |-- c/           (ino 259)
     271             :          *         |   |-- x/       (ino 260)
     272             :          *         |
     273             :          *         |-- y/           (ino 261)
     274             :          *
     275             :          * Send snapshot:
     276             :          *
     277             :          * .                        (ino 256)
     278             :          * |-- a/                   (ino 257)
     279             :          *     |-- b/               (ino 258)
     280             :          *         |-- YY/          (ino 261)
     281             :          *              |-- x/      (ino 260)
     282             :          *
     283             :          * Sequence of steps that lead to the send snapshot:
     284             :          * rm -f /a/b/c/foo.txt
     285             :          * mv /a/b/y /a/b/YY
     286             :          * mv /a/b/c/x /a/b/YY
     287             :          * rmdir /a/b/c
     288             :          *
     289             :          * When the child is processed, its move/rename is delayed until its
     290             :          * parent is processed (as explained above), but all other operations
     291             :          * like update utimes, chown, chgrp, etc, are performed and the paths
     292             :          * that it uses for those operations must use the orphanized name of
     293             :          * its parent (the directory we're going to rm later), so we need to
     294             :          * memorize that name.
     295             :          *
     296             :          * Indexed by the inode number of the directory to be deleted.
     297             :          */
     298             :         struct rb_root orphan_dirs;
     299             : 
     300             :         struct rb_root rbtree_new_refs;
     301             :         struct rb_root rbtree_deleted_refs;
     302             : 
     303             :         struct btrfs_lru_cache backref_cache;
     304             :         u64 backref_cache_last_reloc_trans;
     305             : 
     306             :         struct btrfs_lru_cache dir_created_cache;
     307             :         struct btrfs_lru_cache dir_utimes_cache;
     308             : };
     309             : 
     310             : struct pending_dir_move {
     311             :         struct rb_node node;
     312             :         struct list_head list;
     313             :         u64 parent_ino;
     314             :         u64 ino;
     315             :         u64 gen;
     316             :         struct list_head update_refs;
     317             : };
     318             : 
     319             : struct waiting_dir_move {
     320             :         struct rb_node node;
     321             :         u64 ino;
     322             :         /*
     323             :          * There might be some directory that could not be removed because it
     324             :          * was waiting for this directory inode to be moved first. Therefore
     325             :          * after this directory is moved, we can try to rmdir the ino rmdir_ino.
     326             :          */
     327             :         u64 rmdir_ino;
     328             :         u64 rmdir_gen;
     329             :         bool orphanized;
     330             : };
     331             : 
     332             : struct orphan_dir_info {
     333             :         struct rb_node node;
     334             :         u64 ino;
     335             :         u64 gen;
     336             :         u64 last_dir_index_offset;
     337             :         u64 dir_high_seq_ino;
     338             : };
     339             : 
     340             : struct name_cache_entry {
     341             :         /*
     342             :          * The key in the entry is an inode number, and the generation matches
     343             :          * the inode's generation.
     344             :          */
     345             :         struct btrfs_lru_cache_entry entry;
     346             :         u64 parent_ino;
     347             :         u64 parent_gen;
     348             :         int ret;
     349             :         int need_later_update;
     350             :         int name_len;
     351             :         char name[];
     352             : };
     353             : 
     354             : /* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */
     355             : static_assert(offsetof(struct name_cache_entry, entry) == 0);
     356             : 
     357             : #define ADVANCE                                                 1
     358             : #define ADVANCE_ONLY_NEXT                                       -1
     359             : 
     360             : enum btrfs_compare_tree_result {
     361             :         BTRFS_COMPARE_TREE_NEW,
     362             :         BTRFS_COMPARE_TREE_DELETED,
     363             :         BTRFS_COMPARE_TREE_CHANGED,
     364             :         BTRFS_COMPARE_TREE_SAME,
     365             : };
     366             : 
     367             : __cold
     368           0 : static void inconsistent_snapshot_error(struct send_ctx *sctx,
     369             :                                         enum btrfs_compare_tree_result result,
     370             :                                         const char *what)
     371             : {
     372           0 :         const char *result_string;
     373             : 
     374           0 :         switch (result) {
     375             :         case BTRFS_COMPARE_TREE_NEW:
     376             :                 result_string = "new";
     377             :                 break;
     378             :         case BTRFS_COMPARE_TREE_DELETED:
     379             :                 result_string = "deleted";
     380             :                 break;
     381             :         case BTRFS_COMPARE_TREE_CHANGED:
     382             :                 result_string = "updated";
     383             :                 break;
     384             :         case BTRFS_COMPARE_TREE_SAME:
     385             :                 ASSERT(0);
     386             :                 result_string = "unchanged";
     387             :                 break;
     388             :         default:
     389             :                 ASSERT(0);
     390             :                 result_string = "unexpected";
     391             :         }
     392             : 
     393           0 :         btrfs_err(sctx->send_root->fs_info,
     394             :                   "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu",
     395             :                   result_string, what, sctx->cmp_key->objectid,
     396             :                   sctx->send_root->root_key.objectid,
     397             :                   (sctx->parent_root ?
     398             :                    sctx->parent_root->root_key.objectid : 0));
     399           0 : }
     400             : 
     401             : __maybe_unused
     402             : static bool proto_cmd_ok(const struct send_ctx *sctx, int cmd)
     403             : {
     404     1009926 :         switch (sctx->proto) {
     405             :         case 1:  return cmd <= BTRFS_SEND_C_MAX_V1;
     406             :         case 2:  return cmd <= BTRFS_SEND_C_MAX_V2;
     407           0 :         case 3:  return cmd <= BTRFS_SEND_C_MAX_V3;
     408             :         default: return false;
     409             :         }
     410             : }
     411             : 
     412             : static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
     413             : 
     414             : static struct waiting_dir_move *
     415             : get_waiting_dir_move(struct send_ctx *sctx, u64 ino);
     416             : 
     417             : static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen);
     418             : 
     419     2071514 : static int need_send_hole(struct send_ctx *sctx)
     420             : {
     421     1648181 :         return (sctx->parent_root && !sctx->cur_inode_new &&
     422     2917728 :                 !sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
     423      846213 :                 S_ISREG(sctx->cur_inode_mode));
     424             : }
     425             : 
     426    22999155 : static void fs_path_reset(struct fs_path *p)
     427             : {
     428    22999155 :         if (p->reversed) {
     429     6107191 :                 p->start = p->buf + p->buf_len - 1;
     430     6107191 :                 p->end = p->start;
     431     6107191 :                 *p->start = 0;
     432             :         } else {
     433    16891964 :                 p->start = p->buf;
     434    16891964 :                 p->end = p->start;
     435    16891964 :                 *p->start = 0;
     436             :         }
     437    22999155 : }
     438             : 
     439    11576654 : static struct fs_path *fs_path_alloc(void)
     440             : {
     441    11576654 :         struct fs_path *p;
     442             : 
     443    11576654 :         p = kmalloc(sizeof(*p), GFP_KERNEL);
     444    11577502 :         if (!p)
     445             :                 return NULL;
     446    11577502 :         p->reversed = 0;
     447    11577502 :         p->buf = p->inline_buf;
     448    11577502 :         p->buf_len = FS_PATH_INLINE_SIZE;
     449    11577502 :         fs_path_reset(p);
     450    11577502 :         return p;
     451             : }
     452             : 
     453      611036 : static struct fs_path *fs_path_alloc_reversed(void)
     454             : {
     455      611036 :         struct fs_path *p;
     456             : 
     457      611036 :         p = fs_path_alloc();
     458      611048 :         if (!p)
     459             :                 return NULL;
     460      611048 :         p->reversed = 1;
     461      611048 :         fs_path_reset(p);
     462      611048 :         return p;
     463             : }
     464             : 
     465    13199156 : static void fs_path_free(struct fs_path *p)
     466             : {
     467    13199156 :         if (!p)
     468             :                 return;
     469    11578578 :         if (p->buf != p->inline_buf)
     470         343 :                 kfree(p->buf);
     471    11578578 :         kfree(p);
     472             : }
     473             : 
     474             : static int fs_path_len(struct fs_path *p)
     475             : {
     476     2838454 :         return p->end - p->start;
     477             : }
     478             : 
     479    12445732 : static int fs_path_ensure_buf(struct fs_path *p, int len)
     480             : {
     481    12445732 :         char *tmp_buf;
     482    12445732 :         int path_len;
     483    12445732 :         int old_buf_len;
     484             : 
     485    12445732 :         len++;
     486             : 
     487    12445732 :         if (p->buf_len >= len)
     488             :                 return 0;
     489             : 
     490         343 :         if (len > PATH_MAX) {
     491           0 :                 WARN_ON(1);
     492           0 :                 return -ENOMEM;
     493             :         }
     494             : 
     495         343 :         path_len = p->end - p->start;
     496         343 :         old_buf_len = p->buf_len;
     497             : 
     498             :         /*
     499             :          * Allocate to the next largest kmalloc bucket size, to let
     500             :          * the fast path happen most of the time.
     501             :          */
     502         343 :         len = kmalloc_size_roundup(len);
     503             :         /*
     504             :          * First time the inline_buf does not suffice
     505             :          */
     506         343 :         if (p->buf == p->inline_buf) {
     507         343 :                 tmp_buf = kmalloc(len, GFP_KERNEL);
     508         343 :                 if (tmp_buf)
     509         686 :                         memcpy(tmp_buf, p->buf, old_buf_len);
     510             :         } else {
     511           0 :                 tmp_buf = krealloc(p->buf, len, GFP_KERNEL);
     512             :         }
     513         343 :         if (!tmp_buf)
     514             :                 return -ENOMEM;
     515         343 :         p->buf = tmp_buf;
     516         343 :         p->buf_len = len;
     517             : 
     518         343 :         if (p->reversed) {
     519           4 :                 tmp_buf = p->buf + old_buf_len - path_len - 1;
     520           4 :                 p->end = p->buf + p->buf_len - 1;
     521           4 :                 p->start = p->end - path_len;
     522           8 :                 memmove(p->start, tmp_buf, path_len + 1);
     523             :         } else {
     524         339 :                 p->start = p->buf;
     525         339 :                 p->end = p->start + path_len;
     526             :         }
     527             :         return 0;
     528             : }
     529             : 
     530    12445826 : static int fs_path_prepare_for_add(struct fs_path *p, int name_len,
     531             :                                    char **prepared)
     532             : {
     533    12445826 :         int ret;
     534    12445826 :         int new_len;
     535             : 
     536    12445826 :         new_len = p->end - p->start + name_len;
     537    12445826 :         if (p->start != p->end)
     538      428727 :                 new_len++;
     539    12445826 :         ret = fs_path_ensure_buf(p, new_len);
     540    12446352 :         if (ret < 0)
     541           0 :                 goto out;
     542             : 
     543    12446352 :         if (p->reversed) {
     544     5310571 :                 if (p->start != p->end)
     545      421527 :                         *--p->start = '/';
     546     5310571 :                 p->start -= name_len;
     547     5310571 :                 *prepared = p->start;
     548             :         } else {
     549     7135781 :                 if (p->start != p->end)
     550        7200 :                         *p->end++ = '/';
     551     7135781 :                 *prepared = p->end;
     552     7135781 :                 p->end += name_len;
     553     7135781 :                 *p->end = 0;
     554             :         }
     555             : 
     556    12446352 : out:
     557    12446352 :         return ret;
     558             : }
     559             : 
     560     4489475 : static int fs_path_add(struct fs_path *p, const char *name, int name_len)
     561             : {
     562     4489475 :         int ret;
     563     4489475 :         char *prepared;
     564             : 
     565     4489475 :         ret = fs_path_prepare_for_add(p, name_len, &prepared);
     566     4489332 :         if (ret < 0)
     567           0 :                 goto out;
     568     8978664 :         memcpy(prepared, name, name_len);
     569             : 
     570     4489332 : out:
     571     4489332 :         return ret;
     572             : }
     573             : 
     574     5914701 : static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
     575             : {
     576     5914701 :         int ret;
     577     5914701 :         char *prepared;
     578             : 
     579     5914701 :         ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared);
     580     5914426 :         if (ret < 0)
     581           0 :                 goto out;
     582    11828852 :         memcpy(prepared, p2->start, p2->end - p2->start);
     583             : 
     584     5914426 : out:
     585     5914426 :         return ret;
     586             : }
     587             : 
     588     2043018 : static int fs_path_add_from_extent_buffer(struct fs_path *p,
     589             :                                           struct extent_buffer *eb,
     590             :                                           unsigned long off, int len)
     591             : {
     592     2043018 :         int ret;
     593     2043018 :         char *prepared;
     594             : 
     595     2043018 :         ret = fs_path_prepare_for_add(p, len, &prepared);
     596     2043022 :         if (ret < 0)
     597           0 :                 goto out;
     598             : 
     599     2043022 :         read_extent_buffer(eb, prepared, off, len);
     600             : 
     601     2043024 : out:
     602     2043024 :         return ret;
     603             : }
     604             : 
     605      606938 : static int fs_path_copy(struct fs_path *p, struct fs_path *from)
     606             : {
     607      606938 :         p->reversed = from->reversed;
     608      606938 :         fs_path_reset(p);
     609             : 
     610      606938 :         return fs_path_add_path(p, from);
     611             : }
     612             : 
     613     4879006 : static void fs_path_unreverse(struct fs_path *p)
     614             : {
     615     4879006 :         char *tmp;
     616     4879006 :         int len;
     617             : 
     618     4879006 :         if (!p->reversed)
     619             :                 return;
     620             : 
     621     4879006 :         tmp = p->start;
     622     4879006 :         len = p->end - p->start;
     623     4879006 :         p->start = p->buf;
     624     4879006 :         p->end = p->start + len;
     625     9758012 :         memmove(p->start, tmp, len + 1);
     626     4879006 :         p->reversed = 0;
     627             : }
     628             : 
     629    18554573 : static struct btrfs_path *alloc_path_for_send(void)
     630             : {
     631    18554573 :         struct btrfs_path *path;
     632             : 
     633    18554573 :         path = btrfs_alloc_path();
     634    18555091 :         if (!path)
     635             :                 return NULL;
     636    18555091 :         path->search_commit_root = 1;
     637    18555091 :         path->skip_locking = 1;
     638    18555091 :         path->need_commit_sem = 1;
     639    18555091 :         return path;
     640             : }
     641             : 
     642     5503108 : static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
     643             : {
     644     5503108 :         int ret;
     645     5503108 :         u32 pos = 0;
     646             : 
     647    11006802 :         while (pos < len) {
     648     5503159 :                 ret = kernel_write(filp, buf + pos, len - pos, off);
     649     5503694 :                 if (ret < 0)
     650           0 :                         return ret;
     651     5503694 :                 if (ret == 0)
     652             :                         return -EIO;
     653     5503694 :                 pos += ret;
     654             :         }
     655             : 
     656             :         return 0;
     657             : }
     658             : 
     659    14456612 : static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
     660             : {
     661    14456612 :         struct btrfs_tlv_header *hdr;
     662    14456612 :         int total_len = sizeof(*hdr) + len;
     663    14456612 :         int left = sctx->send_max_size - sctx->send_size;
     664             : 
     665    14456612 :         if (WARN_ON_ONCE(sctx->put_data))
     666             :                 return -EINVAL;
     667             : 
     668    14456612 :         if (unlikely(left < total_len))
     669             :                 return -EOVERFLOW;
     670             : 
     671    14456612 :         hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
     672    14456612 :         put_unaligned_le16(attr, &hdr->tlv_type);
     673    14456612 :         put_unaligned_le16(len, &hdr->tlv_len);
     674    28913224 :         memcpy(hdr + 1, data, len);
     675    14456612 :         sctx->send_size += total_len;
     676             : 
     677    14456612 :         return 0;
     678             : }
     679             : 
     680             : #define TLV_PUT_DEFINE_INT(bits) \
     681             :         static int tlv_put_u##bits(struct send_ctx *sctx,               \
     682             :                         u##bits attr, u##bits value)                    \
     683             :         {                                                               \
     684             :                 __le##bits __tmp = cpu_to_le##bits(value);              \
     685             :                 return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));  \
     686             :         }
     687             : 
     688             : TLV_PUT_DEFINE_INT(8)
     689           0 : TLV_PUT_DEFINE_INT(32)
     690        6121 : TLV_PUT_DEFINE_INT(64)
     691             : 
     692     6916307 : static int tlv_put_string(struct send_ctx *sctx, u16 attr,
     693             :                           const char *str, int len)
     694             : {
     695     6916307 :         if (len == -1)
     696           0 :                 len = strlen(str);
     697     6916307 :         return tlv_put(sctx, attr, str, len);
     698             : }
     699             : 
     700             : static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
     701             :                         const u8 *uuid)
     702             : {
     703        5532 :         return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
     704             : }
     705             : 
     706     3033666 : static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
     707             :                                   struct extent_buffer *eb,
     708             :                                   struct btrfs_timespec *ts)
     709             : {
     710     3033666 :         struct btrfs_timespec bts;
     711     3033666 :         read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
     712     3033634 :         return tlv_put(sctx, attr, &bts, sizeof(bts));
     713             : }
     714             : 
     715             : 
     716             : #define TLV_PUT(sctx, attrtype, data, attrlen) \
     717             :         do { \
     718             :                 ret = tlv_put(sctx, attrtype, data, attrlen); \
     719             :                 if (ret < 0) \
     720             :                         goto tlv_put_failure; \
     721             :         } while (0)
     722             : 
     723             : #define TLV_PUT_INT(sctx, attrtype, bits, value) \
     724             :         do { \
     725             :                 ret = tlv_put_u##bits(sctx, attrtype, value); \
     726             :                 if (ret < 0) \
     727             :                         goto tlv_put_failure; \
     728             :         } while (0)
     729             : 
     730             : #define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
     731             : #define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
     732             : #define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
     733             : #define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
     734             : #define TLV_PUT_STRING(sctx, attrtype, str, len) \
     735             :         do { \
     736             :                 ret = tlv_put_string(sctx, attrtype, str, len); \
     737             :                 if (ret < 0) \
     738             :                         goto tlv_put_failure; \
     739             :         } while (0)
     740             : #define TLV_PUT_PATH(sctx, attrtype, p) \
     741             :         do { \
     742             :                 ret = tlv_put_string(sctx, attrtype, p->start, \
     743             :                         p->end - p->start); \
     744             :                 if (ret < 0) \
     745             :                         goto tlv_put_failure; \
     746             :         } while(0)
     747             : #define TLV_PUT_UUID(sctx, attrtype, uuid) \
     748             :         do { \
     749             :                 ret = tlv_put_uuid(sctx, attrtype, uuid); \
     750             :                 if (ret < 0) \
     751             :                         goto tlv_put_failure; \
     752             :         } while (0)
     753             : #define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
     754             :         do { \
     755             :                 ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
     756             :                 if (ret < 0) \
     757             :                         goto tlv_put_failure; \
     758             :         } while (0)
     759             : 
     760         212 : static int send_header(struct send_ctx *sctx)
     761             : {
     762         212 :         struct btrfs_stream_header hdr;
     763             : 
     764         212 :         strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
     765         212 :         hdr.version = cpu_to_le32(sctx->proto);
     766         212 :         return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
     767             :                                         &sctx->send_off);
     768             : }
     769             : 
     770             : /*
     771             :  * For each command/item we want to send to userspace, we call this function.
     772             :  */
     773     5502436 : static int begin_cmd(struct send_ctx *sctx, int cmd)
     774             : {
     775     5502436 :         struct btrfs_cmd_header *hdr;
     776             : 
     777     5502436 :         if (WARN_ON(!sctx->send_buf))
     778             :                 return -EINVAL;
     779             : 
     780     5502436 :         BUG_ON(sctx->send_size);
     781             : 
     782     5502436 :         sctx->send_size += sizeof(*hdr);
     783     5502436 :         hdr = (struct btrfs_cmd_header *)sctx->send_buf;
     784     5502436 :         put_unaligned_le16(cmd, &hdr->cmd);
     785             : 
     786     5502436 :         return 0;
     787             : }
     788             : 
     789     5503011 : static int send_cmd(struct send_ctx *sctx)
     790             : {
     791     5503011 :         int ret;
     792     5503011 :         struct btrfs_cmd_header *hdr;
     793     5503011 :         u32 crc;
     794             : 
     795     5503011 :         hdr = (struct btrfs_cmd_header *)sctx->send_buf;
     796     5503011 :         put_unaligned_le32(sctx->send_size - sizeof(*hdr), &hdr->len);
     797     5503011 :         put_unaligned_le32(0, &hdr->crc);
     798             : 
     799     5503011 :         crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
     800     5503130 :         put_unaligned_le32(crc, &hdr->crc);
     801             : 
     802     5503130 :         ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
     803             :                                         &sctx->send_off);
     804             : 
     805     5503396 :         sctx->send_size = 0;
     806     5503396 :         sctx->put_data = false;
     807             : 
     808     5503396 :         return ret;
     809             : }
     810             : 
     811             : /*
     812             :  * Sends a move instruction to user space
     813             :  */
     814      605054 : static int send_rename(struct send_ctx *sctx,
     815             :                      struct fs_path *from, struct fs_path *to)
     816             : {
     817      605054 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
     818      605054 :         int ret;
     819             : 
     820      605054 :         btrfs_debug(fs_info, "send_rename %s -> %s", from->start, to->start);
     821             : 
     822      605054 :         ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
     823      605053 :         if (ret < 0)
     824           0 :                 goto out;
     825             : 
     826      605053 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
     827      605053 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);
     828             : 
     829      605054 :         ret = send_cmd(sctx);
     830             : 
     831      605056 : tlv_put_failure:
     832      605056 : out:
     833      605056 :         return ret;
     834             : }
     835             : 
     836             : /*
     837             :  * Sends a link instruction to user space
     838             :  */
     839        3016 : static int send_link(struct send_ctx *sctx,
     840             :                      struct fs_path *path, struct fs_path *lnk)
     841             : {
     842        3016 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
     843        3016 :         int ret;
     844             : 
     845        3016 :         btrfs_debug(fs_info, "send_link %s -> %s", path->start, lnk->start);
     846             : 
     847        3016 :         ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
     848        3016 :         if (ret < 0)
     849           0 :                 goto out;
     850             : 
     851        3016 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
     852        3016 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);
     853             : 
     854        3016 :         ret = send_cmd(sctx);
     855             : 
     856        3016 : tlv_put_failure:
     857        3016 : out:
     858        3016 :         return ret;
     859             : }
     860             : 
     861             : /*
     862             :  * Sends an unlink instruction to user space
     863             :  */
     864        2005 : static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
     865             : {
     866        2005 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
     867        2005 :         int ret;
     868             : 
     869        2005 :         btrfs_debug(fs_info, "send_unlink %s", path->start);
     870             : 
     871        2005 :         ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
     872        2005 :         if (ret < 0)
     873           0 :                 goto out;
     874             : 
     875        2005 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
     876             : 
     877        2005 :         ret = send_cmd(sctx);
     878             : 
     879        2005 : tlv_put_failure:
     880        2005 : out:
     881        2005 :         return ret;
     882             : }
     883             : 
     884             : /*
     885             :  * Sends a rmdir instruction to user space
     886             :  */
     887          20 : static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
     888             : {
     889          20 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
     890          20 :         int ret;
     891             : 
     892          20 :         btrfs_debug(fs_info, "send_rmdir %s", path->start);
     893             : 
     894          20 :         ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
     895          20 :         if (ret < 0)
     896           0 :                 goto out;
     897             : 
     898          20 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
     899             : 
     900          20 :         ret = send_cmd(sctx);
     901             : 
     902          20 : tlv_put_failure:
     903          20 : out:
     904          20 :         return ret;
     905             : }
     906             : 
     907             : struct btrfs_inode_info {
     908             :         u64 size;
     909             :         u64 gen;
     910             :         u64 mode;
     911             :         u64 uid;
     912             :         u64 gid;
     913             :         u64 rdev;
     914             :         u64 fileattr;
     915             :         u64 nlink;
     916             : };
     917             : 
     918             : /*
     919             :  * Helper function to retrieve some fields from an inode item.
     920             :  */
     921    11206694 : static int get_inode_info(struct btrfs_root *root, u64 ino,
     922             :                           struct btrfs_inode_info *info)
     923             : {
     924    11206694 :         int ret;
     925    11206694 :         struct btrfs_path *path;
     926    11206694 :         struct btrfs_inode_item *ii;
     927    11206694 :         struct btrfs_key key;
     928             : 
     929    11206694 :         path = alloc_path_for_send();
     930    11206726 :         if (!path)
     931             :                 return -ENOMEM;
     932             : 
     933    11206726 :         key.objectid = ino;
     934    11206726 :         key.type = BTRFS_INODE_ITEM_KEY;
     935    11206726 :         key.offset = 0;
     936    11206726 :         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
     937    11206843 :         if (ret) {
     938      404111 :                 if (ret > 0)
     939      404111 :                         ret = -ENOENT;
     940      404111 :                 goto out;
     941             :         }
     942             : 
     943    10802732 :         if (!info)
     944         187 :                 goto out;
     945             : 
     946    10802545 :         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
     947             :                         struct btrfs_inode_item);
     948    10802153 :         info->size = btrfs_inode_size(path->nodes[0], ii);
     949    10802247 :         info->gen = btrfs_inode_generation(path->nodes[0], ii);
     950    10802265 :         info->mode = btrfs_inode_mode(path->nodes[0], ii);
     951    10802261 :         info->uid = btrfs_inode_uid(path->nodes[0], ii);
     952    10802207 :         info->gid = btrfs_inode_gid(path->nodes[0], ii);
     953    10802242 :         info->rdev = btrfs_inode_rdev(path->nodes[0], ii);
     954    10802264 :         info->nlink = btrfs_inode_nlink(path->nodes[0], ii);
     955             :         /*
     956             :          * Transfer the unchanged u64 value of btrfs_inode_item::flags, that's
     957             :          * otherwise logically split to 32/32 parts.
     958             :          */
     959    10802283 :         info->fileattr = btrfs_inode_flags(path->nodes[0], ii);
     960             : 
     961    11206612 : out:
     962    11206612 :         btrfs_free_path(path);
     963    11206612 :         return ret;
     964             : }
     965             : 
     966     4073651 : static int get_inode_gen(struct btrfs_root *root, u64 ino, u64 *gen)
     967             : {
     968     4073651 :         int ret;
     969     4073651 :         struct btrfs_inode_info info = { 0 };
     970             : 
     971     4073651 :         ASSERT(gen);
     972             : 
     973     4073651 :         ret = get_inode_info(root, ino, &info);
     974     4073692 :         *gen = info.gen;
     975     4073692 :         return ret;
     976             : }
     977             : 
     978             : typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
     979             :                                    struct fs_path *p,
     980             :                                    void *ctx);
     981             : 
     982             : /*
     983             :  * Helper function to iterate the entries in ONE btrfs_inode_ref or
     984             :  * btrfs_inode_extref.
     985             :  * The iterate callback may return a non zero value to stop iteration. This can
     986             :  * be a negative value for error codes or 1 to simply stop it.
     987             :  *
     988             :  * path must point to the INODE_REF or INODE_EXTREF when called.
     989             :  */
     990      611038 : static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
     991             :                              struct btrfs_key *found_key, int resolve,
     992             :                              iterate_inode_ref_t iterate, void *ctx)
     993             : {
     994      611038 :         struct extent_buffer *eb = path->nodes[0];
     995      611038 :         struct btrfs_inode_ref *iref;
     996      611038 :         struct btrfs_inode_extref *extref;
     997      611038 :         struct btrfs_path *tmp_path;
     998      611038 :         struct fs_path *p;
     999      611038 :         u32 cur = 0;
    1000      611038 :         u32 total;
    1001      611038 :         int slot = path->slots[0];
    1002      611038 :         u32 name_len;
    1003      611038 :         char *start;
    1004      611038 :         int ret = 0;
    1005      611038 :         int num = 0;
    1006      611038 :         int index;
    1007      611038 :         u64 dir;
    1008      611038 :         unsigned long name_off;
    1009      611038 :         unsigned long elem_size;
    1010      611038 :         unsigned long ptr;
    1011             : 
    1012      611038 :         p = fs_path_alloc_reversed();
    1013      611052 :         if (!p)
    1014             :                 return -ENOMEM;
    1015             : 
    1016      611052 :         tmp_path = alloc_path_for_send();
    1017      611044 :         if (!tmp_path) {
    1018           0 :                 fs_path_free(p);
    1019           0 :                 return -ENOMEM;
    1020             :         }
    1021             : 
    1022             : 
    1023      611044 :         if (found_key->type == BTRFS_INODE_REF_KEY) {
    1024      609521 :                 ptr = (unsigned long)btrfs_item_ptr(eb, slot,
    1025             :                                                     struct btrfs_inode_ref);
    1026      609510 :                 total = btrfs_item_size(eb, slot);
    1027      609510 :                 elem_size = sizeof(*iref);
    1028             :         } else {
    1029        1523 :                 ptr = btrfs_item_ptr_offset(eb, slot);
    1030        1523 :                 total = btrfs_item_size(eb, slot);
    1031        1523 :                 elem_size = sizeof(*extref);
    1032             :         }
    1033             : 
    1034     1223310 :         while (cur < total) {
    1035      614535 :                 fs_path_reset(p);
    1036             : 
    1037      614529 :                 if (found_key->type == BTRFS_INODE_REF_KEY) {
    1038      613006 :                         iref = (struct btrfs_inode_ref *)(ptr + cur);
    1039      613006 :                         name_len = btrfs_inode_ref_name_len(eb, iref);
    1040      613004 :                         name_off = (unsigned long)(iref + 1);
    1041      613004 :                         index = btrfs_inode_ref_index(eb, iref);
    1042      613007 :                         dir = found_key->offset;
    1043             :                 } else {
    1044        1523 :                         extref = (struct btrfs_inode_extref *)(ptr + cur);
    1045        1523 :                         name_len = btrfs_inode_extref_name_len(eb, extref);
    1046        1523 :                         name_off = (unsigned long)&extref->name;
    1047        1523 :                         index = btrfs_inode_extref_index(eb, extref);
    1048        1523 :                         dir = btrfs_inode_extref_parent(eb, extref);
    1049             :                 }
    1050             : 
    1051      614531 :                 if (resolve) {
    1052        2226 :                         start = btrfs_ref_to_path(root, tmp_path, name_len,
    1053             :                                                   name_off, eb, dir,
    1054        2226 :                                                   p->buf, p->buf_len);
    1055        2226 :                         if (IS_ERR(start)) {
    1056           0 :                                 ret = PTR_ERR(start);
    1057           0 :                                 goto out;
    1058             :                         }
    1059        2226 :                         if (start < p->buf) {
    1060             :                                 /* overflow, try again with larger buffer */
    1061           0 :                                 ret = fs_path_ensure_buf(p,
    1062           0 :                                                 p->buf_len + p->buf - start);
    1063           0 :                                 if (ret < 0)
    1064           0 :                                         goto out;
    1065           0 :                                 start = btrfs_ref_to_path(root, tmp_path,
    1066             :                                                           name_len, name_off,
    1067             :                                                           eb, dir,
    1068           0 :                                                           p->buf, p->buf_len);
    1069           0 :                                 if (IS_ERR(start)) {
    1070           0 :                                         ret = PTR_ERR(start);
    1071           0 :                                         goto out;
    1072             :                                 }
    1073           0 :                                 BUG_ON(start < p->buf);
    1074             :                         }
    1075        2226 :                         p->start = start;
    1076             :                 } else {
    1077      612305 :                         ret = fs_path_add_from_extent_buffer(p, eb, name_off,
    1078             :                                                              name_len);
    1079      612311 :                         if (ret < 0)
    1080           0 :                                 goto out;
    1081             :                 }
    1082             : 
    1083      614537 :                 cur += elem_size + name_len;
    1084      614537 :                 ret = iterate(num, dir, index, p, ctx);
    1085      614504 :                 if (ret)
    1086        2226 :                         goto out;
    1087      612278 :                 num++;
    1088             :         }
    1089             : 
    1090      608775 : out:
    1091      611001 :         btrfs_free_path(tmp_path);
    1092      611043 :         fs_path_free(p);
    1093      611043 :         return ret;
    1094             : }
    1095             : 
    1096             : typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
    1097             :                                   const char *name, int name_len,
    1098             :                                   const char *data, int data_len,
    1099             :                                   void *ctx);
    1100             : 
    1101             : /*
    1102             :  * Helper function to iterate the entries in ONE btrfs_dir_item.
    1103             :  * The iterate callback may return a non zero value to stop iteration. This can
    1104             :  * be a negative value for error codes or 1 to simply stop it.
    1105             :  *
    1106             :  * path must point to the dir item when called.
    1107             :  */
    1108      800439 : static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
    1109             :                             iterate_dir_item_t iterate, void *ctx)
    1110             : {
    1111      800439 :         int ret = 0;
    1112      800439 :         struct extent_buffer *eb;
    1113      800439 :         struct btrfs_dir_item *di;
    1114      800439 :         struct btrfs_key di_key;
    1115      800439 :         char *buf = NULL;
    1116      800439 :         int buf_len;
    1117      800439 :         u32 name_len;
    1118      800439 :         u32 data_len;
    1119      800439 :         u32 cur;
    1120      800439 :         u32 len;
    1121      800439 :         u32 total;
    1122      800439 :         int slot;
    1123      800439 :         int num;
    1124             : 
    1125             :         /*
    1126             :          * Start with a small buffer (1 page). If later we end up needing more
    1127             :          * space, which can happen for xattrs on a fs with a leaf size greater
    1128             :          * than the page size, attempt to increase the buffer. Typically xattr
    1129             :          * values are small.
    1130             :          */
    1131      800439 :         buf_len = PATH_MAX;
    1132      800439 :         buf = kmalloc(buf_len, GFP_KERNEL);
    1133      800439 :         if (!buf) {
    1134           0 :                 ret = -ENOMEM;
    1135           0 :                 goto out;
    1136             :         }
    1137             : 
    1138      800439 :         eb = path->nodes[0];
    1139      800439 :         slot = path->slots[0];
    1140      800439 :         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
    1141      800439 :         cur = 0;
    1142      800439 :         len = 0;
    1143      800439 :         total = btrfs_item_size(eb, slot);
    1144             : 
    1145      800439 :         num = 0;
    1146     1600818 :         while (cur < total) {
    1147      800439 :                 name_len = btrfs_dir_name_len(eb, di);
    1148      800439 :                 data_len = btrfs_dir_data_len(eb, di);
    1149      800439 :                 btrfs_dir_item_key_to_cpu(eb, di, &di_key);
    1150             : 
    1151      800439 :                 if (btrfs_dir_ftype(eb, di) == BTRFS_FT_XATTR) {
    1152      800439 :                         if (name_len > XATTR_NAME_MAX) {
    1153           0 :                                 ret = -ENAMETOOLONG;
    1154           0 :                                 goto out;
    1155             :                         }
    1156      800439 :                         if (name_len + data_len >
    1157      800439 :                                         BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
    1158           0 :                                 ret = -E2BIG;
    1159           0 :                                 goto out;
    1160             :                         }
    1161             :                 } else {
    1162             :                         /*
    1163             :                          * Path too long
    1164             :                          */
    1165           0 :                         if (name_len + data_len > PATH_MAX) {
    1166           0 :                                 ret = -ENAMETOOLONG;
    1167           0 :                                 goto out;
    1168             :                         }
    1169             :                 }
    1170             : 
    1171      800439 :                 if (name_len + data_len > buf_len) {
    1172           6 :                         buf_len = name_len + data_len;
    1173           6 :                         if (is_vmalloc_addr(buf)) {
    1174           0 :                                 vfree(buf);
    1175           0 :                                 buf = NULL;
    1176             :                         } else {
    1177           6 :                                 char *tmp = krealloc(buf, buf_len,
    1178             :                                                 GFP_KERNEL | __GFP_NOWARN);
    1179             : 
    1180           6 :                                 if (!tmp)
    1181           0 :                                         kfree(buf);
    1182           6 :                                 buf = tmp;
    1183             :                         }
    1184           6 :                         if (!buf) {
    1185           0 :                                 buf = kvmalloc(buf_len, GFP_KERNEL);
    1186           0 :                                 if (!buf) {
    1187           0 :                                         ret = -ENOMEM;
    1188           0 :                                         goto out;
    1189             :                                 }
    1190             :                         }
    1191             :                 }
    1192             : 
    1193      800439 :                 read_extent_buffer(eb, buf, (unsigned long)(di + 1),
    1194             :                                 name_len + data_len);
    1195             : 
    1196      800439 :                 len = sizeof(*di) + name_len + data_len;
    1197      800439 :                 di = (struct btrfs_dir_item *)((char *)di + len);
    1198      800439 :                 cur += len;
    1199             : 
    1200      800439 :                 ret = iterate(num, &di_key, buf, name_len, buf + name_len,
    1201             :                               data_len, ctx);
    1202      800439 :                 if (ret < 0)
    1203           0 :                         goto out;
    1204      800439 :                 if (ret) {
    1205          60 :                         ret = 0;
    1206          60 :                         goto out;
    1207             :                 }
    1208             : 
    1209      800379 :                 num++;
    1210             :         }
    1211             : 
    1212      800379 : out:
    1213      800439 :         kvfree(buf);
    1214      800439 :         return ret;
    1215             : }
    1216             : 
    1217        2226 : static int __copy_first_ref(int num, u64 dir, int index,
    1218             :                             struct fs_path *p, void *ctx)
    1219             : {
    1220        2226 :         int ret;
    1221        2226 :         struct fs_path *pt = ctx;
    1222             : 
    1223        2226 :         ret = fs_path_copy(pt, p);
    1224        2226 :         if (ret < 0)
    1225           0 :                 return ret;
    1226             : 
    1227             :         /* we want the first only */
    1228             :         return 1;
    1229             : }
    1230             : 
    1231             : /*
    1232             :  * Retrieve the first path of an inode. If an inode has more then one
    1233             :  * ref/hardlink, this is ignored.
    1234             :  */
    1235        2226 : static int get_inode_path(struct btrfs_root *root,
    1236             :                           u64 ino, struct fs_path *path)
    1237             : {
    1238        2226 :         int ret;
    1239        2226 :         struct btrfs_key key, found_key;
    1240        2226 :         struct btrfs_path *p;
    1241             : 
    1242        2226 :         p = alloc_path_for_send();
    1243        2226 :         if (!p)
    1244             :                 return -ENOMEM;
    1245             : 
    1246        2226 :         fs_path_reset(path);
    1247             : 
    1248        2226 :         key.objectid = ino;
    1249        2226 :         key.type = BTRFS_INODE_REF_KEY;
    1250        2226 :         key.offset = 0;
    1251             : 
    1252        2226 :         ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
    1253        2226 :         if (ret < 0)
    1254           0 :                 goto out;
    1255        2226 :         if (ret) {
    1256           0 :                 ret = 1;
    1257           0 :                 goto out;
    1258             :         }
    1259        2226 :         btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
    1260        2226 :         if (found_key.objectid != ino ||
    1261        2226 :             (found_key.type != BTRFS_INODE_REF_KEY &&
    1262             :              found_key.type != BTRFS_INODE_EXTREF_KEY)) {
    1263           0 :                 ret = -ENOENT;
    1264           0 :                 goto out;
    1265             :         }
    1266             : 
    1267        2226 :         ret = iterate_inode_ref(root, p, &found_key, 1,
    1268             :                                 __copy_first_ref, path);
    1269        2226 :         if (ret < 0)
    1270             :                 goto out;
    1271             :         ret = 0;
    1272             : 
    1273        2226 : out:
    1274        2226 :         btrfs_free_path(p);
    1275        2226 :         return ret;
    1276             : }
    1277             : 
    1278             : struct backref_ctx {
    1279             :         struct send_ctx *sctx;
    1280             : 
    1281             :         /* number of total found references */
    1282             :         u64 found;
    1283             : 
    1284             :         /*
    1285             :          * used for clones found in send_root. clones found behind cur_objectid
    1286             :          * and cur_offset are not considered as allowed clones.
    1287             :          */
    1288             :         u64 cur_objectid;
    1289             :         u64 cur_offset;
    1290             : 
    1291             :         /* may be truncated in case it's the last extent in a file */
    1292             :         u64 extent_len;
    1293             : 
    1294             :         /* The bytenr the file extent item we are processing refers to. */
    1295             :         u64 bytenr;
    1296             :         /* The owner (root id) of the data backref for the current extent. */
    1297             :         u64 backref_owner;
    1298             :         /* The offset of the data backref for the current extent. */
    1299             :         u64 backref_offset;
    1300             : };
    1301             : 
    1302     2243357 : static int __clone_root_cmp_bsearch(const void *key, const void *elt)
    1303             : {
    1304     2243357 :         u64 root = (u64)(uintptr_t)key;
    1305     2243357 :         const struct clone_root *cr = elt;
    1306             : 
    1307     2243357 :         if (root < cr->root->root_key.objectid)
    1308             :                 return -1;
    1309     1457634 :         if (root > cr->root->root_key.objectid)
    1310      192728 :                 return 1;
    1311             :         return 0;
    1312             : }
    1313             : 
    1314         129 : static int __clone_root_cmp_sort(const void *e1, const void *e2)
    1315             : {
    1316         129 :         const struct clone_root *cr1 = e1;
    1317         129 :         const struct clone_root *cr2 = e2;
    1318             : 
    1319         129 :         if (cr1->root->root_key.objectid < cr2->root->root_key.objectid)
    1320             :                 return -1;
    1321           4 :         if (cr1->root->root_key.objectid > cr2->root->root_key.objectid)
    1322           4 :                 return 1;
    1323             :         return 0;
    1324             : }
    1325             : 
    1326             : /*
    1327             :  * Called for every backref that is found for the current extent.
    1328             :  * Results are collected in sctx->clone_roots->ino/offset.
    1329             :  */
    1330     1425979 : static int iterate_backrefs(u64 ino, u64 offset, u64 num_bytes, u64 root_id,
    1331             :                             void *ctx_)
    1332             : {
    1333     1425979 :         struct backref_ctx *bctx = ctx_;
    1334     1425979 :         struct clone_root *clone_root;
    1335             : 
    1336             :         /* First check if the root is in the list of accepted clone sources */
    1337     1425979 :         clone_root = bsearch((void *)(uintptr_t)root_id, bctx->sctx->clone_roots,
    1338     1425979 :                              bctx->sctx->clone_roots_cnt,
    1339             :                              sizeof(struct clone_root),
    1340             :                              __clone_root_cmp_bsearch);
    1341     1425985 :         if (!clone_root)
    1342             :                 return 0;
    1343             : 
    1344             :         /* This is our own reference, bail out as we can't clone from it. */
    1345     1102309 :         if (clone_root->root == bctx->sctx->send_root &&
    1346     1099872 :             ino == bctx->cur_objectid &&
    1347     1082339 :             offset == bctx->cur_offset)
    1348             :                 return 0;
    1349             : 
    1350             :         /*
    1351             :          * Make sure we don't consider clones from send_root that are
    1352             :          * behind the current inode/offset.
    1353             :          */
    1354       48248 :         if (clone_root->root == bctx->sctx->send_root) {
    1355             :                 /*
    1356             :                  * If the source inode was not yet processed we can't issue a
    1357             :                  * clone operation, as the source extent does not exist yet at
    1358             :                  * the destination of the stream.
    1359             :                  */
    1360       45811 :                 if (ino > bctx->cur_objectid)
    1361             :                         return 0;
    1362             :                 /*
    1363             :                  * We clone from the inode currently being sent as long as the
    1364             :                  * source extent is already processed, otherwise we could try
    1365             :                  * to clone from an extent that does not exist yet at the
    1366             :                  * destination of the stream.
    1367             :                  */
    1368       31333 :                 if (ino == bctx->cur_objectid &&
    1369       28278 :                     offset + bctx->extent_len >
    1370       28278 :                     bctx->sctx->cur_inode_next_write_offset)
    1371             :                         return 0;
    1372             :         }
    1373             : 
    1374        5801 :         bctx->found++;
    1375        5801 :         clone_root->found_ref = true;
    1376             : 
    1377             :         /*
    1378             :          * If the given backref refers to a file extent item with a larger
    1379             :          * number of bytes than what we found before, use the new one so that
    1380             :          * we clone more optimally and end up doing less writes and getting
    1381             :          * less exclusive, non-shared extents at the destination.
    1382             :          */
    1383        5801 :         if (num_bytes > clone_root->num_bytes) {
    1384        5495 :                 clone_root->ino = ino;
    1385        5495 :                 clone_root->offset = offset;
    1386        5495 :                 clone_root->num_bytes = num_bytes;
    1387             : 
    1388             :                 /*
    1389             :                  * Found a perfect candidate, so there's no need to continue
    1390             :                  * backref walking.
    1391             :                  */
    1392        5495 :                 if (num_bytes >= bctx->extent_len)
    1393        5299 :                         return BTRFS_ITERATE_EXTENT_INODES_STOP;
    1394             :         }
    1395             : 
    1396             :         return 0;
    1397             : }
    1398             : 
    1399     2624115 : static bool lookup_backref_cache(u64 leaf_bytenr, void *ctx,
    1400             :                                  const u64 **root_ids_ret, int *root_count_ret)
    1401             : {
    1402     2624115 :         struct backref_ctx *bctx = ctx;
    1403     2624115 :         struct send_ctx *sctx = bctx->sctx;
    1404     2624115 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    1405     2624115 :         const u64 key = leaf_bytenr >> fs_info->sectorsize_bits;
    1406     2624115 :         struct btrfs_lru_cache_entry *raw_entry;
    1407     2624115 :         struct backref_cache_entry *entry;
    1408             : 
    1409     2624115 :         if (btrfs_lru_cache_size(&sctx->backref_cache) == 0)
    1410             :                 return false;
    1411             : 
    1412             :         /*
    1413             :          * If relocation happened since we first filled the cache, then we must
    1414             :          * empty the cache and can not use it, because even though we operate on
    1415             :          * read-only roots, their leaves and nodes may have been reallocated and
    1416             :          * now be used for different nodes/leaves of the same tree or some other
    1417             :          * tree.
    1418             :          *
    1419             :          * We are called from iterate_extent_inodes() while either holding a
    1420             :          * transaction handle or holding fs_info->commit_root_sem, so no need
    1421             :          * to take any lock here.
    1422             :          */
    1423     2613652 :         if (fs_info->last_reloc_trans > sctx->backref_cache_last_reloc_trans) {
    1424        1520 :                 btrfs_lru_cache_clear(&sctx->backref_cache);
    1425        1520 :                 return false;
    1426             :         }
    1427             : 
    1428     2612132 :         raw_entry = btrfs_lru_cache_lookup(&sctx->backref_cache, key, 0);
    1429     2612132 :         if (!raw_entry)
    1430             :                 return false;
    1431             : 
    1432     1710009 :         entry = container_of(raw_entry, struct backref_cache_entry, entry);
    1433     1710009 :         *root_ids_ret = entry->root_ids;
    1434     1710009 :         *root_count_ret = entry->num_roots;
    1435             : 
    1436     1710009 :         return true;
    1437             : }
    1438             : 
    1439      353068 : static void store_backref_cache(u64 leaf_bytenr, const struct ulist *root_ids,
    1440             :                                 void *ctx)
    1441             : {
    1442      353068 :         struct backref_ctx *bctx = ctx;
    1443      353068 :         struct send_ctx *sctx = bctx->sctx;
    1444      353068 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    1445      353068 :         struct backref_cache_entry *new_entry;
    1446      353068 :         struct ulist_iterator uiter;
    1447      353068 :         struct ulist_node *node;
    1448      353068 :         int ret;
    1449             : 
    1450             :         /*
    1451             :          * We're called while holding a transaction handle or while holding
    1452             :          * fs_info->commit_root_sem (at iterate_extent_inodes()), so must do a
    1453             :          * NOFS allocation.
    1454             :          */
    1455      353068 :         new_entry = kmalloc(sizeof(struct backref_cache_entry), GFP_NOFS);
    1456             :         /* No worries, cache is optional. */
    1457      353066 :         if (!new_entry)
    1458           0 :                 return;
    1459             : 
    1460      353066 :         new_entry->entry.key = leaf_bytenr >> fs_info->sectorsize_bits;
    1461      353066 :         new_entry->entry.gen = 0;
    1462      353066 :         new_entry->num_roots = 0;
    1463      353066 :         ULIST_ITER_INIT(&uiter);
    1464      851342 :         while ((node = ulist_next(root_ids, &uiter)) != NULL) {
    1465      498272 :                 const u64 root_id = node->val;
    1466      498272 :                 struct clone_root *root;
    1467             : 
    1468      498272 :                 root = bsearch((void *)(uintptr_t)root_id, sctx->clone_roots,
    1469      498272 :                                sctx->clone_roots_cnt, sizeof(struct clone_root),
    1470             :                                __clone_root_cmp_bsearch);
    1471      498276 :                 if (!root)
    1472      335671 :                         continue;
    1473             : 
    1474             :                 /* Too many roots, just exit, no worries as caching is optional. */
    1475      162605 :                 if (new_entry->num_roots >= SEND_MAX_BACKREF_CACHE_ROOTS) {
    1476           0 :                         kfree(new_entry);
    1477           0 :                         return;
    1478             :                 }
    1479             : 
    1480      162605 :                 new_entry->root_ids[new_entry->num_roots] = root_id;
    1481      162605 :                 new_entry->num_roots++;
    1482             :         }
    1483             : 
    1484             :         /*
    1485             :          * We may have not added any roots to the new cache entry, which means
    1486             :          * none of the roots is part of the list of roots from which we are
    1487             :          * allowed to clone. Cache the new entry as it's still useful to avoid
    1488             :          * backref walking to determine which roots have a path to the leaf.
    1489             :          *
    1490             :          * Also use GFP_NOFS because we're called while holding a transaction
    1491             :          * handle or while holding fs_info->commit_root_sem.
    1492             :          */
    1493      353068 :         ret = btrfs_lru_cache_store(&sctx->backref_cache, &new_entry->entry,
    1494             :                                     GFP_NOFS);
    1495      353069 :         ASSERT(ret == 0 || ret == -ENOMEM);
    1496      353069 :         if (ret) {
    1497             :                 /* Caching is optional, no worries. */
    1498           0 :                 kfree(new_entry);
    1499           0 :                 return;
    1500             :         }
    1501             : 
    1502             :         /*
    1503             :          * We are called from iterate_extent_inodes() while either holding a
    1504             :          * transaction handle or holding fs_info->commit_root_sem, so no need
    1505             :          * to take any lock here.
    1506             :          */
    1507      353069 :         if (btrfs_lru_cache_size(&sctx->backref_cache) == 1)
    1508        1597 :                 sctx->backref_cache_last_reloc_trans = fs_info->last_reloc_trans;
    1509             : }
    1510             : 
    1511     2252186 : static int check_extent_item(u64 bytenr, const struct btrfs_extent_item *ei,
    1512             :                              const struct extent_buffer *leaf, void *ctx)
    1513             : {
    1514     2252186 :         const u64 refs = btrfs_extent_refs(leaf, ei);
    1515     2252191 :         const struct backref_ctx *bctx = ctx;
    1516     2252191 :         const struct send_ctx *sctx = bctx->sctx;
    1517             : 
    1518     2252191 :         if (bytenr == bctx->bytenr) {
    1519      634046 :                 const u64 flags = btrfs_extent_flags(leaf, ei);
    1520             : 
    1521      634045 :                 if (WARN_ON(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
    1522             :                         return -EUCLEAN;
    1523             : 
    1524             :                 /*
    1525             :                  * If we have only one reference and only the send root as a
    1526             :                  * clone source - meaning no clone roots were given in the
    1527             :                  * struct btrfs_ioctl_send_args passed to the send ioctl - then
    1528             :                  * it's our reference and there's no point in doing backref
    1529             :                  * walking which is expensive, so exit early.
    1530             :                  */
    1531      634045 :                 if (refs == 1 && sctx->clone_roots_cnt == 1)
    1532             :                         return -ENOENT;
    1533             :         }
    1534             : 
    1535             :         /*
    1536             :          * Backreference walking (iterate_extent_inodes() below) is currently
    1537             :          * too expensive when an extent has a large number of references, both
    1538             :          * in time spent and used memory. So for now just fallback to write
    1539             :          * operations instead of clone operations when an extent has more than
    1540             :          * a certain amount of references.
    1541             :          */
    1542     2232030 :         if (refs > SEND_MAX_EXTENT_REFS)
    1543        2689 :                 return -ENOENT;
    1544             : 
    1545             :         return 0;
    1546             : }
    1547             : 
    1548       70636 : static bool skip_self_data_ref(u64 root, u64 ino, u64 offset, void *ctx)
    1549             : {
    1550       70636 :         const struct backref_ctx *bctx = ctx;
    1551             : 
    1552       70636 :         if (ino == bctx->cur_objectid &&
    1553       70010 :             root == bctx->backref_owner &&
    1554       69896 :             offset == bctx->backref_offset)
    1555        4602 :                 return true;
    1556             : 
    1557             :         return false;
    1558             : }
    1559             : 
    1560             : /*
    1561             :  * Given an inode, offset and extent item, it finds a good clone for a clone
    1562             :  * instruction. Returns -ENOENT when none could be found. The function makes
    1563             :  * sure that the returned clone is usable at the point where sending is at the
    1564             :  * moment. This means, that no clones are accepted which lie behind the current
    1565             :  * inode+offset.
    1566             :  *
    1567             :  * path must point to the extent item when called.
    1568             :  */
    1569      634146 : static int find_extent_clone(struct send_ctx *sctx,
    1570             :                              struct btrfs_path *path,
    1571             :                              u64 ino, u64 data_offset,
    1572             :                              u64 ino_size,
    1573             :                              struct clone_root **found)
    1574             : {
    1575      634146 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    1576      634146 :         int ret;
    1577      634146 :         int extent_type;
    1578      634146 :         u64 logical;
    1579      634146 :         u64 disk_byte;
    1580      634146 :         u64 num_bytes;
    1581      634146 :         struct btrfs_file_extent_item *fi;
    1582      634146 :         struct extent_buffer *eb = path->nodes[0];
    1583      634146 :         struct backref_ctx backref_ctx = { 0 };
    1584      634146 :         struct btrfs_backref_walk_ctx backref_walk_ctx = { 0 };
    1585      634146 :         struct clone_root *cur_clone_root;
    1586      634146 :         int compressed;
    1587      634146 :         u32 i;
    1588             : 
    1589             :         /*
    1590             :          * With fallocate we can get prealloc extents beyond the inode's i_size,
    1591             :          * so we don't do anything here because clone operations can not clone
    1592             :          * to a range beyond i_size without increasing the i_size of the
    1593             :          * destination inode.
    1594             :          */
    1595      634146 :         if (data_offset >= ino_size)
    1596             :                 return 0;
    1597             : 
    1598      634061 :         fi = btrfs_item_ptr(eb, path->slots[0], struct btrfs_file_extent_item);
    1599      634061 :         extent_type = btrfs_file_extent_type(eb, fi);
    1600      634061 :         if (extent_type == BTRFS_FILE_EXTENT_INLINE)
    1601             :                 return -ENOENT;
    1602             : 
    1603      634047 :         disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
    1604      634047 :         if (disk_byte == 0)
    1605             :                 return -ENOENT;
    1606             : 
    1607      634047 :         compressed = btrfs_file_extent_compression(eb, fi);
    1608      634046 :         num_bytes = btrfs_file_extent_num_bytes(eb, fi);
    1609      634046 :         logical = disk_byte + btrfs_file_extent_offset(eb, fi);
    1610             : 
    1611             :         /*
    1612             :          * Setup the clone roots.
    1613             :          */
    1614     2314046 :         for (i = 0; i < sctx->clone_roots_cnt; i++) {
    1615     1045955 :                 cur_clone_root = sctx->clone_roots + i;
    1616     1045955 :                 cur_clone_root->ino = (u64)-1;
    1617     1045955 :                 cur_clone_root->offset = 0;
    1618     1045955 :                 cur_clone_root->num_bytes = 0;
    1619     1045955 :                 cur_clone_root->found_ref = false;
    1620             :         }
    1621             : 
    1622      634045 :         backref_ctx.sctx = sctx;
    1623      634045 :         backref_ctx.cur_objectid = ino;
    1624      634045 :         backref_ctx.cur_offset = data_offset;
    1625      634045 :         backref_ctx.bytenr = disk_byte;
    1626             :         /*
    1627             :          * Use the header owner and not the send root's id, because in case of a
    1628             :          * snapshot we can have shared subtrees.
    1629             :          */
    1630      634045 :         backref_ctx.backref_owner = btrfs_header_owner(eb);
    1631      634045 :         backref_ctx.backref_offset = data_offset - btrfs_file_extent_offset(eb, fi);
    1632             : 
    1633             :         /*
    1634             :          * The last extent of a file may be too large due to page alignment.
    1635             :          * We need to adjust extent_len in this case so that the checks in
    1636             :          * iterate_backrefs() work.
    1637             :          */
    1638      634046 :         if (data_offset + num_bytes >= ino_size)
    1639      601355 :                 backref_ctx.extent_len = ino_size - data_offset;
    1640             :         else
    1641       32691 :                 backref_ctx.extent_len = num_bytes;
    1642             : 
    1643             :         /*
    1644             :          * Now collect all backrefs.
    1645             :          */
    1646      634046 :         backref_walk_ctx.bytenr = disk_byte;
    1647      634046 :         if (compressed == BTRFS_COMPRESS_NONE)
    1648      634026 :                 backref_walk_ctx.extent_item_pos = btrfs_file_extent_offset(eb, fi);
    1649      634046 :         backref_walk_ctx.fs_info = fs_info;
    1650      634046 :         backref_walk_ctx.cache_lookup = lookup_backref_cache;
    1651      634046 :         backref_walk_ctx.cache_store = store_backref_cache;
    1652      634046 :         backref_walk_ctx.indirect_ref_iterator = iterate_backrefs;
    1653      634046 :         backref_walk_ctx.check_extent_item = check_extent_item;
    1654      634046 :         backref_walk_ctx.user_ctx = &backref_ctx;
    1655             : 
    1656             :         /*
    1657             :          * If have a single clone root, then it's the send root and we can tell
    1658             :          * the backref walking code to skip our own backref and not resolve it,
    1659             :          * since we can not use it for cloning - the source and destination
    1660             :          * ranges can't overlap and in case the leaf is shared through a subtree
    1661             :          * due to snapshots, we can't use those other roots since they are not
    1662             :          * in the list of clone roots.
    1663             :          */
    1664      634046 :         if (sctx->clone_roots_cnt == 1)
    1665      222145 :                 backref_walk_ctx.skip_data_ref = skip_self_data_ref;
    1666             : 
    1667      634046 :         ret = iterate_extent_inodes(&backref_walk_ctx, true, iterate_backrefs,
    1668             :                                     &backref_ctx);
    1669      634047 :         if (ret < 0)
    1670             :                 return ret;
    1671             : 
    1672      634047 :         down_read(&fs_info->commit_root_sem);
    1673      634047 :         if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
    1674             :                 /*
    1675             :                  * A transaction commit for a transaction in which block group
    1676             :                  * relocation was done just happened.
    1677             :                  * The disk_bytenr of the file extent item we processed is
    1678             :                  * possibly stale, referring to the extent's location before
    1679             :                  * relocation. So act as if we haven't found any clone sources
    1680             :                  * and fallback to write commands, which will read the correct
    1681             :                  * data from the new extent location. Otherwise we will fail
    1682             :                  * below because we haven't found our own back reference or we
    1683             :                  * could be getting incorrect sources in case the old extent
    1684             :                  * was already reallocated after the relocation.
    1685             :                  */
    1686          19 :                 up_read(&fs_info->commit_root_sem);
    1687          19 :                 return -ENOENT;
    1688             :         }
    1689      634028 :         up_read(&fs_info->commit_root_sem);
    1690             : 
    1691      634026 :         btrfs_debug(fs_info,
    1692             :                     "find_extent_clone: data_offset=%llu, ino=%llu, num_bytes=%llu, logical=%llu",
    1693             :                     data_offset, ino, num_bytes, logical);
    1694             : 
    1695      634026 :         if (!backref_ctx.found) {
    1696             :                 btrfs_debug(fs_info, "no clones found");
    1697             :                 return -ENOENT;
    1698             :         }
    1699             : 
    1700             :         cur_clone_root = NULL;
    1701        7992 :         for (i = 0; i < sctx->clone_roots_cnt; i++) {
    1702        7978 :                 struct clone_root *clone_root = &sctx->clone_roots[i];
    1703             : 
    1704        7978 :                 if (!clone_root->found_ref)
    1705        2659 :                         continue;
    1706             : 
    1707             :                 /*
    1708             :                  * Choose the root from which we can clone more bytes, to
    1709             :                  * minimize write operations and therefore have more extent
    1710             :                  * sharing at the destination (the same as in the source).
    1711             :                  */
    1712        5319 :                 if (!cur_clone_root ||
    1713           6 :                     clone_root->num_bytes > cur_clone_root->num_bytes) {
    1714        5319 :                         cur_clone_root = clone_root;
    1715             : 
    1716             :                         /*
    1717             :                          * We found an optimal clone candidate (any inode from
    1718             :                          * any root is fine), so we're done.
    1719             :                          */
    1720        5319 :                         if (clone_root->num_bytes >= backref_ctx.extent_len)
    1721             :                                 break;
    1722             :                 }
    1723             :         }
    1724             : 
    1725        5313 :         if (cur_clone_root) {
    1726        5313 :                 *found = cur_clone_root;
    1727        5313 :                 ret = 0;
    1728             :         } else {
    1729             :                 ret = -ENOENT;
    1730             :         }
    1731             : 
    1732             :         return ret;
    1733             : }
    1734             : 
    1735         350 : static int read_symlink(struct btrfs_root *root,
    1736             :                         u64 ino,
    1737             :                         struct fs_path *dest)
    1738             : {
    1739         350 :         int ret;
    1740         350 :         struct btrfs_path *path;
    1741         350 :         struct btrfs_key key;
    1742         350 :         struct btrfs_file_extent_item *ei;
    1743         350 :         u8 type;
    1744         350 :         u8 compression;
    1745         350 :         unsigned long off;
    1746         350 :         int len;
    1747             : 
    1748         350 :         path = alloc_path_for_send();
    1749         350 :         if (!path)
    1750             :                 return -ENOMEM;
    1751             : 
    1752         350 :         key.objectid = ino;
    1753         350 :         key.type = BTRFS_EXTENT_DATA_KEY;
    1754         350 :         key.offset = 0;
    1755         350 :         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    1756         350 :         if (ret < 0)
    1757           0 :                 goto out;
    1758         350 :         if (ret) {
    1759             :                 /*
    1760             :                  * An empty symlink inode. Can happen in rare error paths when
    1761             :                  * creating a symlink (transaction committed before the inode
    1762             :                  * eviction handler removed the symlink inode items and a crash
    1763             :                  * happened in between or the subvol was snapshotted in between).
    1764             :                  * Print an informative message to dmesg/syslog so that the user
    1765             :                  * can delete the symlink.
    1766             :                  */
    1767           0 :                 btrfs_err(root->fs_info,
    1768             :                           "Found empty symlink inode %llu at root %llu",
    1769             :                           ino, root->root_key.objectid);
    1770           0 :                 ret = -EIO;
    1771           0 :                 goto out;
    1772             :         }
    1773             : 
    1774         350 :         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
    1775             :                         struct btrfs_file_extent_item);
    1776         350 :         type = btrfs_file_extent_type(path->nodes[0], ei);
    1777         350 :         if (unlikely(type != BTRFS_FILE_EXTENT_INLINE)) {
    1778           0 :                 ret = -EUCLEAN;
    1779           0 :                 btrfs_crit(root->fs_info,
    1780             : "send: found symlink extent that is not inline, ino %llu root %llu extent type %d",
    1781             :                            ino, btrfs_root_id(root), type);
    1782           0 :                 goto out;
    1783             :         }
    1784         350 :         compression = btrfs_file_extent_compression(path->nodes[0], ei);
    1785         350 :         if (unlikely(compression != BTRFS_COMPRESS_NONE)) {
    1786           0 :                 ret = -EUCLEAN;
    1787           0 :                 btrfs_crit(root->fs_info,
    1788             : "send: found symlink extent with compression, ino %llu root %llu compression type %d",
    1789             :                            ino, btrfs_root_id(root), compression);
    1790           0 :                 goto out;
    1791             :         }
    1792             : 
    1793         350 :         off = btrfs_file_extent_inline_start(ei);
    1794         350 :         len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
    1795             : 
    1796         350 :         ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
    1797             : 
    1798         350 : out:
    1799         350 :         btrfs_free_path(path);
    1800         350 :         return ret;
    1801             : }
    1802             : 
    1803             : /*
    1804             :  * Helper function to generate a file name that is unique in the root of
    1805             :  * send_root and parent_root. This is used to generate names for orphan inodes.
    1806             :  */
    1807     1209230 : static int gen_unique_name(struct send_ctx *sctx,
    1808             :                            u64 ino, u64 gen,
    1809             :                            struct fs_path *dest)
    1810             : {
    1811     1209230 :         int ret = 0;
    1812     1209230 :         struct btrfs_path *path;
    1813     1209230 :         struct btrfs_dir_item *di;
    1814     1209230 :         char tmp[64];
    1815     1209230 :         int len;
    1816     1209230 :         u64 idx = 0;
    1817             : 
    1818     1209230 :         path = alloc_path_for_send();
    1819     1208949 :         if (!path)
    1820             :                 return -ENOMEM;
    1821             : 
    1822     1208949 :         while (1) {
    1823     1208949 :                 struct fscrypt_str tmp_name;
    1824             : 
    1825     1208949 :                 len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
    1826             :                                 ino, gen, idx);
    1827     1208949 :                 ASSERT(len < sizeof(tmp));
    1828     1208949 :                 tmp_name.name = tmp;
    1829     1208949 :                 tmp_name.len = strlen(tmp);
    1830             : 
    1831     1208835 :                 di = btrfs_lookup_dir_item(NULL, sctx->send_root,
    1832             :                                 path, BTRFS_FIRST_FREE_OBJECTID,
    1833             :                                 &tmp_name, 0);
    1834     1209440 :                 btrfs_release_path(path);
    1835     1209475 :                 if (IS_ERR(di)) {
    1836           0 :                         ret = PTR_ERR(di);
    1837           0 :                         goto out;
    1838             :                 }
    1839     1209475 :                 if (di) {
    1840             :                         /* not unique, try again */
    1841           0 :                         idx++;
    1842           0 :                         continue;
    1843             :                 }
    1844             : 
    1845     1209475 :                 if (!sctx->parent_root) {
    1846             :                         /* unique */
    1847             :                         ret = 0;
    1848     1209470 :                         break;
    1849             :                 }
    1850             : 
    1851      806285 :                 di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
    1852             :                                 path, BTRFS_FIRST_FREE_OBJECTID,
    1853             :                                 &tmp_name, 0);
    1854      806285 :                 btrfs_release_path(path);
    1855      806280 :                 if (IS_ERR(di)) {
    1856           0 :                         ret = PTR_ERR(di);
    1857           0 :                         goto out;
    1858             :                 }
    1859      806280 :                 if (di) {
    1860             :                         /* not unique, try again */
    1861           0 :                         idx++;
    1862           0 :                         continue;
    1863             :                 }
    1864             :                 /* unique */
    1865             :                 break;
    1866             :         }
    1867             : 
    1868     2418938 :         ret = fs_path_add(dest, tmp, strlen(tmp));
    1869             : 
    1870     1209465 : out:
    1871     1209465 :         btrfs_free_path(path);
    1872     1209465 :         return ret;
    1873             : }
    1874             : 
    1875             : enum inode_state {
    1876             :         inode_state_no_change,
    1877             :         inode_state_will_create,
    1878             :         inode_state_did_create,
    1879             :         inode_state_will_delete,
    1880             :         inode_state_did_delete,
    1881             : };
    1882             : 
    1883     3261915 : static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen,
    1884             :                                u64 *send_gen, u64 *parent_gen)
    1885             : {
    1886     3261915 :         int ret;
    1887     3261915 :         int left_ret;
    1888     3261915 :         int right_ret;
    1889     3261915 :         u64 left_gen;
    1890     3261915 :         u64 right_gen = 0;
    1891     3261915 :         struct btrfs_inode_info info;
    1892             : 
    1893     3261915 :         ret = get_inode_info(sctx->send_root, ino, &info);
    1894     3261965 :         if (ret < 0 && ret != -ENOENT)
    1895           0 :                 goto out;
    1896     3261965 :         left_ret = (info.nlink == 0) ? -ENOENT : ret;
    1897     3261965 :         left_gen = info.gen;
    1898     3261965 :         if (send_gen)
    1899       33630 :                 *send_gen = ((left_ret == -ENOENT) ? 0 : info.gen);
    1900             : 
    1901     3261965 :         if (!sctx->parent_root) {
    1902             :                 right_ret = -ENOENT;
    1903             :         } else {
    1904     2449685 :                 ret = get_inode_info(sctx->parent_root, ino, &info);
    1905     2449685 :                 if (ret < 0 && ret != -ENOENT)
    1906           0 :                         goto out;
    1907     2449685 :                 right_ret = (info.nlink == 0) ? -ENOENT : ret;
    1908     2449685 :                 right_gen = info.gen;
    1909     2449685 :                 if (parent_gen)
    1910        8052 :                         *parent_gen = ((right_ret == -ENOENT) ? 0 : info.gen);
    1911             :         }
    1912             : 
    1913     3261965 :         if (!left_ret && !right_ret) {
    1914     2045573 :                 if (left_gen == gen && right_gen == gen) {
    1915             :                         ret = inode_state_no_change;
    1916          52 :                 } else if (left_gen == gen) {
    1917          24 :                         if (ino < sctx->send_progress)
    1918             :                                 ret = inode_state_did_create;
    1919             :                         else
    1920           8 :                                 ret = inode_state_will_create;
    1921          28 :                 } else if (right_gen == gen) {
    1922          28 :                         if (ino < sctx->send_progress)
    1923             :                                 ret = inode_state_did_delete;
    1924             :                         else
    1925          11 :                                 ret = inode_state_will_delete;
    1926             :                 } else  {
    1927             :                         ret = -ENOENT;
    1928             :                 }
    1929     1216392 :         } else if (!left_ret) {
    1930     1216204 :                 if (left_gen == gen) {
    1931     1216204 :                         if (ino < sctx->send_progress)
    1932             :                                 ret = inode_state_did_create;
    1933             :                         else
    1934         604 :                                 ret = inode_state_will_create;
    1935             :                 } else {
    1936             :                         ret = -ENOENT;
    1937             :                 }
    1938         188 :         } else if (!right_ret) {
    1939         188 :                 if (right_gen == gen) {
    1940         188 :                         if (ino < sctx->send_progress)
    1941             :                                 ret = inode_state_did_delete;
    1942             :                         else
    1943         132 :                                 ret = inode_state_will_delete;
    1944             :                 } else {
    1945             :                         ret = -ENOENT;
    1946             :                 }
    1947             :         } else {
    1948             :                 ret = -ENOENT;
    1949             :         }
    1950             : 
    1951     3261965 : out:
    1952     3261965 :         return ret;
    1953             : }
    1954             : 
    1955     3038455 : static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen,
    1956             :                              u64 *send_gen, u64 *parent_gen)
    1957             : {
    1958     3038455 :         int ret;
    1959             : 
    1960     3038455 :         if (ino == BTRFS_FIRST_FREE_OBJECTID)
    1961             :                 return 1;
    1962             : 
    1963     1437088 :         ret = get_cur_inode_state(sctx, ino, gen, send_gen, parent_gen);
    1964     1437086 :         if (ret < 0)
    1965           0 :                 goto out;
    1966             : 
    1967     1437086 :         if (ret == inode_state_no_change ||
    1968     1437086 :             ret == inode_state_did_create ||
    1969             :             ret == inode_state_will_delete)
    1970             :                 ret = 1;
    1971             :         else
    1972             :                 ret = 0;
    1973             : 
    1974             : out:
    1975             :         return ret;
    1976             : }
    1977             : 
    1978             : /*
    1979             :  * Helper function to look up a dir item in a dir.
    1980             :  */
    1981     1622012 : static int lookup_dir_item_inode(struct btrfs_root *root,
    1982             :                                  u64 dir, const char *name, int name_len,
    1983             :                                  u64 *found_inode)
    1984             : {
    1985     1622012 :         int ret = 0;
    1986     1622012 :         struct btrfs_dir_item *di;
    1987     1622012 :         struct btrfs_key key;
    1988     1622012 :         struct btrfs_path *path;
    1989     1622012 :         struct fscrypt_str name_str = FSTR_INIT((char *)name, name_len);
    1990             : 
    1991     1622012 :         path = alloc_path_for_send();
    1992     1622012 :         if (!path)
    1993             :                 return -ENOMEM;
    1994             : 
    1995     1622012 :         di = btrfs_lookup_dir_item(NULL, root, path, dir, &name_str, 0);
    1996     2837703 :         if (IS_ERR_OR_NULL(di)) {
    1997      406321 :                 ret = di ? PTR_ERR(di) : -ENOENT;
    1998      406321 :                 goto out;
    1999             :         }
    2000     1215691 :         btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
    2001     1215691 :         if (key.type == BTRFS_ROOT_ITEM_KEY) {
    2002           3 :                 ret = -ENOENT;
    2003           3 :                 goto out;
    2004             :         }
    2005     1215688 :         *found_inode = key.objectid;
    2006             : 
    2007     1622012 : out:
    2008     1622012 :         btrfs_free_path(path);
    2009     1622012 :         return ret;
    2010             : }
    2011             : 
    2012             : /*
    2013             :  * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir,
    2014             :  * generation of the parent dir and the name of the dir entry.
    2015             :  */
    2016     1430423 : static int get_first_ref(struct btrfs_root *root, u64 ino,
    2017             :                          u64 *dir, u64 *dir_gen, struct fs_path *name)
    2018             : {
    2019     1430423 :         int ret;
    2020     1430423 :         struct btrfs_key key;
    2021     1430423 :         struct btrfs_key found_key;
    2022     1430423 :         struct btrfs_path *path;
    2023     1430423 :         int len;
    2024     1430423 :         u64 parent_dir;
    2025             : 
    2026     1430423 :         path = alloc_path_for_send();
    2027     1430427 :         if (!path)
    2028             :                 return -ENOMEM;
    2029             : 
    2030     1430427 :         key.objectid = ino;
    2031     1430427 :         key.type = BTRFS_INODE_REF_KEY;
    2032     1430427 :         key.offset = 0;
    2033             : 
    2034     1430427 :         ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
    2035     1430432 :         if (ret < 0)
    2036           0 :                 goto out;
    2037     1430432 :         if (!ret)
    2038     1430374 :                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
    2039             :                                 path->slots[0]);
    2040     1430430 :         if (ret || found_key.objectid != ino ||
    2041     1430372 :             (found_key.type != BTRFS_INODE_REF_KEY &&
    2042             :              found_key.type != BTRFS_INODE_EXTREF_KEY)) {
    2043          58 :                 ret = -ENOENT;
    2044          58 :                 goto out;
    2045             :         }
    2046             : 
    2047     1430372 :         if (found_key.type == BTRFS_INODE_REF_KEY) {
    2048     1430371 :                 struct btrfs_inode_ref *iref;
    2049     1430371 :                 iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
    2050             :                                       struct btrfs_inode_ref);
    2051     1430360 :                 len = btrfs_inode_ref_name_len(path->nodes[0], iref);
    2052     1430360 :                 ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
    2053     1430360 :                                                      (unsigned long)(iref + 1),
    2054             :                                                      len);
    2055     1430368 :                 parent_dir = found_key.offset;
    2056             :         } else {
    2057           1 :                 struct btrfs_inode_extref *extref;
    2058           1 :                 extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
    2059             :                                         struct btrfs_inode_extref);
    2060           1 :                 len = btrfs_inode_extref_name_len(path->nodes[0], extref);
    2061           1 :                 ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
    2062           1 :                                         (unsigned long)&extref->name, len);
    2063           1 :                 parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
    2064             :         }
    2065     1430368 :         if (ret < 0)
    2066           0 :                 goto out;
    2067     1430368 :         btrfs_release_path(path);
    2068             : 
    2069     1430373 :         if (dir_gen) {
    2070     1425764 :                 ret = get_inode_gen(root, parent_dir, dir_gen);
    2071     1425765 :                 if (ret < 0)
    2072           0 :                         goto out;
    2073             :         }
    2074             : 
    2075     1430374 :         *dir = parent_dir;
    2076             : 
    2077     1430432 : out:
    2078     1430432 :         btrfs_free_path(path);
    2079     1430432 :         return ret;
    2080             : }
    2081             : 
    2082         245 : static int is_first_ref(struct btrfs_root *root,
    2083             :                         u64 ino, u64 dir,
    2084             :                         const char *name, int name_len)
    2085             : {
    2086         245 :         int ret;
    2087         245 :         struct fs_path *tmp_name;
    2088         245 :         u64 tmp_dir;
    2089             : 
    2090         245 :         tmp_name = fs_path_alloc();
    2091         245 :         if (!tmp_name)
    2092             :                 return -ENOMEM;
    2093             : 
    2094         245 :         ret = get_first_ref(root, ino, &tmp_dir, NULL, tmp_name);
    2095         245 :         if (ret < 0)
    2096           0 :                 goto out;
    2097             : 
    2098         245 :         if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
    2099           6 :                 ret = 0;
    2100           6 :                 goto out;
    2101             :         }
    2102             : 
    2103         478 :         ret = !memcmp(tmp_name->start, name, name_len);
    2104             : 
    2105         245 : out:
    2106         245 :         fs_path_free(tmp_name);
    2107         245 :         return ret;
    2108             : }
    2109             : 
    2110             : /*
    2111             :  * Used by process_recorded_refs to determine if a new ref would overwrite an
    2112             :  * already existing ref. In case it detects an overwrite, it returns the
    2113             :  * inode/gen/mode in who_ino/who_gen/who_mode.
    2114             :  * When an overwrite is detected, process_recorded_refs does proper orphanizing
    2115             :  * to make sure later references to the overwritten inode are possible.
    2116             :  * Orphanizing is however only required for the first ref of an inode.
    2117             :  * process_recorded_refs does an additional is_first_ref check to see if
    2118             :  * orphanizing is really required.
    2119             :  */
    2120      607635 : static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
    2121             :                               const char *name, int name_len,
    2122             :                               u64 *who_ino, u64 *who_gen, u64 *who_mode)
    2123             : {
    2124      607635 :         int ret;
    2125      607635 :         u64 parent_root_dir_gen;
    2126      607635 :         u64 other_inode = 0;
    2127      607635 :         struct btrfs_inode_info info;
    2128             : 
    2129      607635 :         if (!sctx->parent_root)
    2130             :                 return 0;
    2131             : 
    2132      404138 :         ret = is_inode_existent(sctx, dir, dir_gen, NULL, &parent_root_dir_gen);
    2133      404138 :         if (ret <= 0)
    2134             :                 return 0;
    2135             : 
    2136             :         /*
    2137             :          * If we have a parent root we need to verify that the parent dir was
    2138             :          * not deleted and then re-created. If it was, then we have no overwrite
    2139             :          * and we can just unlink this entry.
    2140             :          *
    2141             :          * @parent_root_dir_gen was set to 0 if the inode does not exist in the
    2142             :          * parent root.
    2143             :          */
    2144      404138 :         if (sctx->parent_root && dir != BTRFS_FIRST_FREE_OBJECTID &&
    2145        4103 :             parent_root_dir_gen != dir_gen)
    2146             :                 return 0;
    2147             : 
    2148      403984 :         ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
    2149             :                                     &other_inode);
    2150      403984 :         if (ret == -ENOENT)
    2151             :                 return 0;
    2152         703 :         else if (ret < 0)
    2153             :                 return ret;
    2154             : 
    2155             :         /*
    2156             :          * Check if the overwritten ref was already processed. If yes, the ref
    2157             :          * was already unlinked/moved, so we can safely assume that we will not
    2158             :          * overwrite anything at this point in time.
    2159             :          */
    2160        1167 :         if (other_inode > sctx->send_progress ||
    2161         464 :             is_waiting_for_move(sctx, other_inode)) {
    2162         245 :                 ret = get_inode_info(sctx->parent_root, other_inode, &info);
    2163         245 :                 if (ret < 0)
    2164             :                         return ret;
    2165             : 
    2166         245 :                 *who_ino = other_inode;
    2167         245 :                 *who_gen = info.gen;
    2168         245 :                 *who_mode = info.mode;
    2169         245 :                 return 1;
    2170             :         }
    2171             : 
    2172             :         return 0;
    2173             : }
    2174             : 
    2175             : /*
    2176             :  * Checks if the ref was overwritten by an already processed inode. This is
    2177             :  * used by __get_cur_name_and_parent to find out if the ref was orphanized and
    2178             :  * thus the orphan name needs to be used.
    2179             :  * process_recorded_refs also uses it to avoid unlinking of refs that were
    2180             :  * overwritten.
    2181             :  */
    2182     1419735 : static int did_overwrite_ref(struct send_ctx *sctx,
    2183             :                             u64 dir, u64 dir_gen,
    2184             :                             u64 ino, u64 ino_gen,
    2185             :                             const char *name, int name_len)
    2186             : {
    2187     1419735 :         int ret;
    2188     1419735 :         u64 ow_inode;
    2189     1419735 :         u64 ow_gen = 0;
    2190     1419735 :         u64 send_root_dir_gen;
    2191             : 
    2192     1419735 :         if (!sctx->parent_root)
    2193             :                 return 0;
    2194             : 
    2195     1218168 :         ret = is_inode_existent(sctx, dir, dir_gen, &send_root_dir_gen, NULL);
    2196     1218168 :         if (ret <= 0)
    2197             :                 return ret;
    2198             : 
    2199             :         /*
    2200             :          * @send_root_dir_gen was set to 0 if the inode does not exist in the
    2201             :          * send root.
    2202             :          */
    2203     1218041 :         if (dir != BTRFS_FIRST_FREE_OBJECTID && send_root_dir_gen != dir_gen)
    2204             :                 return 0;
    2205             : 
    2206             :         /* check if the ref was overwritten by another ref */
    2207     1218028 :         ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
    2208             :                                     &ow_inode);
    2209     1218028 :         if (ret == -ENOENT) {
    2210             :                 /* was never and will never be overwritten */
    2211             :                 return 0;
    2212     1214985 :         } else if (ret < 0) {
    2213             :                 return ret;
    2214             :         }
    2215             : 
    2216     1214985 :         if (ow_inode == ino) {
    2217     1213243 :                 ret = get_inode_gen(sctx->send_root, ow_inode, &ow_gen);
    2218     1213243 :                 if (ret < 0)
    2219             :                         return ret;
    2220             : 
    2221             :                 /* It's the same inode, so no overwrite happened. */
    2222     1213243 :                 if (ow_gen == ino_gen)
    2223             :                         return 0;
    2224             :         }
    2225             : 
    2226             :         /*
    2227             :          * We know that it is or will be overwritten. Check this now.
    2228             :          * The current inode being processed might have been the one that caused
    2229             :          * inode 'ino' to be orphanized, therefore check if ow_inode matches
    2230             :          * the current inode being processed.
    2231             :          */
    2232        1742 :         if (ow_inode < sctx->send_progress)
    2233             :                 return 1;
    2234             : 
    2235        1311 :         if (ino != sctx->cur_ino && ow_inode == sctx->cur_ino) {
    2236           9 :                 if (ow_gen == 0) {
    2237           9 :                         ret = get_inode_gen(sctx->send_root, ow_inode, &ow_gen);
    2238           9 :                         if (ret < 0)
    2239             :                                 return ret;
    2240             :                 }
    2241           9 :                 if (ow_gen == sctx->cur_inode_gen)
    2242           9 :                         return 1;
    2243             :         }
    2244             : 
    2245             :         return 0;
    2246             : }
    2247             : 
    2248             : /*
    2249             :  * Same as did_overwrite_ref, but also checks if it is the first ref of an inode
    2250             :  * that got overwritten. This is used by process_recorded_refs to determine
    2251             :  * if it has to use the path as returned by get_cur_path or the orphan name.
    2252             :  */
    2253        1561 : static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
    2254             : {
    2255        1561 :         int ret = 0;
    2256        1561 :         struct fs_path *name = NULL;
    2257        1561 :         u64 dir;
    2258        1561 :         u64 dir_gen;
    2259             : 
    2260        1561 :         if (!sctx->parent_root)
    2261           0 :                 goto out;
    2262             : 
    2263        1561 :         name = fs_path_alloc();
    2264        1561 :         if (!name)
    2265             :                 return -ENOMEM;
    2266             : 
    2267        1561 :         ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name);
    2268        1561 :         if (ret < 0)
    2269           0 :                 goto out;
    2270             : 
    2271        1561 :         ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
    2272             :                         name->start, fs_path_len(name));
    2273             : 
    2274        1561 : out:
    2275        1561 :         fs_path_free(name);
    2276        1561 :         return ret;
    2277             : }
    2278             : 
    2279             : static inline struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
    2280             :                                                          u64 ino, u64 gen)
    2281             : {
    2282     4696006 :         struct btrfs_lru_cache_entry *entry;
    2283             : 
    2284     4696006 :         entry = btrfs_lru_cache_lookup(&sctx->name_cache, ino, gen);
    2285     4696393 :         if (!entry)
    2286             :                 return NULL;
    2287             : 
    2288             :         return container_of(entry, struct name_cache_entry, entry);
    2289             : }
    2290             : 
    2291             : /*
    2292             :  * Used by get_cur_path for each ref up to the root.
    2293             :  * Returns 0 if it succeeded.
    2294             :  * Returns 1 if the inode is not existent or got overwritten. In that case, the
    2295             :  * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
    2296             :  * is returned, parent_ino/parent_gen are not guaranteed to be valid.
    2297             :  * Returns <0 in case of error.
    2298             :  */
    2299     4695767 : static int __get_cur_name_and_parent(struct send_ctx *sctx,
    2300             :                                      u64 ino, u64 gen,
    2301             :                                      u64 *parent_ino,
    2302             :                                      u64 *parent_gen,
    2303             :                                      struct fs_path *dest)
    2304             : {
    2305     4695767 :         int ret;
    2306     4695767 :         int nce_ret;
    2307     4695767 :         struct name_cache_entry *nce;
    2308             : 
    2309             :         /*
    2310             :          * First check if we already did a call to this function with the same
    2311             :          * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
    2312             :          * return the cached result.
    2313             :          */
    2314     4695767 :         nce = name_cache_search(sctx, ino, gen);
    2315     3684716 :         if (nce) {
    2316     3684716 :                 if (ino < sctx->send_progress && nce->need_later_update) {
    2317      404707 :                         btrfs_lru_cache_remove(&sctx->name_cache, &nce->entry);
    2318      404707 :                         nce = NULL;
    2319             :                 } else {
    2320     3280009 :                         *parent_ino = nce->parent_ino;
    2321     3280009 :                         *parent_gen = nce->parent_gen;
    2322     3280009 :                         ret = fs_path_add(dest, nce->name, nce->name_len);
    2323     3279703 :                         if (ret < 0)
    2324           0 :                                 goto out;
    2325     3279703 :                         ret = nce->ret;
    2326     3279703 :                         goto out;
    2327             :                 }
    2328             :         }
    2329             : 
    2330             :         /*
    2331             :          * If the inode is not existent yet, add the orphan name and return 1.
    2332             :          * This should only happen for the parent dir that we determine in
    2333             :          * record_new_ref_if_needed().
    2334             :          */
    2335     1416145 :         ret = is_inode_existent(sctx, ino, gen, NULL, NULL);
    2336     1416149 :         if (ret < 0)
    2337           0 :                 goto out;
    2338             : 
    2339     1416149 :         if (!ret) {
    2340         156 :                 ret = gen_unique_name(sctx, ino, gen, dest);
    2341         156 :                 if (ret < 0)
    2342           0 :                         goto out;
    2343         156 :                 ret = 1;
    2344         156 :                 goto out_cache;
    2345             :         }
    2346             : 
    2347             :         /*
    2348             :          * Depending on whether the inode was already processed or not, use
    2349             :          * send_root or parent_root for ref lookup.
    2350             :          */
    2351     1415993 :         if (ino < sctx->send_progress)
    2352     1010803 :                 ret = get_first_ref(sctx->send_root, ino,
    2353             :                                     parent_ino, parent_gen, dest);
    2354             :         else
    2355      405190 :                 ret = get_first_ref(sctx->parent_root, ino,
    2356             :                                     parent_ino, parent_gen, dest);
    2357     1415994 :         if (ret < 0)
    2358           0 :                 goto out;
    2359             : 
    2360             :         /*
    2361             :          * Check if the ref was overwritten by an inode's ref that was processed
    2362             :          * earlier. If yes, treat as orphan and return 1.
    2363             :          */
    2364     1415994 :         ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
    2365     1415994 :                         dest->start, dest->end - dest->start);
    2366     1415997 :         if (ret < 0)
    2367           0 :                 goto out;
    2368     1415997 :         if (ret) {
    2369          25 :                 fs_path_reset(dest);
    2370          25 :                 ret = gen_unique_name(sctx, ino, gen, dest);
    2371          25 :                 if (ret < 0)
    2372           0 :                         goto out;
    2373             :                 ret = 1;
    2374             :         }
    2375             : 
    2376     1415972 : out_cache:
    2377             :         /*
    2378             :          * Store the result of the lookup in the name cache.
    2379             :          */
    2380     1416153 :         nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL);
    2381     1416137 :         if (!nce) {
    2382           0 :                 ret = -ENOMEM;
    2383           0 :                 goto out;
    2384             :         }
    2385             : 
    2386     1416137 :         nce->entry.key = ino;
    2387     1416137 :         nce->entry.gen = gen;
    2388     1416137 :         nce->parent_ino = *parent_ino;
    2389     1416137 :         nce->parent_gen = *parent_gen;
    2390     1416137 :         nce->name_len = fs_path_len(dest);
    2391     1416137 :         nce->ret = ret;
    2392     1416137 :         strcpy(nce->name, dest->start);
    2393             : 
    2394     1416137 :         if (ino < sctx->send_progress)
    2395     1010800 :                 nce->need_later_update = 0;
    2396             :         else
    2397      405337 :                 nce->need_later_update = 1;
    2398             : 
    2399     1416137 :         nce_ret = btrfs_lru_cache_store(&sctx->name_cache, &nce->entry, GFP_KERNEL);
    2400     1416139 :         if (nce_ret < 0) {
    2401           0 :                 kfree(nce);
    2402           0 :                 ret = nce_ret;
    2403             :         }
    2404             : 
    2405     1416139 : out:
    2406     4695842 :         return ret;
    2407             : }
    2408             : 
    2409             : /*
    2410             :  * Magic happens here. This function returns the first ref to an inode as it
    2411             :  * would look like while receiving the stream at this point in time.
    2412             :  * We walk the path up to the root. For every inode in between, we check if it
    2413             :  * was already processed/sent. If yes, we continue with the parent as found
    2414             :  * in send_root. If not, we continue with the parent as found in parent_root.
    2415             :  * If we encounter an inode that was deleted at this point in time, we use the
    2416             :  * inodes "orphan" name instead of the real name and stop. Same with new inodes
    2417             :  * that were not created yet and overwritten inodes/refs.
    2418             :  *
    2419             :  * When do we have orphan inodes:
    2420             :  * 1. When an inode is freshly created and thus no valid refs are available yet
    2421             :  * 2. When a directory lost all it's refs (deleted) but still has dir items
    2422             :  *    inside which were not processed yet (pending for move/delete). If anyone
    2423             :  *    tried to get the path to the dir items, it would get a path inside that
    2424             :  *    orphan directory.
    2425             :  * 3. When an inode is moved around or gets new links, it may overwrite the ref
    2426             :  *    of an unprocessed inode. If in that case the first ref would be
    2427             :  *    overwritten, the overwritten inode gets "orphanized". Later when we
    2428             :  *    process this overwritten inode, it is restored at a new place by moving
    2429             :  *    the orphan inode.
    2430             :  *
    2431             :  * sctx->send_progress tells this function at which point in time receiving
    2432             :  * would be.
    2433             :  */
    2434     4879111 : static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
    2435             :                         struct fs_path *dest)
    2436             : {
    2437     4879111 :         int ret = 0;
    2438     4879111 :         struct fs_path *name = NULL;
    2439     4879111 :         u64 parent_inode = 0;
    2440     4879111 :         u64 parent_gen = 0;
    2441     4879111 :         int stop = 0;
    2442             : 
    2443     4879111 :         name = fs_path_alloc();
    2444     4879065 :         if (!name) {
    2445           0 :                 ret = -ENOMEM;
    2446           0 :                 goto out;
    2447             :         }
    2448             : 
    2449     4879065 :         dest->reversed = 1;
    2450     4879065 :         fs_path_reset(dest);
    2451             : 
    2452     9574939 :         while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
    2453     4696375 :                 struct waiting_dir_move *wdm;
    2454             : 
    2455     4696375 :                 fs_path_reset(name);
    2456             : 
    2457     9392552 :                 if (is_waiting_for_rm(sctx, ino, gen)) {
    2458          41 :                         ret = gen_unique_name(sctx, ino, gen, name);
    2459          41 :                         if (ret < 0)
    2460           0 :                                 goto out;
    2461          41 :                         ret = fs_path_add_path(dest, name);
    2462          41 :                         break;
    2463             :                 }
    2464             : 
    2465     4696265 :                 wdm = get_waiting_dir_move(sctx, ino);
    2466     4696265 :                 if (wdm && wdm->orphanized) {
    2467          67 :                         ret = gen_unique_name(sctx, ino, gen, name);
    2468          67 :                         stop = 1;
    2469     4696198 :                 } else if (wdm) {
    2470        1158 :                         ret = get_first_ref(sctx->parent_root, ino,
    2471             :                                             &parent_inode, &parent_gen, name);
    2472             :                 } else {
    2473     4695040 :                         ret = __get_cur_name_and_parent(sctx, ino, gen,
    2474             :                                                         &parent_inode,
    2475             :                                                         &parent_gen, name);
    2476     4695045 :                         if (ret)
    2477        1160 :                                 stop = 1;
    2478             :                 }
    2479             : 
    2480     4696207 :                 if (ret < 0)
    2481           0 :                         goto out;
    2482             : 
    2483     4696207 :                 ret = fs_path_add_path(dest, name);
    2484     4695874 :                 if (ret < 0)
    2485           0 :                         goto out;
    2486             : 
    2487     4695874 :                 ino = parent_inode;
    2488     4695874 :                 gen = parent_gen;
    2489             :         }
    2490             : 
    2491     4878888 : out:
    2492     4878929 :         fs_path_free(name);
    2493     4878889 :         if (!ret)
    2494     4878910 :                 fs_path_unreverse(dest);
    2495     4878457 :         return ret;
    2496             : }
    2497             : 
    2498             : /*
    2499             :  * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
    2500             :  */
    2501         212 : static int send_subvol_begin(struct send_ctx *sctx)
    2502             : {
    2503         212 :         int ret;
    2504         212 :         struct btrfs_root *send_root = sctx->send_root;
    2505         212 :         struct btrfs_root *parent_root = sctx->parent_root;
    2506         212 :         struct btrfs_path *path;
    2507         212 :         struct btrfs_key key;
    2508         212 :         struct btrfs_root_ref *ref;
    2509         212 :         struct extent_buffer *leaf;
    2510         212 :         char *name = NULL;
    2511         212 :         int namelen;
    2512             : 
    2513         212 :         path = btrfs_alloc_path();
    2514         213 :         if (!path)
    2515             :                 return -ENOMEM;
    2516             : 
    2517         213 :         name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL);
    2518         213 :         if (!name) {
    2519           0 :                 btrfs_free_path(path);
    2520           0 :                 return -ENOMEM;
    2521             :         }
    2522             : 
    2523         213 :         key.objectid = send_root->root_key.objectid;
    2524         213 :         key.type = BTRFS_ROOT_BACKREF_KEY;
    2525         213 :         key.offset = 0;
    2526             : 
    2527         213 :         ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root,
    2528             :                                 &key, path, 1, 0);
    2529         213 :         if (ret < 0)
    2530           0 :                 goto out;
    2531         213 :         if (ret) {
    2532           0 :                 ret = -ENOENT;
    2533           0 :                 goto out;
    2534             :         }
    2535             : 
    2536         213 :         leaf = path->nodes[0];
    2537         213 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    2538         213 :         if (key.type != BTRFS_ROOT_BACKREF_KEY ||
    2539         212 :             key.objectid != send_root->root_key.objectid) {
    2540           0 :                 ret = -ENOENT;
    2541           0 :                 goto out;
    2542             :         }
    2543         213 :         ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
    2544         212 :         namelen = btrfs_root_ref_name_len(leaf, ref);
    2545         213 :         read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen);
    2546         213 :         btrfs_release_path(path);
    2547             : 
    2548         213 :         if (parent_root) {
    2549         126 :                 ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT);
    2550         125 :                 if (ret < 0)
    2551           0 :                         goto out;
    2552             :         } else {
    2553          87 :                 ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL);
    2554          87 :                 if (ret < 0)
    2555           0 :                         goto out;
    2556             :         }
    2557             : 
    2558         212 :         TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
    2559             : 
    2560         212 :         if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid))
    2561           4 :                 TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
    2562             :                             sctx->send_root->root_item.received_uuid);
    2563             :         else
    2564         208 :                 TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
    2565             :                             sctx->send_root->root_item.uuid);
    2566             : 
    2567         212 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
    2568             :                     btrfs_root_ctransid(&sctx->send_root->root_item));
    2569         212 :         if (parent_root) {
    2570         125 :                 if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid))
    2571           4 :                         TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
    2572             :                                      parent_root->root_item.received_uuid);
    2573             :                 else
    2574         121 :                         TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
    2575             :                                      parent_root->root_item.uuid);
    2576         125 :                 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
    2577             :                             btrfs_root_ctransid(&sctx->parent_root->root_item));
    2578             :         }
    2579             : 
    2580         212 :         ret = send_cmd(sctx);
    2581             : 
    2582         213 : tlv_put_failure:
    2583         213 : out:
    2584         213 :         btrfs_free_path(path);
    2585         213 :         kfree(name);
    2586         213 :         return ret;
    2587             : }
    2588             : 
    2589         438 : static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size)
    2590             : {
    2591         438 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    2592         438 :         int ret = 0;
    2593         438 :         struct fs_path *p;
    2594             : 
    2595         438 :         btrfs_debug(fs_info, "send_truncate %llu size=%llu", ino, size);
    2596             : 
    2597         438 :         p = fs_path_alloc();
    2598         438 :         if (!p)
    2599             :                 return -ENOMEM;
    2600             : 
    2601         438 :         ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE);
    2602         438 :         if (ret < 0)
    2603           0 :                 goto out;
    2604             : 
    2605         438 :         ret = get_cur_path(sctx, ino, gen, p);
    2606         438 :         if (ret < 0)
    2607           0 :                 goto out;
    2608         438 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    2609         438 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size);
    2610             : 
    2611         438 :         ret = send_cmd(sctx);
    2612             : 
    2613         438 : tlv_put_failure:
    2614         438 : out:
    2615         438 :         fs_path_free(p);
    2616         438 :         return ret;
    2617             : }
    2618             : 
    2619      603770 : static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode)
    2620             : {
    2621      603770 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    2622      603770 :         int ret = 0;
    2623      603770 :         struct fs_path *p;
    2624             : 
    2625      603770 :         btrfs_debug(fs_info, "send_chmod %llu mode=%llu", ino, mode);
    2626             : 
    2627      603770 :         p = fs_path_alloc();
    2628      603742 :         if (!p)
    2629             :                 return -ENOMEM;
    2630             : 
    2631      603742 :         ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD);
    2632      603671 :         if (ret < 0)
    2633           0 :                 goto out;
    2634             : 
    2635      603671 :         ret = get_cur_path(sctx, ino, gen, p);
    2636      603914 :         if (ret < 0)
    2637           0 :                 goto out;
    2638      603914 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    2639      603875 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777);
    2640             : 
    2641      603877 :         ret = send_cmd(sctx);
    2642             : 
    2643      604042 : tlv_put_failure:
    2644      604042 : out:
    2645      604042 :         fs_path_free(p);
    2646      604042 :         return ret;
    2647             : }
    2648             : 
    2649         194 : static int send_fileattr(struct send_ctx *sctx, u64 ino, u64 gen, u64 fileattr)
    2650             : {
    2651         194 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    2652         194 :         int ret = 0;
    2653         194 :         struct fs_path *p;
    2654             : 
    2655         194 :         if (sctx->proto < 2)
    2656             :                 return 0;
    2657             : 
    2658           0 :         btrfs_debug(fs_info, "send_fileattr %llu fileattr=%llu", ino, fileattr);
    2659             : 
    2660           0 :         p = fs_path_alloc();
    2661           0 :         if (!p)
    2662             :                 return -ENOMEM;
    2663             : 
    2664           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_FILEATTR);
    2665           0 :         if (ret < 0)
    2666           0 :                 goto out;
    2667             : 
    2668           0 :         ret = get_cur_path(sctx, ino, gen, p);
    2669           0 :         if (ret < 0)
    2670           0 :                 goto out;
    2671           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    2672           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_FILEATTR, fileattr);
    2673             : 
    2674           0 :         ret = send_cmd(sctx);
    2675             : 
    2676           0 : tlv_put_failure:
    2677           0 : out:
    2678           0 :         fs_path_free(p);
    2679           0 :         return ret;
    2680             : }
    2681             : 
    2682      604473 : static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid)
    2683             : {
    2684      604473 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    2685      604473 :         int ret = 0;
    2686      604473 :         struct fs_path *p;
    2687             : 
    2688      604473 :         btrfs_debug(fs_info, "send_chown %llu uid=%llu, gid=%llu",
    2689             :                     ino, uid, gid);
    2690             : 
    2691      604473 :         p = fs_path_alloc();
    2692      604119 :         if (!p)
    2693             :                 return -ENOMEM;
    2694             : 
    2695      604119 :         ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN);
    2696      604131 :         if (ret < 0)
    2697           0 :                 goto out;
    2698             : 
    2699      604131 :         ret = get_cur_path(sctx, ino, gen, p);
    2700      604137 :         if (ret < 0)
    2701           0 :                 goto out;
    2702      604137 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    2703      603960 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid);
    2704      604212 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid);
    2705             : 
    2706      604148 :         ret = send_cmd(sctx);
    2707             : 
    2708      604619 : tlv_put_failure:
    2709      604619 : out:
    2710      604619 :         fs_path_free(p);
    2711      604619 :         return ret;
    2712             : }
    2713             : 
    2714     1011160 : static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
    2715             : {
    2716     1011160 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    2717     1011160 :         int ret = 0;
    2718     1011160 :         struct fs_path *p = NULL;
    2719     1011160 :         struct btrfs_inode_item *ii;
    2720     1011160 :         struct btrfs_path *path = NULL;
    2721     1011160 :         struct extent_buffer *eb;
    2722     1011160 :         struct btrfs_key key;
    2723     1011160 :         int slot;
    2724             : 
    2725     1011160 :         btrfs_debug(fs_info, "send_utimes %llu", ino);
    2726             : 
    2727     1011160 :         p = fs_path_alloc();
    2728     1011098 :         if (!p)
    2729             :                 return -ENOMEM;
    2730             : 
    2731     1011098 :         path = alloc_path_for_send();
    2732     1011120 :         if (!path) {
    2733           0 :                 ret = -ENOMEM;
    2734           0 :                 goto out;
    2735             :         }
    2736             : 
    2737     1011120 :         key.objectid = ino;
    2738     1011120 :         key.type = BTRFS_INODE_ITEM_KEY;
    2739     1011120 :         key.offset = 0;
    2740     1011120 :         ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
    2741     1011393 :         if (ret > 0)
    2742             :                 ret = -ENOENT;
    2743     1011394 :         if (ret < 0)
    2744           0 :                 goto out;
    2745             : 
    2746     1011393 :         eb = path->nodes[0];
    2747     1011393 :         slot = path->slots[0];
    2748     1011393 :         ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
    2749             : 
    2750     1011329 :         ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES);
    2751     1011328 :         if (ret < 0)
    2752           0 :                 goto out;
    2753             : 
    2754     1011328 :         ret = get_cur_path(sctx, ino, gen, p);
    2755     1011333 :         if (ret < 0)
    2756           0 :                 goto out;
    2757     1011333 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    2758     1011316 :         TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime);
    2759     1011323 :         TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime);
    2760     1011330 :         TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, &ii->ctime);
    2761     1011331 :         if (sctx->proto >= 2)
    2762           0 :                 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_OTIME, eb, &ii->otime);
    2763             : 
    2764     1011331 :         ret = send_cmd(sctx);
    2765             : 
    2766     1011394 : tlv_put_failure:
    2767     1011394 : out:
    2768     1011394 :         fs_path_free(p);
    2769     1011378 :         btrfs_free_path(path);
    2770     1011378 :         return ret;
    2771             : }
    2772             : 
    2773             : /*
    2774             :  * If the cache is full, we can't remove entries from it and do a call to
    2775             :  * send_utimes() for each respective inode, because we might be finishing
    2776             :  * processing an inode that is a directory and it just got renamed, and existing
    2777             :  * entries in the cache may refer to inodes that have the directory in their
    2778             :  * full path - in which case we would generate outdated paths (pre-rename)
    2779             :  * for the inodes that the cache entries point to. Instead of prunning the
    2780             :  * cache when inserting, do it after we finish processing each inode at
    2781             :  * finish_inode_if_needed().
    2782             :  */
    2783      611568 : static int cache_dir_utimes(struct send_ctx *sctx, u64 dir, u64 gen)
    2784             : {
    2785      611568 :         struct btrfs_lru_cache_entry *entry;
    2786      611568 :         int ret;
    2787             : 
    2788      611568 :         entry = btrfs_lru_cache_lookup(&sctx->dir_utimes_cache, dir, gen);
    2789      611568 :         if (entry != NULL)
    2790             :                 return 0;
    2791             : 
    2792             :         /* Caching is optional, don't fail if we can't allocate memory. */
    2793        3461 :         entry = kmalloc(sizeof(*entry), GFP_KERNEL);
    2794        3461 :         if (!entry)
    2795           0 :                 return send_utimes(sctx, dir, gen);
    2796             : 
    2797        3461 :         entry->key = dir;
    2798        3461 :         entry->gen = gen;
    2799             : 
    2800        3461 :         ret = btrfs_lru_cache_store(&sctx->dir_utimes_cache, entry, GFP_KERNEL);
    2801        3461 :         ASSERT(ret != -EEXIST);
    2802        3461 :         if (ret) {
    2803           0 :                 kfree(entry);
    2804           0 :                 return send_utimes(sctx, dir, gen);
    2805             :         }
    2806             : 
    2807             :         return 0;
    2808             : }
    2809             : 
    2810     4284560 : static int trim_dir_utimes_cache(struct send_ctx *sctx)
    2811             : {
    2812     4286704 :         while (btrfs_lru_cache_size(&sctx->dir_utimes_cache) >
    2813             :                SEND_MAX_DIR_UTIMES_CACHE_SIZE) {
    2814        2166 :                 struct btrfs_lru_cache_entry *lru;
    2815        2166 :                 int ret;
    2816             : 
    2817        2166 :                 lru = btrfs_lru_cache_lru_entry(&sctx->dir_utimes_cache);
    2818        2166 :                 ASSERT(lru != NULL);
    2819             : 
    2820        2166 :                 ret = send_utimes(sctx, lru->key, lru->gen);
    2821        2166 :                 if (ret)
    2822           0 :                         return ret;
    2823             : 
    2824        2166 :                 btrfs_lru_cache_remove(&sctx->dir_utimes_cache, lru);
    2825             :         }
    2826             : 
    2827             :         return 0;
    2828             : }
    2829             : 
    2830             : /*
    2831             :  * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have
    2832             :  * a valid path yet because we did not process the refs yet. So, the inode
    2833             :  * is created as orphan.
    2834             :  */
    2835      603924 : static int send_create_inode(struct send_ctx *sctx, u64 ino)
    2836             : {
    2837      603924 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    2838      603924 :         int ret = 0;
    2839      603924 :         struct fs_path *p;
    2840      603924 :         int cmd;
    2841      603924 :         struct btrfs_inode_info info;
    2842      603924 :         u64 gen;
    2843      603924 :         u64 mode;
    2844      603924 :         u64 rdev;
    2845             : 
    2846      603924 :         btrfs_debug(fs_info, "send_create_inode %llu", ino);
    2847             : 
    2848      603924 :         p = fs_path_alloc();
    2849      604027 :         if (!p)
    2850             :                 return -ENOMEM;
    2851             : 
    2852      604027 :         if (ino != sctx->cur_ino) {
    2853         146 :                 ret = get_inode_info(sctx->send_root, ino, &info);
    2854         146 :                 if (ret < 0)
    2855           0 :                         goto out;
    2856         146 :                 gen = info.gen;
    2857         146 :                 mode = info.mode;
    2858         146 :                 rdev = info.rdev;
    2859             :         } else {
    2860      603881 :                 gen = sctx->cur_inode_gen;
    2861      603881 :                 mode = sctx->cur_inode_mode;
    2862      603881 :                 rdev = sctx->cur_inode_rdev;
    2863             :         }
    2864             : 
    2865      604027 :         if (S_ISREG(mode)) {
    2866             :                 cmd = BTRFS_SEND_C_MKFILE;
    2867        1562 :         } else if (S_ISDIR(mode)) {
    2868             :                 cmd = BTRFS_SEND_C_MKDIR;
    2869         939 :         } else if (S_ISLNK(mode)) {
    2870             :                 cmd = BTRFS_SEND_C_SYMLINK;
    2871         589 :         } else if (S_ISCHR(mode) || S_ISBLK(mode)) {
    2872             :                 cmd = BTRFS_SEND_C_MKNOD;
    2873           0 :         } else if (S_ISFIFO(mode)) {
    2874             :                 cmd = BTRFS_SEND_C_MKFIFO;
    2875           0 :         } else if (S_ISSOCK(mode)) {
    2876             :                 cmd = BTRFS_SEND_C_MKSOCK;
    2877             :         } else {
    2878           0 :                 btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o",
    2879             :                                 (int)(mode & S_IFMT));
    2880           0 :                 ret = -EOPNOTSUPP;
    2881           0 :                 goto out;
    2882             :         }
    2883             : 
    2884      604027 :         ret = begin_cmd(sctx, cmd);
    2885      603881 :         if (ret < 0)
    2886           0 :                 goto out;
    2887             : 
    2888      603881 :         ret = gen_unique_name(sctx, ino, gen, p);
    2889      604345 :         if (ret < 0)
    2890           0 :                 goto out;
    2891             : 
    2892      604345 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    2893      604343 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino);
    2894             : 
    2895      604345 :         if (S_ISLNK(mode)) {
    2896         350 :                 fs_path_reset(p);
    2897         350 :                 ret = read_symlink(sctx->send_root, ino, p);
    2898         350 :                 if (ret < 0)
    2899           0 :                         goto out;
    2900         350 :                 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
    2901      603995 :         } else if (S_ISCHR(mode) || S_ISBLK(mode) ||
    2902      603406 :                    S_ISFIFO(mode) || S_ISSOCK(mode)) {
    2903         589 :                 TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev));
    2904         589 :                 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode);
    2905             :         }
    2906             : 
    2907      604345 :         ret = send_cmd(sctx);
    2908      604345 :         if (ret < 0)
    2909           0 :                 goto out;
    2910             : 
    2911             : 
    2912      604345 : tlv_put_failure:
    2913      604345 : out:
    2914      604345 :         fs_path_free(p);
    2915      604345 :         return ret;
    2916             : }
    2917             : 
    2918         623 : static void cache_dir_created(struct send_ctx *sctx, u64 dir)
    2919             : {
    2920         623 :         struct btrfs_lru_cache_entry *entry;
    2921         623 :         int ret;
    2922             : 
    2923             :         /* Caching is optional, ignore any failures. */
    2924         623 :         entry = kmalloc(sizeof(*entry), GFP_KERNEL);
    2925         623 :         if (!entry)
    2926             :                 return;
    2927             : 
    2928         623 :         entry->key = dir;
    2929         623 :         entry->gen = 0;
    2930         623 :         ret = btrfs_lru_cache_store(&sctx->dir_created_cache, entry, GFP_KERNEL);
    2931         623 :         if (ret < 0)
    2932           0 :                 kfree(entry);
    2933             : }
    2934             : 
    2935             : /*
    2936             :  * We need some special handling for inodes that get processed before the parent
    2937             :  * directory got created. See process_recorded_refs for details.
    2938             :  * This function does the check if we already created the dir out of order.
    2939             :  */
    2940         812 : static int did_create_dir(struct send_ctx *sctx, u64 dir)
    2941             : {
    2942         812 :         int ret = 0;
    2943         812 :         int iter_ret = 0;
    2944         812 :         struct btrfs_path *path = NULL;
    2945         812 :         struct btrfs_key key;
    2946         812 :         struct btrfs_key found_key;
    2947         812 :         struct btrfs_key di_key;
    2948         812 :         struct btrfs_dir_item *di;
    2949             : 
    2950         812 :         if (btrfs_lru_cache_lookup(&sctx->dir_created_cache, dir, 0))
    2951             :                 return 1;
    2952             : 
    2953         623 :         path = alloc_path_for_send();
    2954         623 :         if (!path)
    2955             :                 return -ENOMEM;
    2956             : 
    2957         623 :         key.objectid = dir;
    2958         623 :         key.type = BTRFS_DIR_INDEX_KEY;
    2959         623 :         key.offset = 0;
    2960             : 
    2961        2324 :         btrfs_for_each_slot(sctx->send_root, &key, &found_key, path, iter_ret) {
    2962        2292 :                 struct extent_buffer *eb = path->nodes[0];
    2963             : 
    2964        2292 :                 if (found_key.objectid != key.objectid ||
    2965        1701 :                     found_key.type != key.type) {
    2966             :                         ret = 0;
    2967             :                         break;
    2968             :                 }
    2969             : 
    2970        1701 :                 di = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dir_item);
    2971        1701 :                 btrfs_dir_item_key_to_cpu(eb, di, &di_key);
    2972             : 
    2973        1701 :                 if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
    2974        1700 :                     di_key.objectid < sctx->send_progress) {
    2975           0 :                         ret = 1;
    2976           0 :                         cache_dir_created(sctx, dir);
    2977           0 :                         break;
    2978             :                 }
    2979             :         }
    2980             :         /* Catch error found during iteration */
    2981         623 :         if (iter_ret < 0)
    2982           0 :                 ret = iter_ret;
    2983             : 
    2984         623 :         btrfs_free_path(path);
    2985         623 :         return ret;
    2986             : }
    2987             : 
    2988             : /*
    2989             :  * Only creates the inode if it is:
    2990             :  * 1. Not a directory
    2991             :  * 2. Or a directory which was not created already due to out of order
    2992             :  *    directories. See did_create_dir and process_recorded_refs for details.
    2993             :  */
    2994      604152 : static int send_create_inode_if_needed(struct send_ctx *sctx)
    2995             : {
    2996      604152 :         int ret;
    2997             : 
    2998      604152 :         if (S_ISDIR(sctx->cur_inode_mode)) {
    2999         623 :                 ret = did_create_dir(sctx, sctx->cur_ino);
    3000         623 :                 if (ret < 0)
    3001             :                         return ret;
    3002         623 :                 else if (ret > 0)
    3003             :                         return 0;
    3004             :         }
    3005             : 
    3006      604006 :         ret = send_create_inode(sctx, sctx->cur_ino);
    3007             : 
    3008      604200 :         if (ret == 0 && S_ISDIR(sctx->cur_inode_mode))
    3009         477 :                 cache_dir_created(sctx, sctx->cur_ino);
    3010             : 
    3011             :         return ret;
    3012             : }
    3013             : 
    3014             : struct recorded_ref {
    3015             :         struct list_head list;
    3016             :         char *name;
    3017             :         struct fs_path *full_path;
    3018             :         u64 dir;
    3019             :         u64 dir_gen;
    3020             :         int name_len;
    3021             :         struct rb_node node;
    3022             :         struct rb_root *root;
    3023             : };
    3024             : 
    3025     1221794 : static struct recorded_ref *recorded_ref_alloc(void)
    3026             : {
    3027     1221794 :         struct recorded_ref *ref;
    3028             : 
    3029     1221794 :         ref = kzalloc(sizeof(*ref), GFP_KERNEL);
    3030     1221805 :         if (!ref)
    3031             :                 return NULL;
    3032     1221805 :         RB_CLEAR_NODE(&ref->node);
    3033     1221805 :         INIT_LIST_HEAD(&ref->list);
    3034     1221805 :         return ref;
    3035             : }
    3036             : 
    3037     1221826 : static void recorded_ref_free(struct recorded_ref *ref)
    3038             : {
    3039     1221826 :         if (!ref)
    3040             :                 return;
    3041     1221826 :         if (!RB_EMPTY_NODE(&ref->node))
    3042      611318 :                 rb_erase(&ref->node, ref->root);
    3043     1221825 :         list_del(&ref->list);
    3044     1221824 :         fs_path_free(ref->full_path);
    3045     1221825 :         kfree(ref);
    3046             : }
    3047             : 
    3048     1223624 : static void set_ref_path(struct recorded_ref *ref, struct fs_path *path)
    3049             : {
    3050     1223624 :         ref->full_path = path;
    3051     1223624 :         ref->name = (char *)kbasename(ref->full_path->start);
    3052     1223624 :         ref->name_len = ref->full_path->end - ref->name;
    3053     1223624 : }
    3054             : 
    3055      610509 : static int dup_ref(struct recorded_ref *ref, struct list_head *list)
    3056             : {
    3057      610509 :         struct recorded_ref *new;
    3058             : 
    3059      610509 :         new = recorded_ref_alloc();
    3060      610510 :         if (!new)
    3061             :                 return -ENOMEM;
    3062             : 
    3063      610510 :         new->dir = ref->dir;
    3064      610510 :         new->dir_gen = ref->dir_gen;
    3065      610510 :         list_add_tail(&new->list, list);
    3066      610510 :         return 0;
    3067             : }
    3068             : 
    3069             : static void __free_recorded_refs(struct list_head *head)
    3070             : {
    3071     1212122 :         struct recorded_ref *cur;
    3072             : 
    3073     3039059 :         while (!list_empty(head)) {
    3074     1220819 :                 cur = list_entry(head->next, struct recorded_ref, list);
    3075     1220819 :                 recorded_ref_free(cur);
    3076             :         }
    3077             : }
    3078             : 
    3079      606118 : static void free_recorded_refs(struct send_ctx *sctx)
    3080             : {
    3081      606118 :         __free_recorded_refs(&sctx->new_refs);
    3082      606117 :         __free_recorded_refs(&sctx->deleted_refs);
    3083      606119 : }
    3084             : 
    3085             : /*
    3086             :  * Renames/moves a file/dir to its orphan name. Used when the first
    3087             :  * ref of an unprocessed inode gets overwritten and for all non empty
    3088             :  * directories.
    3089             :  */
    3090         248 : static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
    3091             :                           struct fs_path *path)
    3092             : {
    3093         248 :         int ret;
    3094         248 :         struct fs_path *orphan;
    3095             : 
    3096         248 :         orphan = fs_path_alloc();
    3097         248 :         if (!orphan)
    3098             :                 return -ENOMEM;
    3099             : 
    3100         248 :         ret = gen_unique_name(sctx, ino, gen, orphan);
    3101         248 :         if (ret < 0)
    3102           0 :                 goto out;
    3103             : 
    3104         248 :         ret = send_rename(sctx, path, orphan);
    3105             : 
    3106         248 : out:
    3107         248 :         fs_path_free(orphan);
    3108         248 :         return ret;
    3109             : }
    3110             : 
    3111           9 : static struct orphan_dir_info *add_orphan_dir_info(struct send_ctx *sctx,
    3112             :                                                    u64 dir_ino, u64 dir_gen)
    3113             : {
    3114           9 :         struct rb_node **p = &sctx->orphan_dirs.rb_node;
    3115           9 :         struct rb_node *parent = NULL;
    3116           9 :         struct orphan_dir_info *entry, *odi;
    3117             : 
    3118          10 :         while (*p) {
    3119           1 :                 parent = *p;
    3120           1 :                 entry = rb_entry(parent, struct orphan_dir_info, node);
    3121           1 :                 if (dir_ino < entry->ino)
    3122           0 :                         p = &(*p)->rb_left;
    3123           1 :                 else if (dir_ino > entry->ino)
    3124           1 :                         p = &(*p)->rb_right;
    3125           0 :                 else if (dir_gen < entry->gen)
    3126           0 :                         p = &(*p)->rb_left;
    3127           0 :                 else if (dir_gen > entry->gen)
    3128           0 :                         p = &(*p)->rb_right;
    3129             :                 else
    3130           0 :                         return entry;
    3131             :         }
    3132             : 
    3133           9 :         odi = kmalloc(sizeof(*odi), GFP_KERNEL);
    3134           9 :         if (!odi)
    3135             :                 return ERR_PTR(-ENOMEM);
    3136           9 :         odi->ino = dir_ino;
    3137           9 :         odi->gen = dir_gen;
    3138           9 :         odi->last_dir_index_offset = 0;
    3139           9 :         odi->dir_high_seq_ino = 0;
    3140             : 
    3141           9 :         rb_link_node(&odi->node, parent, p);
    3142           9 :         rb_insert_color(&odi->node, &sctx->orphan_dirs);
    3143           9 :         return odi;
    3144             : }
    3145             : 
    3146     4696982 : static struct orphan_dir_info *get_orphan_dir_info(struct send_ctx *sctx,
    3147             :                                                    u64 dir_ino, u64 gen)
    3148             : {
    3149     4696982 :         struct rb_node *n = sctx->orphan_dirs.rb_node;
    3150     4696982 :         struct orphan_dir_info *entry;
    3151             : 
    3152     4697089 :         while (n) {
    3153         171 :                 entry = rb_entry(n, struct orphan_dir_info, node);
    3154         171 :                 if (dir_ino < entry->ino)
    3155          55 :                         n = n->rb_left;
    3156         116 :                 else if (dir_ino > entry->ino)
    3157          49 :                         n = n->rb_right;
    3158          67 :                 else if (gen < entry->gen)
    3159           0 :                         n = n->rb_left;
    3160          67 :                 else if (gen > entry->gen)
    3161           3 :                         n = n->rb_right;
    3162             :                 else
    3163          64 :                         return entry;
    3164             :         }
    3165             :         return NULL;
    3166             : }
    3167             : 
    3168             : static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen)
    3169             : {
    3170     4696246 :         struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino, gen);
    3171             : 
    3172     4696306 :         return odi != NULL;
    3173             : }
    3174             : 
    3175          11 : static void free_orphan_dir_info(struct send_ctx *sctx,
    3176             :                                  struct orphan_dir_info *odi)
    3177             : {
    3178          11 :         if (!odi)
    3179             :                 return;
    3180           9 :         rb_erase(&odi->node, &sctx->orphan_dirs);
    3181           9 :         kfree(odi);
    3182             : }
    3183             : 
    3184             : /*
    3185             :  * Returns 1 if a directory can be removed at this point in time.
    3186             :  * We check this by iterating all dir items and checking if the inode behind
    3187             :  * the dir item was already processed.
    3188             :  */
    3189          44 : static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen)
    3190             : {
    3191          44 :         int ret = 0;
    3192          44 :         int iter_ret = 0;
    3193          44 :         struct btrfs_root *root = sctx->parent_root;
    3194          44 :         struct btrfs_path *path;
    3195          44 :         struct btrfs_key key;
    3196          44 :         struct btrfs_key found_key;
    3197          44 :         struct btrfs_key loc;
    3198          44 :         struct btrfs_dir_item *di;
    3199          44 :         struct orphan_dir_info *odi = NULL;
    3200          44 :         u64 dir_high_seq_ino = 0;
    3201          44 :         u64 last_dir_index_offset = 0;
    3202             : 
    3203             :         /*
    3204             :          * Don't try to rmdir the top/root subvolume dir.
    3205             :          */
    3206          44 :         if (dir == BTRFS_FIRST_FREE_OBJECTID)
    3207             :                 return 0;
    3208             : 
    3209          40 :         odi = get_orphan_dir_info(sctx, dir, dir_gen);
    3210          40 :         if (odi && sctx->cur_ino < odi->dir_high_seq_ino)
    3211             :                 return 0;
    3212             : 
    3213          32 :         path = alloc_path_for_send();
    3214          32 :         if (!path)
    3215             :                 return -ENOMEM;
    3216             : 
    3217          32 :         if (!odi) {
    3218             :                 /*
    3219             :                  * Find the inode number associated with the last dir index
    3220             :                  * entry. This is very likely the inode with the highest number
    3221             :                  * of all inodes that have an entry in the directory. We can
    3222             :                  * then use it to avoid future calls to can_rmdir(), when
    3223             :                  * processing inodes with a lower number, from having to search
    3224             :                  * the parent root b+tree for dir index keys.
    3225             :                  */
    3226          20 :                 key.objectid = dir;
    3227          20 :                 key.type = BTRFS_DIR_INDEX_KEY;
    3228          20 :                 key.offset = (u64)-1;
    3229             : 
    3230          20 :                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    3231          20 :                 if (ret < 0) {
    3232           0 :                         goto out;
    3233          20 :                 } else if (ret > 0) {
    3234             :                         /* Can't happen, the root is never empty. */
    3235          20 :                         ASSERT(path->slots[0] > 0);
    3236          20 :                         if (WARN_ON(path->slots[0] == 0)) {
    3237           0 :                                 ret = -EUCLEAN;
    3238           0 :                                 goto out;
    3239             :                         }
    3240          20 :                         path->slots[0]--;
    3241             :                 }
    3242             : 
    3243          20 :                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
    3244          20 :                 if (key.objectid != dir || key.type != BTRFS_DIR_INDEX_KEY) {
    3245             :                         /* No index keys, dir can be removed. */
    3246           9 :                         ret = 1;
    3247           9 :                         goto out;
    3248             :                 }
    3249             : 
    3250          11 :                 di = btrfs_item_ptr(path->nodes[0], path->slots[0],
    3251             :                                     struct btrfs_dir_item);
    3252          11 :                 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
    3253          11 :                 dir_high_seq_ino = loc.objectid;
    3254          11 :                 if (sctx->cur_ino < dir_high_seq_ino) {
    3255           9 :                         ret = 0;
    3256           9 :                         goto out;
    3257             :                 }
    3258             : 
    3259           2 :                 btrfs_release_path(path);
    3260             :         }
    3261             : 
    3262          14 :         key.objectid = dir;
    3263          14 :         key.type = BTRFS_DIR_INDEX_KEY;
    3264          14 :         key.offset = (odi ? odi->last_dir_index_offset : 0);
    3265             : 
    3266          38 :         btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
    3267          37 :                 struct waiting_dir_move *dm;
    3268             : 
    3269          37 :                 if (found_key.objectid != key.objectid ||
    3270          27 :                     found_key.type != key.type)
    3271             :                         break;
    3272             : 
    3273          27 :                 di = btrfs_item_ptr(path->nodes[0], path->slots[0],
    3274             :                                 struct btrfs_dir_item);
    3275          27 :                 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
    3276             : 
    3277          27 :                 dir_high_seq_ino = max(dir_high_seq_ino, loc.objectid);
    3278          27 :                 last_dir_index_offset = found_key.offset;
    3279             : 
    3280          27 :                 dm = get_waiting_dir_move(sctx, loc.objectid);
    3281          27 :                 if (dm) {
    3282           3 :                         dm->rmdir_ino = dir;
    3283           3 :                         dm->rmdir_gen = dir_gen;
    3284           3 :                         ret = 0;
    3285           3 :                         goto out;
    3286             :                 }
    3287             : 
    3288          24 :                 if (loc.objectid > sctx->cur_ino) {
    3289           0 :                         ret = 0;
    3290           0 :                         goto out;
    3291             :                 }
    3292             :         }
    3293          11 :         if (iter_ret < 0) {
    3294           0 :                 ret = iter_ret;
    3295           0 :                 goto out;
    3296             :         }
    3297          11 :         free_orphan_dir_info(sctx, odi);
    3298             : 
    3299          11 :         ret = 1;
    3300             : 
    3301          32 : out:
    3302          32 :         btrfs_free_path(path);
    3303             : 
    3304          32 :         if (ret)
    3305             :                 return ret;
    3306             : 
    3307          12 :         if (!odi) {
    3308           9 :                 odi = add_orphan_dir_info(sctx, dir, dir_gen);
    3309           9 :                 if (IS_ERR(odi))
    3310           0 :                         return PTR_ERR(odi);
    3311             : 
    3312           9 :                 odi->gen = dir_gen;
    3313             :         }
    3314             : 
    3315          12 :         odi->last_dir_index_offset = last_dir_index_offset;
    3316          12 :         odi->dir_high_seq_ino = max(odi->dir_high_seq_ino, dir_high_seq_ino);
    3317             : 
    3318          12 :         return 0;
    3319             : }
    3320             : 
    3321     1015591 : static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
    3322             : {
    3323     1015591 :         struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino);
    3324             : 
    3325     1015591 :         return entry != NULL;
    3326             : }
    3327             : 
    3328          99 : static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)
    3329             : {
    3330          99 :         struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
    3331          99 :         struct rb_node *parent = NULL;
    3332          99 :         struct waiting_dir_move *entry, *dm;
    3333             : 
    3334          99 :         dm = kmalloc(sizeof(*dm), GFP_KERNEL);
    3335          99 :         if (!dm)
    3336             :                 return -ENOMEM;
    3337          99 :         dm->ino = ino;
    3338          99 :         dm->rmdir_ino = 0;
    3339          99 :         dm->rmdir_gen = 0;
    3340          99 :         dm->orphanized = orphanized;
    3341             : 
    3342         238 :         while (*p) {
    3343         139 :                 parent = *p;
    3344         139 :                 entry = rb_entry(parent, struct waiting_dir_move, node);
    3345         139 :                 if (ino < entry->ino) {
    3346           1 :                         p = &(*p)->rb_left;
    3347         138 :                 } else if (ino > entry->ino) {
    3348         138 :                         p = &(*p)->rb_right;
    3349             :                 } else {
    3350           0 :                         kfree(dm);
    3351           0 :                         return -EEXIST;
    3352             :                 }
    3353             :         }
    3354             : 
    3355          99 :         rb_link_node(&dm->node, parent, p);
    3356          99 :         rb_insert_color(&dm->node, &sctx->waiting_dir_moves);
    3357          99 :         return 0;
    3358             : }
    3359             : 
    3360             : static struct waiting_dir_move *
    3361             : get_waiting_dir_move(struct send_ctx *sctx, u64 ino)
    3362             : {
    3363     5712256 :         struct rb_node *n = sctx->waiting_dir_moves.rb_node;
    3364     5712256 :         struct waiting_dir_move *entry;
    3365             : 
    3366     5832592 :         while (n) {
    3367      121834 :                 entry = rb_entry(n, struct waiting_dir_move, node);
    3368      121834 :                 if (ino < entry->ino)
    3369       75617 :                         n = n->rb_left;
    3370       46217 :                 else if (ino > entry->ino)
    3371       44719 :                         n = n->rb_right;
    3372             :                 else
    3373             :                         return entry;
    3374             :         }
    3375             :         return NULL;
    3376             : }
    3377             : 
    3378          99 : static void free_waiting_dir_move(struct send_ctx *sctx,
    3379             :                                   struct waiting_dir_move *dm)
    3380             : {
    3381          99 :         if (!dm)
    3382             :                 return;
    3383          99 :         rb_erase(&dm->node, &sctx->waiting_dir_moves);
    3384          99 :         kfree(dm);
    3385             : }
    3386             : 
    3387          99 : static int add_pending_dir_move(struct send_ctx *sctx,
    3388             :                                 u64 ino,
    3389             :                                 u64 ino_gen,
    3390             :                                 u64 parent_ino,
    3391             :                                 struct list_head *new_refs,
    3392             :                                 struct list_head *deleted_refs,
    3393             :                                 const bool is_orphan)
    3394             : {
    3395          99 :         struct rb_node **p = &sctx->pending_dir_moves.rb_node;
    3396          99 :         struct rb_node *parent = NULL;
    3397          99 :         struct pending_dir_move *entry = NULL, *pm;
    3398          99 :         struct recorded_ref *cur;
    3399          99 :         int exists = 0;
    3400          99 :         int ret;
    3401             : 
    3402          99 :         pm = kmalloc(sizeof(*pm), GFP_KERNEL);
    3403          99 :         if (!pm)
    3404             :                 return -ENOMEM;
    3405          99 :         pm->parent_ino = parent_ino;
    3406          99 :         pm->ino = ino;
    3407          99 :         pm->gen = ino_gen;
    3408          99 :         INIT_LIST_HEAD(&pm->list);
    3409          99 :         INIT_LIST_HEAD(&pm->update_refs);
    3410          99 :         RB_CLEAR_NODE(&pm->node);
    3411             : 
    3412         167 :         while (*p) {
    3413          94 :                 parent = *p;
    3414          94 :                 entry = rb_entry(parent, struct pending_dir_move, node);
    3415          94 :                 if (parent_ino < entry->parent_ino) {
    3416          29 :                         p = &(*p)->rb_left;
    3417          65 :                 } else if (parent_ino > entry->parent_ino) {
    3418          39 :                         p = &(*p)->rb_right;
    3419             :                 } else {
    3420             :                         exists = 1;
    3421             :                         break;
    3422             :                 }
    3423             :         }
    3424             : 
    3425         196 :         list_for_each_entry(cur, deleted_refs, list) {
    3426          97 :                 ret = dup_ref(cur, &pm->update_refs);
    3427          97 :                 if (ret < 0)
    3428           0 :                         goto out;
    3429             :         }
    3430         200 :         list_for_each_entry(cur, new_refs, list) {
    3431         101 :                 ret = dup_ref(cur, &pm->update_refs);
    3432         101 :                 if (ret < 0)
    3433           0 :                         goto out;
    3434             :         }
    3435             : 
    3436          99 :         ret = add_waiting_dir_move(sctx, pm->ino, is_orphan);
    3437          99 :         if (ret)
    3438           0 :                 goto out;
    3439             : 
    3440          99 :         if (exists) {
    3441          26 :                 list_add_tail(&pm->list, &entry->list);
    3442             :         } else {
    3443          73 :                 rb_link_node(&pm->node, parent, p);
    3444          73 :                 rb_insert_color(&pm->node, &sctx->pending_dir_moves);
    3445             :         }
    3446             :         ret = 0;
    3447           0 : out:
    3448           0 :         if (ret) {
    3449             :                 __free_recorded_refs(&pm->update_refs);
    3450           0 :                 kfree(pm);
    3451             :         }
    3452             :         return ret;
    3453             : }
    3454             : 
    3455             : static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
    3456             :                                                       u64 parent_ino)
    3457             : {
    3458     1009930 :         struct rb_node *n = sctx->pending_dir_moves.rb_node;
    3459     1009930 :         struct pending_dir_move *entry;
    3460             : 
    3461     1011819 :         while (n) {
    3462        1962 :                 entry = rb_entry(n, struct pending_dir_move, node);
    3463        1962 :                 if (parent_ino < entry->parent_ino)
    3464        1596 :                         n = n->rb_left;
    3465         366 :                 else if (parent_ino > entry->parent_ino)
    3466         293 :                         n = n->rb_right;
    3467             :                 else
    3468             :                         return entry;
    3469             :         }
    3470             :         return NULL;
    3471             : }
    3472             : 
    3473          99 : static int path_loop(struct send_ctx *sctx, struct fs_path *name,
    3474             :                      u64 ino, u64 gen, u64 *ancestor_ino)
    3475             : {
    3476          99 :         int ret = 0;
    3477          99 :         u64 parent_inode = 0;
    3478          99 :         u64 parent_gen = 0;
    3479          99 :         u64 start_ino = ino;
    3480             : 
    3481          99 :         *ancestor_ino = 0;
    3482         767 :         while (ino != BTRFS_FIRST_FREE_OBJECTID) {
    3483         680 :                 fs_path_reset(name);
    3484             : 
    3485         680 :                 if (is_waiting_for_rm(sctx, ino, gen))
    3486             :                         break;
    3487         680 :                 if (is_waiting_for_move(sctx, ino)) {
    3488          26 :                         if (*ancestor_ino == 0)
    3489          10 :                                 *ancestor_ino = ino;
    3490          26 :                         ret = get_first_ref(sctx->parent_root, ino,
    3491             :                                             &parent_inode, &parent_gen, name);
    3492             :                 } else {
    3493         654 :                         ret = __get_cur_name_and_parent(sctx, ino, gen,
    3494             :                                                         &parent_inode,
    3495             :                                                         &parent_gen, name);
    3496         654 :                         if (ret > 0) {
    3497             :                                 ret = 0;
    3498             :                                 break;
    3499             :                         }
    3500             :                 }
    3501         670 :                 if (ret < 0)
    3502             :                         break;
    3503         670 :                 if (parent_inode == start_ino) {
    3504           2 :                         ret = 1;
    3505           2 :                         if (*ancestor_ino == 0)
    3506           0 :                                 *ancestor_ino = ino;
    3507             :                         break;
    3508             :                 }
    3509         668 :                 ino = parent_inode;
    3510         668 :                 gen = parent_gen;
    3511             :         }
    3512          99 :         return ret;
    3513             : }
    3514             : 
    3515          99 : static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
    3516             : {
    3517          99 :         struct fs_path *from_path = NULL;
    3518          99 :         struct fs_path *to_path = NULL;
    3519          99 :         struct fs_path *name = NULL;
    3520          99 :         u64 orig_progress = sctx->send_progress;
    3521          99 :         struct recorded_ref *cur;
    3522          99 :         u64 parent_ino, parent_gen;
    3523          99 :         struct waiting_dir_move *dm = NULL;
    3524          99 :         u64 rmdir_ino = 0;
    3525          99 :         u64 rmdir_gen;
    3526          99 :         u64 ancestor;
    3527          99 :         bool is_orphan;
    3528          99 :         int ret;
    3529             : 
    3530          99 :         name = fs_path_alloc();
    3531          99 :         from_path = fs_path_alloc();
    3532          99 :         if (!name || !from_path) {
    3533           0 :                 ret = -ENOMEM;
    3534           0 :                 goto out;
    3535             :         }
    3536             : 
    3537          99 :         dm = get_waiting_dir_move(sctx, pm->ino);
    3538          99 :         ASSERT(dm);
    3539          99 :         rmdir_ino = dm->rmdir_ino;
    3540          99 :         rmdir_gen = dm->rmdir_gen;
    3541          99 :         is_orphan = dm->orphanized;
    3542          99 :         free_waiting_dir_move(sctx, dm);
    3543             : 
    3544          99 :         if (is_orphan) {
    3545          13 :                 ret = gen_unique_name(sctx, pm->ino,
    3546             :                                       pm->gen, from_path);
    3547             :         } else {
    3548          86 :                 ret = get_first_ref(sctx->parent_root, pm->ino,
    3549             :                                     &parent_ino, &parent_gen, name);
    3550          86 :                 if (ret < 0)
    3551           0 :                         goto out;
    3552          86 :                 ret = get_cur_path(sctx, parent_ino, parent_gen,
    3553             :                                    from_path);
    3554          86 :                 if (ret < 0)
    3555           0 :                         goto out;
    3556          86 :                 ret = fs_path_add_path(from_path, name);
    3557             :         }
    3558          99 :         if (ret < 0)
    3559           0 :                 goto out;
    3560             : 
    3561          99 :         sctx->send_progress = sctx->cur_ino + 1;
    3562          99 :         ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
    3563          99 :         if (ret < 0)
    3564           0 :                 goto out;
    3565          99 :         if (ret) {
    3566           2 :                 LIST_HEAD(deleted_refs);
    3567           2 :                 ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
    3568           2 :                 ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
    3569             :                                            &pm->update_refs, &deleted_refs,
    3570             :                                            is_orphan);
    3571           2 :                 if (ret < 0)
    3572           0 :                         goto out;
    3573           2 :                 if (rmdir_ino) {
    3574           0 :                         dm = get_waiting_dir_move(sctx, pm->ino);
    3575           0 :                         ASSERT(dm);
    3576           0 :                         dm->rmdir_ino = rmdir_ino;
    3577           0 :                         dm->rmdir_gen = rmdir_gen;
    3578             :                 }
    3579           2 :                 goto out;
    3580             :         }
    3581          97 :         fs_path_reset(name);
    3582          97 :         to_path = name;
    3583          97 :         name = NULL;
    3584          97 :         ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
    3585          97 :         if (ret < 0)
    3586           0 :                 goto out;
    3587             : 
    3588          97 :         ret = send_rename(sctx, from_path, to_path);
    3589          97 :         if (ret < 0)
    3590           0 :                 goto out;
    3591             : 
    3592          97 :         if (rmdir_ino) {
    3593           3 :                 struct orphan_dir_info *odi;
    3594           3 :                 u64 gen;
    3595             : 
    3596           3 :                 odi = get_orphan_dir_info(sctx, rmdir_ino, rmdir_gen);
    3597           3 :                 if (!odi) {
    3598             :                         /* already deleted */
    3599           0 :                         goto finish;
    3600             :                 }
    3601           3 :                 gen = odi->gen;
    3602             : 
    3603           3 :                 ret = can_rmdir(sctx, rmdir_ino, gen);
    3604           3 :                 if (ret < 0)
    3605           0 :                         goto out;
    3606           3 :                 if (!ret)
    3607           1 :                         goto finish;
    3608             : 
    3609           2 :                 name = fs_path_alloc();
    3610           2 :                 if (!name) {
    3611           0 :                         ret = -ENOMEM;
    3612           0 :                         goto out;
    3613             :                 }
    3614           2 :                 ret = get_cur_path(sctx, rmdir_ino, gen, name);
    3615           2 :                 if (ret < 0)
    3616           0 :                         goto out;
    3617           2 :                 ret = send_rmdir(sctx, name);
    3618           2 :                 if (ret < 0)
    3619           0 :                         goto out;
    3620             :         }
    3621             : 
    3622          96 : finish:
    3623          97 :         ret = cache_dir_utimes(sctx, pm->ino, pm->gen);
    3624          97 :         if (ret < 0)
    3625           0 :                 goto out;
    3626             : 
    3627             :         /*
    3628             :          * After rename/move, need to update the utimes of both new parent(s)
    3629             :          * and old parent(s).
    3630             :          */
    3631         291 :         list_for_each_entry(cur, &pm->update_refs, list) {
    3632             :                 /*
    3633             :                  * The parent inode might have been deleted in the send snapshot
    3634             :                  */
    3635         194 :                 ret = get_inode_info(sctx->send_root, cur->dir, NULL);
    3636         194 :                 if (ret == -ENOENT) {
    3637           7 :                         ret = 0;
    3638           7 :                         continue;
    3639             :                 }
    3640         187 :                 if (ret < 0)
    3641           0 :                         goto out;
    3642             : 
    3643         187 :                 ret = cache_dir_utimes(sctx, cur->dir, cur->dir_gen);
    3644         187 :                 if (ret < 0)
    3645           0 :                         goto out;
    3646             :         }
    3647             : 
    3648          97 : out:
    3649          99 :         fs_path_free(name);
    3650          99 :         fs_path_free(from_path);
    3651          99 :         fs_path_free(to_path);
    3652          99 :         sctx->send_progress = orig_progress;
    3653             : 
    3654          99 :         return ret;
    3655             : }
    3656             : 
    3657          99 : static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
    3658             : {
    3659          99 :         if (!list_empty(&m->list))
    3660          99 :                 list_del(&m->list);
    3661          99 :         if (!RB_EMPTY_NODE(&m->node))
    3662           0 :                 rb_erase(&m->node, &sctx->pending_dir_moves);
    3663          99 :         __free_recorded_refs(&m->update_refs);
    3664          99 :         kfree(m);
    3665          99 : }
    3666             : 
    3667          73 : static void tail_append_pending_moves(struct send_ctx *sctx,
    3668             :                                       struct pending_dir_move *moves,
    3669             :                                       struct list_head *stack)
    3670             : {
    3671          73 :         if (list_empty(&moves->list)) {
    3672          53 :                 list_add_tail(&moves->list, stack);
    3673             :         } else {
    3674          20 :                 LIST_HEAD(list);
    3675          20 :                 list_splice_init(&moves->list, &list);
    3676          20 :                 list_add_tail(&moves->list, stack);
    3677          20 :                 list_splice_tail(&list, stack);
    3678             :         }
    3679          73 :         if (!RB_EMPTY_NODE(&moves->node)) {
    3680          73 :                 rb_erase(&moves->node, &sctx->pending_dir_moves);
    3681          73 :                 RB_CLEAR_NODE(&moves->node);
    3682             :         }
    3683          73 : }
    3684             : 
    3685     1009831 : static int apply_children_dir_moves(struct send_ctx *sctx)
    3686             : {
    3687     1009831 :         struct pending_dir_move *pm;
    3688     1009831 :         struct list_head stack;
    3689     1009831 :         u64 parent_ino = sctx->cur_ino;
    3690     1009831 :         int ret = 0;
    3691             : 
    3692     1009831 :         pm = get_pending_dir_moves(sctx, parent_ino);
    3693     1009831 :         if (!pm)
    3694             :                 return 0;
    3695             : 
    3696          48 :         INIT_LIST_HEAD(&stack);
    3697          48 :         tail_append_pending_moves(sctx, pm, &stack);
    3698             : 
    3699         195 :         while (!list_empty(&stack)) {
    3700          99 :                 pm = list_first_entry(&stack, struct pending_dir_move, list);
    3701          99 :                 parent_ino = pm->ino;
    3702          99 :                 ret = apply_dir_move(sctx, pm);
    3703          99 :                 free_pending_move(sctx, pm);
    3704          99 :                 if (ret)
    3705           0 :                         goto out;
    3706          99 :                 pm = get_pending_dir_moves(sctx, parent_ino);
    3707          99 :                 if (pm)
    3708          25 :                         tail_append_pending_moves(sctx, pm, &stack);
    3709             :         }
    3710             :         return 0;
    3711             : 
    3712             : out:
    3713           0 :         while (!list_empty(&stack)) {
    3714           0 :                 pm = list_first_entry(&stack, struct pending_dir_move, list);
    3715           0 :                 free_pending_move(sctx, pm);
    3716             :         }
    3717             :         return ret;
    3718             : }
    3719             : 
    3720             : /*
    3721             :  * We might need to delay a directory rename even when no ancestor directory
    3722             :  * (in the send root) with a higher inode number than ours (sctx->cur_ino) was
    3723             :  * renamed. This happens when we rename a directory to the old name (the name
    3724             :  * in the parent root) of some other unrelated directory that got its rename
    3725             :  * delayed due to some ancestor with a higher inode number that got renamed.
    3726             :  *
    3727             :  * Example:
    3728             :  *
    3729             :  * Parent snapshot:
    3730             :  * .                                       (ino 256)
    3731             :  * |---- a/                                (ino 257)
    3732             :  * |     |---- file                        (ino 260)
    3733             :  * |
    3734             :  * |---- b/                                (ino 258)
    3735             :  * |---- c/                                (ino 259)
    3736             :  *
    3737             :  * Send snapshot:
    3738             :  * .                                       (ino 256)
    3739             :  * |---- a/                                (ino 258)
    3740             :  * |---- x/                                (ino 259)
    3741             :  *       |---- y/                          (ino 257)
    3742             :  *             |----- file                 (ino 260)
    3743             :  *
    3744             :  * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257
    3745             :  * from 'a' to 'x/y' happening first, which in turn depends on the rename of
    3746             :  * inode 259 from 'c' to 'x'. So the order of rename commands the send stream
    3747             :  * must issue is:
    3748             :  *
    3749             :  * 1 - rename 259 from 'c' to 'x'
    3750             :  * 2 - rename 257 from 'a' to 'x/y'
    3751             :  * 3 - rename 258 from 'b' to 'a'
    3752             :  *
    3753             :  * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can
    3754             :  * be done right away and < 0 on error.
    3755             :  */
    3756         607 : static int wait_for_dest_dir_move(struct send_ctx *sctx,
    3757             :                                   struct recorded_ref *parent_ref,
    3758             :                                   const bool is_orphan)
    3759             : {
    3760         607 :         struct btrfs_fs_info *fs_info = sctx->parent_root->fs_info;
    3761         607 :         struct btrfs_path *path;
    3762         607 :         struct btrfs_key key;
    3763         607 :         struct btrfs_key di_key;
    3764         607 :         struct btrfs_dir_item *di;
    3765         607 :         u64 left_gen;
    3766         607 :         u64 right_gen;
    3767         607 :         int ret = 0;
    3768         607 :         struct waiting_dir_move *wdm;
    3769             : 
    3770         607 :         if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
    3771             :                 return 0;
    3772             : 
    3773         142 :         path = alloc_path_for_send();
    3774         142 :         if (!path)
    3775             :                 return -ENOMEM;
    3776             : 
    3777         142 :         key.objectid = parent_ref->dir;
    3778         142 :         key.type = BTRFS_DIR_ITEM_KEY;
    3779         142 :         key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);
    3780             : 
    3781         142 :         ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
    3782         142 :         if (ret < 0) {
    3783           0 :                 goto out;
    3784         142 :         } else if (ret > 0) {
    3785         107 :                 ret = 0;
    3786         107 :                 goto out;
    3787             :         }
    3788             : 
    3789          35 :         di = btrfs_match_dir_item_name(fs_info, path, parent_ref->name,
    3790             :                                        parent_ref->name_len);
    3791          35 :         if (!di) {
    3792           0 :                 ret = 0;
    3793           0 :                 goto out;
    3794             :         }
    3795             :         /*
    3796             :          * di_key.objectid has the number of the inode that has a dentry in the
    3797             :          * parent directory with the same name that sctx->cur_ino is being
    3798             :          * renamed to. We need to check if that inode is in the send root as
    3799             :          * well and if it is currently marked as an inode with a pending rename,
    3800             :          * if it is, we need to delay the rename of sctx->cur_ino as well, so
    3801             :          * that it happens after that other inode is renamed.
    3802             :          */
    3803          35 :         btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
    3804          35 :         if (di_key.type != BTRFS_INODE_ITEM_KEY) {
    3805           0 :                 ret = 0;
    3806           0 :                 goto out;
    3807             :         }
    3808             : 
    3809          35 :         ret = get_inode_gen(sctx->parent_root, di_key.objectid, &left_gen);
    3810          35 :         if (ret < 0)
    3811           0 :                 goto out;
    3812          35 :         ret = get_inode_gen(sctx->send_root, di_key.objectid, &right_gen);
    3813          35 :         if (ret < 0) {
    3814           0 :                 if (ret == -ENOENT)
    3815           0 :                         ret = 0;
    3816           0 :                 goto out;
    3817             :         }
    3818             : 
    3819             :         /* Different inode, no need to delay the rename of sctx->cur_ino */
    3820          35 :         if (right_gen != left_gen) {
    3821           0 :                 ret = 0;
    3822           0 :                 goto out;
    3823             :         }
    3824             : 
    3825          35 :         wdm = get_waiting_dir_move(sctx, di_key.objectid);
    3826          35 :         if (wdm && !wdm->orphanized) {
    3827           0 :                 ret = add_pending_dir_move(sctx,
    3828             :                                            sctx->cur_ino,
    3829             :                                            sctx->cur_inode_gen,
    3830             :                                            di_key.objectid,
    3831             :                                            &sctx->new_refs,
    3832             :                                            &sctx->deleted_refs,
    3833             :                                            is_orphan);
    3834           0 :                 if (!ret)
    3835           0 :                         ret = 1;
    3836             :         }
    3837          35 : out:
    3838         142 :         btrfs_free_path(path);
    3839         142 :         return ret;
    3840             : }
    3841             : 
    3842             : /*
    3843             :  * Check if inode ino2, or any of its ancestors, is inode ino1.
    3844             :  * Return 1 if true, 0 if false and < 0 on error.
    3845             :  */
    3846         302 : static int check_ino_in_path(struct btrfs_root *root,
    3847             :                              const u64 ino1,
    3848             :                              const u64 ino1_gen,
    3849             :                              const u64 ino2,
    3850             :                              const u64 ino2_gen,
    3851             :                              struct fs_path *fs_path)
    3852             : {
    3853         302 :         u64 ino = ino2;
    3854             : 
    3855         302 :         if (ino1 == ino2)
    3856           7 :                 return ino1_gen == ino2_gen;
    3857             : 
    3858        2809 :         while (ino > BTRFS_FIRST_FREE_OBJECTID) {
    3859        2518 :                 u64 parent;
    3860        2518 :                 u64 parent_gen;
    3861        2518 :                 int ret;
    3862             : 
    3863        2518 :                 fs_path_reset(fs_path);
    3864        2518 :                 ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path);
    3865        2518 :                 if (ret < 0)
    3866           4 :                         return ret;
    3867        2518 :                 if (parent == ino1)
    3868           4 :                         return parent_gen == ino1_gen;
    3869        2514 :                 ino = parent;
    3870             :         }
    3871             :         return 0;
    3872             : }
    3873             : 
    3874             : /*
    3875             :  * Check if inode ino1 is an ancestor of inode ino2 in the given root for any
    3876             :  * possible path (in case ino2 is not a directory and has multiple hard links).
    3877             :  * Return 1 if true, 0 if false and < 0 on error.
    3878             :  */
    3879         269 : static int is_ancestor(struct btrfs_root *root,
    3880             :                        const u64 ino1,
    3881             :                        const u64 ino1_gen,
    3882             :                        const u64 ino2,
    3883             :                        struct fs_path *fs_path)
    3884             : {
    3885         269 :         bool free_fs_path = false;
    3886         269 :         int ret = 0;
    3887         269 :         int iter_ret = 0;
    3888         269 :         struct btrfs_path *path = NULL;
    3889         269 :         struct btrfs_key key;
    3890             : 
    3891         269 :         if (!fs_path) {
    3892         239 :                 fs_path = fs_path_alloc();
    3893         239 :                 if (!fs_path)
    3894             :                         return -ENOMEM;
    3895             :                 free_fs_path = true;
    3896             :         }
    3897             : 
    3898         269 :         path = alloc_path_for_send();
    3899         269 :         if (!path) {
    3900           0 :                 ret = -ENOMEM;
    3901           0 :                 goto out;
    3902             :         }
    3903             : 
    3904         269 :         key.objectid = ino2;
    3905         269 :         key.type = BTRFS_INODE_REF_KEY;
    3906         269 :         key.offset = 0;
    3907             : 
    3908         560 :         btrfs_for_each_slot(root, &key, &key, path, iter_ret) {
    3909         560 :                 struct extent_buffer *leaf = path->nodes[0];
    3910         560 :                 int slot = path->slots[0];
    3911         560 :                 u32 cur_offset = 0;
    3912         560 :                 u32 item_size;
    3913             : 
    3914         560 :                 if (key.objectid != ino2)
    3915             :                         break;
    3916         477 :                 if (key.type != BTRFS_INODE_REF_KEY &&
    3917             :                     key.type != BTRFS_INODE_EXTREF_KEY)
    3918             :                         break;
    3919             : 
    3920         302 :                 item_size = btrfs_item_size(leaf, slot);
    3921         593 :                 while (cur_offset < item_size) {
    3922         302 :                         u64 parent;
    3923         302 :                         u64 parent_gen;
    3924             : 
    3925         302 :                         if (key.type == BTRFS_INODE_EXTREF_KEY) {
    3926           0 :                                 unsigned long ptr;
    3927           0 :                                 struct btrfs_inode_extref *extref;
    3928             : 
    3929           0 :                                 ptr = btrfs_item_ptr_offset(leaf, slot);
    3930           0 :                                 extref = (struct btrfs_inode_extref *)
    3931           0 :                                         (ptr + cur_offset);
    3932           0 :                                 parent = btrfs_inode_extref_parent(leaf,
    3933             :                                                                    extref);
    3934           0 :                                 cur_offset += sizeof(*extref);
    3935           0 :                                 cur_offset += btrfs_inode_extref_name_len(leaf,
    3936             :                                                                   extref);
    3937             :                         } else {
    3938         302 :                                 parent = key.offset;
    3939         302 :                                 cur_offset = item_size;
    3940             :                         }
    3941             : 
    3942         302 :                         ret = get_inode_gen(root, parent, &parent_gen);
    3943         302 :                         if (ret < 0)
    3944          11 :                                 goto out;
    3945         302 :                         ret = check_ino_in_path(root, ino1, ino1_gen,
    3946             :                                                 parent, parent_gen, fs_path);
    3947         302 :                         if (ret)
    3948          11 :                                 goto out;
    3949             :                 }
    3950             :         }
    3951         258 :         ret = 0;
    3952         258 :         if (iter_ret < 0)
    3953             :                 ret = iter_ret;
    3954             : 
    3955         258 : out:
    3956         269 :         btrfs_free_path(path);
    3957         269 :         if (free_fs_path)
    3958         239 :                 fs_path_free(fs_path);
    3959             :         return ret;
    3960             : }
    3961             : 
    3962         607 : static int wait_for_parent_move(struct send_ctx *sctx,
    3963             :                                 struct recorded_ref *parent_ref,
    3964             :                                 const bool is_orphan)
    3965             : {
    3966         607 :         int ret = 0;
    3967         607 :         u64 ino = parent_ref->dir;
    3968         607 :         u64 ino_gen = parent_ref->dir_gen;
    3969         607 :         u64 parent_ino_before, parent_ino_after;
    3970         607 :         struct fs_path *path_before = NULL;
    3971         607 :         struct fs_path *path_after = NULL;
    3972         607 :         int len1, len2;
    3973             : 
    3974         607 :         path_after = fs_path_alloc();
    3975         607 :         path_before = fs_path_alloc();
    3976         607 :         if (!path_after || !path_before) {
    3977           0 :                 ret = -ENOMEM;
    3978           0 :                 goto out;
    3979             :         }
    3980             : 
    3981             :         /*
    3982             :          * Our current directory inode may not yet be renamed/moved because some
    3983             :          * ancestor (immediate or not) has to be renamed/moved first. So find if
    3984             :          * such ancestor exists and make sure our own rename/move happens after
    3985             :          * that ancestor is processed to avoid path build infinite loops (done
    3986             :          * at get_cur_path()).
    3987             :          */
    3988        4880 :         while (ino > BTRFS_FIRST_FREE_OBJECTID) {
    3989        4427 :                 u64 parent_ino_after_gen;
    3990             : 
    3991        4427 :                 if (is_waiting_for_move(sctx, ino)) {
    3992             :                         /*
    3993             :                          * If the current inode is an ancestor of ino in the
    3994             :                          * parent root, we need to delay the rename of the
    3995             :                          * current inode, otherwise don't delay the rename
    3996             :                          * because we can end up with a circular dependency
    3997             :                          * of renames, resulting in some directories never
    3998             :                          * getting the respective rename operations issued in
    3999             :                          * the send stream or getting into infinite path build
    4000             :                          * loops.
    4001             :                          */
    4002          30 :                         ret = is_ancestor(sctx->parent_root,
    4003             :                                           sctx->cur_ino, sctx->cur_inode_gen,
    4004             :                                           ino, path_before);
    4005          30 :                         if (ret)
    4006             :                                 break;
    4007             :                 }
    4008             : 
    4009        4421 :                 fs_path_reset(path_before);
    4010        4421 :                 fs_path_reset(path_after);
    4011             : 
    4012        4421 :                 ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
    4013             :                                     &parent_ino_after_gen, path_after);
    4014        4421 :                 if (ret < 0)
    4015           0 :                         goto out;
    4016        4421 :                 ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
    4017             :                                     NULL, path_before);
    4018        4421 :                 if (ret < 0 && ret != -ENOENT) {
    4019           0 :                         goto out;
    4020        4421 :                 } else if (ret == -ENOENT) {
    4021             :                         ret = 0;
    4022             :                         break;
    4023             :                 }
    4024             : 
    4025        4364 :                 len1 = fs_path_len(path_before);
    4026        4364 :                 len2 = fs_path_len(path_after);
    4027        4364 :                 if (ino > sctx->cur_ino &&
    4028         468 :                     (parent_ino_before != parent_ino_after || len1 != len2 ||
    4029         189 :                      memcmp(path_before->start, path_after->start, len1))) {
    4030          93 :                         u64 parent_ino_gen;
    4031             : 
    4032          93 :                         ret = get_inode_gen(sctx->parent_root, ino, &parent_ino_gen);
    4033          93 :                         if (ret < 0)
    4034           0 :                                 goto out;
    4035          93 :                         if (ino_gen == parent_ino_gen) {
    4036          91 :                                 ret = 1;
    4037          91 :                                 break;
    4038             :                         }
    4039             :                 }
    4040        4273 :                 ino = parent_ino_after;
    4041        4273 :                 ino_gen = parent_ino_after_gen;
    4042             :         }
    4043             : 
    4044         453 : out:
    4045         607 :         fs_path_free(path_before);
    4046         607 :         fs_path_free(path_after);
    4047             : 
    4048         607 :         if (ret == 1) {
    4049          97 :                 ret = add_pending_dir_move(sctx,
    4050             :                                            sctx->cur_ino,
    4051             :                                            sctx->cur_inode_gen,
    4052             :                                            ino,
    4053             :                                            &sctx->new_refs,
    4054             :                                            &sctx->deleted_refs,
    4055             :                                            is_orphan);
    4056          97 :                 if (!ret)
    4057          97 :                         ret = 1;
    4058             :         }
    4059             : 
    4060         607 :         return ret;
    4061             : }
    4062             : 
    4063          15 : static int update_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
    4064             : {
    4065          15 :         int ret;
    4066          15 :         struct fs_path *new_path;
    4067             : 
    4068             :         /*
    4069             :          * Our reference's name member points into its full_path member string, so
    4070             :          * build the updated path in a new fs_path and free the old one afterwards.
    4071             :          */
    4072          15 :         new_path = fs_path_alloc();
    4073          15 :         if (!new_path)
    4074             :                 return -ENOMEM;
    4075             : 
    4076          15 :         ret = get_cur_path(sctx, ref->dir, ref->dir_gen, new_path);
    4077          15 :         if (ret < 0) {
    4078           0 :                 fs_path_free(new_path);
    4079           0 :                 return ret;
    4080             :         }
    4081          15 :         ret = fs_path_add(new_path, ref->name, ref->name_len);
    4082          15 :         if (ret < 0) {
    4083           0 :                 fs_path_free(new_path);
    4084           0 :                 return ret;
    4085             :         }
    4086             : 
    4087          15 :         fs_path_free(ref->full_path);
    4088          15 :         set_ref_path(ref, new_path);
    4089             : 
    4090          15 :         return 0;
    4091             : }
    4092             : 
    4093             : /*
    4094             :  * When processing the new references for an inode we may orphanize an existing
    4095             :  * directory inode because its old name conflicts with one of the new references
    4096             :  * of the current inode. Later, when processing another new reference of our
    4097             :  * inode, we might need to orphanize another inode, but the path we have in the
    4098             :  * reference reflects the pre-orphanization name of the directory we previously
    4099             :  * orphanized. For example:
    4100             :  *
    4101             :  * parent snapshot looks like:
    4102             :  *
    4103             :  * .                                     (ino 256)
    4104             :  * |----- f1                             (ino 257)
    4105             :  * |----- f2                             (ino 258)
    4106             :  * |----- d1/                            (ino 259)
    4107             :  *        |----- d2/                     (ino 260)
    4108             :  *
    4109             :  * send snapshot looks like:
    4110             :  *
    4111             :  * .                                     (ino 256)
    4112             :  * |----- d1                             (ino 258)
    4113             :  * |----- f2/                            (ino 259)
    4114             :  *        |----- f2_link/                (ino 260)
    4115             :  *        |       |----- f1              (ino 257)
    4116             :  *        |
    4117             :  *        |----- d2                      (ino 258)
    4118             :  *
    4119             :  * When processing inode 257 we compute the name for inode 259 as "d1", and we
    4120             :  * cache it in the name cache. Later when we start processing inode 258, when
    4121             :  * collecting all its new references we set a full path of "d1/d2" for its new
    4122             :  * reference with name "d2". When we start processing the new references we
    4123             :  * start by processing the new reference with name "d1", and this results in
    4124             :  * orphanizing inode 259, since its old reference causes a conflict. Then we
    4125             :  * move on to the next new reference, with name "d2", and we find out we must
    4126             :  * orphanize inode 260, as its old reference conflicts with ours - but for the
    4127             :  * orphanization we use a source path corresponding to the path we stored in the
    4128             :  * new reference, which is "d1/d2" and not "o259-6-0/d2" - this makes the
    4129             :  * receiver fail since the path component "d1/" no longer exists, it was renamed
    4130             :  * to "o259-6-0/" when processing the previous new reference. So in this case we
    4131             :  * must recompute the path in the new reference and use it for the new
    4132             :  * orphanization operation.
    4133             :  */
    4134           2 : static int refresh_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
    4135             : {
    4136           2 :         char *name;
    4137           2 :         int ret;
    4138             : 
    4139           2 :         name = kmemdup(ref->name, ref->name_len, GFP_KERNEL); /* copy first: ref->name points into full_path, which is reset next */
    4140           2 :         if (!name)
    4141             :                 return -ENOMEM;
    4142             : 
    4143           2 :         fs_path_reset(ref->full_path);
    4144           2 :         ret = get_cur_path(sctx, ref->dir, ref->dir_gen, ref->full_path);
    4145           2 :         if (ret < 0)
    4146           0 :                 goto out;
    4147             : 
    4148           2 :         ret = fs_path_add(ref->full_path, name, ref->name_len);
    4149           2 :         if (ret < 0)
    4150           0 :                 goto out;
    4151             : 
    4152             :         /* Re-point the reference's base name at the rebuilt full_path. */
    4153           2 :         set_ref_path(ref, ref->full_path);
    4154           2 : out:
    4155           2 :         kfree(name);
    4156           2 :         return ret;
    4157             : }
    4158             : 
    4159             : /*
    4160             :  * This does all the move/link/unlink/rmdir magic.
    4161             :  */
    4162      605890 : static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
    4163             : {
    4164      605890 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    4165      605890 :         int ret = 0;
    4166      605890 :         struct recorded_ref *cur;
    4167      605890 :         struct recorded_ref *cur2;
    4168      605890 :         struct list_head check_dirs;
    4169      605890 :         struct fs_path *valid_path = NULL;
    4170      605890 :         u64 ow_inode = 0;
    4171      605890 :         u64 ow_gen;
    4172      605890 :         u64 ow_mode;
    4173      605890 :         int did_overwrite = 0;
    4174      605890 :         int is_orphan = 0;
    4175      605890 :         u64 last_dir_ino_rm = 0;
    4176      605890 :         bool can_rename = true;
    4177      605890 :         bool orphanized_dir = false;
    4178      605890 :         bool orphanized_ancestor = false;
    4179             : 
    4180      605890 :         btrfs_debug(fs_info, "process_recorded_refs %llu", sctx->cur_ino);
    4181             : 
    4182             :         /*
    4183             :          * This should never happen as the root dir always has the same ref
    4184             :          * which is always '..'
    4185             :          */
    4186      605890 :         BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
    4187      605890 :         INIT_LIST_HEAD(&check_dirs);
    4188             : 
    4189      605890 :         valid_path = fs_path_alloc();
    4190      605883 :         if (!valid_path) {
    4191           0 :                 ret = -ENOMEM;
    4192           0 :                 goto out;
    4193             :         }
    4194             : 
    4195             :         /*
    4196             :          * First, check if the first ref of the current inode was overwritten
    4197             :          * before. If yes, we know that the current inode was already orphanized
    4198             :          * and thus use the orphan name. If not, we can use get_cur_path to
    4199             :          * get the path of the first ref as it would look like while receiving at
    4200             :          * this point in time.
    4201             :          * New inodes are always orphan at the beginning, so force to use the
    4202             :          * orphan name in this case.
    4203             :          * The first ref is stored in valid_path and will be updated if it
    4204             :          * gets moved around.
    4205             :          */
    4206      605883 :         if (!sctx->cur_inode_new) {
    4207        1561 :                 ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
    4208             :                                 sctx->cur_inode_gen);
    4209        1561 :                 if (ret < 0)
    4210           0 :                         goto out;
    4211        1561 :                 if (ret)
    4212         233 :                         did_overwrite = 1;
    4213             :         }
    4214      605883 :         if (sctx->cur_inode_new || did_overwrite) {
    4215      604555 :                 ret = gen_unique_name(sctx, sctx->cur_ino,
    4216             :                                 sctx->cur_inode_gen, valid_path);
    4217      604578 :                 if (ret < 0)
    4218           0 :                         goto out;
    4219             :                 is_orphan = 1;
    4220             :         } else {
    4221        1328 :                 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
    4222             :                                 valid_path);
    4223        1328 :                 if (ret < 0)
    4224           0 :                         goto out;
    4225             :         }
    4226             : 
    4227             :         /*
    4228             :          * Before doing any rename and link operations, do a first pass on the
    4229             :          * new references to orphanize any unprocessed inodes that may have a
    4230             :          * reference that conflicts with one of the new references of the current
    4231             :          * inode. This needs to happen first because a new reference may conflict
    4232             :          * with the old reference of a parent directory, so we must make sure
    4233             :          * that the paths used for link and rename commands don't use an
    4234             :          * orphanized name when an ancestor was not yet orphanized.
    4235             :          *
    4236             :          * Example:
    4237             :          *
    4238             :          * Parent snapshot:
    4239             :          *
    4240             :          * .                                                      (ino 256)
    4241             :          * |----- testdir/                                        (ino 259)
    4242             :          * |          |----- a                                    (ino 257)
    4243             :          * |
    4244             :          * |----- b                                               (ino 258)
    4245             :          *
    4246             :          * Send snapshot:
    4247             :          *
    4248             :          * .                                                      (ino 256)
    4249             :          * |----- testdir_2/                                      (ino 259)
    4250             :          * |          |----- a                                    (ino 260)
    4251             :          * |
    4252             :          * |----- testdir                                         (ino 257)
    4253             :          * |----- b                                               (ino 257)
    4254             :          * |----- b2                                              (ino 258)
    4255             :          *
    4256             :          * Processing the new reference for inode 257 with name "b" may happen
    4257             :          * before processing the new reference with name "testdir". If so, we
    4258             :          * must make sure that by the time we send a link command to create the
    4259             :          * hard link "b", inode 259 was already orphanized, since the generated
    4260             :          * path in "valid_path" already contains the orphanized name for 259.
    4261             :          * We are processing inode 257, so only later when processing 259 we do
    4262             :          * the rename operation to change its temporary (orphanized) name to
    4263             :          * "testdir_2".
    4264             :          */
    4265     1213731 :         list_for_each_entry(cur, &sctx->new_refs, list) {
    4266      607824 :                 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen, NULL, NULL);
    4267      607821 :                 if (ret < 0)
    4268           0 :                         goto out;
    4269      607821 :                 if (ret == inode_state_will_create)
    4270         190 :                         continue;
    4271             : 
    4272             :                 /*
    4273             :                  * Check if this new ref would overwrite the first ref of another
    4274             :                  * unprocessed inode. If yes, orphanize the overwritten inode.
    4275             :                  * If we find an overwritten ref that is not the first ref,
    4276             :                  * simply unlink it.
    4277             :                  */
    4278      607631 :                 ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
    4279      607631 :                                 cur->name, cur->name_len,
    4280             :                                 &ow_inode, &ow_gen, &ow_mode);
    4281      607635 :                 if (ret < 0)
    4282           0 :                         goto out;
    4283      607635 :                 if (ret) {
    4284         245 :                         ret = is_first_ref(sctx->parent_root,
    4285         245 :                                            ow_inode, cur->dir, cur->name,
    4286             :                                            cur->name_len);
    4287         245 :                         if (ret < 0)
    4288           0 :                                 goto out;
    4289         245 :                         if (ret) {
    4290         239 :                                 struct name_cache_entry *nce;
    4291         239 :                                 struct waiting_dir_move *wdm;
    4292             : 
    4293         239 :                                 if (orphanized_dir) {
    4294           1 :                                         ret = refresh_ref_path(sctx, cur);
    4295           1 :                                         if (ret < 0)
    4296           0 :                                                 goto out;
    4297             :                                 }
    4298             : 
    4299         239 :                                 ret = orphanize_inode(sctx, ow_inode, ow_gen,
    4300             :                                                 cur->full_path);
    4301         239 :                                 if (ret < 0)
    4302           0 :                                         goto out;
    4303         239 :                                 if (S_ISDIR(ow_mode))
    4304          63 :                                         orphanized_dir = true;
    4305             : 
    4306             :                                 /*
    4307             :                                  * If ow_inode has its rename operation delayed
    4308             :                                  * make sure that its orphanized name is used in
    4309             :                                  * the source path when performing its rename
    4310             :                                  * operation.
    4311             :                                  */
    4312         239 :                                 wdm = get_waiting_dir_move(sctx, ow_inode);
    4313         239 :                                 if (wdm)
    4314           6 :                                         wdm->orphanized = true;
    4315             : 
    4316             :                                 /*
    4317             :                                  * Make sure we clear our orphanized inode's
    4318             :                                  * name from the name cache. This is because the
    4319             :                                  * inode ow_inode might be an ancestor of some
    4320             :                                  * other inode that will be orphanized as well
    4321             :                                  * later and has an inode number greater than
    4322             :                                  * sctx->send_progress. We need to prevent
    4323             :                                  * future name lookups from using the old name
    4324             :                                  * and get instead the orphan name.
    4325             :                                  */
    4326         239 :                                 nce = name_cache_search(sctx, ow_inode, ow_gen);
    4327          32 :                                 if (nce)
    4328          32 :                                         btrfs_lru_cache_remove(&sctx->name_cache,
    4329             :                                                                &nce->entry);
    4330             : 
    4331             :                                 /*
    4332             :                                  * ow_inode might currently be an ancestor of
    4333             :                                  * cur_ino, therefore compute valid_path (the
    4334             :                                  * current path of cur_ino) again because it
    4335             :                                  * might contain the pre-orphanization name of
    4336             :                                  * ow_inode, which is no longer valid.
    4337             :                                  */
    4338         239 :                                 ret = is_ancestor(sctx->parent_root,
    4339             :                                                   ow_inode, ow_gen,
    4340             :                                                   sctx->cur_ino, NULL);
    4341         239 :                                 if (ret > 0) {
    4342           5 :                                         orphanized_ancestor = true;
    4343           5 :                                         fs_path_reset(valid_path);
    4344           5 :                                         ret = get_cur_path(sctx, sctx->cur_ino,
    4345             :                                                            sctx->cur_inode_gen,
    4346             :                                                            valid_path);
    4347             :                                 }
    4348         239 :                                 if (ret < 0)
    4349           0 :                                         goto out;
    4350             :                         } else {
    4351             :                                 /*
    4352             :                                  * If we previously orphanized a directory that
    4353             :                                  * collided with a new reference that we already
    4354             :                                  * processed, recompute the current path because
    4355             :                                  * that directory may be part of the path.
    4356             :                                  */
    4357           6 :                                 if (orphanized_dir) {
    4358           1 :                                         ret = refresh_ref_path(sctx, cur);
    4359           1 :                                         if (ret < 0)
    4360           0 :                                                 goto out;
    4361             :                                 }
    4362           6 :                                 ret = send_unlink(sctx, cur->full_path);
    4363           6 :                                 if (ret < 0)
    4364           0 :                                         goto out;
    4365             :                         }
    4366             :                 }
    4367             : 
    4368             :         }
    4369             : 
    4370     1213730 :         list_for_each_entry(cur, &sctx->new_refs, list) {
    4371             :                 /*
    4372             :                  * We may have refs where the parent directory does not exist
    4373             :                  * yet. This happens if the parent directories inum is higher
    4374             :                  * yet. This happens if the parent directory's inum is higher
    4375             :                  * parent directory out of order. But we need to check if this
    4376             :                  * did already happen before due to other refs in the same dir.
    4377             :                  */
    4378      607827 :                 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen, NULL, NULL);
    4379      607824 :                 if (ret < 0)
    4380           0 :                         goto out;
    4381      607824 :                 if (ret == inode_state_will_create) {
    4382         190 :                         ret = 0;
    4383             :                         /*
    4384             :                          * First check if any of the current inode's refs did
    4385             :                          * already create the dir.
    4386             :                          */
    4387         210 :                         list_for_each_entry(cur2, &sctx->new_refs, list) {
    4388         210 :                                 if (cur == cur2)
    4389             :                                         break;
    4390          21 :                                 if (cur2->dir == cur->dir) {
    4391             :                                         ret = 1;
    4392             :                                         break;
    4393             :                                 }
    4394             :                         }
    4395             : 
    4396             :                         /*
    4397             :                          * If that did not happen, check if a previous inode
    4398             :                          * did already create the dir.
    4399             :                          */
    4400         190 :                         if (!ret)
    4401         189 :                                 ret = did_create_dir(sctx, cur->dir);
    4402         190 :                         if (ret < 0)
    4403           0 :                                 goto out;
    4404         190 :                         if (!ret) {
    4405         146 :                                 ret = send_create_inode(sctx, cur->dir);
    4406         146 :                                 if (ret < 0)
    4407           0 :                                         goto out;
    4408         146 :                                 cache_dir_created(sctx, cur->dir);
    4409             :                         }
    4410             :                 }
    4411             : 
    4412      607824 :                 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
    4413         607 :                         ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
    4414         607 :                         if (ret < 0)
    4415           0 :                                 goto out;
    4416         607 :                         if (ret == 1) {
    4417           0 :                                 can_rename = false;
    4418           0 :                                 *pending_move = 1;
    4419             :                         }
    4420             :                 }
    4421             : 
    4422      607824 :                 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root &&
    4423             :                     can_rename) {
    4424         607 :                         ret = wait_for_parent_move(sctx, cur, is_orphan);
    4425         607 :                         if (ret < 0)
    4426           0 :                                 goto out;
    4427         607 :                         if (ret == 1) {
    4428          97 :                                 can_rename = false;
    4429          97 :                                 *pending_move = 1;
    4430             :                         }
    4431             :                 }
    4432             : 
    4433             :                 /*
    4434             :                  * link/move the ref to the new place. If we have an orphan
    4435             :                  * inode, move it and update valid_path. If not, link or move
    4436             :                  * it depending on the inode mode.
    4437             :                  */
    4438      607824 :                 if (is_orphan && can_rename) {
    4439      604565 :                         ret = send_rename(sctx, valid_path, cur->full_path);
    4440      604566 :                         if (ret < 0)
    4441           0 :                                 goto out;
    4442      604566 :                         is_orphan = 0;
    4443      604566 :                         ret = fs_path_copy(valid_path, cur->full_path);
    4444      604565 :                         if (ret < 0)
    4445           0 :                                 goto out;
    4446        3259 :                 } else if (can_rename) {
    4447        3162 :                         if (S_ISDIR(sctx->cur_inode_mode)) {
    4448             :                                 /*
    4449             :                                  * Dirs can't be linked, so move it. For moved
    4450             :                                  * dirs, we always have one new and one deleted
    4451             :                                  * ref. The deleted ref is ignored later.
    4452             :                                  */
    4453         146 :                                 ret = send_rename(sctx, valid_path,
    4454             :                                                   cur->full_path);
    4455         146 :                                 if (!ret)
    4456         146 :                                         ret = fs_path_copy(valid_path,
    4457             :                                                            cur->full_path);
    4458         146 :                                 if (ret < 0)
    4459           0 :                                         goto out;
    4460             :                         } else {
    4461             :                                 /*
    4462             :                                  * We might have previously orphanized an inode
    4463             :                                  * which is an ancestor of our current inode,
    4464             :                                  * so our reference's full path, which was
    4465             :                                  * computed before any such orphanizations, must
    4466             :                                  * be updated.
    4467             :                                  */
    4468        3016 :                                 if (orphanized_dir) {
    4469          11 :                                         ret = update_ref_path(sctx, cur);
    4470          11 :                                         if (ret < 0)
    4471           0 :                                                 goto out;
    4472             :                                 }
    4473        3016 :                                 ret = send_link(sctx, cur->full_path,
    4474             :                                                 valid_path);
    4475        3016 :                                 if (ret < 0)
    4476           0 :                                         goto out;
    4477             :                         }
    4478             :                 }
    4479      607824 :                 ret = dup_ref(cur, &check_dirs);
    4480      607823 :                 if (ret < 0)
    4481           0 :                         goto out;
    4482             :         }
    4483             : 
    4484      605903 :         if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) {
    4485             :                 /*
    4486             :                  * Check if we can already rmdir the directory. If not,
    4487             :                  * orphanize it. For every dir item inside that gets deleted
    4488             :                  * later, we do this check again and rmdir it then if possible.
    4489             :                  * See the use of check_dirs for more details.
    4490             :                  */
    4491          20 :                 ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen);
    4492          20 :                 if (ret < 0)
    4493           0 :                         goto out;
    4494          20 :                 if (ret) {
    4495          11 :                         ret = send_rmdir(sctx, valid_path);
    4496          11 :                         if (ret < 0)
    4497           0 :                                 goto out;
    4498           9 :                 } else if (!is_orphan) {
    4499           9 :                         ret = orphanize_inode(sctx, sctx->cur_ino,
    4500             :                                         sctx->cur_inode_gen, valid_path);
    4501           9 :                         if (ret < 0)
    4502           0 :                                 goto out;
    4503             :                         is_orphan = 1;
    4504             :                 }
    4505             : 
    4506          40 :                 list_for_each_entry(cur, &sctx->deleted_refs, list) {
    4507          20 :                         ret = dup_ref(cur, &check_dirs);
    4508          20 :                         if (ret < 0)
    4509           0 :                                 goto out;
    4510             :                 }
    4511      605883 :         } else if (S_ISDIR(sctx->cur_inode_mode) &&
    4512         915 :                    !list_empty(&sctx->deleted_refs)) {
    4513             :                 /*
    4514             :                  * We have a moved dir. Add the old parent to check_dirs
    4515             :                  */
    4516         292 :                 cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
    4517             :                                 list);
    4518         292 :                 ret = dup_ref(cur, &check_dirs);
    4519         292 :                 if (ret < 0)
    4520           0 :                         goto out;
    4521      605591 :         } else if (!S_ISDIR(sctx->cur_inode_mode)) {
    4522             :                 /*
    4523             :                  * We have a non dir inode. Go through all deleted refs and
    4524             :                  * unlink them if they were not already overwritten by other
    4525             :                  * inodes.
    4526             :                  */
    4527      607144 :                 list_for_each_entry(cur, &sctx->deleted_refs, list) {
    4528        2179 :                         ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen,
    4529             :                                         sctx->cur_ino, sctx->cur_inode_gen,
    4530        2179 :                                         cur->name, cur->name_len);
    4531        2176 :                         if (ret < 0)
    4532           0 :                                 goto out;
    4533        2176 :                         if (!ret) {
    4534             :                                 /*
    4535             :                                  * If we orphanized any ancestor before, we need
    4536             :                                  * to recompute the full path for deleted names,
    4537             :                                  * since any such path was computed before we
    4538             :                                  * processed any references and orphanized any
    4539             :                                  * ancestor inode.
    4540             :                                  */
    4541        1994 :                                 if (orphanized_ancestor) {
    4542           4 :                                         ret = update_ref_path(sctx, cur);
    4543           4 :                                         if (ret < 0)
    4544           0 :                                                 goto out;
    4545             :                                 }
    4546        1994 :                                 ret = send_unlink(sctx, cur->full_path);
    4547        1994 :                                 if (ret < 0)
    4548           0 :                                         goto out;
    4549             :                         }
    4550        2176 :                         ret = dup_ref(cur, &check_dirs);
    4551        2176 :                         if (ret < 0)
    4552           0 :                                 goto out;
    4553             :                 }
    4554             :                 /*
    4555             :                  * If the inode is still orphan, unlink the orphan. This may
    4556             :                  * happen when a previous inode did overwrite the first ref
    4557             :                  * of this inode and no new refs were added for the current
    4558             :                  * inode. Unlinking does not mean that the inode is deleted in
    4559             :                  * all cases. There may still be links to this inode in other
    4560             :                  * places.
    4561             :                  */
    4562      604965 :                 if (is_orphan) {
    4563           5 :                         ret = send_unlink(sctx, valid_path);
    4564           5 :                         if (ret < 0)
    4565           0 :                                 goto out;
    4566             :                 }
    4567             :         }
    4568             : 
    4569             :         /*
    4570             :          * We did collect all parent dirs where cur_inode was once located. We
    4571             :          * now go through all these dirs and check if they are pending for
    4572             :          * deletion and if it's finally possible to perform the rmdir now.
    4573             :          * We also update the inode stats of the parent dirs here.
    4574             :          */
    4575     1216211 :         list_for_each_entry(cur, &check_dirs, list) {
    4576             :                 /*
    4577             :                  * In case we had refs into dirs that were not processed yet,
    4578             :                  * we don't need to do the utime and rmdir logic for these dirs.
    4579             :                  * The dir will be processed later.
    4580             :                  */
    4581      610306 :                 if (cur->dir > sctx->cur_ino)
    4582        1085 :                         continue;
    4583             : 
    4584      609221 :                 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen, NULL, NULL);
    4585      609226 :                 if (ret < 0)
    4586           0 :                         goto out;
    4587             : 
    4588      609226 :                 if (ret == inode_state_did_create ||
    4589      609226 :                     ret == inode_state_no_change) {
    4590      609204 :                         ret = cache_dir_utimes(sctx, cur->dir, cur->dir_gen);
    4591      609204 :                         if (ret < 0)
    4592           0 :                                 goto out;
    4593          22 :                 } else if (ret == inode_state_did_delete &&
    4594          22 :                            cur->dir != last_dir_ino_rm) {
    4595          21 :                         ret = can_rmdir(sctx, cur->dir, cur->dir_gen);
    4596          21 :                         if (ret < 0)
    4597           0 :                                 goto out;
    4598          21 :                         if (ret) {
    4599           7 :                                 ret = get_cur_path(sctx, cur->dir,
    4600             :                                                    cur->dir_gen, valid_path);
    4601           7 :                                 if (ret < 0)
    4602           0 :                                         goto out;
    4603           7 :                                 ret = send_rmdir(sctx, valid_path);
    4604           7 :                                 if (ret < 0)
    4605           0 :                                         goto out;
    4606           7 :                                 last_dir_ino_rm = cur->dir;
    4607             :                         }
    4608             :                 }
    4609             :         }
    4610             : 
    4611             :         ret = 0;
    4612             : 
    4613      605905 : out:
    4614      605905 :         __free_recorded_refs(&check_dirs);
    4615      605905 :         free_recorded_refs(sctx);
    4616      605903 :         fs_path_free(valid_path);
    4617      605905 :         return ret;
    4618             : }
    4619             : 
    /*
     * Three-way comparison used for rbtree lookups of recorded refs.
     *
     * @k:    lookup key; interpreted as a struct recorded_ref.
     * @node: rb_node embedded in the struct recorded_ref to compare against.
     *
     * Fields are compared in this order: dir, dir_gen, name_len and finally
     * the name itself via strcmp(). Returns 1 when the key sorts after the
     * node's ref, -1 when it sorts before, and 0 when all four are equal.
     *
     * NOTE(review): strcmp() is only reached once both name_len values are
     * equal, and it relies on both names being NUL-terminated - confirm
     * against whatever set_ref_path() stores in ->name.
     */
    4620       78350 : static int rbtree_ref_comp(const void *k, const struct rb_node *node)
    4621             : {
    4622       78350 :         const struct recorded_ref *data = k;
    4623       78350 :         const struct recorded_ref *ref = rb_entry(node, struct recorded_ref, node);
    4624       78350 :         int result;
    4625             : 
    4626       78350 :         if (data->dir > ref->dir)
    4627             :                 return 1;
    4628       76933 :         if (data->dir < ref->dir)
    4629             :                 return -1;
    4630       76933 :         if (data->dir_gen > ref->dir_gen)
    4631             :                 return 1;
    4632       76933 :         if (data->dir_gen < ref->dir_gen)
    4633             :                 return -1;
    4634       76933 :         if (data->name_len > ref->name_len)
    4635             :                 return 1;
    4636       71796 :         if (data->name_len < ref->name_len)
    4637             :                 return -1;
                               /* Same directory and same length: the bytes decide. */
    4638       62679 :         result = strcmp(data->name, ref->name);
    4639       62679 :         if (result > 0)
    4640             :                 return 1;
    4641        9205 :         if (result < 0)
    4642        8198 :                 return -1;
    4643             :         return 0;
    4644             : }
    4645             : 
    /*
     * "Less than" predicate passed to rb_add() by record_ref_in_tree().
     *
     * Returns true when the recorded_ref embedding @node sorts strictly
     * before the one embedding @parent, using the same ordering as
     * rbtree_ref_comp().
     */
    4646             : static bool rbtree_ref_less(struct rb_node *node, const struct rb_node *parent)
    4647             : {
    4648       60261 :         const struct recorded_ref *entry = rb_entry(node, struct recorded_ref, node);
    4649             : 
    4650       60261 :         return rbtree_ref_comp(entry, parent) < 0;
    4651             : }
    4652             : 
    /*
     * Allocate a recorded_ref for (@dir, @dir_gen, @name) and register it in
     * both containers supplied by the caller.
     *
     * @root:    rbtree the new ref is inserted into (ordered by rbtree_ref_less).
     * @refs:    list the new ref is appended to.
     * @name:    name component that is appended to the directory's path.
     * @dir:     inode number of the parent directory.
     * @dir_gen: generation of the parent directory.
     * @sctx:    send context, handed to get_cur_path().
     *
     * Returns 0 on success, -ENOMEM when either allocation fails, or the
     * negative value returned by get_cur_path() / fs_path_add_path().
     */
    4653      611308 : static int record_ref_in_tree(struct rb_root *root, struct list_head *refs,
    4654             :                               struct fs_path *name, u64 dir, u64 dir_gen,
    4655             :                               struct send_ctx *sctx)
    4656             : {
    4657      611308 :         int ret = 0;
    4658      611308 :         struct fs_path *path = NULL;
    4659      611308 :         struct recorded_ref *ref = NULL;
    4660             : 
    4661      611308 :         path = fs_path_alloc();
    4662      611299 :         if (!path) {
    4663           0 :                 ret = -ENOMEM;
    4664           0 :                 goto out;
    4665             :         }
    4666             : 
    4667      611299 :         ref = recorded_ref_alloc();
    4668      611302 :         if (!ref) {
    4669           0 :                 ret = -ENOMEM;
    4670           0 :                 goto out;
    4671             :         }
    4672             : 
                               /* Build "<path of dir>/<name>" in the freshly allocated path. */
    4673      611302 :         ret = get_cur_path(sctx, dir, dir_gen, path);
    4674      611293 :         if (ret < 0)
    4675           0 :                 goto out;
    4676      611293 :         ret = fs_path_add_path(path, name);
    4677      611288 :         if (ret < 0)
    4678           0 :                 goto out;
    4679             : 
    4680      611288 :         ref->dir = dir;
    4681      611288 :         ref->dir_gen = dir_gen;
                               /* Presumably hands ownership of path to ref - confirm in set_ref_path(). */
    4682      611288 :         set_ref_path(ref, path);
    4683      611291 :         list_add_tail(&ref->list, refs);
    4684      611300 :         rb_add(&ref->node, root, rbtree_ref_less);
                               /* Remember which tree the ref lives in. */
    4685      611262 :         ref->root = root;
    4686      611262 : out:
    4687      611262 :         if (ret) {
                                       /*
                                        * Free the path directly only when no ref holds it in
                                        * ->full_path; otherwise it is left to recorded_ref_free().
                                        * NOTE(review): recorded_ref_free() is called even when
                                        * ref is NULL - presumably it tolerates that; confirm.
                                        */
    4688           0 :                 if (path && (!ref || !ref->full_path))
    4689           0 :                         fs_path_free(path);
    4690           0 :                 recorded_ref_free(ref);
    4691             :         }
    4692      611262 :         return ret;
    4693             : }
    4694             : 
    /*
     * Callback passed to iterate_inode_ref() (see record_new_ref(),
     * record_changed_ref() and process_all_refs()).
     *
     * @num:   not used by this function.
     * @dir:   inode number of the directory holding the ref.
     * @index: not used by this function.
     * @name:  name of the ref.
     * @ctx:   the struct send_ctx.
     *
     * Looks up the generation of @dir in the send root and checks whether an
     * identical (dir, dir_gen, name) entry is already in rbtree_deleted_refs.
     * If so, that deleted entry is released and nothing is recorded as new;
     * otherwise the ref is recorded in rbtree_new_refs / new_refs.
     *
     * Returns 0 on success or a negative value from get_inode_gen() or
     * record_ref_in_tree().
     */
    4695      608816 : static int record_new_ref_if_needed(int num, u64 dir, int index,
    4696             :                                     struct fs_path *name, void *ctx)
    4697             : {
    4698      608816 :         int ret = 0;
    4699      608816 :         struct send_ctx *sctx = ctx;
    4700      608816 :         struct rb_node *node = NULL;
    4701      608816 :         struct recorded_ref data;
    4702      608816 :         struct recorded_ref *ref;
    4703      608816 :         u64 dir_gen;
    4704             : 
    4705      608816 :         ret = get_inode_gen(sctx->send_root, dir, &dir_gen);
    4706      608830 :         if (ret < 0)
    4707           0 :                 goto out;
    4708             : 
                               /*
                                * data is a stack-only search key. Only dir, dir_gen and
                                * whatever set_ref_path() fills in are initialised; presumably
                                * that covers the name/name_len read by rbtree_ref_comp().
                                */
    4709      608830 :         data.dir = dir;
    4710      608830 :         data.dir_gen = dir_gen;
    4711      608830 :         set_ref_path(&data, name);
    4712      608816 :         node = rb_find(&data, &sctx->rbtree_deleted_refs, rbtree_ref_comp);
    4713      608816 :         if (node) {
    4714           0 :                 ref = rb_entry(node, struct recorded_ref, node);
    4715           0 :                 recorded_ref_free(ref);
    4716             :         } else {
    4717      608816 :                 ret = record_ref_in_tree(&sctx->rbtree_new_refs,
    4718             :                                          &sctx->new_refs, name, dir, dir_gen,
    4719             :                                          sctx);
    4720             :         }
    4721      608777 : out:
    4722      608777 :         return ret;
    4723             : }
    4724             : 
    /*
     * Callback passed to iterate_inode_ref() (see record_deleted_ref(),
     * record_changed_ref() and process_all_refs()). Mirror image of
     * record_new_ref_if_needed().
     *
     * @num:   not used by this function.
     * @dir:   inode number of the directory holding the ref.
     * @index: not used by this function.
     * @name:  name of the ref.
     * @ctx:   the struct send_ctx.
     *
     * Looks up the generation of @dir in the parent root and checks whether
     * an identical (dir, dir_gen, name) entry is already in rbtree_new_refs.
     * If so, that new entry is released and nothing is recorded as deleted;
     * otherwise the ref is recorded in rbtree_deleted_refs / deleted_refs.
     *
     * Returns 0 on success or a negative value from get_inode_gen() or
     * record_ref_in_tree().
     */
    4725        3495 : static int record_deleted_ref_if_needed(int num, u64 dir, int index,
    4726             :                                         struct fs_path *name, void *ctx)
    4727             : {
    4728        3495 :         int ret = 0;
    4729        3495 :         struct send_ctx *sctx = ctx;
    4730        3495 :         struct rb_node *node = NULL;
    4731        3495 :         struct recorded_ref data;
    4732        3495 :         struct recorded_ref *ref;
    4733        3495 :         u64 dir_gen;
    4734             : 
    4735        3495 :         ret = get_inode_gen(sctx->parent_root, dir, &dir_gen);
    4736        3495 :         if (ret < 0)
    4737           0 :                 goto out;
    4738             : 
                               /* Stack-only search key; see rbtree_ref_comp() for the fields read. */
    4739        3495 :         data.dir = dir;
    4740        3495 :         data.dir_gen = dir_gen;
    4741        3495 :         set_ref_path(&data, name);
    4742        3495 :         node = rb_find(&data, &sctx->rbtree_new_refs, rbtree_ref_comp);
    4743        3495 :         if (node) {
    4744        1007 :                 ref = rb_entry(node, struct recorded_ref, node);
    4745        1007 :                 recorded_ref_free(ref);
    4746             :         } else {
    4747        2488 :                 ret = record_ref_in_tree(&sctx->rbtree_deleted_refs,
    4748             :                                          &sctx->deleted_refs, name, dir,
    4749             :                                          dir_gen, sctx);
    4750             :         }
    4751        3495 : out:
    4752        3495 :         return ret;
    4753             : }
    4754             : 
    /*
     * Run record_new_ref_if_needed() over the refs found via
     * sctx->left_path / sctx->cmp_key in the send root.
     *
     * Returns the negative value from iterate_inode_ref() on failure; any
     * non-negative result is normalised to 0.
     */
    4755      606511 : static int record_new_ref(struct send_ctx *sctx)
    4756             : {
    4757      606511 :         int ret;
    4758             : 
    4759      606511 :         ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
    4760             :                                 sctx->cmp_key, 0, record_new_ref_if_needed, sctx);
    4761      606518 :         if (ret < 0)
    4762             :                 goto out;
    4763             :         ret = 0;
    4764             : 
    4765             : out:
    4766      606518 :         return ret;
    4767             : }
    4768             : 
    /*
     * Run record_deleted_ref_if_needed() over the refs found via
     * sctx->right_path / sctx->cmp_key in the parent root.
     *
     * Returns the negative value from iterate_inode_ref() on failure; any
     * non-negative result is normalised to 0.
     */
    4769        2189 : static int record_deleted_ref(struct send_ctx *sctx)
    4770             : {
    4771        2189 :         int ret;
    4772             : 
    4773        2189 :         ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
    4774             :                                 sctx->cmp_key, 0, record_deleted_ref_if_needed,
    4775             :                                 sctx);
    4776        2189 :         if (ret < 0)
    4777             :                 goto out;
    4778             :         ret = 0;
    4779             : 
    4780             : out:
    4781        2189 :         return ret;
    4782             : }
    4783             : 
    /*
     * Record both sides of a changed ref item: first the refs from the send
     * root (left_path) through record_new_ref_if_needed(), then the refs from
     * the parent root (right_path) through record_deleted_ref_if_needed().
     * The two callbacks drop entries that show up on both sides.
     *
     * Returns the first negative value from iterate_inode_ref(), otherwise 0.
     */
    4784          53 : static int record_changed_ref(struct send_ctx *sctx)
    4785             : {
    4786          53 :         int ret = 0;
    4787             : 
    4788          53 :         ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
    4789             :                         sctx->cmp_key, 0, record_new_ref_if_needed, sctx);
    4790          53 :         if (ret < 0)
    4791           0 :                 goto out;
    4792          53 :         ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
    4793             :                         sctx->cmp_key, 0, record_deleted_ref_if_needed, sctx);
    4794          53 :         if (ret < 0)
    4795             :                 goto out;
    4796             :         ret = 0;
    4797             : 
    4798          53 : out:
    4799          53 :         return ret;
    4800             : }
    4801             : 
    4802             : /*
    4803             :  * Record and process all refs at once. Needed when an inode changes the
    4804             :  * generation number, which means that it was deleted and recreated.
    4805             :  */
    /*
     * @sctx: send context; sctx->cmp_key->objectid selects the inode.
     * @cmd:  BTRFS_COMPARE_TREE_NEW walks the send root and records new refs,
     *        BTRFS_COMPARE_TREE_DELETED walks the parent root and records
     *        deleted refs. Any other value is logged and rejected.
     *
     * Returns -ENOMEM if the path cannot be allocated, -EINVAL for an
     * unsupported @cmd, a negative value from the iteration, or otherwise
     * the result of process_recorded_refs().
     */
    4806          17 : static int process_all_refs(struct send_ctx *sctx,
    4807             :                             enum btrfs_compare_tree_result cmd)
    4808             : {
    4809          17 :         int ret = 0;
    4810          17 :         int iter_ret = 0;
    4811          17 :         struct btrfs_root *root;
    4812          17 :         struct btrfs_path *path;
    4813          17 :         struct btrfs_key key;
    4814          17 :         struct btrfs_key found_key;
    4815          17 :         iterate_inode_ref_t cb;
    4816          17 :         int pending_move = 0;
    4817             : 
    4818          17 :         path = alloc_path_for_send();
    4819          17 :         if (!path)
    4820             :                 return -ENOMEM;
    4821             : 
    4822          17 :         if (cmd == BTRFS_COMPARE_TREE_NEW) {
    4823           7 :                 root = sctx->send_root;
    4824           7 :                 cb = record_new_ref_if_needed;
    4825          10 :         } else if (cmd == BTRFS_COMPARE_TREE_DELETED) {
    4826          10 :                 root = sctx->parent_root;
    4827          10 :                 cb = record_deleted_ref_if_needed;
    4828             :         } else {
    4829           0 :                 btrfs_err(sctx->send_root->fs_info,
    4830             :                                 "Wrong command %d in process_all_refs", cmd);
    4831           0 :                 ret = -EINVAL;
    4832           0 :                 goto out;
    4833             :         }
    4834             : 
                               /* Start at the first possible INODE_REF key of the inode. */
    4835          17 :         key.objectid = sctx->cmp_key->objectid;
    4836          17 :         key.type = BTRFS_INODE_REF_KEY;
    4837          17 :         key.offset = 0;
    4838          34 :         btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
                                       /* Stop at the first key that is not a ref/extref of this inode. */
    4839          31 :                 if (found_key.objectid != key.objectid ||
    4840          24 :                     (found_key.type != BTRFS_INODE_REF_KEY &&
    4841             :                      found_key.type != BTRFS_INODE_EXTREF_KEY))
    4842             :                         break;
    4843             : 
    4844          17 :                 ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx);
    4845          17 :                 if (ret < 0)
    4846           0 :                         goto out;
    4847             :         }
    4848             :         /* Catch error found during iteration */
    4849          17 :         if (iter_ret < 0) {
    4850           0 :                 ret = iter_ret;
    4851           0 :                 goto out;
    4852             :         }
    4853          17 :         btrfs_release_path(path);
    4854             : 
    4855             :         /*
    4856             :          * We don't actually care about pending_move as we are simply
    4857             :          * re-creating this inode and will be rename'ing it into place once we
    4858             :          * rename the parent directory.
    4859             :          */
    4860          17 :         ret = process_recorded_refs(sctx, &pending_move);
    4861          17 : out:
    4862          17 :         btrfs_free_path(path);
    4863          17 :         return ret;
    4864             : }
    4865             : 
    /*
     * Emit one BTRFS_SEND_C_SET_XATTR command.
     *
     * @sctx:     send context the command is built in.
     * @path:     path attribute (BTRFS_SEND_A_PATH) of the command.
     * @name:     xattr name, written as BTRFS_SEND_A_XATTR_NAME.
     * @name_len: length of @name.
     * @data:     xattr value, written as BTRFS_SEND_A_XATTR_DATA.
     * @data_len: length of @data.
     *
     * Returns the negative value from begin_cmd() if it fails, otherwise the
     * result of send_cmd(). The TLV_PUT* macros presumably jump to
     * tlv_put_failure with ret set when an attribute cannot be added -
     * confirm against their definition.
     */
    4866      800319 : static int send_set_xattr(struct send_ctx *sctx,
    4867             :                           struct fs_path *path,
    4868             :                           const char *name, int name_len,
    4869             :                           const char *data, int data_len)
    4870             : {
    4871      800319 :         int ret = 0;
    4872             : 
    4873      800319 :         ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
    4874      800319 :         if (ret < 0)
    4875           0 :                 goto out;
    4876             : 
    4877      800319 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
    4878      800319 :         TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
    4879      800319 :         TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len);
    4880             : 
    4881      800319 :         ret = send_cmd(sctx);
    4882             : 
    4883      800319 : tlv_put_failure:
    4884      800319 : out:
    4885      800319 :         return ret;
    4886             : }
    4887             : 
    /*
     * Emit one BTRFS_SEND_C_REMOVE_XATTR command.
     *
     * @sctx:     send context the command is built in.
     * @path:     path attribute (BTRFS_SEND_A_PATH) of the command.
     * @name:     xattr name, written as BTRFS_SEND_A_XATTR_NAME.
     * @name_len: length of @name.
     *
     * Returns the negative value from begin_cmd() if it fails, otherwise the
     * result of send_cmd(). The TLV_PUT* macros presumably jump to
     * tlv_put_failure with ret set on error - confirm against their
     * definition.
     */
    4888          30 : static int send_remove_xattr(struct send_ctx *sctx,
    4889             :                           struct fs_path *path,
    4890             :                           const char *name, int name_len)
    4891             : {
    4892          30 :         int ret = 0;
    4893             : 
    4894          30 :         ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR);
    4895          30 :         if (ret < 0)
    4896           0 :                 goto out;
    4897             : 
    4898          30 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
    4899          30 :         TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
    4900             : 
    4901          30 :         ret = send_cmd(sctx);
    4902             : 
    4903          30 : tlv_put_failure:
    4904          30 : out:
    4905          30 :         return ret;
    4906             : }
    4907             : 
    /*
     * Callback for iterate_dir_item() (see process_new_xattr()); also called
     * directly by __process_changed_new_xattr(). Sends a SET_XATTR command
     * for one xattr of the current inode.
     *
     * @num:      not used by this function.
     * @di_key:   not used by this function.
     * @name:     xattr name.
     * @name_len: length of @name.
     * @data:     xattr value.
     * @data_len: length of @data.
     * @ctx:      the struct send_ctx.
     *
     * Returns 0 when the xattr is skipped, -ENOMEM if no path can be
     * allocated, a negative value from get_cur_path(), or the result of
     * send_set_xattr().
     *
     * NOTE(review): the three strncmp() calls below are bounded by @name_len
     * only, so a @name that is a strict prefix of the compared constant also
     * compares equal. __find_xattr() additionally checks the lengths match;
     * confirm whether the prefix match here is intended.
     */
    4908      800315 : static int __process_new_xattr(int num, struct btrfs_key *di_key,
    4909             :                                const char *name, int name_len, const char *data,
    4910             :                                int data_len, void *ctx)
    4911             : {
    4912      800315 :         int ret;
    4913      800315 :         struct send_ctx *sctx = ctx;
    4914      800315 :         struct fs_path *p;
    4915      800315 :         struct posix_acl_xattr_header dummy_acl;
    4916             : 
    4917             :         /* Capabilities are emitted by finish_inode_if_needed */
    4918      800315 :         if (!strncmp(name, XATTR_NAME_CAPS, name_len))
    4919             :                 return 0;
    4920             : 
    4921      800303 :         p = fs_path_alloc();
    4922      800303 :         if (!p)
    4923             :                 return -ENOMEM;
    4924             : 
    4925             :         /*
    4926             :          * This hack is needed because empty acls are stored as zero byte
    4927             :          * data in xattrs. Problem with that is, that receiving these zero byte
    4928             :          * acls will fail later. To fix this, we send a dummy acl list that
    4929             :          * only contains the version number and no entries.
    4930             :          */
    4931      800303 :         if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) ||
    4932      800303 :             !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) {
    4933           0 :                 if (data_len == 0) {
                                               /* dummy_acl is on the stack; it is only used until send_set_xattr() returns. */
    4934           0 :                         dummy_acl.a_version =
    4935             :                                         cpu_to_le32(POSIX_ACL_XATTR_VERSION);
    4936           0 :                         data = (char *)&dummy_acl;
    4937           0 :                         data_len = sizeof(dummy_acl);
    4938             :                 }
    4939             :         }
    4940             : 
    4941      800303 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
    4942      800303 :         if (ret < 0)
    4943           0 :                 goto out;
    4944             : 
    4945      800303 :         ret = send_set_xattr(sctx, p, name, name_len, data, data_len);
    4946             : 
    4947      800303 : out:
    4948      800303 :         fs_path_free(p);
    4949      800303 :         return ret;
    4950             : }
    4951             : 
    /*
     * Callback for iterate_dir_item() (see process_deleted_xattr()). Sends a
     * REMOVE_XATTR command for one xattr of the current inode.
     *
     * @num, @di_key, @data and @data_len are not used by this function.
     * @name:     xattr name.
     * @name_len: length of @name.
     * @ctx:      the struct send_ctx.
     *
     * Returns -ENOMEM if no path can be allocated, a negative value from
     * get_cur_path(), or the result of send_remove_xattr().
     */
    4952          30 : static int __process_deleted_xattr(int num, struct btrfs_key *di_key,
    4953             :                                    const char *name, int name_len,
    4954             :                                    const char *data, int data_len, void *ctx)
    4955             : {
    4956          30 :         int ret;
    4957          30 :         struct send_ctx *sctx = ctx;
    4958          30 :         struct fs_path *p;
    4959             : 
    4960          30 :         p = fs_path_alloc();
    4961          30 :         if (!p)
    4962             :                 return -ENOMEM;
    4963             : 
    4964          30 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
    4965          30 :         if (ret < 0)
    4966           0 :                 goto out;
    4967             : 
    4968          30 :         ret = send_remove_xattr(sctx, p, name, name_len);
    4969             : 
    4970          30 : out:
    4971          30 :         fs_path_free(p);
    4972          30 :         return ret;
    4973             : }
    4974             : 
    /*
     * Run __process_new_xattr() over the dir item found via sctx->left_path
     * in the send root and return whatever iterate_dir_item() returns.
     */
    4975      800289 : static int process_new_xattr(struct send_ctx *sctx)
    4976             : {
    4977      800289 :         int ret = 0;
    4978             : 
    4979      800289 :         ret = iterate_dir_item(sctx->send_root, sctx->left_path,
    4980             :                                __process_new_xattr, sctx);
    4981             : 
    4982      800289 :         return ret;
    4983             : }
    4984             : 
    /*
     * Run __process_deleted_xattr() over the dir item found via
     * sctx->right_path in the parent root and return whatever
     * iterate_dir_item() returns.
     */
    4985          30 : static int process_deleted_xattr(struct send_ctx *sctx)
    4986             : {
    4987          30 :         return iterate_dir_item(sctx->parent_root, sctx->right_path,
    4988             :                                 __process_deleted_xattr, sctx);
    4989             : }
    4990             : 
    /*
     * State shared between find_xattr() and its __find_xattr() callback.
     *
     * name / name_len:  the xattr name being searched for (input).
     * found_idx:        the callback's num argument at the match; find_xattr()
     *                   initialises it to -1, which means "not found".
     * found_data:       kmemdup()'ed copy of the matching xattr's value.
     * found_data_len:   length of found_data.
     */
    4991             : struct find_xattr_ctx {
    4992             :         const char *name;
    4993             :         int name_len;
    4994             :         int found_idx;
    4995             :         char *found_data;
    4996             :         int found_data_len;
    4997             : };
    4998             : 
    /*
     * Callback for iterate_dir_item() used by find_xattr().
     *
     * An entry matches when its name has exactly ctx->name_len bytes and
     * those bytes equal ctx->name. On a match the index and a GFP_KERNEL
     * copy of the value are stored in the find_xattr_ctx passed as @vctx.
     *
     * Returns 1 on a match, -ENOMEM if the copy cannot be allocated, and 0
     * for a non-matching entry. Presumably a positive return makes
     * iterate_dir_item() stop iterating - confirm there.
     */
    4999          60 : static int __find_xattr(int num, struct btrfs_key *di_key, const char *name,
    5000             :                         int name_len, const char *data, int data_len, void *vctx)
    5001             : {
    5002          60 :         struct find_xattr_ctx *ctx = vctx;
    5003             : 
    5004          60 :         if (name_len == ctx->name_len &&
    5005          60 :             strncmp(name, ctx->name, name_len) == 0) {
    5006          60 :                 ctx->found_idx = num;
    5007          60 :                 ctx->found_data_len = data_len;
    5008          60 :                 ctx->found_data = kmemdup(data, data_len, GFP_KERNEL);
    5009          60 :                 if (!ctx->found_data)
    5010             :                         return -ENOMEM;
    5011          60 :                 return 1;
    5012             :         }
    5013             :         return 0;
    5014             : }
    5015             : 
    /*
     * Search the dir item at @path in @root for an xattr called @name.
     *
     * @root:     tree to search.
     * @path:     path handed to iterate_dir_item().
     * @key:      not referenced by this function.
     * @name:     xattr name to look for.
     * @name_len: length of @name.
     * @data:     optional out parameter. When non-NULL it receives the
     *            kmemdup()'ed value and the caller must kfree() it; when
     *            NULL the copy is freed here.
     * @data_len: receives the value length; written only when @data is
     *            non-NULL, in which case it must be a valid pointer.
     *
     * Returns the index of the matching entry (>= 0), -ENOENT when no entry
     * matches, or a negative value from iterate_dir_item().
     */
    5016          60 : static int find_xattr(struct btrfs_root *root,
    5017             :                       struct btrfs_path *path,
    5018             :                       struct btrfs_key *key,
    5019             :                       const char *name, int name_len,
    5020             :                       char **data, int *data_len)
    5021             : {
    5022          60 :         int ret;
    5023          60 :         struct find_xattr_ctx ctx;
    5024             : 
    5025          60 :         ctx.name = name;
    5026          60 :         ctx.name_len = name_len;
    5027          60 :         ctx.found_idx = -1;
    5028          60 :         ctx.found_data = NULL;
    5029          60 :         ctx.found_data_len = 0;
    5030             : 
    5031          60 :         ret = iterate_dir_item(root, path, __find_xattr, &ctx);
    5032          60 :         if (ret < 0)
    5033             :                 return ret;
    5034             : 
    5035          60 :         if (ctx.found_idx == -1)
    5036             :                 return -ENOENT;
    5037          60 :         if (data) {
    5038          30 :                 *data = ctx.found_data;
    5039          30 :                 *data_len = ctx.found_data_len;
    5040             :         } else {
    5041          30 :                 kfree(ctx.found_data);
    5042             :         }
    5043          60 :         return ctx.found_idx;
    5044             : }
    5045             : 
    5046             : 
    /*
     * Callback with the iterate_dir_item() signature, handling one xattr of
     * a changed xattr item.
     *
     * Looks the same name up in the parent root (sctx->right_path):
     *  - not found there (-ENOENT): the xattr is sent via __process_new_xattr();
     *  - found with a different length or different bytes: it is sent again
     *    via __process_new_xattr();
     *  - found with identical data: nothing is sent and 0 is returned;
     *  - any other negative value from find_xattr() is returned unchanged.
     *
     * The copy handed back by find_xattr() is always released before
     * returning (kfree() is also called when found_data is still NULL).
     */
    5047          30 : static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
    5048             :                                        const char *name, int name_len,
    5049             :                                        const char *data, int data_len,
    5050             :                                        void *ctx)
    5051             : {
    5052          30 :         int ret;
    5053          30 :         struct send_ctx *sctx = ctx;
    5054          30 :         char *found_data = NULL;
    5055          30 :         int found_data_len  = 0;
    5056             : 
    5057          30 :         ret = find_xattr(sctx->parent_root, sctx->right_path,
    5058             :                          sctx->cmp_key, name, name_len, &found_data,
    5059             :                          &found_data_len);
    5060          30 :         if (ret == -ENOENT) {
    5061           0 :                 ret = __process_new_xattr(num, di_key, name, name_len, data,
    5062             :                                           data_len, ctx);
    5063          30 :         } else if (ret >= 0) {
    5064          39 :                 if (data_len != found_data_len ||
    5065           9 :                     memcmp(data, found_data, data_len)) {
    5066          26 :                         ret = __process_new_xattr(num, di_key, name, name_len,
    5067             :                                                   data, data_len, ctx);
    5068             :                 } else {
    5069             :                         ret = 0;
    5070             :                 }
    5071             :         }
    5072             : 
    5073          30 :         kfree(found_data);
    5074          30 :         return ret;
    5075             : }
    5076             : /*
                     :  * iterate_dir_item() callback used by process_changed_xattr() for the
                     :  * xattrs of the parent root.
                     :  *
                     :  * Looks up an xattr with the same @name in the send root (via
                     :  * sctx->left_path).  Only the existence matters, so no data buffer is
                     :  * requested from find_xattr().  If the name is missing (-ENOENT) the xattr
                     :  * is handed to __process_deleted_xattr(); if it exists 0 is returned.  Any
                     :  * other negative result of find_xattr() is returned unchanged.
                     :  */
    5077          30 : static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
    5078             :                                            const char *name, int name_len,
    5079             :                                            const char *data, int data_len,
    5080             :                                            void *ctx)
    5081             : {
    5082          30 :         int ret;
    5083          30 :         struct send_ctx *sctx = ctx;
    5084             : 
    5085          30 :         ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key,
    5086             :                          name, name_len, NULL, NULL);
    5087          30 :         if (ret == -ENOENT)
    5088           0 :                 ret = __process_deleted_xattr(num, di_key, name, name_len, data,
    5089             :                                               data_len, ctx);
    5090          30 :         else if (ret >= 0)
    5091             :                 ret = 0;
    5092             : 
    5093          30 :         return ret;
    5094             : }
    5095             : /*
                     :  * Handle an xattr item that differs between the send root and the parent
                     :  * root, in two passes:
                     :  *
                     :  *  1) walk the send root's item (left_path) with
                     :  *     __process_changed_new_xattr;
                     :  *  2) walk the parent root's item (right_path) with
                     :  *     __process_changed_deleted_xattr.
                     :  *
                     :  * The second pass is skipped if the first one returns a negative value.
                     :  * Returns the result of the last iterate_dir_item() call made.
                     :  */
    5096          30 : static int process_changed_xattr(struct send_ctx *sctx)
    5097             : {
    5098          30 :         int ret = 0;
    5099             : 
    5100          30 :         ret = iterate_dir_item(sctx->send_root, sctx->left_path,
    5101             :                         __process_changed_new_xattr, sctx);
    5102          30 :         if (ret < 0)
    5103           0 :                 goto out;
    5104          30 :         ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
    5105             :                         __process_changed_deleted_xattr, sctx);
    5106             : 
    5107          30 : out:
    5108          30 :         return ret;
    5109             : }
    5110             : /*
                     :  * Run __process_new_xattr over every xattr item of the inode identified
                     :  * by sctx->cmp_key->objectid in the send root.
                     :  *
                     :  * The search starts at (objectid, BTRFS_XATTR_ITEM_KEY, 0) and stops at
                     :  * the first key whose objectid or type no longer matches.  A private path
                     :  * is allocated for the walk and freed before returning.
                     :  *
                     :  * Returns -ENOMEM if the path cannot be allocated, a negative value from
                     :  * iterate_dir_item() or from the slot iteration itself (iter_ret), or
                     :  * otherwise the last value stored in ret.
                     :  */
    5111           7 : static int process_all_new_xattrs(struct send_ctx *sctx)
    5112             : {
    5113           7 :         int ret = 0;
    5114           7 :         int iter_ret = 0;
    5115           7 :         struct btrfs_root *root;
    5116           7 :         struct btrfs_path *path;
    5117           7 :         struct btrfs_key key;
    5118           7 :         struct btrfs_key found_key;
    5119             : 
    5120           7 :         path = alloc_path_for_send();
    5121           7 :         if (!path)
    5122             :                 return -ENOMEM;
    5123             : 
    5124           7 :         root = sctx->send_root;
    5125             : 
    5126           7 :         key.objectid = sctx->cmp_key->objectid;
    5127           7 :         key.type = BTRFS_XATTR_ITEM_KEY;
    5128           7 :         key.offset = 0;
    5129           7 :         btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
    5130           5 :                 if (found_key.objectid != key.objectid ||
    5131           2 :                     found_key.type != key.type) {
    5132             :                         ret = 0;
    5133             :                         break;
    5134             :                 }
    5135             : 
    5136           0 :                 ret = iterate_dir_item(root, path, __process_new_xattr, sctx);
    5137           0 :                 if (ret < 0)
    5138             :                         break;
    5139             :         }
    5140             :         /* Catch error found during iteration */
    5141           7 :         if (iter_ret < 0)
    5142           0 :                 ret = iter_ret;
    5143             : 
    5144           7 :         btrfs_free_path(path);
    5145           7 :         return ret;
    5146             : }
    5147             : /*
                     :  * Emit a BTRFS_SEND_C_ENABLE_VERITY command for the file at @path.
                     :  *
                     :  * The command carries, taken from @desc: the hash algorithm, the block
                     :  * size (computed as 1 << log_blocksize), the salt (salt_size bytes) and
                     :  * the signature (sig_size bytes).
                     :  *
                     :  * Returns the result of begin_cmd() if that fails, otherwise whatever is
                     :  * in ret when the function ends (normally the result of send_cmd()).
                     :  *
                     :  * NOTE(review): the TLV_PUT* macros are defined outside this view; the
                     :  * tlv_put_failure label is not referenced explicitly here, so presumably
                     :  * the macros jump to it on failure - confirm against their definition.
                     :  */
    5148             : static int send_verity(struct send_ctx *sctx, struct fs_path *path,
    5149             :                        struct fsverity_descriptor *desc)
    5150             : {
    5151             :         int ret;
    5152             : 
    5153             :         ret = begin_cmd(sctx, BTRFS_SEND_C_ENABLE_VERITY);
    5154             :         if (ret < 0)
    5155             :                 goto out;
    5156             : 
    5157             :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
    5158             :         TLV_PUT_U8(sctx, BTRFS_SEND_A_VERITY_ALGORITHM,
    5159             :                         le8_to_cpu(desc->hash_algorithm));
    5160             :         TLV_PUT_U32(sctx, BTRFS_SEND_A_VERITY_BLOCK_SIZE,
    5161             :                         1U << le8_to_cpu(desc->log_blocksize));
    5162             :         TLV_PUT(sctx, BTRFS_SEND_A_VERITY_SALT_DATA, desc->salt,
    5163             :                         le8_to_cpu(desc->salt_size));
    5164             :         TLV_PUT(sctx, BTRFS_SEND_A_VERITY_SIG_DATA, desc->signature,
    5165             :                         le32_to_cpu(desc->sig_size));
    5166             : 
    5167             :         ret = send_cmd(sctx);
    5168             : 
    5169             : tlv_put_failure:
    5170             : out:
    5171             :         return ret;
    5172             : }
    5173             : /*
                     :  * Read the verity descriptor of the current inode (sctx->cur_ino in the
                     :  * send root) and send it with send_verity().
                     :  *
                     :  * btrfs_get_verity_descriptor() is called twice: first with a NULL buffer,
                     :  * whose non-negative result is treated as the descriptor size, and then
                     :  * with sctx->verity_descriptor and that size to fetch the descriptor.
                     :  * A size above FS_VERITY_MAX_DESCRIPTOR_SIZE yields -EMSGSIZE.
                     :  *
                     :  * sctx->verity_descriptor is allocated on first use with the maximum
                     :  * descriptor size and is left attached to @sctx; it is not freed here.
                     :  *
                     :  * The inode reference taken by btrfs_iget() is dropped on every path that
                     :  * follows a successful lookup.  Returns PTR_ERR() of a failed lookup,
                     :  * -ENOMEM on allocation failure, or the result of the last helper called.
                     :  */
    5174           0 : static int process_verity(struct send_ctx *sctx)
    5175             : {
    5176           0 :         int ret = 0;
    5177           0 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    5178           0 :         struct inode *inode;
    5179           0 :         struct fs_path *p;
    5180             : 
    5181           0 :         inode = btrfs_iget(fs_info->sb, sctx->cur_ino, sctx->send_root);
    5182           0 :         if (IS_ERR(inode))
    5183           0 :                 return PTR_ERR(inode);
    5184             :         /* Size query: no buffer is passed, ret is used as the size below. */
    5185           0 :         ret = btrfs_get_verity_descriptor(inode, NULL, 0);
    5186           0 :         if (ret < 0)
    5187           0 :                 goto iput;
    5188             : 
    5189             :         if (ret > FS_VERITY_MAX_DESCRIPTOR_SIZE) {
    5190             :                 ret = -EMSGSIZE;
    5191             :                 goto iput;
    5192             :         }
    5193             :         if (!sctx->verity_descriptor) {
    5194             :                 sctx->verity_descriptor = kvmalloc(FS_VERITY_MAX_DESCRIPTOR_SIZE,
    5195             :                                                    GFP_KERNEL);
    5196             :                 if (!sctx->verity_descriptor) {
    5197             :                         ret = -ENOMEM;
    5198             :                         goto iput;
    5199             :                 }
    5200             :         }
    5201             : 
    5202             :         ret = btrfs_get_verity_descriptor(inode, sctx->verity_descriptor, ret);
    5203             :         if (ret < 0)
    5204             :                 goto iput;
    5205             : 
    5206             :         p = fs_path_alloc();
    5207             :         if (!p) {
    5208             :                 ret = -ENOMEM;
    5209             :                 goto iput;
    5210             :         }
    5211             :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
    5212             :         if (ret < 0)
    5213             :                 goto free_path;
    5214             : 
    5215             :         ret = send_verity(sctx, p, sctx->verity_descriptor);
    5216             :         if (ret < 0)
    5217             :                 goto free_path;
    5218             :         /* Success falls through into the same cleanup as the error paths. */
    5219             : free_path:
    5220             :         fs_path_free(p);
    5221             : iput:
    5222           0 :         iput(inode);
    5223           0 :         return ret;
    5224             : }
    5225             : /*
                     :  * Size limit for one chunk of data placed in a command: the send buffer
                     :  * capacity (send_max_size) minus SZ_16K.
                     :  *
                     :  * NOTE(review): the 16K are presumably headroom for the command header
                     :  * and the non-data attributes - confirm.
                     :  */
    5226             : static inline u64 max_send_read_size(const struct send_ctx *sctx)
    5227             : {
    5228      632345 :         return sctx->send_max_size - SZ_16K;
    5229             : }
    5230             : /*
                     :  * Write the header of a BTRFS_SEND_A_DATA attribute at the current end of
                     :  * the send buffer and advance sctx->send_size past the header only.  The
                     :  * caller is expected to append the @len payload bytes itself.
                     :  *
                     :  * For sctx->proto >= 2 the header is just the 16-bit attribute type; for
                     :  * older protocols it is a struct btrfs_tlv_header holding the type and
                     :  * @len, each stored with put_unaligned_le16().
                     :  *
                     :  * Returns 0 on success, -EINVAL (with a one-time warning) if
                     :  * sctx->put_data is already set, or -EOVERFLOW if the remaining buffer
                     :  * space cannot hold the header plus @len bytes.  Note that sctx->put_data
                     :  * is set before the space check, so it stays set on -EOVERFLOW.
                     :  */
    5231     1262726 : static int put_data_header(struct send_ctx *sctx, u32 len)
    5232             : {
    5233     1262726 :         if (WARN_ON_ONCE(sctx->put_data))
    5234             :                 return -EINVAL;
    5235     1262726 :         sctx->put_data = true;
    5236     1262726 :         if (sctx->proto >= 2) {
    5237             :                 /*
    5238             :                  * Since v2, the data attribute header doesn't include a length,
    5239             :                  * it is implicitly to the end of the command.
    5240             :                  */
    5241           0 :                 if (sctx->send_max_size - sctx->send_size < sizeof(__le16) + len)
    5242             :                         return -EOVERFLOW;
    5243           0 :                 put_unaligned_le16(BTRFS_SEND_A_DATA, sctx->send_buf + sctx->send_size);
    5244           0 :                 sctx->send_size += sizeof(__le16);
    5245             :         } else {
    5246     1262726 :                 struct btrfs_tlv_header *hdr;
    5247             : 
    5248     1262726 :                 if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len)
    5249             :                         return -EOVERFLOW;
    5250     1262726 :                 hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size);
    5251     1262726 :                 put_unaligned_le16(BTRFS_SEND_A_DATA, &hdr->tlv_type);
    5252     1262726 :                 put_unaligned_le16(len, &hdr->tlv_len);
    5253     1262726 :                 sctx->send_size += sizeof(*hdr);
    5254             :         }
    5255             :         return 0;
    5256             : }
    5257             : /*
                     :  * Append a data attribute holding @len bytes of the current inode's
                     :  * contents, starting at file offset @offset, to the send buffer.
                     :  *
                     :  * After put_data_header() the bytes are copied page by page from the page
                     :  * cache of sctx->cur_inode.  For each page index in the range:
                     :  *  - the page is looked up locked; if absent, synchronous readahead is
                     :  *    started and the page is found or created (-ENOMEM on failure);
                     :  *  - a page flagged for readahead triggers asynchronous readahead;
                     :  *  - a page that is not up to date is read with btrfs_read_folio() and
                     :  *    re-checked; if it is still not up to date an error is logged and
                     :  *    -EIO is returned;
                     :  *  - the data is copied to send_buf + send_size, then the page is
                     :  *    unlocked and released and send_size is advanced.
                     :  *
                     :  * Only the first page is read from a non-zero in-page offset.
                     :  *
                     :  * Returns 0 on success, the error from put_data_header(), -ENOMEM or -EIO.
                     :  */
    5258     1234245 : static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
    5259             : {
    5260     1234245 :         struct btrfs_root *root = sctx->send_root;
    5261     1234245 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5262     1234245 :         struct page *page;
    5263     1234245 :         pgoff_t index = offset >> PAGE_SHIFT;
    5264     1234245 :         pgoff_t last_index;
    5265     1234245 :         unsigned pg_offset = offset_in_page(offset);
    5266     1234245 :         int ret;
    5267             : 
    5268     1234245 :         ret = put_data_header(sctx, len);
    5269     1234466 :         if (ret)
    5270             :                 return ret;
    5271             :         /* Index of the page that contains the last requested byte. */
    5272     1234466 :         last_index = (offset + len - 1) >> PAGE_SHIFT;
    5273             : 
    5274    10959571 :         while (index <= last_index) {
    5275     9724941 :                 unsigned cur_len = min_t(unsigned, len,
    5276             :                                          PAGE_SIZE - pg_offset);
    5277             : 
    5278     9724941 :                 page = find_lock_page(sctx->cur_inode->i_mapping, index);
    5279     9725340 :                 if (!page) {
    5280      565538 :                         page_cache_sync_readahead(sctx->cur_inode->i_mapping,
    5281             :                                                   &sctx->ra, NULL, index,
    5282      565538 :                                                   last_index + 1 - index);
    5283             : 
    5284      565530 :                         page = find_or_create_page(sctx->cur_inode->i_mapping,
    5285             :                                                    index, GFP_KERNEL);
    5286      565519 :                         if (!page) {
    5287             :                                 ret = -ENOMEM;
    5288             :                                 break;
    5289             :                         }
    5290             :                 }
    5291             : 
    5292     9725321 :                 if (PageReadahead(page))
    5293          50 :                         page_cache_async_readahead(sctx->cur_inode->i_mapping,
    5294          50 :                                                    &sctx->ra, NULL, page_folio(page),
    5295          50 :                                                    index, last_index + 1 - index);
    5296             :                 /* NOTE(review): the re-lock suggests the read unlocks the page - confirm. */
    5297     9725321 :                 if (!PageUptodate(page)) {
    5298           0 :                         btrfs_read_folio(NULL, page_folio(page));
    5299           0 :                         lock_page(page);
    5300           0 :                         if (!PageUptodate(page)) {
    5301           0 :                                 unlock_page(page);
    5302           0 :                                 btrfs_err(fs_info,
    5303             :                         "send: IO error at offset %llu for inode %llu root %llu",
    5304             :                                         page_offset(page), sctx->cur_ino,
    5305             :                                         sctx->send_root->root_key.objectid);
    5306           0 :                                 put_page(page);
    5307           0 :                                 ret = -EIO;
    5308           0 :                                 break;
    5309             :                         }
    5310             :                 }
    5311             :                 /* Copy this page's share of the data to the end of the send buffer. */
    5312     9725200 :                 memcpy_from_page(sctx->send_buf + sctx->send_size, page,
    5313             :                                  pg_offset, cur_len);
    5314     9725237 :                 unlock_page(page);
    5315     9725606 :                 put_page(page);
    5316     9725105 :                 index++;
    5317     9725105 :                 pg_offset = 0;
    5318     9725105 :                 len -= cur_len;
    5319     9725105 :                 sctx->send_size += cur_len;
    5320             :         }
    5321             : 
    5322             :         return ret;
    5323             : }
    5324             : 
    5325             : /*
    5326             :  * Read some bytes from the current inode/file and send a write command to
    5327             :  * user space.
                     :  *
                     :  * The BTRFS_SEND_C_WRITE command carries the current path of the inode,
                     :  * the file offset @offset and @len bytes of file data appended by
                     :  * put_file_data().
                     :  *
                     :  * Returns -ENOMEM if the path object cannot be allocated, a negative value
                     :  * from one of the helpers, or otherwise the result of send_cmd().  The
                     :  * path object is freed on every path after its allocation.
                     :  *
                     :  * NOTE(review): the TLV_PUT* macros are defined outside this view; the
                     :  * tlv_put_failure label is not referenced explicitly here, so presumably
                     :  * the macros jump to it on failure - confirm against their definition.
    5328             :  */
    5329     1234470 : static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
    5330             : {
    5331     1234470 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    5332     1234470 :         int ret = 0;
    5333     1234470 :         struct fs_path *p;
    5334             : 
    5335     1234470 :         p = fs_path_alloc();
    5336     1234368 :         if (!p)
    5337             :                 return -ENOMEM;
    5338             : 
    5339     1234368 :         btrfs_debug(fs_info, "send_write offset=%llu, len=%d", offset, len);
    5340             : 
    5341     1234368 :         ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
    5342     1234409 :         if (ret < 0)
    5343           0 :                 goto out;
    5344             : 
    5345     1234409 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
    5346     1234323 :         if (ret < 0)
    5347           0 :                 goto out;
    5348             : 
    5349     1234323 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    5350     1234236 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
    5351     1234301 :         ret = put_file_data(sctx, offset, len);
    5352     1234626 :         if (ret < 0)
    5353           0 :                 goto out;
    5354             : 
    5355     1234626 :         ret = send_cmd(sctx);
    5356             : 
    5357     1234352 : tlv_put_failure:
    5358     1234352 : out:
    5359     1234352 :         fs_path_free(p);
    5360     1234352 :         return ret;
    5361             : }
    5362             : 
    5363             : /*
    5364             :  * Send a clone command to user space.
                     :  *
                     :  * The BTRFS_SEND_C_CLONE command tells the receiver to clone @len bytes
                     :  * into the current inode at @offset from the source described by
                     :  * @clone_root (root, inode number and offset).
                     :  *
                     :  * Attributes are emitted in this order: file offset, clone length,
                     :  * destination path, clone UUID, clone ctransid, clone source path and
                     :  * clone source offset.  The single path object @p is used first for the
                     :  * destination path and then overwritten with the source path.
                     :  *
                     :  * The source path is resolved with get_cur_path() when the source lives
                     :  * in the send root (using the generation from get_inode_gen()), and with
                     :  * get_inode_path() otherwise.
                     :  *
                     :  * Returns -ENOMEM if the path object cannot be allocated, a negative value
                     :  * from one of the helpers, or otherwise the result of send_cmd().
    5365             :  */
    5366        5195 : static int send_clone(struct send_ctx *sctx,
    5367             :                       u64 offset, u32 len,
    5368             :                       struct clone_root *clone_root)
    5369             : {
    5370        5195 :         int ret = 0;
    5371        5195 :         struct fs_path *p;
    5372        5195 :         u64 gen;
    5373             : 
    5374        5195 :         btrfs_debug(sctx->send_root->fs_info,
    5375             :                     "send_clone offset=%llu, len=%d, clone_root=%llu, clone_inode=%llu, clone_offset=%llu",
    5376             :                     offset, len, clone_root->root->root_key.objectid,
    5377             :                     clone_root->ino, clone_root->offset);
    5378             : 
    5379        5195 :         p = fs_path_alloc();
    5380        5195 :         if (!p)
    5381             :                 return -ENOMEM;
    5382             : 
    5383        5195 :         ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE);
    5384        5195 :         if (ret < 0)
    5385           0 :                 goto out;
    5386             : 
    5387        5195 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
    5388        5195 :         if (ret < 0)
    5389           0 :                 goto out;
    5390             : 
    5391        5195 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
    5392        5195 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len);
    5393        5195 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    5394             :         /* From here on p is reused to hold the path of the clone source. */
    5395        5195 :         if (clone_root->root == sctx->send_root) {
    5396        2969 :                 ret = get_inode_gen(sctx->send_root, clone_root->ino, &gen);
    5397        2969 :                 if (ret < 0)
    5398           0 :                         goto out;
    5399        2969 :                 ret = get_cur_path(sctx, clone_root->ino, gen, p);
    5400             :         } else {
    5401        2226 :                 ret = get_inode_path(clone_root->root, clone_root->ino, p);
    5402             :         }
    5403        5195 :         if (ret < 0)
    5404           0 :                 goto out;
    5405             : 
    5406             :         /*
    5407             :          * If the parent we're using has a received_uuid set then use that as
    5408             :          * our clone source as that is what we will look for when doing a
    5409             :          * receive.
    5410             :          *
    5411             :          * This covers the case that we create a snapshot off of a received
    5412             :          * subvolume and then use that as the parent and try to receive on a
    5413             :          * different host.
    5414             :          */
    5415        5195 :         if (!btrfs_is_empty_uuid(clone_root->root->root_item.received_uuid))
    5416           0 :                 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
    5417             :                              clone_root->root->root_item.received_uuid);
    5418             :         else
    5419        5195 :                 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
    5420             :                              clone_root->root->root_item.uuid);
    5421        5195 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
    5422             :                     btrfs_root_ctransid(&clone_root->root->root_item));
    5423        5195 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
    5424        5195 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
    5425             :                         clone_root->offset);
    5426             : 
    5427        5195 :         ret = send_cmd(sctx);
    5428             : 
    5429        5195 : tlv_put_failure:
    5430        5195 : out:
    5431        5195 :         fs_path_free(p);
    5432        5195 :         return ret;
    5433             : }
    5434             : 
    5435             : /*
    5436             :  * Send an update extent command to user space.
                     :  *
                     :  * The BTRFS_SEND_C_UPDATE_EXTENT command carries the current path of the
                     :  * inode, the file offset @offset and the size @len; no file data is
                     :  * included.  send_hole() uses it when BTRFS_SEND_FLAG_NO_FILE_DATA is set.
                     :  *
                     :  * Returns -ENOMEM if the path object cannot be allocated, a negative value
                     :  * from begin_cmd() or get_cur_path(), or otherwise the result of
                     :  * send_cmd().  The path object is freed on every path after allocation.
    5437             :  */
    5438           0 : static int send_update_extent(struct send_ctx *sctx,
    5439             :                               u64 offset, u32 len)
    5440             : {
    5441           0 :         int ret = 0;
    5442           0 :         struct fs_path *p;
    5443             : 
    5444           0 :         p = fs_path_alloc();
    5445           0 :         if (!p)
    5446             :                 return -ENOMEM;
    5447             : 
    5448           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT);
    5449           0 :         if (ret < 0)
    5450           0 :                 goto out;
    5451             : 
    5452           0 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
    5453           0 :         if (ret < 0)
    5454           0 :                 goto out;
    5455             : 
    5456           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    5457           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
    5458           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
    5459             : 
    5460           0 :         ret = send_cmd(sctx);
    5461             : 
    5462           0 : tlv_put_failure:
    5463           0 : out:
    5464           0 :         fs_path_free(p);
    5465           0 :         return ret;
    5466             : }
    5467             : /*
                     :  * Represent a hole in the current inode, from
                     :  * sctx->cur_inode_last_extent up to @end, in the send stream.
                     :  *
                     :  * Nothing is sent when the hole starts at or beyond the inode size, and
                     :  * @end is clamped to the inode size.  With BTRFS_SEND_FLAG_NO_FILE_DATA a
                     :  * single update-extent command is sent instead.  Otherwise the range is
                     :  * sent as BTRFS_SEND_C_WRITE commands filled with zeroes, each covering at
                     :  * most max_send_read_size() bytes.
                     :  *
                     :  * When the write loop is left, normally or through one of its breaks,
                     :  * sctx->cur_inode_next_write_offset is set to the offset reached.  Jumps
                     :  * to tlv_put_failure skip that assignment.
                     :  *
                     :  * Returns 0 on success or when there is nothing to send, -ENOMEM if the
                     :  * path object cannot be allocated, or a negative value from a helper.
                     :  */
    5468        3428 : static int send_hole(struct send_ctx *sctx, u64 end)
    5469             : {
    5470        3428 :         struct fs_path *p = NULL;
    5471        3428 :         u64 read_size = max_send_read_size(sctx);
    5472        3428 :         u64 offset = sctx->cur_inode_last_extent;
    5473        3428 :         int ret = 0;
    5474             : 
    5475             :         /*
    5476             :          * A hole that starts at EOF or beyond it. Since we do not yet support
    5477             :          * fallocate (for extent preallocation and hole punching), sending a
    5478             :          * write of zeroes starting at EOF or beyond would later require issuing
    5479             :          * a truncate operation which would undo the write and achieve nothing.
    5480             :          */
    5481        3428 :         if (offset >= sctx->cur_inode_size)
    5482             :                 return 0;
    5483             : 
    5484             :         /*
    5485             :          * Don't go beyond the inode's i_size due to prealloc extents that start
    5486             :          * after the i_size.
    5487             :          */
    5488        3426 :         end = min_t(u64, end, sctx->cur_inode_size);
    5489             : 
    5490        3426 :         if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
    5491           0 :                 return send_update_extent(sctx, offset, end - offset);
    5492             : 
    5493        3426 :         p = fs_path_alloc();
    5494        3426 :         if (!p)
    5495             :                 return -ENOMEM;
    5496        3426 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
    5497        3426 :         if (ret < 0)
    5498           0 :                 goto tlv_put_failure;
    5499       31682 :         while (offset < end) {
    5500       28256 :                 u64 len = min(end - offset, read_size);
    5501             : 
    5502       28256 :                 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
    5503       28256 :                 if (ret < 0)
    5504             :                         break;
    5505       28256 :                 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    5506       28256 :                 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
    5507       28256 :                 ret = put_data_header(sctx, len);
    5508       28256 :                 if (ret < 0)
    5509             :                         break;
    5510       28256 :                 memset(sctx->send_buf + sctx->send_size, 0, len);
    5511       28256 :                 sctx->send_size += len;
    5512       28256 :                 ret = send_cmd(sctx);
    5513       28256 :                 if (ret < 0)
    5514             :                         break;
    5515       28256 :                 offset += len;
    5516             :         }
    5517        3426 :         sctx->cur_inode_next_write_offset = offset;
    5518        3426 : tlv_put_failure:
    5519        3426 :         fs_path_free(p);
    5520        3426 :         return ret;
    5521             : }
    5522             : /*
                     :  * Send the inline file extent item at @path (leaf path->nodes[0], slot
                     :  * path->slots[0]) as a BTRFS_SEND_C_ENCODED_WRITE command, copying the
                     :  * inline data out of the leaf without decoding it.
                     :  *
                     :  * The command carries the file path, @offset, the unencoded file length
                     :  * (the remainder of the extent from @offset, capped at @len), the
                     :  * unencoded length (ram_bytes), the unencoded offset (@offset relative to
                     :  * the item key's offset), the compression type translated by
                     :  * btrfs_encoded_io_compression_from_extent(), and finally the inline bytes
                     :  * read from the leaf directly into the send buffer.
                     :  *
                     :  * An inode reference is taken with btrfs_iget() and dropped before
                     :  * returning; the inode is not otherwise used in this function.
                     :  *
                     :  * Returns PTR_ERR() of a failed inode lookup, -ENOMEM if the path object
                     :  * cannot be allocated, a negative value from a helper, or otherwise the
                     :  * result of send_cmd().
                     :  *
                     :  * NOTE(review): when fs_path_alloc() fails, the out label calls
                     :  * fs_path_free() with a NULL pointer - presumably that is tolerated;
                     :  * confirm against fs_path_free().
                     :  */
    5523           0 : static int send_encoded_inline_extent(struct send_ctx *sctx,
    5524             :                                       struct btrfs_path *path, u64 offset,
    5525             :                                       u64 len)
    5526             : {
    5527           0 :         struct btrfs_root *root = sctx->send_root;
    5528           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5529           0 :         struct inode *inode;
    5530           0 :         struct fs_path *fspath;
    5531           0 :         struct extent_buffer *leaf = path->nodes[0];
    5532           0 :         struct btrfs_key key;
    5533           0 :         struct btrfs_file_extent_item *ei;
    5534           0 :         u64 ram_bytes;
    5535           0 :         size_t inline_size;
    5536           0 :         int ret;
    5537             : 
    5538           0 :         inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root);
    5539           0 :         if (IS_ERR(inode))
    5540           0 :                 return PTR_ERR(inode);
    5541             : 
    5542           0 :         fspath = fs_path_alloc();
    5543           0 :         if (!fspath) {
    5544           0 :                 ret = -ENOMEM;
    5545           0 :                 goto out;
    5546             :         }
    5547             : 
    5548           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_ENCODED_WRITE);
    5549           0 :         if (ret < 0)
    5550           0 :                 goto out;
    5551             : 
    5552           0 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
    5553           0 :         if (ret < 0)
    5554           0 :                 goto out;
    5555             : 
    5556           0 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    5557           0 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
    5558           0 :         ram_bytes = btrfs_file_extent_ram_bytes(leaf, ei);
    5559           0 :         inline_size = btrfs_file_extent_inline_item_len(leaf, path->slots[0]);
    5560             : 
    5561           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, fspath);
    5562           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
    5563           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_FILE_LEN,
    5564             :                     min(key.offset + ram_bytes - offset, len));
    5565           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_LEN, ram_bytes);
    5566           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_OFFSET, offset - key.offset);
    5567           0 :         ret = btrfs_encoded_io_compression_from_extent(fs_info,
    5568             :                                 btrfs_file_extent_compression(leaf, ei));
    5569           0 :         if (ret < 0)
    5570           0 :                 goto out;
    5571           0 :         TLV_PUT_U32(sctx, BTRFS_SEND_A_COMPRESSION, ret);
    5572             : 
    5573           0 :         ret = put_data_header(sctx, inline_size);
    5574           0 :         if (ret < 0)
    5575           0 :                 goto out;
    5576           0 :         read_extent_buffer(leaf, sctx->send_buf + sctx->send_size,
    5577             :                            btrfs_file_extent_inline_start(ei), inline_size);
    5578           0 :         sctx->send_size += inline_size;
    5579             : 
    5580           0 :         ret = send_cmd(sctx);
    5581             : 
    5582           0 : tlv_put_failure:
    5583           0 : out:
    5584           0 :         fs_path_free(fspath);
    5585           0 :         iput(inode);
    5586           0 :         return ret;
    5587             : }
    5588             : 
    5589           0 : static int send_encoded_extent(struct send_ctx *sctx, struct btrfs_path *path,
    5590             :                                u64 offset, u64 len)
    5591             : {
    5592           0 :         struct btrfs_root *root = sctx->send_root;
    5593           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5594           0 :         struct inode *inode;
    5595           0 :         struct fs_path *fspath;
    5596           0 :         struct extent_buffer *leaf = path->nodes[0];
    5597           0 :         struct btrfs_key key;
    5598           0 :         struct btrfs_file_extent_item *ei;
    5599           0 :         u64 disk_bytenr, disk_num_bytes;
    5600           0 :         u32 data_offset;
    5601           0 :         struct btrfs_cmd_header *hdr;
    5602           0 :         u32 crc;
    5603           0 :         int ret;
    5604             : 
    5605           0 :         inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root);
    5606           0 :         if (IS_ERR(inode))
    5607           0 :                 return PTR_ERR(inode);
    5608             : 
    5609           0 :         fspath = fs_path_alloc();
    5610           0 :         if (!fspath) {
    5611           0 :                 ret = -ENOMEM;
    5612           0 :                 goto out;
    5613             :         }
    5614             : 
    5615           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_ENCODED_WRITE);
    5616           0 :         if (ret < 0)
    5617           0 :                 goto out;
    5618             : 
    5619           0 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
    5620           0 :         if (ret < 0)
    5621           0 :                 goto out;
    5622             : 
    5623           0 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    5624           0 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
    5625           0 :         disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei);
    5626           0 :         disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, ei);
    5627             : 
    5628           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, fspath);
    5629           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
    5630           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_FILE_LEN,
    5631             :                     min(key.offset + btrfs_file_extent_num_bytes(leaf, ei) - offset,
    5632             :                         len));
    5633           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_LEN,
    5634             :                     btrfs_file_extent_ram_bytes(leaf, ei));
    5635           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_OFFSET,
    5636             :                     offset - key.offset + btrfs_file_extent_offset(leaf, ei));
    5637           0 :         ret = btrfs_encoded_io_compression_from_extent(fs_info,
    5638             :                                 btrfs_file_extent_compression(leaf, ei));
    5639           0 :         if (ret < 0)
    5640           0 :                 goto out;
    5641           0 :         TLV_PUT_U32(sctx, BTRFS_SEND_A_COMPRESSION, ret);
    5642           0 :         TLV_PUT_U32(sctx, BTRFS_SEND_A_ENCRYPTION, 0);
    5643             : 
    5644           0 :         ret = put_data_header(sctx, disk_num_bytes);
    5645           0 :         if (ret < 0)
    5646           0 :                 goto out;
    5647             : 
    5648             :         /*
    5649             :          * We want to do I/O directly into the send buffer, so get the next page
    5650             :          * boundary in the send buffer. This means that there may be a gap
    5651             :          * between the beginning of the command and the file data.
    5652             :          */
    5653           0 :         data_offset = PAGE_ALIGN(sctx->send_size);
    5654           0 :         if (data_offset > sctx->send_max_size ||
    5655           0 :             sctx->send_max_size - data_offset < disk_num_bytes) {
    5656           0 :                 ret = -EOVERFLOW;
    5657           0 :                 goto out;
    5658             :         }
    5659             : 
    5660             :         /*
    5661             :          * Note that send_buf is a mapping of send_buf_pages, so this is really
    5662             :          * reading into send_buf.
    5663             :          */
    5664           0 :         ret = btrfs_encoded_read_regular_fill_pages(BTRFS_I(inode), offset,
    5665             :                                                     disk_bytenr, disk_num_bytes,
    5666           0 :                                                     sctx->send_buf_pages +
    5667           0 :                                                     (data_offset >> PAGE_SHIFT));
    5668           0 :         if (ret)
    5669           0 :                 goto out;
    5670             : 
    5671           0 :         hdr = (struct btrfs_cmd_header *)sctx->send_buf;
    5672           0 :         hdr->len = cpu_to_le32(sctx->send_size + disk_num_bytes - sizeof(*hdr));
    5673           0 :         hdr->crc = 0;
    5674           0 :         crc = btrfs_crc32c(0, sctx->send_buf, sctx->send_size);
    5675           0 :         crc = btrfs_crc32c(crc, sctx->send_buf + data_offset, disk_num_bytes);
    5676           0 :         hdr->crc = cpu_to_le32(crc);
    5677             : 
    5678           0 :         ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
    5679             :                         &sctx->send_off);
    5680           0 :         if (!ret) {
    5681           0 :                 ret = write_buf(sctx->send_filp, sctx->send_buf + data_offset,
    5682             :                                 disk_num_bytes, &sctx->send_off);
    5683             :         }
    5684           0 :         sctx->send_size = 0;
    5685           0 :         sctx->put_data = false;
    5686             : 
    5687           0 : tlv_put_failure:
    5688           0 : out:
    5689           0 :         fs_path_free(fspath);
    5690           0 :         iput(inode);
    5691           0 :         return ret;
    5692             : }
    5693             : 
    5694      628917 : static int send_extent_data(struct send_ctx *sctx, struct btrfs_path *path,
    5695             :                             const u64 offset, const u64 len)
    5696             : {
    5697      628917 :         const u64 end = offset + len;
    5698      628917 :         struct extent_buffer *leaf = path->nodes[0];
    5699      628917 :         struct btrfs_file_extent_item *ei;
    5700      628917 :         u64 read_size = max_send_read_size(sctx);
    5701      628917 :         u64 sent = 0;
    5702             : 
    5703      628917 :         if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
    5704           0 :                 return send_update_extent(sctx, offset, len);
    5705             : 
    5706      628917 :         ei = btrfs_item_ptr(leaf, path->slots[0],
    5707             :                             struct btrfs_file_extent_item);
    5708      628914 :         if ((sctx->flags & BTRFS_SEND_FLAG_COMPRESSED) &&
    5709             :             btrfs_file_extent_compression(leaf, ei) != BTRFS_COMPRESS_NONE) {
    5710           0 :                 bool is_inline = (btrfs_file_extent_type(leaf, ei) ==
    5711             :                                   BTRFS_FILE_EXTENT_INLINE);
    5712             : 
    5713             :                 /*
    5714             :                  * Send the compressed extent unless the compressed data is
    5715             :                  * larger than the decompressed data. This can happen if we're
    5716             :                  * not sending the entire extent, either because it has been
    5717             :                  * partially overwritten/truncated or because this is a part of
    5718             :                  * the extent that we couldn't clone in clone_range().
    5719             :                  */
    5720           0 :                 if (is_inline &&
    5721           0 :                     btrfs_file_extent_inline_item_len(leaf,
    5722             :                                                       path->slots[0]) <= len) {
    5723           0 :                         return send_encoded_inline_extent(sctx, path, offset,
    5724             :                                                           len);
    5725           0 :                 } else if (!is_inline &&
    5726             :                            btrfs_file_extent_disk_num_bytes(leaf, ei) <= len) {
    5727           0 :                         return send_encoded_extent(sctx, path, offset, len);
    5728             :                 }
    5729             :         }
    5730             : 
    5731      628914 :         if (sctx->cur_inode == NULL) {
    5732      602424 :                 struct btrfs_root *root = sctx->send_root;
    5733             : 
    5734      602424 :                 sctx->cur_inode = btrfs_iget(root->fs_info->sb, sctx->cur_ino, root);
    5735      602431 :                 if (IS_ERR(sctx->cur_inode)) {
    5736           0 :                         int err = PTR_ERR(sctx->cur_inode);
    5737             : 
    5738           0 :                         sctx->cur_inode = NULL;
    5739           0 :                         return err;
    5740             :                 }
    5741      602431 :                 memset(&sctx->ra, 0, sizeof(struct file_ra_state));
    5742      602431 :                 file_ra_state_init(&sctx->ra, sctx->cur_inode->i_mapping);
    5743             : 
    5744             :                 /*
    5745             :                  * It's very likely there are no pages from this inode in the page
    5746             :                  * cache, so after reading extents and sending their data, we clean
    5747             :                  * the page cache to avoid trashing the page cache (adding pressure
    5748             :                  * to the page cache and forcing eviction of other data more useful
    5749             :                  * for applications).
    5750             :                  *
    5751             :                  * We decide if we should clean the page cache simply by checking
    5752             :                  * if the inode's mapping nrpages is 0 when we first open it, and
    5753             :                  * not by using something like filemap_range_has_page() before
    5754             :                  * reading an extent because when we ask the readahead code to
    5755             :                  * read a given file range, it may (and almost always does) read
    5756             :                  * pages from beyond that range (see the documentation for
    5757             :                  * page_cache_sync_readahead()), so it would not be reliable,
    5758             :                  * because after reading the first extent future calls to
    5759             :                  * filemap_range_has_page() would return true because the readahead
    5760             :                  * on the previous extent resulted in reading pages of the current
    5761             :                  * extent as well.
    5762             :                  */
    5763      602429 :                 sctx->clean_page_cache = (sctx->cur_inode->i_mapping->nrpages == 0);
    5764      602429 :                 sctx->page_cache_clear_start = round_down(offset, PAGE_SIZE);
    5765             :         }
    5766             : 
    5767     1863128 :         while (sent < len) {
    5768     1234514 :                 u64 size = min(len - sent, read_size);
    5769     1234514 :                 int ret;
    5770             : 
    5771     1234514 :                 ret = send_write(sctx, offset + sent, size);
    5772     1234209 :                 if (ret < 0)
    5773           0 :                         return ret;
    5774     1234209 :                 sent += size;
    5775             :         }
    5776             : 
    5777      628614 :         if (sctx->clean_page_cache && PAGE_ALIGNED(end)) {
    5778             :                 /*
    5779             :                  * Always operate only on ranges that are a multiple of the page
    5780             :                  * size. This is not only to prevent zeroing parts of a page in
    5781             :                  * the case of subpage sector size, but also to guarantee we evict
    5782             :                  * pages, as passing a range that is smaller than page size does
    5783             :                  * not evict the respective page (only zeroes part of its content).
    5784             :                  *
    5785             :                  * Always start from the end offset of the last range cleared.
    5786             :                  * This is because the readahead code may (and very often does)
    5787             :                  * read pages beyond the range we request for readahead. So if
    5788             :                  * we have an extent layout like this:
    5789             :                  *
    5790             :                  *            [ extent A ] [ extent B ] [ extent C ]
    5791             :                  *
    5792             :                  * When we ask page_cache_sync_readahead() to read extent A, it
    5793             :                  * may also trigger reads for pages of extent B. If we are doing
    5794             :                  * an incremental send and extent B has not changed between the
    5795             :                  * parent and send snapshots, some or all of its pages may end
    5796             :                  * up being read and placed in the page cache. So when truncating
    5797             :                  * the page cache we always start from the end offset of the
    5798             :                  * previously processed extent up to the end of the current
    5799             :                  * extent.
    5800             :                  */
    5801      565144 :                 truncate_inode_pages_range(&sctx->cur_inode->i_data,
    5802      565144 :                                            sctx->page_cache_clear_start,
    5803      565144 :                                            end - 1);
    5804      565291 :                 sctx->page_cache_clear_start = end;
    5805             :         }
    5806             : 
    5807             :         return 0;
    5808             : }
    5809             : 
    5810             : /*
    5811             :  * Search for a capability xattr related to sctx->cur_ino. If the capability is
    5812             :  * found, call send_set_xattr function to emit it.
    5813             :  *
    5814             :  * Return 0 if there isn't a capability, or when the capability was emitted
    5815             :  * successfully, or < 0 if an error occurred.
    5816             :  */
    5817     1009837 : static int send_capabilities(struct send_ctx *sctx)
    5818             : {
    5819     1009837 :         struct fs_path *fspath = NULL;
    5820     1009837 :         struct btrfs_path *path;
    5821     1009837 :         struct btrfs_dir_item *di;
    5822     1009837 :         struct extent_buffer *leaf;
    5823     1009837 :         unsigned long data_ptr;
    5824     1009837 :         char *buf = NULL;
    5825     1009837 :         int buf_len;
    5826     1009837 :         int ret = 0;
    5827             : 
    5828     1009837 :         path = alloc_path_for_send();
    5829     1009666 :         if (!path)
    5830             :                 return -ENOMEM;
    5831             : 
    5832     1009666 :         di = btrfs_lookup_xattr(NULL, sctx->send_root, path, sctx->cur_ino,
    5833             :                                 XATTR_NAME_CAPS, strlen(XATTR_NAME_CAPS), 0);
    5834     1010101 :         if (!di) {
    5835             :                 /* There is no xattr for this inode */
    5836     1010085 :                 goto out;
    5837          16 :         } else if (IS_ERR(di)) {
    5838           0 :                 ret = PTR_ERR(di);
    5839           0 :                 goto out;
    5840             :         }
    5841             : 
    5842          16 :         leaf = path->nodes[0];
    5843          16 :         buf_len = btrfs_dir_data_len(leaf, di);
    5844             : 
    5845          16 :         fspath = fs_path_alloc();
    5846          16 :         buf = kmalloc(buf_len, GFP_KERNEL);
    5847          16 :         if (!fspath || !buf) {
    5848           0 :                 ret = -ENOMEM;
    5849           0 :                 goto out;
    5850             :         }
    5851             : 
    5852          16 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
    5853          16 :         if (ret < 0)
    5854           0 :                 goto out;
    5855             : 
    5856          16 :         data_ptr = (unsigned long)(di + 1) + btrfs_dir_name_len(leaf, di);
    5857          16 :         read_extent_buffer(leaf, buf, data_ptr, buf_len);
    5858             : 
    5859          16 :         ret = send_set_xattr(sctx, fspath, XATTR_NAME_CAPS,
    5860             :                         strlen(XATTR_NAME_CAPS), buf, buf_len);
    5861     1010101 : out:
    5862     1010101 :         kfree(buf);
    5863     1010049 :         fs_path_free(fspath);
    5864     1009930 :         btrfs_free_path(path);
    5865     1009930 :         return ret;
    5866             : }
    5867             : 
    5868        5266 : static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
    5869             :                        struct clone_root *clone_root, const u64 disk_byte,
    5870             :                        u64 data_offset, u64 offset, u64 len)
    5871             : {
    5872        5266 :         struct btrfs_path *path;
    5873        5266 :         struct btrfs_key key;
    5874        5266 :         int ret;
    5875        5266 :         struct btrfs_inode_info info;
    5876        5266 :         u64 clone_src_i_size = 0;
    5877             : 
    5878             :         /*
    5879             :          * Prevent cloning from a zero offset with a length matching the sector
    5880             :          * size because in some scenarios this will make the receiver fail.
    5881             :          *
    5882             :          * For example, if in the source filesystem the extent at offset 0
    5883             :          * has a length of sectorsize and it was written using direct IO, then
    5884             :          * it can never be an inline extent (even if compression is enabled).
    5885             :          * Then this extent can be cloned in the original filesystem to a non
    5886             :          * zero file offset, but it may not be possible to clone in the
    5887             :          * destination filesystem because it can be inlined due to compression
    5888             :          * on the destination filesystem (as the receiver's write operations are
    5889             :          * always done using buffered IO). The same happens when the original
    5890             :          * filesystem does not have compression enabled but the destination
    5891             :          * filesystem has.
    5892             :          */
    5893        5266 :         if (clone_root->offset == 0 &&
    5894         337 :             len == sctx->send_root->fs_info->sectorsize)
    5895           2 :                 return send_extent_data(sctx, dst_path, offset, len);
    5896             : 
    5897        5264 :         path = alloc_path_for_send();
    5898        5264 :         if (!path)
    5899             :                 return -ENOMEM;
    5900             : 
    5901             :         /*
    5902             :          * There are inodes that have extents that lie beyond their i_size.
    5903             :          * Don't accept clones from these extents.
    5904             :          */
    5905        5264 :         ret = get_inode_info(clone_root->root, clone_root->ino, &info);
    5906        5264 :         btrfs_release_path(path);
    5907        5264 :         if (ret < 0)
    5908           0 :                 goto out;
    5909        5264 :         clone_src_i_size = info.size;
    5910             : 
    5911             :         /*
    5912             :          * We can't send a clone operation for the entire range if we find
    5913             :          * extent items in the respective range in the source file that
    5914             :          * refer to different extents or if we find holes.
    5915             :          * So check for that and do a mix of clone and regular write/copy
    5916             :          * operations if needed.
    5917             :          *
    5918             :          * Example:
    5919             :          *
    5920             :          * mkfs.btrfs -f /dev/sda
    5921             :          * mount /dev/sda /mnt
    5922             :          * xfs_io -f -c "pwrite -S 0xaa 0K 100K" /mnt/foo
    5923             :          * cp --reflink=always /mnt/foo /mnt/bar
    5924             :          * xfs_io -c "pwrite -S 0xbb 50K 50K" /mnt/foo
    5925             :          * btrfs subvolume snapshot -r /mnt /mnt/snap
    5926             :          *
    5927             :          * If, when sending the snapshot and processing file bar (which has a
    5928             :          * higher inode number than foo), we blindly send a clone operation
    5929             :          * for the [0, 100K[ range from foo to bar, the receiver ends up getting
    5930             :          * a file bar that matches the content of file foo - iow, doesn't match
    5931             :          * the content from bar in the original filesystem.
    5932             :          */
    5933        5264 :         key.objectid = clone_root->ino;
    5934        5264 :         key.type = BTRFS_EXTENT_DATA_KEY;
    5935        5264 :         key.offset = clone_root->offset;
    5936        5264 :         ret = btrfs_search_slot(NULL, clone_root->root, &key, path, 0, 0);
    5937        5264 :         if (ret < 0)
    5938           0 :                 goto out;
    5939        5264 :         if (ret > 0 && path->slots[0] > 0) {
    5940        1576 :                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
    5941        1576 :                 if (key.objectid == clone_root->ino &&
    5942        1576 :                     key.type == BTRFS_EXTENT_DATA_KEY)
    5943        1576 :                         path->slots[0]--;
    5944             :         }
    5945             : 
    5946        5313 :         while (true) {
    5947        5313 :                 struct extent_buffer *leaf = path->nodes[0];
    5948        5313 :                 int slot = path->slots[0];
    5949        5313 :                 struct btrfs_file_extent_item *ei;
    5950        5313 :                 u8 type;
    5951        5313 :                 u64 ext_len;
    5952        5313 :                 u64 clone_len;
    5953        5313 :                 u64 clone_data_offset;
    5954        5313 :                 bool crossed_src_i_size = false;
    5955             : 
    5956        5313 :                 if (slot >= btrfs_header_nritems(leaf)) {
    5957           0 :                         ret = btrfs_next_leaf(clone_root->root, path);
    5958           0 :                         if (ret < 0)
    5959           0 :                                 goto out;
    5960           0 :                         else if (ret > 0)
    5961             :                                 break;
    5962           0 :                         continue;
    5963             :                 }
    5964             : 
    5965        5313 :                 btrfs_item_key_to_cpu(leaf, &key, slot);
    5966             : 
    5967             :                 /*
    5968             :                  * We might have an implicit trailing hole (NO_HOLES feature
    5969             :                  * enabled). We deal with it after leaving this loop.
    5970             :                  */
    5971        5313 :                 if (key.objectid != clone_root->ino ||
    5972        5304 :                     key.type != BTRFS_EXTENT_DATA_KEY)
    5973             :                         break;
    5974             : 
    5975        5304 :                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
    5976        5304 :                 type = btrfs_file_extent_type(leaf, ei);
    5977        5304 :                 if (type == BTRFS_FILE_EXTENT_INLINE) {
    5978           0 :                         ext_len = btrfs_file_extent_ram_bytes(leaf, ei);
    5979           0 :                         ext_len = PAGE_ALIGN(ext_len);
    5980             :                 } else {
    5981        5304 :                         ext_len = btrfs_file_extent_num_bytes(leaf, ei);
    5982             :                 }
    5983             : 
    5984        5304 :                 if (key.offset + ext_len <= clone_root->offset)
    5985           0 :                         goto next;
    5986             : 
    5987        5304 :                 if (key.offset > clone_root->offset) {
    5988             :                         /* Implicit hole, NO_HOLES feature enabled. */
    5989          27 :                         u64 hole_len = key.offset - clone_root->offset;
    5990             : 
    5991          27 :                         if (hole_len > len)
    5992             :                                 hole_len = len;
    5993          27 :                         ret = send_extent_data(sctx, dst_path, offset,
    5994             :                                                hole_len);
    5995          27 :                         if (ret < 0)
    5996           0 :                                 goto out;
    5997             : 
    5998          27 :                         len -= hole_len;
    5999          27 :                         if (len == 0)
    6000             :                                 break;
    6001           2 :                         offset += hole_len;
    6002           2 :                         clone_root->offset += hole_len;
    6003           2 :                         data_offset += hole_len;
    6004             :                 }
    6005             : 
    6006        5279 :                 if (key.offset >= clone_root->offset + len)
    6007             :                         break;
    6008             : 
    6009        5279 :                 if (key.offset >= clone_src_i_size)
    6010             :                         break;
    6011             : 
    6012        5221 :                 if (key.offset + ext_len > clone_src_i_size) {
    6013         165 :                         ext_len = clone_src_i_size - key.offset;
    6014         165 :                         crossed_src_i_size = true;
    6015             :                 }
    6016             : 
    6017        5221 :                 clone_data_offset = btrfs_file_extent_offset(leaf, ei);
    6018        5221 :                 if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) {
    6019        5208 :                         clone_root->offset = key.offset;
    6020        5208 :                         if (clone_data_offset < data_offset &&
    6021        1543 :                                 clone_data_offset + ext_len > data_offset) {
    6022        1535 :                                 u64 extent_offset;
    6023             : 
    6024        1535 :                                 extent_offset = data_offset - clone_data_offset;
    6025        1535 :                                 ext_len -= extent_offset;
    6026        1535 :                                 clone_data_offset += extent_offset;
    6027        1535 :                                 clone_root->offset += extent_offset;
    6028             :                         }
    6029             :                 }
    6030             : 
    6031        5221 :                 clone_len = min_t(u64, ext_len, len);
    6032             : 
    6033        5221 :                 if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte &&
    6034             :                     clone_data_offset == data_offset) {
    6035        5200 :                         const u64 src_end = clone_root->offset + clone_len;
    6036        5200 :                         const u64 sectorsize = SZ_64K;
    6037             : 
    6038             :                         /*
    6039             :                          * We can't clone the last block, when its size is not
    6040             :                          * sector size aligned, into the middle of a file. If we
    6041             :                          * do so, the receiver will get a failure (-EINVAL) when
    6042             :                          * trying to clone or will silently corrupt the data in
    6043             :                          * the destination file if it's on a kernel without the
    6044             :                          * fix introduced by commit ac765f83f1397646
    6045             :                          * ("Btrfs: fix data corruption due to cloning of eof
    6046             :                          * block").
    6047             :                          *
    6048             :                          * So issue a clone of the aligned down range plus a
    6049             :                          * regular write for the eof block, if we hit that case.
    6050             :                          *
    6051             :                          * Also, we use the maximum possible sector size, 64K,
    6052             :                          * because we don't know what's the sector size of the
    6053             :                          * filesystem that receives the stream, so we have to
    6054             :                          * assume the largest possible sector size.
    6055             :                          */
    6056        5200 :                         if (src_end == clone_src_i_size &&
    6057         246 :                             !IS_ALIGNED(src_end, sectorsize) &&
    6058          24 :                             offset + clone_len < sctx->cur_inode_size) {
    6059           9 :                                 u64 slen;
    6060             : 
    6061           9 :                                 slen = ALIGN_DOWN(src_end - clone_root->offset,
    6062             :                                                   sectorsize);
    6063           9 :                                 if (slen > 0) {
    6064           4 :                                         ret = send_clone(sctx, offset, slen,
    6065             :                                                          clone_root);
    6066           4 :                                         if (ret < 0)
    6067           0 :                                                 goto out;
    6068             :                                 }
    6069           9 :                                 ret = send_extent_data(sctx, dst_path,
    6070             :                                                        offset + slen,
    6071             :                                                        clone_len - slen);
    6072             :                         } else {
    6073        5191 :                                 ret = send_clone(sctx, offset, clone_len,
    6074             :                                                  clone_root);
    6075             :                         }
    6076          21 :                 } else if (crossed_src_i_size && clone_len < len) {
    6077             :                         /*
    6078             :                          * If we are at i_size of the clone source inode and we
    6079             :                          * can not clone from it, terminate the loop. This is
    6080             :                          * to avoid sending two write operations, one with a
    6081             :                          * length matching clone_len and the final one after
    6082             :                          * this loop with a length of len - clone_len.
    6083             :                          *
    6084             :                          * When using encoded writes (BTRFS_SEND_FLAG_COMPRESSED
    6085             :                          * was passed to the send ioctl), this helps avoid
    6086             :                          * sending an encoded write for an offset that is not
    6087             :                          * sector size aligned, in case the i_size of the source
    6088             :                          * inode is not sector size aligned. That will make the
    6089             :                          * receiver fallback to decompression of the data and
    6090             :                          * writing it using regular buffered IO, therefore while
    6091             :                          * not incorrect, it's not optimal due to decompression and
    6092             :                          * possible re-compression at the receiver.
    6093             :                          */
    6094             :                         break;
    6095             :                 } else {
    6096          18 :                         ret = send_extent_data(sctx, dst_path, offset,
    6097             :                                                clone_len);
    6098             :                 }
    6099             : 
    6100        5218 :                 if (ret < 0)
    6101           0 :                         goto out;
    6102             : 
    6103        5218 :                 len -= clone_len;
    6104        5218 :                 if (len == 0)
    6105             :                         break;
    6106          49 :                 offset += clone_len;
    6107          49 :                 clone_root->offset += clone_len;
    6108             : 
    6109             :                 /*
    6110             :                  * If we are cloning from the file we are currently processing,
    6111             :                  * and using the send root as the clone root, we must stop once
    6112             :                  * the current clone offset reaches the current eof of the file
    6113             :                  * at the receiver, otherwise we would issue an invalid clone
    6114             :                  * operation (source range going beyond eof) and cause the
    6115             :                  * receiver to fail. So if we reach the current eof, bail out
    6116             :                  * and fallback to a regular write.
    6117             :                  */
    6118          49 :                 if (clone_root->root == sctx->send_root &&
    6119          36 :                     clone_root->ino == sctx->cur_ino &&
    6120           0 :                     clone_root->offset >= sctx->cur_inode_next_write_offset)
    6121             :                         break;
    6122             : 
    6123          49 :                 data_offset += clone_len;
    6124          49 : next:
    6125          49 :                 path->slots[0]++;
    6126             :         }
    6127             : 
    6128        5264 :         if (len > 0)
    6129          70 :                 ret = send_extent_data(sctx, dst_path, offset, len);
    6130             :         else
    6131             :                 ret = 0;
    6132        5264 : out:
    6133        5264 :         btrfs_free_path(path);
    6134        5264 :         return ret;
    6135             : }
    6136             : 
    /*
     * Emit the file range covered by the extent item at @path, either through
     * clone_range() or through send_extent_data().
     *
     * The range starts at @key->offset and ends at the end of the extent,
     * clamped to sctx->cur_inode_size. If that range is empty, 0 is returned
     * and nothing else happens.
     *
     * clone_range() is used only when @clone_root is non-NULL and the range
     * end is aligned to the superblock's s_blocksize; it is given the disk
     * bytenr and extent offset read from the extent item. In every other case
     * the range is handed to send_extent_data().
     *
     * Unless the range was empty, sctx->cur_inode_next_write_offset is set to
     * the range end before returning, even when the helper reported an error.
     *
     * Returns the value returned by the helper that was called.
     */
    6137      634143 : static int send_write_or_clone(struct send_ctx *sctx,
    6138             :                                struct btrfs_path *path,
    6139             :                                struct btrfs_key *key,
    6140             :                                struct clone_root *clone_root)
    6141             : {
    6142      634143 :         int ret = 0;
    6143      634143 :         u64 offset = key->offset;
    6144      634143 :         u64 end;
    6145      634143 :         u64 bs = sctx->send_root->fs_info->sb->s_blocksize;
    6146             : 
    6147      634143 :         end = min_t(u64, btrfs_file_extent_end(path), sctx->cur_inode_size);
    6148      634144 :         if (offset >= end)
    6149             :                 return 0;
    6150             : 
    6151      639324 :         if (clone_root && IS_ALIGNED(end, bs)) {
    6152        5266 :                 struct btrfs_file_extent_item *ei;
    6153        5266 :                 u64 disk_byte;
    6154        5266 :                 u64 data_offset;
    6155             : 
    6156        5266 :                 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
    6157             :                                     struct btrfs_file_extent_item);
    6158        5266 :                 disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
    6159        5266 :                 data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
    6160        5266 :                 ret = clone_range(sctx, path, clone_root, disk_byte,
    6161             :                                   data_offset, offset, end - offset);
    6162             :         } else {
    6163      628792 :                 ret = send_extent_data(sctx, path, offset, end - offset);
    6164             :         }
    6165      634006 :         sctx->cur_inode_next_write_offset = end;
    6166      634006 :         return ret;
    6167             : }
    6168             : 
    /*
     * Decide whether the file extent item at @left_path is unchanged with
     * respect to sctx->parent_root.
     *
     * @left_path points at the extent item being checked and @ekey is its key
     * (presumably both come from the send root - confirm against the caller).
     * The item's disk bytenr, extent offset and generation are compared with
     * the EXTENT_DATA items of the same objectid in the parent root, walking
     * them while their offset is below ekey->offset + the left extent's
     * num_bytes.
     *
     * Outcomes visible in the body:
     *  - a left item whose type is not BTRFS_FILE_EXTENT_REG is reported as
     *    changed without looking at the parent root;
     *  - a positive return from btrfs_search_slot_for_read() is reported as
     *    changed;
     *  - if the item found in the parent has another objectid or type, or it
     *    ends at or before ekey->offset, the result is "unchanged" only when
     *    the left extent has disk bytenr 0 (a hole), otherwise "changed";
     *  - a right item that is neither REG nor INLINE, an INLINE item reaching
     *    into the range, a difference in disk bytenr, adjusted extent offset
     *    or generation, and a gap between two consecutive right items all
     *    report "changed";
     *  - once the walk ends, the result is "unchanged" only if the right
     *    items covered the range up to ekey->offset + num_bytes.
     *
     * Returns 1 if the extent is considered unchanged, 0 if it is considered
     * changed, or a negative value on error (-ENOMEM when the path cannot be
     * allocated, otherwise the error from btrfs_search_slot_for_read() or
     * btrfs_next_item()).
     */
    6169       12234 : static int is_extent_unchanged(struct send_ctx *sctx,
    6170             :                                struct btrfs_path *left_path,
    6171             :                                struct btrfs_key *ekey)
    6172             : {
    6173       12234 :         int ret = 0;
    6174       12234 :         struct btrfs_key key;
    6175       12234 :         struct btrfs_path *path = NULL;
    6176       12234 :         struct extent_buffer *eb;
    6177       12234 :         int slot;
    6178       12234 :         struct btrfs_key found_key;
    6179       12234 :         struct btrfs_file_extent_item *ei;
    6180       12234 :         u64 left_disknr;
    6181       12234 :         u64 right_disknr;
    6182       12234 :         u64 left_offset;
    6183       12234 :         u64 right_offset;
    6184       12234 :         u64 left_offset_fixed;
    6185       12234 :         u64 left_len;
    6186       12234 :         u64 right_len;
    6187       12234 :         u64 left_gen;
    6188       12234 :         u64 right_gen;
    6189       12234 :         u8 left_type;
    6190       12234 :         u8 right_type;
    6191             : 
    6192       12234 :         path = alloc_path_for_send();
    6193       12234 :         if (!path)
    6194             :                 return -ENOMEM;
    6195             : 
    6196       12234 :         eb = left_path->nodes[0];
    6197       12234 :         slot = left_path->slots[0];
    6198       12234 :         ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
    6199       12234 :         left_type = btrfs_file_extent_type(eb, ei);
    6200             : 
    6201       12234 :         if (left_type != BTRFS_FILE_EXTENT_REG) {
    6202        6893 :                 ret = 0;
    6203        6893 :                 goto out;
    6204             :         }
    6205        5341 :         left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
    6206        5341 :         left_len = btrfs_file_extent_num_bytes(eb, ei);
    6207        5341 :         left_offset = btrfs_file_extent_offset(eb, ei);
    6208        5341 :         left_gen = btrfs_file_extent_generation(eb, ei);
    6209             : 
    6210             :         /*
    6211             :          * Following comments will refer to these graphics. L is the left
    6212             :          * extents which we are checking at the moment. 1-8 are the right
    6213             :          * extents that we iterate.
    6214             :          *
    6215             :          *       |-----L-----|
    6216             :          * |-1-|-2a-|-3-|-4-|-5-|-6-|
    6217             :          *
    6218             :          *       |-----L-----|
    6219             :          * |--1--|-2b-|...(same as above)
    6220             :          *
    6221             :          * Alternative situation. Happens on files where extents got split.
    6222             :          *       |-----L-----|
    6223             :          * |-----------7-----------|-6-|
    6224             :          *
    6225             :          * Alternative situation. Happens on files which got larger.
    6226             :          *       |-----L-----|
    6227             :          * |-8-|
    6228             :          * Nothing follows after 8.
    6229             :          */
    6230             : 
    6231        5341 :         key.objectid = ekey->objectid;
    6232        5341 :         key.type = BTRFS_EXTENT_DATA_KEY;
    6233        5341 :         key.offset = ekey->offset;
    6234        5341 :         ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
    6235        5341 :         if (ret < 0)
    6236           0 :                 goto out;
    6237        5341 :         if (ret) {
    6238           0 :                 ret = 0;
    6239           0 :                 goto out;
    6240             :         }
    6241             : 
    6242             :         /*
    6243             :          * Handle special case where the right side has no extents at all.
    6244             :          */
    6245        5341 :         eb = path->nodes[0];
    6246        5341 :         slot = path->slots[0];
    6247        5341 :         btrfs_item_key_to_cpu(eb, &found_key, slot);
    6248        5341 :         if (found_key.objectid != key.objectid ||
    6249        5341 :             found_key.type != key.type) {
    6250             :                 /* If we're a hole then just pretend nothing changed */
    6251         765 :                 ret = (left_disknr) ? 0 : 1;
    6252         765 :                 goto out;
    6253             :         }
    6254             : 
    6255             :         /*
    6256             :          * We're now on 2a, 2b or 7.
    6257             :          */
    6258        4576 :         key = found_key;
    6259        4753 :         while (key.offset < ekey->offset + left_len) {
    6260        4576 :                 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
    6261        4576 :                 right_type = btrfs_file_extent_type(eb, ei);
    6262        4576 :                 if (right_type != BTRFS_FILE_EXTENT_REG &&
    6263             :                     right_type != BTRFS_FILE_EXTENT_INLINE) {
    6264        1388 :                         ret = 0;
    6265        1388 :                         goto out;
    6266             :                 }
    6267             : 
    6268        3188 :                 if (right_type == BTRFS_FILE_EXTENT_INLINE) {
    6269           0 :                         right_len = btrfs_file_extent_ram_bytes(eb, ei);
    6270           0 :                         right_len = PAGE_ALIGN(right_len);
    6271             :                 } else {
    6272        3188 :                         right_len = btrfs_file_extent_num_bytes(eb, ei);
    6273             :                 }
    6274             : 
    6275             :                 /*
    6276             :                  * Are we at extent 8? If yes, we know the extent is changed.
    6277             :                  * This may only happen on the first iteration.
    6278             :                  */
    6279        3188 :                 if (found_key.offset + right_len <= ekey->offset) {
    6280             :                         /* If we're a hole just pretend nothing changed */
    6281        1572 :                         ret = (left_disknr) ? 0 : 1;
    6282        1572 :                         goto out;
    6283             :                 }
    6284             : 
    6285             :                 /*
    6286             :                  * We just wanted to see if when we have an inline extent, what
    6287             :                  * follows it is a regular extent (wanted to check the above
    6288             :                  * condition for inline extents too). This should normally not
    6289             :                  * happen but it's possible for example when we have an inline
    6290             :                  * compressed extent representing data with a size matching
    6291             :                  * the page size (currently the same as sector size).
    6292             :                  */
    6293        1616 :                 if (right_type == BTRFS_FILE_EXTENT_INLINE) {
    6294           0 :                         ret = 0;
    6295           0 :                         goto out;
    6296             :                 }
    6297             : 
    6298        1616 :                 right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
    6299        1616 :                 right_offset = btrfs_file_extent_offset(eb, ei);
    6300        1616 :                 right_gen = btrfs_file_extent_generation(eb, ei);
    6301             : 
    6302        1616 :                 left_offset_fixed = left_offset;
    6303        1616 :                 if (key.offset < ekey->offset) {
    6304             :                         /* Fix the right offset for 2a and 7. */
    6305         761 :                         right_offset += ekey->offset - key.offset;
    6306             :                 } else {
    6307             :                         /* Fix the left offset for all behind 2a and 2b */
    6308         855 :                         left_offset_fixed += key.offset - ekey->offset;
    6309             :                 }
    6310             : 
    6311             :                 /*
    6312             :                  * Check if we have the same extent.
    6313             :                  */
    6314        1616 :                 if (left_disknr != right_disknr ||
    6315        1616 :                     left_offset_fixed != right_offset ||
    6316             :                     left_gen != right_gen) {
    6317         757 :                         ret = 0;
    6318         757 :                         goto out;
    6319             :                 }
    6320             : 
    6321             :                 /*
    6322             :                  * Go to the next extent.
    6323             :                  */
    6324         859 :                 ret = btrfs_next_item(sctx->parent_root, path);
    6325         859 :                 if (ret < 0)
    6326           0 :                         goto out;
    6327         859 :                 if (!ret) {
    6328         846 :                         eb = path->nodes[0];
    6329         846 :                         slot = path->slots[0];
    6330         846 :                         btrfs_item_key_to_cpu(eb, &found_key, slot);
    6331             :                 }
    6332         859 :                 if (ret || found_key.objectid != key.objectid ||
    6333         438 :                     found_key.type != key.type) {
    6334         421 :                         key.offset += right_len;
    6335         421 :                         break;
    6336             :                 }
    6337         438 :                 if (found_key.offset != key.offset + right_len) {
    6338         261 :                         ret = 0;
    6339         261 :                         goto out;
    6340             :                 }
    6341         177 :                 key = found_key;
    6342             :         }
    6343             : 
    6344             :         /*
    6345             :          * We're now behind the left extent (treat as unchanged) or at the end
    6346             :          * of the right side (treat as changed).
    6347             :          */
    6348         598 :         if (key.offset >= ekey->offset + left_len)
    6349             :                 ret = 1;
    6350             :         else
    6351           0 :                 ret = 0;
    6352             : 
    6353             : 
    6354       12234 : out:
    6355       12234 :         btrfs_free_path(path);
    6356       12234 :         return ret;
    6357             : }
    6358             : 
    /*
     * Recompute sctx->cur_inode_last_extent by looking up @offset in the send
     * root.
     *
     * The field is first reset to 0. A lookup for
     * (sctx->cur_ino, BTRFS_EXTENT_DATA_KEY, @offset) is then made with
     * btrfs_search_slot_for_read(); a non-negative result is treated as
     * success. If the item at the resulting slot belongs to sctx->cur_ino and
     * is an EXTENT_DATA item, the field is set to that extent's end;
     * otherwise it stays 0.
     *
     * NOTE(review): the trailing "0, 1" arguments of the search presumably ask
     * for the closest lower item, falling back to any neighbour - confirm
     * against btrfs_search_slot_for_read().
     *
     * Returns 0 on success, -ENOMEM if the path cannot be allocated, or the
     * negative error from the search.
     */
    6359      403913 : static int get_last_extent(struct send_ctx *sctx, u64 offset)
    6360             : {
    6361      403913 :         struct btrfs_path *path;
    6362      403913 :         struct btrfs_root *root = sctx->send_root;
    6363      403913 :         struct btrfs_key key;
    6364      403913 :         int ret;
    6365             : 
    6366      403913 :         path = alloc_path_for_send();
    6367      403913 :         if (!path)
    6368             :                 return -ENOMEM;
    6369             : 
    6370      403913 :         sctx->cur_inode_last_extent = 0;
    6371             : 
    6372      403913 :         key.objectid = sctx->cur_ino;
    6373      403913 :         key.type = BTRFS_EXTENT_DATA_KEY;
    6374      403913 :         key.offset = offset;
    6375      403913 :         ret = btrfs_search_slot_for_read(root, &key, path, 0, 1);
    6376      403913 :         if (ret < 0)
    6377           0 :                 goto out;
    6378      403913 :         ret = 0;
    6379      403913 :         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
    6380      403913 :         if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY)
    6381        2961 :                 goto out;
    6382             : 
    6383      400952 :         sctx->cur_inode_last_extent = btrfs_file_extent_end(path);
    6384      403913 : out:
    6385      403913 :         btrfs_free_path(path);
    6386      403913 :         return ret;
    6387             : }
    6388             : 
    /*
     * Check whether the byte range [@start, @end) of sctx->cur_ino contains no
     * data extent in sctx->parent_root.
     *
     * The walk starts at the slot found by btrfs_search_slot() for
     * (sctx->cur_ino, BTRFS_EXTENT_DATA_KEY, @start), stepped back by one slot
     * when the search returned a positive value and the slot is not the first
     * of its leaf. Items are then visited in order, moving to the next leaf
     * with btrfs_next_leaf() when the current one is exhausted:
     *  - items whose objectid is below cur_ino or whose type is below
     *    EXTENT_DATA are skipped;
     *  - the walk stops at the first item with a higher objectid, a higher
     *    type or an offset >= @end, when btrfs_next_leaf() returns a positive
     *    value, or once the search position has reached @end;
     *  - extent items ending at or before @start are skipped;
     *  - an extent item with disk bytenr 0 is treated as a hole and the search
     *    position advances to its end;
     *  - any other extent item ends the walk with result 0.
     *
     * Returns 1 if the walk ended without finding such an extent, 0 if one was
     * found, or a negative value on error (-ENOMEM when the path cannot be
     * allocated, otherwise the error from btrfs_search_slot() or
     * btrfs_next_leaf()).
     */
    6389       30232 : static int range_is_hole_in_parent(struct send_ctx *sctx,
    6390             :                                    const u64 start,
    6391             :                                    const u64 end)
    6392             : {
    6393       30232 :         struct btrfs_path *path;
    6394       30232 :         struct btrfs_key key;
    6395       30232 :         struct btrfs_root *root = sctx->parent_root;
    6396       30232 :         u64 search_start = start;
    6397       30232 :         int ret;
    6398             : 
    6399       30232 :         path = alloc_path_for_send();
    6400       30232 :         if (!path)
    6401             :                 return -ENOMEM;
    6402             : 
    6403       30232 :         key.objectid = sctx->cur_ino;
    6404       30232 :         key.type = BTRFS_EXTENT_DATA_KEY;
    6405       30232 :         key.offset = search_start;
    6406       30232 :         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    6407       30232 :         if (ret < 0)
    6408           0 :                 goto out;
    6409       30232 :         if (ret > 0 && path->slots[0] > 0)
    6410       30180 :                 path->slots[0]--;
    6411             : 
    6412       78663 :         while (search_start < end) {
    6413       78663 :                 struct extent_buffer *leaf = path->nodes[0];
    6414       78663 :                 int slot = path->slots[0];
    6415       78663 :                 struct btrfs_file_extent_item *fi;
    6416       78663 :                 u64 extent_end;
    6417             : 
    6418       78663 :                 if (slot >= btrfs_header_nritems(leaf)) {
    6419         414 :                         ret = btrfs_next_leaf(root, path);
    6420         414 :                         if (ret < 0)
    6421           0 :                                 goto out;
    6422         414 :                         else if (ret > 0)
    6423             :                                 break;
    6424         365 :                         continue;
    6425             :                 }
    6426             : 
    6427       78249 :                 btrfs_item_key_to_cpu(leaf, &key, slot);
    6428       78249 :                 if (key.objectid < sctx->cur_ino ||
    6429       78249 :                     key.type < BTRFS_EXTENT_DATA_KEY)
    6430       23063 :                         goto next;
    6431       55186 :                 if (key.objectid > sctx->cur_ino ||
    6432       53100 :                     key.type > BTRFS_EXTENT_DATA_KEY ||
    6433       53100 :                     key.offset >= end)
    6434             :                         break;
    6435             : 
    6436       27687 :                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
    6437       27687 :                 extent_end = btrfs_file_extent_end(path);
    6438       27687 :                 if (extent_end <= start)
    6439       25003 :                         goto next;
    6440        2684 :                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) {
    6441           0 :                         search_start = extent_end;
    6442           0 :                         goto next;
    6443             :                 }
    6444        2684 :                 ret = 0;
    6445        2684 :                 goto out;
    6446       48066 : next:
    6447       48066 :                 path->slots[0]++;
    6448             :         }
    6449             :         ret = 1;
    6450       30232 : out:
    6451       30232 :         btrfs_free_path(path);
    6452       30232 :         return ret;
    6453             : }
    6454             : 
    /*
     * Send a hole for the gap between the end of the last extent recorded for
     * the current inode and the extent item at @path, when one is needed.
     *
     * Nothing is done and 0 is returned when @key does not belong to
     * sctx->cur_ino or need_send_hole() returns false.
     *
     * sctx->cur_inode_last_extent is refreshed with get_last_extent(), looking
     * up @key->offset - 1, in two situations: when it holds (u64)-1
     * (presumably the "not yet known" marker - confirm where the field is
     * initialised), and when @path is at slot 0 of its leaf while the recorded
     * end is still below @key->offset.
     * NOTE(review): for @key->offset == 0 the lookup offset presumably wraps
     * around as an unsigned value - confirm this is intended.
     *
     * If the recorded end is then below @key->offset,
     * range_is_hole_in_parent() is asked about
     * [cur_inode_last_extent, @key->offset); send_hole() is called with
     * @key->offset only when it answers 0.
     *
     * When neither get_last_extent() nor range_is_hole_in_parent() failed,
     * sctx->cur_inode_last_extent is finally set to the end of the extent at
     * @path; this also happens when send_hole() returned an error.
     *
     * Returns 0, the result of send_hole(), or the error from one of the
     * lookup helpers.
     */
    6455     1067266 : static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
    6456             :                            struct btrfs_key *key)
    6457             : {
    6458     1067266 :         int ret = 0;
    6459             : 
    6460     1067266 :         if (sctx->cur_ino != key->objectid || !need_send_hole(sctx))
    6461             :                 return 0;
    6462             : 
    6463      442895 :         if (sctx->cur_inode_last_extent == (u64)-1) {
    6464      402941 :                 ret = get_last_extent(sctx, key->offset - 1);
    6465      402941 :                 if (ret)
    6466             :                         return ret;
    6467             :         }
    6468             : 
    6469      442895 :         if (path->slots[0] == 0 &&
    6470       72689 :             sctx->cur_inode_last_extent < key->offset) {
    6471             :                 /*
    6472             :                  * We might have skipped entire leafs that contained only
    6473             :                  * file extent items for our current inode. These leafs have
    6474             :                  * a generation number smaller (older) than the one in the
    6475             :                  * current leaf and the leaf our last extent came from, and
    6476             :                  * are located between these 2 leafs.
    6477             :                  */
    6478         156 :                 ret = get_last_extent(sctx, key->offset - 1);
    6479         156 :                 if (ret)
    6480             :                         return ret;
    6481             :         }
    6482             : 
    6483      442895 :         if (sctx->cur_inode_last_extent < key->offset) {
    6484       30232 :                 ret = range_is_hole_in_parent(sctx,
    6485             :                                               sctx->cur_inode_last_extent,
    6486             :                                               key->offset);
    6487       30232 :                 if (ret < 0)
    6488             :                         return ret;
    6489       30232 :                 else if (ret == 0)
    6490        2684 :                         ret = send_hole(sctx, key->offset);
    6491             :                 else
    6492             :                         ret = 0;
    6493             :         }
    6494      442895 :         sctx->cur_inode_last_extent = btrfs_file_extent_end(path);
    6495      442895 :         return ret;
    6496             : }
    6497             : 
    /*
     * Process the file extent item at @path (key @key) for the current inode:
     * send its data with send_write_or_clone() when required, then let
     * maybe_send_hole() handle a possible gap in front of it.
     *
     * Flow, as visible in the body:
     *  - if sctx->cur_inode_mode is a symlink, return 0 immediately;
     *  - with a parent root and an inode that is not new, ask
     *    is_extent_unchanged(); an unchanged extent skips the data step and
     *    goes straight to maybe_send_hole();
     *  - otherwise (no parent root, or a new inode) a PREALLOC extent, or a REG
     *    extent with disk bytenr 0 (a hole), returns 0 right away without
     *    calling maybe_send_hole();
     *  - find_extent_clone() looks for a clone source; -ENOENT is tolerated,
     *    any other negative value is returned. NOTE(review): on -ENOENT
     *    found_clone presumably keeps its initial NULL - confirm in
     *    find_extent_clone();
     *  - a non-zero result from send_write_or_clone() is returned as is.
     *
     * Returns 0 or the result of maybe_send_hole() on the normal paths,
     * otherwise the error from the failing helper.
     */
    6498      636984 : static int process_extent(struct send_ctx *sctx,
    6499             :                           struct btrfs_path *path,
    6500             :                           struct btrfs_key *key)
    6501             : {
    6502      636984 :         struct clone_root *found_clone = NULL;
    6503      636984 :         int ret = 0;
    6504             : 
    6505      636984 :         if (S_ISLNK(sctx->cur_inode_mode))
    6506             :                 return 0;
    6507             : 
    6508      636634 :         if (sctx->parent_root && !sctx->cur_inode_new) {
    6509       12234 :                 ret = is_extent_unchanged(sctx, path, key);
    6510       12234 :                 if (ret < 0)
    6511           0 :                         goto out;
    6512       12234 :                 if (ret) {
    6513         598 :                         ret = 0;
    6514         598 :                         goto out_hole;
    6515             :                 }
    6516             :         } else {
    6517      624400 :                 struct btrfs_file_extent_item *ei;
    6518      624400 :                 u8 type;
    6519             : 
    6520      624400 :                 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
    6521             :                                     struct btrfs_file_extent_item);
    6522      624399 :                 type = btrfs_file_extent_type(path->nodes[0], ei);
    6523      624400 :                 if (type == BTRFS_FILE_EXTENT_PREALLOC ||
    6524             :                     type == BTRFS_FILE_EXTENT_REG) {
    6525             :                         /*
    6526             :                          * The send spec does not have a prealloc command yet,
    6527             :                          * so just leave a hole for prealloc'ed extents until
    6528             :                          * we have enough commands queued up to justify rev'ing
    6529             :                          * the send spec.
    6530             :                          */
    6531      624389 :                         if (type == BTRFS_FILE_EXTENT_PREALLOC) {
    6532        1889 :                                 ret = 0;
    6533        1889 :                                 goto out;
    6534             :                         }
    6535             : 
    6536             :                         /* Have a hole, just skip it. */
    6537      622500 :                         if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) {
    6538           0 :                                 ret = 0;
    6539           0 :                                 goto out;
    6540             :                         }
    6541             :                 }
    6542             :         }
    6543             : 
    6544      634146 :         ret = find_extent_clone(sctx, path, key->objectid, key->offset,
    6545             :                         sctx->cur_inode_size, &found_clone);
    6546      634143 :         if (ret != -ENOENT && ret < 0)
    6547           0 :                 goto out;
    6548             : 
    6549      634143 :         ret = send_write_or_clone(sctx, path, key, found_clone);
    6550      634066 :         if (ret)
    6551           0 :                 goto out;
    6552      634066 : out_hole:
    6553      634664 :         ret = maybe_send_hole(sctx, path, key);
    6554             : out:
    6555             :         return ret;
    6556             : }
    6557             : 
    /*
     * Run process_extent() on the EXTENT_DATA items of the inode identified
     * by sctx->cmp_key->objectid in the send root.
     *
     * The iteration is driven by btrfs_for_each_slot(), starting from key
     * (objectid, BTRFS_EXTENT_DATA_KEY, 0). It stops, with result 0, at the
     * first item whose objectid or type differs, and it stops early when
     * process_extent() returns a negative value. An error reported by the
     * iteration itself through iter_ret overrides the result.
     *
     * Returns 0 or the last non-negative process_extent() result on success,
     * -ENOMEM if the path cannot be allocated, or the negative error from the
     * iteration or from process_extent().
     */
    6558           7 : static int process_all_extents(struct send_ctx *sctx)
    6559             : {
    6560           7 :         int ret = 0;
    6561           7 :         int iter_ret = 0;
    6562           7 :         struct btrfs_root *root;
    6563           7 :         struct btrfs_path *path;
    6564           7 :         struct btrfs_key key;
    6565           7 :         struct btrfs_key found_key;
    6566             : 
    6567           7 :         root = sctx->send_root;
    6568           7 :         path = alloc_path_for_send();
    6569           7 :         if (!path)
    6570             :                 return -ENOMEM;
    6571             : 
    6572           7 :         key.objectid = sctx->cmp_key->objectid;
    6573           7 :         key.type = BTRFS_EXTENT_DATA_KEY;
    6574           7 :         key.offset = 0;
    6575           7 :         btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
    6576           3 :                 if (found_key.objectid != key.objectid ||
    6577           0 :                     found_key.type != key.type) {
    6578             :                         ret = 0;
    6579             :                         break;
    6580             :                 }
    6581             : 
    6582           0 :                 ret = process_extent(sctx, path, &found_key);
    6583           0 :                 if (ret < 0)
    6584             :                         break;
    6585             :         }
    6586             :         /* Catch error found during iteration */
    6587           7 :         if (iter_ret < 0)
    6588           0 :                 ret = iter_ret;
    6589             : 
    6590           7 :         btrfs_free_path(path);
    6591           7 :         return ret;
    6592             : }
    6593             : 
    /*
     * Call process_recorded_refs() for the current inode if there is anything
     * to process at this point.
     *
     * The call is skipped, and 0 is returned, when:
     *  - sctx->cur_ino is 0;
     *  - @at_end is false and sctx->cmp_key still refers to the current inode
     *    with a key type <= BTRFS_INODE_EXTREF_KEY (presumably more ref items
     *    of this inode may still follow - confirm against the caller);
     *  - both sctx->new_refs and sctx->deleted_refs are empty.
     *
     * @pending_move is passed through to process_recorded_refs().
     * *@refs_processed is set to 1 only after process_recorded_refs() returned
     * a non-negative value; it is not written on any other path, so the caller
     * has to initialise it.
     *
     * Returns 0 when skipped, otherwise the value returned by
     * process_recorded_refs().
     */
    6594     4283941 : static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end,
    6595             :                                            int *pending_move,
    6596             :                                            int *refs_processed)
    6597             : {
    6598     4283941 :         int ret = 0;
    6599             : 
    6600     4283941 :         if (sctx->cur_ino == 0)
    6601         213 :                 goto out;
    6602     4283728 :         if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
    6603     3273594 :             sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY)
    6604      608859 :                 goto out;
    6605     3674869 :         if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
    6606     3069077 :                 goto out;
    6607             : 
    6608      605792 :         ret = process_recorded_refs(sctx, pending_move);
    6609      605888 :         if (ret < 0)
    6610           0 :                 goto out;
    6611             : 
    6612      605888 :         *refs_processed = 1;
    6613     4284037 : out:
    6614     4284037 :         return ret;
    6615             : }
    6616             : 
    6617     4283958 : static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
    6618             : {
    6619     4283958 :         int ret = 0;
    6620     4283958 :         struct btrfs_inode_info info;
    6621     4283958 :         u64 left_mode;
    6622     4283958 :         u64 left_uid;
    6623     4283958 :         u64 left_gid;
    6624     4283958 :         u64 left_fileattr;
    6625     4283958 :         u64 right_mode;
    6626     4283958 :         u64 right_uid;
    6627     4283958 :         u64 right_gid;
    6628     4283958 :         u64 right_fileattr;
    6629     4283958 :         int need_chmod = 0;
    6630     4283958 :         int need_chown = 0;
    6631     4283958 :         bool need_fileattr = false;
    6632     4283958 :         int need_truncate = 1;
    6633     4283958 :         int pending_move = 0;
    6634     4283958 :         int refs_processed = 0;
    6635             : 
    6636     4283958 :         if (sctx->ignore_cur_inode)
    6637             :                 return 0;
    6638             : 
    6639     4283979 :         ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
    6640             :                                               &refs_processed);
    6641     4283877 :         if (ret < 0)
    6642           0 :                 goto out;
    6643             : 
    6644             :         /*
    6645             :          * We have processed the refs and thus need to advance send_progress.
    6646             :          * Now, calls to get_cur_xxx will take the updated refs of the current
    6647             :          * inode into account.
    6648             :          *
    6649             :          * On the other hand, if our current inode is a directory and couldn't
    6650             :          * be moved/renamed because its parent was renamed/moved too and it has
    6651             :          * a higher inode number, we can only move/rename our current inode
    6652             :          * after we moved/renamed its parent. Therefore in this case operate on
    6653             :          * the old path (pre move/rename) of our current inode, and the
    6654             :          * move/rename will be performed later.
    6655             :          */
    6656     4283877 :         if (refs_processed && !pending_move)
    6657      605789 :                 sctx->send_progress = sctx->cur_ino + 1;
    6658             : 
    6659     4283877 :         if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
    6660         932 :                 goto out;
    6661     4282945 :         if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
    6662     3273119 :                 goto out;
    6663     1009826 :         ret = get_inode_info(sctx->send_root, sctx->cur_ino, &info);
    6664     1009984 :         if (ret < 0)
    6665           0 :                 goto out;
    6666     1009984 :         left_mode = info.mode;
    6667     1009984 :         left_uid = info.uid;
    6668     1009984 :         left_gid = info.gid;
    6669     1009984 :         left_fileattr = info.fileattr;
    6670             : 
    6671     1009984 :         if (!sctx->parent_root || sctx->cur_inode_new) {
    6672      604285 :                 need_chown = 1;
    6673      604285 :                 if (!S_ISLNK(sctx->cur_inode_mode))
    6674      603935 :                         need_chmod = 1;
    6675      604285 :                 if (sctx->cur_inode_next_write_offset == sctx->cur_inode_size)
    6676      603346 :                         need_truncate = 0;
    6677             :         } else {
    6678      405699 :                 u64 old_size;
    6679             : 
    6680      405699 :                 ret = get_inode_info(sctx->parent_root, sctx->cur_ino, &info);
    6681      405699 :                 if (ret < 0)
    6682           0 :                         goto out;
    6683      405699 :                 old_size = info.size;
    6684      405699 :                 right_mode = info.mode;
    6685      405699 :                 right_uid = info.uid;
    6686      405699 :                 right_gid = info.gid;
    6687      405699 :                 right_fileattr = info.fileattr;
    6688             : 
    6689      405699 :                 if (left_uid != right_uid || left_gid != right_gid)
    6690         347 :                         need_chown = 1;
    6691      405699 :                 if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
    6692           1 :                         need_chmod = 1;
    6693      405699 :                 if (!S_ISLNK(sctx->cur_inode_mode) && left_fileattr != right_fileattr)
    6694         194 :                         need_fileattr = true;
    6695      405699 :                 if ((old_size == sctx->cur_inode_size) ||
    6696        2217 :                     (sctx->cur_inode_size > old_size &&
    6697        2217 :                      sctx->cur_inode_next_write_offset == sctx->cur_inode_size))
    6698      403895 :                         need_truncate = 0;
    6699             :         }
    6700             : 
    6701     1009984 :         if (S_ISREG(sctx->cur_inode_mode)) {
    6702     1005522 :                 if (need_send_hole(sctx)) {
    6703      403044 :                         if (sctx->cur_inode_last_extent == (u64)-1 ||
    6704             :                             sctx->cur_inode_last_extent <
    6705      402942 :                             sctx->cur_inode_size) {
    6706         816 :                                 ret = get_last_extent(sctx, (u64)-1);
    6707         816 :                                 if (ret)
    6708           0 :                                         goto out;
    6709             :                         }
    6710      403044 :                         if (sctx->cur_inode_last_extent <
    6711      403044 :                             sctx->cur_inode_size) {
    6712         744 :                                 ret = send_hole(sctx, sctx->cur_inode_size);
    6713         744 :                                 if (ret)
    6714           0 :                                         goto out;
    6715             :                         }
    6716             :                 }
    6717     1005522 :                 if (need_truncate) {
    6718         438 :                         ret = send_truncate(sctx, sctx->cur_ino,
    6719             :                                             sctx->cur_inode_gen,
    6720             :                                             sctx->cur_inode_size);
    6721         438 :                         if (ret < 0)
    6722           0 :                                 goto out;
    6723             :                 }
    6724             :         }
    6725             : 
    6726     1009984 :         if (need_chown) {
    6727      604420 :                 ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen,
    6728             :                                 left_uid, left_gid);
    6729      604457 :                 if (ret < 0)
    6730           0 :                         goto out;
    6731             :         }
    6732     1010021 :         if (need_chmod) {
    6733      603857 :                 ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen,
    6734             :                                 left_mode);
    6735      603762 :                 if (ret < 0)
    6736           0 :                         goto out;
    6737             :         }
    6738     1009926 :         if (need_fileattr) {
    6739         194 :                 ret = send_fileattr(sctx, sctx->cur_ino, sctx->cur_inode_gen,
    6740             :                                     left_fileattr);
    6741         194 :                 if (ret < 0)
    6742           0 :                         goto out;
    6743             :         }
    6744             : 
    6745     1009926 :         if (proto_cmd_ok(sctx, BTRFS_SEND_C_ENABLE_VERITY)
    6746           0 :             && sctx->cur_inode_needs_verity) {
    6747           0 :                 ret = process_verity(sctx);
    6748           0 :                 if (ret < 0)
    6749           0 :                         goto out;
    6750             :         }
    6751             : 
    6752     1009926 :         ret = send_capabilities(sctx);
    6753     1010081 :         if (ret < 0)
    6754           0 :                 goto out;
    6755             : 
    6756             :         /*
    6757             :          * If other directory inodes depended on our current directory
    6758             :          * inode's move/rename, now do their move/rename operations.
    6759             :          */
    6760     1010081 :         if (!is_waiting_for_move(sctx, sctx->cur_ino)) {
    6761     1009756 :                 ret = apply_children_dir_moves(sctx);
    6762     1009633 :                 if (ret)
    6763           0 :                         goto out;
    6764             :                 /*
    6765             :                  * Need to send that every time, no matter if it actually
    6766             :                  * changed between the two trees as we have done changes to
    6767             :                  * the inode before. If our inode is a directory and it's
    6768             :                  * waiting to be moved/renamed, we will send its utimes when
    6769             :                  * it's moved/renamed, therefore we don't need to do it here.
    6770             :                  */
    6771     1009633 :                 sctx->send_progress = sctx->cur_ino + 1;
    6772             : 
    6773             :                 /*
    6774             :                  * If the current inode is a non-empty directory, delay issuing
    6775             :                  * the utimes command for it, as it's very likely we have inodes
    6776             :                  * with an higher number inside it. We want to issue the utimes
    6777             :                  * command only after adding all dentries to it.
    6778             :                  */
    6779     1009633 :                 if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_size > 0)
    6780        2080 :                         ret = cache_dir_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
    6781             :                 else
    6782     1007553 :                         ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
    6783             : 
    6784     1010030 :                 if (ret < 0)
    6785           0 :                         goto out;
    6786             :         }
    6787             : 
    6788     1010127 : out:
    6789     4284178 :         if (!ret)
    6790     4284616 :                 ret = trim_dir_utimes_cache(sctx);
    6791             : 
    6792             :         return ret;
    6793             : }
    6794             : 
    6795     1010304 : static void close_current_inode(struct send_ctx *sctx)
    6796             : {
    6797     1010304 :         u64 i_size;
    6798             : 
    6799     1010304 :         if (sctx->cur_inode == NULL)
    6800             :                 return;
    6801             : 
    6802      602260 :         i_size = i_size_read(sctx->cur_inode);
    6803             : 
    6804             :         /*
    6805             :          * If we are doing an incremental send, we may have extents between the
    6806             :          * last processed extent and the i_size that have not been processed
    6807             :          * because they haven't changed but we may have read some of their pages
    6808             :          * through readahead, see the comments at send_extent_data().
    6809             :          */
    6810      602260 :         if (sctx->clean_page_cache && sctx->page_cache_clear_start < i_size)
    6811          15 :                 truncate_inode_pages_range(&sctx->cur_inode->i_data,
    6812             :                                            sctx->page_cache_clear_start,
    6813          15 :                                            round_up(i_size, PAGE_SIZE) - 1);
    6814             : 
    6815      602260 :         iput(sctx->cur_inode);
    6816      602352 :         sctx->cur_inode = NULL;
    6817             : }
    6818             : 
    6819     1010099 : static int changed_inode(struct send_ctx *sctx,
    6820             :                          enum btrfs_compare_tree_result result)
    6821             : {
    6822     1010099 :         int ret = 0;
    6823     1010099 :         struct btrfs_key *key = sctx->cmp_key;
    6824     1010099 :         struct btrfs_inode_item *left_ii = NULL;
    6825     1010099 :         struct btrfs_inode_item *right_ii = NULL;
    6826     1010099 :         u64 left_gen = 0;
    6827     1010099 :         u64 right_gen = 0;
    6828             : 
    6829     1010099 :         close_current_inode(sctx);
    6830             : 
    6831     1010109 :         sctx->cur_ino = key->objectid;
    6832     1010109 :         sctx->cur_inode_new_gen = false;
    6833     1010109 :         sctx->cur_inode_last_extent = (u64)-1;
    6834     1010109 :         sctx->cur_inode_next_write_offset = 0;
    6835     1010109 :         sctx->ignore_cur_inode = false;
    6836             : 
    6837             :         /*
    6838             :          * Set send_progress to current inode. This will tell all get_cur_xxx
    6839             :          * functions that the current inode's refs are not updated yet. Later,
    6840             :          * when process_recorded_refs is finished, it is set to cur_ino + 1.
    6841             :          */
    6842     1010109 :         sctx->send_progress = sctx->cur_ino;
    6843             : 
    6844     1010109 :         if (result == BTRFS_COMPARE_TREE_NEW ||
    6845     1010109 :             result == BTRFS_COMPARE_TREE_CHANGED) {
    6846     1009981 :                 left_ii = btrfs_item_ptr(sctx->left_path->nodes[0],
    6847             :                                 sctx->left_path->slots[0],
    6848             :                                 struct btrfs_inode_item);
    6849     1009990 :                 left_gen = btrfs_inode_generation(sctx->left_path->nodes[0],
    6850             :                                 left_ii);
    6851             :         } else {
    6852         128 :                 right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
    6853             :                                 sctx->right_path->slots[0],
    6854             :                                 struct btrfs_inode_item);
    6855         128 :                 right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
    6856             :                                 right_ii);
    6857             :         }
    6858     1010147 :         if (result == BTRFS_COMPARE_TREE_CHANGED) {
    6859      405701 :                 right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
    6860             :                                 sctx->right_path->slots[0],
    6861             :                                 struct btrfs_inode_item);
    6862             : 
    6863      405701 :                 right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
    6864             :                                 right_ii);
    6865             : 
    6866             :                 /*
    6867             :                  * The cur_ino = root dir case is special here. We can't treat
    6868             :                  * the inode as deleted+reused because it would generate a
    6869             :                  * stream that tries to delete/mkdir the root dir.
    6870             :                  */
    6871      405701 :                 if (left_gen != right_gen &&
    6872           9 :                     sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
    6873           7 :                         sctx->cur_inode_new_gen = true;
    6874             :         }
    6875             : 
    6876             :         /*
    6877             :          * Normally we do not find inodes with a link count of zero (orphans)
    6878             :          * because the most common case is to create a snapshot and use it
    6879             :          * for a send operation. However other less common use cases involve
    6880             :          * using a subvolume and send it after turning it to RO mode just
    6881             :          * after deleting all hard links of a file while holding an open
    6882             :          * file descriptor against it or turning a RO snapshot into RW mode,
    6883             :          * keep an open file descriptor against a file, delete it and then
    6884             :          * turn the snapshot back to RO mode before using it for a send
    6885             :          * operation. The former is what the receiver operation does.
    6886             :          * Therefore, if we want to send these snapshots soon after they're
    6887             :          * received, we need to handle orphan inodes as well. Moreover, orphans
    6888             :          * can appear not only in the send snapshot but also in the parent
    6889             :          * snapshot. Here are several cases:
    6890             :          *
    6891             :          * Case 1: BTRFS_COMPARE_TREE_NEW
    6892             :          *       |  send snapshot  | action
    6893             :          * --------------------------------
    6894             :          * nlink |        0        | ignore
    6895             :          *
    6896             :          * Case 2: BTRFS_COMPARE_TREE_DELETED
    6897             :          *       | parent snapshot | action
    6898             :          * ----------------------------------
    6899             :          * nlink |        0        | as usual
    6900             :          * Note: No unlinks will be sent because there're no paths for it.
    6901             :          *
    6902             :          * Case 3: BTRFS_COMPARE_TREE_CHANGED
    6903             :          *           |       | parent snapshot | send snapshot | action
    6904             :          * -----------------------------------------------------------------------
    6905             :          * subcase 1 | nlink |        0        |       0       | ignore
    6906             :          * subcase 2 | nlink |       >0        |       0       | new_gen(deletion)
    6907             :          * subcase 3 | nlink |        0        |      >0       | new_gen(creation)
    6908             :          *
    6909             :          */
    6910     1010147 :         if (result == BTRFS_COMPARE_TREE_NEW) {
    6911      604318 :                 if (btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii) == 0) {
    6912           7 :                         sctx->ignore_cur_inode = true;
    6913           7 :                         goto out;
    6914             :                 }
    6915      604299 :                 sctx->cur_inode_gen = left_gen;
    6916      604299 :                 sctx->cur_inode_new = true;
    6917      604299 :                 sctx->cur_inode_deleted = false;
    6918     1208595 :                 sctx->cur_inode_size = btrfs_inode_size(
    6919      604299 :                                 sctx->left_path->nodes[0], left_ii);
    6920     1208578 :                 sctx->cur_inode_mode = btrfs_inode_mode(
    6921      604296 :                                 sctx->left_path->nodes[0], left_ii);
    6922     1208572 :                 sctx->cur_inode_rdev = btrfs_inode_rdev(
    6923      604282 :                                 sctx->left_path->nodes[0], left_ii);
    6924      604290 :                 if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
    6925      604202 :                         ret = send_create_inode_if_needed(sctx);
    6926      405829 :         } else if (result == BTRFS_COMPARE_TREE_DELETED) {
    6927         128 :                 sctx->cur_inode_gen = right_gen;
    6928         128 :                 sctx->cur_inode_new = false;
    6929         128 :                 sctx->cur_inode_deleted = true;
    6930         256 :                 sctx->cur_inode_size = btrfs_inode_size(
    6931         128 :                                 sctx->right_path->nodes[0], right_ii);
    6932         128 :                 sctx->cur_inode_mode = btrfs_inode_mode(
    6933         128 :                                 sctx->right_path->nodes[0], right_ii);
    6934      405701 :         } else if (result == BTRFS_COMPARE_TREE_CHANGED) {
    6935      405701 :                 u32 new_nlinks, old_nlinks;
    6936             : 
    6937      405701 :                 new_nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii);
    6938      405701 :                 old_nlinks = btrfs_inode_nlink(sctx->right_path->nodes[0], right_ii);
    6939      405701 :                 if (new_nlinks == 0 && old_nlinks == 0) {
    6940           2 :                         sctx->ignore_cur_inode = true;
    6941           2 :                         goto out;
    6942      405699 :                 } else if (new_nlinks == 0 || old_nlinks == 0) {
    6943           3 :                         sctx->cur_inode_new_gen = 1;
    6944             :                 }
    6945             :                 /*
    6946             :                  * We need to do some special handling in case the inode was
    6947             :                  * reported as changed with a changed generation number. This
    6948             :                  * means that the original inode was deleted and new inode
    6949             :                  * reused the same inum. So we have to treat the old inode as
    6950             :                  * deleted and the new one as new.
    6951             :                  */
    6952      405699 :                 if (sctx->cur_inode_new_gen) {
    6953             :                         /*
    6954             :                          * First, process the inode as if it was deleted.
    6955             :                          */
    6956          10 :                         if (old_nlinks > 0) {
    6957          10 :                                 sctx->cur_inode_gen = right_gen;
    6958          10 :                                 sctx->cur_inode_new = false;
    6959          10 :                                 sctx->cur_inode_deleted = true;
    6960          20 :                                 sctx->cur_inode_size = btrfs_inode_size(
    6961          10 :                                                 sctx->right_path->nodes[0], right_ii);
    6962          20 :                                 sctx->cur_inode_mode = btrfs_inode_mode(
    6963          10 :                                                 sctx->right_path->nodes[0], right_ii);
    6964          10 :                                 ret = process_all_refs(sctx,
    6965             :                                                 BTRFS_COMPARE_TREE_DELETED);
    6966          10 :                                 if (ret < 0)
    6967           0 :                                         goto out;
    6968             :                         }
    6969             : 
    6970             :                         /*
    6971             :                          * Now process the inode as if it was new.
    6972             :                          */
    6973          10 :                         if (new_nlinks > 0) {
    6974           7 :                                 sctx->cur_inode_gen = left_gen;
    6975           7 :                                 sctx->cur_inode_new = true;
    6976           7 :                                 sctx->cur_inode_deleted = false;
    6977          14 :                                 sctx->cur_inode_size = btrfs_inode_size(
    6978           7 :                                                 sctx->left_path->nodes[0],
    6979             :                                                 left_ii);
    6980          14 :                                 sctx->cur_inode_mode = btrfs_inode_mode(
    6981           7 :                                                 sctx->left_path->nodes[0],
    6982             :                                                 left_ii);
    6983          14 :                                 sctx->cur_inode_rdev = btrfs_inode_rdev(
    6984           7 :                                                 sctx->left_path->nodes[0],
    6985             :                                                 left_ii);
    6986           7 :                                 ret = send_create_inode_if_needed(sctx);
    6987           7 :                                 if (ret < 0)
    6988           0 :                                         goto out;
    6989             : 
    6990           7 :                                 ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
    6991           7 :                                 if (ret < 0)
    6992           0 :                                         goto out;
    6993             :                                 /*
    6994             :                                  * Advance send_progress now as we did not get
    6995             :                                  * into process_recorded_refs_if_needed in the
    6996             :                                  * new_gen case.
    6997             :                                  */
    6998           7 :                                 sctx->send_progress = sctx->cur_ino + 1;
    6999             : 
    7000             :                                 /*
    7001             :                                  * Now process all extents and xattrs of the
    7002             :                                  * inode as if they were all new.
    7003             :                                  */
    7004           7 :                                 ret = process_all_extents(sctx);
    7005           7 :                                 if (ret < 0)
    7006           0 :                                         goto out;
    7007           7 :                                 ret = process_all_new_xattrs(sctx);
    7008           7 :                                 if (ret < 0)
    7009           0 :                                         goto out;
    7010             :                         }
    7011             :                 } else {
    7012      405689 :                         sctx->cur_inode_gen = left_gen;
    7013      405689 :                         sctx->cur_inode_new = false;
    7014      405689 :                         sctx->cur_inode_new_gen = false;
    7015      405689 :                         sctx->cur_inode_deleted = false;
    7016      811378 :                         sctx->cur_inode_size = btrfs_inode_size(
    7017      405689 :                                         sctx->left_path->nodes[0], left_ii);
    7018      405689 :                         sctx->cur_inode_mode = btrfs_inode_mode(
    7019      405689 :                                         sctx->left_path->nodes[0], left_ii);
    7020             :                 }
    7021             :         }
    7022             : 
    7023           0 : out:
    7024     1010261 :         return ret;
    7025             : }
    7026             : 
    7027             : /*
    7028             :  * We have to process new refs before deleted refs, but compare_trees gives us
    7029             :  * the new and deleted refs mixed. To fix this, we record the new/deleted refs
    7030             :  * first and later process them in process_recorded_refs.
    7031             :  * For the cur_inode_new_gen case, we skip recording completely because
    7032             :  * changed_inode did already initiate processing of refs. The reason for this is
    7033             :  * that in this case, compare_tree actually compares the refs of 2 different
    7034             :  * inodes. To fix this, process_all_refs is used in changed_inode to handle all
    7035             :  * refs of the right tree as deleted and all refs of the left tree as new.
    7036             :  */
    7037      608853 : static int changed_ref(struct send_ctx *sctx,
    7038             :                        enum btrfs_compare_tree_result result)
    7039             : {
    7040      608853 :         int ret = 0;
    7041             : 
    7042      608853 :         if (sctx->cur_ino != sctx->cmp_key->objectid) {
    7043           0 :                 inconsistent_snapshot_error(sctx, result, "reference");
    7044           0 :                 return -EIO;
    7045             :         }
    7046             : 
    7047      608853 :         if (!sctx->cur_inode_new_gen &&
    7048             :             sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
    7049      608752 :                 if (result == BTRFS_COMPARE_TREE_NEW)
    7050      606510 :                         ret = record_new_ref(sctx);
    7051        2242 :                 else if (result == BTRFS_COMPARE_TREE_DELETED)
    7052        2189 :                         ret = record_deleted_ref(sctx);
    7053          53 :                 else if (result == BTRFS_COMPARE_TREE_CHANGED)
    7054          53 :                         ret = record_changed_ref(sctx);
    7055             :         }
    7056             : 
    7057             :         return ret;
    7058             : }
    7059             : 
    7060             : /*
    7061             :  * Process new/deleted/changed xattrs. We skip processing in the
    7062             :  * cur_inode_new_gen case because changed_inode did already initiate processing
    7063             :  * of xattrs. The reason is the same as in changed_ref
    7064             :  */
    7065      800357 : static int changed_xattr(struct send_ctx *sctx,
    7066             :                          enum btrfs_compare_tree_result result)
    7067             : {
    7068      800357 :         int ret = 0;
    7069             : 
    7070      800357 :         if (sctx->cur_ino != sctx->cmp_key->objectid) {
    7071           0 :                 inconsistent_snapshot_error(sctx, result, "xattr");
    7072           0 :                 return -EIO;
    7073             :         }
    7074             : 
    7075      800357 :         if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
    7076      800349 :                 if (result == BTRFS_COMPARE_TREE_NEW)
    7077      800289 :                         ret = process_new_xattr(sctx);
    7078          60 :                 else if (result == BTRFS_COMPARE_TREE_DELETED)
    7079          30 :                         ret = process_deleted_xattr(sctx);
    7080          30 :                 else if (result == BTRFS_COMPARE_TREE_CHANGED)
    7081          30 :                         ret = process_changed_xattr(sctx);
    7082             :         }
    7083             : 
    7084             :         return ret;
    7085             : }
    7086             : 
    7087             : /*
    7088             :  * Process new/deleted/changed extents. We skip processing in the
    7089             :  * cur_inode_new_gen case because changed_inode did already initiate processing
    7090             :  * of extents. The reason is the same as in changed_ref
    7091             :  */
    7092      642536 : static int changed_extent(struct send_ctx *sctx,
    7093             :                           enum btrfs_compare_tree_result result)
    7094             : {
    7095      642536 :         int ret = 0;
    7096             : 
    7097             :         /*
    7098             :          * We have found an extent item that changed without the inode item
    7099             :          * having changed. This can happen either after relocation (where the
    7100             :          * disk_bytenr of an extent item is replaced at
    7101             :          * relocation.c:replace_file_extents()) or after deduplication into a
    7102             :          * file in both the parent and send snapshots (where an extent item can
    7103             :          * get modified or replaced with a new one). Note that deduplication
    7104             :          * updates the inode item, but it only changes the iversion (sequence
    7105             :          * field in the inode item) of the inode, so if a file is deduplicated
    7106             :          * the same number of times in both the parent and send snapshots, its
    7107             :          * iversion becomes the same in both snapshots, whence the inode item is
    7108             :          * the same on both snapshots.
                                *
                                * In that case the extent item belongs to an inode other than
                                * the one currently being processed (sctx->cur_ino), so it is
                                * skipped and 0 is returned.
    7109             :          */
    7110      642536 :         if (sctx->cur_ino != sctx->cmp_key->objectid)
    7111             :                 return 0;
    7112             : 
                               /*
                                * Nothing is done here for an inode that has a new generation
                                * or that was deleted, nor for an extent item whose compare
                                * result is BTRFS_COMPARE_TREE_DELETED. Otherwise the extent
                                * item at the left (send root) path is processed and the
                                * result of process_extent() is returned.
                                */
    7113      642527 :         if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
    7114      642114 :                 if (result != BTRFS_COMPARE_TREE_DELETED)
    7115      636984 :                         ret = process_extent(sctx, sctx->left_path,
    7116             :                                         sctx->cmp_key);
    7117             :         }
    7118             : 
    7119             :         return ret;
    7120             : }
    7121             : 
                       /*
                        * Called by changed_cb() for a verity descriptor item (key type
                        * BTRFS_VERITY_DESC_ITEM_KEY with offset 0).
                        *
                        * If the current inode is neither a new generation nor deleted and the
                        * item is new (BTRFS_COMPARE_TREE_NEW), flag the current inode through
                        * sctx->cur_inode_needs_verity. Nothing else is done here.
                        *
                        * Always returns 0.
                        */
    7122             : static int changed_verity(struct send_ctx *sctx, enum btrfs_compare_tree_result result)
    7123             : {
    7124           0 :         int ret = 0;
    7125             : 
    7126           0 :         if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
    7127           0 :                 if (result == BTRFS_COMPARE_TREE_NEW)
    7128           0 :                         sctx->cur_inode_needs_verity = true;
    7129             :         }
    7130             :         return ret;
    7131             : }
    7132             : 
                       /*
                        * Check whether the inode @dir has a different generation in the send
                        * root than in the parent root.
                        *
                        * Returns 1 if the two generations differ and 0 if they are equal. If
                        * get_inode_gen() returns a non-zero value for either root, that value
                        * is returned as is (presumably a negative errno).
                        */
    7133      409463 : static int dir_changed(struct send_ctx *sctx, u64 dir)
    7134             : {
    7135      409463 :         u64 orig_gen, new_gen;
    7136      409463 :         int ret;
    7137             : 
    7138      409463 :         ret = get_inode_gen(sctx->send_root, dir, &new_gen);
    7139      409463 :         if (ret)
    7140             :                 return ret;
    7141             : 
    7142      409463 :         ret = get_inode_gen(sctx->parent_root, dir, &orig_gen);
    7143      409463 :         if (ret)
    7144             :                 return ret;
    7145             : 
    7146      409463 :         return (orig_gen != new_gen) ? 1 : 0;
    7147             : }
    7148             : 
                       /*
                        * Check, using dir_changed(), whether any directory referenced by the
                        * inode ref item at @path / @key has a different generation in the send
                        * root than in the parent root.
                        *
                        * For a BTRFS_INODE_REF_KEY item the directory is key->offset. For any
                        * other key type the item is walked as a sequence of struct
                        * btrfs_inode_extref entries, each one naming its own parent directory.
                        *
                        * Returns what dir_changed() returned: 0 if no directory changed, 1 if
                        * one did, or the error value of a failed generation lookup.
                        */
    7149      409463 : static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
    7150             :                         struct btrfs_key *key)
    7151             : {
    7152      409463 :         struct btrfs_inode_extref *extref;
    7153      409463 :         struct extent_buffer *leaf;
    7154      409463 :         u64 dirid = 0, last_dirid = 0;
    7155      409463 :         unsigned long ptr;
    7156      409463 :         u32 item_size;
    7157      409463 :         u32 cur_offset = 0;
    7158      409463 :         int ref_name_len;
    7159      409463 :         int ret = 0;
    7160             : 
    7161             :         /* Easy case, just check this one dirid */
    7162      409463 :         if (key->type == BTRFS_INODE_REF_KEY) {
    7163      409463 :                 dirid = key->offset;
    7164             : 
    7165      409463 :                 ret = dir_changed(sctx, dirid);
    7166      409463 :                 goto out;
    7167             :         }
    7168             : 
                               /*
                                * Walk the entries packed in the item. Each entry occupies
                                * sizeof(*extref) bytes plus the length of its name. An entry
                                * with the same parent directory as the last one checked is
                                * skipped, and the walk stops at the first non-zero result.
                                */
    7169           0 :         leaf = path->nodes[0];
    7170           0 :         item_size = btrfs_item_size(leaf, path->slots[0]);
    7171           0 :         ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
    7172           0 :         while (cur_offset < item_size) {
    7173           0 :                 extref = (struct btrfs_inode_extref *)(ptr +
    7174             :                                                        cur_offset);
    7175           0 :                 dirid = btrfs_inode_extref_parent(leaf, extref);
    7176           0 :                 ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
    7177           0 :                 cur_offset += ref_name_len + sizeof(*extref);
    7178           0 :                 if (dirid == last_dirid)
    7179           0 :                         continue;
    7180           0 :                 ret = dir_changed(sctx, dirid);
    7181           0 :                 if (ret)
    7182             :                         break;
    7183             :                 last_dirid = dirid;
    7184             :         }
    7185           0 : out:
    7186      409463 :         return ret;
    7187             : }
    7188             : 
    7189             : /*
    7190             :  * Updates compare related fields in sctx and simply forwards to the actual
    7191             :  * changed_xxx functions.
                        *
                        * @left_path:  path in the send root; its leaf, if there is one, is
                        *              asserted to be a clone
                        * @right_path: path in the parent root, NULL when doing a full send; its
                        *              leaf, if there is one, is asserted to be a clone
                        * @key:        key of the item being reported
                        * @result:     how the item differs between the two roots
                        * @sctx:       the send context
                        *
                        * Returns the value of the handler the item was forwarded to, 0 when the
                        * item needs no handling, or the negative value of a failed
                        * compare_refs() or finish_inode_if_needed() call.
    7192             :  */
    7193     5553123 : static int changed_cb(struct btrfs_path *left_path,
    7194             :                       struct btrfs_path *right_path,
    7195             :                       struct btrfs_key *key,
    7196             :                       enum btrfs_compare_tree_result result,
    7197             :                       struct send_ctx *sctx)
    7198             : {
    7199     5553123 :         int ret = 0;
    7200             : 
    7201             :         /*
    7202             :          * We can not hold the commit root semaphore here. This is because in
    7203             :          * the case of sending and receiving to the same filesystem, using a
    7204             :          * pipe, could result in a deadlock:
    7205             :          *
    7206             :          * 1) The task running send blocks on the pipe because it's full;
    7207             :          *
    7208             :          * 2) The task running receive, which is the only consumer of the pipe,
    7209             :          *    is waiting for a transaction commit (for example due to a space
    7210             :          *    reservation when doing a write or triggering a transaction commit
    7211             :          *    when creating a subvolume);
    7212             :          *
    7213             :          * 3) The transaction is waiting to write lock the commit root semaphore,
    7214             :          *    but can not acquire it since it's being held at 1).
    7215             :          *
    7216             :          * Down this call chain we write to the pipe through kernel_write().
    7217             :          * The same type of problem can also happen when sending to a file that
    7218             :          * is stored in the same filesystem - when reserving space for a write
    7219             :          * into the file, we can trigger a transaction commit.
    7220             :          *
    7221             :          * Our caller has supplied us with clones of leaves from the send and
    7222             :          * parent roots, so we're safe here from a concurrent relocation and
    7223             :          * further reallocation of metadata extents while we are here. Below we
    7224             :          * also assert that the leaves are clones.
    7225             :          */
    7226     5553123 :         lockdep_assert_not_held(&sctx->send_root->fs_info->commit_root_sem);
    7227             : 
    7228             :         /*
    7229             :          * We always have a send root, so left_path is never NULL. We will not
    7230             :          * have a leaf when we have reached the end of the send root but have
    7231             :          * not yet reached the end of the parent root.
    7232             :          */
    7233     5553123 :         if (left_path->nodes[0])
    7234     5550951 :                 ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED,
    7235             :                                 &left_path->nodes[0]->bflags));
    7236             :         /*
    7237             :          * When doing a full send we don't have a parent root, so right_path is
    7238             :          * NULL. When doing an incremental send, we may have reached the end of
    7239             :          * the parent root already, so we don't have a leaf at right_path.
    7240             :          */
    7241     5553123 :         if (right_path && right_path->nodes[0])
    7242     2915976 :                 ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED,
    7243             :                                 &right_path->nodes[0]->bflags));
    7244             : 
                               /*
                                * An item that is the same in both roots needs work in two
                                * cases only. A ref item for which compare_refs() returned 1
                                * (a referenced directory changed generation) is handled below
                                * as BTRFS_COMPARE_TREE_CHANGED. A file extent item is passed
                                * to maybe_send_hole(). Every other item type is ignored.
                                */
    7245     5553123 :         if (result == BTRFS_COMPARE_TREE_SAME) {
    7246     1270879 :                 if (key->type == BTRFS_INODE_REF_KEY ||
    7247             :                     key->type == BTRFS_INODE_EXTREF_KEY) {
    7248      409463 :                         ret = compare_refs(sctx, left_path, key);
    7249      409463 :                         if (!ret)
    7250             :                                 return 0;
    7251           2 :                         if (ret < 0)
    7252             :                                 return ret;
    7253      861416 :                 } else if (key->type == BTRFS_EXTENT_DATA_KEY) {
    7254      432595 :                         return maybe_send_hole(sctx, left_path, key);
    7255             :                 } else {
    7256             :                         return 0;
    7257             :                 }
    7258             :                 result = BTRFS_COMPARE_TREE_CHANGED;
    7259             :                 ret = 0;
    7260             :         }
    7261             : 
                               /* Publish the compare state read by the changed_xxx functions. */
    7262     4282246 :         sctx->left_path = left_path;
    7263     4282246 :         sctx->right_path = right_path;
    7264     4282246 :         sctx->cmp_key = key;
    7265             : 
    7266     4282246 :         ret = finish_inode_if_needed(sctx, 0);
    7267     4284372 :         if (ret < 0)
    7268           0 :                 goto out;
    7269             : 
    7270             :         /* Ignore non-FS objects */
    7271     4284372 :         if (key->objectid == BTRFS_FREE_INO_OBJECTID ||
    7272             :             key->objectid == BTRFS_FREE_SPACE_OBJECTID)
    7273           0 :                 goto out;
    7274             : 
                               /*
                                * Inode items are always forwarded. The other item types are
                                * forwarded only while sctx->ignore_cur_inode is not set.
                                */
    7275     4284372 :         if (key->type == BTRFS_INODE_ITEM_KEY) {
    7276     1010148 :                 ret = changed_inode(sctx, result);
    7277     3274224 :         } else if (!sctx->ignore_cur_inode) {
    7278     3274217 :                 if (key->type == BTRFS_INODE_REF_KEY ||
    7279             :                     key->type == BTRFS_INODE_EXTREF_KEY)
    7280      608854 :                         ret = changed_ref(sctx, result);
    7281     2665363 :                 else if (key->type == BTRFS_XATTR_ITEM_KEY)
    7282      800357 :                         ret = changed_xattr(sctx, result);
    7283     1865006 :                 else if (key->type == BTRFS_EXTENT_DATA_KEY)
    7284      642536 :                         ret = changed_extent(sctx, result);
    7285     1222470 :                 else if (key->type == BTRFS_VERITY_DESC_ITEM_KEY &&
    7286           0 :                          key->offset == 0)
    7287           0 :                         ret = changed_verity(sctx, result);
    7288             :         }
    7289             : 
    7290     1222477 : out:
    7291             :         return ret;
    7292             : }
    7293             : 
                       /*
                        * Search @root for @key again, filling @path, after the path had to be
                        * released. The key is expected to still exist.
                        *
                        * Returns the result of btrfs_search_slot() when it is 0 or negative. If
                        * the key is not found (positive result), the node at path->lowest_level
                        * is printed, an error is logged and -EUCLEAN is returned.
                        */
    7294        2708 : static int search_key_again(const struct send_ctx *sctx,
    7295             :                             struct btrfs_root *root,
    7296             :                             struct btrfs_path *path,
    7297             :                             const struct btrfs_key *key)
    7298             : {
    7299        2708 :         int ret;
    7300             : 
                               /*
                                * NOTE(review): presumably a path with need_commit_sem set
                                * takes the commit root semaphore during the search itself, so
                                * the caller only has to hold it otherwise - confirm.
                                */
    7301        2708 :         if (!path->need_commit_sem)
    7302        2708 :                 lockdep_assert_held_read(&root->fs_info->commit_root_sem);
    7303             : 
    7304             :         /*
    7305             :          * Roots used for send operations are readonly and no one can add,
    7306             :          * update or remove keys from them, so we should be able to find our
    7307             :          * key again. The only exception is deduplication, which can operate on
    7308             :          * readonly roots and add, update or remove keys to/from them - but at
    7309             :          * the moment we don't allow it to run in parallel with send.
    7310             :          */
    7311        2708 :         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
    7312        2708 :         ASSERT(ret <= 0);
    7313        2708 :         if (ret > 0) {
    7314           0 :                 btrfs_print_tree(path->nodes[path->lowest_level], false);
    7315           0 :                 btrfs_err(root->fs_info,
    7316             : "send: key (%llu %u %llu) not found in %s root %llu, lowest_level %d, slot %d",
    7317             :                           key->objectid, key->type, key->offset,
    7318             :                           (root == sctx->parent_root ? "parent" : "send"),
    7319             :                           root->root_key.objectid, path->lowest_level,
    7320             :                           path->slots[path->lowest_level]);
    7321           0 :                 return -EUCLEAN;
    7322             :         }
    7323             : 
    7324             :         return ret;
    7325             : }
    7326             : 
                       /*
                        * Walk every item of the send root, starting at the key
                        * (BTRFS_FIRST_FREE_OBJECTID, BTRFS_INODE_ITEM_KEY, 0), and hand each one
                        * to changed_cb() as BTRFS_COMPARE_TREE_NEW with no right (parent) path.
                        *
                        * After each item, fs_info->last_reloc_trans is compared, under the
                        * commit root semaphore, with the value cached in sctx. If it advanced,
                        * the path is released and the current key is searched again before
                        * moving on to the next item.
                        *
                        * Returns -ENOMEM if the path can not be allocated, or the negative
                        * value of a helper that failed. Otherwise finish_inode_if_needed(sctx, 1)
                        * is called last and its result is returned.
                        */
    7327          87 : static int full_send_tree(struct send_ctx *sctx)
    7328             : {
    7329          87 :         int ret;
    7330          87 :         struct btrfs_root *send_root = sctx->send_root;
    7331          87 :         struct btrfs_key key;
    7332          87 :         struct btrfs_fs_info *fs_info = send_root->fs_info;
    7333          87 :         struct btrfs_path *path;
    7334             : 
    7335          87 :         path = alloc_path_for_send();
    7336          87 :         if (!path)
    7337             :                 return -ENOMEM;
    7338          87 :         path->reada = READA_FORWARD_ALWAYS;
    7339             : 
    7340          87 :         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
    7341          87 :         key.type = BTRFS_INODE_ITEM_KEY;
    7342          87 :         key.offset = 0;
    7343             : 
                               /* Remember the relocation state seen before the walk starts. */
    7344          87 :         down_read(&fs_info->commit_root_sem);
    7345          87 :         sctx->last_reloc_trans = fs_info->last_reloc_trans;
    7346          87 :         up_read(&fs_info->commit_root_sem);
    7347             : 
                               /*
                                * A positive result skips the walk and goes straight to
                                * finishing (presumably it means no item was found - confirm).
                                */
    7348          87 :         ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
    7349          87 :         if (ret < 0)
    7350           0 :                 goto out;
    7351          87 :         if (ret)
    7352           0 :                 goto out_finish;
    7353             : 
    7354     1035140 :         while (1) {
    7355     1035140 :                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
    7356             : 
    7357     1035140 :                 ret = changed_cb(path, NULL, &key,
    7358             :                                  BTRFS_COMPARE_TREE_NEW, sctx);
    7359     1035140 :                 if (ret < 0)
    7360           0 :                         goto out;
    7361             : 
    7362     1035140 :                 down_read(&fs_info->commit_root_sem);
    7363     1035140 :                 if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
    7364         418 :                         sctx->last_reloc_trans = fs_info->last_reloc_trans;
    7365         418 :                         up_read(&fs_info->commit_root_sem);
    7366             :                         /*
    7367             :                          * A transaction used for relocating a block group was
    7368             :                          * committed or is about to finish its commit. Release
    7369             :                          * our path (leaf) and restart the search, so that we
    7370             :                          * avoid operating on any file extent items that are
    7371             :                          * stale, with a disk_bytenr that reflects a pre
    7372             :                          * relocation value. This way we avoid, as much as
    7373             :                          * possible, falling back to regular writes when checking
    7374             :                          * if we can clone file ranges.
    7375             :                          */
    7376         418 :                         btrfs_release_path(path);
    7377         418 :                         ret = search_key_again(sctx, send_root, path, &key);
    7378         418 :                         if (ret < 0)
    7379           0 :                                 goto out;
    7380             :                 } else {
    7381     1034722 :                         up_read(&fs_info->commit_root_sem);
    7382             :                 }
    7383             : 
                                       /*
                                        * A positive result ends the walk (presumably no more
                                        * items). ret is assigned again at out_finish.
                                        */
    7384     1035140 :                 ret = btrfs_next_item(send_root, path);
    7385     1035140 :                 if (ret < 0)
    7386           0 :                         goto out;
    7387     1035140 :                 if (ret) {
    7388             :                         ret  = 0;
    7389             :                         break;
    7390             :                 }
    7391             :         }
    7392             : 
    7393          87 : out_finish:
    7394          87 :         ret = finish_inode_if_needed(sctx, 1);
    7395             : 
    7396          87 : out:
    7397          87 :         btrfs_free_path(path);
    7398          87 :         return ret;
    7399             : }
    7400             : 
                       /*
                        * Replace the extent buffer at @level of @path with a clone obtained
                        * from btrfs_clone_extent_buffer(), and drop the path's reference on the
                        * original buffer.
                        *
                        * Returns 0 on success. Returns -ENOMEM if the clone could not be
                        * created, in which case the path is left untouched.
                        */
    7401      326242 : static int replace_node_with_clone(struct btrfs_path *path, int level)
    7402             : {
    7403      326242 :         struct extent_buffer *clone;
    7404             : 
    7405      326242 :         clone = btrfs_clone_extent_buffer(path->nodes[level]);
    7406      326242 :         if (!clone)
    7407             :                 return -ENOMEM;
    7408             : 
    7409      326242 :         free_extent_buffer(path->nodes[level]);
    7410      326242 :         path->nodes[level] = clone;
    7411             : 
    7412      326242 :         return 0;
    7413             : }
    7414             : 
                       /*
                        * Descend one level in @path: read the child that the current slot at
                        * *@level points to, store it at the level below with slot 0 and
                        * decrement *@level. When the new level is 0, the leaf is replaced with
                        * a clone.
                        *
                        * Before descending, readahead is triggered for the following slots of
                        * the parent whose pointer generation is greater than @reada_min_gen.
                        *
                        * Must not be called with *@level == 0, and the commit root semaphore
                        * must be held for reading.
                        *
                        * Returns 0 on success, the error encoded in the pointer returned by
                        * btrfs_read_node_slot(), or the result of replace_node_with_clone().
                        */
    7415      323676 : static int tree_move_down(struct btrfs_path *path, int *level, u64 reada_min_gen)
    7416             : {
    7417      323676 :         struct extent_buffer *eb;
    7418      323676 :         struct extent_buffer *parent = path->nodes[*level];
    7419      323676 :         int slot = path->slots[*level];
    7420      323676 :         const int nritems = btrfs_header_nritems(parent);
    7421      323676 :         u64 reada_max;
    7422      323676 :         u64 reada_done = 0;
    7423             : 
    7424      323676 :         lockdep_assert_held_read(&parent->fs_info->commit_root_sem);
    7425             : 
    7426      323676 :         BUG_ON(*level == 0);
    7427      323676 :         eb = btrfs_read_node_slot(parent, slot);
    7428      323676 :         if (IS_ERR(eb))
    7429           0 :                 return PTR_ERR(eb);
    7430             : 
    7431             :         /*
    7432             :          * Trigger readahead for the next leaves we will process, so that it is
    7433             :          * very likely that when we need them they are already in memory and we
    7434             :          * will not block on disk IO. For nodes we only do readahead for one,
    7435             :          * since the time window between processing nodes is typically larger.
    7436             :          */
    7437      323676 :         reada_max = (*level == 1 ? SZ_128K : eb->fs_info->nodesize);
    7438             : 
                               /* Each readahead issued counts as one nodesize against reada_max. */
    7439    22861489 :         for (slot++; slot < nritems && reada_done < reada_max; slot++) {
    7440    22537813 :                 if (btrfs_node_ptr_generation(parent, slot) > reada_min_gen) {
    7441     1713758 :                         btrfs_readahead_node_child(parent, slot);
    7442     1713758 :                         reada_done += eb->fs_info->nodesize;
    7443             :                 }
    7444             :         }
    7445             : 
    7446      323676 :         path->nodes[*level - 1] = eb;
    7447      323676 :         path->slots[*level - 1] = 0;
    7448      323676 :         (*level)--;
    7449             : 
    7450      323676 :         if (*level == 0)
    7451      322711 :                 return replace_node_with_clone(path, 0);
    7452             : 
    7453             :         return 0;
    7454             : }
    7455             : 
                       /*
                        * Advance @path to the next slot at *@level. While the slot runs past
                        * the last item of the node, drop that node from the path, move up one
                        * level and advance the slot of the parent instead.
                        *
                        * Returns 0 if the move stayed within the starting node and 1 if it had
                        * to go up at least one level. Returns -1 if the end of the node at
                        * @root_level was reached; its slot is then left on the last item.
                        */
    7456     6204605 : static int tree_move_next_or_upnext(struct btrfs_path *path,
    7457             :                                     int *level, int root_level)
    7458             : {
    7459     6204605 :         int ret = 0;
    7460     6204605 :         int nritems;
    7461     6204605 :         nritems = btrfs_header_nritems(path->nodes[*level]);
    7462             : 
    7463     6204605 :         path->slots[*level]++;
    7464             : 
    7465     6528280 :         while (path->slots[*level] >= nritems) {
    7466      323928 :                 if (*level == root_level) {
    7467         252 :                         path->slots[*level] = nritems - 1;
    7468         252 :                         return -1;
    7469             :                 }
    7470             : 
    7471             :                 /* move upnext */
    7472      323676 :                 path->slots[*level] = 0;
    7473      323676 :                 free_extent_buffer(path->nodes[*level]);
    7474      323675 :                 path->nodes[*level] = NULL;
    7475      323675 :                 (*level)++;
    7476      323675 :                 path->slots[*level]++;
    7477             : 
    7478      323675 :                 nritems = btrfs_header_nritems(path->nodes[*level]);
    7479      323675 :                 ret = 1;
    7480             :         }
    7481             :         return ret;
    7482             : }
    7483             : 
    7484             : /*
                        * Move @path one step. It moves down one level when *@level is not 0
                        * and @allow_down is set, and moves to the next slot (going up as
                        * needed) otherwise. @key is then refreshed from the slot the path
                        * points at.
                        *
    7485             :  * Returns 1 if it had to move up and next. 0 is returned if it moved only next
    7486             :  * or down.
                        * The result of the helper that did the move is passed through as is, so
                        * -1 (end of the tree reached, from tree_move_next_or_upnext()) or an
                        * error from tree_move_down() can be returned as well.
    7487             :  */
    7488     6528281 : static int tree_advance(struct btrfs_path *path,
    7489             :                         int *level, int root_level,
    7490             :                         int allow_down,
    7491             :                         struct btrfs_key *key,
    7492             :                         u64 reada_min_gen)
    7493             : {
    7494     6528281 :         int ret;
    7495             : 
    7496     6528281 :         if (*level == 0 || !allow_down) {
    7497     6204605 :                 ret = tree_move_next_or_upnext(path, level, root_level);
    7498             :         } else {
    7499      323676 :                 ret = tree_move_down(path, level, reada_min_gen);
    7500             :         }
    7501             : 
    7502             :         /*
    7503             :          * Even if we have reached the end of a tree, ret is -1, update the key
    7504             :          * anyway, so that in case we need to restart due to a block group
    7505             :          * relocation, we can assert that the last key of the root node still
    7506             :          * exists in the tree.
    7507             :          */
    7508     6528280 :         if (*level == 0)
    7509     6201314 :                 btrfs_item_key_to_cpu(path->nodes[*level], key,
    7510             :                                       path->slots[*level]);
    7511             :         else
    7512      326966 :                 btrfs_node_key_to_cpu(path->nodes[*level], key,
    7513             :                                       path->slots[*level]);
    7514             : 
    7515     6528284 :         return ret;
    7516             : }
    7517             : 
                       /*
                        * Compare the raw contents of the items at the current leaf slots of
                        * @left_path and @right_path.
                        *
                        * The left item is copied into @tmp_buf and compared with the right
                        * item in place, so @tmp_buf must be able to hold a whole item
                        * (presumably the caller allocates one nodesize - confirm).
                        *
                        * Returns 1 if the items differ in size or content, 0 if identical.
                        */
    7518     1680396 : static int tree_compare_item(struct btrfs_path *left_path,
    7519             :                              struct btrfs_path *right_path,
    7520             :                              char *tmp_buf)
    7521             : {
    7522     1680396 :         int cmp;
    7523     1680396 :         int len1, len2;
    7524     1680396 :         unsigned long off1, off2;
    7525             : 
    7526     1680396 :         len1 = btrfs_item_size(left_path->nodes[0], left_path->slots[0]);
    7527     1680395 :         len2 = btrfs_item_size(right_path->nodes[0], right_path->slots[0]);
    7528     1680396 :         if (len1 != len2)
    7529             :                 return 1;
    7530             : 
    7531     1680340 :         off1 = btrfs_item_ptr_offset(left_path->nodes[0], left_path->slots[0]);
    7532     1680340 :         off2 = btrfs_item_ptr_offset(right_path->nodes[0],
    7533             :                                 right_path->slots[0]);
    7534             : 
    7535     1680339 :         read_extent_buffer(left_path->nodes[0], tmp_buf, off1, len1);
    7536             : 
    7537     1680341 :         cmp = memcmp_extent_buffer(right_path->nodes[0], tmp_buf, off2, len1);
    7538     1680340 :         if (cmp)
    7539      409461 :                 return 1;
    7540             :         return 0;
    7541             : }
    7542             : 
    7543             : /*
    7544             :  * A transaction used for relocating a block group was committed or is about to
    7545             :  * finish its commit. Release our paths and restart the search, so that we are
    7546             :  * not using stale extent buffers:
    7547             :  *
    7548             :  * 1) For levels > 0, we are only holding references of extent buffers, without
    7549             :  *    any locks on them, which does not prevent them from having been relocated
    7550             :  *    and reallocated after the last time we released the commit root semaphore.
    7551             :  *    The exceptions are the root nodes, for which we always have a clone, see
    7552             :  *    the comment at btrfs_compare_trees();
    7553             :  *
    7554             :  * 2) For leaves, level 0, we are holding copies (clones) of extent buffers, so
    7555             :  *    we are safe from the concurrent relocation and reallocation. However they
    7556             :  *    can have file extent items with a pre relocation disk_bytenr value, so we
    7557             :  *    restart the search from the current commit roots and clone the new leaves so
    7558             :  *    that we get the post relocation disk_bytenr values. Not doing so, could
    7559             :  *    make us clone the wrong data in case there are new extents using the old
    7560             :  *    disk_bytenr that happen to be shared.
                        *
                        * @left_path / @right_path:   paths in the send / parent root; both are
                        *                             released and searched again
                        * @left_key / @right_key:     keys to search for in each root
                        * @left_level / @right_level: level each search stops at, set as the
                        *                             lowest_level of the corresponding path
                        * @sctx:                      the send context
                        *
                        * The commit root semaphore must be held for reading.
                        *
                        * Returns 0 on success, or the negative value returned by
                        * search_key_again() or replace_node_with_clone().
    7561             :  */
    7562        1145 : static int restart_after_relocation(struct btrfs_path *left_path,
    7563             :                                     struct btrfs_path *right_path,
    7564             :                                     const struct btrfs_key *left_key,
    7565             :                                     const struct btrfs_key *right_key,
    7566             :                                     int left_level,
    7567             :                                     int right_level,
    7568             :                                     const struct send_ctx *sctx)
    7569             : {
    7570        1145 :         int root_level;
    7571        1145 :         int ret;
    7572             : 
    7573        1145 :         lockdep_assert_held_read(&sctx->send_root->fs_info->commit_root_sem);
    7574             : 
    7575        1145 :         btrfs_release_path(left_path);
    7576        1145 :         btrfs_release_path(right_path);
    7577             : 
    7578             :         /*
    7579             :          * Since keys can not be added or removed to/from our roots because they
    7580             :          * are readonly and we do not allow deduplication to run in parallel
    7581             :          * (which can add, remove or change keys), the layout of the trees should
    7582             :          * not change.
    7583             :          */
    7584        1145 :         left_path->lowest_level = left_level;
    7585        1145 :         ret = search_key_again(sctx, sctx->send_root, left_path, left_key);
    7586        1145 :         if (ret < 0)
    7587             :                 return ret;
    7588             : 
    7589        1145 :         right_path->lowest_level = right_level;
    7590        1145 :         ret = search_key_again(sctx, sctx->parent_root, right_path, right_key);
    7591        1145 :         if (ret < 0)
    7592             :                 return ret;
    7593             : 
    7594             :         /*
    7595             :          * If the lowest level nodes are leaves, clone them so that they can be
    7596             :          * safely used by changed_cb() while not under the protection of the
    7597             :          * commit root semaphore, even if relocation and reallocation happens in
    7598             :          * parallel.
    7599             :          */
    7600        1145 :         if (left_level == 0) {
    7601        1145 :                 ret = replace_node_with_clone(left_path, 0);
    7602        1145 :                 if (ret < 0)
    7603             :                         return ret;
    7604             :         }
    7605             : 
    7606        1145 :         if (right_level == 0) {
    7607          96 :                 ret = replace_node_with_clone(right_path, 0);
    7608          96 :                 if (ret < 0)
    7609             :                         return ret;
    7610             :         }
    7611             : 
    7612             :         /*
    7613             :          * Now clone the root nodes (unless they happen to be the leaves we have
    7614             :          * already cloned). This is to protect against concurrent snapshotting of
    7615             :          * the send and parent roots (see the comment at btrfs_compare_trees()).
    7616             :          */
    7617        1145 :         root_level = btrfs_header_level(sctx->send_root->commit_root);
    7618        1145 :         if (root_level > 0) {
    7619        1145 :                 ret = replace_node_with_clone(left_path, root_level);
    7620        1145 :                 if (ret < 0)
    7621             :                         return ret;
    7622             :         }
    7623             : 
    7624        1145 :         root_level = btrfs_header_level(sctx->parent_root->commit_root);
    7625        1145 :         if (root_level > 0) {
    7626        1145 :                 ret = replace_node_with_clone(right_path, root_level);
    7627        1145 :                 if (ret < 0)
    7628             :                         return ret;
    7629             :         }
    7630             : 
    7631             :         return 0;
    7632             : }
    7633             : 
    7634             : /*
    7635             :  * This function compares two trees and calls the provided callback for
    7636             :  * every changed/new/deleted item it finds.
    7637             :  * If shared tree blocks are encountered, whole subtrees are skipped, making
    7638             :  * the compare pretty fast on snapshotted subvolumes.
    7639             :  *
    7640             :  * This currently works on commit roots only. As commit roots are read only,
    7641             :  * we don't do any locking. The commit roots are protected with transactions.
    7642             :  * Transactions are ended and rejoined when a commit is tried in between.
    7643             :  *
    7644             :  * This function checks for modifications done to the trees while comparing.
    7645             :  * If it detects a change, it aborts immediately.
                        *
                        * @left_root is the "send" root and @right_root is the parent root (see
                        * the readahead comment in the body); @sctx is handed through to
                        * changed_cb() and restart_after_relocation().
                        *
                        * Returns 0 once the end of both trees has been reached, or a negative
                        * errno: -ENOMEM when an allocation or root clone fails, otherwise the
                        * error propagated from tree_advance(), restart_after_relocation() or
                        * changed_cb().
                        *
                        * fs_info->commit_root_sem is held for reading while the trees are being
                        * walked. It is released around every changed_cb() call and whenever the
                        * loop yields the CPU or the semaphore is contended.
    7646             :  */
    7647         126 : static int btrfs_compare_trees(struct btrfs_root *left_root,
    7648             :                         struct btrfs_root *right_root, struct send_ctx *sctx)
    7649             : {
    7650         126 :         struct btrfs_fs_info *fs_info = left_root->fs_info;
    7651         126 :         int ret;
    7652         126 :         int cmp;
    7653         126 :         struct btrfs_path *left_path = NULL;
    7654         126 :         struct btrfs_path *right_path = NULL;
    7655         126 :         struct btrfs_key left_key;
    7656         126 :         struct btrfs_key right_key;
    7657         126 :         char *tmp_buf = NULL;
    7658         126 :         int left_root_level;
    7659         126 :         int right_root_level;
    7660         126 :         int left_level;
    7661         126 :         int right_level;
    7662         126 :         int left_end_reached = 0;
    7663         126 :         int right_end_reached = 0;
    7664         126 :         int advance_left = 0;
    7665         126 :         int advance_right = 0;
    7666         126 :         u64 left_blockptr;
    7667         126 :         u64 right_blockptr;
    7668         126 :         u64 left_gen;
    7669         126 :         u64 right_gen;
    7670         126 :         u64 reada_min_gen;
    7671             :
    7672         126 :         left_path = btrfs_alloc_path();
    7673         126 :         if (!left_path) {
    7674           0 :                 ret = -ENOMEM;
    7675           0 :                 goto out;
    7676             :         }
    7677         126 :         right_path = btrfs_alloc_path();
    7678         126 :         if (!right_path) {
    7679           0 :                 ret = -ENOMEM;
    7680           0 :                 goto out;
    7681             :         }
    7682             :
                               /* Scratch buffer of one node size, handed to tree_compare_item(). */
    7683         126 :         tmp_buf = kvmalloc(fs_info->nodesize, GFP_KERNEL);
    7684         126 :         if (!tmp_buf) {
    7685           0 :                 ret = -ENOMEM;
    7686           0 :                 goto out;
    7687             :         }
    7688             :
                               /* Both paths are flagged as commit root searches with locking skipped. */
    7689         126 :         left_path->search_commit_root = 1;
    7690         126 :         left_path->skip_locking = 1;
    7691         126 :         right_path->search_commit_root = 1;
    7692         126 :         right_path->skip_locking = 1;
    7693             :
    7694             :         /*
    7695             :          * Strategy: Go to the first items of both trees. Then do
    7696             :          *
    7697             :          * If both trees are at level 0
    7698             :          *   Compare keys of current items
    7699             :          *     If left < right treat left item as new, advance left tree
    7700             :          *       and repeat
    7701             :          *     If left > right treat right item as deleted, advance right tree
    7702             :          *       and repeat
    7703             :          *     If left == right do deep compare of items, treat as changed if
    7704             :          *       needed, advance both trees and repeat
    7705             :          * If both trees are at the same level but not at level 0
    7706             :          *   Compare keys of current nodes/leafs
    7707             :          *     If left < right advance left tree and repeat
    7708             :          *     If left > right advance right tree and repeat
    7709             :          *     If left == right compare blockptrs of the next nodes/leafs
    7710             :          *       If they match advance both trees but stay at the same level
    7711             :          *         and repeat
    7712             :          *       If they don't match advance both trees while allowing to go
    7713             :          *         deeper and repeat
    7714             :          * If tree levels are different
    7715             :          *   Advance the tree that needs it and repeat
    7716             :          *
    7717             :          * Advancing a tree means:
    7718             :          *   If we are at level 0, try to go to the next slot. If that's not
    7719             :          *   possible, go one level up and repeat. Stop when we found a level
    7720             :          *   where we could go to the next slot. We may at this point be on a
    7721             :          *   node or a leaf.
    7722             :          *
    7723             :          *   If we are not at level 0 and not on shared tree blocks, go one
    7724             :          *   level deeper.
    7725             :          *
    7726             :          *   If we are not at level 0 and on shared tree blocks, go one slot to
    7727             :          *   the right if possible or go up and right.
    7728             :          */
    7729             :
    7730         126 :         down_read(&fs_info->commit_root_sem);
    7731         126 :         left_level = btrfs_header_level(left_root->commit_root);
    7732         126 :         left_root_level = left_level;
    7733             :         /*
    7734             :          * We clone the root node of the send and parent roots to prevent races
    7735             :          * with snapshot creation of these roots. Snapshot creation COWs the
    7736             :          * root node of a tree, so after the transaction is committed the old
    7737             :          * extent can be reallocated while this send operation is still ongoing.
    7738             :          * So we clone them, under the commit root semaphore, to be race free.
    7739             :          */
    7740         252 :         left_path->nodes[left_level] =
    7741         126 :                         btrfs_clone_extent_buffer(left_root->commit_root);
    7742         126 :         if (!left_path->nodes[left_level]) {
    7743           0 :                 ret = -ENOMEM;
    7744           0 :                 goto out_unlock;
    7745             :         }
    7746             :
    7747         126 :         right_level = btrfs_header_level(right_root->commit_root);
    7748         126 :         right_root_level = right_level;
    7749         252 :         right_path->nodes[right_level] =
    7750         126 :                         btrfs_clone_extent_buffer(right_root->commit_root);
    7751         126 :         if (!right_path->nodes[right_level]) {
    7752           0 :                 ret = -ENOMEM;
    7753           0 :                 goto out_unlock;
    7754             :         }
    7755             :         /*
    7756             :          * Our right root is the parent root, while the left root is the "send"
    7757             :          * root. We know that all new nodes/leaves in the left root must have
    7758             :          * a generation greater than the right root's generation, so we trigger
    7759             :          * readahead for those nodes and leaves of the left root, as we know we
    7760             :          * will need to read them at some point.
    7761             :          */
    7762         126 :         reada_min_gen = btrfs_header_generation(right_root->commit_root);
    7763             :
                               /*
                                * Seed both keys from the current slot of each cloned root: an item
                                * key when the root is a leaf (level 0), a node key otherwise.
                                */
    7764         126 :         if (left_level == 0)
    7765          99 :                 btrfs_item_key_to_cpu(left_path->nodes[left_level],
    7766             :                                 &left_key, left_path->slots[left_level]);
    7767             :         else
    7768          27 :                 btrfs_node_key_to_cpu(left_path->nodes[left_level],
    7769             :                                 &left_key, left_path->slots[left_level]);
    7770         126 :         if (right_level == 0)
    7771         101 :                 btrfs_item_key_to_cpu(right_path->nodes[right_level],
    7772             :                                 &right_key, right_path->slots[right_level]);
    7773             :         else
    7774          25 :                 btrfs_node_key_to_cpu(right_path->nodes[right_level],
    7775             :                                 &right_key, right_path->slots[right_level]);
    7776             :
                               /*
                                * Remember the current relocation counter; the loop below compares
                                * against it to notice a relocation that happened in the meantime.
                                */
    7777         126 :         sctx->last_reloc_trans = fs_info->last_reloc_trans;
    7778             :
    7779     4845408 :         while (1) {
                                       /*
                                        * Drop the semaphore for a moment when we should yield the CPU
                                        * or when someone else is waiting on it.
                                        */
    7780     4845408 :                 if (need_resched() ||
    7781             :                     rwsem_is_contended(&fs_info->commit_root_sem)) {
    7782        5708 :                         up_read(&fs_info->commit_root_sem);
    7783        5708 :                         cond_resched();
    7784        5708 :                         down_read(&fs_info->commit_root_sem);
    7785             :                 }
    7786             :
                                       /*
                                        * The relocation counter moved past our recorded value: let
                                        * restart_after_relocation() process both paths and keys before
                                        * going on, then record the new value.
                                        */
    7787     4845409 :                 if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
    7788        1145 :                         ret = restart_after_relocation(left_path, right_path,
    7789             :                                                        &left_key, &right_key,
    7790             :                                                        left_level, right_level,
    7791             :                                                        sctx);
    7792        1145 :                         if (ret < 0)
    7793           0 :                                 goto out_unlock;
    7794        1145 :                         sctx->last_reloc_trans = fs_info->last_reloc_trans;
    7795             :                 }
    7796             :
                                       /*
                                        * Perform the advances requested by the previous iteration. A
                                        * return of -1 from tree_advance() marks the end of that tree,
                                        * any other negative value is treated as an error.
                                        */
    7797     4845409 :                 if (advance_left && !left_end_reached) {
    7798     4821265 :                         ret = tree_advance(left_path, &left_level,
    7799             :                                         left_root_level,
    7800             :                                         advance_left != ADVANCE_ONLY_NEXT,
    7801             :                                         &left_key, reada_min_gen);
    7802     4821265 :                         if (ret == -1)
    7803             :                                 left_end_reached = ADVANCE;
    7804     4821139 :                         else if (ret < 0)
    7805           0 :                                 goto out_unlock;
    7806             :                         advance_left = 0;
    7807             :                 }
    7808     4845409 :                 if (advance_right && !right_end_reached) {
    7809     1707020 :                         ret = tree_advance(right_path, &right_level,
    7810             :                                         right_root_level,
    7811             :                                         advance_right != ADVANCE_ONLY_NEXT,
    7812             :                                         &right_key, reada_min_gen);
    7813     1707020 :                         if (ret == -1)
    7814             :                                 right_end_reached = ADVANCE;
    7815     1706894 :                         else if (ret < 0)
    7816           0 :                                 goto out_unlock;
    7817             :                         advance_right = 0;
    7818             :                 }
    7819             :
    7820     4845409 :                 if (left_end_reached && right_end_reached) {
    7821         126 :                         ret = 0;
    7822         126 :                         goto out_unlock;
    7823     4845283 :                 } else if (left_end_reached) {
                                               /*
                                                * Left tree is exhausted: report the current right leaf
                                                * item as deleted and keep advancing the right tree.
                                                */
    7824        2200 :                         if (right_level == 0) {
    7825        2190 :                                 up_read(&fs_info->commit_root_sem);
    7826        2190 :                                 ret = changed_cb(left_path, right_path,
    7827             :                                                 &right_key,
    7828             :                                                 BTRFS_COMPARE_TREE_DELETED,
    7829             :                                                 sctx);
                                                       /* Semaphore is not held here, so skip out_unlock. */
    7830        2190 :                                 if (ret < 0)
    7831           0 :                                         goto out;
    7832        2190 :                                 down_read(&fs_info->commit_root_sem);
    7833             :                         }
    7834        2200 :                         advance_right = ADVANCE;
    7835        2200 :                         continue;
    7836     4843083 :                 } else if (right_end_reached) {
                                               /*
                                                * Right tree is exhausted: report the current left leaf
                                                * item as new and keep advancing the left tree.
                                                */
    7837     1759896 :                         if (left_level == 0) {
    7838     1607131 :                                 up_read(&fs_info->commit_root_sem);
    7839     1607131 :                                 ret = changed_cb(left_path, right_path,
    7840             :                                                 &left_key,
    7841             :                                                 BTRFS_COMPARE_TREE_NEW,
    7842             :                                                 sctx);
                                                       /* Semaphore is not held here, so skip out_unlock. */
    7843     1607131 :                                 if (ret < 0)
    7844           0 :                                         goto out;
    7845     1607131 :                                 down_read(&fs_info->commit_root_sem);
    7846             :                         }
    7847     1759896 :                         advance_left = ADVANCE;
    7848     1759896 :                         continue;
    7849             :                 }
    7850             :
    7851     3083187 :                 if (left_level == 0 && right_level == 0) {
                                               /*
                                                * Both sides are on leaf items. The item comparison and
                                                * the callback run with the semaphore released.
                                                */
    7852     2911601 :                         up_read(&fs_info->commit_root_sem);
    7853     2911599 :                         cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
    7854     2911599 :                         if (cmp < 0) {
    7855     1220218 :                                 ret = changed_cb(left_path, right_path,
    7856             :                                                 &left_key,
    7857             :                                                 BTRFS_COMPARE_TREE_NEW,
    7858             :                                                 sctx);
    7859     1220218 :                                 advance_left = ADVANCE;
    7860     1691381 :                         } else if (cmp > 0) {
    7861       10985 :                                 ret = changed_cb(left_path, right_path,
    7862             :                                                 &right_key,
    7863             :                                                 BTRFS_COMPARE_TREE_DELETED,
    7864             :                                                 sctx);
    7865       10985 :                                 advance_right = ADVANCE;
    7866             :                         } else {
    7867     1680396 :                                 enum btrfs_compare_tree_result result;
    7868             :
    7869     3360792 :                                 WARN_ON(!extent_buffer_uptodate(left_path->nodes[0]));
                                                       /*
                                                        * Same key on both sides: a non-zero result from
                                                        * tree_compare_item() is reported as CHANGED, zero
                                                        * as SAME.
                                                        */
    7870     1680396 :                                 ret = tree_compare_item(left_path, right_path,
    7871             :                                                         tmp_buf);
    7872     1680396 :                                 if (ret)
    7873             :                                         result = BTRFS_COMPARE_TREE_CHANGED;
    7874             :                                 else
    7875     1270879 :                                         result = BTRFS_COMPARE_TREE_SAME;
    7876     1680396 :                                 ret = changed_cb(left_path, right_path,
    7877             :                                                  &left_key, result, sctx);
    7878     1680396 :                                 advance_left = ADVANCE;
    7879     1680396 :                                 advance_right = ADVANCE;
    7880             :                         }
    7881             :
                                               /* Semaphore is not held here, so skip out_unlock. */
    7882     2911599 :                         if (ret < 0)
    7883           0 :                                 goto out;
    7884     2911599 :                         down_read(&fs_info->commit_root_sem);
    7885      171586 :                 } else if (left_level == right_level) {
                                               /* Same non-leaf level on both sides: compare node keys. */
    7886        2746 :                         cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
    7887        2746 :                         if (cmp < 0) {
    7888             :                                 advance_left = ADVANCE;
    7889        2616 :                         } else if (cmp > 0) {
    7890             :                                 advance_right = ADVANCE;
    7891             :                         } else {
    7892        2605 :                                 left_blockptr = btrfs_node_blockptr(
    7893        2605 :                                                 left_path->nodes[left_level],
    7894             :                                                 left_path->slots[left_level]);
    7895        2605 :                                 right_blockptr = btrfs_node_blockptr(
    7896        2605 :                                                 right_path->nodes[right_level],
    7897             :                                                 right_path->slots[right_level]);
    7898        2605 :                                 left_gen = btrfs_node_ptr_generation(
    7899        2605 :                                                 left_path->nodes[left_level],
    7900             :                                                 left_path->slots[left_level]);
    7901        2605 :                                 right_gen = btrfs_node_ptr_generation(
    7902        2605 :                                                 right_path->nodes[right_level],
    7903             :                                                 right_path->slots[right_level]);
                                                       /*
                                                        * The child is considered shared only when both the
                                                        * block pointer and the pointer generation match.
                                                        */
    7904        2605 :                                 if (left_blockptr == right_blockptr &&
    7905        2605 :                                     left_gen == right_gen) {
    7906             :                                         /*
    7907             :                                          * As we're on a shared block, don't
    7908             :                                          * allow to go deeper.
    7909             :                                          */
    7910             :                                         advance_left = ADVANCE_ONLY_NEXT;
    7911             :                                         advance_right = ADVANCE_ONLY_NEXT;
    7912             :                                 } else {
    7913         960 :                                         advance_left = ADVANCE;
    7914         960 :                                         advance_right = ADVANCE;
    7915             :                                 }
    7916             :                         }
    7917      168840 :                 } else if (left_level < right_level) {
                                               /* Levels differ: advance the side at the higher level. */
    7918             :                         advance_right = ADVANCE;
    7919             :                 } else {
    7920      158018 :                         advance_left = ADVANCE;
    7921             :                 }
    7922             :         }
    7923             :
                       /* Exit taken while commit_root_sem is held for reading. */
    7924         126 : out_unlock:
    7925         126 :         up_read(&fs_info->commit_root_sem);
                       /*
                        * Exit taken without the semaphore. On early allocation failures the
                        * paths and/or tmp_buf passed to the free calls below are still NULL.
                        */
    7926         126 : out:
    7927         126 :         btrfs_free_path(left_path);
    7928         126 :         btrfs_free_path(right_path);
    7929         126 :         kvfree(tmp_buf);
    7930         126 :         return ret;
    7931             : }
    7932             : 
    7933         212 : static int send_subvol(struct send_ctx *sctx)
    7934             : {
    7935         212 :         int ret;
    7936             : 
    7937         212 :         if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) {
    7938         212 :                 ret = send_header(sctx);
    7939         212 :                 if (ret < 0)
    7940           0 :                         goto out;
    7941             :         }
    7942             : 
    7943         212 :         ret = send_subvol_begin(sctx);
    7944         213 :         if (ret < 0)
    7945           0 :                 goto out;
    7946             : 
    7947         213 :         if (sctx->parent_root) {
    7948         126 :                 ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root, sctx);
    7949         126 :                 if (ret < 0)
    7950           0 :                         goto out;
    7951         126 :                 ret = finish_inode_if_needed(sctx, 1);
    7952         126 :                 if (ret < 0)
    7953           0 :                         goto out;
    7954             :         } else {
    7955          87 :                 ret = full_send_tree(sctx);
    7956          87 :                 if (ret < 0)
    7957           0 :                         goto out;
    7958             :         }
    7959             : 
    7960          87 : out:
    7961         213 :         free_recorded_refs(sctx);
    7962         213 :         return ret;
    7963             : }
    7964             : 
    7965             : /*
    7966             :  * If orphan cleanup did remove any orphans from a root, it means the tree
    7967             :  * was modified and therefore the commit root is not the same as the current
    7968             :  * root anymore. This is a problem, because send uses the commit root and
    7969             :  * therefore can see inode items that don't exist in the current root anymore,
    7970             :  * and for example make calls to btrfs_iget, which will do tree lookups based
    7971             :  * on the current root and not on the commit root. Those lookups will fail,
    7972             :  * returning a -ESTALE error, and making send fail with that error. So make
    7973             :  * sure a send does not see any orphans we have just removed, and that it will
    7974             :  * see the same inodes regardless of whether a transaction commit happened
    7975             :  * before it started (meaning that the commit root will be the same as the
    7976             :  * current root) or not.
    7977             :  */
    7978         212 : static int ensure_commit_roots_uptodate(struct send_ctx *sctx)
    7979             : {
    7980         212 :         int i;
    7981         212 :         struct btrfs_trans_handle *trans = NULL;
    7982             : 
    7983         222 : again:
    7984         222 :         if (sctx->parent_root &&
    7985         133 :             sctx->parent_root->node != sctx->parent_root->commit_root)
    7986          12 :                 goto commit_trans;
    7987             : 
    7988         535 :         for (i = 0; i < sctx->clone_roots_cnt; i++)
    7989         333 :                 if (sctx->clone_roots[i].root->node !=
    7990         333 :                     sctx->clone_roots[i].root->commit_root)
    7991           8 :                         goto commit_trans;
    7992             : 
    7993         202 :         if (trans)
    7994           0 :                 return btrfs_end_transaction(trans);
    7995             : 
    7996             :         return 0;
    7997             : 
    7998          20 : commit_trans:
    7999             :         /* Use any root, all fs roots will get their commit roots updated. */
    8000          20 :         if (!trans) {
    8001          10 :                 trans = btrfs_join_transaction(sctx->send_root);
    8002          10 :                 if (IS_ERR(trans))
    8003           0 :                         return PTR_ERR(trans);
    8004          10 :                 goto again;
    8005             :         }
    8006             : 
    8007          10 :         return btrfs_commit_transaction(trans);
    8008             : }
    8009             : 
    8010             : /*
    8011             :  * Make sure any existing dellaloc is flushed for any root used by a send
    8012             :  * operation so that we do not miss any data and we do not race with writeback
    8013             :  * finishing and changing a tree while send is using the tree. This could
    8014             :  * happen if a subvolume is in RW mode, has delalloc, is turned to RO mode and
    8015             :  * a send operation then uses the subvolume.
    8016             :  * After flushing delalloc ensure_commit_roots_uptodate() must be called.
    8017             :  */
    8018         213 : static int flush_delalloc_roots(struct send_ctx *sctx)
    8019             : {
    8020         213 :         struct btrfs_root *root = sctx->parent_root;
    8021         213 :         int ret;
    8022         213 :         int i;
    8023             : 
    8024         213 :         if (root) {
    8025         126 :                 ret = btrfs_start_delalloc_snapshot(root, false);
    8026         126 :                 if (ret)
    8027             :                         return ret;
    8028         126 :                 btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
    8029             :         }
    8030             : 
    8031         554 :         for (i = 0; i < sctx->clone_roots_cnt; i++) {
    8032         341 :                 root = sctx->clone_roots[i].root;
    8033         341 :                 ret = btrfs_start_delalloc_snapshot(root, false);
    8034         341 :                 if (ret)
    8035           0 :                         return ret;
    8036         341 :                 btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
    8037             :         }
    8038             : 
    8039             :         return 0;
    8040             : }
    8041             : 
    8042         467 : static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
    8043             : {
    8044         467 :         spin_lock(&root->root_item_lock);
    8045         467 :         root->send_in_progress--;
    8046             :         /*
    8047             :          * Not much left to do, we don't know why it's unbalanced and
    8048             :          * can't blindly reset it to 0.
    8049             :          */
    8050         467 :         if (root->send_in_progress < 0)
    8051           0 :                 btrfs_err(root->fs_info,
    8052             :                           "send_in_progress unbalanced %d root %llu",
    8053             :                           root->send_in_progress, root->root_key.objectid);
    8054         467 :         spin_unlock(&root->root_item_lock);
    8055         467 : }
    8056             : 
    8057           0 : static void dedupe_in_progress_warn(const struct btrfs_root *root)
    8058             : {
    8059           0 :         btrfs_warn_rl(root->fs_info,
    8060             : "cannot use root %llu for send while deduplications on it are in progress (%d in progress)",
    8061             :                       root->root_key.objectid, root->dedupe_in_progress);
    8062           0 : }
    8063             : 
    8064         213 : long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
    8065             : {
    8066         213 :         int ret = 0;
    8067         213 :         struct btrfs_root *send_root = BTRFS_I(inode)->root;
    8068         213 :         struct btrfs_fs_info *fs_info = send_root->fs_info;
    8069         213 :         struct btrfs_root *clone_root;
    8070         213 :         struct send_ctx *sctx = NULL;
    8071         213 :         u32 i;
    8072         213 :         u64 *clone_sources_tmp = NULL;
    8073         213 :         int clone_sources_to_rollback = 0;
    8074         213 :         size_t alloc_size;
    8075         213 :         int sort_clone_roots = 0;
    8076         213 :         struct btrfs_lru_cache_entry *entry;
    8077         213 :         struct btrfs_lru_cache_entry *tmp;
    8078             : 
    8079         213 :         if (!capable(CAP_SYS_ADMIN))
    8080             :                 return -EPERM;
    8081             : 
    8082             :         /*
    8083             :          * The subvolume must remain read-only during send, protect against
    8084             :          * making it RW. This also protects against deletion.
    8085             :          */
    8086         213 :         spin_lock(&send_root->root_item_lock);
    8087         213 :         if (btrfs_root_readonly(send_root) && send_root->dedupe_in_progress) {
    8088           0 :                 dedupe_in_progress_warn(send_root);
    8089           0 :                 spin_unlock(&send_root->root_item_lock);
    8090           0 :                 return -EAGAIN;
    8091             :         }
    8092         213 :         send_root->send_in_progress++;
    8093         213 :         spin_unlock(&send_root->root_item_lock);
    8094             : 
    8095             :         /*
    8096             :          * Userspace tools do the checks and warn the user if it's
    8097             :          * not RO.
    8098             :          */
    8099         213 :         if (!btrfs_root_readonly(send_root)) {
    8100           0 :                 ret = -EPERM;
    8101           0 :                 goto out;
    8102             :         }
    8103             : 
    8104             :         /*
    8105             :          * Check that we don't overflow at later allocations, we request
    8106             :          * clone_sources_count + 1 items, and compare to unsigned long inside
    8107             :          * access_ok. Also set an upper limit for allocation size so this can't
    8108             :          * easily exhaust memory. Max number of clone sources is about 200K.
    8109             :          */
    8110         213 :         if (arg->clone_sources_count > SZ_8M / sizeof(struct clone_root)) {
    8111           0 :                 ret = -EINVAL;
    8112           0 :                 goto out;
    8113             :         }
    8114             : 
    8115         213 :         if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
    8116           0 :                 ret = -EINVAL;
    8117           0 :                 goto out;
    8118             :         }
    8119             : 
    8120         213 :         sctx = kzalloc(sizeof(struct send_ctx), GFP_KERNEL);
    8121         212 :         if (!sctx) {
    8122           0 :                 ret = -ENOMEM;
    8123           0 :                 goto out;
    8124             :         }
    8125             : 
    8126         212 :         INIT_LIST_HEAD(&sctx->new_refs);
    8127         212 :         INIT_LIST_HEAD(&sctx->deleted_refs);
    8128             : 
    8129         212 :         btrfs_lru_cache_init(&sctx->name_cache, SEND_MAX_NAME_CACHE_SIZE);
    8130         212 :         btrfs_lru_cache_init(&sctx->backref_cache, SEND_MAX_BACKREF_CACHE_SIZE);
    8131         212 :         btrfs_lru_cache_init(&sctx->dir_created_cache,
    8132             :                              SEND_MAX_DIR_CREATED_CACHE_SIZE);
    8133             :         /*
    8134             :          * This cache is periodically trimmed to a fixed size elsewhere, see
    8135             :          * cache_dir_utimes() and trim_dir_utimes_cache().
    8136             :          */
    8137         213 :         btrfs_lru_cache_init(&sctx->dir_utimes_cache, 0);
    8138             : 
    8139         212 :         sctx->pending_dir_moves = RB_ROOT;
    8140         212 :         sctx->waiting_dir_moves = RB_ROOT;
    8141         212 :         sctx->orphan_dirs = RB_ROOT;
    8142         212 :         sctx->rbtree_new_refs = RB_ROOT;
    8143         212 :         sctx->rbtree_deleted_refs = RB_ROOT;
    8144             : 
    8145         212 :         sctx->flags = arg->flags;
    8146             : 
    8147         212 :         if (arg->flags & BTRFS_SEND_FLAG_VERSION) {
    8148           0 :                 if (arg->version > BTRFS_SEND_STREAM_VERSION) {
    8149           0 :                         ret = -EPROTO;
    8150           0 :                         goto out;
    8151             :                 }
    8152             :                 /* Zero means "use the highest version" */
    8153           0 :                 sctx->proto = arg->version ?: BTRFS_SEND_STREAM_VERSION;
    8154             :         } else {
    8155         212 :                 sctx->proto = 1;
    8156             :         }
    8157         212 :         if ((arg->flags & BTRFS_SEND_FLAG_COMPRESSED) && sctx->proto < 2) {
    8158           0 :                 ret = -EINVAL;
    8159           0 :                 goto out;
    8160             :         }
    8161             : 
    8162         212 :         sctx->send_filp = fget(arg->send_fd);
    8163         212 :         if (!sctx->send_filp) {
    8164           0 :                 ret = -EBADF;
    8165           0 :                 goto out;
    8166             :         }
    8167             : 
    8168         212 :         sctx->send_root = send_root;
    8169             :         /*
    8170             :          * Unlikely but possible, if the subvolume is marked for deletion but
    8171             :          * is slow to remove the directory entry, send can still be started
    8172             :          */
    8173         212 :         if (btrfs_root_dead(sctx->send_root)) {
    8174           0 :                 ret = -EPERM;
    8175           0 :                 goto out;
    8176             :         }
    8177             : 
    8178         212 :         sctx->clone_roots_cnt = arg->clone_sources_count;
    8179             : 
    8180         212 :         if (sctx->proto >= 2) {
    8181           0 :                 u32 send_buf_num_pages;
    8182             : 
    8183           0 :                 sctx->send_max_size = BTRFS_SEND_BUF_SIZE_V2;
    8184           0 :                 sctx->send_buf = vmalloc(sctx->send_max_size);
    8185           0 :                 if (!sctx->send_buf) {
    8186           0 :                         ret = -ENOMEM;
    8187           0 :                         goto out;
    8188             :                 }
    8189           0 :                 send_buf_num_pages = sctx->send_max_size >> PAGE_SHIFT;
    8190           0 :                 sctx->send_buf_pages = kcalloc(send_buf_num_pages,
    8191             :                                                sizeof(*sctx->send_buf_pages),
    8192             :                                                GFP_KERNEL);
    8193           0 :                 if (!sctx->send_buf_pages) {
    8194           0 :                         ret = -ENOMEM;
    8195           0 :                         goto out;
    8196             :                 }
    8197           0 :                 for (i = 0; i < send_buf_num_pages; i++) {
    8198           0 :                         sctx->send_buf_pages[i] =
    8199           1 :                                 vmalloc_to_page(sctx->send_buf + (i << PAGE_SHIFT));
    8200             :                 }
    8201             :         } else {
    8202         212 :                 sctx->send_max_size = BTRFS_SEND_BUF_SIZE_V1;
    8203         212 :                 sctx->send_buf = kvmalloc(sctx->send_max_size, GFP_KERNEL);
    8204             :         }
    8205         212 :         if (!sctx->send_buf) {
    8206           0 :                 ret = -ENOMEM;
    8207           0 :                 goto out;
    8208             :         }
    8209             : 
    8210         424 :         sctx->clone_roots = kvcalloc(sizeof(*sctx->clone_roots),
    8211         212 :                                      arg->clone_sources_count + 1,
    8212             :                                      GFP_KERNEL);
    8213         212 :         if (!sctx->clone_roots) {
    8214           0 :                 ret = -ENOMEM;
    8215           0 :                 goto out;
    8216             :         }
    8217             : 
    8218         212 :         alloc_size = array_size(sizeof(*arg->clone_sources),
    8219             :                                 arg->clone_sources_count);
    8220             : 
    8221         212 :         if (arg->clone_sources_count) {
    8222         125 :                 clone_sources_tmp = kvmalloc(alloc_size, GFP_KERNEL);
    8223         126 :                 if (!clone_sources_tmp) {
    8224           0 :                         ret = -ENOMEM;
    8225           0 :                         goto out;
    8226             :                 }
    8227             : 
    8228         126 :                 ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
    8229             :                                 alloc_size);
    8230         126 :                 if (ret) {
    8231           0 :                         ret = -EFAULT;
    8232           0 :                         goto out;
    8233             :                 }
    8234             : 
    8235         254 :                 for (i = 0; i < arg->clone_sources_count; i++) {
    8236         128 :                         clone_root = btrfs_get_fs_root(fs_info,
    8237         128 :                                                 clone_sources_tmp[i], true);
    8238         128 :                         if (IS_ERR(clone_root)) {
    8239           0 :                                 ret = PTR_ERR(clone_root);
    8240           0 :                                 goto out;
    8241             :                         }
    8242         128 :                         spin_lock(&clone_root->root_item_lock);
    8243         128 :                         if (!btrfs_root_readonly(clone_root) ||
    8244             :                             btrfs_root_dead(clone_root)) {
    8245           0 :                                 spin_unlock(&clone_root->root_item_lock);
    8246           0 :                                 btrfs_put_root(clone_root);
    8247           0 :                                 ret = -EPERM;
    8248           0 :                                 goto out;
    8249             :                         }
    8250         128 :                         if (clone_root->dedupe_in_progress) {
    8251           0 :                                 dedupe_in_progress_warn(clone_root);
    8252           0 :                                 spin_unlock(&clone_root->root_item_lock);
    8253           0 :                                 btrfs_put_root(clone_root);
    8254           0 :                                 ret = -EAGAIN;
    8255           0 :                                 goto out;
    8256             :                         }
    8257         128 :                         clone_root->send_in_progress++;
    8258         128 :                         spin_unlock(&clone_root->root_item_lock);
    8259             : 
    8260         128 :                         sctx->clone_roots[i].root = clone_root;
    8261         128 :                         clone_sources_to_rollback = i + 1;
    8262             :                 }
    8263         126 :                 kvfree(clone_sources_tmp);
    8264         126 :                 clone_sources_tmp = NULL;
    8265             :         }
    8266             : 
    8267         212 :         if (arg->parent_root) {
    8268         125 :                 sctx->parent_root = btrfs_get_fs_root(fs_info, arg->parent_root,
    8269             :                                                       true);
    8270         126 :                 if (IS_ERR(sctx->parent_root)) {
    8271           0 :                         ret = PTR_ERR(sctx->parent_root);
    8272           0 :                         goto out;
    8273             :                 }
    8274             : 
    8275         126 :                 spin_lock(&sctx->parent_root->root_item_lock);
    8276         126 :                 sctx->parent_root->send_in_progress++;
    8277         126 :                 if (!btrfs_root_readonly(sctx->parent_root) ||
    8278             :                                 btrfs_root_dead(sctx->parent_root)) {
    8279           0 :                         spin_unlock(&sctx->parent_root->root_item_lock);
    8280           0 :                         ret = -EPERM;
    8281           0 :                         goto out;
    8282             :                 }
    8283         126 :                 if (sctx->parent_root->dedupe_in_progress) {
    8284           0 :                         dedupe_in_progress_warn(sctx->parent_root);
    8285           0 :                         spin_unlock(&sctx->parent_root->root_item_lock);
    8286           0 :                         ret = -EAGAIN;
    8287           0 :                         goto out;
    8288             :                 }
    8289         126 :                 spin_unlock(&sctx->parent_root->root_item_lock);
    8290             :         }
    8291             : 
    8292             :         /*
    8293             :          * Clones from send_root are allowed, but only if the clone source
    8294             :          * is behind the current send position. This is checked while searching
    8295             :          * for possible clone sources.
    8296             :          */
    8297         426 :         sctx->clone_roots[sctx->clone_roots_cnt++].root =
    8298         213 :                 btrfs_grab_root(sctx->send_root);
    8299             : 
    8300             :         /* We do a bsearch later */
    8301         213 :         sort(sctx->clone_roots, sctx->clone_roots_cnt,
    8302             :                         sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
    8303             :                         NULL);
    8304         213 :         sort_clone_roots = 1;
    8305             : 
    8306         213 :         ret = flush_delalloc_roots(sctx);
    8307         213 :         if (ret)
    8308           0 :                 goto out;
    8309             : 
    8310         213 :         ret = ensure_commit_roots_uptodate(sctx);
    8311         212 :         if (ret)
    8312           0 :                 goto out;
    8313             : 
    8314         212 :         ret = send_subvol(sctx);
    8315         213 :         if (ret < 0)
    8316           0 :                 goto out;
    8317             : 
    8318        1508 :         btrfs_lru_cache_for_each_entry_safe(&sctx->dir_utimes_cache, entry, tmp) {
    8319        1295 :                 ret = send_utimes(sctx, entry->key, entry->gen);
    8320        1295 :                 if (ret < 0)
    8321           0 :                         goto out;
    8322        1295 :                 btrfs_lru_cache_remove(&sctx->dir_utimes_cache, entry);
    8323             :         }
    8324             : 
    8325         213 :         if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
    8326         213 :                 ret = begin_cmd(sctx, BTRFS_SEND_C_END);
    8327         213 :                 if (ret < 0)
    8328           0 :                         goto out;
    8329         213 :                 ret = send_cmd(sctx);
    8330         213 :                 if (ret < 0)
    8331           0 :                         goto out;
    8332             :         }
    8333             : 
    8334         213 : out:
    8335         426 :         WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves));
    8336         213 :         while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) {
    8337           0 :                 struct rb_node *n;
    8338           0 :                 struct pending_dir_move *pm;
    8339             : 
    8340           0 :                 n = rb_first(&sctx->pending_dir_moves);
    8341           0 :                 pm = rb_entry(n, struct pending_dir_move, node);
    8342           0 :                 while (!list_empty(&pm->list)) {
    8343           0 :                         struct pending_dir_move *pm2;
    8344             : 
    8345           0 :                         pm2 = list_first_entry(&pm->list,
    8346             :                                                struct pending_dir_move, list);
    8347           0 :                         free_pending_move(sctx, pm2);
    8348             :                 }
    8349           0 :                 free_pending_move(sctx, pm);
    8350             :         }
    8351             : 
    8352         426 :         WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves));
    8353         213 :         while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) {
    8354           0 :                 struct rb_node *n;
    8355           0 :                 struct waiting_dir_move *dm;
    8356             : 
    8357           0 :                 n = rb_first(&sctx->waiting_dir_moves);
    8358           0 :                 dm = rb_entry(n, struct waiting_dir_move, node);
    8359           0 :                 rb_erase(&dm->node, &sctx->waiting_dir_moves);
    8360           0 :                 kfree(dm);
    8361             :         }
    8362             : 
    8363         426 :         WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs));
    8364         213 :         while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) {
    8365           0 :                 struct rb_node *n;
    8366           0 :                 struct orphan_dir_info *odi;
    8367             : 
    8368           0 :                 n = rb_first(&sctx->orphan_dirs);
    8369           0 :                 odi = rb_entry(n, struct orphan_dir_info, node);
    8370           0 :                 free_orphan_dir_info(sctx, odi);
    8371             :         }
    8372             : 
    8373         213 :         if (sort_clone_roots) {
    8374         554 :                 for (i = 0; i < sctx->clone_roots_cnt; i++) {
    8375         341 :                         btrfs_root_dec_send_in_progress(
    8376         341 :                                         sctx->clone_roots[i].root);
    8377         341 :                         btrfs_put_root(sctx->clone_roots[i].root);
    8378             :                 }
    8379             :         } else {
    8380           0 :                 for (i = 0; sctx && i < clone_sources_to_rollback; i++) {
    8381           0 :                         btrfs_root_dec_send_in_progress(
    8382           0 :                                         sctx->clone_roots[i].root);
    8383           0 :                         btrfs_put_root(sctx->clone_roots[i].root);
    8384             :                 }
    8385             : 
    8386           0 :                 btrfs_root_dec_send_in_progress(send_root);
    8387             :         }
    8388         339 :         if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) {
    8389         126 :                 btrfs_root_dec_send_in_progress(sctx->parent_root);
    8390         126 :                 btrfs_put_root(sctx->parent_root);
    8391             :         }
    8392             : 
    8393         213 :         kvfree(clone_sources_tmp);
    8394             : 
    8395         213 :         if (sctx) {
    8396         213 :                 if (sctx->send_filp)
    8397         213 :                         fput(sctx->send_filp);
    8398             : 
    8399         213 :                 kvfree(sctx->clone_roots);
    8400         213 :                 kfree(sctx->send_buf_pages);
    8401         213 :                 kvfree(sctx->send_buf);
    8402         213 :                 kvfree(sctx->verity_descriptor);
    8403             : 
    8404         213 :                 close_current_inode(sctx);
    8405             : 
    8406         213 :                 btrfs_lru_cache_clear(&sctx->name_cache);
    8407         213 :                 btrfs_lru_cache_clear(&sctx->backref_cache);
    8408         213 :                 btrfs_lru_cache_clear(&sctx->dir_created_cache);
    8409         213 :                 btrfs_lru_cache_clear(&sctx->dir_utimes_cache);
    8410             : 
    8411         213 :                 kfree(sctx);
    8412             :         }
    8413             : 
    8414         213 :         return ret;
    8415             : }

Generated by: LCOV version 1.14