LCOV - code coverage report
Current view: top level - fs/btrfs - send.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-djwx @ Mon Jul 31 20:08:22 PDT 2023 Lines: 3341 4068 82.1 %
Date: 2023-07-31 20:08:22 Functions: 147 153 96.1 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (C) 2012 Alexander Block.  All rights reserved.
       4             :  */
       5             : 
       6             : #include <linux/bsearch.h>
       7             : #include <linux/fs.h>
       8             : #include <linux/file.h>
       9             : #include <linux/sort.h>
      10             : #include <linux/mount.h>
      11             : #include <linux/xattr.h>
      12             : #include <linux/posix_acl_xattr.h>
      13             : #include <linux/radix-tree.h>
      14             : #include <linux/vmalloc.h>
      15             : #include <linux/string.h>
      16             : #include <linux/compat.h>
      17             : #include <linux/crc32c.h>
      18             : #include <linux/fsverity.h>
      19             : 
      20             : #include "send.h"
      21             : #include "ctree.h"
      22             : #include "backref.h"
      23             : #include "locking.h"
      24             : #include "disk-io.h"
      25             : #include "btrfs_inode.h"
      26             : #include "transaction.h"
      27             : #include "compression.h"
      28             : #include "xattr.h"
      29             : #include "print-tree.h"
      30             : #include "accessors.h"
      31             : #include "dir-item.h"
      32             : #include "file-item.h"
      33             : #include "ioctl.h"
      34             : #include "verity.h"
      35             : #include "lru_cache.h"
      36             : 
      37             : /*
      38             :  * Maximum number of references an extent can have in order for us to attempt to
      39             :  * issue clone operations instead of write operations. This currently exists to
      40             :  * avoid hitting limitations of the backreference walking code (taking a lot of
      41             :  * time and using too much memory for extents with large number of references).
      42             :  */
      43             : #define SEND_MAX_EXTENT_REFS    1024
      44             : 
      45             : /*
      46             :  * A fs_path is a helper to dynamically build path names with unknown size.
      47             :  * It reallocates the internal buffer on demand.
      48             :  * It allows fast adding of path elements on the right side (normal path) and
      49             :  * fast adding to the left side (reversed path). A reversed path can also be
      50             :  * unreversed if needed.
      51             :  */
      52             : struct fs_path {
      53             :         union {
      54             :                 struct {
      55             :                         char *start;
      56             :                         char *end;
      57             : 
      58             :                         char *buf;
      59             :                         unsigned short buf_len:15;
      60             :                         unsigned short reversed:1;
      61             :                         char inline_buf[];
      62             :                 };
      63             :                 /*
      64             :                  * Average path length does not exceed 200 bytes, we'll have
      65             :                  * better packing in the slab and higher chance to satisfy
      66             :                  * an allocation later during send.
      67             :                  */
      68             :                 char pad[256];
      69             :         };
      70             : };
      71             : #define FS_PATH_INLINE_SIZE \
      72             :         (sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))
      73             : 
      74             : 
      75             : /* reused for each extent */
      76             : struct clone_root {
      77             :         struct btrfs_root *root;
      78             :         u64 ino;
      79             :         u64 offset;
      80             :         u64 num_bytes;
      81             :         bool found_ref;
      82             : };
      83             : 
      84             : #define SEND_MAX_NAME_CACHE_SIZE                        256
      85             : 
      86             : /*
      87             :  * Limit the root_ids array of struct backref_cache_entry to 17 elements.
      88             :  * This makes the size of a cache entry to be exactly 192 bytes on x86_64, which
      89             :  * can be satisfied from the kmalloc-192 slab, without wasting any space.
      90             :  * The most common case is to have a single root for cloning, which corresponds
      91             :  * to the send root. Having the user specify more than 16 clone roots is not
      92             :  * common, and in such rare cases we simply don't use caching if the number of
      93             :  * cloning roots that lead down to a leaf is more than 17.
      94             :  */
      95             : #define SEND_MAX_BACKREF_CACHE_ROOTS                    17
      96             : 
      97             : /*
      98             :  * Max number of entries in the cache.
      99             :  * With SEND_MAX_BACKREF_CACHE_ROOTS as 17, the size in bytes, excluding
     100             :  * maple tree's internal nodes, is 24K.
     101             :  */
     102             : #define SEND_MAX_BACKREF_CACHE_SIZE 128
     103             : 
     104             : /*
     105             :  * A backref cache entry maps a leaf to a list of IDs of roots from which the
     106             :  * leaf is accessible and we can use for clone operations.
     107             :  * With SEND_MAX_BACKREF_CACHE_ROOTS as 17, each cache entry is 192 bytes (on
     108             :  * x86_64).
     109             :  */
     110             : struct backref_cache_entry {
     111             :         struct btrfs_lru_cache_entry entry;
     112             :         u64 root_ids[SEND_MAX_BACKREF_CACHE_ROOTS];
     113             :         /* Number of valid elements in the root_ids array. */
     114             :         int num_roots;
     115             : };
     116             : 
     117             : /* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */
     118             : static_assert(offsetof(struct backref_cache_entry, entry) == 0);
     119             : 
     120             : /*
     121             :  * Max number of entries in the cache that stores directories that were already
     122             :  * created. The cache uses raw struct btrfs_lru_cache_entry entries, so it uses
     123             :  * at most 4096 bytes - sizeof(struct btrfs_lru_cache_entry) is 48 bytes, but
     124             :  * the kmalloc-64 slab is used, so we get 4096 bytes (64 bytes * 64).
     125             :  */
     126             : #define SEND_MAX_DIR_CREATED_CACHE_SIZE                 64
     127             : 
     128             : /*
     129             :  * Max number of entries in the directory utimes cache (dir_utimes_cache).
     130             :  * The cache uses raw struct btrfs_lru_cache_entry entries, so it uses
     131             :  * at most 4096 bytes - sizeof(struct btrfs_lru_cache_entry) is 48 bytes, but
     132             :  * the kmalloc-64 slab is used, so we get 4096 bytes (64 bytes * 64).
     133             :  */
     134             : #define SEND_MAX_DIR_UTIMES_CACHE_SIZE                  64
     135             : 
     136             : struct send_ctx {
     137             :         struct file *send_filp;
     138             :         loff_t send_off;
     139             :         char *send_buf;
     140             :         u32 send_size;
     141             :         u32 send_max_size;
     142             :         /*
     143             :          * Whether BTRFS_SEND_A_DATA attribute was already added to current
     144             :          * command (since protocol v2, data must be the last attribute).
     145             :          */
     146             :         bool put_data;
     147             :         struct page **send_buf_pages;
     148             :         u64 flags;      /* 'flags' member of btrfs_ioctl_send_args is u64 */
     149             :         /* Protocol version compatibility requested */
     150             :         u32 proto;
     151             : 
     152             :         struct btrfs_root *send_root;
     153             :         struct btrfs_root *parent_root;
     154             :         struct clone_root *clone_roots;
     155             :         int clone_roots_cnt;
     156             : 
     157             :         /* current state of the compare_tree call */
     158             :         struct btrfs_path *left_path;
     159             :         struct btrfs_path *right_path;
     160             :         struct btrfs_key *cmp_key;
     161             : 
     162             :         /*
     163             :          * Keep track of the generation of the last transaction that was used
     164             :          * for relocating a block group. This is periodically checked in order
     165             :          * to detect if a relocation happened since the last check, so that we
     166             :          * don't operate on stale extent buffers for nodes (level >= 1) or on
     167             :          * stale disk_bytenr values of file extent items.
     168             :          */
     169             :         u64 last_reloc_trans;
     170             : 
     171             :         /*
     172             :          * Information about the currently processed inode. For deleted inodes,
     173             :          * these are the values from the deleted inode.
     174             :          */
     175             :         u64 cur_ino;
     176             :         u64 cur_inode_gen;
     177             :         u64 cur_inode_size;
     178             :         u64 cur_inode_mode;
     179             :         u64 cur_inode_rdev;
     180             :         u64 cur_inode_last_extent;
     181             :         u64 cur_inode_next_write_offset;
     182             :         bool cur_inode_new;
     183             :         bool cur_inode_new_gen;
     184             :         bool cur_inode_deleted;
     185             :         bool ignore_cur_inode;
     186             :         bool cur_inode_needs_verity;
     187             :         void *verity_descriptor;
     188             : 
     189             :         u64 send_progress;
     190             : 
     191             :         struct list_head new_refs;
     192             :         struct list_head deleted_refs;
     193             : 
     194             :         struct btrfs_lru_cache name_cache;
     195             : 
     196             :         /*
     197             :          * The inode we are currently processing. It's not NULL only when we
     198             :          * need to issue write commands for data extents from this inode.
     199             :          */
     200             :         struct inode *cur_inode;
     201             :         struct file_ra_state ra;
     202             :         u64 page_cache_clear_start;
     203             :         bool clean_page_cache;
     204             : 
     205             :         /*
     206             :          * We process inodes by their increasing order, so if before an
     207             :          * incremental send we reverse the parent/child relationship of
     208             :          * directories such that a directory with a lower inode number was
     209             :          * the parent of a directory with a higher inode number, and the one
     210             :          * becoming the new parent got renamed too, we can't rename/move the
     211             :          * directory with lower inode number when we finish processing it - we
     212             :          * must process the directory with higher inode number first, then
     213             :          * rename/move it and then rename/move the directory with lower inode
     214             :          * number. Example follows.
     215             :          *
     216             :          * Tree state when the first send was performed:
     217             :          *
     218             :          * .
     219             :          * |-- a                   (ino 257)
     220             :          *     |-- b               (ino 258)
     221             :          *         |
     222             :          *         |
     223             :          *         |-- c           (ino 259)
     224             :          *         |   |-- d       (ino 260)
     225             :          *         |
     226             :          *         |-- c2          (ino 261)
     227             :          *
     228             :          * Tree state when the second (incremental) send is performed:
     229             :          *
     230             :          * .
     231             :          * |-- a                   (ino 257)
     232             :          *     |-- b               (ino 258)
     233             :          *         |-- c2          (ino 261)
     234             :          *             |-- d2      (ino 260)
     235             :          *                 |-- cc  (ino 259)
     236             :          *
     237             :          * The sequence of steps that lead to the second state was:
     238             :          *
     239             :          * mv /a/b/c/d /a/b/c2/d2
     240             :          * mv /a/b/c /a/b/c2/d2/cc
     241             :          *
     242             :          * "c" has lower inode number, but we can't move it (2nd mv operation)
     243             :          * before we move "d", which has higher inode number.
     244             :          *
     245             :          * So we just memorize which move/rename operations must be performed
     246             :          * later when their respective parent is processed and moved/renamed.
     247             :          */
     248             : 
     249             :         /* Indexed by parent directory inode number. */
     250             :         struct rb_root pending_dir_moves;
     251             : 
     252             :         /*
     253             :          * Reverse index, indexed by the inode number of a directory that
     254             :          * is waiting for the move/rename of its immediate parent before its
     255             :          * own move/rename can be performed.
     256             :          */
     257             :         struct rb_root waiting_dir_moves;
     258             : 
     259             :         /*
     260             :          * A directory that is going to be rm'ed might have a child directory
     261             :          * which is in the pending directory moves index above. In this case,
     262             :          * the directory can only be removed after the move/rename of its child
     263             :          * is performed. Example:
     264             :          *
     265             :          * Parent snapshot:
     266             :          *
     267             :          * .                        (ino 256)
     268             :          * |-- a/                   (ino 257)
     269             :          *     |-- b/               (ino 258)
     270             :          *         |-- c/           (ino 259)
     271             :          *         |   |-- x/       (ino 260)
     272             :          *         |
     273             :          *         |-- y/           (ino 261)
     274             :          *
     275             :          * Send snapshot:
     276             :          *
     277             :          * .                        (ino 256)
     278             :          * |-- a/                   (ino 257)
     279             :          *     |-- b/               (ino 258)
     280             :          *         |-- YY/          (ino 261)
     281             :          *              |-- x/      (ino 260)
     282             :          *
     283             :          * Sequence of steps that lead to the send snapshot:
     284             :          * rm -f /a/b/c/foo.txt
     285             :          * mv /a/b/y /a/b/YY
     286             :          * mv /a/b/c/x /a/b/YY
     287             :          * rmdir /a/b/c
     288             :          *
     289             :          * When the child is processed, its move/rename is delayed until its
     290             :          * parent is processed (as explained above), but all other operations
     291             :          * like update utimes, chown, chgrp, etc, are performed and the paths
     292             :          * that it uses for those operations must use the orphanized name of
     293             :          * its parent (the directory we're going to rm later), so we need to
     294             :          * memorize that name.
     295             :          *
     296             :          * Indexed by the inode number of the directory to be deleted.
     297             :          */
     298             :         struct rb_root orphan_dirs;
     299             : 
     300             :         struct rb_root rbtree_new_refs;
     301             :         struct rb_root rbtree_deleted_refs;
     302             : 
     303             :         struct btrfs_lru_cache backref_cache;
     304             :         u64 backref_cache_last_reloc_trans;
     305             : 
     306             :         struct btrfs_lru_cache dir_created_cache;
     307             :         struct btrfs_lru_cache dir_utimes_cache;
     308             : };
     309             : 
     310             : struct pending_dir_move {
     311             :         struct rb_node node;
     312             :         struct list_head list;
     313             :         u64 parent_ino;
     314             :         u64 ino;
     315             :         u64 gen;
     316             :         struct list_head update_refs;
     317             : };
     318             : 
     319             : struct waiting_dir_move {
     320             :         struct rb_node node;
     321             :         u64 ino;
     322             :         /*
     323             :          * There might be some directory that could not be removed because it
     324             :          * was waiting for this directory inode to be moved first. Therefore
     325             :          * after this directory is moved, we can try to rmdir the ino rmdir_ino.
     326             :          */
     327             :         u64 rmdir_ino;
     328             :         u64 rmdir_gen;
     329             :         bool orphanized;
     330             : };
     331             : 
     332             : struct orphan_dir_info {
     333             :         struct rb_node node;
     334             :         u64 ino;
     335             :         u64 gen;
     336             :         u64 last_dir_index_offset;
     337             :         u64 dir_high_seq_ino;
     338             : };
     339             : 
     340             : struct name_cache_entry {
     341             :         /*
     342             :          * The key in the entry is an inode number, and the generation matches
     343             :          * the inode's generation.
     344             :          */
     345             :         struct btrfs_lru_cache_entry entry;
     346             :         u64 parent_ino;
     347             :         u64 parent_gen;
     348             :         int ret;
     349             :         int need_later_update;
     350             :         int name_len;
     351             :         char name[];
     352             : };
     353             : 
     354             : /* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */
     355             : static_assert(offsetof(struct name_cache_entry, entry) == 0);
     356             : 
     357             : #define ADVANCE                                                 1
     358             : #define ADVANCE_ONLY_NEXT                                       -1
     359             : 
     360             : enum btrfs_compare_tree_result {
     361             :         BTRFS_COMPARE_TREE_NEW,
     362             :         BTRFS_COMPARE_TREE_DELETED,
     363             :         BTRFS_COMPARE_TREE_CHANGED,
     364             :         BTRFS_COMPARE_TREE_SAME,
     365             : };
     366             : 
     367             : __cold
     368           0 : static void inconsistent_snapshot_error(struct send_ctx *sctx,
     369             :                                         enum btrfs_compare_tree_result result,
     370             :                                         const char *what)
     371             : {
     372           0 :         const char *result_string;
     373             : 
     374           0 :         switch (result) {
     375             :         case BTRFS_COMPARE_TREE_NEW:
     376             :                 result_string = "new";
     377             :                 break;
     378             :         case BTRFS_COMPARE_TREE_DELETED:
     379             :                 result_string = "deleted";
     380             :                 break;
     381             :         case BTRFS_COMPARE_TREE_CHANGED:
     382             :                 result_string = "updated";
     383             :                 break;
     384             :         case BTRFS_COMPARE_TREE_SAME:
     385             :                 ASSERT(0);
     386             :                 result_string = "unchanged";
     387             :                 break;
     388             :         default:
     389             :                 ASSERT(0);
     390             :                 result_string = "unexpected";
     391             :         }
     392             : 
     393           0 :         btrfs_err(sctx->send_root->fs_info,
     394             :                   "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu",
     395             :                   result_string, what, sctx->cmp_key->objectid,
     396             :                   sctx->send_root->root_key.objectid,
     397             :                   (sctx->parent_root ?
     398             :                    sctx->parent_root->root_key.objectid : 0));
     399           0 : }
     400             : 
     401             : __maybe_unused
     402             : static bool proto_cmd_ok(const struct send_ctx *sctx, int cmd)
     403             : {
     404     1008631 :         switch (sctx->proto) {
     405             :         case 1:  return cmd <= BTRFS_SEND_C_MAX_V1;
     406             :         case 2:  return cmd <= BTRFS_SEND_C_MAX_V2;
     407           0 :         case 3:  return cmd <= BTRFS_SEND_C_MAX_V3;
     408             :         default: return false;
     409             :         }
     410             : }
     411             : 
     412             : static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
     413             : 
     414             : static struct waiting_dir_move *
     415             : get_waiting_dir_move(struct send_ctx *sctx, u64 ino);
     416             : 
     417             : static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen);
     418             : 
     419     2063745 : static int need_send_hole(struct send_ctx *sctx)
     420             : {
     421     1640490 :         return (sctx->parent_root && !sctx->cur_inode_new &&
     422     2902279 :                 !sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
     423      838533 :                 S_ISREG(sctx->cur_inode_mode));
     424             : }
     425             : 
     426    22813931 : static void fs_path_reset(struct fs_path *p)
     427             : {
     428    22813931 :         if (p->reversed) {
     429     6098775 :                 p->start = p->buf + p->buf_len - 1;
     430     6098775 :                 p->end = p->start;
     431     6098775 :                 *p->start = 0;
     432             :         } else {
     433    16715156 :                 p->start = p->buf;
     434    16715156 :                 p->end = p->start;
     435    16715156 :                 *p->start = 0;
     436             :         }
     437    22813931 : }
     438             : 
     439    11557844 : static struct fs_path *fs_path_alloc(void)
     440             : {
     441    11557844 :         struct fs_path *p;
     442             : 
     443    11557844 :         p = kmalloc(sizeof(*p), GFP_KERNEL);
     444    11560680 :         if (!p)
     445             :                 return NULL;
     446    11560680 :         p->reversed = 0;
     447    11560680 :         p->buf = p->inline_buf;
     448    11560680 :         p->buf_len = FS_PATH_INLINE_SIZE;
     449    11560680 :         fs_path_reset(p);
     450    11560680 :         return p;
     451             : }
     452             : 
     453      610306 : static struct fs_path *fs_path_alloc_reversed(void)
     454             : {
     455      610306 :         struct fs_path *p;
     456             : 
     457      610306 :         p = fs_path_alloc();
     458      610336 :         if (!p)
     459             :                 return NULL;
     460      610336 :         p->reversed = 1;
     461      610336 :         fs_path_reset(p);
     462      610336 :         return p;
     463             : }
     464             : 
     465    13183115 : static void fs_path_free(struct fs_path *p)
     466             : {
     467    13183115 :         if (!p)
     468             :                 return;
     469    11563979 :         if (p->buf != p->inline_buf)
     470         291 :                 kfree(p->buf);
     471    11563979 :         kfree(p);
     472             : }
     473             : 
     474             : static int fs_path_len(struct fs_path *p)
     475             : {
     476     2833509 :         return p->end - p->start;
     477             : }
     478             : 
     479    12131388 : static int fs_path_ensure_buf(struct fs_path *p, int len)
     480             : {
     481    12131388 :         char *tmp_buf;
     482    12131388 :         int path_len;
     483    12131388 :         int old_buf_len;
     484             : 
     485    12131388 :         len++;
     486             : 
     487    12131388 :         if (p->buf_len >= len)
     488             :                 return 0;
     489             : 
     490         291 :         if (len > PATH_MAX) {
     491           0 :                 WARN_ON(1);
     492           0 :                 return -ENOMEM;
     493             :         }
     494             : 
     495         291 :         path_len = p->end - p->start;
     496         291 :         old_buf_len = p->buf_len;
     497             : 
     498             :         /*
     499             :          * Allocate to the next largest kmalloc bucket size, to let
     500             :          * the fast path happen most of the time.
     501             :          */
     502         291 :         len = kmalloc_size_roundup(len);
     503             :         /*
     504             :          * First time the inline_buf does not suffice
     505             :          */
     506         291 :         if (p->buf == p->inline_buf) {
     507         291 :                 tmp_buf = kmalloc(len, GFP_KERNEL);
     508         291 :                 if (tmp_buf)
     509         582 :                         memcpy(tmp_buf, p->buf, old_buf_len);
     510             :         } else {
     511           0 :                 tmp_buf = krealloc(p->buf, len, GFP_KERNEL);
     512             :         }
     513         291 :         if (!tmp_buf)
     514             :                 return -ENOMEM;
     515         291 :         p->buf = tmp_buf;
     516         291 :         p->buf_len = len;
     517             : 
     518         291 :         if (p->reversed) {
     519           4 :                 tmp_buf = p->buf + old_buf_len - path_len - 1;
     520           4 :                 p->end = p->buf + p->buf_len - 1;
     521           4 :                 p->start = p->end - path_len;
     522           8 :                 memmove(p->start, tmp_buf, path_len + 1);
     523             :         } else {
     524         287 :                 p->start = p->buf;
     525         287 :                 p->end = p->start + path_len;
     526             :         }
     527             :         return 0;
     528             : }
     529             : 
     530    12131799 : static int fs_path_prepare_for_add(struct fs_path *p, int name_len,
     531             :                                    char **prepared)
     532             : {
     533    12131799 :         int ret;
     534    12131799 :         int new_len;
     535             : 
     536    12131799 :         new_len = p->end - p->start + name_len;
     537    12131799 :         if (p->start != p->end)
     538      282238 :                 new_len++;
     539    12131799 :         ret = fs_path_ensure_buf(p, new_len);
     540    12133060 :         if (ret < 0)
     541           0 :                 goto out;
     542             : 
     543    12133060 :         if (p->reversed) {
     544     5155693 :                 if (p->start != p->end)
     545      275285 :                         *--p->start = '/';
     546     5155693 :                 p->start -= name_len;
     547     5155693 :                 *prepared = p->start;
     548             :         } else {
     549     6977367 :                 if (p->start != p->end)
     550        6953 :                         *p->end++ = '/';
     551     6977367 :                 *prepared = p->end;
     552     6977367 :                 p->end += name_len;
     553     6977367 :                 *p->end = 0;
     554             :         }
     555             : 
     556    12133060 : out:
     557    12133060 :         return ret;
     558             : }
     559             : 
     560     4338830 : static int fs_path_add(struct fs_path *p, const char *name, int name_len)
     561             : {
     562     4338830 :         int ret;
     563     4338830 :         char *prepared;
     564             : 
     565     4338830 :         ret = fs_path_prepare_for_add(p, name_len, &prepared);
     566     4338283 :         if (ret < 0)
     567           0 :                 goto out;
     568     8676566 :         memcpy(prepared, name, name_len);
     569             : 
     570     4338283 : out:
     571     4338283 :         return ret;
     572             : }
     573             : 
     574     5760473 : static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
     575             : {
     576     5760473 :         int ret;
     577     5760473 :         char *prepared;
     578             : 
     579     5760473 :         ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared);
     580     5759757 :         if (ret < 0)
     581           0 :                 goto out;
     582    11519514 :         memcpy(prepared, p2->start, p2->end - p2->start);
     583             : 
     584     5759757 : out:
     585     5759757 :         return ret;
     586             : }
     587             : 
     588     2036210 : static int fs_path_add_from_extent_buffer(struct fs_path *p,
     589             :                                           struct extent_buffer *eb,
     590             :                                           unsigned long off, int len)
     591             : {
     592     2036210 :         int ret;
     593     2036210 :         char *prepared;
     594             : 
     595     2036210 :         ret = fs_path_prepare_for_add(p, len, &prepared);
     596     2036204 :         if (ret < 0)
     597           0 :                 goto out;
     598             : 
     599     2036204 :         read_extent_buffer(eb, prepared, off, len);
     600             : 
     601     2036203 : out:
     602     2036203 :         return ret;
     603             : }
     604             : 
     605      606329 : static int fs_path_copy(struct fs_path *p, struct fs_path *from)
     606             : {
     607      606329 :         p->reversed = from->reversed;
     608      606329 :         fs_path_reset(p);
     609             : 
     610      606328 :         return fs_path_add_path(p, from);
     611             : }
     612             : 
     613     4871410 : static void fs_path_unreverse(struct fs_path *p)
     614             : {
     615     4871410 :         char *tmp;
     616     4871410 :         int len;
     617             : 
     618     4871410 :         if (!p->reversed)
     619             :                 return;
     620             : 
     621     4871410 :         tmp = p->start;
     622     4871410 :         len = p->end - p->start;
     623     4871410 :         p->start = p->buf;
     624     4871410 :         p->end = p->start + len;
     625     9742820 :         memmove(p->start, tmp, len + 1);
     626     4871410 :         p->reversed = 0;
     627             : }
     628             : 
     629    18510587 : static struct btrfs_path *alloc_path_for_send(void)
     630             : {
     631    18510587 :         struct btrfs_path *path;
     632             : 
     633    18510587 :         path = btrfs_alloc_path();
     634    18512425 :         if (!path)
     635             :                 return NULL;
     636    18512425 :         path->search_commit_root = 1;
     637    18512425 :         path->skip_locking = 1;
     638    18512425 :         path->need_commit_sem = 1;
     639    18512425 :         return path;
     640             : }
     641             : 
     642     5494501 : static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
     643             : {
     644     5494501 :         int ret;
     645     5494501 :         u32 pos = 0;
     646             : 
     647    10991316 :         while (pos < len) {
     648     5494757 :                 ret = kernel_write(filp, buf + pos, len - pos, off);
     649     5496815 :                 if (ret < 0)
     650           0 :                         return ret;
     651     5496815 :                 if (ret == 0)
     652             :                         return -EIO;
     653     5496815 :                 pos += ret;
     654             :         }
     655             : 
     656             :         return 0;
     657             : }
     658             : 
     659    14426781 : static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
     660             : {
     661    14426781 :         struct btrfs_tlv_header *hdr;
     662    14426781 :         int total_len = sizeof(*hdr) + len;
     663    14426781 :         int left = sctx->send_max_size - sctx->send_size;
     664             : 
     665    14426781 :         if (WARN_ON_ONCE(sctx->put_data))
     666             :                 return -EINVAL;
     667             : 
     668    14426781 :         if (unlikely(left < total_len))
     669             :                 return -EOVERFLOW;
     670             : 
     671    14426781 :         hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
     672    14426781 :         put_unaligned_le16(attr, &hdr->tlv_type);
     673    14426781 :         put_unaligned_le16(len, &hdr->tlv_len);
     674    28853562 :         memcpy(hdr + 1, data, len);
     675    14426781 :         sctx->send_size += total_len;
     676             : 
     677    14426781 :         return 0;
     678             : }
     679             : 
     680             : #define TLV_PUT_DEFINE_INT(bits) \
     681             :         static int tlv_put_u##bits(struct send_ctx *sctx,               \
     682             :                         u##bits attr, u##bits value)                    \
     683             :         {                                                               \
     684             :                 __le##bits __tmp = cpu_to_le##bits(value);              \
     685             :                 return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));  \
     686             :         }
     687             : 
     688             : TLV_PUT_DEFINE_INT(8)
     689           0 : TLV_PUT_DEFINE_INT(32)
     690        4928 : TLV_PUT_DEFINE_INT(64)
     691             : 
     692     6907031 : static int tlv_put_string(struct send_ctx *sctx, u16 attr,
     693             :                           const char *str, int len)
     694             : {
     695     6907031 :         if (len == -1)
     696           0 :                 len = strlen(str);
     697     6907031 :         return tlv_put(sctx, attr, str, len);
     698             : }
     699             : 
     700             : static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
     701             :                         const u8 *uuid)
     702             : {
     703        4317 :         return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
     704             : }
     705             : 
     706     3029471 : static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
     707             :                                   struct extent_buffer *eb,
     708             :                                   struct btrfs_timespec *ts)
     709             : {
     710     3029471 :         struct btrfs_timespec bts;
     711     3029471 :         read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
     712     3029253 :         return tlv_put(sctx, attr, &bts, sizeof(bts));
     713             : }
     714             : 
     715             : 
/*
 * Convenience wrappers around the tlv_put*() helpers for functions that build
 * a command. Every macro assigns the helper's result to a variable named
 * 'ret' and jumps to a label named 'tlv_put_failure' if that result is
 * negative, so the function using them has to provide both.
 *
 * NOTE(review): TLV_PUT_U16 expands to a call of tlv_put_u16(), but the only
 * TLV_PUT_DEFINE_INT() instantiations visible next to these macros are for 8,
 * 32 and 64 bits - confirm a 16 bit helper exists before using it.
 */
#define TLV_PUT(sctx, attrtype, data, attrlen) \
        do { \
                ret = tlv_put(sctx, attrtype, data, attrlen); \
                if (ret < 0) \
                        goto tlv_put_failure; \
        } while (0)

/* Integer attribute of the given width, sent via tlv_put_u<bits>(). */
#define TLV_PUT_INT(sctx, attrtype, bits, value) \
        do { \
                ret = tlv_put_u##bits(sctx, attrtype, value); \
                if (ret < 0) \
                        goto tlv_put_failure; \
        } while (0)

#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
#define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
/* String attribute; a len of -1 makes tlv_put_string() use strlen(). */
#define TLV_PUT_STRING(sctx, attrtype, str, len) \
        do { \
                ret = tlv_put_string(sctx, attrtype, str, len); \
                if (ret < 0) \
                        goto tlv_put_failure; \
        } while (0)
/* Path attribute: the bytes between p->start and p->end of a struct fs_path. */
#define TLV_PUT_PATH(sctx, attrtype, p) \
        do { \
                ret = tlv_put_string(sctx, attrtype, p->start, \
                        p->end - p->start); \
                if (ret < 0) \
                        goto tlv_put_failure; \
        } while(0)
#define TLV_PUT_UUID(sctx, attrtype, uuid) \
        do { \
                ret = tlv_put_uuid(sctx, attrtype, uuid); \
                if (ret < 0) \
                        goto tlv_put_failure; \
        } while (0)
/* Timestamp attribute read from extent buffer eb, see tlv_put_btrfs_timespec(). */
#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
        do { \
                ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
                if (ret < 0) \
                        goto tlv_put_failure; \
        } while (0)
     759             : 
     760         213 : static int send_header(struct send_ctx *sctx)
     761             : {
     762         213 :         struct btrfs_stream_header hdr;
     763             : 
     764         213 :         strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
     765         213 :         hdr.version = cpu_to_le32(sctx->proto);
     766         213 :         return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
     767             :                                         &sctx->send_off);
     768             : }
     769             : 
     770             : /*
     771             :  * For each command/item we want to send to userspace, we call this function.
     772             :  */
     773     5492949 : static int begin_cmd(struct send_ctx *sctx, int cmd)
     774             : {
     775     5492949 :         struct btrfs_cmd_header *hdr;
     776             : 
     777     5492949 :         if (WARN_ON(!sctx->send_buf))
     778             :                 return -EINVAL;
     779             : 
     780     5492949 :         BUG_ON(sctx->send_size);
     781             : 
     782     5492949 :         sctx->send_size += sizeof(*hdr);
     783     5492949 :         hdr = (struct btrfs_cmd_header *)sctx->send_buf;
     784     5492949 :         put_unaligned_le16(cmd, &hdr->cmd);
     785             : 
     786     5492949 :         return 0;
     787             : }
     788             : 
     789     5494483 : static int send_cmd(struct send_ctx *sctx)
     790             : {
     791     5494483 :         int ret;
     792     5494483 :         struct btrfs_cmd_header *hdr;
     793     5494483 :         u32 crc;
     794             : 
     795     5494483 :         hdr = (struct btrfs_cmd_header *)sctx->send_buf;
     796     5494483 :         put_unaligned_le32(sctx->send_size - sizeof(*hdr), &hdr->len);
     797     5494483 :         put_unaligned_le32(0, &hdr->crc);
     798             : 
     799     5494483 :         crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
     800     5494810 :         put_unaligned_le32(crc, &hdr->crc);
     801             : 
     802     5494810 :         ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
     803             :                                         &sctx->send_off);
     804             : 
     805     5496393 :         sctx->send_size = 0;
     806     5496393 :         sctx->put_data = false;
     807             : 
     808     5496393 :         return ret;
     809             : }
     810             : 
     811             : /*
     812             :  * Sends a move instruction to user space
     813             :  */
     814      604876 : static int send_rename(struct send_ctx *sctx,
     815             :                      struct fs_path *from, struct fs_path *to)
     816             : {
     817      604876 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
     818      604876 :         int ret;
     819             : 
     820      604876 :         btrfs_debug(fs_info, "send_rename %s -> %s", from->start, to->start);
     821             : 
     822      604876 :         ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
     823      604877 :         if (ret < 0)
     824           0 :                 goto out;
     825             : 
     826      604877 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
     827      604881 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);
     828             : 
     829      604868 :         ret = send_cmd(sctx);
     830             : 
     831      604884 : tlv_put_failure:
     832      604884 : out:
     833      604884 :         return ret;
     834             : }
     835             : 
     836             : /*
     837             :  * Sends a link instruction to user space
     838             :  */
     839        2967 : static int send_link(struct send_ctx *sctx,
     840             :                      struct fs_path *path, struct fs_path *lnk)
     841             : {
     842        2967 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
     843        2967 :         int ret;
     844             : 
     845        2967 :         btrfs_debug(fs_info, "send_link %s -> %s", path->start, lnk->start);
     846             : 
     847        2967 :         ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
     848        2967 :         if (ret < 0)
     849           0 :                 goto out;
     850             : 
     851        2967 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
     852        2967 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);
     853             : 
     854        2967 :         ret = send_cmd(sctx);
     855             : 
     856        2967 : tlv_put_failure:
     857        2967 : out:
     858        2967 :         return ret;
     859             : }
     860             : 
     861             : /*
     862             :  * Sends an unlink instruction to user space
     863             :  */
     864        1958 : static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
     865             : {
     866        1958 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
     867        1958 :         int ret;
     868             : 
     869        1958 :         btrfs_debug(fs_info, "send_unlink %s", path->start);
     870             : 
     871        1958 :         ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
     872        1958 :         if (ret < 0)
     873           0 :                 goto out;
     874             : 
     875        1958 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
     876             : 
     877        1958 :         ret = send_cmd(sctx);
     878             : 
     879        1958 : tlv_put_failure:
     880        1958 : out:
     881        1958 :         return ret;
     882             : }
     883             : 
     884             : /*
     885             :  * Sends a rmdir instruction to user space
     886             :  */
     887          25 : static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
     888             : {
     889          25 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
     890          25 :         int ret;
     891             : 
     892          25 :         btrfs_debug(fs_info, "send_rmdir %s", path->start);
     893             : 
     894          25 :         ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
     895          25 :         if (ret < 0)
     896           0 :                 goto out;
     897             : 
     898          25 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
     899             : 
     900          25 :         ret = send_cmd(sctx);
     901             : 
     902          25 : tlv_put_failure:
     903          25 : out:
     904          25 :         return ret;
     905             : }
     906             : 
/*
 * Fields of an inode item as collected by get_inode_info(). Every member is
 * stored as u64, whatever the width of the on-disk field it was read from.
 */
struct btrfs_inode_info {
        u64 size;       /* btrfs_inode_size() */
        u64 gen;        /* btrfs_inode_generation() */
        u64 mode;       /* btrfs_inode_mode() */
        u64 uid;        /* btrfs_inode_uid() */
        u64 gid;        /* btrfs_inode_gid() */
        u64 rdev;       /* btrfs_inode_rdev() */
        u64 fileattr;   /* btrfs_inode_flags(), the unchanged u64 value */
        u64 nlink;      /* btrfs_inode_nlink() */
};
     917             : 
     918             : /*
     919             :  * Helper function to retrieve some fields from an inode item.
     920             :  */
     921    11187321 : static int get_inode_info(struct btrfs_root *root, u64 ino,
     922             :                           struct btrfs_inode_info *info)
     923             : {
     924    11187321 :         int ret;
     925    11187321 :         struct btrfs_path *path;
     926    11187321 :         struct btrfs_inode_item *ii;
     927    11187321 :         struct btrfs_key key;
     928             : 
     929    11187321 :         path = alloc_path_for_send();
     930    11187448 :         if (!path)
     931             :                 return -ENOMEM;
     932             : 
     933    11187448 :         key.objectid = ino;
     934    11187448 :         key.type = BTRFS_INODE_ITEM_KEY;
     935    11187448 :         key.offset = 0;
     936    11187448 :         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
     937    11187511 :         if (ret) {
     938      403958 :                 if (ret > 0)
     939      403958 :                         ret = -ENOENT;
     940      403958 :                 goto out;
     941             :         }
     942             : 
     943    10783553 :         if (!info)
     944         181 :                 goto out;
     945             : 
     946    10783372 :         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
     947             :                         struct btrfs_inode_item);
     948    10782672 :         info->size = btrfs_inode_size(path->nodes[0], ii);
     949    10782837 :         info->gen = btrfs_inode_generation(path->nodes[0], ii);
     950    10782832 :         info->mode = btrfs_inode_mode(path->nodes[0], ii);
     951    10782900 :         info->uid = btrfs_inode_uid(path->nodes[0], ii);
     952    10782838 :         info->gid = btrfs_inode_gid(path->nodes[0], ii);
     953    10782781 :         info->rdev = btrfs_inode_rdev(path->nodes[0], ii);
     954    10782780 :         info->nlink = btrfs_inode_nlink(path->nodes[0], ii);
     955             :         /*
     956             :          * Transfer the unchanged u64 value of btrfs_inode_item::flags, that's
     957             :          * otherwise logically split to 32/32 parts.
     958             :          */
     959    10782811 :         info->fileattr = btrfs_inode_flags(path->nodes[0], ii);
     960             : 
     961    11186970 : out:
     962    11186970 :         btrfs_free_path(path);
     963    11186970 :         return ret;
     964             : }
     965             : 
     966     4064664 : static int get_inode_gen(struct btrfs_root *root, u64 ino, u64 *gen)
     967             : {
     968     4064664 :         int ret;
     969     4064664 :         struct btrfs_inode_info info = { 0 };
     970             : 
     971     4064664 :         ASSERT(gen);
     972             : 
     973     4064664 :         ret = get_inode_info(root, ino, &info);
     974     4064741 :         *gen = info.gen;
     975     4064741 :         return ret;
     976             : }
     977             : 
/*
 * Callback invoked by iterate_inode_ref() for each name found in a ref item.
 *
 * @num:   number of names the callback was already called for (starts at 0)
 * @dir:   the directory value read for this name (the key offset for an
 *         INODE_REF, the parent field for an INODE_EXTREF)
 * @index: index value stored with the name
 * @p:     the name, or the resolved path if resolving was requested
 * @ctx:   the ctx pointer given to iterate_inode_ref(), passed through as is
 *
 * Any non-zero return value stops the iteration.
 */
typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
                                   struct fs_path *p,
                                   void *ctx);
     981             : 
     982             : /*
     983             :  * Helper function to iterate the entries in ONE btrfs_inode_ref or
     984             :  * btrfs_inode_extref.
     985             :  * The iterate callback may return a non zero value to stop iteration. This can
     986             :  * be a negative value for error codes or 1 to simply stop it.
     987             :  *
     988             :  * path must point to the INODE_REF or INODE_EXTREF when called.
     989             :  */
     990      610310 : static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
     991             :                              struct btrfs_key *found_key, int resolve,
     992             :                              iterate_inode_ref_t iterate, void *ctx)
     993             : {
     994      610310 :         struct extent_buffer *eb = path->nodes[0];
     995      610310 :         struct btrfs_inode_ref *iref;
     996      610310 :         struct btrfs_inode_extref *extref;
     997      610310 :         struct btrfs_path *tmp_path;
     998      610310 :         struct fs_path *p;
     999      610310 :         u32 cur = 0;
    1000      610310 :         u32 total;
    1001      610310 :         int slot = path->slots[0];
    1002      610310 :         u32 name_len;
    1003      610310 :         char *start;
    1004      610310 :         int ret = 0;
    1005      610310 :         int num = 0;
    1006      610310 :         int index;
    1007      610310 :         u64 dir;
    1008      610310 :         unsigned long name_off;
    1009      610310 :         unsigned long elem_size;
    1010      610310 :         unsigned long ptr;
    1011             : 
    1012      610310 :         p = fs_path_alloc_reversed();
    1013      610343 :         if (!p)
    1014             :                 return -ENOMEM;
    1015             : 
    1016      610343 :         tmp_path = alloc_path_for_send();
    1017      610320 :         if (!tmp_path) {
    1018           0 :                 fs_path_free(p);
    1019           0 :                 return -ENOMEM;
    1020             :         }
    1021             : 
    1022             : 
    1023      610320 :         if (found_key->type == BTRFS_INODE_REF_KEY) {
    1024      608797 :                 ptr = (unsigned long)btrfs_item_ptr(eb, slot,
    1025             :                                                     struct btrfs_inode_ref);
    1026      608772 :                 total = btrfs_item_size(eb, slot);
    1027      608772 :                 elem_size = sizeof(*iref);
    1028             :         } else {
    1029        1523 :                 ptr = btrfs_item_ptr_offset(eb, slot);
    1030        1523 :                 total = btrfs_item_size(eb, slot);
    1031        1523 :                 elem_size = sizeof(*extref);
    1032             :         }
    1033             : 
    1034     1222289 :         while (cur < total) {
    1035      613836 :                 fs_path_reset(p);
    1036             : 
    1037      613780 :                 if (found_key->type == BTRFS_INODE_REF_KEY) {
    1038      612257 :                         iref = (struct btrfs_inode_ref *)(ptr + cur);
    1039      612257 :                         name_len = btrfs_inode_ref_name_len(eb, iref);
    1040      612278 :                         name_off = (unsigned long)(iref + 1);
    1041      612278 :                         index = btrfs_inode_ref_index(eb, iref);
    1042      612286 :                         dir = found_key->offset;
    1043             :                 } else {
    1044        1523 :                         extref = (struct btrfs_inode_extref *)(ptr + cur);
    1045        1523 :                         name_len = btrfs_inode_extref_name_len(eb, extref);
    1046        1523 :                         name_off = (unsigned long)&extref->name;
    1047        1523 :                         index = btrfs_inode_extref_index(eb, extref);
    1048        1523 :                         dir = btrfs_inode_extref_parent(eb, extref);
    1049             :                 }
    1050             : 
    1051      613805 :                 if (resolve) {
    1052        1775 :                         start = btrfs_ref_to_path(root, tmp_path, name_len,
    1053             :                                                   name_off, eb, dir,
    1054        1775 :                                                   p->buf, p->buf_len);
    1055        1775 :                         if (IS_ERR(start)) {
    1056           0 :                                 ret = PTR_ERR(start);
    1057           0 :                                 goto out;
    1058             :                         }
    1059        1775 :                         if (start < p->buf) {
    1060             :                                 /* overflow , try again with larger buffer */
    1061           0 :                                 ret = fs_path_ensure_buf(p,
    1062           0 :                                                 p->buf_len + p->buf - start);
    1063           0 :                                 if (ret < 0)
    1064           0 :                                         goto out;
    1065           0 :                                 start = btrfs_ref_to_path(root, tmp_path,
    1066             :                                                           name_len, name_off,
    1067             :                                                           eb, dir,
    1068           0 :                                                           p->buf, p->buf_len);
    1069           0 :                                 if (IS_ERR(start)) {
    1070           0 :                                         ret = PTR_ERR(start);
    1071           0 :                                         goto out;
    1072             :                                 }
    1073           0 :                                 BUG_ON(start < p->buf);
    1074             :                         }
    1075        1775 :                         p->start = start;
    1076             :                 } else {
    1077      612030 :                         ret = fs_path_add_from_extent_buffer(p, eb, name_off,
    1078             :                                                              name_len);
    1079      612038 :                         if (ret < 0)
    1080           0 :                                 goto out;
    1081             :                 }
    1082             : 
    1083      613813 :                 cur += elem_size + name_len;
    1084      613813 :                 ret = iterate(num, dir, index, p, ctx);
    1085      613757 :                 if (ret)
    1086        1775 :                         goto out;
    1087      611982 :                 num++;
    1088             :         }
    1089             : 
    1090      608453 : out:
    1091      610228 :         btrfs_free_path(tmp_path);
    1092      610316 :         fs_path_free(p);
    1093      610316 :         return ret;
    1094             : }
    1095             : 
/*
 * Callback invoked by iterate_dir_item() for each entry of a dir item.
 *
 * @num:      passed through from iterate_dir_item()'s entry counter
 * @di_key:   the key stored in the entry, converted to CPU byte order
 * @name:     the entry's name, @name_len bytes
 * @data:     the entry's data, @data_len bytes; it is located in the same
 *            buffer as @name, directly behind it
 * @ctx:      the ctx pointer given to iterate_dir_item(), passed through as is
 *
 * A non-zero return value stops the iteration: negative for an error, or a
 * positive value to simply stop.
 */
typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
                                  const char *name, int name_len,
                                  const char *data, int data_len,
                                  void *ctx);
    1100             : 
    1101             : /*
    1102             :  * Helper function to iterate the entries in ONE btrfs_dir_item.
    1103             :  * The iterate callback may return a non zero value to stop iteration. This can
    1104             :  * be a negative value for error codes or 1 to simply stop it.
    1105             :  *
    1106             :  * path must point to the dir item when called.
    1107             :  */
    1108      800506 : static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
    1109             :                             iterate_dir_item_t iterate, void *ctx)
    1110             : {
    1111      800506 :         int ret = 0;
    1112      800506 :         struct extent_buffer *eb;
    1113      800506 :         struct btrfs_dir_item *di;
    1114      800506 :         struct btrfs_key di_key;
    1115      800506 :         char *buf = NULL;
    1116      800506 :         int buf_len;
    1117      800506 :         u32 name_len;
    1118      800506 :         u32 data_len;
    1119      800506 :         u32 cur;
    1120      800506 :         u32 len;
    1121      800506 :         u32 total;
    1122      800506 :         int slot;
    1123      800506 :         int num;
    1124             : 
    1125             :         /*
    1126             :          * Start with a small buffer (1 page). If later we end up needing more
    1127             :          * space, which can happen for xattrs on a fs with a leaf size greater
    1128             :          * than the page size, attempt to increase the buffer. Typically xattr
    1129             :          * values are small.
    1130             :          */
    1131      800506 :         buf_len = PATH_MAX;
    1132      800506 :         buf = kmalloc(buf_len, GFP_KERNEL);
    1133      800506 :         if (!buf) {
    1134           0 :                 ret = -ENOMEM;
    1135           0 :                 goto out;
    1136             :         }
    1137             : 
    1138      800506 :         eb = path->nodes[0];
    1139      800506 :         slot = path->slots[0];
    1140      800506 :         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
    1141      800506 :         cur = 0;
    1142      800506 :         len = 0;
    1143      800506 :         total = btrfs_item_size(eb, slot);
    1144             : 
    1145      800506 :         num = 0;
    1146     1600916 :         while (cur < total) {
    1147      800506 :                 name_len = btrfs_dir_name_len(eb, di);
    1148      800506 :                 data_len = btrfs_dir_data_len(eb, di);
    1149      800506 :                 btrfs_dir_item_key_to_cpu(eb, di, &di_key);
    1150             : 
    1151      800506 :                 if (btrfs_dir_ftype(eb, di) == BTRFS_FT_XATTR) {
    1152      800506 :                         if (name_len > XATTR_NAME_MAX) {
    1153           0 :                                 ret = -ENAMETOOLONG;
    1154           0 :                                 goto out;
    1155             :                         }
    1156      800506 :                         if (name_len + data_len >
    1157      800506 :                                         BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
    1158           0 :                                 ret = -E2BIG;
    1159           0 :                                 goto out;
    1160             :                         }
    1161             :                 } else {
    1162             :                         /*
    1163             :                          * Path too long
    1164             :                          */
    1165           0 :                         if (name_len + data_len > PATH_MAX) {
    1166           0 :                                 ret = -ENAMETOOLONG;
    1167           0 :                                 goto out;
    1168             :                         }
    1169             :                 }
    1170             : 
    1171      800506 :                 if (name_len + data_len > buf_len) {
    1172           6 :                         buf_len = name_len + data_len;
    1173           6 :                         if (is_vmalloc_addr(buf)) {
    1174           0 :                                 vfree(buf);
    1175           0 :                                 buf = NULL;
    1176             :                         } else {
    1177           6 :                                 char *tmp = krealloc(buf, buf_len,
    1178             :                                                 GFP_KERNEL | __GFP_NOWARN);
    1179             : 
    1180           6 :                                 if (!tmp)
    1181           0 :                                         kfree(buf);
    1182           6 :                                 buf = tmp;
    1183             :                         }
    1184           6 :                         if (!buf) {
    1185           0 :                                 buf = kvmalloc(buf_len, GFP_KERNEL);
    1186           0 :                                 if (!buf) {
    1187           0 :                                         ret = -ENOMEM;
    1188           0 :                                         goto out;
    1189             :                                 }
    1190             :                         }
    1191             :                 }
    1192             : 
    1193      800506 :                 read_extent_buffer(eb, buf, (unsigned long)(di + 1),
    1194             :                                 name_len + data_len);
    1195             : 
    1196      800506 :                 len = sizeof(*di) + name_len + data_len;
    1197      800506 :                 di = (struct btrfs_dir_item *)((char *)di + len);
    1198      800506 :                 cur += len;
    1199             : 
    1200      800506 :                 ret = iterate(num, &di_key, buf, name_len, buf + name_len,
    1201             :                               data_len, ctx);
    1202      800506 :                 if (ret < 0)
    1203           0 :                         goto out;
    1204      800506 :                 if (ret) {
    1205          96 :                         ret = 0;
    1206          96 :                         goto out;
    1207             :                 }
    1208             : 
    1209      800410 :                 num++;
    1210             :         }
    1211             : 
    1212      800410 : out:
    1213      800506 :         kvfree(buf);
    1214      800506 :         return ret;
    1215             : }
    1216             : /*
                     :  * Callback handed to iterate_inode_ref() by get_inode_path(): copies the
                     :  * path @p into the fs_path passed through @ctx.
                     :  *
                     :  * Returns a negative errno if fs_path_copy() fails, otherwise 1. The
                     :  * positive value presumably tells the iterator to stop after this first
                     :  * ref (iterate_inode_ref() is not visible here - TODO confirm).
                     :  * @num, @dir and @index are not used.
                     :  */
    1217        1775 : static int __copy_first_ref(int num, u64 dir, int index,
    1218             :                             struct fs_path *p, void *ctx)
    1219             : {
    1220        1775 :         int ret;
    1221        1775 :         struct fs_path *pt = ctx;
    1222             : 
    1223        1775 :         ret = fs_path_copy(pt, p);
    1224        1775 :         if (ret < 0)
    1225           0 :                 return ret;
    1226             : 
    1227             :         /* we want the first only */
    1228             :         return 1;
    1229             : }
    1230             : 
    1231             : /*
    1232             :  * Retrieve the first path of an inode. If an inode has more than one
    1233             :  * ref/hardlink, this is ignored.
                     :  *
                     :  * @path is reset first and then filled by __copy_first_ref().
                     :  *
                     :  * Returns 0 on success, -ENOMEM if the btrfs_path can not be allocated,
                     :  * -ENOENT if the item found by the search is not an INODE_REF/INODE_EXTREF
                     :  * item of @ino, 1 if btrfs_search_slot_for_read() returns a positive value
                     :  * (presumably meaning no item at or after the search key - TODO confirm),
                     :  * or any negative value returned by the search or by iterate_inode_ref().
    1234             :  */
    1235        1775 : static int get_inode_path(struct btrfs_root *root,
    1236             :                           u64 ino, struct fs_path *path)
    1237             : {
    1238        1775 :         int ret;
    1239        1775 :         struct btrfs_key key, found_key;
    1240        1775 :         struct btrfs_path *p;
    1241             : 
    1242        1775 :         p = alloc_path_for_send();
    1243        1775 :         if (!p)
    1244             :                 return -ENOMEM;
    1245             : 
    1246        1775 :         fs_path_reset(path);
    1247             : 
    1248        1775 :         key.objectid = ino;
    1249        1775 :         key.type = BTRFS_INODE_REF_KEY;
    1250        1775 :         key.offset = 0;
    1251             : 
    1252        1775 :         ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
    1253        1775 :         if (ret < 0)
    1254           0 :                 goto out;
    1255        1775 :         if (ret) {
    1256           0 :                 ret = 1;
    1257           0 :                 goto out;
    1258             :         }
    1259        1775 :         btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
    1260        1775 :         if (found_key.objectid != ino ||
    1261        1775 :             (found_key.type != BTRFS_INODE_REF_KEY &&
    1262             :              found_key.type != BTRFS_INODE_EXTREF_KEY)) {
    1263           0 :                 ret = -ENOENT;
    1264           0 :                 goto out;
    1265             :         }
    1266             : 
    1267        1775 :         ret = iterate_inode_ref(root, p, &found_key, 1,
    1268             :                                 __copy_first_ref, path);
    1269        1775 :         if (ret < 0)
    1270             :                 goto out;
    1271             :         ret = 0; /* any non-negative result from the iteration is reported as success */
    1272             : 
    1273        1775 : out:
    1274        1775 :         btrfs_free_path(p);
    1275        1775 :         return ret;
    1276             : }
    1277             : /*
                     :  * Per-extent state shared between find_extent_clone() and the callbacks it
                     :  * installs for the backref walk (iterate_backrefs(), lookup_backref_cache(),
                     :  * store_backref_cache(), check_extent_item() and skip_self_data_ref()). It is
                     :  * handed to them as their opaque ctx/user_ctx pointer.
                     :  */
    1278             : struct backref_ctx {
    1279             :         struct send_ctx *sctx;
    1280             : 
    1281             :         /* number of backrefs accepted as clone candidates so far */
    1282             :         u64 found;
    1283             : 
    1284             :         /*
    1285             :          * used for clones found in send_root. clones found behind cur_objectid
    1286             :          * and cur_offset are not considered as allowed clones.
    1287             :          */
    1288             :         u64 cur_objectid;
    1289             :         u64 cur_offset;
    1290             : 
    1291             :         /* may be truncated in case it's the last extent in a file */
    1292             :         u64 extent_len;
    1293             : 
    1294             :         /* The bytenr the file extent item we are processing refers to. */
    1295             :         u64 bytenr;
    1296             :         /* The owner (root id) of the data backref for the current extent. */
    1297             :         u64 backref_owner;
    1298             :         /* The offset of the data backref for the current extent. */
    1299             :         u64 backref_offset;
    1300             : };
    1301             : /*
                     :  * bsearch() comparator used to find a root id in an array of
                     :  * struct clone_root.
                     :  *
                     :  * @key is not a pointer to memory: the callers pass the root id itself,
                     :  * cast to a pointer, and it is converted back here. @elt is the array
                     :  * element being compared. Returns -1, 0 or 1 according to how the id
                     :  * compares with the element's root->root_key.objectid.
                     :  *
                     :  * NOTE(review): the id round-trips through uintptr_t, so where uintptr_t
                     :  * is narrower than 64 bits an id above its range would be truncated by the
                     :  * callers' cast - confirm whether such ids can occur.
                     :  */
    1302     2216085 : static int __clone_root_cmp_bsearch(const void *key, const void *elt)
    1303             : {
    1304     2216085 :         u64 root = (u64)(uintptr_t)key;
    1305     2216085 :         const struct clone_root *cr = elt;
    1306             : 
    1307     2216085 :         if (root < cr->root->root_key.objectid)
    1308             :                 return -1;
    1309     1442914 :         if (root > cr->root->root_key.objectid)
    1310      179155 :                 return 1;
    1311             :         return 0;
    1312             : }
    1313             : /*
                     :  * Comparator ordering two struct clone_root elements by ascending
                     :  * root->root_key.objectid. Presumably used to sort sctx->clone_roots so
                     :  * that the bsearch() lookups with __clone_root_cmp_bsearch() work (the
                     :  * sort() call is not visible here - TODO confirm).
                     :  */
    1314         129 : static int __clone_root_cmp_sort(const void *e1, const void *e2)
    1315             : {
    1316         129 :         const struct clone_root *cr1 = e1;
    1317         129 :         const struct clone_root *cr2 = e2;
    1318             : 
    1319         129 :         if (cr1->root->root_key.objectid < cr2->root->root_key.objectid)
    1320             :                 return -1;
    1321           4 :         if (cr1->root->root_key.objectid > cr2->root->root_key.objectid)
    1322           4 :                 return 1;
    1323             :         return 0;
    1324             : }
    1325             : 
    1326             : /*
    1327             :  * Called for every backref that is found for the current extent.
    1328             :  * Results are collected in sctx->clone_roots->ino/offset.
                     :  *
                     :  * @ino, @offset and @num_bytes describe the referencing file extent item
                     :  * and @root_id the root it was found in; @ctx_ is the struct backref_ctx
                     :  * set up by find_extent_clone().
                     :  *
                     :  * Returns 0 when the backref was ignored or recorded and more backrefs
                     :  * may follow, or BTRFS_ITERATE_EXTENT_INODES_STOP once a candidate covering
                     :  * at least bctx->extent_len bytes was recorded.
    1329             :  */
    1330     1421236 : static int iterate_backrefs(u64 ino, u64 offset, u64 num_bytes, u64 root_id,
    1331             :                             void *ctx_)
    1332             : {
    1333     1421236 :         struct backref_ctx *bctx = ctx_;
    1334     1421236 :         struct clone_root *clone_root;
    1335             : 
    1336             :         /* First check if the root is in the list of accepted clone sources */
    1337     1421236 :         clone_root = bsearch((void *)(uintptr_t)root_id, bctx->sctx->clone_roots,
    1338     1421236 :                              bctx->sctx->clone_roots_cnt,
    1339             :                              sizeof(struct clone_root),
    1340             :                              __clone_root_cmp_bsearch);
    1341     1421248 :         if (!clone_root)
    1342             :                 return 0;
    1343             : 
    1344             :         /* This is our own reference, bail out as we can't clone from it. */
    1345     1103334 :         if (clone_root->root == bctx->sctx->send_root &&
    1346     1101378 :             ino == bctx->cur_objectid &&
    1347     1082872 :             offset == bctx->cur_offset)
    1348             :                 return 0;
    1349             : 
    1350             :         /*
    1351             :          * Make sure we don't consider clones from send_root that are
    1352             :          * behind the current inode/offset.
    1353             :          */
    1354       48731 :         if (clone_root->root == bctx->sctx->send_root) {
    1355             :                 /*
    1356             :                  * If the source inode was not yet processed we can't issue a
    1357             :                  * clone operation, as the source extent does not exist yet at
    1358             :                  * the destination of the stream.
    1359             :                  */
    1360       46777 :                 if (ino > bctx->cur_objectid)
    1361             :                         return 0;
    1362             :                 /*
    1363             :                  * We clone from the inode currently being sent as long as the
    1364             :                  * source extent is already processed, otherwise we could try
    1365             :                  * to clone from an extent that does not exist yet at the
    1366             :                  * destination of the stream.
    1367             :                  */
    1368       30365 :                 if (ino == bctx->cur_objectid &&
    1369       28267 :                     offset + bctx->extent_len >
    1370       28267 :                     bctx->sctx->cur_inode_next_write_offset)
    1371             :                         return 0;
    1372             :         }
    1373             : 
    1374        4356 :         bctx->found++;
    1375        4356 :         clone_root->found_ref = true;
    1376             : 
    1377             :         /*
    1378             :          * If the given backref refers to a file extent item with a larger
    1379             :          * number of bytes than what we found before, use the new one so that
    1380             :          * we clone more optimally and end up doing less writes and getting
    1381             :          * less exclusive, non-shared extents at the destination.
    1382             :          */
    1383        4356 :         if (num_bytes > clone_root->num_bytes) {
    1384        4269 :                 clone_root->ino = ino;
    1385        4269 :                 clone_root->offset = offset;
    1386        4269 :                 clone_root->num_bytes = num_bytes;
    1387             : 
    1388             :                 /*
    1389             :                  * Found a perfect candidate, so there's no need to continue
    1390             :                  * backref walking.
    1391             :                  */
    1392        4269 :                 if (num_bytes >= bctx->extent_len)
    1393        4102 :                         return BTRFS_ITERATE_EXTENT_INODES_STOP;
    1394             :         }
    1395             : 
    1396             :         return 0;
    1397             : }
    1398             : /*
                     :  * cache_lookup callback installed by find_extent_clone() for the backref
                     :  * walk. Looks up the root ids cached for the leaf at @leaf_bytenr; the
                     :  * cache key is @leaf_bytenr shifted right by fs_info->sectorsize_bits.
                     :  * @ctx is the struct backref_ctx of the current walk.
                     :  *
                     :  * Returns true and sets *@root_ids_ret and *@root_count_ret from the cached
                     :  * entry on a hit. Returns false, leaving both untouched, when the cache is
                     :  * empty, when it had to be cleared because of relocation, or when it holds
                     :  * no entry for the key.
                     :  */
    1399     2608741 : static bool lookup_backref_cache(u64 leaf_bytenr, void *ctx,
    1400             :                                  const u64 **root_ids_ret, int *root_count_ret)
    1401             : {
    1402     2608741 :         struct backref_ctx *bctx = ctx;
    1403     2608741 :         struct send_ctx *sctx = bctx->sctx;
    1404     2608741 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    1405     2608741 :         const u64 key = leaf_bytenr >> fs_info->sectorsize_bits;
    1406     2608741 :         struct btrfs_lru_cache_entry *raw_entry;
    1407     2608741 :         struct backref_cache_entry *entry;
    1408             : 
    1409     2608741 :         if (btrfs_lru_cache_size(&sctx->backref_cache) == 0)
    1410             :                 return false;
    1411             : 
    1412             :         /*
    1413             :          * If relocation happened since we first filled the cache, then we must
    1414             :          * empty the cache and can not use it, because even though we operate on
    1415             :          * read-only roots, their leaves and nodes may have been reallocated and
    1416             :          * now be used for different nodes/leaves of the same tree or some other
    1417             :          * tree.
    1418             :          *
    1419             :          * We are called from iterate_extent_inodes() while either holding a
    1420             :          * transaction handle or holding fs_info->commit_root_sem, so no need
    1421             :          * to take any lock here.
    1422             :          */
    1423     2602918 :         if (fs_info->last_reloc_trans > sctx->backref_cache_last_reloc_trans) {
    1424        1534 :                 btrfs_lru_cache_clear(&sctx->backref_cache);
    1425        1534 :                 return false;
    1426             :         }
    1427             : 
    1428     2601384 :         raw_entry = btrfs_lru_cache_lookup(&sctx->backref_cache, key, 0);
    1429     2601392 :         if (!raw_entry)
    1430             :                 return false;
    1431             : 
    1432     1740021 :         entry = container_of(raw_entry, struct backref_cache_entry, entry);
    1433     1740021 :         *root_ids_ret = entry->root_ids;
    1434     1740021 :         *root_count_ret = entry->num_roots;
    1435             : 
    1436     1740021 :         return true;
    1437             : }
    1438             : /*
                     :  * cache_store callback installed by find_extent_clone() for the backref
                     :  * walk. Builds a cache entry for the leaf at @leaf_bytenr holding those ids
                     :  * from @root_ids that belong to sctx->clone_roots, and stores it in
                     :  * sctx->backref_cache. @ctx is the struct backref_ctx of the current walk.
                     :  *
                     :  * Nothing is reported to the caller: on allocation failure, on too many
                     :  * matching roots or on a store failure the entry is simply dropped. When
                     :  * the stored entry is the only one in the cache, the current
                     :  * fs_info->last_reloc_trans is remembered for lookup_backref_cache().
                     :  */
    1439      334288 : static void store_backref_cache(u64 leaf_bytenr, const struct ulist *root_ids,
    1440             :                                 void *ctx)
    1441             : {
    1442      334288 :         struct backref_ctx *bctx = ctx;
    1443      334288 :         struct send_ctx *sctx = bctx->sctx;
    1444      334288 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    1445      334288 :         struct backref_cache_entry *new_entry;
    1446      334288 :         struct ulist_iterator uiter;
    1447      334288 :         struct ulist_node *node;
    1448      334288 :         int ret;
    1449             : 
    1450             :         /*
    1451             :          * We're called while holding a transaction handle or while holding
    1452             :          * fs_info->commit_root_sem (at iterate_extent_inodes()), so must do a
    1453             :          * NOFS allocation.
    1454             :          */
    1455      334288 :         new_entry = kmalloc(sizeof(struct backref_cache_entry), GFP_NOFS);
    1456             :         /* No worries, cache is optional. */
    1457      334286 :         if (!new_entry)
    1458           0 :                 return;
    1459             : 
    1460      334286 :         new_entry->entry.key = leaf_bytenr >> fs_info->sectorsize_bits;
    1461      334286 :         new_entry->entry.gen = 0;
    1462      334286 :         new_entry->num_roots = 0;
    1463      334286 :         ULIST_ITER_INIT(&uiter);
    1464      816241 :         while ((node = ulist_next(root_ids, &uiter)) != NULL) {
    1465      481950 :                 const u64 root_id = node->val;
    1466      481950 :                 struct clone_root *root;
    1467             : 
    1468      481950 :                 root = bsearch((void *)(uintptr_t)root_id, sctx->clone_roots,
    1469      481950 :                                sctx->clone_roots_cnt, sizeof(struct clone_root),
    1470             :                                __clone_root_cmp_bsearch);
    1471      481955 :                 if (!root)
    1472      321515 :                         continue;
    1473             : 
    1474             :                 /* Too many roots, just exit, no worries as caching is optional. */
    1475      160440 :                 if (new_entry->num_roots >= SEND_MAX_BACKREF_CACHE_ROOTS) {
    1476           0 :                         kfree(new_entry);
    1477           0 :                         return;
    1478             :                 }
    1479             : 
    1480      160440 :                 new_entry->root_ids[new_entry->num_roots] = root_id;
    1481      160440 :                 new_entry->num_roots++;
    1482             :         }
    1483             : 
    1484             :         /*
    1485             :          * We may have not added any roots to the new cache entry, which means
    1486             :          * none of the roots is part of the list of roots from which we are
    1487             :          * allowed to clone. Cache the new entry as it's still useful to avoid
    1488             :          * backref walking to determine which roots have a path to the leaf.
    1489             :          *
    1490             :          * Also use GFP_NOFS because we're called while holding a transaction
    1491             :          * handle or while holding fs_info->commit_root_sem.
    1492             :          */
    1493      334288 :         ret = btrfs_lru_cache_store(&sctx->backref_cache, &new_entry->entry,
    1494             :                                     GFP_NOFS);
    1495      334290 :         ASSERT(ret == 0 || ret == -ENOMEM);
    1496      334290 :         if (ret) {
    1497             :                 /* Caching is optional, no worries. */
    1498           0 :                 kfree(new_entry);
    1499           0 :                 return;
    1500             :         }
    1501             : 
    1502             :         /*
    1503             :          * We are called from iterate_extent_inodes() while either holding a
    1504             :          * transaction handle or holding fs_info->commit_root_sem, so no need
    1505             :          * to take any lock here.
    1506             :          */
    1507      334290 :         if (btrfs_lru_cache_size(&sctx->backref_cache) == 1)
    1508        1610 :                 sctx->backref_cache_last_reloc_trans = fs_info->last_reloc_trans;
    1509             : }
    1510             : /*
                     :  * check_extent_item callback installed by find_extent_clone() for the
                     :  * backref walk. Inspects the extent item @ei (in @leaf) of the extent at
                     :  * @bytenr; @ctx is the struct backref_ctx of the current walk.
                     :  *
                     :  * Returns 0 if the walk may go on, -EUCLEAN if the extent item of the
                     :  * extent we are processing is flagged as a tree block, and -ENOENT when
                     :  * the walk is pointless (a single reference with the send root as the only
                     :  * clone source) or too expensive (more than SEND_MAX_EXTENT_REFS
                     :  * references). How the walker treats these values is not visible here;
                     :  * presumably it aborts and propagates them - TODO confirm.
                     :  */
    1511     2236317 : static int check_extent_item(u64 bytenr, const struct btrfs_extent_item *ei,
    1512             :                              const struct extent_buffer *leaf, void *ctx)
    1513             : {
    1514     2236317 :         const u64 refs = btrfs_extent_refs(leaf, ei);
    1515     2236310 :         const struct backref_ctx *bctx = ctx;
    1516     2236310 :         const struct send_ctx *sctx = bctx->sctx;
    1517             : 
    1518     2236310 :         if (bytenr == bctx->bytenr) {
    1519      631798 :                 const u64 flags = btrfs_extent_flags(leaf, ei);
    1520             : 
    1521      631798 :                 if (WARN_ON(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
    1522             :                         return -EUCLEAN;
    1523             : 
    1524             :                 /*
    1525             :                  * If we have only one reference and only the send root as a
    1526             :                  * clone source - meaning no clone roots were given in the
    1527             :                  * struct btrfs_ioctl_send_args passed to the send ioctl - then
    1528             :                  * it's our reference and there's no point in doing backref
    1529             :                  * walking which is expensive, so exit early.
    1530             :                  */
    1531      631798 :                 if (refs == 1 && sctx->clone_roots_cnt == 1)
    1532             :                         return -ENOENT;
    1533             :         }
    1534             : 
    1535             :         /*
    1536             :          * Backreference walking (iterate_extent_inodes() below) is currently
    1537             :          * too expensive when an extent has a large number of references, both
    1538             :          * in time spent and used memory. So for now just fallback to write
    1539             :          * operations instead of clone operations when an extent has more than
    1540             :          * a certain amount of references.
    1541             :          */
    1542     2216095 :         if (refs > SEND_MAX_EXTENT_REFS)
    1543        1863 :                 return -ENOENT;
    1544             : 
    1545             :         return 0;
    1546             : }
    1547             : /*
                     :  * skip_data_ref callback, installed by find_extent_clone() only when the
                     :  * send root is the single clone root. Returns true when the data backref
                     :  * described by @root, @ino and @offset matches the one recorded in the
                     :  * struct backref_ctx passed as @ctx (cur_objectid, backref_owner and
                     :  * backref_offset), i.e. it is our own reference; false otherwise.
                     :  */
    1548       69977 : static bool skip_self_data_ref(u64 root, u64 ino, u64 offset, void *ctx)
    1549             : {
    1550       69977 :         const struct backref_ctx *bctx = ctx;
    1551             : 
    1552       69977 :         if (ino == bctx->cur_objectid &&
    1553       69908 :             root == bctx->backref_owner &&
    1554       69775 :             offset == bctx->backref_offset)
    1555        4492 :                 return true;
    1556             : 
    1557             :         return false;
    1558             : }
    1559             : 
    1560             : /*
    1561             :  * Given an inode, offset and extent item, it finds a good clone for a clone
    1562             :  * instruction. Returns -ENOENT when none could be found. The function makes
    1563             :  * sure that the returned clone is usable at the point where sending is at the
    1564             :  * moment. This means, that no clones are accepted which lie behind the current
    1565             :  * inode+offset.
    1566             :  *
    1567             :  * path must point to the file extent item when called.
                     :  *
                     :  * Returns 0 with *found pointing to the chosen entry of sctx->clone_roots
                     :  * on success. Also returns 0, without writing *found, when data_offset is
                     :  * at or beyond ino_size, so callers must initialize *found themselves.
                     :  * Returns -ENOENT when there is no usable clone source (inline extent,
                     :  * disk bytenr of 0, relocation detected after the backref walk, or no
                     :  * accepted backref), or any other negative value returned by
                     :  * iterate_extent_inodes().
    1568             :  */
    1569      631919 : static int find_extent_clone(struct send_ctx *sctx,
    1570             :                              struct btrfs_path *path,
    1571             :                              u64 ino, u64 data_offset,
    1572             :                              u64 ino_size,
    1573             :                              struct clone_root **found)
    1574             : {
    1575      631919 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    1576      631919 :         int ret;
    1577      631919 :         int extent_type;
    1578      631919 :         u64 logical;
    1579      631919 :         u64 disk_byte;
    1580      631919 :         u64 num_bytes;
    1581      631919 :         struct btrfs_file_extent_item *fi;
    1582      631919 :         struct extent_buffer *eb = path->nodes[0];
    1583      631919 :         struct backref_ctx backref_ctx = { 0 };
    1584      631919 :         struct btrfs_backref_walk_ctx backref_walk_ctx = { 0 };
    1585      631919 :         struct clone_root *cur_clone_root;
    1586      631919 :         int compressed;
    1587      631919 :         u32 i;
    1588             : 
    1589             :         /*
    1590             :          * With fallocate we can get prealloc extents beyond the inode's i_size,
    1591             :          * so we don't do anything here because clone operations can not clone
    1592             :          * to a range beyond i_size without increasing the i_size of the
    1593             :          * destination inode.
    1594             :          */
    1595      631919 :         if (data_offset >= ino_size)
    1596             :                 return 0;
    1597             : 
    1598      631812 :         fi = btrfs_item_ptr(eb, path->slots[0], struct btrfs_file_extent_item);
    1599      631813 :         extent_type = btrfs_file_extent_type(eb, fi);
    1600      631811 :         if (extent_type == BTRFS_FILE_EXTENT_INLINE)
    1601             :                 return -ENOENT;
    1602             : 
    1603      631797 :         disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
    1604      631797 :         if (disk_byte == 0)
    1605             :                 return -ENOENT;
    1606             : 
    1607      631797 :         compressed = btrfs_file_extent_compression(eb, fi);
    1608      631797 :         num_bytes = btrfs_file_extent_num_bytes(eb, fi);
    1609      631797 :         logical = disk_byte + btrfs_file_extent_offset(eb, fi);
    1610             : 
    1611             :         /*
    1612             :          * Setup the clone roots.
    1613             :          */
    1614     2305108 :         for (i = 0; i < sctx->clone_roots_cnt; i++) {
    1615     1041516 :                 cur_clone_root = sctx->clone_roots + i;
    1616     1041516 :                 cur_clone_root->ino = (u64)-1;
    1617     1041516 :                 cur_clone_root->offset = 0;
    1618     1041516 :                 cur_clone_root->num_bytes = 0;
    1619     1041516 :                 cur_clone_root->found_ref = false;
    1620             :         }
    1621             : 
    1622      631795 :         backref_ctx.sctx = sctx;
    1623      631795 :         backref_ctx.cur_objectid = ino;
    1624      631795 :         backref_ctx.cur_offset = data_offset;
    1625      631795 :         backref_ctx.bytenr = disk_byte;
    1626             :         /*
    1627             :          * Use the header owner and not the send root's id, because in case of a
    1628             :          * snapshot we can have shared subtrees.
    1629             :          */
    1630      631795 :         backref_ctx.backref_owner = btrfs_header_owner(eb);
    1631      631795 :         backref_ctx.backref_offset = data_offset - btrfs_file_extent_offset(eb, fi);
    1632             : 
    1633             :         /*
    1634             :          * The last extent of a file may be too large due to page alignment.
    1635             :          * We need to adjust extent_len in this case so that the checks in
    1636             :          * iterate_backrefs() work.
    1637             :          */
    1638      631799 :         if (data_offset + num_bytes >= ino_size)
    1639      601253 :                 backref_ctx.extent_len = ino_size - data_offset;
    1640             :         else
    1641       30546 :                 backref_ctx.extent_len = num_bytes;
    1642             : 
    1643             :         /*
    1644             :          * Now collect all backrefs.
    1645             :          */
    1646      631799 :         backref_walk_ctx.bytenr = disk_byte;
    1647      631799 :         if (compressed == BTRFS_COMPRESS_NONE)
    1648      631779 :                 backref_walk_ctx.extent_item_pos = btrfs_file_extent_offset(eb, fi);
    1649      631799 :         backref_walk_ctx.fs_info = fs_info;
    1650      631799 :         backref_walk_ctx.cache_lookup = lookup_backref_cache;
    1651      631799 :         backref_walk_ctx.cache_store = store_backref_cache;
    1652      631799 :         backref_walk_ctx.indirect_ref_iterator = iterate_backrefs;
    1653      631799 :         backref_walk_ctx.check_extent_item = check_extent_item;
    1654      631799 :         backref_walk_ctx.user_ctx = &backref_ctx;
    1655             : 
    1656             :         /*
    1657             :          * If we have a single clone root, then it's the send root and we can
    1658             :          * tell the backref walking code to skip our own backref and not resolve it,
    1659             :          * since we can not use it for cloning - the source and destination
    1660             :          * ranges can't overlap and in case the leaf is shared through a subtree
    1661             :          * due to snapshots, we can't use those other roots since they are not
    1662             :          * in the list of clone roots.
    1663             :          */
    1664      631799 :         if (sctx->clone_roots_cnt == 1)
    1665      222084 :                 backref_walk_ctx.skip_data_ref = skip_self_data_ref;
    1666             : 
    1667      631799 :         ret = iterate_extent_inodes(&backref_walk_ctx, true, iterate_backrefs,
    1668             :                                     &backref_ctx);
    1669      631791 :         if (ret < 0)
    1670             :                 return ret;
    1671             : 
    1672      631791 :         down_read(&fs_info->commit_root_sem);
    1673      631792 :         if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
    1674             :                 /*
    1675             :                  * A transaction commit for a transaction in which block group
    1676             :                  * relocation was done just happened.
    1677             :                  * The disk_bytenr of the file extent item we processed is
    1678             :                  * possibly stale, referring to the extent's location before
    1679             :                  * relocation. So act as if we haven't found any clone sources
    1680             :                  * and fallback to write commands, which will read the correct
    1681             :                  * data from the new extent location. Otherwise we will fail
    1682             :                  * below because we haven't found our own back reference or we
    1683             :                  * could be getting incorrect sources in case the old extent
    1684             :                  * was already reallocated after the relocation.
    1685             :                  */
    1686          21 :                 up_read(&fs_info->commit_root_sem);
    1687          21 :                 return -ENOENT;
    1688             :         }
    1689      631771 :         up_read(&fs_info->commit_root_sem);
    1690             : 
    1691      631775 :         btrfs_debug(fs_info,
    1692             :                     "find_extent_clone: data_offset=%llu, ino=%llu, num_bytes=%llu, logical=%llu",
    1693             :                     data_offset, ino, num_bytes, logical);
    1694             : 
    1695      631775 :         if (!backref_ctx.found) {
    1696             :                 btrfs_debug(fs_info, "no clones found");
    1697             :                 return -ENOENT;
    1698             :         }
    1699             : 
    1700             :         cur_clone_root = NULL;
    1701        6063 :         for (i = 0; i < sctx->clone_roots_cnt; i++) {
    1702        6053 :                 struct clone_root *clone_root = &sctx->clone_roots[i];
    1703             : 
    1704        6053 :                 if (!clone_root->found_ref)
    1705        1934 :                         continue;
    1706             : 
    1707             :                 /*
    1708             :                  * Choose the root from which we can clone more bytes, to
    1709             :                  * minimize write operations and therefore have more extent
    1710             :                  * sharing at the destination (the same as in the source).
    1711             :                  */
    1712        4119 :                 if (!cur_clone_root ||
    1713           7 :                     clone_root->num_bytes > cur_clone_root->num_bytes) {
    1714        4119 :                         cur_clone_root = clone_root;
    1715             : 
    1716             :                         /*
    1717             :                          * We found an optimal clone candidate (any inode from
    1718             :                          * any root is fine), so we're done.
    1719             :                          */
    1720        4119 :                         if (clone_root->num_bytes >= backref_ctx.extent_len)
    1721             :                                 break;
    1722             :                 }
    1723             :         }
    1724             : 
    1725        4112 :         if (cur_clone_root) {
    1726        4112 :                 *found = cur_clone_root;
    1727        4112 :                 ret = 0;
    1728             :         } else {
    1729             :                 ret = -ENOENT;
    1730             :         }
    1731             : 
    1732             :         return ret;
    1733             : }
    1734             : /*
                     :  * Read the inline data of the symlink inode @ino in @root (presumably the
                     :  * link target) and append it to @dest.
                     :  *
                     :  * The file extent item at offset 0 of @ino is looked up. It must be an
                     :  * inline extent without compression, otherwise the inode is treated as
                     :  * corrupted.
                     :  *
                     :  * Returns the result of fs_path_add_from_extent_buffer() when the extent
                     :  * item is valid, -ENOMEM if no path could be allocated, -EIO if no extent
                     :  * item exists at offset 0 (empty symlink), -EUCLEAN if the extent is not
                     :  * inline or is compressed, or the negative value returned by
                     :  * btrfs_search_slot().
                     :  */
    1735         302 : static int read_symlink(struct btrfs_root *root,
    1736             :                         u64 ino,
    1737             :                         struct fs_path *dest)
    1738             : {
    1739         302 :         int ret;
    1740         302 :         struct btrfs_path *path;
    1741         302 :         struct btrfs_key key;
    1742         302 :         struct btrfs_file_extent_item *ei;
    1743         302 :         u8 type;
    1744         302 :         u8 compression;
    1745         302 :         unsigned long off;
    1746         302 :         int len;
    1747             : 
    1748         302 :         path = alloc_path_for_send();
    1749         302 :         if (!path)
    1750             :                 return -ENOMEM;
    1751             : 
    1752         302 :         key.objectid = ino;
    1753         302 :         key.type = BTRFS_EXTENT_DATA_KEY;
    1754         302 :         key.offset = 0;
    1755         302 :         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    1756         302 :         if (ret < 0)
    1757           0 :                 goto out;
    1758         302 :         if (ret) {
    1759             :                 /*
    1760             :                  * An empty symlink inode. Can happen in rare error paths when
    1761             :                  * creating a symlink (transaction committed before the inode
    1762             :                  * eviction handler removed the symlink inode items and a crash
    1763             :                  * happened in between or the subvol was snapshotted in between).
    1764             :                  * Print an informative message to dmesg/syslog so that the user
    1765             :                  * can delete the symlink.
    1766             :                  */
    1767           0 :                 btrfs_err(root->fs_info,
    1768             :                           "Found empty symlink inode %llu at root %llu",
    1769             :                           ino, root->root_key.objectid);
    1770           0 :                 ret = -EIO;
    1771           0 :                 goto out;
    1772             :         }
    1773             : 
    1774         302 :         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
    1775             :                         struct btrfs_file_extent_item);
    1776         302 :         type = btrfs_file_extent_type(path->nodes[0], ei);
    1777         302 :         if (unlikely(type != BTRFS_FILE_EXTENT_INLINE)) {
    1778           0 :                 ret = -EUCLEAN;
    1779           0 :                 btrfs_crit(root->fs_info,
    1780             : "send: found symlink extent that is not inline, ino %llu root %llu extent type %d",
    1781             :                            ino, btrfs_root_id(root), type);
    1782           0 :                 goto out;
    1783             :         }
    1784         302 :         compression = btrfs_file_extent_compression(path->nodes[0], ei);
    1785         302 :         if (unlikely(compression != BTRFS_COMPRESS_NONE)) {
    1786           0 :                 ret = -EUCLEAN;
    1787           0 :                 btrfs_crit(root->fs_info,
    1788             : "send: found symlink extent with compression, ino %llu root %llu compression type %d",
    1789             :                            ino, btrfs_root_id(root), compression);
    1790           0 :                 goto out;
    1791             :         }
    1792             :         /* Append len bytes found at offset off of the leaf to @dest. */
    1793         302 :         off = btrfs_file_extent_inline_start(ei);
    1794         302 :         len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
    1795             : 
    1796         302 :         ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
    1797             : 
    1798         302 : out:
    1799         302 :         btrfs_free_path(path);
    1800         302 :         return ret;
    1801             : }
    1802             : 
    1803             : /*
    1804             :  * Helper function to generate a file name that is unique in the root of
    1805             :  * send_root and parent_root. This is used to generate names for orphan inodes.
                     :  *
                     :  * Candidate names are built as o<ino>-<gen>-<idx>, with idx starting at 0.
                     :  * A candidate is rejected, and idx incremented, when it is found as a dir
                     :  * item of directory BTRFS_FIRST_FREE_OBJECTID in the send root or, if a
                     :  * parent root is set, in the parent root. The first candidate found in
                     :  * neither is appended to @dest.
                     :  *
                     :  * Returns the result of fs_path_add() once a unique name was found,
                     :  * -ENOMEM if no path could be allocated, or the error encoded in the
                     :  * pointer returned by btrfs_lookup_dir_item().
    1806             :  */
    1807     1207569 : static int gen_unique_name(struct send_ctx *sctx,
    1808             :                            u64 ino, u64 gen,
    1809             :                            struct fs_path *dest)
    1810             : {
    1811     1207569 :         int ret = 0;
    1812     1207569 :         struct btrfs_path *path;
    1813     1207569 :         struct btrfs_dir_item *di;
    1814     1207569 :         char tmp[64];
    1815     1207569 :         int len;
    1816     1207569 :         u64 idx = 0;
    1817             : 
    1818     1207569 :         path = alloc_path_for_send();
    1819     1207662 :         if (!path)
    1820             :                 return -ENOMEM;
    1821             : 
    1822     1207662 :         while (1) {
    1823     1207662 :                 struct fscrypt_str tmp_name;
    1824             : 
    1825     1207662 :                 len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
    1826             :                                 ino, gen, idx);
    1827     1207662 :                 ASSERT(len < sizeof(tmp));
    1828     1207662 :                 tmp_name.name = tmp;
    1829     1207662 :                 tmp_name.len = strlen(tmp);
    1830             : 
    1831     1207575 :                 di = btrfs_lookup_dir_item(NULL, sctx->send_root,
    1832             :                                 path, BTRFS_FIRST_FREE_OBJECTID,
    1833             :                                 &tmp_name, 0);
    1834     1208962 :                 btrfs_release_path(path);
    1835     1209054 :                 if (IS_ERR(di)) {
    1836           0 :                         ret = PTR_ERR(di);
    1837           0 :                         goto out;
    1838             :                 }
    1839     1209054 :                 if (di) {
    1840             :                         /* not unique, try again */
    1841           0 :                         idx++;
    1842           0 :                         continue;
    1843             :                 }
    1844             : 
    1845     1209054 :                 if (!sctx->parent_root) {
    1846             :                         /* unique */
    1847             :                         ret = 0;
    1848     1209030 :                         break;
    1849             :                 }
    1850             : 
    1851      805932 :                 di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
    1852             :                                 path, BTRFS_FIRST_FREE_OBJECTID,
    1853             :                                 &tmp_name, 0);
    1854      805932 :                 btrfs_release_path(path);
    1855      805908 :                 if (IS_ERR(di)) {
    1856           0 :                         ret = PTR_ERR(di);
    1857           0 :                         goto out;
    1858             :                 }
    1859      805908 :                 if (di) {
    1860             :                         /* not unique, try again */
    1861           0 :                         idx++;
    1862           0 :                         continue;
    1863             :                 }
    1864             :                 /* unique */
    1865             :                 break;
    1866             :         }
    1867             : 
    1868     2418065 :         ret = fs_path_add(dest, tmp, strlen(tmp));
    1869             : 
    1870     1209031 : out:
    1871     1209031 :         btrfs_free_path(path);
    1872     1209031 :         return ret;
    1873             : }
    1874             : /*
                     :  * States returned by get_cur_inode_state() for an inode number and a
                     :  * generation:
                     :  *
                     :  * no_change   - both the send root and the parent root have the inode
                     :  *               with that generation.
                     :  * will_create - only the send root has it with that generation and the
                     :  *               inode number is not below sctx->send_progress.
                     :  * did_create  - same, but the inode number is below sctx->send_progress.
                     :  * will_delete - only the parent root has it with that generation and the
                     :  *               inode number is not below sctx->send_progress.
                     :  * did_delete  - same, but the inode number is below sctx->send_progress.
                     :  */
    1875             : enum inode_state {
    1876             :         inode_state_no_change,
    1877             :         inode_state_will_create,
    1878             :         inode_state_did_create,
    1879             :         inode_state_will_delete,
    1880             :         inode_state_did_delete,
    1881             : };
    1882             : /*
                     :  * Classify inode @ino with generation @gen by checking the send root and,
                     :  * if one is set, the parent root, and comparing @ino against
                     :  * sctx->send_progress.
                     :  *
                     :  * The inode counts as present in a root when get_inode_info() does not
                     :  * fail and the reported nlink is not 0. If @send_gen is not NULL it
                     :  * receives the generation found in the send root, or 0 when the inode is
                     :  * not present there. @parent_gen is filled the same way from the parent
                     :  * root, but it is only written when a parent root exists.
                     :  *
                     :  * Returns one of the inode_state values, -ENOENT if neither root has the
                     :  * inode with generation @gen, or another negative value returned by
                     :  * get_inode_info().
                     :  *
                     :  * NOTE(review): info is read right after get_inode_info() even when that
                     :  * call returned -ENOENT. The outcome is -ENOENT either way, but confirm
                     :  * that get_inode_info() leaves info in a defined state in that case.
                     :  */
    1883     3257967 : static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen,
    1884             :                                u64 *send_gen, u64 *parent_gen)
    1885             : {
    1886     3257967 :         int ret;
    1887     3257967 :         int left_ret;
    1888     3257967 :         int right_ret;
    1889     3257967 :         u64 left_gen;
    1890     3257967 :         u64 right_gen = 0;
    1891     3257967 :         struct btrfs_inode_info info;
    1892             : 
    1893     3257967 :         ret = get_inode_info(sctx->send_root, ino, &info);
    1894     3258061 :         if (ret < 0 && ret != -ENOENT)
    1895           0 :                 goto out;
    1896     3258061 :         left_ret = (info.nlink == 0) ? -ENOENT : ret;
    1897     3258061 :         left_gen = info.gen;
    1898     3258061 :         if (send_gen)
    1899       30324 :                 *send_gen = ((left_ret == -ENOENT) ? 0 : info.gen);
    1900             :         /* Without a parent root the parent side is treated as not found. */
    1901     3258061 :         if (!sctx->parent_root) {
    1902             :                 right_ret = -ENOENT;
    1903             :         } else {
    1904     2445878 :                 ret = get_inode_info(sctx->parent_root, ino, &info);
    1905     2445878 :                 if (ret < 0 && ret != -ENOENT)
    1906           0 :                         goto out;
    1907     2445878 :                 right_ret = (info.nlink == 0) ? -ENOENT : ret;
    1908     2445878 :                 right_gen = info.gen;
    1909     2445878 :                 if (parent_gen)
    1910        7740 :                         *parent_gen = ((right_ret == -ENOENT) ? 0 : info.gen);
    1911             :         }
    1912             :         /* left_* describes the send root, right_* the parent root. */
    1913     3258061 :         if (!left_ret && !right_ret) {
    1914     2041918 :                 if (left_gen == gen && right_gen == gen) {
    1915             :                         ret = inode_state_no_change;
    1916          52 :                 } else if (left_gen == gen) {
    1917          24 :                         if (ino < sctx->send_progress)
    1918             :                                 ret = inode_state_did_create;
    1919             :                         else
    1920           8 :                                 ret = inode_state_will_create;
    1921          28 :                 } else if (right_gen == gen) {
    1922          28 :                         if (ino < sctx->send_progress)
    1923             :                                 ret = inode_state_did_delete;
    1924             :                         else
    1925          11 :                                 ret = inode_state_will_delete;
    1926             :                 } else  {
    1927             :                         ret = -ENOENT;
    1928             :                 }
    1929     1216143 :         } else if (!left_ret) {
    1930     1215937 :                 if (left_gen == gen) {
    1931     1215937 :                         if (ino < sctx->send_progress)
    1932             :                                 ret = inode_state_did_create;
    1933             :                         else
    1934         448 :                                 ret = inode_state_will_create;
    1935             :                 } else {
    1936             :                         ret = -ENOENT;
    1937             :                 }
    1938         206 :         } else if (!right_ret) {
    1939         206 :                 if (right_gen == gen) {
    1940         206 :                         if (ino < sctx->send_progress)
    1941             :                                 ret = inode_state_did_delete;
    1942             :                         else
    1943         150 :                                 ret = inode_state_will_delete;
    1944             :                 } else {
    1945             :                         ret = -ENOENT;
    1946             :                 }
    1947             :         } else {
    1948             :                 ret = -ENOENT;
    1949             :         }
    1950             : 
    1951     3258061 : out:
    1952     3258061 :         return ret;
    1953             : }
    1954             : /*
                     :  * Reduce the state reported by get_cur_inode_state() to a yes/no answer.
                     :  *
                     :  * Returns 1 if @ino is BTRFS_FIRST_FREE_OBJECTID (no lookup is done in that
                     :  * case and @send_gen / @parent_gen are left untouched) or if the state is
                     :  * no_change, did_create or will_delete. Returns 0 for the remaining
                     :  * states and a negative value if get_cur_inode_state() failed.
                     :  *
                     :  * NOTE(review): presumably 1 means the inode currently exists from the
                     :  * point of view of the stream sent so far - confirm against callers.
                     :  */
    1955     3035079 : static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen,
    1956             :                              u64 *send_gen, u64 *parent_gen)
    1957             : {
    1958     3035079 :         int ret;
    1959             : 
    1960     3035079 :         if (ino == BTRFS_FIRST_FREE_OBJECTID)
    1961             :                 return 1;
    1962             : 
    1963     1433680 :         ret = get_cur_inode_state(sctx, ino, gen, send_gen, parent_gen);
    1964     1433687 :         if (ret < 0)
    1965           0 :                 goto out;
    1966             : 
    1967     1433687 :         if (ret == inode_state_no_change ||
    1968     1433687 :             ret == inode_state_did_create ||
    1969             :             ret == inode_state_will_delete)
    1970             :                 ret = 1;
    1971             :         else
    1972             :                 ret = 0;
    1973             : 
    1974             : out:
    1975             :         return ret;
    1976             : }
    1977             : 
    1978             : /*
    1979             :  * Helper function to lookup a dir item in a dir.
                     :  *
                     :  * Looks up the entry @name (of length @name_len) in directory @dir of
                     :  * @root and stores the objectid of the key the entry points to in
                     :  * @found_inode.
                     :  *
                     :  * Returns 0 on success, -ENOENT if there is no such entry or if the entry
                     :  * points to a key of type BTRFS_ROOT_ITEM_KEY, -ENOMEM if no path could be
                     :  * allocated, or the error encoded in the pointer returned by
                     :  * btrfs_lookup_dir_item(). @found_inode is only written on success.
    1980             :  */
    1981     1620256 : static int lookup_dir_item_inode(struct btrfs_root *root,
    1982             :                                  u64 dir, const char *name, int name_len,
    1983             :                                  u64 *found_inode)
    1984             : {
    1985     1620256 :         int ret = 0;
    1986     1620256 :         struct btrfs_dir_item *di;
    1987     1620256 :         struct btrfs_key key;
    1988     1620256 :         struct btrfs_path *path;
    1989     1620256 :         struct fscrypt_str name_str = FSTR_INIT((char *)name, name_len);
    1990             : 
    1991     1620256 :         path = alloc_path_for_send();
    1992     1620256 :         if (!path)
    1993             :                 return -ENOMEM;
    1994             : 
    1995     1620256 :         di = btrfs_lookup_dir_item(NULL, root, path, dir, &name_str, 0);
    1996     2834425 :         if (IS_ERR_OR_NULL(di)) {
    1997      406089 :                 ret = di ? PTR_ERR(di) : -ENOENT;
    1998      406089 :                 goto out;
    1999             :         }
    2000     1214168 :         btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
    2001     1214167 :         if (key.type == BTRFS_ROOT_ITEM_KEY) {
    2002           3 :                 ret = -ENOENT;
    2003           3 :                 goto out;
    2004             :         }
    2005     1214164 :         *found_inode = key.objectid;
    2006             : 
    2007     1620256 : out:
    2008     1620256 :         btrfs_free_path(path);
    2009     1620256 :         return ret;
    2010             : }
    2011             : 
    2012             : /*
    2013             :  * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir,
    2014             :  * generation of the parent dir and the name of the dir entry.
                     :  *
                     :  * The search starts at key (@ino, BTRFS_INODE_REF_KEY, 0) and accepts the
                     :  * item found there if it belongs to @ino and is either an inode ref or an
                     :  * inode extref. The name stored in that item is appended to @name. For an
                     :  * inode ref the parent dir is the key offset, for an extref it is read
                     :  * from the extref item itself.
                     :  *
                     :  * @dir_gen may be NULL, in which case the generation of the parent dir is
                     :  * not looked up. @dir is only written when no error occurred.
                     :  *
                     :  * Returns a non-negative value on success (presumably 0 - depends on the
                     :  * helpers called), -ENOENT if no matching item was found, -ENOMEM if no
                     :  * path could be allocated, or another negative value from the search,
                     :  * from fs_path_add_from_extent_buffer() or from get_inode_gen().
    2015             :  */
    2016     1423938 : static int get_first_ref(struct btrfs_root *root, u64 ino,
    2017             :                          u64 *dir, u64 *dir_gen, struct fs_path *name)
    2018             : {
    2019     1423938 :         int ret;
    2020     1423938 :         struct btrfs_key key;
    2021     1423938 :         struct btrfs_key found_key;
    2022     1423938 :         struct btrfs_path *path;
    2023     1423938 :         int len;
    2024     1423938 :         u64 parent_dir;
    2025             : 
    2026     1423938 :         path = alloc_path_for_send();
    2027     1423931 :         if (!path)
    2028             :                 return -ENOMEM;
    2029             : 
    2030     1423931 :         key.objectid = ino;
    2031     1423931 :         key.type = BTRFS_INODE_REF_KEY;
    2032     1423931 :         key.offset = 0;
    2033             : 
    2034     1423931 :         ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
    2035     1423959 :         if (ret < 0)
    2036           0 :                 goto out;
    2037     1423959 :         if (!ret)
    2038     1423903 :                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
    2039             :                                 path->slots[0]);
    2040     1423941 :         if (ret || found_key.objectid != ino ||
    2041     1423884 :             (found_key.type != BTRFS_INODE_REF_KEY &&
    2042             :              found_key.type != BTRFS_INODE_EXTREF_KEY)) {
    2043          57 :                 ret = -ENOENT;
    2044          57 :                 goto out;
    2045             :         }
    2046             : 
    2047     1423884 :         if (found_key.type == BTRFS_INODE_REF_KEY) {
    2048     1423883 :                 struct btrfs_inode_ref *iref;
    2049     1423883 :                 iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
    2050             :                                       struct btrfs_inode_ref);
    2051     1423873 :                 len = btrfs_inode_ref_name_len(path->nodes[0], iref);
    2052     1423877 :                 ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
    2053     1423877 :                                                      (unsigned long)(iref + 1),
    2054             :                                                      len);
    2055     1423883 :                 parent_dir = found_key.offset;
    2056             :         } else {
    2057           1 :                 struct btrfs_inode_extref *extref;
    2058           1 :                 extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
    2059             :                                         struct btrfs_inode_extref);
    2060           1 :                 len = btrfs_inode_extref_name_len(path->nodes[0], extref);
    2061           1 :                 ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
    2062           1 :                                         (unsigned long)&extref->name, len);
    2063           1 :                 parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
    2064             :         }
    2065     1423881 :         if (ret < 0)
    2066           0 :                 goto out;
    2067     1423881 :         btrfs_release_path(path);
    2068             : 
    2069     1423897 :         if (dir_gen) {
    2070     1420988 :                 ret = get_inode_gen(root, parent_dir, dir_gen);
    2071     1420992 :                 if (ret < 0)
    2072           0 :                         goto out;
    2073             :         }
    2074             : 
    2075     1423901 :         *dir = parent_dir;
    2076             : 
    2077     1423958 : out:
    2078     1423958 :         btrfs_free_path(path);
    2079     1423958 :         return ret;
    2080             : }
    2081             : /*
                     :  * Check whether the ref (@dir, @name) is the first ref of inode @ino in
                     :  * @root, where the first ref is the one reported by get_first_ref().
                     :  *
                     :  * Returns 1 if the parent dir, the name length and the name bytes all
                     :  * match, 0 if any of them differ, -ENOMEM if the temporary path could not
                     :  * be allocated, or the negative value returned by get_first_ref().
                     :  */
    2082         230 : static int is_first_ref(struct btrfs_root *root,
    2083             :                         u64 ino, u64 dir,
    2084             :                         const char *name, int name_len)
    2085             : {
    2086         230 :         int ret;
    2087         230 :         struct fs_path *tmp_name;
    2088         230 :         u64 tmp_dir;
    2089             : 
    2090         230 :         tmp_name = fs_path_alloc();
    2091         230 :         if (!tmp_name)
    2092             :                 return -ENOMEM;
    2093             : 
    2094         230 :         ret = get_first_ref(root, ino, &tmp_dir, NULL, tmp_name);
    2095         230 :         if (ret < 0)
    2096           0 :                 goto out;
    2097             : 
    2098         230 :         if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
    2099           5 :                 ret = 0;
    2100           5 :                 goto out;
    2101             :         }
    2102             : 
    2103         450 :         ret = !memcmp(tmp_name->start, name, name_len);
    2104             : 
    2105         230 : out:
    2106         230 :         fs_path_free(tmp_name);
    2107         230 :         return ret;
    2108             : }
    2109             : 
    2110             : /*
    2111             :  * Used by process_recorded_refs to determine if a new ref would overwrite an
    2112             :  * already existing ref. In case it detects an overwrite, it returns the
    2113             :  * inode/gen in who_ino/who_gen.
    2114             :  * When an overwrite is detected, process_recorded_refs does proper orphanizing
    2115             :  * to make sure later references to the overwritten inode are possible.
    2116             :  * Orphanizing is however only required for the first ref of an inode.
    2117             :  * process_recorded_refs does an additional is_first_ref check to see if
    2118             :  * orphanizing is really required.
                     :  *
                     :  * Returns 1 and fills @who_ino, @who_gen and @who_mode when the entry
                     :  * @name in @dir of the parent root points to an inode whose number is
                     :  * above sctx->send_progress or which is waiting for a move. Returns 0 when
                     :  * there is no parent root, when @dir is not reported as existent, when
                     :  * the generation of @dir in the parent root differs from @dir_gen, when
                     :  * the parent root has no such entry, or when the other inode was already
                     :  * processed. Returns a negative value if lookup_dir_item_inode() or
                     :  * get_inode_info() failed.
                     :  *
                     :  * NOTE(review): a negative result from is_inode_existent() is folded into
                     :  * a return value of 0 here - confirm this is intended.
    2119             :  */
    2120      607479 : static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
    2121             :                               const char *name, int name_len,
    2122             :                               u64 *who_ino, u64 *who_gen, u64 *who_mode)
    2123             : {
    2124      607479 :         int ret;
    2125      607479 :         u64 parent_root_dir_gen;
    2126      607479 :         u64 other_inode = 0;
    2127      607479 :         struct btrfs_inode_info info;
    2128             : 
    2129      607479 :         if (!sctx->parent_root)
    2130             :                 return 0;
    2131             : 
    2132      403988 :         ret = is_inode_existent(sctx, dir, dir_gen, NULL, &parent_root_dir_gen);
    2133      403988 :         if (ret <= 0)
    2134             :                 return 0;
    2135             : 
    2136             :         /*
    2137             :          * If we have a parent root we need to verify that the parent dir was
    2138             :          * not deleted and then re-created, if it was then we have no overwrite
    2139             :          * and we can just unlink this entry.
    2140             :          *
    2141             :          * @parent_root_dir_gen was set to 0 if the inode does not exist in the
    2142             :          * parent root.
    2143             :          */
    2144      403988 :         if (sctx->parent_root && dir != BTRFS_FIRST_FREE_OBJECTID &&
    2145        3953 :             parent_root_dir_gen != dir_gen)
    2146             :                 return 0;
    2147             : 
    2148      403822 :         ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
    2149             :                                     &other_inode);
    2150      403822 :         if (ret == -ENOENT)
    2151             :                 return 0;
    2152         671 :         else if (ret < 0)
    2153             :                 return ret;
    2154             : 
    2155             :         /*
    2156             :          * Check if the overwritten ref was already processed. If yes, the ref
    2157             :          * was already unlinked/moved, so we can safely assume that we will not
    2158             :          * overwrite anything at this point in time.
    2159             :          */
    2160        1116 :         if (other_inode > sctx->send_progress ||
    2161         445 :             is_waiting_for_move(sctx, other_inode)) {
    2162         230 :                 ret = get_inode_info(sctx->parent_root, other_inode, &info);
    2163         230 :                 if (ret < 0)
    2164             :                         return ret;
    2165             : 
    2166         230 :                 *who_ino = other_inode;
    2167         230 :                 *who_gen = info.gen;
    2168         230 :                 *who_mode = info.mode;
    2169         230 :                 return 1;
    2170             :         }
    2171             : 
    2172             :         return 0;
    2173             : }
    2174             : 
    2175             : /*
    2176             :  * Checks if the ref was overwritten by an already processed inode. This is
    2177             :  * used by __get_cur_name_and_parent to find out if the ref was orphanized and
    2178             :  * thus the orphan name needs to be used.
    2179             :  * process_recorded_refs also uses it to avoid unlinking of refs that were
    2180             :  * overwritten.
                     :  *
                     :  * Returns 1 if the entry @name in @dir of the send root points to an inode
                     :  * other than (@ino, @ino_gen) and that inode is either below
                     :  * sctx->send_progress or is the inode currently being processed
                     :  * (sctx->cur_ino with sctx->cur_inode_gen, while @ino itself is not the
                     :  * current inode). Returns 0 if there is no parent root, if @dir is
                     :  * reported as not existent, if the generation of @dir in the send root
                     :  * differs from @dir_gen, if the send root has no such entry, or if the
                     :  * entry still points to the same inode and generation. Returns a negative
                     :  * value if one of the lookups failed.
    2181             :  */
    2182     1418085 : static int did_overwrite_ref(struct send_ctx *sctx,
    2183             :                             u64 dir, u64 dir_gen,
    2184             :                             u64 ino, u64 ino_gen,
    2185             :                             const char *name, int name_len)
    2186             : {
    2187     1418085 :         int ret;
    2188     1418085 :         u64 ow_inode;
    2189     1418085 :         u64 ow_gen = 0;
    2190     1418085 :         u64 send_root_dir_gen;
    2191             : 
    2192     1418085 :         if (!sctx->parent_root)
    2193             :                 return 0;
    2194             : 
    2195     1216547 :         ret = is_inode_existent(sctx, dir, dir_gen, &send_root_dir_gen, NULL);
    2196     1216547 :         if (ret <= 0)
    2197             :                 return ret;
    2198             : 
    2199             :         /*
    2200             :          * @send_root_dir_gen was set to 0 if the inode does not exist in the
    2201             :          * send root.
    2202             :          */
    2203     1216447 :         if (dir != BTRFS_FIRST_FREE_OBJECTID && send_root_dir_gen != dir_gen)
    2204             :                 return 0;
    2205             : 
    2206             :         /* check if the ref was overwritten by another ref */
    2207     1216434 :         ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
    2208             :                                     &ow_inode);
    2209     1216435 :         if (ret == -ENOENT) {
    2210             :                 /* was never and will never be overwritten */
    2211             :                 return 0;
    2212     1213494 :         } else if (ret < 0) {
    2213             :                 return ret;
    2214             :         }
    2215             : 
    2216     1213494 :         if (ow_inode == ino) {
    2217     1211825 :                 ret = get_inode_gen(sctx->send_root, ow_inode, &ow_gen);
    2218     1211825 :                 if (ret < 0)
    2219             :                         return ret;
    2220             : 
    2221             :                 /* It's the same inode, so no overwrite happened. */
    2222     1211825 :                 if (ow_gen == ino_gen)
    2223             :                         return 0;
    2224             :         }
    2225             : 
    2226             :         /*
    2227             :          * We know that it is or will be overwritten. Check this now.
    2228             :          * The current inode being processed might have been the one that caused
    2229             :          * inode 'ino' to be orphanized, therefore check if ow_inode matches
    2230             :          * the current inode being processed.
    2231             :          */
    2232        1669 :         if (ow_inode < sctx->send_progress)
    2233             :                 return 1;
    2234             : 
    2235        1251 :         if (ino != sctx->cur_ino && ow_inode == sctx->cur_ino) {
    2236           9 :                 if (ow_gen == 0) {
    2237           9 :                         ret = get_inode_gen(sctx->send_root, ow_inode, &ow_gen);
    2238           9 :                         if (ret < 0)
    2239             :                                 return ret;
    2240             :                 }
    2241           9 :                 if (ow_gen == sctx->cur_inode_gen)
    2242           9 :                         return 1;
    2243             :         }
    2244             : 
    2245             :         return 0;
    2246             : }
    2247             : 
    2248             : /*
    2249             :  * Same as did_overwrite_ref, but also checks if it is the first ref of an inode
    2250             :  * that got overwritten. This is used by process_recorded_refs to determine
    2251             :  * if it has to use the path as returned by get_cur_path or the orphan name.
                     :  *
                     :  * The first ref of @ino is taken from the parent root via get_first_ref()
                     :  * and then handed to did_overwrite_ref() together with @ino and @gen.
                     :  *
                     :  * Returns the result of did_overwrite_ref(), 0 if there is no parent
                     :  * root, -ENOMEM if the temporary path could not be allocated, or the
                     :  * negative value returned by get_first_ref().
    2252             :  */
    2253        1521 : static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
    2254             : {
    2255        1521 :         int ret = 0;
    2256        1521 :         struct fs_path *name = NULL;
    2257        1521 :         u64 dir;
    2258        1521 :         u64 dir_gen;
    2259             : 
    2260        1521 :         if (!sctx->parent_root)
    2261           0 :                 goto out;
    2262             : 
    2263        1521 :         name = fs_path_alloc();
    2264        1521 :         if (!name)
    2265             :                 return -ENOMEM;
    2266             : 
    2267        1521 :         ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name);
    2268        1521 :         if (ret < 0)
    2269           0 :                 goto out;
    2270             : 
    2271        1521 :         ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
    2272             :                         name->start, fs_path_len(name));
    2273             : 
    2274        1521 : out:
    2275        1521 :         fs_path_free(name);
    2276        1521 :         return ret;
    2277             : }
    2278             : /*
                     :  * Look up the name cache entry for (@ino, @gen) in sctx->name_cache.
                     :  *
                     :  * Returns the name_cache_entry that embeds the LRU cache entry found by
                     :  * btrfs_lru_cache_lookup(), or NULL if that lookup found nothing.
                     :  */
    2279             : static inline struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
    2280             :                                                          u64 ino, u64 gen)
    2281             : {
    2282     4543751 :         struct btrfs_lru_cache_entry *entry;
    2283             : 
    2284     4543751 :         entry = btrfs_lru_cache_lookup(&sctx->name_cache, ino, gen);
    2285     4544660 :         if (!entry)
    2286             :                 return NULL;
    2287             : 
    2288             :         return container_of(entry, struct name_cache_entry, entry);
    2289             : }
    2290             : 
    2291             : /*
    2292             :  * Used by get_cur_path for each ref up to the root.
    2293             :  * Returns 0 if it succeeded.
    2294             :  * Returns 1 if the inode is not existent or got overwritten. In that case, the
    2295             :  * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
    2296             :  * is returned, parent_ino/parent_gen are not guaranteed to be valid.
    2297             :  * Returns <0 in case of error.
                        *
                        * The name is appended to @dest. Results (including the 1 return value)
                        * are stored in sctx->name_cache keyed by ino/gen, and a usable cached
                        * entry is returned without redoing the lookup.
    2298             :  */
    2299     4543526 : static int __get_cur_name_and_parent(struct send_ctx *sctx,
    2300             :                                      u64 ino, u64 gen,
    2301             :                                      u64 *parent_ino,
    2302             :                                      u64 *parent_gen,
    2303             :                                      struct fs_path *dest)
    2304             : {
    2305     4543526 :         int ret;
    2306     4543526 :         int nce_ret;
    2307     4543526 :         struct name_cache_entry *nce;
    2308             : 
    2309             :         /*
    2310             :          * First check if we already did a call to this function with the same
    2311             :          * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
    2312             :          * return the cached result.
    2313             :          */
    2314     4543526 :         nce = name_cache_search(sctx, ino, gen);
    2315     3534154 :         if (nce) {
    2316     3534154 :                 if (ino < sctx->send_progress && nce->need_later_update) {
                                       /*
                                        * need_later_update is set at store time when ino was
                                        * not yet below send_progress. Now that it is, the
                                        * entry is dropped and the lookup is redone below.
                                        */
    2317      404253 :                         btrfs_lru_cache_remove(&sctx->name_cache, &nce->entry);
    2318      404253 :                         nce = NULL;
    2319             :                 } else {
    2320     3129901 :                         *parent_ino = nce->parent_ino;
    2321     3129901 :                         *parent_gen = nce->parent_gen;
    2322     3129901 :                         ret = fs_path_add(dest, nce->name, nce->name_len);
    2323     3128898 :                         if (ret < 0)
    2324           0 :                                 goto out;
                                       /* Replay the cached return value (0 or 1). */
    2325     3128898 :                         ret = nce->ret;
    2326     3128898 :                         goto out;
    2327             :                 }
    2328             :         }
    2329             : 
    2330             :         /*
    2331             :          * If the inode is not existent yet, add the orphan name and return 1.
    2332             :          * This should only happen for the parent dir that we determine in
    2333             :          * record_new_ref_if_needed().
    2334             :          */
    2335     1414534 :         ret = is_inode_existent(sctx, ino, gen, NULL, NULL);
    2336     1414553 :         if (ret < 0)
    2337           0 :                 goto out;
    2338             : 
    2339     1414553 :         if (!ret) {
    2340         121 :                 ret = gen_unique_name(sctx, ino, gen, dest);
    2341         121 :                 if (ret < 0)
    2342           0 :                         goto out;
    2343         121 :                 ret = 1;
                                       /*
                                        * This function has not written *parent_ino or *parent_gen
                                        * on this path; the cache entry created below records
                                        * whatever the caller's variables currently hold.
                                        */
    2344         121 :                 goto out_cache;
    2345             :         }
    2346             : 
    2347             :         /*
    2348             :          * Depending on whether the inode was already processed or not, use
    2349             :          * send_root or parent_root for ref lookup.
    2350             :          */
    2351     1414432 :         if (ino < sctx->send_progress)
    2352     1009810 :                 ret = get_first_ref(sctx->send_root, ino,
    2353             :                                     parent_ino, parent_gen, dest);
    2354             :         else
    2355      404622 :                 ret = get_first_ref(sctx->parent_root, ino,
    2356             :                                     parent_ino, parent_gen, dest);
    2357     1414430 :         if (ret < 0)
    2358           0 :                 goto out;
    2359             : 
    2360             :         /*
    2361             :          * Check if the ref was overwritten by an inode's ref that was processed
    2362             :          * earlier. If yes, treat as orphan and return 1.
    2363             :          */
    2364     1414430 :         ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
    2365     1414430 :                         dest->start, dest->end - dest->start);
    2366     1414434 :         if (ret < 0)
    2367           0 :                 goto out;
    2368     1414434 :         if (ret) {
                                       /* Discard the name just looked up and use the orphan name. */
    2369          29 :                 fs_path_reset(dest);
    2370          29 :                 ret = gen_unique_name(sctx, ino, gen, dest);
    2371          29 :                 if (ret < 0)
    2372           0 :                         goto out;
    2373             :                 ret = 1;
    2374             :         }
    2375             : 
    2376     1414405 : out_cache:
    2377             :         /*
    2378             :          * Store the result of the lookup in the name cache.
                                * The allocation holds the entry, the name and one extra byte; the
                                * name is copied with strcpy() below.
                                * NOTE(review): this relies on dest->start being NUL terminated at
                                * fs_path_len(dest) - confirm against the fs_path helpers.
    2379             :          */
    2380     1414555 :         nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL);
    2381     1414529 :         if (!nce) {
    2382           0 :                 ret = -ENOMEM;
    2383           0 :                 goto out;
    2384             :         }
    2385             : 
    2386     1414529 :         nce->entry.key = ino;
    2387     1414529 :         nce->entry.gen = gen;
    2388     1414529 :         nce->parent_ino = *parent_ino;
    2389     1414529 :         nce->parent_gen = *parent_gen;
    2390     1414529 :         nce->name_len = fs_path_len(dest);
    2391     1414529 :         nce->ret = ret;
    2392     1414529 :         strcpy(nce->name, dest->start);
    2393             : 
                               /*
                                * Entries created while ino is not yet below send_progress are
                                * flagged so that the cache hit path above discards them once
                                * send_progress has moved past ino.
                                */
    2394     1414529 :         if (ino < sctx->send_progress)
    2395     1009795 :                 nce->need_later_update = 0;
    2396             :         else
    2397      404734 :                 nce->need_later_update = 1;
    2398             : 
    2399     1414529 :         nce_ret = btrfs_lru_cache_store(&sctx->name_cache, &nce->entry, GFP_KERNEL);
    2400     1414534 :         if (nce_ret < 0) {
                                       /*
                                        * Storing failed: free the new entry and return the store
                                        * error in place of the lookup result.
                                        */
    2401           0 :                 kfree(nce);
    2402           0 :                 ret = nce_ret;
    2403             :         }
    2404             : 
    2405     1414534 : out:
    2406     4543432 :         return ret;
    2407             : }
    2408             : 
    2409             : /*
    2410             :  * Magic happens here. This function returns the first ref to an inode as it
    2411             :  * would look like while receiving the stream at this point in time.
    2412             :  * We walk the path up to the root. For every inode in between, we check if it
    2413             :  * was already processed/sent. If yes, we continue with the parent as found
    2414             :  * in send_root. If not, we continue with the parent as found in parent_root.
    2415             :  * If we encounter an inode that was deleted at this point in time, we use the
    2416             :  * inode's "orphan" name instead of the real name and stop. Same with new inodes
    2417             :  * that were not created yet and overwritten inodes/refs.
    2418             :  *
    2419             :  * When do we have orphan inodes:
    2420             :  * 1. When an inode is freshly created and thus no valid refs are available yet
    2421             :  * 2. When a directory lost all its refs (deleted) but still has dir items
    2422             :  *    inside which were not processed yet (pending for move/delete). If anyone
    2423             :  *    tried to get the path to the dir items, it would get a path inside that
    2424             :  *    orphan directory.
    2425             :  * 3. When an inode is moved around or gets new links, it may overwrite the ref
    2426             :  *    of an unprocessed inode. If in that case the first ref would be
    2427             :  *    overwritten, the overwritten inode gets "orphanized". Later when we
    2428             :  *    process this overwritten inode, it is restored at a new place by moving
    2429             :  *    the orphan inode.
    2430             :  *
    2431             :  * sctx->send_progress tells this function at which point in time receiving
    2432             :  * would be.
                        *
                        * The result is built in @dest, which is reset first. Returns -ENOMEM if
                        * the scratch path cannot be allocated, or the negative error of a failing
                        * helper. fs_path_unreverse() is only applied to @dest when ret is 0.
    2433             :  */
    2434     4871633 : static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
    2435             :                         struct fs_path *dest)
    2436             : {
    2437     4871633 :         int ret = 0;
    2438     4871633 :         struct fs_path *name = NULL;
    2439     4871633 :         u64 parent_inode = 0;
    2440     4871633 :         u64 parent_gen = 0;
    2441     4871633 :         int stop = 0;
    2442             : 
    2443     4871633 :         name = fs_path_alloc();
    2444     4871543 :         if (!name) {
    2445           0 :                 ret = -ENOMEM;
    2446           0 :                 goto out;
    2447             :         }
    2448             : 
                               /*
                                * Names are added starting at ino and moving towards the root, so
                                * dest is put in reversed mode here and unreversed at out.
                                */
    2449     4871543 :         dest->reversed = 1;
    2450     4871543 :         fs_path_reset(dest);
    2451             : 
    2452     9413334 :         while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
    2453     4543577 :                 struct waiting_dir_move *wdm;
    2454             : 
    2455     4543577 :                 fs_path_reset(name);
    2456             : 
    2457     9086687 :                 if (is_waiting_for_rm(sctx, ino, gen)) {
    2458          41 :                         ret = gen_unique_name(sctx, ino, gen, name);
    2459          41 :                         if (ret < 0)
    2460           0 :                                 goto out;
    2461          41 :                         ret = fs_path_add_path(dest, name);
                                               /* The walk ends here; ret is evaluated at out. */
    2462          41 :                         break;
    2463             :                 }
    2464             : 
    2465     4543361 :                 wdm = get_waiting_dir_move(sctx, ino);
    2466     4543361 :                 if (wdm && wdm->orphanized) {
                                               /* Orphanized entry: use the unique name and stop. */
    2467           6 :                         ret = gen_unique_name(sctx, ino, gen, name);
    2468           6 :                         stop = 1;
    2469     4543355 :                 } else if (wdm) {
                                               /*
                                                * ino has a waiting_dir_move entry that is not
                                                * orphanized: name and parent come from parent_root.
                                                */
    2470         435 :                         ret = get_first_ref(sctx->parent_root, ino,
    2471             :                                             &parent_inode, &parent_gen, name);
    2472             :                 } else {
    2473     4542920 :                         ret = __get_cur_name_and_parent(sctx, ino, gen,
    2474             :                                                         &parent_inode,
    2475             :                                                         &parent_gen, name);
                                               /*
                                                * 1 means an orphan name was produced; a negative
                                                * value is caught by the check just below.
                                                */
    2476     4542509 :                         if (ret)
    2477        1130 :                                 stop = 1;
    2478             :                 }
    2479             : 
    2480     4542813 :                 if (ret < 0)
    2481           0 :                         goto out;
    2482             : 
    2483     4542813 :                 ret = fs_path_add_path(dest, name);
    2484     4541791 :                 if (ret < 0)
    2485           0 :                         goto out;
    2486             : 
                                       /* Continue with the parent found for this component. */
    2487     4541791 :                 ino = parent_inode;
    2488     4541791 :                 gen = parent_gen;
    2489             :         }
    2490             : 
    2491     4871058 : out:
    2492     4871099 :         fs_path_free(name);
    2493     4870934 :         if (!ret)
    2494     4871019 :                 fs_path_unreverse(dest);
    2495     4870484 :         return ret;
    2496             : }
    2497             : 
    2498             : /*
    2499             :  * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
                        * (BTRFS_SEND_C_SNAPSHOT instead when sctx->parent_root is set).
                        *
                        * The path attribute is the name stored in the root backref item of
                        * send_root, read from the tree root. The UUID attributes carry
                        * received_uuid when it is not empty and uuid otherwise. With a parent
                        * root, its clone UUID and ctransid are added as well.
                        *
                        * Returns -ENOMEM on allocation failure, -ENOENT when no matching root
                        * backref item is found, another negative error from the helpers, or the
                        * result of send_cmd().
    2500             :  */
    2501         213 : static int send_subvol_begin(struct send_ctx *sctx)
    2502             : {
    2503         213 :         int ret;
    2504         213 :         struct btrfs_root *send_root = sctx->send_root;
    2505         213 :         struct btrfs_root *parent_root = sctx->parent_root;
    2506         213 :         struct btrfs_path *path;
    2507         213 :         struct btrfs_key key;
    2508         213 :         struct btrfs_root_ref *ref;
    2509         213 :         struct extent_buffer *leaf;
    2510         213 :         char *name = NULL;
    2511         213 :         int namelen;
    2512             : 
    2513         213 :         path = btrfs_alloc_path();
    2514         213 :         if (!path)
    2515             :                 return -ENOMEM;
    2516             : 
    2517         213 :         name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL);
    2518         213 :         if (!name) {
    2519           0 :                 btrfs_free_path(path);
    2520           0 :                 return -ENOMEM;
    2521             :         }
    2522             : 
                               /* Search the tree root from offset 0 for a backref of send_root. */
    2523         213 :         key.objectid = send_root->root_key.objectid;
    2524         213 :         key.type = BTRFS_ROOT_BACKREF_KEY;
    2525         213 :         key.offset = 0;
    2526             : 
    2527         213 :         ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root,
    2528             :                                 &key, path, 1, 0);
    2529         212 :         if (ret < 0)
    2530           0 :                 goto out;
    2531         212 :         if (ret) {
    2532           0 :                 ret = -ENOENT;
    2533           0 :                 goto out;
    2534             :         }
    2535             : 
                               /* The item found must still be a backref of this very root. */
    2536         212 :         leaf = path->nodes[0];
    2537         212 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    2538         212 :         if (key.type != BTRFS_ROOT_BACKREF_KEY ||
    2539         213 :             key.objectid != send_root->root_key.objectid) {
    2540           0 :                 ret = -ENOENT;
    2541           0 :                 goto out;
    2542             :         }
    2543         212 :         ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
    2544         213 :         namelen = btrfs_root_ref_name_len(leaf, ref);
                               /* The name bytes are read from right behind the root ref struct. */
    2545         212 :         read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen);
    2546         212 :         btrfs_release_path(path);
    2547             : 
    2548         213 :         if (parent_root) {
    2549         126 :                 ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT);
    2550         126 :                 if (ret < 0)
    2551           0 :                         goto out;
    2552             :         } else {
    2553          87 :                 ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL);
    2554          87 :                 if (ret < 0)
    2555           0 :                         goto out;
    2556             :         }
    2557             : 
    2558         213 :         TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
    2559             : 
    2560         212 :         if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid))
    2561           4 :                 TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
    2562             :                             sctx->send_root->root_item.received_uuid);
    2563             :         else
    2564         208 :                 TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
    2565             :                             sctx->send_root->root_item.uuid);
    2566             : 
    2567         213 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
    2568             :                     btrfs_root_ctransid(&sctx->send_root->root_item));
    2569         212 :         if (parent_root) {
    2570         125 :                 if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid))
    2571           4 :                         TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
    2572             :                                      parent_root->root_item.received_uuid);
    2573             :                 else
    2574         121 :                         TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
    2575             :                                      parent_root->root_item.uuid);
    2576         124 :                 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
    2577             :                             btrfs_root_ctransid(&sctx->parent_root->root_item));
    2578             :         }
    2579             : 
    2580         211 :         ret = send_cmd(sctx);
    2581             : 
                       /*
                        * NOTE(review): tlv_put_failure is not referenced explicitly in this
                        * function; presumably the TLV_PUT_* macros jump to it - confirm against
                        * their definitions.
                        */
    2582         213 : tlv_put_failure:
    2583         213 : out:
    2584         213 :         btrfs_free_path(path);
    2585         212 :         kfree(name);
    2586         212 :         return ret;
    2587             : }
    2588             : 
                       /*
                        * Sends a BTRFS_SEND_C_TRUNCATE command carrying the current path of the
                        * inode (as computed by get_cur_path) and the new size.
                        *
                        * Returns -ENOMEM if the path cannot be allocated, a negative error from
                        * the helpers, or the result of send_cmd().
                        */
    2589         447 : static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size)
    2590             : {
    2591         447 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    2592         447 :         int ret = 0;
    2593         447 :         struct fs_path *p;
    2594             : 
    2595         447 :         btrfs_debug(fs_info, "send_truncate %llu size=%llu", ino, size);
    2596             : 
    2597         447 :         p = fs_path_alloc();
    2598         447 :         if (!p)
    2599             :                 return -ENOMEM;
    2600             : 
    2601         447 :         ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE);
    2602         447 :         if (ret < 0)
    2603           0 :                 goto out;
    2604             : 
    2605         447 :         ret = get_cur_path(sctx, ino, gen, p);
    2606         447 :         if (ret < 0)
    2607           0 :                 goto out;
    2608         447 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    2609         447 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size);
    2610             : 
    2611         447 :         ret = send_cmd(sctx);
    2612             : 
    2613         447 : tlv_put_failure:
    2614         447 : out:
    2615         447 :         fs_path_free(p);
    2616         447 :         return ret;
    2617             : }
    2618             : 
                       /*
                        * Sends a BTRFS_SEND_C_CHMOD command carrying the current path of the
                        * inode (as computed by get_cur_path) and its mode.
                        *
                        * Returns -ENOMEM if the path cannot be allocated, a negative error from
                        * the helpers, or the result of send_cmd().
                        */
    2619      603164 : static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode)
    2620             : {
    2621      603164 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    2622      603164 :         int ret = 0;
    2623      603164 :         struct fs_path *p;
    2624             : 
    2625      603164 :         btrfs_debug(fs_info, "send_chmod %llu mode=%llu", ino, mode);
    2626             : 
    2627      603164 :         p = fs_path_alloc();
    2628      602960 :         if (!p)
    2629             :                 return -ENOMEM;
    2630             : 
    2631      602960 :         ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD);
    2632      602724 :         if (ret < 0)
    2633           0 :                 goto out;
    2634             : 
    2635      602724 :         ret = get_cur_path(sctx, ino, gen, p);
    2636      603577 :         if (ret < 0)
    2637           0 :                 goto out;
    2638      603577 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
                               /* Only the low 12 bits of mode (mask 07777) are sent. */
    2639      603299 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777);
    2640             : 
    2641      603230 :         ret = send_cmd(sctx);
    2642             : 
    2643      603787 : tlv_put_failure:
    2644      603787 : out:
    2645      603787 :         fs_path_free(p);
    2646      603787 :         return ret;
    2647             : }
    2648             : 
                       /*
                        * Sends a BTRFS_SEND_C_FILEATTR command carrying the current path of the
                        * inode (as computed by get_cur_path) and the fileattr value.
                        *
                        * Nothing is sent and 0 is returned when sctx->proto is below 2.
                        * Otherwise returns -ENOMEM if the path cannot be allocated, a negative
                        * error from the helpers, or the result of send_cmd().
                        */
    2649         183 : static int send_fileattr(struct send_ctx *sctx, u64 ino, u64 gen, u64 fileattr)
    2650             : {
    2651         183 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    2652         183 :         int ret = 0;
    2653         183 :         struct fs_path *p;
    2654             : 
    2655         183 :         if (sctx->proto < 2)
    2656             :                 return 0;
    2657             : 
    2658           0 :         btrfs_debug(fs_info, "send_fileattr %llu fileattr=%llu", ino, fileattr);
    2659             : 
    2660           0 :         p = fs_path_alloc();
    2661           0 :         if (!p)
    2662             :                 return -ENOMEM;
    2663             : 
    2664           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_FILEATTR);
    2665           0 :         if (ret < 0)
    2666           0 :                 goto out;
    2667             : 
    2668           0 :         ret = get_cur_path(sctx, ino, gen, p);
    2669           0 :         if (ret < 0)
    2670           0 :                 goto out;
    2671           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    2672           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_FILEATTR, fileattr);
    2673             : 
    2674           0 :         ret = send_cmd(sctx);
    2675             : 
    2676           0 : tlv_put_failure:
    2677           0 : out:
    2678           0 :         fs_path_free(p);
    2679           0 :         return ret;
    2680             : }
    2681             : 
                       /*
                        * Sends a BTRFS_SEND_C_CHOWN command carrying the current path of the
                        * inode (as computed by get_cur_path) together with uid and gid.
                        *
                        * Returns -ENOMEM if the path cannot be allocated, a negative error from
                        * the helpers, or the result of send_cmd().
                        */
    2682      603922 : static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid)
    2683             : {
    2684      603922 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    2685      603922 :         int ret = 0;
    2686      603922 :         struct fs_path *p;
    2687             : 
    2688      603922 :         btrfs_debug(fs_info, "send_chown %llu uid=%llu, gid=%llu",
    2689             :                     ino, uid, gid);
    2690             : 
    2691      603922 :         p = fs_path_alloc();
    2692      603116 :         if (!p)
    2693             :                 return -ENOMEM;
    2694             : 
    2695      603116 :         ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN);
    2696      602909 :         if (ret < 0)
    2697           0 :                 goto out;
    2698             : 
    2699      602909 :         ret = get_cur_path(sctx, ino, gen, p);
    2700      603653 :         if (ret < 0)
    2701           0 :                 goto out;
    2702      603653 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    2703      602558 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid);
    2704      602823 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid);
    2705             : 
    2706      602719 :         ret = send_cmd(sctx);
    2707             : 
    2708      604334 : tlv_put_failure:
    2709      604334 : out:
    2710      604334 :         fs_path_free(p);
    2711      604334 :         return ret;
    2712             : }
    2713             : 
                       /*
                        * Sends a BTRFS_SEND_C_UTIMES command for the inode. The atime, mtime and
                        * ctime are read from the inode item of @ino in sctx->send_root; otime is
                        * added when sctx->proto is at least 2.
                        *
                        * Returns -ENOMEM on allocation failure, -ENOENT when the inode item is
                        * not found, another negative error from the helpers, or the result of
                        * send_cmd().
                        */
    2714     1009675 : static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
    2715             : {
    2716     1009675 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    2717     1009675 :         int ret = 0;
    2718     1009675 :         struct fs_path *p = NULL;
    2719     1009675 :         struct btrfs_inode_item *ii;
    2720     1009675 :         struct btrfs_path *path = NULL;
    2721     1009675 :         struct extent_buffer *eb;
    2722     1009675 :         struct btrfs_key key;
    2723     1009675 :         int slot;
    2724             : 
    2725     1009675 :         btrfs_debug(fs_info, "send_utimes %llu", ino);
    2726             : 
    2727     1009675 :         p = fs_path_alloc();
    2728     1009567 :         if (!p)
    2729             :                 return -ENOMEM;
    2730             : 
    2731     1009567 :         path = alloc_path_for_send();
    2732     1009525 :         if (!path) {
    2733           0 :                 ret = -ENOMEM;
    2734           0 :                 goto out;
    2735             :         }
    2736             : 
    2737     1009525 :         key.objectid = ino;
    2738     1009525 :         key.type = BTRFS_INODE_ITEM_KEY;
    2739     1009525 :         key.offset = 0;
    2740     1009525 :         ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
                               /* A positive result is turned into -ENOENT. */
    2741     1010357 :         if (ret > 0)
    2742             :                 ret = -ENOENT;
    2743     1010387 :         if (ret < 0)
    2744           0 :                 goto out;
    2745             : 
    2746     1010357 :         eb = path->nodes[0];
    2747     1010357 :         slot = path->slots[0];
    2748     1010357 :         ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
    2749             : 
    2750     1010103 :         ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES);
    2751     1010096 :         if (ret < 0)
    2752           0 :                 goto out;
    2753             : 
    2754     1010096 :         ret = get_cur_path(sctx, ino, gen, p);
    2755     1010151 :         if (ret < 0)
    2756           0 :                 goto out;
    2757     1010151 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    2758     1010165 :         TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime);
    2759     1010161 :         TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime);
    2760     1010151 :         TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, &ii->ctime);
    2761     1010176 :         if (sctx->proto >= 2)
    2762           0 :                 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_OTIME, eb, &ii->otime);
    2763             : 
    2764     1010176 :         ret = send_cmd(sctx);
    2765             : 
    2766     1010397 : tlv_put_failure:
    2767     1010397 : out:
    2768     1010397 :         fs_path_free(p);
    2769     1010188 :         btrfs_free_path(path);
    2770     1010188 :         return ret;
    2771             : }
    2772             : 
    2773             : /*
                        * Remembers the directory (dir, gen) in sctx->dir_utimes_cache. Nothing is
                        * done if it is already cached. If the cache entry cannot be allocated or
                        * stored, send_utimes() is called for the directory right away and its
                        * result is returned; otherwise 0 is returned.
                        *
    2774             :  * If the cache is full, we can't remove entries from it and do a call to
    2775             :  * send_utimes() for each respective inode, because we might be finishing
    2776             :  * processing an inode that is a directory and it just got renamed, and existing
    2777             :  * entries in the cache may refer to inodes that have the directory in their
    2778             :  * full path - in which case we would generate outdated paths (pre-rename)
    2779             :  * for the inodes that the cache entries point to. Instead of pruning the
    2780             :  * cache when inserting, do it after we finish processing each inode at
    2781             :  * finish_inode_if_needed().
    2782             :  */
    2783      611233 : static int cache_dir_utimes(struct send_ctx *sctx, u64 dir, u64 gen)
    2784             : {
    2785      611233 :         struct btrfs_lru_cache_entry *entry;
    2786      611233 :         int ret;
    2787             : 
    2788      611233 :         entry = btrfs_lru_cache_lookup(&sctx->dir_utimes_cache, dir, gen);
    2789      611235 :         if (entry != NULL)
    2790             :                 return 0;
    2791             : 
    2792             :         /* Caching is optional, don't fail if we can't allocate memory. */
    2793        2986 :         entry = kmalloc(sizeof(*entry), GFP_KERNEL);
    2794        2988 :         if (!entry)
    2795           0 :                 return send_utimes(sctx, dir, gen);
    2796             : 
    2797        2988 :         entry->key = dir;
    2798        2988 :         entry->gen = gen;
    2799             : 
    2800        2988 :         ret = btrfs_lru_cache_store(&sctx->dir_utimes_cache, entry, GFP_KERNEL);
                               /* The lookup above found no entry, so -EEXIST is not expected. */
    2801        2985 :         ASSERT(ret != -EEXIST);
    2802        2985 :         if (ret) {
                                       /* Not cached: free the entry and send the utimes now. */
    2803           0 :                 kfree(entry);
    2804           0 :                 return send_utimes(sctx, dir, gen);
    2805             :         }
    2806             : 
    2807             :         return 0;
    2808             : }
    2809             : 
                       /*
                        * Shrinks sctx->dir_utimes_cache until it holds no more than
                        * SEND_MAX_DIR_UTIMES_CACHE_SIZE entries. For each entry returned by
                        * btrfs_lru_cache_lru_entry() a utimes command is sent before the entry
                        * is removed.
                        *
                        * Returns 0, or the first non-zero result of send_utimes(); in that case
                        * the entry that failed is left in the cache.
                        */
    2810     4280182 : static int trim_dir_utimes_cache(struct send_ctx *sctx)
    2811             : {
    2812     4281759 :         while (btrfs_lru_cache_size(&sctx->dir_utimes_cache) >
    2813             :                SEND_MAX_DIR_UTIMES_CACHE_SIZE) {
    2814        1690 :                 struct btrfs_lru_cache_entry *lru;
    2815        1690 :                 int ret;
    2816             : 
    2817        1690 :                 lru = btrfs_lru_cache_lru_entry(&sctx->dir_utimes_cache);
    2818        1690 :                 ASSERT(lru != NULL);
    2819             : 
    2820        1690 :                 ret = send_utimes(sctx, lru->key, lru->gen);
    2821        1690 :                 if (ret)
    2822           0 :                         return ret;
    2823             : 
    2824        1690 :                 btrfs_lru_cache_remove(&sctx->dir_utimes_cache, lru);
    2825             :         }
    2826             : 
    2827             :         return 0;
    2828             : }
    2829             : 
    2830             : /*
    2831             :  * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have
    2832             :  * a valid path yet because we did not process the refs yet. So, the inode
    2833             :  * is created as orphan.
    2834             :  */
    2835      602683 : static int send_create_inode(struct send_ctx *sctx, u64 ino)
    2836             : {
    2837      602683 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    2838      602683 :         int ret = 0;
    2839      602683 :         struct fs_path *p;
    2840      602683 :         int cmd;
    2841      602683 :         struct btrfs_inode_info info;
    2842      602683 :         u64 gen;
    2843      602683 :         u64 mode;
    2844      602683 :         u64 rdev;
    2845             : 
    2846      602683 :         btrfs_debug(fs_info, "send_create_inode %llu", ino);
    2847             : 
    2848      602683 :         p = fs_path_alloc();
    2849      603120 :         if (!p)
    2850             :                 return -ENOMEM;
    2851             : 
    2852      603120 :         if (ino != sctx->cur_ino) {
    2853         111 :                 ret = get_inode_info(sctx->send_root, ino, &info);
    2854         111 :                 if (ret < 0)
    2855           0 :                         goto out;
    2856         111 :                 gen = info.gen;
    2857         111 :                 mode = info.mode;
    2858         111 :                 rdev = info.rdev;
    2859             :         } else {
    2860      603009 :                 gen = sctx->cur_inode_gen;
    2861      603009 :                 mode = sctx->cur_inode_mode;
    2862      603009 :                 rdev = sctx->cur_inode_rdev;
    2863             :         }
    2864             : 
    2865      603120 :         if (S_ISREG(mode)) {
    2866             :                 cmd = BTRFS_SEND_C_MKFILE;
    2867        1485 :         } else if (S_ISDIR(mode)) {
    2868             :                 cmd = BTRFS_SEND_C_MKDIR;
    2869         913 :         } else if (S_ISLNK(mode)) {
    2870             :                 cmd = BTRFS_SEND_C_SYMLINK;
    2871         611 :         } else if (S_ISCHR(mode) || S_ISBLK(mode)) {
    2872             :                 cmd = BTRFS_SEND_C_MKNOD;
    2873           0 :         } else if (S_ISFIFO(mode)) {
    2874             :                 cmd = BTRFS_SEND_C_MKFIFO;
    2875           0 :         } else if (S_ISSOCK(mode)) {
    2876             :                 cmd = BTRFS_SEND_C_MKSOCK;
    2877             :         } else {
    2878           0 :                 btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o",
    2879             :                                 (int)(mode & S_IFMT));
    2880           0 :                 ret = -EOPNOTSUPP;
    2881           0 :                 goto out;
    2882             :         }
    2883             : 
    2884      603120 :         ret = begin_cmd(sctx, cmd);
    2885      602827 :         if (ret < 0)
    2886           0 :                 goto out;
    2887             : 
    2888      602827 :         ret = gen_unique_name(sctx, ino, gen, p);
    2889      604187 :         if (ret < 0)
    2890           0 :                 goto out;
    2891             : 
    2892      604187 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    2893      604167 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino);
    2894             : 
    2895      604181 :         if (S_ISLNK(mode)) {
    2896         302 :                 fs_path_reset(p);
    2897         302 :                 ret = read_symlink(sctx->send_root, ino, p);
    2898         302 :                 if (ret < 0)
    2899           0 :                         goto out;
    2900         302 :                 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
    2901      603879 :         } else if (S_ISCHR(mode) || S_ISBLK(mode) ||
    2902      603268 :                    S_ISFIFO(mode) || S_ISSOCK(mode)) {
    2903         611 :                 TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev));
    2904         611 :                 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode);
    2905             :         }
    2906             : 
    2907      604181 :         ret = send_cmd(sctx);
    2908      604190 :         if (ret < 0)
    2909           0 :                 goto out;
    2910             : 
    2911             : 
    2912      604190 : tlv_put_failure:
    2913      604190 : out:
    2914      604190 :         fs_path_free(p);
    2915      604190 :         return ret;
    2916             : }
    2917             : 
    2918         572 : static void cache_dir_created(struct send_ctx *sctx, u64 dir)
    2919             : {
    2920         572 :         struct btrfs_lru_cache_entry *entry;
    2921         572 :         int ret;
    2922             : 
    2923             :         /* Caching is optional, ignore any failures. */
    2924         572 :         entry = kmalloc(sizeof(*entry), GFP_KERNEL);
    2925         572 :         if (!entry)
    2926             :                 return;
    2927             : 
    2928         572 :         entry->key = dir;
    2929         572 :         entry->gen = 0;
    2930         572 :         ret = btrfs_lru_cache_store(&sctx->dir_created_cache, entry, GFP_KERNEL);
    2931         572 :         if (ret < 0)
    2932           0 :                 kfree(entry);
    2933             : }
    2934             : 
    2935             : /*
    2936             :  * We need some special handling for inodes that get processed before the parent
    2937             :  * directory got created. See process_recorded_refs for details.
    2938             :  * This function does the check if we already created the dir out of order.
    2939             :  */
    2940         714 : static int did_create_dir(struct send_ctx *sctx, u64 dir)
    2941             : {
    2942         714 :         int ret = 0;
    2943         714 :         int iter_ret = 0;
    2944         714 :         struct btrfs_path *path = NULL;
    2945         714 :         struct btrfs_key key;
    2946         714 :         struct btrfs_key found_key;
    2947         714 :         struct btrfs_key di_key;
    2948         714 :         struct btrfs_dir_item *di;
    2949             : 
    2950         714 :         if (btrfs_lru_cache_lookup(&sctx->dir_created_cache, dir, 0))
    2951             :                 return 1;
    2952             : 
    2953         572 :         path = alloc_path_for_send();
    2954         572 :         if (!path)
    2955             :                 return -ENOMEM;
    2956             : 
    2957         572 :         key.objectid = dir;
    2958         572 :         key.type = BTRFS_DIR_INDEX_KEY;
    2959         572 :         key.offset = 0;
    2960             : 
    2961        2232 :         btrfs_for_each_slot(sctx->send_root, &key, &found_key, path, iter_ret) {
    2962        2198 :                 struct extent_buffer *eb = path->nodes[0];
    2963             : 
    2964        2198 :                 if (found_key.objectid != key.objectid ||
    2965        1660 :                     found_key.type != key.type) {
    2966             :                         ret = 0;
    2967             :                         break;
    2968             :                 }
    2969             : 
    2970        1660 :                 di = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dir_item);
    2971        1660 :                 btrfs_dir_item_key_to_cpu(eb, di, &di_key);
    2972             : 
    2973        1660 :                 if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
    2974        1659 :                     di_key.objectid < sctx->send_progress) {
    2975           0 :                         ret = 1;
    2976           0 :                         cache_dir_created(sctx, dir);
    2977           0 :                         break;
    2978             :                 }
    2979             :         }
    2980             :         /* Catch error found during iteration */
    2981         572 :         if (iter_ret < 0)
    2982           0 :                 ret = iter_ret;
    2983             : 
    2984         572 :         btrfs_free_path(path);
    2985         572 :         return ret;
    2986             : }
    2987             : 
    2988             : /*
    2989             :  * Only creates the inode if it is:
    2990             :  * 1. Not a directory
    2991             :  * 2. Or a directory which was not created already due to out of order
    2992             :  *    directories. See did_create_dir and process_recorded_refs for details.
    2993             :  */
    2994      603712 : static int send_create_inode_if_needed(struct send_ctx *sctx)
    2995             : {
    2996      603712 :         int ret;
    2997             : 
    2998      603712 :         if (S_ISDIR(sctx->cur_inode_mode)) {
    2999         572 :                 ret = did_create_dir(sctx, sctx->cur_ino);
    3000         572 :                 if (ret < 0)
    3001             :                         return ret;
    3002         572 :                 else if (ret > 0)
    3003             :                         return 0;
    3004             :         }
    3005             : 
    3006      603601 :         ret = send_create_inode(sctx, sctx->cur_ino);
    3007             : 
    3008      604078 :         if (ret == 0 && S_ISDIR(sctx->cur_inode_mode))
    3009         461 :                 cache_dir_created(sctx, sctx->cur_ino);
    3010             : 
    3011             :         return ret;
    3012             : }
    3013             : 
    3014             : struct recorded_ref {
    3015             :         struct list_head list;
    3016             :         char *name;
    3017             :         struct fs_path *full_path;
    3018             :         u64 dir;
    3019             :         u64 dir_gen;
    3020             :         int name_len;
    3021             :         struct rb_node node;
    3022             :         struct rb_root *root;
    3023             : };
    3024             : 
    3025     1221211 : static struct recorded_ref *recorded_ref_alloc(void)
    3026             : {
    3027     1221211 :         struct recorded_ref *ref;
    3028             : 
    3029     1221211 :         ref = kzalloc(sizeof(*ref), GFP_KERNEL);
    3030     1221231 :         if (!ref)
    3031             :                 return NULL;
    3032     1221231 :         RB_CLEAR_NODE(&ref->node);
    3033     1221231 :         INIT_LIST_HEAD(&ref->list);
    3034     1221231 :         return ref;
    3035             : }
    3036             : 
    3037     1221311 : static void recorded_ref_free(struct recorded_ref *ref)
    3038             : {
    3039     1221311 :         if (!ref)
    3040             :                 return;
    3041     1221311 :         if (!RB_EMPTY_NODE(&ref->node))
    3042      611069 :                 rb_erase(&ref->node, ref->root);
    3043     1221302 :         list_del(&ref->list);
    3044     1221296 :         fs_path_free(ref->full_path);
    3045     1221298 :         kfree(ref);
    3046             : }
    3047             : 
    3048     1223035 : static void set_ref_path(struct recorded_ref *ref, struct fs_path *path)
    3049             : {
    3050     1223035 :         ref->full_path = path;
    3051     1223035 :         ref->name = (char *)kbasename(ref->full_path->start);
    3052     1223035 :         ref->name_len = ref->full_path->end - ref->name;
    3053     1223035 : }
    3054             : 
    3055      610240 : static int dup_ref(struct recorded_ref *ref, struct list_head *list)
    3056             : {
    3057      610240 :         struct recorded_ref *new;
    3058             : 
    3059      610240 :         new = recorded_ref_alloc();
    3060      610239 :         if (!new)
    3061             :                 return -ENOMEM;
    3062             : 
    3063      610239 :         new->dir = ref->dir;
    3064      610239 :         new->dir_gen = ref->dir_gen;
    3065      610239 :         list_add_tail(&new->list, list);
    3066      610239 :         return 0;
    3067             : }
    3068             : 
    3069             : static void __free_recorded_refs(struct list_head *head)
    3070             : {
    3071     1211736 :         struct recorded_ref *cur;
    3072             : 
    3073     3037947 :         while (!list_empty(head)) {
    3074     1220303 :                 cur = list_entry(head->next, struct recorded_ref, list);
    3075     1220303 :                 recorded_ref_free(cur);
    3076             :         }
    3077             : }
    3078             : 
    3079      605926 : static void free_recorded_refs(struct send_ctx *sctx)
    3080             : {
    3081      605926 :         __free_recorded_refs(&sctx->new_refs);
    3082      605914 :         __free_recorded_refs(&sctx->deleted_refs);
    3083      605920 : }
    3084             : 
    3085             : /*
    3086             :  * Renames/moves a file/dir to its orphan name. Used when the first
    3087             :  * ref of an unprocessed inode gets overwritten and for all non empty
    3088             :  * directories.
    3089             :  */
    3090         234 : static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
    3091             :                           struct fs_path *path)
    3092             : {
    3093         234 :         int ret;
    3094         234 :         struct fs_path *orphan;
    3095             : 
    3096         234 :         orphan = fs_path_alloc();
    3097         234 :         if (!orphan)
    3098             :                 return -ENOMEM;
    3099             : 
    3100         234 :         ret = gen_unique_name(sctx, ino, gen, orphan);
    3101         234 :         if (ret < 0)
    3102           0 :                 goto out;
    3103             : 
    3104         234 :         ret = send_rename(sctx, path, orphan);
    3105             : 
    3106         234 : out:
    3107         234 :         fs_path_free(orphan);
    3108         234 :         return ret;
    3109             : }
    3110             : 
    3111           9 : static struct orphan_dir_info *add_orphan_dir_info(struct send_ctx *sctx,
    3112             :                                                    u64 dir_ino, u64 dir_gen)
    3113             : {
    3114           9 :         struct rb_node **p = &sctx->orphan_dirs.rb_node;
    3115           9 :         struct rb_node *parent = NULL;
    3116           9 :         struct orphan_dir_info *entry, *odi;
    3117             : 
    3118          10 :         while (*p) {
    3119           1 :                 parent = *p;
    3120           1 :                 entry = rb_entry(parent, struct orphan_dir_info, node);
    3121           1 :                 if (dir_ino < entry->ino)
    3122           0 :                         p = &(*p)->rb_left;
    3123           1 :                 else if (dir_ino > entry->ino)
    3124           1 :                         p = &(*p)->rb_right;
    3125           0 :                 else if (dir_gen < entry->gen)
    3126           0 :                         p = &(*p)->rb_left;
    3127           0 :                 else if (dir_gen > entry->gen)
    3128           0 :                         p = &(*p)->rb_right;
    3129             :                 else
    3130           0 :                         return entry;
    3131             :         }
    3132             : 
    3133           9 :         odi = kmalloc(sizeof(*odi), GFP_KERNEL);
    3134           9 :         if (!odi)
    3135             :                 return ERR_PTR(-ENOMEM);
    3136           9 :         odi->ino = dir_ino;
    3137           9 :         odi->gen = dir_gen;
    3138           9 :         odi->last_dir_index_offset = 0;
    3139           9 :         odi->dir_high_seq_ino = 0;
    3140             : 
    3141           9 :         rb_link_node(&odi->node, parent, p);
    3142           9 :         rb_insert_color(&odi->node, &sctx->orphan_dirs);
    3143           9 :         return odi;
    3144             : }
    3145             : 
    3146     4543894 : static struct orphan_dir_info *get_orphan_dir_info(struct send_ctx *sctx,
    3147             :                                                    u64 dir_ino, u64 gen)
    3148             : {
    3149     4543894 :         struct rb_node *n = sctx->orphan_dirs.rb_node;
    3150     4543894 :         struct orphan_dir_info *entry;
    3151             : 
    3152     4544001 :         while (n) {
    3153         171 :                 entry = rb_entry(n, struct orphan_dir_info, node);
    3154         171 :                 if (dir_ino < entry->ino)
    3155          55 :                         n = n->rb_left;
    3156         116 :                 else if (dir_ino > entry->ino)
    3157          49 :                         n = n->rb_right;
    3158          67 :                 else if (gen < entry->gen)
    3159           0 :                         n = n->rb_left;
    3160          67 :                 else if (gen > entry->gen)
    3161           3 :                         n = n->rb_right;
    3162             :                 else
    3163          64 :                         return entry;
    3164             :         }
    3165             :         return NULL;
    3166             : }
    3167             : 
    3168             : static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen)
    3169             : {
    3170     4543285 :         struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino, gen);
    3171             : 
    3172     4543402 :         return odi != NULL;
    3173             : }
    3174             : 
    3175          11 : static void free_orphan_dir_info(struct send_ctx *sctx,
    3176             :                                  struct orphan_dir_info *odi)
    3177             : {
    3178          11 :         if (!odi)
    3179             :                 return;
    3180           9 :         rb_erase(&odi->node, &sctx->orphan_dirs);
    3181           9 :         kfree(odi);
    3182             : }
    3183             : 
    3184             : /*
    3185             :  * Returns 1 if a directory can be removed at this point in time.
    3186             :  * We check this by iterating all dir items and checking if the inode behind
    3187             :  * the dir item was already processed.
    3188             :  */
    3189          49 : static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen)
    3190             : {
    3191          49 :         int ret = 0;
    3192          49 :         int iter_ret = 0;
    3193          49 :         struct btrfs_root *root = sctx->parent_root;
    3194          49 :         struct btrfs_path *path;
    3195          49 :         struct btrfs_key key;
    3196          49 :         struct btrfs_key found_key;
    3197          49 :         struct btrfs_key loc;
    3198          49 :         struct btrfs_dir_item *di;
    3199          49 :         struct orphan_dir_info *odi = NULL;
    3200          49 :         u64 dir_high_seq_ino = 0;
    3201          49 :         u64 last_dir_index_offset = 0;
    3202             : 
    3203             :         /*
    3204             :          * Don't try to rmdir the top/root subvolume dir.
    3205             :          */
    3206          49 :         if (dir == BTRFS_FIRST_FREE_OBJECTID)
    3207             :                 return 0;
    3208             : 
    3209          45 :         odi = get_orphan_dir_info(sctx, dir, dir_gen);
    3210          45 :         if (odi && sctx->cur_ino < odi->dir_high_seq_ino)
    3211             :                 return 0;
    3212             : 
    3213          37 :         path = alloc_path_for_send();
    3214          37 :         if (!path)
    3215             :                 return -ENOMEM;
    3216             : 
    3217          37 :         if (!odi) {
    3218             :                 /*
    3219             :                  * Find the inode number associated with the last dir index
    3220             :                  * entry. This is very likely the inode with the highest number
    3221             :                  * of all inodes that have an entry in the directory. We can
    3222             :                  * then use it to avoid future calls to can_rmdir(), when
    3223             :                  * processing inodes with a lower number, from having to search
    3224             :                  * the parent root b+tree for dir index keys.
    3225             :                  */
    3226          25 :                 key.objectid = dir;
    3227          25 :                 key.type = BTRFS_DIR_INDEX_KEY;
    3228          25 :                 key.offset = (u64)-1;
    3229             : 
    3230          25 :                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    3231          25 :                 if (ret < 0) {
    3232           0 :                         goto out;
    3233          25 :                 } else if (ret > 0) {
    3234             :                         /* Can't happen, the root is never empty. */
    3235          25 :                         ASSERT(path->slots[0] > 0);
    3236          25 :                         if (WARN_ON(path->slots[0] == 0)) {
    3237           0 :                                 ret = -EUCLEAN;
    3238           0 :                                 goto out;
    3239             :                         }
    3240          25 :                         path->slots[0]--;
    3241             :                 }
    3242             : 
    3243          25 :                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
    3244          25 :                 if (key.objectid != dir || key.type != BTRFS_DIR_INDEX_KEY) {
    3245             :                         /* No index keys, dir can be removed. */
    3246          14 :                         ret = 1;
    3247          14 :                         goto out;
    3248             :                 }
    3249             : 
    3250          11 :                 di = btrfs_item_ptr(path->nodes[0], path->slots[0],
    3251             :                                     struct btrfs_dir_item);
    3252          11 :                 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
    3253          11 :                 dir_high_seq_ino = loc.objectid;
    3254          11 :                 if (sctx->cur_ino < dir_high_seq_ino) {
    3255           9 :                         ret = 0;
    3256           9 :                         goto out;
    3257             :                 }
    3258             : 
    3259           2 :                 btrfs_release_path(path);
    3260             :         }
    3261             : 
    3262          14 :         key.objectid = dir;
    3263          14 :         key.type = BTRFS_DIR_INDEX_KEY;
    3264          14 :         key.offset = (odi ? odi->last_dir_index_offset : 0);
    3265             : 
    3266          38 :         btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
    3267          37 :                 struct waiting_dir_move *dm;
    3268             : 
    3269          37 :                 if (found_key.objectid != key.objectid ||
    3270          27 :                     found_key.type != key.type)
    3271             :                         break;
    3272             : 
    3273          27 :                 di = btrfs_item_ptr(path->nodes[0], path->slots[0],
    3274             :                                 struct btrfs_dir_item);
    3275          27 :                 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
    3276             : 
    3277          27 :                 dir_high_seq_ino = max(dir_high_seq_ino, loc.objectid);
    3278          27 :                 last_dir_index_offset = found_key.offset;
    3279             : 
    3280          27 :                 dm = get_waiting_dir_move(sctx, loc.objectid);
    3281          27 :                 if (dm) {
    3282           3 :                         dm->rmdir_ino = dir;
    3283           3 :                         dm->rmdir_gen = dir_gen;
    3284           3 :                         ret = 0;
    3285           3 :                         goto out;
    3286             :                 }
    3287             : 
    3288          24 :                 if (loc.objectid > sctx->cur_ino) {
    3289           0 :                         ret = 0;
    3290           0 :                         goto out;
    3291             :                 }
    3292             :         }
    3293          11 :         if (iter_ret < 0) {
    3294           0 :                 ret = iter_ret;
    3295           0 :                 goto out;
    3296             :         }
    3297          11 :         free_orphan_dir_info(sctx, odi);
    3298             : 
    3299          11 :         ret = 1;
    3300             : 
    3301          37 : out:
    3302          37 :         btrfs_free_path(path);
    3303             : 
    3304          37 :         if (ret)
    3305             :                 return ret;
    3306             : 
    3307          12 :         if (!odi) {
    3308           9 :                 odi = add_orphan_dir_info(sctx, dir, dir_gen);
    3309           9 :                 if (IS_ERR(odi))
    3310           0 :                         return PTR_ERR(odi);
    3311             : 
    3312           9 :                 odi->gen = dir_gen;
    3313             :         }
    3314             : 
    3315          12 :         odi->last_dir_index_offset = last_dir_index_offset;
    3316          12 :         odi->dir_high_seq_ino = max(odi->dir_high_seq_ino, dir_high_seq_ino);
    3317             : 
    3318          12 :         return 0;
    3319             : }
    3320             : 
    3321     1012607 : static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
    3322             : {
    3323     1012607 :         struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino);
    3324             : 
    3325     1012607 :         return entry != NULL;
    3326             : }
    3327             : 
    3328          96 : static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)
    3329             : {
    3330          96 :         struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
    3331          96 :         struct rb_node *parent = NULL;
    3332          96 :         struct waiting_dir_move *entry, *dm;
    3333             : 
    3334          96 :         dm = kmalloc(sizeof(*dm), GFP_KERNEL);
    3335          96 :         if (!dm)
    3336             :                 return -ENOMEM;
    3337          96 :         dm->ino = ino;
    3338          96 :         dm->rmdir_ino = 0;
    3339          96 :         dm->rmdir_gen = 0;
    3340          96 :         dm->orphanized = orphanized;
    3341             : 
    3342         225 :         while (*p) {
    3343         129 :                 parent = *p;
    3344         129 :                 entry = rb_entry(parent, struct waiting_dir_move, node);
    3345         129 :                 if (ino < entry->ino) {
    3346           1 :                         p = &(*p)->rb_left;
    3347         128 :                 } else if (ino > entry->ino) {
    3348         128 :                         p = &(*p)->rb_right;
    3349             :                 } else {
    3350           0 :                         kfree(dm);
    3351           0 :                         return -EEXIST;
    3352             :                 }
    3353             :         }
    3354             : 
    3355          96 :         rb_link_node(&dm->node, parent, p);
    3356          96 :         rb_insert_color(&dm->node, &sctx->waiting_dir_moves);
    3357          96 :         return 0;
    3358             : }
    3359             : 
    3360             : static struct waiting_dir_move *
    3361             : get_waiting_dir_move(struct send_ctx *sctx, u64 ino)
    3362             : {
    3363     5556355 :         struct rb_node *n = sctx->waiting_dir_moves.rb_node;
    3364     5556355 :         struct waiting_dir_move *entry;
    3365             : 
    3366     5651921 :         while (n) {
    3367       96267 :                 entry = rb_entry(n, struct waiting_dir_move, node);
    3368       96267 :                 if (ino < entry->ino)
    3369       58236 :                         n = n->rb_left;
    3370       38031 :                 else if (ino > entry->ino)
    3371       37330 :                         n = n->rb_right;
    3372             :                 else
    3373             :                         return entry;
    3374             :         }
    3375             :         return NULL;
    3376             : }
    3377             : 
    3378          96 : static void free_waiting_dir_move(struct send_ctx *sctx,
    3379             :                                   struct waiting_dir_move *dm)
    3380             : {
    3381          96 :         if (!dm)
    3382             :                 return;
    3383          96 :         rb_erase(&dm->node, &sctx->waiting_dir_moves);
    3384          96 :         kfree(dm);
    3385             : }
    3386             : 
    3387          96 : static int add_pending_dir_move(struct send_ctx *sctx,
    3388             :                                 u64 ino,
    3389             :                                 u64 ino_gen,
    3390             :                                 u64 parent_ino,
    3391             :                                 struct list_head *new_refs,
    3392             :                                 struct list_head *deleted_refs,
    3393             :                                 const bool is_orphan)
    3394             : {
    3395          96 :         struct rb_node **p = &sctx->pending_dir_moves.rb_node;
    3396          96 :         struct rb_node *parent = NULL;
    3397          96 :         struct pending_dir_move *entry = NULL, *pm;
    3398          96 :         struct recorded_ref *cur;
    3399          96 :         int exists = 0;
    3400          96 :         int ret;
    3401             : 
    3402          96 :         pm = kmalloc(sizeof(*pm), GFP_KERNEL);
    3403          96 :         if (!pm)
    3404             :                 return -ENOMEM;
    3405          96 :         pm->parent_ino = parent_ino;
    3406          96 :         pm->ino = ino;
    3407          96 :         pm->gen = ino_gen;
    3408          96 :         INIT_LIST_HEAD(&pm->list);
    3409          96 :         INIT_LIST_HEAD(&pm->update_refs);
    3410          96 :         RB_CLEAR_NODE(&pm->node);
    3411             : 
    3412         160 :         while (*p) {
    3413          87 :                 parent = *p;
    3414          87 :                 entry = rb_entry(parent, struct pending_dir_move, node);
    3415          87 :                 if (parent_ino < entry->parent_ino) {
    3416          31 :                         p = &(*p)->rb_left;
    3417          56 :                 } else if (parent_ino > entry->parent_ino) {
    3418          33 :                         p = &(*p)->rb_right;
    3419             :                 } else {
    3420             :                         exists = 1;
    3421             :                         break;
    3422             :                 }
    3423             :         }
    3424             : 
    3425         190 :         list_for_each_entry(cur, deleted_refs, list) {
    3426          94 :                 ret = dup_ref(cur, &pm->update_refs);
    3427          94 :                 if (ret < 0)
    3428           0 :                         goto out;
    3429             :         }
    3430         194 :         list_for_each_entry(cur, new_refs, list) {
    3431          98 :                 ret = dup_ref(cur, &pm->update_refs);
    3432          98 :                 if (ret < 0)
    3433           0 :                         goto out;
    3434             :         }
    3435             : 
    3436          96 :         ret = add_waiting_dir_move(sctx, pm->ino, is_orphan);
    3437          96 :         if (ret)
    3438           0 :                 goto out;
    3439             : 
    3440          96 :         if (exists) {
    3441          23 :                 list_add_tail(&pm->list, &entry->list);
    3442             :         } else {
    3443          73 :                 rb_link_node(&pm->node, parent, p);
    3444          73 :                 rb_insert_color(&pm->node, &sctx->pending_dir_moves);
    3445             :         }
    3446             :         ret = 0;
    3447           0 : out:
    3448           0 :         if (ret) {
    3449             :                 __free_recorded_refs(&pm->update_refs);
    3450           0 :                 kfree(pm);
    3451             :         }
    3452             :         return ret;
    3453             : }
    3454             : /* Find the pending_dir_moves rb-tree entry whose parent_ino is @parent_ino; NULL if absent. */
    3455             : static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
    3456             :                                                       u64 parent_ino)
    3457             : {
    3458     1008475 :         struct rb_node *n = sctx->pending_dir_moves.rb_node;
    3459     1008475 :         struct pending_dir_move *entry;
    3460             :         /* Descend from the root, branching on how parent_ino compares with each node. */
    3461     1010458 :         while (n) {
    3462        2056 :                 entry = rb_entry(n, struct pending_dir_move, node);
    3463        2056 :                 if (parent_ino < entry->parent_ino)
    3464        1865 :                         n = n->rb_left;
    3465         191 :                 else if (parent_ino > entry->parent_ino)
    3466         118 :                         n = n->rb_right;
    3467             :                 else
    3468             :                         return entry;
    3469             :         }
    3470             :         return NULL;
    3471             : }
    3472             : /* Walk up from @ino via parents; return 1 if a parent equals the starting @ino, < 0 on helper error. */
    3473          96 : static int path_loop(struct send_ctx *sctx, struct fs_path *name,
    3474             :                      u64 ino, u64 gen, u64 *ancestor_ino)
    3475             : {
    3476          96 :         int ret = 0;
    3477          96 :         u64 parent_inode = 0;
    3478          96 :         u64 parent_gen = 0;
    3479          96 :         u64 start_ino = ino;
    3480             :         /* 0 marks "no ancestor recorded yet"; it is only assigned below while still 0. */
    3481          96 :         *ancestor_ino = 0;
    3482         661 :         while (ino != BTRFS_FIRST_FREE_OBJECTID) {
    3483         575 :                 fs_path_reset(name);
    3484             :                 /* Stop at an inode waiting for rmdir; one waiting for a move is resolved in parent_root. */
    3485         575 :                 if (is_waiting_for_rm(sctx, ino, gen))
    3486             :                         break;
    3487         575 :                 if (is_waiting_for_move(sctx, ino)) {
    3488          26 :                         if (*ancestor_ino == 0)
    3489          10 :                                 *ancestor_ino = ino;
    3490          26 :                         ret = get_first_ref(sctx->parent_root, ino,
    3491             :                                             &parent_inode, &parent_gen, name);
    3492             :                 } else {
    3493         549 :                         ret = __get_cur_name_and_parent(sctx, ino, gen,
    3494             :                                                         &parent_inode,
    3495             :                                                         &parent_gen, name);
    3496         549 :                         if (ret > 0) {
    3497             :                                 ret = 0;
    3498             :                                 break;
    3499             :                         }
    3500             :                 }
    3501         567 :                 if (ret < 0)
    3502             :                         break;
    3503         567 :                 if (parent_inode == start_ino) {
    3504           2 :                         ret = 1;
    3505           2 :                         if (*ancestor_ino == 0)
    3506           0 :                                 *ancestor_ino = ino;
    3507             :                         break;
    3508             :                 }
    3509         565 :                 ino = parent_inode;
    3510         565 :                 gen = parent_gen;
    3511             :         }
    3512          96 :         return ret;
    3513             : }
    3514             : /* Send the delayed rename described by @pm, or re-queue it when path_loop() reports a loop. */
    3515          96 : static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
    3516             : {
    3517          96 :         struct fs_path *from_path = NULL;
    3518          96 :         struct fs_path *to_path = NULL;
    3519          96 :         struct fs_path *name = NULL;
    3520          96 :         u64 orig_progress = sctx->send_progress;
    3521          96 :         struct recorded_ref *cur;
    3522          96 :         u64 parent_ino, parent_gen;
    3523          96 :         struct waiting_dir_move *dm = NULL;
    3524          96 :         u64 rmdir_ino = 0;
    3525          96 :         u64 rmdir_gen;
    3526          96 :         u64 ancestor;
    3527          96 :         bool is_orphan;
    3528          96 :         int ret;
    3529             : 
    3530          96 :         name = fs_path_alloc();
    3531          96 :         from_path = fs_path_alloc();
    3532          96 :         if (!name || !from_path) {
    3533           0 :                 ret = -ENOMEM;
    3534           0 :                 goto out;
    3535             :         }
    3536             :         /* Copy the waiting_dir_move fields needed later, then release that entry. */
    3537          96 :         dm = get_waiting_dir_move(sctx, pm->ino);
    3538          96 :         ASSERT(dm);
    3539          96 :         rmdir_ino = dm->rmdir_ino;
    3540          96 :         rmdir_gen = dm->rmdir_gen;
    3541          96 :         is_orphan = dm->orphanized;
    3542          96 :         free_waiting_dir_move(sctx, dm);
    3543             :         /* from_path: gen_unique_name() result if orphanized, else parent's cur path + first ref name. */
    3544          96 :         if (is_orphan) {
    3545           8 :                 ret = gen_unique_name(sctx, pm->ino,
    3546             :                                       pm->gen, from_path);
    3547             :         } else {
    3548          88 :                 ret = get_first_ref(sctx->parent_root, pm->ino,
    3549             :                                     &parent_ino, &parent_gen, name);
    3550          88 :                 if (ret < 0)
    3551           0 :                         goto out;
    3552          88 :                 ret = get_cur_path(sctx, parent_ino, parent_gen,
    3553             :                                    from_path);
    3554          88 :                 if (ret < 0)
    3555           0 :                         goto out;
    3556          88 :                 ret = fs_path_add_path(from_path, name);
    3557             :         }
    3558          96 :         if (ret < 0)
    3559           0 :                 goto out;
    3560             :         /* send_progress is changed here and restored from orig_progress at out:. */
    3561          96 :         sctx->send_progress = sctx->cur_ino + 1;
    3562          96 :         ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
    3563          96 :         if (ret < 0)
    3564           0 :                 goto out;
    3565          96 :         if (ret) {
    3566           2 :                 LIST_HEAD(deleted_refs);
    3567           2 :                 ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
    3568           2 :                 ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
    3569             :                                            &pm->update_refs, &deleted_refs,
    3570             :                                            is_orphan);
    3571           2 :                 if (ret < 0)
    3572           0 :                         goto out;
    3573           2 :                 if (rmdir_ino) {
    3574           0 :                         dm = get_waiting_dir_move(sctx, pm->ino);
    3575           0 :                         ASSERT(dm);
    3576           0 :                         dm->rmdir_ino = rmdir_ino;
    3577           0 :                         dm->rmdir_gen = rmdir_gen;
    3578             :                 }
    3579           2 :                 goto out;
    3580             :         }
    3581          94 :         fs_path_reset(name);
    3582          94 :         to_path = name;
    3583          94 :         name = NULL;
    3584          94 :         ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
    3585          94 :         if (ret < 0)
    3586           0 :                 goto out;
    3587             : 
    3588          94 :         ret = send_rename(sctx, from_path, to_path);
    3589          94 :         if (ret < 0)
    3590           0 :                 goto out;
    3591             :         /* A saved rmdir_ino is removed only if its orphan_dir_info exists and can_rmdir() is non-zero. */
    3592          94 :         if (rmdir_ino) {
    3593           3 :                 struct orphan_dir_info *odi;
    3594           3 :                 u64 gen;
    3595             : 
    3596           3 :                 odi = get_orphan_dir_info(sctx, rmdir_ino, rmdir_gen);
    3597           3 :                 if (!odi) {
    3598             :                         /* already deleted */
    3599           0 :                         goto finish;
    3600             :                 }
    3601           3 :                 gen = odi->gen;
    3602             : 
    3603           3 :                 ret = can_rmdir(sctx, rmdir_ino, gen);
    3604           3 :                 if (ret < 0)
    3605           0 :                         goto out;
    3606           3 :                 if (!ret)
    3607           1 :                         goto finish;
    3608             : 
    3609           2 :                 name = fs_path_alloc();
    3610           2 :                 if (!name) {
    3611           0 :                         ret = -ENOMEM;
    3612           0 :                         goto out;
    3613             :                 }
    3614           2 :                 ret = get_cur_path(sctx, rmdir_ino, gen, name);
    3615           2 :                 if (ret < 0)
    3616           0 :                         goto out;
    3617           2 :                 ret = send_rmdir(sctx, name);
    3618           2 :                 if (ret < 0)
    3619           0 :                         goto out;
    3620             :         }
    3621             : 
    3622          93 : finish:
    3623          94 :         ret = cache_dir_utimes(sctx, pm->ino, pm->gen);
    3624          94 :         if (ret < 0)
    3625           0 :                 goto out;
    3626             : 
    3627             :         /*
    3628             :          * After rename/move, need to update the utimes of both new parent(s)
    3629             :          * and old parent(s).
    3630             :          */
    3631         282 :         list_for_each_entry(cur, &pm->update_refs, list) {
    3632             :                 /*
    3633             :                  * The parent inode might have been deleted in the send snapshot
    3634             :                  */
    3635         188 :                 ret = get_inode_info(sctx->send_root, cur->dir, NULL);
    3636         188 :                 if (ret == -ENOENT) {
    3637           7 :                         ret = 0;
    3638           7 :                         continue;
    3639             :                 }
    3640         181 :                 if (ret < 0)
    3641           0 :                         goto out;
    3642             : 
    3643         181 :                 ret = cache_dir_utimes(sctx, cur->dir, cur->dir_gen);
    3644         181 :                 if (ret < 0)
    3645           0 :                         goto out;
    3646             :         }
    3647             : 
    3648          94 : out:
    3649          96 :         fs_path_free(name);
    3650          96 :         fs_path_free(from_path);
    3651          96 :         fs_path_free(to_path);
    3652          96 :         sctx->send_progress = orig_progress;
    3653             : 
    3654          96 :         return ret;
    3655             : }
    3656             : /* Unlink @m from its list and from the pending_dir_moves tree if linked, free its refs, kfree it. */
    3657          96 : static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
    3658             : {
    3659          96 :         if (!list_empty(&m->list))
    3660          96 :                 list_del(&m->list);
    3661          96 :         if (!RB_EMPTY_NODE(&m->node))
    3662           0 :                 rb_erase(&m->node, &sctx->pending_dir_moves);
    3663          96 :         __free_recorded_refs(&m->update_refs);
    3664          96 :         kfree(m);
    3665          96 : }
    3666             : /* Queue @moves, followed by the entries chained on moves->list, at the tail of @stack; untree @moves. */
    3667          73 : static void tail_append_pending_moves(struct send_ctx *sctx,
    3668             :                                       struct pending_dir_move *moves,
    3669             :                                       struct list_head *stack)
    3670             : {
    3671          73 :         if (list_empty(&moves->list)) {
    3672          56 :                 list_add_tail(&moves->list, stack);
    3673             :         } else {
    3674          17 :                 LIST_HEAD(list);
    3675          17 :                 list_splice_init(&moves->list, &list);
    3676          17 :                 list_add_tail(&moves->list, stack);
    3677          17 :                 list_splice_tail(&list, stack);
    3678             :         }
    3679          73 :         if (!RB_EMPTY_NODE(&moves->node)) {
    3680          73 :                 rb_erase(&moves->node, &sctx->pending_dir_moves);
    3681          73 :                 RB_CLEAR_NODE(&moves->node);
    3682             :         }
    3683          73 : }
    3684             : /* Apply the moves pending on sctx->cur_ino, then those pending on each inode that was just moved. */
    3685     1008379 : static int apply_children_dir_moves(struct send_ctx *sctx)
    3686             : {
    3687     1008379 :         struct pending_dir_move *pm;
    3688     1008379 :         struct list_head stack;
    3689     1008379 :         u64 parent_ino = sctx->cur_ino;
    3690     1008379 :         int ret = 0;
    3691             : 
    3692     1008379 :         pm = get_pending_dir_moves(sctx, parent_ino);
    3693     1008379 :         if (!pm)
    3694             :                 return 0;
    3695             : 
    3696          50 :         INIT_LIST_HEAD(&stack);
    3697          50 :         tail_append_pending_moves(sctx, pm, &stack);
    3698             :         /* Despite its name, 'stack' is consumed from the head and extended at the tail. */
    3699         196 :         while (!list_empty(&stack)) {
    3700          96 :                 pm = list_first_entry(&stack, struct pending_dir_move, list);
    3701          96 :                 parent_ino = pm->ino;
    3702          96 :                 ret = apply_dir_move(sctx, pm);
    3703          96 :                 free_pending_move(sctx, pm);
    3704          96 :                 if (ret)
    3705           0 :                         goto out;
    3706          96 :                 pm = get_pending_dir_moves(sctx, parent_ino);
    3707          96 :                 if (pm)
    3708          23 :                         tail_append_pending_moves(sctx, pm, &stack);
    3709             :         }
    3710             :         return 0;
    3711             :         /* Error path: free every entry still queued before returning the error. */
    3712             : out:
    3713           0 :         while (!list_empty(&stack)) {
    3714           0 :                 pm = list_first_entry(&stack, struct pending_dir_move, list);
    3715           0 :                 free_pending_move(sctx, pm);
    3716             :         }
    3717             :         return ret;
    3718             : }
    3719             : 
    3720             : /*
    3721             :  * We might need to delay a directory rename even when no ancestor directory
    3722             :  * (in the send root) with a higher inode number than ours (sctx->cur_ino) was
    3723             :  * renamed. This happens when we rename a directory to the old name (the name
    3724             :  * in the parent root) of some other unrelated directory that got its rename
    3725             :  * delayed due to some ancestor with a higher inode number that got renamed.
    3726             :  *
    3727             :  * Example:
    3728             :  *
    3729             :  * Parent snapshot:
    3730             :  * .                                       (ino 256)
    3731             :  * |---- a/                                (ino 257)
    3732             :  * |     |---- file                        (ino 260)
    3733             :  * |
    3734             :  * |---- b/                                (ino 258)
    3735             :  * |---- c/                                (ino 259)
    3736             :  *
    3737             :  * Send snapshot:
    3738             :  * .                                       (ino 256)
    3739             :  * |---- a/                                (ino 258)
    3740             :  * |---- x/                                (ino 259)
    3741             :  *       |---- y/                          (ino 257)
    3742             :  *             |----- file                 (ino 260)
    3743             :  *
    3744             :  * Here we cannot rename 258 from 'b' to 'a' without the rename of inode 257
    3745             :  * from 'a' to 'x/y' happening first, which in turn depends on the rename of
    3746             :  * inode 259 from 'c' to 'x'. So the order of rename commands the send stream
    3747             :  * must issue is:
    3748             :  *
    3749             :  * 1 - rename 259 from 'c' to 'x'
    3750             :  * 2 - rename 257 from 'a' to 'x/y'
    3751             :  * 3 - rename 258 from 'b' to 'a'
    3752             :  *
    3753             :  * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can
    3754             :  * be done right away and < 0 on error.
    3755             :  */
    3756         553 : static int wait_for_dest_dir_move(struct send_ctx *sctx,
    3757             :                                   struct recorded_ref *parent_ref,
    3758             :                                   const bool is_orphan)
    3759             : {
    3760         553 :         struct btrfs_fs_info *fs_info = sctx->parent_root->fs_info;
    3761         553 :         struct btrfs_path *path;
    3762         553 :         struct btrfs_key key;
    3763         553 :         struct btrfs_key di_key;
    3764         553 :         struct btrfs_dir_item *di;
    3765         553 :         u64 left_gen;
    3766         553 :         u64 right_gen;
    3767         553 :         int ret = 0;
    3768         553 :         struct waiting_dir_move *wdm;
    3769             :         /* No inode is waiting for a move, so there is nothing to wait on. */
    3770         553 :         if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
    3771             :                 return 0;
    3772             : 
    3773         148 :         path = alloc_path_for_send();
    3774         148 :         if (!path)
    3775             :                 return -ENOMEM;
    3776             :         /* Search parent_root for the dir item keyed by the new parent dir and the new name's hash. */
    3777         148 :         key.objectid = parent_ref->dir;
    3778         148 :         key.type = BTRFS_DIR_ITEM_KEY;
    3779         148 :         key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);
    3780             : 
    3781         148 :         ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
    3782         148 :         if (ret < 0) {
    3783           0 :                 goto out;
    3784         148 :         } else if (ret > 0) {
    3785         109 :                 ret = 0;
    3786         109 :                 goto out;
    3787             :         }
    3788             : 
    3789          39 :         di = btrfs_match_dir_item_name(fs_info, path, parent_ref->name,
    3790             :                                        parent_ref->name_len);
    3791          39 :         if (!di) {
    3792           0 :                 ret = 0;
    3793           0 :                 goto out;
    3794             :         }
    3795             :         /*
    3796             :          * di_key.objectid has the number of the inode that has a dentry in the
    3797             :          * parent directory with the same name that sctx->cur_ino is being
    3798             :          * renamed to. We need to check if that inode is in the send root as
    3799             :          * well and if it is currently marked as an inode with a pending rename,
    3800             :          * if it is, we need to delay the rename of sctx->cur_ino as well, so
    3801             :          * that it happens after that other inode is renamed.
    3802             :          */
    3803          39 :         btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
    3804          39 :         if (di_key.type != BTRFS_INODE_ITEM_KEY) {
    3805           0 :                 ret = 0;
    3806           0 :                 goto out;
    3807             :         }
    3808             :         /* -ENOENT from the send_root lookup is turned into 0 (no delay); other errors propagate. */
    3809          39 :         ret = get_inode_gen(sctx->parent_root, di_key.objectid, &left_gen);
    3810          39 :         if (ret < 0)
    3811           0 :                 goto out;
    3812          39 :         ret = get_inode_gen(sctx->send_root, di_key.objectid, &right_gen);
    3813          39 :         if (ret < 0) {
    3814           0 :                 if (ret == -ENOENT)
    3815           0 :                         ret = 0;
    3816           0 :                 goto out;
    3817             :         }
    3818             : 
    3819             :         /* Different inode, no need to delay the rename of sctx->cur_ino */
    3820          39 :         if (right_gen != left_gen) {
    3821           0 :                 ret = 0;
    3822           0 :                 goto out;
    3823             :         }
    3824             :         /* Delay only when that inode has a waiting_dir_move entry that is not orphanized. */
    3825          39 :         wdm = get_waiting_dir_move(sctx, di_key.objectid);
    3826          39 :         if (wdm && !wdm->orphanized) {
    3827           0 :                 ret = add_pending_dir_move(sctx,
    3828             :                                            sctx->cur_ino,
    3829             :                                            sctx->cur_inode_gen,
    3830             :                                            di_key.objectid,
    3831             :                                            &sctx->new_refs,
    3832             :                                            &sctx->deleted_refs,
    3833             :                                            is_orphan);
    3834           0 :                 if (!ret)
    3835           0 :                         ret = 1;
    3836             :         }
    3837          39 : out:
    3838         148 :         btrfs_free_path(path);
    3839         148 :         return ret;
    3840             : }
    3841             : 
    3842             : /*
    3843             :  * Check if inode ino2, or any of its ancestors, is inode ino1.
    3844             :  * Return 1 if true, 0 if false and < 0 on error.
    3845             :  */
    3846         298 : static int check_ino_in_path(struct btrfs_root *root,
    3847             :                              const u64 ino1,
    3848             :                              const u64 ino1_gen,
    3849             :                              const u64 ino2,
    3850             :                              const u64 ino2_gen,
    3851             :                              struct fs_path *fs_path)
    3852             : {
    3853         298 :         u64 ino = ino2;
    3854             :         /* Same inode number: it only counts as a match if the generations agree too. */
    3855         298 :         if (ino1 == ino2)
    3856           7 :                 return ino1_gen == ino2_gen;
    3857             : 
    3858        2040 :         while (ino > BTRFS_FIRST_FREE_OBJECTID) {
    3859        1753 :                 u64 parent;
    3860        1753 :                 u64 parent_gen;
    3861        1753 :                 int ret;
    3862             :                 /* Step to the parent given by get_first_ref(); fs_path is reused as scratch space. */
    3863        1753 :                 fs_path_reset(fs_path);
    3864        1753 :                 ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path);
    3865        1753 :                 if (ret < 0)
    3866           4 :                         return ret;
    3867        1753 :                 if (parent == ino1)
    3868           4 :                         return parent_gen == ino1_gen;
    3869        1749 :                 ino = parent;
    3870             :         }
    3871             :         return 0;
    3872             : }
    3873             : 
    3874             : /*
    3875             :  * Check if inode ino1 is an ancestor of inode ino2 in the given root for any
    3876             :  * possible path (in case ino2 is not a directory and has multiple hard links).
    3877             :  * Return 1 if true, 0 if false and < 0 on error.
    3878             :  */
    3879         254 : static int is_ancestor(struct btrfs_root *root,
    3880             :                        const u64 ino1,
    3881             :                        const u64 ino1_gen,
    3882             :                        const u64 ino2,
    3883             :                        struct fs_path *fs_path)
    3884             : {
    3885         254 :         bool free_fs_path = false;
    3886         254 :         int ret = 0;
    3887         254 :         int iter_ret = 0;
    3888         254 :         struct btrfs_path *path = NULL;
    3889         254 :         struct btrfs_key key;
    3890             :         /* Allocate a temporary fs_path when the caller passed none; it is freed at out:. */
    3891         254 :         if (!fs_path) {
    3892         225 :                 fs_path = fs_path_alloc();
    3893         225 :                 if (!fs_path)
    3894             :                         return -ENOMEM;
    3895             :                 free_fs_path = true;
    3896             :         }
    3897             : 
    3898         254 :         path = alloc_path_for_send();
    3899         254 :         if (!path) {
    3900           0 :                 ret = -ENOMEM;
    3901           0 :                 goto out;
    3902             :         }
    3903             : 
    3904         254 :         key.objectid = ino2;
    3905         254 :         key.type = BTRFS_INODE_REF_KEY;
    3906         254 :         key.offset = 0;
    3907             :         /* Iterate from key (ino2, INODE_REF, 0); stop at the first item that is not a ref/extref of ino2. */
    3908         541 :         btrfs_for_each_slot(root, &key, &key, path, iter_ret) {
    3909         541 :                 struct extent_buffer *leaf = path->nodes[0];
    3910         541 :                 int slot = path->slots[0];
    3911         541 :                 u32 cur_offset = 0;
    3912         541 :                 u32 item_size;
    3913             : 
    3914         541 :                 if (key.objectid != ino2)
    3915             :                         break;
    3916         449 :                 if (key.type != BTRFS_INODE_REF_KEY &&
    3917             :                     key.type != BTRFS_INODE_EXTREF_KEY)
    3918             :                         break;
    3919             : 
    3920         298 :                 item_size = btrfs_item_size(leaf, slot);
    3921         585 :                 while (cur_offset < item_size) {
    3922         298 :                         u64 parent;
    3923         298 :                         u64 parent_gen;
    3924             :                         /* Extref items are walked entry by entry; a ref item gives one parent (key.offset). */
    3925         298 :                         if (key.type == BTRFS_INODE_EXTREF_KEY) {
    3926           0 :                                 unsigned long ptr;
    3927           0 :                                 struct btrfs_inode_extref *extref;
    3928             : 
    3929           0 :                                 ptr = btrfs_item_ptr_offset(leaf, slot);
    3930           0 :                                 extref = (struct btrfs_inode_extref *)
    3931           0 :                                         (ptr + cur_offset);
    3932           0 :                                 parent = btrfs_inode_extref_parent(leaf,
    3933             :                                                                    extref);
    3934           0 :                                 cur_offset += sizeof(*extref);
    3935           0 :                                 cur_offset += btrfs_inode_extref_name_len(leaf,
    3936             :                                                                   extref);
    3937             :                         } else {
    3938         298 :                                 parent = key.offset;
    3939         298 :                                 cur_offset = item_size;
    3940             :                         }
    3941             :                         /* A get_inode_gen() error or any non-zero check_ino_in_path() result ends the search. */
    3942         298 :                         ret = get_inode_gen(root, parent, &parent_gen);
    3943         298 :                         if (ret < 0)
    3944          11 :                                 goto out;
    3945         298 :                         ret = check_ino_in_path(root, ino1, ino1_gen,
    3946             :                                                 parent, parent_gen, fs_path);
    3947         298 :                         if (ret)
    3948          11 :                                 goto out;
    3949             :                 }
    3950             :         }
    3951         243 :         ret = 0;
    3952         243 :         if (iter_ret < 0)
    3953             :                 ret = iter_ret;
    3954             : 
    3955         243 : out:
    3956         254 :         btrfs_free_path(path);
    3957         254 :         if (free_fs_path)
    3958         225 :                 fs_path_free(fs_path);
    3959             :         return ret;
    3960             : }
    3961             : 
    3962         553 : static int wait_for_parent_move(struct send_ctx *sctx,
    3963             :                                 struct recorded_ref *parent_ref,
    3964             :                                 const bool is_orphan)
    3965             : {
    3966         553 :         int ret = 0;
    3967         553 :         u64 ino = parent_ref->dir;
    3968         553 :         u64 ino_gen = parent_ref->dir_gen;
    3969         553 :         u64 parent_ino_before, parent_ino_after;
    3970         553 :         struct fs_path *path_before = NULL;
    3971         553 :         struct fs_path *path_after = NULL;
    3972         553 :         int len1, len2;
    3973             : 
    3974         553 :         path_after = fs_path_alloc();
    3975         553 :         path_before = fs_path_alloc();
    3976         553 :         if (!path_after || !path_before) {
    3977           0 :                 ret = -ENOMEM;
    3978           0 :                 goto out;
    3979             :         }
    3980             : 
    3981             :         /*
    3982             :          * Our current directory inode may not yet be renamed/moved because some
    3983             :          * ancestor (immediate or not) has to be renamed/moved first. So find if
    3984             :          * such ancestor exists and make sure our own rename/move happens after
    3985             :          * that ancestor is processed to avoid path build infinite loops (done
    3986             :          * at get_cur_path()).
    3987             :          */
    3988        3144 :         while (ino > BTRFS_FIRST_FREE_OBJECTID) {
    3989        2741 :                 u64 parent_ino_after_gen;
    3990             : 
    3991        2741 :                 if (is_waiting_for_move(sctx, ino)) {
    3992             :                         /*
    3993             :                          * If the current inode is an ancestor of ino in the
    3994             :                          * parent root, we need to delay the rename of the
    3995             :                          * current inode, otherwise don't delay the rename
    3996             :                          * because we can end up with a circular dependency
    3997             :                          * of renames, resulting in some directories never
    3998             :                          * getting the respective rename operations issued in
    3999             :                          * the send stream or getting into infinite path build
    4000             :                          * loops.
    4001             :                          */
    4002          29 :                         ret = is_ancestor(sctx->parent_root,
    4003             :                                           sctx->cur_ino, sctx->cur_inode_gen,
    4004             :                                           ino, path_before);
    4005          29 :                         if (ret)
    4006             :                                 break;
    4007             :                 }
    4008             : 
    4009        2735 :                 fs_path_reset(path_before);
    4010        2735 :                 fs_path_reset(path_after);
    4011             : 
    4012        2735 :                 ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
    4013             :                                     &parent_ino_after_gen, path_after);
    4014        2735 :                 if (ret < 0)
    4015           0 :                         goto out;
    4016        2735 :                 ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
    4017             :                                     NULL, path_before);
    4018        2735 :                 if (ret < 0 && ret != -ENOENT) {
    4019           0 :                         goto out;
    4020        2735 :                 } else if (ret == -ENOENT) {
    4021             :                         ret = 0;
    4022             :                         break;
    4023             :                 }
    4024             : 
    4025        2679 :                 len1 = fs_path_len(path_before);
    4026        2679 :                 len2 = fs_path_len(path_after);
    4027        2679 :                 if (ino > sctx->cur_ino &&
    4028         343 :                     (parent_ino_before != parent_ino_after || len1 != len2 ||
    4029         128 :                      memcmp(path_before->start, path_after->start, len1))) {
    4030          90 :                         u64 parent_ino_gen;
    4031             : 
    4032          90 :                         ret = get_inode_gen(sctx->parent_root, ino, &parent_ino_gen);
    4033          90 :                         if (ret < 0)
    4034           0 :                                 goto out;
    4035          90 :                         if (ino_gen == parent_ino_gen) {
    4036          88 :                                 ret = 1;
    4037          88 :                                 break;
    4038             :                         }
    4039             :                 }
    4040        2591 :                 ino = parent_ino_after;
    4041        2591 :                 ino_gen = parent_ino_after_gen;
    4042             :         }
    4043             : 
    4044         403 : out:
    4045         553 :         fs_path_free(path_before);
    4046         553 :         fs_path_free(path_after);
    4047             : 
    4048         553 :         if (ret == 1) {
    4049          94 :                 ret = add_pending_dir_move(sctx,
    4050             :                                            sctx->cur_ino,
    4051             :                                            sctx->cur_inode_gen,
    4052             :                                            ino,
    4053             :                                            &sctx->new_refs,
    4054             :                                            &sctx->deleted_refs,
    4055             :                                            is_orphan);
    4056          94 :                 if (!ret)
    4057          94 :                         ret = 1;
    4058             :         }
    4059             : 
    4060         553 :         return ret;
    4061             : }
    4062             : 
    4063          15 : static int update_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
    4064             : {
    4065          15 :         int ret;
    4066          15 :         struct fs_path *new_path;
    4067             : 
    4068             :         /*
    4069             :          * Rebuild ref's full path from its directory's current path. ref->name
    4070             :          * points into the old full_path, so build a new path, then swap it in.
    4071             :          */
    4072          15 :         new_path = fs_path_alloc();
    4073          15 :         if (!new_path)
    4074             :                 return -ENOMEM;
    4075             : 
    4076          15 :         ret = get_cur_path(sctx, ref->dir, ref->dir_gen, new_path);
    4077          15 :         if (ret < 0) {
    4078           0 :                 fs_path_free(new_path);
    4079           0 :                 return ret;
    4080             :         }
    4081          15 :         ret = fs_path_add(new_path, ref->name, ref->name_len);
    4082          15 :         if (ret < 0) {
    4083           0 :                 fs_path_free(new_path);
    4084           0 :                 return ret;
    4085             :         }
    4086             : 
    4087          15 :         fs_path_free(ref->full_path);
    4088          15 :         set_ref_path(ref, new_path);
    4089             : 
    4090          15 :         return 0;
    4091             : }
    4092             : 
    4093             : /*
    4094             :  * When processing the new references for an inode we may orphanize an existing
    4095             :  * directory inode because its old name conflicts with one of the new references
    4096             :  * of the current inode. Later, when processing another new reference of our
    4097             :  * inode, we might need to orphanize another inode, but the path we have in the
    4098             :  * reference reflects the pre-orphanization name of the directory we previously
    4099             :  * orphanized. For example:
    4100             :  *
    4101             :  * parent snapshot looks like:
    4102             :  *
    4103             :  * .                                     (ino 256)
    4104             :  * |----- f1                             (ino 257)
    4105             :  * |----- f2                             (ino 258)
    4106             :  * |----- d1/                            (ino 259)
    4107             :  *        |----- d2/                     (ino 260)
    4108             :  *
    4109             :  * send snapshot looks like:
    4110             :  *
    4111             :  * .                                     (ino 256)
    4112             :  * |----- d1                             (ino 258)
    4113             :  * |----- f2/                            (ino 259)
    4114             :  *        |----- f2_link/                (ino 260)
    4115             :  *        |       |----- f1              (ino 257)
    4116             :  *        |
    4117             :  *        |----- d2                      (ino 258)
    4118             :  *
    4119             :  * When processing inode 257 we compute the name for inode 259 as "d1", and we
    4120             :  * cache it in the name cache. Later when we start processing inode 258, when
    4121             :  * collecting all its new references we set a full path of "d1/d2" for its new
    4122             :  * reference with name "d2". When we start processing the new references we
    4123             :  * start by processing the new reference with name "d1", and this results in
    4124             :  * orphanizing inode 259, since its old reference causes a conflict. Then we
    4125             :  * move on to the next new reference, with name "d2", and we find out we must
    4126             :  * orphanize inode 260, as its old reference conflicts with ours - but for the
    4127             :  * orphanization we use a source path corresponding to the path we stored in the
    4128             :  * new reference, which is "d1/d2" and not "o259-6-0/d2" - this makes the
    4129             :  * receiver fail since the path component "d1/" no longer exists, it was renamed
    4130             :  * to "o259-6-0/" when processing the previous new reference. So in this case we
    4131             :  * must recompute the path in the new reference and use it for the new
    4132             :  * orphanization operation.
    4133             :  */
    4134           2 : static int refresh_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
    4135             : {
    4136           2 :         char *name;
    4137           2 :         int ret;
    4138             : 
    4139           2 :         name = kmemdup(ref->name, ref->name_len, GFP_KERNEL);
    4140           2 :         if (!name)
    4141             :                 return -ENOMEM;
    4142             : 
    4143           2 :         fs_path_reset(ref->full_path);
    4144           2 :         ret = get_cur_path(sctx, ref->dir, ref->dir_gen, ref->full_path);
    4145           2 :         if (ret < 0)
    4146           0 :                 goto out;
    4147             : 
    4148           2 :         ret = fs_path_add(ref->full_path, name, ref->name_len);
    4149           2 :         if (ret < 0)
    4150           0 :                 goto out;
    4151             : 
    4152             :         /* ref->name pointed into the path we reset (hence the copy); re-point it. */
    4153           2 :         set_ref_path(ref, ref->full_path);
    4154           2 : out:
    4155           2 :         kfree(name);
    4156           2 :         return ret;
    4157             : }
    4158             : 
    4159             : /*
    4160             :  * This does all the move/link/unlink/rmdir magic.
    4161             :  */
    4162      605681 : static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
    4163             : {
    4164      605681 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    4165      605681 :         int ret = 0;
    4166      605681 :         struct recorded_ref *cur;
    4167      605681 :         struct recorded_ref *cur2;
    4168      605681 :         struct list_head check_dirs;
    4169      605681 :         struct fs_path *valid_path = NULL;
    4170      605681 :         u64 ow_inode = 0;
    4171      605681 :         u64 ow_gen;
    4172      605681 :         u64 ow_mode;
    4173      605681 :         int did_overwrite = 0;
    4174      605681 :         int is_orphan = 0;
    4175      605681 :         u64 last_dir_ino_rm = 0;
    4176      605681 :         bool can_rename = true;
    4177      605681 :         bool orphanized_dir = false;
    4178      605681 :         bool orphanized_ancestor = false;
    4179             : 
    4180      605681 :         btrfs_debug(fs_info, "process_recorded_refs %llu", sctx->cur_ino);
    4181             : 
    4182             :         /*
    4183             :          * This should never happen as the root dir always has the same ref
    4184             :          * which is always '..'
    4185             :          */
    4186      605681 :         BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
    4187      605681 :         INIT_LIST_HEAD(&check_dirs);
    4188             : 
    4189      605681 :         valid_path = fs_path_alloc();
    4190      605660 :         if (!valid_path) {
    4191           0 :                 ret = -ENOMEM;
    4192           0 :                 goto out;
    4193             :         }
    4194             : 
    4195             :         /*
    4196             :          * First, check if the first ref of the current inode was overwritten
    4197             :          * before. If yes, we know that the current inode was already orphanized
    4198             :          * and thus use the orphan name. If not, we can use get_cur_path to
    4199             :          * get the path of the first ref as it would look while receiving at
    4200             :          * this point in time.
    4201             :          * New inodes are always orphan at the beginning, so force to use the
    4202             :          * orphan name in this case.
    4203             :          * The first ref is stored in valid_path and will be updated if it
    4204             :          * gets moved around.
    4205             :          */
    4206      605660 :         if (!sctx->cur_inode_new) {
    4207        1521 :                 ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
    4208             :                                 sctx->cur_inode_gen);
    4209        1521 :                 if (ret < 0)
    4210           0 :                         goto out;
    4211        1521 :                 if (ret)
    4212         221 :                         did_overwrite = 1;
    4213             :         }
    4214      605660 :         if (sctx->cur_inode_new || did_overwrite) {
    4215      604360 :                 ret = gen_unique_name(sctx, sctx->cur_ino,
    4216             :                                 sctx->cur_inode_gen, valid_path);
    4217      604416 :                 if (ret < 0)
    4218           0 :                         goto out;
    4219             :                 is_orphan = 1;
    4220             :         } else {
    4221        1300 :                 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
    4222             :                                 valid_path);
    4223        1300 :                 if (ret < 0)
    4224           0 :                         goto out;
    4225             :         }
    4226             : 
    4227             :         /*
    4228             :          * Before doing any rename and link operations, do a first pass on the
    4229             :          * new references to orphanize any unprocessed inodes that may have a
    4230             :          * reference that conflicts with one of the new references of the current
    4231             :          * inode. This needs to happen first because a new reference may conflict
    4232             :          * with the old reference of a parent directory, so we must make sure
    4233             :          * that the path used for link and rename commands don't use an
    4234             :          * orphanized name when an ancestor was not yet orphanized.
    4235             :          *
    4236             :          * Example:
    4237             :          *
    4238             :          * Parent snapshot:
    4239             :          *
    4240             :          * .                                                      (ino 256)
    4241             :          * |----- testdir/                                        (ino 259)
    4242             :          * |          |----- a                                    (ino 257)
    4243             :          * |
    4244             :          * |----- b                                               (ino 258)
    4245             :          *
    4246             :          * Send snapshot:
    4247             :          *
    4248             :          * .                                                      (ino 256)
    4249             :          * |----- testdir_2/                                      (ino 259)
    4250             :          * |          |----- a                                    (ino 260)
    4251             :          * |
    4252             :          * |----- testdir                                         (ino 257)
    4253             :          * |----- b                                               (ino 257)
    4254             :          * |----- b2                                              (ino 258)
    4255             :          *
    4256             :          * Processing the new reference for inode 257 with name "b" may happen
    4257             :          * before processing the new reference with name "testdir". If so, we
    4258             :          * must make sure that by the time we send a link command to create the
    4259             :          * hard link "b", inode 259 was already orphanized, since the generated
    4260             :          * path in "valid_path" already contains the orphanized name for 259.
    4261             :          * We are processing inode 257, so only later when processing 259 we do
    4262             :          * the rename operation to change its temporary (orphanized) name to
    4263             :          * "testdir_2".
    4264             :          */
    4265     1213336 :         list_for_each_entry(cur, &sctx->new_refs, list) {
    4266      607618 :                 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen, NULL, NULL);
    4267      607617 :                 if (ret < 0)
    4268           0 :                         goto out;
    4269      607617 :                 if (ret == inode_state_will_create)
    4270         143 :                         continue;
    4271             : 
    4272             :                 /*
    4273             :                  * Check if this new ref would overwrite the first ref of another
    4274             :                  * unprocessed inode. If yes, orphanize the overwritten inode.
    4275             :                  * If we find an overwritten ref that is not the first ref,
    4276             :                  * simply unlink it.
    4277             :                  */
    4278      607474 :                 ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
    4279      607474 :                                 cur->name, cur->name_len,
    4280             :                                 &ow_inode, &ow_gen, &ow_mode);
    4281      607477 :                 if (ret < 0)
    4282           0 :                         goto out;
    4283      607477 :                 if (ret) {
    4284         230 :                         ret = is_first_ref(sctx->parent_root,
    4285         230 :                                            ow_inode, cur->dir, cur->name,
    4286             :                                            cur->name_len);
    4287         230 :                         if (ret < 0)
    4288           0 :                                 goto out;
    4289         230 :                         if (ret) {
    4290         225 :                                 struct name_cache_entry *nce;
    4291         225 :                                 struct waiting_dir_move *wdm;
    4292             : 
    4293         225 :                                 if (orphanized_dir) {
    4294           1 :                                         ret = refresh_ref_path(sctx, cur);
    4295           1 :                                         if (ret < 0)
    4296           0 :                                                 goto out;
    4297             :                                 }
    4298             : 
    4299         225 :                                 ret = orphanize_inode(sctx, ow_inode, ow_gen,
    4300             :                                                 cur->full_path);
    4301         225 :                                 if (ret < 0)
    4302           0 :                                         goto out;
    4303         225 :                                 if (S_ISDIR(ow_mode))
    4304          53 :                                         orphanized_dir = true;
    4305             : 
    4306             :                                 /*
    4307             :                                  * If ow_inode has its rename operation delayed
    4308             :                                  * make sure that its orphanized name is used in
    4309             :                                  * the source path when performing its rename
    4310             :                                  * operation.
    4311             :                                  */
    4312         225 :                                 wdm = get_waiting_dir_move(sctx, ow_inode);
    4313         225 :                                 if (wdm)
    4314           4 :                                         wdm->orphanized = true;
    4315             : 
    4316             :                                 /*
    4317             :                                  * Make sure we clear our orphanized inode's
    4318             :                                  * name from the name cache. This is because the
    4319             :                                  * inode ow_inode might be an ancestor of some
    4320             :                                  * other inode that will be orphanized as well
    4321             :                                  * later and has an inode number greater than
    4322             :                                  * sctx->send_progress. We need to prevent
    4323             :                                  * future name lookups from using the old name
    4324             :                                  * and get instead the orphan name.
    4325             :                                  */
    4326         225 :                                 nce = name_cache_search(sctx, ow_inode, ow_gen);
    4327          28 :                                 if (nce)
    4328          28 :                                         btrfs_lru_cache_remove(&sctx->name_cache,
    4329             :                                                                &nce->entry);
    4330             : 
    4331             :                                 /*
    4332             :                                  * ow_inode might currently be an ancestor of
    4333             :                                  * cur_ino, therefore compute valid_path (the
    4334             :                                  * current path of cur_ino) again because it
    4335             :                                  * might contain the pre-orphanization name of
    4336             :                                  * ow_inode, which is no longer valid.
    4337             :                                  */
    4338         225 :                                 ret = is_ancestor(sctx->parent_root,
    4339             :                                                   ow_inode, ow_gen,
    4340             :                                                   sctx->cur_ino, NULL);
    4341         225 :                                 if (ret > 0) {
    4342           5 :                                         orphanized_ancestor = true;
    4343           5 :                                         fs_path_reset(valid_path);
    4344           5 :                                         ret = get_cur_path(sctx, sctx->cur_ino,
    4345             :                                                            sctx->cur_inode_gen,
    4346             :                                                            valid_path);
    4347             :                                 }
    4348         225 :                                 if (ret < 0)
    4349           0 :                                         goto out;
    4350             :                         } else {
    4351             :                                 /*
    4352             :                                  * If we previously orphanized a directory that
    4353             :                                  * collided with a new reference that we already
    4354             :                                  * processed, recompute the current path because
    4355             :                                  * that directory may be part of the path.
    4356             :                                  */
    4357           5 :                                 if (orphanized_dir) {
    4358           1 :                                         ret = refresh_ref_path(sctx, cur);
    4359           1 :                                         if (ret < 0)
    4360           0 :                                                 goto out;
    4361             :                                 }
    4362           5 :                                 ret = send_unlink(sctx, cur->full_path);
    4363           5 :                                 if (ret < 0)
    4364           0 :                                         goto out;
    4365             :                         }
    4366             :                 }
    4367             : 
    4368             :         }
    4369             : 
    4370     1213324 :         list_for_each_entry(cur, &sctx->new_refs, list) {
    4371             :                 /*
    4372             :                  * We may have refs where the parent directory does not exist
    4373             :                  * yet. This happens if the parent directories inum is higher
    4374             :                  * yet. This happens if the parent directory's inum is higher
    4375             :                  * parent directory out of order. But we need to check if this
    4376             :                  * did already happen before due to other refs in the same dir.
    4377             :                  */
    4378      607624 :                 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen, NULL, NULL);
    4379      607618 :                 if (ret < 0)
    4380           0 :                         goto out;
    4381      607618 :                 if (ret == inode_state_will_create) {
    4382         143 :                         ret = 0;
    4383             :                         /*
    4384             :                          * First check if any of the current inode's refs did
    4385             :                          * already create the dir.
    4386             :                          */
    4387         161 :                         list_for_each_entry(cur2, &sctx->new_refs, list) {
    4388         161 :                                 if (cur == cur2)
    4389             :                                         break;
    4390          19 :                                 if (cur2->dir == cur->dir) {
    4391             :                                         ret = 1;
    4392             :                                         break;
    4393             :                                 }
    4394             :                         }
    4395             : 
    4396             :                         /*
    4397             :                          * If that did not happen, check if a previous inode
    4398             :                          * did already create the dir.
    4399             :                          */
    4400         143 :                         if (!ret)
    4401         142 :                                 ret = did_create_dir(sctx, cur->dir);
    4402         143 :                         if (ret < 0)
    4403           0 :                                 goto out;
    4404         143 :                         if (!ret) {
    4405         111 :                                 ret = send_create_inode(sctx, cur->dir);
    4406         111 :                                 if (ret < 0)
    4407           0 :                                         goto out;
    4408         111 :                                 cache_dir_created(sctx, cur->dir);
    4409             :                         }
    4410             :                 }
    4411             : 
    4412      607618 :                 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
    4413         553 :                         ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
    4414         553 :                         if (ret < 0)
    4415           0 :                                 goto out;
    4416         553 :                         if (ret == 1) {
    4417           0 :                                 can_rename = false;
    4418           0 :                                 *pending_move = 1;
    4419             :                         }
    4420             :                 }
    4421             : 
    4422      607618 :                 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root &&
    4423             :                     can_rename) {
    4424         553 :                         ret = wait_for_parent_move(sctx, cur, is_orphan);
    4425         553 :                         if (ret < 0)
    4426           0 :                                 goto out;
    4427         553 :                         if (ret == 1) {
    4428          94 :                                 can_rename = false;
    4429          94 :                                 *pending_move = 1;
    4430             :                         }
    4431             :                 }
    4432             : 
    4433             :                 /*
    4434             :                  * link/move the ref to the new place. If we have an orphan
    4435             :                  * inode, move it and update valid_path. If not, link or move
    4436             :                  * it depending on the inode mode.
    4437             :                  */
    4438      607618 :                 if (is_orphan && can_rename) {
    4439      604405 :                         ret = send_rename(sctx, valid_path, cur->full_path);
    4440      604402 :                         if (ret < 0)
    4441           0 :                                 goto out;
    4442      604402 :                         is_orphan = 0;
    4443      604402 :                         ret = fs_path_copy(valid_path, cur->full_path);
    4444      604396 :                         if (ret < 0)
    4445           0 :                                 goto out;
    4446        3213 :                 } else if (can_rename) {
    4447        3119 :                         if (S_ISDIR(sctx->cur_inode_mode)) {
    4448             :                                 /*
    4449             :                                  * Dirs can't be linked, so move it. For moved
    4450             :                                  * dirs, we always have one new and one deleted
    4451             :                                  * ref. The deleted ref is ignored later.
    4452             :                                  */
    4453         152 :                                 ret = send_rename(sctx, valid_path,
    4454             :                                                   cur->full_path);
    4455         152 :                                 if (!ret)
    4456         152 :                                         ret = fs_path_copy(valid_path,
    4457             :                                                            cur->full_path);
    4458         152 :                                 if (ret < 0)
    4459           0 :                                         goto out;
    4460             :                         } else {
    4461             :                                 /*
    4462             :                                  * We might have previously orphanized an inode
    4463             :                                  * which is an ancestor of our current inode,
    4464             :                                  * so our reference's full path, which was
    4465             :                                  * computed before any such orphanizations, must
    4466             :                                  * be updated.
    4467             :                                  */
    4468        2967 :                                 if (orphanized_dir) {
    4469          11 :                                         ret = update_ref_path(sctx, cur);
    4470          11 :                                         if (ret < 0)
    4471           0 :                                                 goto out;
    4472             :                                 }
    4473        2967 :                                 ret = send_link(sctx, cur->full_path,
    4474             :                                                 valid_path);
    4475        2967 :                                 if (ret < 0)
    4476           0 :                                         goto out;
    4477             :                         }
    4478             :                 }
    4479      607609 :                 ret = dup_ref(cur, &check_dirs);
    4480      607606 :                 if (ret < 0)
    4481           0 :                         goto out;
    4482             :         }
    4483             : 
    4484      605700 :         if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) {
    4485             :                 /*
    4486             :                  * Check if we can already rmdir the directory. If not,
    4487             :                  * orphanize it. For every dir item inside that gets deleted
    4488             :                  * later, we do this check again and rmdir it then if possible.
    4489             :                  * See the use of check_dirs for more details.
    4490             :                  */
    4491          25 :                 ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen);
    4492          25 :                 if (ret < 0)
    4493           0 :                         goto out;
    4494          25 :                 if (ret) {
    4495          16 :                         ret = send_rmdir(sctx, valid_path);
    4496          16 :                         if (ret < 0)
    4497           0 :                                 goto out;
    4498           9 :                 } else if (!is_orphan) {
    4499           9 :                         ret = orphanize_inode(sctx, sctx->cur_ino,
    4500             :                                         sctx->cur_inode_gen, valid_path);
    4501           9 :                         if (ret < 0)
    4502           0 :                                 goto out;
    4503             :                         is_orphan = 1;
    4504             :                 }
    4505             : 
    4506          50 :                 list_for_each_entry(cur, &sctx->deleted_refs, list) {
    4507          25 :                         ret = dup_ref(cur, &check_dirs);
    4508          25 :                         if (ret < 0)
    4509           0 :                                 goto out;
    4510             :                 }
    4511      605675 :         } else if (S_ISDIR(sctx->cur_inode_mode) &&
    4512         862 :                    !list_empty(&sctx->deleted_refs)) {
    4513             :                 /*
    4514             :                  * We have a moved dir. Add the old parent to check_dirs
    4515             :                  */
    4516         290 :                 cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
    4517             :                                 list);
    4518         290 :                 ret = dup_ref(cur, &check_dirs);
    4519         290 :                 if (ret < 0)
    4520           0 :                         goto out;
    4521      605385 :         } else if (!S_ISDIR(sctx->cur_inode_mode)) {
    4522             :                 /*
    4523             :                  * We have a non dir inode. Go through all deleted refs and
    4524             :                  * unlink them if they were not already overwritten by other
    4525             :                  * inodes.
    4526             :                  */
    4527      606939 :                 list_for_each_entry(cur, &sctx->deleted_refs, list) {
    4528        2131 :                         ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen,
    4529             :                                         sctx->cur_ino, sctx->cur_inode_gen,
    4530        2131 :                                         cur->name, cur->name_len);
    4531        2126 :                         if (ret < 0)
    4532           0 :                                 goto out;
    4533        2126 :                         if (!ret) {
    4534             :                                 /*
    4535             :                                  * If we orphanized any ancestor before, we need
    4536             :                                  * to recompute the full path for deleted names,
    4537             :                                  * since any such path was computed before we
    4538             :                                  * processed any references and orphanized any
    4539             :                                  * ancestor inode.
    4540             :                                  */
    4541        1949 :                                 if (orphanized_ancestor) {
    4542           4 :                                         ret = update_ref_path(sctx, cur);
    4543           4 :                                         if (ret < 0)
    4544           0 :                                                 goto out;
    4545             :                                 }
    4546        1949 :                                 ret = send_unlink(sctx, cur->full_path);
    4547        1949 :                                 if (ret < 0)
    4548           0 :                                         goto out;
    4549             :                         }
    4550        2126 :                         ret = dup_ref(cur, &check_dirs);
    4551        2126 :                         if (ret < 0)
    4552           0 :                                 goto out;
    4553             :                 }
    4554             :                 /*
    4555             :                  * If the inode is still orphan, unlink the orphan. This may
    4556             :                  * happen when a previous inode did overwrite the first ref
    4557             :                  * of this inode and no new refs were added for the current
    4558             :                  * inode. Unlinking does not mean that the inode is deleted in
    4559             :                  * all cases. There may still be links to this inode in other
    4560             :                  * places.
    4561             :                  */
    4562      604808 :                 if (is_orphan) {
    4563           4 :                         ret = send_unlink(sctx, valid_path);
    4564           4 :                         if (ret < 0)
    4565           0 :                                 goto out;
    4566             :                 }
    4567             :         }
    4568             : 
    4569             :         /*
    4570             :          * We did collect all parent dirs where cur_inode was once located. We
    4571             :          * now go through all these dirs and check if they are pending for
    4572             :          * deletion and if it's finally possible to perform the rmdir now.
    4573             :          * We also update the inode stats of the parent dirs here.
    4574             :          */
    4575     1215755 :         list_for_each_entry(cur, &check_dirs, list) {
    4576             :                 /*
    4577             :                  * In case we had refs into dirs that were not processed yet,
    4578             :                  * we don't need to do the utime and rmdir logic for these dirs.
    4579             :                  * The dir will be processed later.
    4580             :                  */
    4581      610041 :                 if (cur->dir > sctx->cur_ino)
    4582         936 :                         continue;
    4583             : 
    4584      609105 :                 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen, NULL, NULL);
    4585      609123 :                 if (ret < 0)
    4586           0 :                         goto out;
    4587             : 
    4588      609123 :                 if (ret == inode_state_did_create ||
    4589      609123 :                     ret == inode_state_no_change) {
    4590      609101 :                         ret = cache_dir_utimes(sctx, cur->dir, cur->dir_gen);
    4591      609102 :                         if (ret < 0)
    4592           0 :                                 goto out;
    4593          22 :                 } else if (ret == inode_state_did_delete &&
    4594          22 :                            cur->dir != last_dir_ino_rm) {
    4595          21 :                         ret = can_rmdir(sctx, cur->dir, cur->dir_gen);
    4596          21 :                         if (ret < 0)
    4597           0 :                                 goto out;
    4598          21 :                         if (ret) {
    4599           7 :                                 ret = get_cur_path(sctx, cur->dir,
    4600             :                                                    cur->dir_gen, valid_path);
    4601           7 :                                 if (ret < 0)
    4602           0 :                                         goto out;
    4603           7 :                                 ret = send_rmdir(sctx, valid_path);
    4604           7 :                                 if (ret < 0)
    4605           0 :                                         goto out;
    4606           7 :                                 last_dir_ino_rm = cur->dir;
    4607             :                         }
    4608             :                 }
    4609             :         }
    4610             : 
    4611             :         ret = 0;
    4612             : 
    4613      605714 : out:
    4614      605714 :         __free_recorded_refs(&check_dirs);
    4615      605714 :         free_recorded_refs(sctx);
    4616      605698 :         fs_path_free(valid_path);
    4617      605694 :         return ret;
    4618             : }
    4619             : 
    4620       78289 : static int rbtree_ref_comp(const void *k, const struct rb_node *node)
    4621             : {
    4622       78289 :         const struct recorded_ref *data = k;
    4623       78289 :         const struct recorded_ref *ref = rb_entry(node, struct recorded_ref, node);
    4624       78289 :         int result;
    4625             : 
    4626       78289 :         if (data->dir > ref->dir)
    4627             :                 return 1;
    4628       76935 :         if (data->dir < ref->dir)
    4629             :                 return -1;
    4630       76935 :         if (data->dir_gen > ref->dir_gen)
    4631             :                 return 1;
    4632       76935 :         if (data->dir_gen < ref->dir_gen)
    4633             :                 return -1;
    4634       76935 :         if (data->name_len > ref->name_len)
    4635             :                 return 1;
    4636       71797 :         if (data->name_len < ref->name_len)
    4637             :                 return -1;
    4638       62677 :         result = strcmp(data->name, ref->name);
    4639       62677 :         if (result > 0)
    4640             :                 return 1;
    4641        9207 :         if (result < 0)
    4642        8199 :                 return -1;
    4643             :         return 0;
    4644             : }
    4645             : 
    4646             : static bool rbtree_ref_less(struct rb_node *node, const struct rb_node *parent)
    4647             : {
    4648       60263 :         const struct recorded_ref *entry = rb_entry(node, struct recorded_ref, node);
    4649             : 
    4650       60263 :         return rbtree_ref_comp(entry, parent) < 0;
    4651             : }
    4652             : 
    4653      611026 : static int record_ref_in_tree(struct rb_root *root, struct list_head *refs,
    4654             :                               struct fs_path *name, u64 dir, u64 dir_gen,
    4655             :                               struct send_ctx *sctx)
    4656             : {
    4657      611026 :         int ret = 0;
    4658      611026 :         struct fs_path *path = NULL;
    4659      611026 :         struct recorded_ref *ref = NULL;
    4660             : 
    4661      611026 :         path = fs_path_alloc();
    4662      611023 :         if (!path) {
    4663           0 :                 ret = -ENOMEM;
    4664           0 :                 goto out;
    4665             :         }
    4666             : 
    4667      611023 :         ref = recorded_ref_alloc();
    4668      611015 :         if (!ref) {
    4669           0 :                 ret = -ENOMEM;
    4670           0 :                 goto out;
    4671             :         }
    4672             : 
    4673      611015 :         ret = get_cur_path(sctx, dir, dir_gen, path);
    4674      611016 :         if (ret < 0)
    4675           0 :                 goto out;
    4676      611016 :         ret = fs_path_add_path(path, name);
    4677      610982 :         if (ret < 0)
    4678           0 :                 goto out;
    4679             : 
    4680      610982 :         ref->dir = dir;
    4681      610982 :         ref->dir_gen = dir_gen;
    4682      610982 :         set_ref_path(ref, path);
    4683      610974 :         list_add_tail(&ref->list, refs);
    4684      610971 :         rb_add(&ref->node, root, rbtree_ref_less);
    4685      610933 :         ref->root = root;
    4686      610933 : out:
    4687      610933 :         if (ret) {
    4688           0 :                 if (path && (!ref || !ref->full_path))
    4689           0 :                         fs_path_free(path);
    4690           0 :                 recorded_ref_free(ref);
    4691             :         }
    4692      610933 :         return ret;
    4693             : }
    4694             : 
    4695      608570 : static int record_new_ref_if_needed(int num, u64 dir, int index,
    4696             :                                     struct fs_path *name, void *ctx)
    4697             : {
    4698      608570 :         int ret = 0;
    4699      608570 :         struct send_ctx *sctx = ctx;
    4700      608570 :         struct rb_node *node = NULL;
    4701      608570 :         struct recorded_ref data;
    4702      608570 :         struct recorded_ref *ref;
    4703      608570 :         u64 dir_gen;
    4704             : 
    4705      608570 :         ret = get_inode_gen(sctx->send_root, dir, &dir_gen);
    4706      608619 :         if (ret < 0)
    4707           0 :                 goto out;
    4708             : 
    4709      608619 :         data.dir = dir;
    4710      608619 :         data.dir_gen = dir_gen;
    4711      608619 :         set_ref_path(&data, name);
    4712      608580 :         node = rb_find(&data, &sctx->rbtree_deleted_refs, rbtree_ref_comp);
    4713      608580 :         if (node) {
    4714           0 :                 ref = rb_entry(node, struct recorded_ref, node);
    4715           0 :                 recorded_ref_free(ref);
    4716             :         } else {
    4717      608580 :                 ret = record_ref_in_tree(&sctx->rbtree_new_refs,
    4718             :                                          &sctx->new_refs, name, dir, dir_gen,
    4719             :                                          sctx);
    4720             :         }
    4721      608514 : out:
    4722      608514 :         return ret;
    4723             : }
    4724             : 
    4725        3449 : static int record_deleted_ref_if_needed(int num, u64 dir, int index,
    4726             :                                         struct fs_path *name, void *ctx)
    4727             : {
    4728        3449 :         int ret = 0;
    4729        3449 :         struct send_ctx *sctx = ctx;
    4730        3449 :         struct rb_node *node = NULL;
    4731        3449 :         struct recorded_ref data;
    4732        3449 :         struct recorded_ref *ref;
    4733        3449 :         u64 dir_gen;
    4734             : 
    4735        3449 :         ret = get_inode_gen(sctx->parent_root, dir, &dir_gen);
    4736        3449 :         if (ret < 0)
    4737           0 :                 goto out;
    4738             : 
    4739        3449 :         data.dir = dir;
    4740        3449 :         data.dir_gen = dir_gen;
    4741        3449 :         set_ref_path(&data, name);
    4742        3449 :         node = rb_find(&data, &sctx->rbtree_new_refs, rbtree_ref_comp);
    4743        3449 :         if (node) {
    4744        1008 :                 ref = rb_entry(node, struct recorded_ref, node);
    4745        1008 :                 recorded_ref_free(ref);
    4746             :         } else {
    4747        2441 :                 ret = record_ref_in_tree(&sctx->rbtree_deleted_refs,
    4748             :                                          &sctx->deleted_refs, name, dir,
    4749             :                                          dir_gen, sctx);
    4750             :         }
    4751        3449 : out:
    4752        3449 :         return ret;
    4753             : }
    4754             : 
    4755      606285 : static int record_new_ref(struct send_ctx *sctx)
    4756             : {
    4757      606285 :         int ret;
    4758             : 
    4759      606285 :         ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
    4760             :                                 sctx->cmp_key, 0, record_new_ref_if_needed, sctx);
    4761      606292 :         if (ret < 0)
    4762             :                 goto out;
    4763             :         ret = 0;
    4764             : 
    4765             : out:
    4766      606292 :         return ret;
    4767             : }
    4768             : 
    4769        2143 : static int record_deleted_ref(struct send_ctx *sctx)
    4770             : {
    4771        2143 :         int ret;
    4772             : 
    4773        2143 :         ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
    4774             :                                 sctx->cmp_key, 0, record_deleted_ref_if_needed,
    4775             :                                 sctx);
    4776        2143 :         if (ret < 0)
    4777             :                 goto out;
    4778             :         ret = 0;
    4779             : 
    4780             : out:
    4781        2143 :         return ret;
    4782             : }
    4783             : 
    4784          54 : static int record_changed_ref(struct send_ctx *sctx)
    4785             : {
    4786          54 :         int ret = 0;
    4787             : 
    4788          54 :         ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
    4789             :                         sctx->cmp_key, 0, record_new_ref_if_needed, sctx);
    4790          54 :         if (ret < 0)
    4791           0 :                 goto out;
    4792          54 :         ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
    4793             :                         sctx->cmp_key, 0, record_deleted_ref_if_needed, sctx);
    4794          54 :         if (ret < 0)
    4795             :                 goto out;
    4796             :         ret = 0;
    4797             : 
    4798          54 : out:
    4799          54 :         return ret;
    4800             : }
    4801             : 
    4802             : /*
    4803             :  * Record and process all refs at once. Needed when an inode changes the
    4804             :  * generation number, which means that it was deleted and recreated.
    4805             :  */
    4806          17 : static int process_all_refs(struct send_ctx *sctx,
    4807             :                             enum btrfs_compare_tree_result cmd)
    4808             : {
    4809          17 :         int ret = 0;
    4810          17 :         int iter_ret = 0;
    4811          17 :         struct btrfs_root *root;
    4812          17 :         struct btrfs_path *path;
    4813          17 :         struct btrfs_key key;
    4814          17 :         struct btrfs_key found_key;
    4815          17 :         iterate_inode_ref_t cb;
    4816          17 :         int pending_move = 0;
    4817             : 
    4818          17 :         path = alloc_path_for_send();
    4819          17 :         if (!path)
    4820             :                 return -ENOMEM;
    4821             : 
    4822          17 :         if (cmd == BTRFS_COMPARE_TREE_NEW) {
    4823           7 :                 root = sctx->send_root;
    4824           7 :                 cb = record_new_ref_if_needed;
    4825          10 :         } else if (cmd == BTRFS_COMPARE_TREE_DELETED) {
    4826          10 :                 root = sctx->parent_root;
    4827          10 :                 cb = record_deleted_ref_if_needed;
    4828             :         } else {
    4829           0 :                 btrfs_err(sctx->send_root->fs_info,
    4830             :                                 "Wrong command %d in process_all_refs", cmd);
    4831           0 :                 ret = -EINVAL;
    4832           0 :                 goto out;
    4833             :         }
    4834             : 
    4835          17 :         key.objectid = sctx->cmp_key->objectid;
    4836          17 :         key.type = BTRFS_INODE_REF_KEY;
    4837          17 :         key.offset = 0;
    4838          34 :         btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
    4839          31 :                 if (found_key.objectid != key.objectid ||
    4840          24 :                     (found_key.type != BTRFS_INODE_REF_KEY &&
    4841             :                      found_key.type != BTRFS_INODE_EXTREF_KEY))
    4842             :                         break;
    4843             : 
    4844          17 :                 ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx);
    4845          17 :                 if (ret < 0)
    4846           0 :                         goto out;
    4847             :         }
    4848             :         /* Catch error found during iteration */
    4849          17 :         if (iter_ret < 0) {
    4850           0 :                 ret = iter_ret;
    4851           0 :                 goto out;
    4852             :         }
    4853          17 :         btrfs_release_path(path);
    4854             : 
    4855             :         /*
    4856             :          * We don't actually care about pending_move as we are simply
    4857             :          * re-creating this inode and will be rename'ing it into place once we
    4858             :          * rename the parent directory.
    4859             :          */
    4860          17 :         ret = process_recorded_refs(sctx, &pending_move);
    4861          17 : out:
    4862          17 :         btrfs_free_path(path);
    4863          17 :         return ret;
    4864             : }
    4865             : 
    4866      800337 : static int send_set_xattr(struct send_ctx *sctx,
    4867             :                           struct fs_path *path,
    4868             :                           const char *name, int name_len,
    4869             :                           const char *data, int data_len)
    4870             : {
    4871      800337 :         int ret = 0;
    4872             : 
    4873      800337 :         ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
    4874      800337 :         if (ret < 0)
    4875           0 :                 goto out;
    4876             : 
    4877      800337 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
    4878      800337 :         TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
    4879      800337 :         TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len);
    4880             : 
    4881      800337 :         ret = send_cmd(sctx);
    4882             : 
    4883      800337 : tlv_put_failure:
    4884      800337 : out:
    4885      800337 :         return ret;
    4886             : }
    4887             : 
    4888          25 : static int send_remove_xattr(struct send_ctx *sctx,
    4889             :                           struct fs_path *path,
    4890             :                           const char *name, int name_len)
    4891             : {
    4892          25 :         int ret = 0;
    4893             : 
    4894          25 :         ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR);
    4895          25 :         if (ret < 0)
    4896           0 :                 goto out;
    4897             : 
    4898          25 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
    4899          25 :         TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
    4900             : 
    4901          25 :         ret = send_cmd(sctx);
    4902             : 
    4903          25 : tlv_put_failure:
    4904          25 : out:
    4905          25 :         return ret;
    4906             : }
    4907             : 
    4908      800333 : static int __process_new_xattr(int num, struct btrfs_key *di_key,
    4909             :                                const char *name, int name_len, const char *data,
    4910             :                                int data_len, void *ctx)
    4911             : {
    4912      800333 :         int ret;
    4913      800333 :         struct send_ctx *sctx = ctx;
    4914      800333 :         struct fs_path *p;
    4915      800333 :         struct posix_acl_xattr_header dummy_acl;
    4916             : 
    4917             :         /* Capabilities are emitted by finish_inode_if_needed */
    4918      800333 :         if (!strncmp(name, XATTR_NAME_CAPS, name_len))
    4919             :                 return 0;
    4920             : 
    4921      800321 :         p = fs_path_alloc();
    4922      800321 :         if (!p)
    4923             :                 return -ENOMEM;
    4924             : 
    4925             :         /*
    4926             :          * This hack is needed because empty acls are stored as zero byte
    4927             :          * data in xattrs. Problem with that is, that receiving these zero byte
    4928             :          * acls will fail later. To fix this, we send a dummy acl list that
    4929             :          * only contains the version number and no entries.
    4930             :          */
    4931      800321 :         if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) ||
    4932      800321 :             !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) {
    4933           0 :                 if (data_len == 0) {
    4934           0 :                         dummy_acl.a_version =
    4935             :                                         cpu_to_le32(POSIX_ACL_XATTR_VERSION);
    4936           0 :                         data = (char *)&dummy_acl;
    4937           0 :                         data_len = sizeof(dummy_acl);
    4938             :                 }
    4939             :         }
    4940             : 
    4941      800321 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
    4942      800321 :         if (ret < 0)
    4943           0 :                 goto out;
    4944             : 
    4945      800321 :         ret = send_set_xattr(sctx, p, name, name_len, data, data_len);
    4946             : 
    4947      800321 : out:
    4948      800321 :         fs_path_free(p);
    4949      800321 :         return ret;
    4950             : }
    4951             : 
    4952          25 : static int __process_deleted_xattr(int num, struct btrfs_key *di_key,
    4953             :                                    const char *name, int name_len,
    4954             :                                    const char *data, int data_len, void *ctx)
    4955             : {
    4956          25 :         int ret;
    4957          25 :         struct send_ctx *sctx = ctx;
    4958          25 :         struct fs_path *p;
    4959             : 
    4960          25 :         p = fs_path_alloc();
    4961          25 :         if (!p)
    4962             :                 return -ENOMEM;
    4963             : 
    4964          25 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
    4965          25 :         if (ret < 0)
    4966           0 :                 goto out;
    4967             : 
    4968          25 :         ret = send_remove_xattr(sctx, p, name, name_len);
    4969             : 
    4970          25 : out:
    4971          25 :         fs_path_free(p);
    4972          25 :         return ret;
    4973             : }
    4974             : 
    4975      800289 : static int process_new_xattr(struct send_ctx *sctx)
    4976             : {
    4977      800289 :         int ret = 0;
    4978             : 
    4979      800289 :         ret = iterate_dir_item(sctx->send_root, sctx->left_path,
    4980             :                                __process_new_xattr, sctx);
    4981             : 
    4982      800289 :         return ret;
    4983             : }
    4984             : 
    4985          25 : static int process_deleted_xattr(struct send_ctx *sctx)
    4986             : {
    4987          25 :         return iterate_dir_item(sctx->parent_root, sctx->right_path,
    4988             :                                 __process_deleted_xattr, sctx);
    4989             : }
    4990             : 
    4991             : struct find_xattr_ctx {
    4992             :         const char *name;
    4993             :         int name_len;
    4994             :         int found_idx;
    4995             :         char *found_data;
    4996             :         int found_data_len;
    4997             : };
    4998             : 
    4999          96 : static int __find_xattr(int num, struct btrfs_key *di_key, const char *name,
    5000             :                         int name_len, const char *data, int data_len, void *vctx)
    5001             : {
    5002          96 :         struct find_xattr_ctx *ctx = vctx;
    5003             : 
    5004          96 :         if (name_len == ctx->name_len &&
    5005          96 :             strncmp(name, ctx->name, name_len) == 0) {
    5006          96 :                 ctx->found_idx = num;
    5007          96 :                 ctx->found_data_len = data_len;
    5008          96 :                 ctx->found_data = kmemdup(data, data_len, GFP_KERNEL);
    5009          96 :                 if (!ctx->found_data)
    5010             :                         return -ENOMEM;
    5011          96 :                 return 1;
    5012             :         }
    5013             :         return 0;
    5014             : }
    5015             : 
    5016          96 : static int find_xattr(struct btrfs_root *root,
    5017             :                       struct btrfs_path *path,
    5018             :                       struct btrfs_key *key,
    5019             :                       const char *name, int name_len,
    5020             :                       char **data, int *data_len)
    5021             : {
    5022          96 :         int ret;
    5023          96 :         struct find_xattr_ctx ctx;
    5024             : 
    5025          96 :         ctx.name = name;
    5026          96 :         ctx.name_len = name_len;
    5027          96 :         ctx.found_idx = -1;
    5028          96 :         ctx.found_data = NULL;
    5029          96 :         ctx.found_data_len = 0;
    5030             : 
    5031          96 :         ret = iterate_dir_item(root, path, __find_xattr, &ctx);
    5032          96 :         if (ret < 0)
    5033             :                 return ret;
    5034             : 
    5035          96 :         if (ctx.found_idx == -1)
    5036             :                 return -ENOENT;
    5037          96 :         if (data) {
    5038          48 :                 *data = ctx.found_data;
    5039          48 :                 *data_len = ctx.found_data_len;
    5040             :         } else {
    5041          48 :                 kfree(ctx.found_data);
    5042             :         }
    5043          96 :         return ctx.found_idx;
    5044             : }
    5045             : 
    5046             : 
    5047          48 : static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
    5048             :                                        const char *name, int name_len,
    5049             :                                        const char *data, int data_len,
    5050             :                                        void *ctx)
    5051             : {
    5052          48 :         int ret;
    5053          48 :         struct send_ctx *sctx = ctx;
    5054          48 :         char *found_data = NULL;
    5055          48 :         int found_data_len  = 0;
    5056             : 
    5057          48 :         ret = find_xattr(sctx->parent_root, sctx->right_path,
    5058             :                          sctx->cmp_key, name, name_len, &found_data,
    5059             :                          &found_data_len);
    5060          48 :         if (ret == -ENOENT) {
    5061           0 :                 ret = __process_new_xattr(num, di_key, name, name_len, data,
    5062             :                                           data_len, ctx);
    5063          48 :         } else if (ret >= 0) {
    5064          56 :                 if (data_len != found_data_len ||
    5065           8 :                     memcmp(data, found_data, data_len)) {
    5066          44 :                         ret = __process_new_xattr(num, di_key, name, name_len,
    5067             :                                                   data, data_len, ctx);
    5068             :                 } else {
    5069             :                         ret = 0;
    5070             :                 }
    5071             :         }
    5072             : 
    5073          48 :         kfree(found_data);
    5074          48 :         return ret;
    5075             : }
    5076             : 
    5077          48 : static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
    5078             :                                            const char *name, int name_len,
    5079             :                                            const char *data, int data_len,
    5080             :                                            void *ctx)
    5081             : {
    5082          48 :         int ret;
    5083          48 :         struct send_ctx *sctx = ctx;
    5084             : 
    5085          48 :         ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key,
    5086             :                          name, name_len, NULL, NULL);
    5087          48 :         if (ret == -ENOENT)
    5088           0 :                 ret = __process_deleted_xattr(num, di_key, name, name_len, data,
    5089             :                                               data_len, ctx);
    5090          48 :         else if (ret >= 0)
    5091             :                 ret = 0;
    5092             : 
    5093          48 :         return ret;
    5094             : }
    5095             : 
    5096          48 : static int process_changed_xattr(struct send_ctx *sctx)
    5097             : {
    5098          48 :         int ret = 0;
    5099             : 
    5100          48 :         ret = iterate_dir_item(sctx->send_root, sctx->left_path,
    5101             :                         __process_changed_new_xattr, sctx);
    5102          48 :         if (ret < 0)
    5103           0 :                 goto out;
    5104          48 :         ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
    5105             :                         __process_changed_deleted_xattr, sctx);
    5106             : 
    5107          48 : out:
    5108          48 :         return ret;
    5109             : }
    5110             : 
    5111           7 : static int process_all_new_xattrs(struct send_ctx *sctx)
    5112             : {
    5113           7 :         int ret = 0;
    5114           7 :         int iter_ret = 0;
    5115           7 :         struct btrfs_root *root;
    5116           7 :         struct btrfs_path *path;
    5117           7 :         struct btrfs_key key;
    5118           7 :         struct btrfs_key found_key;
    5119             : 
    5120           7 :         path = alloc_path_for_send();
    5121           7 :         if (!path)
    5122             :                 return -ENOMEM;
    5123             : 
    5124           7 :         root = sctx->send_root;
    5125             : 
    5126           7 :         key.objectid = sctx->cmp_key->objectid;
    5127           7 :         key.type = BTRFS_XATTR_ITEM_KEY;
    5128           7 :         key.offset = 0;
    5129           7 :         btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
    5130           5 :                 if (found_key.objectid != key.objectid ||
    5131           2 :                     found_key.type != key.type) {
    5132             :                         ret = 0;
    5133             :                         break;
    5134             :                 }
    5135             : 
    5136           0 :                 ret = iterate_dir_item(root, path, __process_new_xattr, sctx);
    5137           0 :                 if (ret < 0)
    5138             :                         break;
    5139             :         }
    5140             :         /* Catch error found during iteration */
    5141           7 :         if (iter_ret < 0)
    5142           0 :                 ret = iter_ret;
    5143             : 
    5144           7 :         btrfs_free_path(path);
    5145           7 :         return ret;
    5146             : }
    5147             : 
    5148             : static int send_verity(struct send_ctx *sctx, struct fs_path *path,
    5149             :                        struct fsverity_descriptor *desc)
    5150             : {
    5151             :         int ret;
    5152             : 
    5153             :         ret = begin_cmd(sctx, BTRFS_SEND_C_ENABLE_VERITY);
    5154             :         if (ret < 0)
    5155             :                 goto out;
    5156             : 
    5157             :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
    5158             :         TLV_PUT_U8(sctx, BTRFS_SEND_A_VERITY_ALGORITHM,
    5159             :                         le8_to_cpu(desc->hash_algorithm));
    5160             :         TLV_PUT_U32(sctx, BTRFS_SEND_A_VERITY_BLOCK_SIZE,
    5161             :                         1U << le8_to_cpu(desc->log_blocksize));
    5162             :         TLV_PUT(sctx, BTRFS_SEND_A_VERITY_SALT_DATA, desc->salt,
    5163             :                         le8_to_cpu(desc->salt_size));
    5164             :         TLV_PUT(sctx, BTRFS_SEND_A_VERITY_SIG_DATA, desc->signature,
    5165             :                         le32_to_cpu(desc->sig_size));
    5166             : 
    5167             :         ret = send_cmd(sctx);
    5168             : 
    5169             : tlv_put_failure:
    5170             : out:
    5171             :         return ret;
    5172             : }
    5173             : 
    5174           0 : static int process_verity(struct send_ctx *sctx)
    5175             : {
    5176           0 :         int ret = 0;
    5177           0 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    5178           0 :         struct inode *inode;
    5179           0 :         struct fs_path *p;
    5180             : 
    5181           0 :         inode = btrfs_iget(fs_info->sb, sctx->cur_ino, sctx->send_root);
    5182           0 :         if (IS_ERR(inode))
    5183           0 :                 return PTR_ERR(inode);
    5184             : 
    5185           0 :         ret = btrfs_get_verity_descriptor(inode, NULL, 0);
    5186           0 :         if (ret < 0)
    5187           0 :                 goto iput;
    5188             : 
    5189             :         if (ret > FS_VERITY_MAX_DESCRIPTOR_SIZE) {
    5190             :                 ret = -EMSGSIZE;
    5191             :                 goto iput;
    5192             :         }
    5193             :         if (!sctx->verity_descriptor) {
    5194             :                 sctx->verity_descriptor = kvmalloc(FS_VERITY_MAX_DESCRIPTOR_SIZE,
    5195             :                                                    GFP_KERNEL);
    5196             :                 if (!sctx->verity_descriptor) {
    5197             :                         ret = -ENOMEM;
    5198             :                         goto iput;
    5199             :                 }
    5200             :         }
    5201             : 
    5202             :         ret = btrfs_get_verity_descriptor(inode, sctx->verity_descriptor, ret);
    5203             :         if (ret < 0)
    5204             :                 goto iput;
    5205             : 
    5206             :         p = fs_path_alloc();
    5207             :         if (!p) {
    5208             :                 ret = -ENOMEM;
    5209             :                 goto iput;
    5210             :         }
    5211             :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
    5212             :         if (ret < 0)
    5213             :                 goto free_path;
    5214             : 
    5215             :         ret = send_verity(sctx, p, sctx->verity_descriptor);
    5216             :         if (ret < 0)
    5217             :                 goto free_path;
    5218             : 
    5219             : free_path:
    5220             :         fs_path_free(p);
    5221             : iput:
    5222           0 :         iput(inode);
    5223           0 :         return ret;
    5224             : }
    5225             : 
    5226             : static inline u64 max_send_read_size(const struct send_ctx *sctx)
    5227             : {
    5228      630622 :         return sctx->send_max_size - SZ_16K;
    5229             : }
    5230             : 
    5231     1258900 : static int put_data_header(struct send_ctx *sctx, u32 len)
    5232             : {
    5233     1258900 :         if (WARN_ON_ONCE(sctx->put_data))
    5234             :                 return -EINVAL;
    5235     1258900 :         sctx->put_data = true;
    5236     1258900 :         if (sctx->proto >= 2) {
    5237             :                 /*
    5238             :                  * Since v2, the data attribute header doesn't include a length,
    5239             :                  * it is implicitly to the end of the command.
    5240             :                  */
    5241           0 :                 if (sctx->send_max_size - sctx->send_size < sizeof(__le16) + len)
    5242             :                         return -EOVERFLOW;
    5243           0 :                 put_unaligned_le16(BTRFS_SEND_A_DATA, sctx->send_buf + sctx->send_size);
    5244           0 :                 sctx->send_size += sizeof(__le16);
    5245             :         } else {
    5246     1258900 :                 struct btrfs_tlv_header *hdr;
    5247             : 
    5248     1258900 :                 if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len)
    5249             :                         return -EOVERFLOW;
    5250     1258900 :                 hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size);
    5251     1258900 :                 put_unaligned_le16(BTRFS_SEND_A_DATA, &hdr->tlv_type);
    5252     1258900 :                 put_unaligned_le16(len, &hdr->tlv_len);
    5253     1258900 :                 sctx->send_size += sizeof(*hdr);
    5254             :         }
    5255             :         return 0;
    5256             : }
    5257             : 
    5258     1232144 : static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
    5259             : {
    5260     1232144 :         struct btrfs_root *root = sctx->send_root;
    5261     1232144 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5262     1232144 :         struct page *page;
    5263     1232144 :         pgoff_t index = offset >> PAGE_SHIFT;
    5264     1232144 :         pgoff_t last_index;
    5265     1232144 :         unsigned pg_offset = offset_in_page(offset);
    5266     1232144 :         int ret;
    5267             : 
    5268     1232144 :         ret = put_data_header(sctx, len);
    5269     1232491 :         if (ret)
    5270             :                 return ret;
    5271             : 
    5272     1232491 :         last_index = (offset + len - 1) >> PAGE_SHIFT;
    5273             : 
    5274    10945011 :         while (index <= last_index) {
    5275     9711983 :                 unsigned cur_len = min_t(unsigned, len,
    5276             :                                          PAGE_SIZE - pg_offset);
    5277             : 
    5278     9711983 :                 page = find_lock_page(sctx->cur_inode->i_mapping, index);
    5279     9712135 :                 if (!page) {
    5280      573942 :                         page_cache_sync_readahead(sctx->cur_inode->i_mapping,
    5281             :                                                   &sctx->ra, NULL, index,
    5282      573942 :                                                   last_index + 1 - index);
    5283             : 
    5284      573936 :                         page = find_or_create_page(sctx->cur_inode->i_mapping,
    5285             :                                                    index, GFP_KERNEL);
    5286      573926 :                         if (!page) {
    5287             :                                 ret = -ENOMEM;
    5288             :                                 break;
    5289             :                         }
    5290             :                 }
    5291             : 
    5292     9712119 :                 if (PageReadahead(page))
    5293         232 :                         page_cache_async_readahead(sctx->cur_inode->i_mapping,
    5294         232 :                                                    &sctx->ra, NULL, page_folio(page),
    5295         232 :                                                    index, last_index + 1 - index);
    5296             : 
    5297     9712119 :                 if (!PageUptodate(page)) {
    5298           0 :                         btrfs_read_folio(NULL, page_folio(page));
    5299           0 :                         lock_page(page);
    5300           0 :                         if (!PageUptodate(page)) {
    5301           0 :                                 unlock_page(page);
    5302           0 :                                 btrfs_err(fs_info,
    5303             :                         "send: IO error at offset %llu for inode %llu root %llu",
    5304             :                                         page_offset(page), sctx->cur_ino,
    5305             :                                         sctx->send_root->root_key.objectid);
    5306           0 :                                 put_page(page);
    5307           0 :                                 ret = -EIO;
    5308           0 :                                 break;
    5309             :                         }
    5310             :                 }
    5311             : 
    5312     9711902 :                 memcpy_from_page(sctx->send_buf + sctx->send_size, page,
    5313             :                                  pg_offset, cur_len);
    5314     9712501 :                 unlock_page(page);
    5315     9712722 :                 put_page(page);
    5316     9712520 :                 index++;
    5317     9712520 :                 pg_offset = 0;
    5318     9712520 :                 len -= cur_len;
    5319     9712520 :                 sctx->send_size += cur_len;
    5320             :         }
    5321             : 
    5322             :         return ret;
    5323             : }
    5324             : 
    5325             : /*
    5326             :  * Read some bytes from the current inode/file and send a write command to
    5327             :  * user space.
    5328             :  */
    5329     1232618 : static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
    5330             : {
    5331     1232618 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    5332     1232618 :         int ret = 0;
    5333     1232618 :         struct fs_path *p;
    5334             : 
    5335     1232618 :         p = fs_path_alloc();
    5336     1232338 :         if (!p)
    5337             :                 return -ENOMEM;
    5338             : 
    5339     1232338 :         btrfs_debug(fs_info, "send_write offset=%llu, len=%d", offset, len);
    5340             : 
    5341     1232338 :         ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
    5342     1232564 :         if (ret < 0)
    5343           0 :                 goto out;
    5344             : 
    5345     1232564 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
    5346     1232237 :         if (ret < 0)
    5347           0 :                 goto out;
    5348             : 
    5349     1232237 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    5350     1232038 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
    5351     1232153 :         ret = put_file_data(sctx, offset, len);
    5352     1233016 :         if (ret < 0)
    5353           0 :                 goto out;
    5354             : 
    5355     1233016 :         ret = send_cmd(sctx);
    5356             : 
    5357     1232594 : tlv_put_failure:
    5358     1232594 : out:
    5359     1232594 :         fs_path_free(p);
    5360     1232594 :         return ret;
    5361             : }
    5362             : 
    5363             : /*
    5364             :  * Send a clone command to user space.
    5365             :  */
    5366        3980 : static int send_clone(struct send_ctx *sctx,
    5367             :                       u64 offset, u32 len,
    5368             :                       struct clone_root *clone_root)
    5369             : {
    5370        3980 :         int ret = 0;
    5371        3980 :         struct fs_path *p;
    5372        3980 :         u64 gen;
    5373             : 
    5374        3980 :         btrfs_debug(sctx->send_root->fs_info,
    5375             :                     "send_clone offset=%llu, len=%d, clone_root=%llu, clone_inode=%llu, clone_offset=%llu",
    5376             :                     offset, len, clone_root->root->root_key.objectid,
    5377             :                     clone_root->ino, clone_root->offset);
    5378             : 
    5379        3980 :         p = fs_path_alloc();
    5380        3980 :         if (!p)
    5381             :                 return -ENOMEM;
    5382             : 
    5383        3980 :         ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE);
    5384        3980 :         if (ret < 0)
    5385           0 :                 goto out;
    5386             : 
    5387        3980 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
    5388        3980 :         if (ret < 0)
    5389           0 :                 goto out;
    5390             : 
    5391        3980 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
    5392        3980 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len);
    5393        3980 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    5394             : 
    5395        3980 :         if (clone_root->root == sctx->send_root) {
    5396        2205 :                 ret = get_inode_gen(sctx->send_root, clone_root->ino, &gen);
    5397        2205 :                 if (ret < 0)
    5398           0 :                         goto out;
    5399        2205 :                 ret = get_cur_path(sctx, clone_root->ino, gen, p);
    5400             :         } else {
    5401        1775 :                 ret = get_inode_path(clone_root->root, clone_root->ino, p);
    5402             :         }
    5403        3980 :         if (ret < 0)
    5404           0 :                 goto out;
    5405             : 
    5406             :         /*
    5407             :          * If the parent we're using has a received_uuid set then use that as
    5408             :          * our clone source as that is what we will look for when doing a
    5409             :          * receive.
    5410             :          *
    5411             :          * This covers the case that we create a snapshot off of a received
    5412             :          * subvolume and then use that as the parent and try to receive on a
    5413             :          * different host.
    5414             :          */
    5415        3980 :         if (!btrfs_is_empty_uuid(clone_root->root->root_item.received_uuid))
    5416           0 :                 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
    5417             :                              clone_root->root->root_item.received_uuid);
    5418             :         else
    5419        3980 :                 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
    5420             :                              clone_root->root->root_item.uuid);
    5421        3980 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
    5422             :                     btrfs_root_ctransid(&clone_root->root->root_item));
    5423        3980 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
    5424        3980 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
    5425             :                         clone_root->offset);
    5426             : 
    5427        3980 :         ret = send_cmd(sctx);
    5428             : 
    5429        3980 : tlv_put_failure:
    5430        3980 : out:
    5431        3980 :         fs_path_free(p);
    5432        3980 :         return ret;
    5433             : }
    5434             : 
    5435             : /*
    5436             :  * Send an update extent command to user space.
    5437             :  */
    5438           0 : static int send_update_extent(struct send_ctx *sctx,
    5439             :                               u64 offset, u32 len)
    5440             : {
    5441           0 :         int ret = 0;
    5442           0 :         struct fs_path *p;
    5443             : 
    5444           0 :         p = fs_path_alloc();
    5445           0 :         if (!p)
    5446             :                 return -ENOMEM;
    5447             : 
    5448           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT);
    5449           0 :         if (ret < 0)
    5450           0 :                 goto out;
    5451             : 
    5452           0 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
    5453           0 :         if (ret < 0)
    5454           0 :                 goto out;
    5455             : 
    5456           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    5457           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
    5458           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
    5459             : 
    5460           0 :         ret = send_cmd(sctx);
    5461             : 
    5462           0 : tlv_put_failure:
    5463           0 : out:
    5464           0 :         fs_path_free(p);
    5465           0 :         return ret;
    5466             : }
    5467             : 
    5468        2785 : static int send_hole(struct send_ctx *sctx, u64 end)
    5469             : {
    5470        2785 :         struct fs_path *p = NULL;
    5471        2785 :         u64 read_size = max_send_read_size(sctx);
    5472        2785 :         u64 offset = sctx->cur_inode_last_extent;
    5473        2785 :         int ret = 0;
    5474             : 
    5475             :         /*
    5476             :          * A hole that starts at EOF or beyond it. Since we do not yet support
    5477             :          * fallocate (for extent preallocation and hole punching), sending a
    5478             :          * write of zeroes starting at EOF or beyond would later require issuing
    5479             :          * a truncate operation which would undo the write and achieve nothing.
    5480             :          */
    5481        2785 :         if (offset >= sctx->cur_inode_size)
    5482             :                 return 0;
    5483             : 
    5484             :         /*
    5485             :          * Don't go beyond the inode's i_size due to prealloc extents that start
    5486             :          * after the i_size.
    5487             :          */
    5488        2781 :         end = min_t(u64, end, sctx->cur_inode_size);
    5489             : 
    5490        2781 :         if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
    5491           0 :                 return send_update_extent(sctx, offset, end - offset);
    5492             : 
    5493        2781 :         p = fs_path_alloc();
    5494        2781 :         if (!p)
    5495             :                 return -ENOMEM;
    5496        2781 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
    5497        2781 :         if (ret < 0)
    5498           0 :                 goto tlv_put_failure;
    5499       29243 :         while (offset < end) {
    5500       26462 :                 u64 len = min(end - offset, read_size);
    5501             : 
    5502       26462 :                 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
    5503       26462 :                 if (ret < 0)
    5504             :                         break;
    5505       26462 :                 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    5506       26462 :                 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
    5507       26462 :                 ret = put_data_header(sctx, len);
    5508       26462 :                 if (ret < 0)
    5509             :                         break;
    5510       26462 :                 memset(sctx->send_buf + sctx->send_size, 0, len);
    5511       26462 :                 sctx->send_size += len;
    5512       26462 :                 ret = send_cmd(sctx);
    5513       26462 :                 if (ret < 0)
    5514             :                         break;
    5515       26462 :                 offset += len;
    5516             :         }
    5517        2781 :         sctx->cur_inode_next_write_offset = offset;
    5518        2781 : tlv_put_failure:
    5519        2781 :         fs_path_free(p);
    5520        2781 :         return ret;
    5521             : }
    5522             : 
    5523           0 : static int send_encoded_inline_extent(struct send_ctx *sctx,
    5524             :                                       struct btrfs_path *path, u64 offset,
    5525             :                                       u64 len)
    5526             : {
    5527           0 :         struct btrfs_root *root = sctx->send_root;
    5528           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5529           0 :         struct inode *inode;
    5530           0 :         struct fs_path *fspath;
    5531           0 :         struct extent_buffer *leaf = path->nodes[0];
    5532           0 :         struct btrfs_key key;
    5533           0 :         struct btrfs_file_extent_item *ei;
    5534           0 :         u64 ram_bytes;
    5535           0 :         size_t inline_size;
    5536           0 :         int ret;
    5537             : 
    5538           0 :         inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root);
    5539           0 :         if (IS_ERR(inode))
    5540           0 :                 return PTR_ERR(inode);
    5541             : 
    5542           0 :         fspath = fs_path_alloc();
    5543           0 :         if (!fspath) {
    5544           0 :                 ret = -ENOMEM;
    5545           0 :                 goto out;
    5546             :         }
    5547             : 
    5548           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_ENCODED_WRITE);
    5549           0 :         if (ret < 0)
    5550           0 :                 goto out;
    5551             : 
    5552           0 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
    5553           0 :         if (ret < 0)
    5554           0 :                 goto out;
    5555             : 
    5556           0 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    5557           0 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
    5558           0 :         ram_bytes = btrfs_file_extent_ram_bytes(leaf, ei);
    5559           0 :         inline_size = btrfs_file_extent_inline_item_len(leaf, path->slots[0]);
    5560             : 
    5561           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, fspath);
    5562           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
    5563           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_FILE_LEN,
    5564             :                     min(key.offset + ram_bytes - offset, len));
    5565           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_LEN, ram_bytes);
    5566           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_OFFSET, offset - key.offset);
    5567           0 :         ret = btrfs_encoded_io_compression_from_extent(fs_info,
    5568             :                                 btrfs_file_extent_compression(leaf, ei));
    5569           0 :         if (ret < 0)
    5570           0 :                 goto out;
    5571           0 :         TLV_PUT_U32(sctx, BTRFS_SEND_A_COMPRESSION, ret);
    5572             : 
    5573           0 :         ret = put_data_header(sctx, inline_size);
    5574           0 :         if (ret < 0)
    5575           0 :                 goto out;
    5576           0 :         read_extent_buffer(leaf, sctx->send_buf + sctx->send_size,
    5577             :                            btrfs_file_extent_inline_start(ei), inline_size);
    5578           0 :         sctx->send_size += inline_size;
    5579             : 
    5580           0 :         ret = send_cmd(sctx);
    5581             : 
    5582           0 : tlv_put_failure:
    5583           0 : out:
    5584           0 :         fs_path_free(fspath);
    5585           0 :         iput(inode);
    5586           0 :         return ret;
    5587             : }
    5588             : 
    5589           0 : static int send_encoded_extent(struct send_ctx *sctx, struct btrfs_path *path,
    5590             :                                u64 offset, u64 len)
    5591             : {
    5592           0 :         struct btrfs_root *root = sctx->send_root;
    5593           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5594           0 :         struct inode *inode;
    5595           0 :         struct fs_path *fspath;
    5596           0 :         struct extent_buffer *leaf = path->nodes[0];
    5597           0 :         struct btrfs_key key;
    5598           0 :         struct btrfs_file_extent_item *ei;
    5599           0 :         u64 disk_bytenr, disk_num_bytes;
    5600           0 :         u32 data_offset;
    5601           0 :         struct btrfs_cmd_header *hdr;
    5602           0 :         u32 crc;
    5603           0 :         int ret;
    5604             : 
    5605           0 :         inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root);
    5606           0 :         if (IS_ERR(inode))
    5607           0 :                 return PTR_ERR(inode);
    5608             : 
    5609           0 :         fspath = fs_path_alloc();
    5610           0 :         if (!fspath) {
    5611           0 :                 ret = -ENOMEM;
    5612           0 :                 goto out;
    5613             :         }
    5614             : 
    5615           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_ENCODED_WRITE);
    5616           0 :         if (ret < 0)
    5617           0 :                 goto out;
    5618             : 
    5619           0 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
    5620           0 :         if (ret < 0)
    5621           0 :                 goto out;
    5622             : 
    5623           0 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    5624           0 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
    5625           0 :         disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei);
    5626           0 :         disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, ei);
    5627             : 
    5628           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, fspath);
    5629           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
    5630           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_FILE_LEN,
    5631             :                     min(key.offset + btrfs_file_extent_num_bytes(leaf, ei) - offset,
    5632             :                         len));
    5633           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_LEN,
    5634             :                     btrfs_file_extent_ram_bytes(leaf, ei));
    5635           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_OFFSET,
    5636             :                     offset - key.offset + btrfs_file_extent_offset(leaf, ei));
    5637           0 :         ret = btrfs_encoded_io_compression_from_extent(fs_info,
    5638             :                                 btrfs_file_extent_compression(leaf, ei));
    5639           0 :         if (ret < 0)
    5640           0 :                 goto out;
    5641           0 :         TLV_PUT_U32(sctx, BTRFS_SEND_A_COMPRESSION, ret);
    5642           0 :         TLV_PUT_U32(sctx, BTRFS_SEND_A_ENCRYPTION, 0);
    5643             : 
    5644           0 :         ret = put_data_header(sctx, disk_num_bytes);
    5645           0 :         if (ret < 0)
    5646           0 :                 goto out;
    5647             : 
    5648             :         /*
    5649             :          * We want to do I/O directly into the send buffer, so get the next page
    5650             :          * boundary in the send buffer. This means that there may be a gap
    5651             :          * between the beginning of the command and the file data.
    5652             :          */
    5653           0 :         data_offset = PAGE_ALIGN(sctx->send_size);
    5654           0 :         if (data_offset > sctx->send_max_size ||
    5655           0 :             sctx->send_max_size - data_offset < disk_num_bytes) {
    5656           0 :                 ret = -EOVERFLOW;
    5657           0 :                 goto out;
    5658             :         }
    5659             : 
    5660             :         /*
    5661             :          * Note that send_buf is a mapping of send_buf_pages, so this is really
    5662             :          * reading into send_buf.
    5663             :          */
    5664           0 :         ret = btrfs_encoded_read_regular_fill_pages(BTRFS_I(inode), offset,
    5665             :                                                     disk_bytenr, disk_num_bytes,
    5666           0 :                                                     sctx->send_buf_pages +
    5667           0 :                                                     (data_offset >> PAGE_SHIFT));
    5668           0 :         if (ret)
    5669           0 :                 goto out;
    5670             : 
    5671           0 :         hdr = (struct btrfs_cmd_header *)sctx->send_buf;
    5672           0 :         hdr->len = cpu_to_le32(sctx->send_size + disk_num_bytes - sizeof(*hdr));
    5673           0 :         hdr->crc = 0;
    5674           0 :         crc = btrfs_crc32c(0, sctx->send_buf, sctx->send_size);
    5675           0 :         crc = btrfs_crc32c(crc, sctx->send_buf + data_offset, disk_num_bytes);
    5676           0 :         hdr->crc = cpu_to_le32(crc);
    5677             : 
    5678           0 :         ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
    5679             :                         &sctx->send_off);
    5680           0 :         if (!ret) {
    5681           0 :                 ret = write_buf(sctx->send_filp, sctx->send_buf + data_offset,
    5682             :                                 disk_num_bytes, &sctx->send_off);
    5683             :         }
    5684           0 :         sctx->send_size = 0;
    5685           0 :         sctx->put_data = false;
    5686             : 
    5687           0 : tlv_put_failure:
    5688           0 : out:
    5689           0 :         fs_path_free(fspath);
    5690           0 :         iput(inode);
    5691           0 :         return ret;
    5692             : }
    5693             : 
    5694      627837 : static int send_extent_data(struct send_ctx *sctx, struct btrfs_path *path,
    5695             :                             const u64 offset, const u64 len)
    5696             : {
    5697      627837 :         const u64 end = offset + len;
    5698      627837 :         struct extent_buffer *leaf = path->nodes[0];
    5699      627837 :         struct btrfs_file_extent_item *ei;
    5700      627837 :         u64 read_size = max_send_read_size(sctx);
    5701      627837 :         u64 sent = 0;
    5702             : 
    5703      627837 :         if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
    5704           0 :                 return send_update_extent(sctx, offset, len);
    5705             : 
    5706      627837 :         ei = btrfs_item_ptr(leaf, path->slots[0],
    5707             :                             struct btrfs_file_extent_item);
    5708      627831 :         if ((sctx->flags & BTRFS_SEND_FLAG_COMPRESSED) &&
    5709             :             btrfs_file_extent_compression(leaf, ei) != BTRFS_COMPRESS_NONE) {
    5710           0 :                 bool is_inline = (btrfs_file_extent_type(leaf, ei) ==
    5711             :                                   BTRFS_FILE_EXTENT_INLINE);
    5712             : 
    5713             :                 /*
    5714             :                  * Send the compressed extent unless the compressed data is
    5715             :                  * larger than the decompressed data. This can happen if we're
    5716             :                  * not sending the entire extent, either because it has been
    5717             :                  * partially overwritten/truncated or because this is a part of
    5718             :                  * the extent that we couldn't clone in clone_range().
    5719             :                  */
    5720           0 :                 if (is_inline &&
    5721           0 :                     btrfs_file_extent_inline_item_len(leaf,
    5722             :                                                       path->slots[0]) <= len) {
    5723           0 :                         return send_encoded_inline_extent(sctx, path, offset,
    5724             :                                                           len);
    5725           0 :                 } else if (!is_inline &&
    5726             :                            btrfs_file_extent_disk_num_bytes(leaf, ei) <= len) {
    5727           0 :                         return send_encoded_extent(sctx, path, offset, len);
    5728             :                 }
    5729             :         }
    5730             : 
    5731      627831 :         if (sctx->cur_inode == NULL) {
    5732      602115 :                 struct btrfs_root *root = sctx->send_root;
    5733             : 
    5734      602115 :                 sctx->cur_inode = btrfs_iget(root->fs_info->sb, sctx->cur_ino, root);
    5735      602137 :                 if (IS_ERR(sctx->cur_inode)) {
    5736           0 :                         int err = PTR_ERR(sctx->cur_inode);
    5737             : 
    5738           0 :                         sctx->cur_inode = NULL;
    5739           0 :                         return err;
    5740             :                 }
    5741      602137 :                 memset(&sctx->ra, 0, sizeof(struct file_ra_state));
    5742      602137 :                 file_ra_state_init(&sctx->ra, sctx->cur_inode->i_mapping);
    5743             : 
    5744             :                 /*
    5745             :                  * It's very likely there are no pages from this inode in the page
    5746             :                  * cache, so after reading extents and sending their data, we clean
    5747             :                  * the page cache to avoid thrashing the page cache (adding pressure
    5748             :                  * to the page cache and forcing eviction of other data more useful
    5749             :                  * for applications).
    5750             :                  *
    5751             :                  * We decide if we should clean the page cache simply by checking
    5752             :                  * if the inode's mapping nrpages is 0 when we first open it, and
    5753             :                  * not by using something like filemap_range_has_page() before
    5754             :                  * reading an extent because when we ask the readahead code to
    5755             :                  * read a given file range, it may (and almost always does) read
    5756             :                  * pages from beyond that range (see the documentation for
    5757             :                  * page_cache_sync_readahead()), so it would not be reliable,
    5758             :                  * because after reading the first extent future calls to
    5759             :                  * filemap_range_has_page() would return true because the readahead
    5760             :                  * on the previous extent resulted in reading pages of the current
    5761             :                  * extent as well.
    5762             :                  */
    5763      602123 :                 sctx->clean_page_cache = (sctx->cur_inode->i_mapping->nrpages == 0);
    5764      602123 :                 sctx->page_cache_clear_start = round_down(offset, PAGE_SIZE);
    5765             :         }
    5766             : 
    5767     1859846 :         while (sent < len) {
    5768     1232681 :                 u64 size = min(len - sent, read_size);
    5769     1232681 :                 int ret;
    5770             : 
    5771     1232681 :                 ret = send_write(sctx, offset + sent, size);
    5772     1232007 :                 if (ret < 0)
    5773           0 :                         return ret;
    5774     1232007 :                 sent += size;
    5775             :         }
    5776             : 
    5777      627165 :         if (sctx->clean_page_cache && PAGE_ALIGNED(end)) {
    5778             :                 /*
    5779             :                  * Always operate only on ranges that are a multiple of the page
    5780             :                  * size. This is not only to prevent zeroing parts of a page in
    5781             :                  * the case of subpage sector size, but also to guarantee we evict
    5782             :                  * pages, as passing a range that is smaller than page size does
    5783             :                  * not evict the respective page (only zeroes part of its content).
    5784             :                  *
    5785             :                  * Always start from the end offset of the last range cleared.
    5786             :                  * This is because the readahead code may (and very often does)
    5787             :                  * read pages beyond the range we request for readahead. So if
    5788             :                  * we have an extent layout like this:
    5789             :                  *
    5790             :                  *            [ extent A ] [ extent B ] [ extent C ]
    5791             :                  *
    5792             :                  * When we ask page_cache_sync_readahead() to read extent A, it
    5793             :                  * may also trigger reads for pages of extent B. If we are doing
    5794             :                  * an incremental send and extent B has not changed between the
    5795             :                  * parent and send snapshots, some or all of its pages may end
    5796             :                  * up being read and placed in the page cache. So when truncating
    5797             :                  * the page cache we always start from the end offset of the
    5798             :                  * previously processed extent up to the end of the current
    5799             :                  * extent.
    5800             :                  */
    5801      571793 :                 truncate_inode_pages_range(&sctx->cur_inode->i_data,
    5802      571793 :                                            sctx->page_cache_clear_start,
    5803      571793 :                                            end - 1);
    5804      572130 :                 sctx->page_cache_clear_start = end;
    5805             :         }
    5806             : 
    5807             :         return 0;
    5808             : }
    5809             : 
    5810             : /*
    5811             :  * Search for a capability xattr related to sctx->cur_ino. If the capability is
    5812             :  * found, call send_set_xattr function to emit it.
    5813             :  *
    5814             :  * Return 0 if there isn't a capability, or when the capability was emitted
    5815             :  * successfully, or < 0 if an error occurred.
    5816             :  */
    5817     1008577 : static int send_capabilities(struct send_ctx *sctx)
    5818             : {
    5819     1008577 :         struct fs_path *fspath = NULL;
    5820     1008577 :         struct btrfs_path *path;
    5821     1008577 :         struct btrfs_dir_item *di;
    5822     1008577 :         struct extent_buffer *leaf;
    5823     1008577 :         unsigned long data_ptr;
    5824     1008577 :         char *buf = NULL;
    5825     1008577 :         int buf_len;
    5826     1008577 :         int ret = 0;
    5827             : 
    5828     1008577 :         path = alloc_path_for_send();
    5829     1008335 :         if (!path)
    5830             :                 return -ENOMEM;
    5831             : 
    5832     1008335 :         di = btrfs_lookup_xattr(NULL, sctx->send_root, path, sctx->cur_ino,
    5833             :                                 XATTR_NAME_CAPS, strlen(XATTR_NAME_CAPS), 0);
    5834     1009260 :         if (!di) {
    5835             :                 /* There is no xattr for this inode */
    5836     1009244 :                 goto out;
    5837          16 :         } else if (IS_ERR(di)) {
    5838           0 :                 ret = PTR_ERR(di);
    5839           0 :                 goto out;
    5840             :         }
    5841             : 
    5842          16 :         leaf = path->nodes[0];
    5843          16 :         buf_len = btrfs_dir_data_len(leaf, di);
    5844             : 
    5845          16 :         fspath = fs_path_alloc();
    5846          16 :         buf = kmalloc(buf_len, GFP_KERNEL);
    5847          16 :         if (!fspath || !buf) {
    5848           0 :                 ret = -ENOMEM;
    5849           0 :                 goto out;
    5850             :         }
    5851             : 
    5852          16 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
    5853          16 :         if (ret < 0)
    5854           0 :                 goto out;
    5855             : 
    5856          16 :         data_ptr = (unsigned long)(di + 1) + btrfs_dir_name_len(leaf, di);
    5857          16 :         read_extent_buffer(leaf, buf, data_ptr, buf_len);
    5858             : 
    5859          16 :         ret = send_set_xattr(sctx, fspath, XATTR_NAME_CAPS,
    5860             :                         strlen(XATTR_NAME_CAPS), buf, buf_len);
    5861     1009260 : out:
    5862     1009260 :         kfree(buf);
    5863     1009111 :         fs_path_free(fspath);
    5864     1008589 :         btrfs_free_path(path);
    5865     1008589 :         return ret;
    5866             : }
    5867             : 
    5868        4075 : static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
    5869             :                        struct clone_root *clone_root, const u64 disk_byte,
    5870             :                        u64 data_offset, u64 offset, u64 len)
    5871             : {
    5872        4075 :         struct btrfs_path *path;
    5873        4075 :         struct btrfs_key key;
    5874        4075 :         int ret;
    5875        4075 :         struct btrfs_inode_info info;
    5876        4075 :         u64 clone_src_i_size = 0;
    5877             : 
    5878             :         /*
    5879             :          * Prevent cloning from a zero offset with a length matching the sector
    5880             :          * size because in some scenarios this will make the receiver fail.
    5881             :          *
    5882             :          * For example, if in the source filesystem the extent at offset 0
    5883             :          * has a length of sectorsize and it was written using direct IO, then
    5884             :          * it can never be an inline extent (even if compression is enabled).
    5885             :          * Then this extent can be cloned in the original filesystem to a non
    5886             :          * zero file offset, but it may not be possible to clone in the
    5887             :          * destination filesystem because it can be inlined due to compression
    5888             :          * on the destination filesystem (as the receiver's write operations are
    5889             :          * always done using buffered IO). The same happens when the original
    5890             :          * filesystem does not have compression enabled but the destination
    5891             :          * filesystem has.
    5892             :          */
    5893        4075 :         if (clone_root->offset == 0 &&
    5894         387 :             len == sctx->send_root->fs_info->sectorsize)
    5895           1 :                 return send_extent_data(sctx, dst_path, offset, len);
    5896             : 
    5897        4074 :         path = alloc_path_for_send();
    5898        4074 :         if (!path)
    5899             :                 return -ENOMEM;
    5900             : 
    5901             :         /*
    5902             :          * There are inodes that have extents that lie beyond their i_size.
    5903             :          * Don't accept clones from these extents.
    5904             :          */
    5905        4074 :         ret = get_inode_info(clone_root->root, clone_root->ino, &info);
    5906        4074 :         btrfs_release_path(path);
    5907        4074 :         if (ret < 0)
    5908           0 :                 goto out;
    5909        4074 :         clone_src_i_size = info.size;
    5910             : 
    5911             :         /*
    5912             :          * We can't send a clone operation for the entire range if we find
    5913             :          * extent items in the respective range in the source file that
    5914             :          * refer to different extents or if we find holes.
    5915             :          * So check for that and do a mix of clone and regular write/copy
    5916             :          * operations if needed.
    5917             :          *
    5918             :          * Example:
    5919             :          *
    5920             :          * mkfs.btrfs -f /dev/sda
    5921             :          * mount /dev/sda /mnt
    5922             :          * xfs_io -f -c "pwrite -S 0xaa 0K 100K" /mnt/foo
    5923             :          * cp --reflink=always /mnt/foo /mnt/bar
    5924             :          * xfs_io -c "pwrite -S 0xbb 50K 50K" /mnt/foo
    5925             :          * btrfs subvolume snapshot -r /mnt /mnt/snap
    5926             :          *
    5927             :          * If, when we send the snapshot and are processing file bar (which has
    5928             :          * a higher inode number than foo), we blindly send a clone operation
    5929             :          * for the [0, 100K[ range from foo to bar, the receiver ends up getting
    5930             :          * a file bar that matches the content of file foo - iow, doesn't match
    5931             :          * the content from bar in the original filesystem.
    5932             :          */
    5933        4074 :         key.objectid = clone_root->ino;
    5934        4074 :         key.type = BTRFS_EXTENT_DATA_KEY;
    5935        4074 :         key.offset = clone_root->offset;
    5936        4074 :         ret = btrfs_search_slot(NULL, clone_root->root, &key, path, 0, 0);
    5937        4074 :         if (ret < 0)
    5938           0 :                 goto out;
    5939        4074 :         if (ret > 0 && path->slots[0] > 0) {
    5940        1106 :                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
    5941        1106 :                 if (key.objectid == clone_root->ino &&
    5942        1106 :                     key.type == BTRFS_EXTENT_DATA_KEY)
    5943        1106 :                         path->slots[0]--;
    5944             :         }
    5945             : 
    5946        4097 :         while (true) {
    5947        4097 :                 struct extent_buffer *leaf = path->nodes[0];
    5948        4097 :                 int slot = path->slots[0];
    5949        4097 :                 struct btrfs_file_extent_item *ei;
    5950        4097 :                 u8 type;
    5951        4097 :                 u64 ext_len;
    5952        4097 :                 u64 clone_len;
    5953        4097 :                 u64 clone_data_offset;
    5954        4097 :                 bool crossed_src_i_size = false;
    5955             : 
    5956        4097 :                 if (slot >= btrfs_header_nritems(leaf)) {
    5957           0 :                         ret = btrfs_next_leaf(clone_root->root, path);
    5958           0 :                         if (ret < 0)
    5959           0 :                                 goto out;
    5960           0 :                         else if (ret > 0)
    5961             :                                 break;
    5962           0 :                         continue;
    5963             :                 }
    5964             : 
    5965        4097 :                 btrfs_item_key_to_cpu(leaf, &key, slot);
    5966             : 
    5967             :                 /*
    5968             :                  * We might have an implicit trailing hole (NO_HOLES feature
    5969             :                  * enabled). We deal with it after leaving this loop.
    5970             :                  */
    5971        4097 :                 if (key.objectid != clone_root->ino ||
    5972        4094 :                     key.type != BTRFS_EXTENT_DATA_KEY)
    5973             :                         break;
    5974             : 
    5975        4094 :                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
    5976        4094 :                 type = btrfs_file_extent_type(leaf, ei);
    5977        4094 :                 if (type == BTRFS_FILE_EXTENT_INLINE) {
    5978           0 :                         ext_len = btrfs_file_extent_ram_bytes(leaf, ei);
    5979           0 :                         ext_len = PAGE_ALIGN(ext_len);
    5980             :                 } else {
    5981        4094 :                         ext_len = btrfs_file_extent_num_bytes(leaf, ei);
    5982             :                 }
    5983             : 
    5984        4094 :                 if (key.offset + ext_len <= clone_root->offset)
    5985           0 :                         goto next;
    5986             : 
    5987        4094 :                 if (key.offset > clone_root->offset) {
    5988             :                         /* Implicit hole, NO_HOLES feature enabled. */
    5989          11 :                         u64 hole_len = key.offset - clone_root->offset;
    5990             : 
    5991          11 :                         if (hole_len > len)
    5992             :                                 hole_len = len;
    5993          11 :                         ret = send_extent_data(sctx, dst_path, offset,
    5994             :                                                hole_len);
    5995          11 :                         if (ret < 0)
    5996           0 :                                 goto out;
    5997             : 
    5998          11 :                         len -= hole_len;
    5999          11 :                         if (len == 0)
    6000             :                                 break;
    6001           1 :                         offset += hole_len;
    6002           1 :                         clone_root->offset += hole_len;
    6003           1 :                         data_offset += hole_len;
    6004             :                 }
    6005             : 
    6006        4084 :                 if (key.offset >= clone_root->offset + len)
    6007             :                         break;
    6008             : 
    6009        4084 :                 if (key.offset >= clone_src_i_size)
    6010             :                         break;
    6011             : 
    6012        4003 :                 if (key.offset + ext_len > clone_src_i_size) {
    6013         143 :                         ext_len = clone_src_i_size - key.offset;
    6014         143 :                         crossed_src_i_size = true;
    6015             :                 }
    6016             : 
    6017        4003 :                 clone_data_offset = btrfs_file_extent_offset(leaf, ei);
    6018        4003 :                 if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) {
    6019        3995 :                         clone_root->offset = key.offset;
    6020        3995 :                         if (clone_data_offset < data_offset &&
    6021        1051 :                                 clone_data_offset + ext_len > data_offset) {
    6022        1038 :                                 u64 extent_offset;
    6023             : 
    6024        1038 :                                 extent_offset = data_offset - clone_data_offset;
    6025        1038 :                                 ext_len -= extent_offset;
    6026        1038 :                                 clone_data_offset += extent_offset;
    6027        1038 :                                 clone_root->offset += extent_offset;
    6028             :                         }
    6029             :                 }
    6030             : 
    6031        4003 :                 clone_len = min_t(u64, ext_len, len);
    6032             : 
    6033        4003 :                 if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte &&
    6034             :                     clone_data_offset == data_offset) {
    6035        3982 :                         const u64 src_end = clone_root->offset + clone_len;
    6036        3982 :                         const u64 sectorsize = SZ_64K;
    6037             : 
    6038             :                         /*
    6039             :                          * We can't clone the last block, when its size is not
    6040             :                          * sector size aligned, into the middle of a file. If we
    6041             :                          * do so, the receiver will get a failure (-EINVAL) when
    6042             :                          * trying to clone or will silently corrupt the data in
    6043             :                          * the destination file if it's on a kernel without the
    6044             :                          * fix introduced by commit ac765f83f1397646
    6045             :                          * ("Btrfs: fix data corruption due to cloning of eof
    6046             :                          * block").
    6047             :                          *
    6048             :                          * So issue a clone of the aligned down range plus a
    6049             :                          * regular write for the eof block, if we hit that case.
    6050             :                          *
    6051             :                          * Also, we use the maximum possible sector size, 64K,
    6052             :                          * because we don't know what's the sector size of the
    6053             :                          * filesystem that receives the stream, so we have to
    6054             :                          * assume the largest possible sector size.
    6055             :                          */
    6056        3982 :                         if (src_end == clone_src_i_size &&
    6057         288 :                             !IS_ALIGNED(src_end, sectorsize) &&
    6058           8 :                             offset + clone_len < sctx->cur_inode_size) {
    6059           2 :                                 u64 slen;
    6060             : 
    6061           2 :                                 slen = ALIGN_DOWN(src_end - clone_root->offset,
    6062             :                                                   sectorsize);
    6063           2 :                                 if (slen > 0) {
    6064           0 :                                         ret = send_clone(sctx, offset, slen,
    6065             :                                                          clone_root);
    6066           0 :                                         if (ret < 0)
    6067           0 :                                                 goto out;
    6068             :                                 }
    6069           2 :                                 ret = send_extent_data(sctx, dst_path,
    6070             :                                                        offset + slen,
    6071             :                                                        clone_len - slen);
    6072             :                         } else {
    6073        3980 :                                 ret = send_clone(sctx, offset, clone_len,
    6074             :                                                  clone_root);
    6075             :                         }
    6076          21 :                 } else if (crossed_src_i_size && clone_len < len) {
    6077             :                         /*
    6078             :                          * If we are at i_size of the clone source inode and we
    6079             :                          * can not clone from it, terminate the loop. This is
    6080             :                          * to avoid sending two write operations, one with a
    6081             :                          * length matching clone_len and the final one after
    6082             :                          * this loop with a length of len - clone_len.
    6083             :                          *
    6084             :                          * When using encoded writes (BTRFS_SEND_FLAG_COMPRESSED
    6085             :                          * was passed to the send ioctl), this helps avoid
    6086             :                          * sending an encoded write for an offset that is not
    6087             :                          * sector size aligned, in case the i_size of the source
    6088             :                          * inode is not sector size aligned. That will make the
    6089             :                          * receiver fall back to decompression of the data and
    6090             :                          * writing it using regular buffered IO, therefore while
    6091             :                          * not incorrect, it's not optimal due to decompression and
    6092             :                          * possible re-compression at the receiver.
    6093             :                          */
    6094             :                         break;
    6095             :                 } else {
    6096          16 :                         ret = send_extent_data(sctx, dst_path, offset,
    6097             :                                                clone_len);
    6098             :                 }
    6099             : 
    6100        3998 :                 if (ret < 0)
    6101           0 :                         goto out;
    6102             : 
    6103        3998 :                 len -= clone_len;
    6104        3998 :                 if (len == 0)
    6105             :                         break;
    6106          23 :                 offset += clone_len;
    6107          23 :                 clone_root->offset += clone_len;
    6108             : 
    6109             :                 /*
    6110             :                  * If we are cloning from the file we are currently processing,
    6111             :                  * and using the send root as the clone root, we must stop once
    6112             :                  * the current clone offset reaches the current eof of the file
    6113             :                  * at the receiver, otherwise we would issue an invalid clone
    6114             :                  * operation (source range going beyond eof) and cause the
    6115             :                  * receiver to fail. So if we reach the current eof, bail out
    6116             :                  * and fall back to a regular write.
    6117             :                  */
    6118          23 :                 if (clone_root->root == sctx->send_root &&
    6119          21 :                     clone_root->ino == sctx->cur_ino &&
    6120           0 :                     clone_root->offset >= sctx->cur_inode_next_write_offset)
    6121             :                         break;
    6122             : 
    6123          23 :                 data_offset += clone_len;
    6124          23 : next:
    6125          23 :                 path->slots[0]++;
    6126             :         }
    6127             : 
    6128        4074 :         if (len > 0)
    6129          89 :                 ret = send_extent_data(sctx, dst_path, offset, len);
    6130             :         else
    6131             :                 ret = 0;
    6132        4074 : out:
    6133        4074 :         btrfs_free_path(path);
    6134        4074 :         return ret;
    6135             : }
    6136             : 
    6137      631915 : static int send_write_or_clone(struct send_ctx *sctx,
    6138             :                                struct btrfs_path *path,
    6139             :                                struct btrfs_key *key,
    6140             :                                struct clone_root *clone_root)
    6141             : {
                               /*
                                * Send the file extent item at @path (whose key is @key) either
                                * as a clone operation, through clone_range(), or as plain data,
                                * through send_extent_data().
                                *
                                * The range sent starts at key->offset and ends at the smaller
                                * of the extent's end and sctx->cur_inode_size. If that range is
                                * empty, nothing is sent and 0 is returned. Cloning is used only
                                * when @clone_root is non-NULL and the computed end is aligned
                                * to the superblock's s_blocksize; otherwise the data is sent.
                                *
                                * Once something was attempted, the computed end is stored in
                                * sctx->cur_inode_next_write_offset, whether or not the send
                                * succeeded, and the result of clone_range() or
                                * send_extent_data() is returned.
                                */
    6142      631915 :         int ret = 0;
    6143      631915 :         u64 offset = key->offset;
    6144      631915 :         u64 end;
    6145      631915 :         u64 bs = sctx->send_root->fs_info->sb->s_blocksize;
    6146             : 
                               /* Never send anything beyond the size of the current inode. */
    6147      631915 :         end = min_t(u64, btrfs_file_extent_end(path), sctx->cur_inode_size);
    6148      631890 :         if (offset >= end)
    6149             :                 return 0;
    6150             : 
    6151      635856 :         if (clone_root && IS_ALIGNED(end, bs)) {
    6152        4075 :                 struct btrfs_file_extent_item *ei;
    6153        4075 :                 u64 disk_byte;
    6154        4075 :                 u64 data_offset;
    6155             : 
                                       /*
                                        * clone_range() is given the extent's disk bytenr and
                                        * the offset into that extent, both read from the item.
                                        */
    6156        4075 :                 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
    6157             :                                     struct btrfs_file_extent_item);
    6158        4075 :                 disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
    6159        4075 :                 data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
    6160        4075 :                 ret = clone_range(sctx, path, clone_root, disk_byte,
    6161             :                                   data_offset, offset, end - offset);
    6162             :         } else {
    6163      627706 :                 ret = send_extent_data(sctx, path, offset, end - offset);
    6164             :         }
                               /* Updated unconditionally, even when ret holds an error. */
    6165      631716 :         sctx->cur_inode_next_write_offset = end;
    6166      631716 :         return ret;
    6167             : }
    6168             : 
    6169        9999 : static int is_extent_unchanged(struct send_ctx *sctx,
    6170             :                                struct btrfs_path *left_path,
    6171             :                                struct btrfs_key *ekey)
    6172             : {
                               /*
                                * Check whether the file extent item at @left_path (the "left"
                                * extent, whose key is @ekey) is backed by the same extent data
                                * in sctx->parent_root (the "right" side).
                                *
                                * The right side is walked item by item across the range
                                * [ekey->offset, ekey->offset + num_bytes of the left extent).
                                * Each right extent must match the left one in disk bytenr,
                                * generation and (adjusted) extent offset, and the right
                                * extents must be contiguous.
                                *
                                * Returns 1 if the extent is considered unchanged, 0 if it is
                                * considered changed, and a negative errno on failure (-ENOMEM,
                                * or an error from the tree search / iteration).
                                */
    6173        9999 :         int ret = 0;
    6174        9999 :         struct btrfs_key key;
    6175        9999 :         struct btrfs_path *path = NULL;
    6176        9999 :         struct extent_buffer *eb;
    6177        9999 :         int slot;
    6178        9999 :         struct btrfs_key found_key;
    6179        9999 :         struct btrfs_file_extent_item *ei;
    6180        9999 :         u64 left_disknr;
    6181        9999 :         u64 right_disknr;
    6182        9999 :         u64 left_offset;
    6183        9999 :         u64 right_offset;
    6184        9999 :         u64 left_offset_fixed;
    6185        9999 :         u64 left_len;
    6186        9999 :         u64 right_len;
    6187        9999 :         u64 left_gen;
    6188        9999 :         u64 right_gen;
    6189        9999 :         u8 left_type;
    6190        9999 :         u8 right_type;
    6191             : 
    6192        9999 :         path = alloc_path_for_send();
    6193        9999 :         if (!path)
    6194             :                 return -ENOMEM;
    6195             : 
    6196        9999 :         eb = left_path->nodes[0];
    6197        9999 :         slot = left_path->slots[0];
    6198        9999 :         ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
    6199        9999 :         left_type = btrfs_file_extent_type(eb, ei);
    6200             : 
                               /*
                                * Only regular extents are compared; any other type on the left
                                * side is reported as changed.
                                */
    6201        9999 :         if (left_type != BTRFS_FILE_EXTENT_REG) {
    6202        4495 :                 ret = 0;
    6203        4495 :                 goto out;
    6204             :         }
    6205        5504 :         left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
    6206        5504 :         left_len = btrfs_file_extent_num_bytes(eb, ei);
    6207        5504 :         left_offset = btrfs_file_extent_offset(eb, ei);
    6208        5504 :         left_gen = btrfs_file_extent_generation(eb, ei);
    6209             : 
    6210             :         /*
    6211             :          * Following comments will refer to these graphics. L is the left
    6212             :          * extents which we are checking at the moment. 1-8 are the right
    6213             :          * extents that we iterate.
    6214             :          *
    6215             :          *       |-----L-----|
    6216             :          * |-1-|-2a-|-3-|-4-|-5-|-6-|
    6217             :          *
    6218             :          *       |-----L-----|
    6219             :          * |--1--|-2b-|...(same as above)
    6220             :          *
    6221             :          * Alternative situation. Happens on files where extents got split.
    6222             :          *       |-----L-----|
    6223             :          * |-----------7-----------|-6-|
    6224             :          *
    6225             :          * Alternative situation. Happens on files which got larger.
    6226             :          *       |-----L-----|
    6227             :          * |-8-|
    6228             :          * Nothing follows after 8.
    6229             :          */
    6230             : 
    6231        5504 :         key.objectid = ekey->objectid;
    6232        5504 :         key.type = BTRFS_EXTENT_DATA_KEY;
    6233        5504 :         key.offset = ekey->offset;
    6234        5504 :         ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
    6235        5504 :         if (ret < 0)
    6236           0 :                 goto out;
                               /*
                                * A positive result is reported as changed.
                                * NOTE(review): presumably a positive return means no usable
                                * item was found in the parent root - confirm against
                                * btrfs_search_slot_for_read().
                                */
    6237        5504 :         if (ret) {
    6238           0 :                 ret = 0;
    6239           0 :                 goto out;
    6240             :         }
    6241             : 
    6242             :         /*
    6243             :          * Handle special case where the right side has no extents at all.
    6244             :          */
    6245        5504 :         eb = path->nodes[0];
    6246        5504 :         slot = path->slots[0];
    6247        5504 :         btrfs_item_key_to_cpu(eb, &found_key, slot);
    6248        5504 :         if (found_key.objectid != key.objectid ||
    6249        5504 :             found_key.type != key.type) {
    6250             :                 /* If we're a hole then just pretend nothing changed */
    6251         863 :                 ret = (left_disknr) ? 0 : 1;
    6252         863 :                 goto out;
    6253             :         }
    6254             : 
    6255             :         /*
    6256             :          * We're now on 2a, 2b or 7.
    6257             :          */
    6258        4641 :         key = found_key;
    6259        4803 :         while (key.offset < ekey->offset + left_len) {
    6260        4641 :                 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
    6261        4641 :                 right_type = btrfs_file_extent_type(eb, ei);
    6262        4641 :                 if (right_type != BTRFS_FILE_EXTENT_REG &&
    6263             :                     right_type != BTRFS_FILE_EXTENT_INLINE) {
    6264        1162 :                         ret = 0;
    6265        1162 :                         goto out;
    6266             :                 }
    6267             : 
                                       /*
                                        * Inline extents have their length taken from ram_bytes,
                                        * rounded up with PAGE_ALIGN(); regular ones use
                                        * num_bytes.
                                        */
    6268        3479 :                 if (right_type == BTRFS_FILE_EXTENT_INLINE) {
    6269           0 :                         right_len = btrfs_file_extent_ram_bytes(eb, ei);
    6270           0 :                         right_len = PAGE_ALIGN(right_len);
    6271             :                 } else {
    6272        3479 :                         right_len = btrfs_file_extent_num_bytes(eb, ei);
    6273             :                 }
    6274             : 
    6275             :                 /*
    6276             :                  * Are we at extent 8? If yes, we know the extent is changed.
    6277             :                  * This may only happen on the first iteration.
    6278             :                  */
    6279        3479 :                 if (found_key.offset + right_len <= ekey->offset) {
    6280             :                         /* If we're a hole just pretend nothing changed */
    6281        1878 :                         ret = (left_disknr) ? 0 : 1;
    6282        1878 :                         goto out;
    6283             :                 }
    6284             : 
    6285             :                 /*
    6286             :                  * We just wanted to see if when we have an inline extent, what
    6287             :                  * follows it is a regular extent (wanted to check the above
    6288             :                  * condition for inline extents too). This should normally not
    6289             :                  * happen but it's possible for example when we have an inline
    6290             :                  * compressed extent representing data with a size matching
    6291             :                  * the page size (currently the same as sector size).
    6292             :                  */
    6293        1601 :                 if (right_type == BTRFS_FILE_EXTENT_INLINE) {
    6294           0 :                         ret = 0;
    6295           0 :                         goto out;
    6296             :                 }
    6297             : 
    6298        1601 :                 right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
    6299        1601 :                 right_offset = btrfs_file_extent_offset(eb, ei);
    6300        1601 :                 right_gen = btrfs_file_extent_generation(eb, ei);
    6301             : 
                                       /*
                                        * Bring both extent offsets to the same file position
                                        * before comparing them: whichever item starts earlier
                                        * in the file gets its extent offset advanced by the
                                        * distance between the two key offsets.
                                        */
    6302        1601 :                 left_offset_fixed = left_offset;
    6303        1601 :                 if (key.offset < ekey->offset) {
    6304             :                         /* Fix the right offset for 2a and 7. */
    6305         777 :                         right_offset += ekey->offset - key.offset;
    6306             :                 } else {
    6307             :                         /* Fix the left offset for all behind 2a and 2b */
    6308         824 :                         left_offset_fixed += key.offset - ekey->offset;
    6309             :                 }
    6310             : 
    6311             :                 /*
    6312             :                  * Check if we have the same extent.
    6313             :                  */
    6314        1601 :                 if (left_disknr != right_disknr ||
    6315        1601 :                     left_offset_fixed != right_offset ||
    6316             :                     left_gen != right_gen) {
    6317         710 :                         ret = 0;
    6318         710 :                         goto out;
    6319             :                 }
    6320             : 
    6321             :                 /*
    6322             :                  * Go to the next extent.
    6323             :                  */
    6324         891 :                 ret = btrfs_next_item(sctx->parent_root, path);
    6325         891 :                 if (ret < 0)
    6326           0 :                         goto out;
    6327         891 :                 if (!ret) {
    6328         878 :                         eb = path->nodes[0];
    6329         878 :                         slot = path->slots[0];
    6330         878 :                         btrfs_item_key_to_cpu(eb, &found_key, slot);
    6331             :                 }
                                       /*
                                        * No further extent item for this inode on the right
                                        * side: advance key.offset to the end of the last right
                                        * extent so the check after the loop can tell whether
                                        * the left extent was fully covered.
                                        */
    6332         891 :                 if (ret || found_key.objectid != key.objectid ||
    6333         482 :                     found_key.type != key.type) {
    6334         409 :                         key.offset += right_len;
    6335         409 :                         break;
    6336             :                 }
                                       /* The next right extent must start where this one ends. */
    6337         482 :                 if (found_key.offset != key.offset + right_len) {
    6338         320 :                         ret = 0;
    6339         320 :                         goto out;
    6340             :                 }
    6341         162 :                 key = found_key;
    6342             :         }
    6343             : 
    6344             :         /*
    6345             :          * We're now behind the left extent (treat as unchanged) or at the end
    6346             :          * of the right side (treat as changed).
    6347             :          */
    6348         571 :         if (key.offset >= ekey->offset + left_len)
    6349             :                 ret = 1;
    6350             :         else
    6351           0 :                 ret = 0;
    6352             : 
    6353             : 
    6354        9999 : out:
    6355        9999 :         btrfs_free_path(path);
    6356        9999 :         return ret;
    6357             : }
    6358             : 
    6359      403459 : static int get_last_extent(struct send_ctx *sctx, u64 offset)
    6360             : {
                               /*
                                * Search sctx->send_root for a file extent item of the current
                                * inode, using @offset as the key offset, and store the end of
                                * the extent found in sctx->cur_inode_last_extent.
                                *
                                * sctx->cur_inode_last_extent is reset to 0 first. It stays 0
                                * if the search fails, or if the item the search lands on is
                                * not an EXTENT_DATA item of sctx->cur_ino.
                                *
                                * Returns 0 on success (including when no matching item was
                                * found), -ENOMEM if no path could be allocated, or the
                                * negative error returned by the tree search.
                                */
    6361      403459 :         struct btrfs_path *path;
    6362      403459 :         struct btrfs_root *root = sctx->send_root;
    6363      403459 :         struct btrfs_key key;
    6364      403459 :         int ret;
    6365             : 
    6366      403459 :         path = alloc_path_for_send();
    6367      403459 :         if (!path)
    6368             :                 return -ENOMEM;
    6369             : 
    6370      403459 :         sctx->cur_inode_last_extent = 0;
    6371             : 
    6372      403459 :         key.objectid = sctx->cur_ino;
    6373      403459 :         key.type = BTRFS_EXTENT_DATA_KEY;
    6374      403459 :         key.offset = offset;
                               /*
                                * NOTE(review): the trailing (0, 1) arguments presumably ask
                                * for the item at or before the key, falling back to any
                                * neighbouring item - confirm against
                                * btrfs_search_slot_for_read().
                                */
    6375      403459 :         ret = btrfs_search_slot_for_read(root, &key, path, 0, 1);
    6376      403459 :         if (ret < 0)
    6377           0 :                 goto out;
                               /*
                                * A positive result is not treated specially: it is turned
                                * into 0 and the key at the resulting slot is validated below.
                                */
    6378      403459 :         ret = 0;
    6379      403459 :         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
    6380      403459 :         if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY)
    6381        2650 :                 goto out;
    6382             : 
    6383      400809 :         sctx->cur_inode_last_extent = btrfs_file_extent_end(path);
    6384      403459 : out:
    6385      403459 :         btrfs_free_path(path);
    6386      403459 :         return ret;
    6387             : }
    6388             : 
    6389       24486 : static int range_is_hole_in_parent(struct send_ctx *sctx,
    6390             :                                    const u64 start,
    6391             :                                    const u64 end)
    6392             : {
                               /*
                                * Check whether the file range [@start, @end) of the current
                                * inode is a hole in sctx->parent_root.
                                *
                                * The EXTENT_DATA items of sctx->cur_ino in the parent root are
                                * scanned. Items ending at or before @start are skipped, and
                                * the scan stops at the first item whose key offset is >= @end
                                * or that belongs to a later inode / key type.
                                *
                                * Returns 0 as soon as an item reaching into the range has a
                                * non-zero disk bytenr, 1 if no such item is found, and a
                                * negative errno on failure.
                                */
    6393       24486 :         struct btrfs_path *path;
    6394       24486 :         struct btrfs_key key;
    6395       24486 :         struct btrfs_root *root = sctx->parent_root;
    6396       24486 :         u64 search_start = start;
    6397       24486 :         int ret;
    6398             : 
    6399       24486 :         path = alloc_path_for_send();
    6400       24486 :         if (!path)
    6401             :                 return -ENOMEM;
    6402             : 
    6403       24486 :         key.objectid = sctx->cur_ino;
    6404       24486 :         key.type = BTRFS_EXTENT_DATA_KEY;
    6405       24486 :         key.offset = search_start;
    6406       24486 :         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    6407       24486 :         if (ret < 0)
    6408           0 :                 goto out;
                               /*
                                * NOTE(review): ret > 0 presumably means there was no exact key
                                * match; stepping back one slot lets the loop also look at the
                                * item preceding the search position - confirm against
                                * btrfs_search_slot().
                                */
    6409       24486 :         if (ret > 0 && path->slots[0] > 0)
    6410       24439 :                 path->slots[0]--;
    6411             : 
    6412       64538 :         while (search_start < end) {
    6413       64538 :                 struct extent_buffer *leaf = path->nodes[0];
    6414       64538 :                 int slot = path->slots[0];
    6415       64538 :                 struct btrfs_file_extent_item *fi;
    6416       64538 :                 u64 extent_end;
    6417             : 
                                       /* Ran off the end of this leaf: move to the next one. */
    6418       64538 :                 if (slot >= btrfs_header_nritems(leaf)) {
    6419         315 :                         ret = btrfs_next_leaf(root, path);
    6420         315 :                         if (ret < 0)
    6421           0 :                                 goto out;
    6422         315 :                         else if (ret > 0)
    6423             :                                 break;
    6424         280 :                         continue;
    6425             :                 }
    6426             : 
    6427       64223 :                 btrfs_item_key_to_cpu(leaf, &key, slot);
                                       /* Skip items with a smaller objectid or key type. */
    6428       64223 :                 if (key.objectid < sctx->cur_ino ||
    6429       64223 :                     key.type < BTRFS_EXTENT_DATA_KEY)
    6430       19747 :                         goto next;
                                       /* Stop once past the inode's extents or past @end. */
    6431       44476 :                 if (key.objectid > sctx->cur_ino ||
    6432       42645 :                     key.type > BTRFS_EXTENT_DATA_KEY ||
    6433       42645 :                     key.offset >= end)
    6434             :                         break;
    6435             : 
    6436       22155 :                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
    6437       22155 :                 extent_end = btrfs_file_extent_end(path);
    6438       22155 :                 if (extent_end <= start)
    6439       20025 :                         goto next;
                                       /*
                                        * A zero disk bytenr is treated as a hole: continue the
                                        * scan from the end of this extent.
                                        */
    6440        2130 :                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) {
    6441           0 :                         search_start = extent_end;
    6442           0 :                         goto next;
    6443             :                 }
                                       /* An extent with a disk bytenr reaches into the range. */
    6444        2130 :                 ret = 0;
    6445        2130 :                 goto out;
    6446       39772 : next:
    6447       39772 :                 path->slots[0]++;
    6448             :         }
    6449             :         ret = 1;
    6450       24486 : out:
    6451       24486 :         btrfs_free_path(path);
    6452       24486 :         return ret;
    6453             : }
    6454             : 
    6455     1059656 : static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
    6456             :                            struct btrfs_key *key)
    6457             : {
                               /*
                                * Send a hole for the gap between the end of the last extent
                                * seen for the current inode (sctx->cur_inode_last_extent) and
                                * key->offset, if there is such a gap and
                                * range_is_hole_in_parent() does not report it as already
                                * being a hole in the parent root.
                                *
                                * Nothing is done, and 0 is returned, when @key does not belong
                                * to sctx->cur_ino or need_send_hole() says holes need not be
                                * sent.
                                *
                                * Once the lookups have succeeded,
                                * sctx->cur_inode_last_extent is set to the end of the extent
                                * at @path, even if send_hole() failed.
                                *
                                * Returns 0 on success, a negative errno from the lookups, or
                                * the result of send_hole().
                                */
    6458     1059656 :         int ret = 0;
    6459             : 
    6460     1059656 :         if (sctx->cur_ino != key->objectid || !need_send_hole(sctx))
    6461             :                 return 0;
    6462             : 
                               /*
                                * NOTE(review): (u64)-1 looks like a "not computed yet" marker
                                * for cur_inode_last_extent - confirm where it is assigned.
                                */
    6463      435570 :         if (sctx->cur_inode_last_extent == (u64)-1) {
    6464      402625 :                 ret = get_last_extent(sctx, key->offset - 1);
    6465      402625 :                 if (ret)
    6466             :                         return ret;
    6467             :         }
    6468             : 
    6469      435570 :         if (path->slots[0] == 0 &&
    6470       71129 :             sctx->cur_inode_last_extent < key->offset) {
    6471             :                 /*
    6472             :                  * We might have skipped entire leafs that contained only
    6473             :                  * file extent items for our current inode. These leafs have
    6474             :                  * a generation number smaller (older) than the one in the
    6475             :                  * current leaf and the leaf our last extent came from, and
    6476             :                  * are located between these 2 leafs.
    6477             :                  */
    6478         113 :                 ret = get_last_extent(sctx, key->offset - 1);
    6479         113 :                 if (ret)
    6480             :                         return ret;
    6481             :         }
    6482             : 
                               /* There is a gap between the last extent and this one. */
    6483      435570 :         if (sctx->cur_inode_last_extent < key->offset) {
    6484       24486 :                 ret = range_is_hole_in_parent(sctx,
    6485             :                                               sctx->cur_inode_last_extent,
    6486             :                                               key->offset);
    6487       24486 :                 if (ret < 0)
    6488             :                         return ret;
    6489       24486 :                 else if (ret == 0)
    6490        2130 :                         ret = send_hole(sctx, key->offset);
    6491             :                 else
    6492             :                         ret = 0;
    6493             :         }
    6494      435570 :         sctx->cur_inode_last_extent = btrfs_file_extent_end(path);
    6495      435570 :         return ret;
    6496             : }
    6497             : 
    6498      634574 : static int process_extent(struct send_ctx *sctx,
    6499             :                           struct btrfs_path *path,
    6500             :                           struct btrfs_key *key)
    6501             : {
                               /*
                                * Process the file extent item at @path (whose key is @key) for
                                * the current inode.
                                *
                                * - Symlinks are skipped entirely.
                                * - With a parent root and an inode that is not new, an extent
                                *   that is_extent_unchanged() reports as unchanged is not
                                *   sent, but hole processing still runs for it.
                                * - Otherwise, prealloc extents and prealloc/regular extents
                                *   with a zero disk bytenr are skipped, without hole
                                *   processing.
                                * - Anything else is sent through send_write_or_clone(), using
                                *   the clone source found by find_extent_clone() if any, and
                                *   then maybe_send_hole() is called.
                                *
                                * Returns 0 on success or a negative errno.
                                */
    6502      634574 :         struct clone_root *found_clone = NULL;
    6503      634574 :         int ret = 0;
    6504             : 
    6505      634574 :         if (S_ISLNK(sctx->cur_inode_mode))
    6506             :                 return 0;
    6507             : 
    6508      634272 :         if (sctx->parent_root && !sctx->cur_inode_new) {
    6509        9999 :                 ret = is_extent_unchanged(sctx, path, key);
    6510        9999 :                 if (ret < 0)
    6511           0 :                         goto out;
    6512        9999 :                 if (ret) {
    6513         571 :                         ret = 0;
    6514         571 :                         goto out_hole;
    6515             :                 }
    6516             :         } else {
    6517      624273 :                 struct btrfs_file_extent_item *ei;
    6518      624273 :                 u8 type;
    6519             : 
    6520      624273 :                 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
    6521             :                                     struct btrfs_file_extent_item);
    6522      624273 :                 type = btrfs_file_extent_type(path->nodes[0], ei);
    6523      624270 :                 if (type == BTRFS_FILE_EXTENT_PREALLOC ||
    6524             :                     type == BTRFS_FILE_EXTENT_REG) {
    6525             :                         /*
    6526             :                          * The send spec does not have a prealloc command yet,
    6527             :                          * so just leave a hole for prealloc'ed extents until
    6528             :                          * we have enough commands queued up to justify rev'ing
    6529             :                          * the send spec.
    6530             :                          */
    6531      624259 :                         if (type == BTRFS_FILE_EXTENT_PREALLOC) {
    6532        1779 :                                 ret = 0;
    6533        1779 :                                 goto out;
    6534             :                         }
    6535             : 
    6536             :                         /* Have a hole, just skip it. */
    6537      622480 :                         if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) {
    6538           0 :                                 ret = 0;
    6539           0 :                                 goto out;
    6540             :                         }
    6541             :                 }
    6542             :         }
    6543             : 
    6544      631920 :         ret = find_extent_clone(sctx, path, key->objectid, key->offset,
    6545             :                         sctx->cur_inode_size, &found_clone);
                               /*
                                * -ENOENT is not an error here; any other negative value is.
                                * NOTE(review): presumably found_clone stays NULL in the
                                * -ENOENT case, so the extent is sent as data - confirm against
                                * find_extent_clone().
                                */
    6546      631917 :         if (ret != -ENOENT && ret < 0)
    6547           0 :                 goto out;
    6548             : 
    6549      631917 :         ret = send_write_or_clone(sctx, path, key, found_clone);
    6550      631775 :         if (ret)
    6551           0 :                 goto out;
    6552      631775 : out_hole:
    6553      632346 :         ret = maybe_send_hole(sctx, path, key);
    6554             : out:
    6555             :         return ret;
    6556             : }
    6557             : 
    6558           7 : static int process_all_extents(struct send_ctx *sctx)
    6559             : {
                               /*
                                * Call process_extent() for every EXTENT_DATA item of the inode
                                * sctx->cmp_key->objectid in sctx->send_root, starting at file
                                * offset 0.
                                *
                                * Iteration stops at the first item with a different objectid
                                * or key type, or when process_extent() returns a negative
                                * value.
                                *
                                * Returns -ENOMEM if no path could be allocated, a negative
                                * error from the iteration or from process_extent(), and
                                * otherwise the last value assigned to ret (0 when the loop
                                * ends on a foreign item or processes nothing).
                                */
    6560           7 :         int ret = 0;
    6561           7 :         int iter_ret = 0;
    6562           7 :         struct btrfs_root *root;
    6563           7 :         struct btrfs_path *path;
    6564           7 :         struct btrfs_key key;
    6565           7 :         struct btrfs_key found_key;
    6566             : 
    6567           7 :         root = sctx->send_root;
    6568           7 :         path = alloc_path_for_send();
    6569           7 :         if (!path)
    6570             :                 return -ENOMEM;
    6571             : 
    6572           7 :         key.objectid = sctx->cmp_key->objectid;
    6573           7 :         key.type = BTRFS_EXTENT_DATA_KEY;
    6574           7 :         key.offset = 0;
    6575           7 :         btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
                                       /* Left the inode's extent items: we are done. */
    6576           3 :                 if (found_key.objectid != key.objectid ||
    6577           0 :                     found_key.type != key.type) {
    6578             :                         ret = 0;
    6579             :                         break;
    6580             :                 }
    6581             : 
    6582           0 :                 ret = process_extent(sctx, path, &found_key);
    6583           0 :                 if (ret < 0)
    6584             :                         break;
    6585             :         }
    6586             :         /* Catch error found during iteration */
    6587           7 :         if (iter_ret < 0)
    6588           0 :                 ret = iter_ret;
    6589             : 
    6590           7 :         btrfs_free_path(path);
    6591           7 :         return ret;
    6592             : }
    6593             : 
    6594     4280284 : static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end,
    6595             :                                            int *pending_move,
    6596             :                                            int *refs_processed)
    6597             : {
                               /*
                                * Call process_recorded_refs() for the current inode if there
                                * is anything to process and it is time to do so.
                                *
                                * Nothing is done when:
                                * - there is no current inode (sctx->cur_ino == 0);
                                * - @at_end is 0 and sctx->cmp_key still refers to the current
                                *   inode with a key type <= BTRFS_INODE_EXTREF_KEY;
                                * - both sctx->new_refs and sctx->deleted_refs are empty.
                                *
                                * @pending_move is passed through to process_recorded_refs().
                                * *@refs_processed is set to 1 only when
                                * process_recorded_refs() did not fail; it is left untouched
                                * otherwise.
                                *
                                * Returns 0 when nothing was done, otherwise the result of
                                * process_recorded_refs().
                                */
    6598     4280284 :         int ret = 0;
    6599             : 
    6600     4280284 :         if (sctx->cur_ino == 0)
    6601         211 :                 goto out;
    6602     4280073 :         if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
    6603     3270672 :             sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY)
    6604      608594 :                 goto out;
    6605     3671479 :         if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
    6606     3065794 :                 goto out;
    6607             : 
    6608      605685 :         ret = process_recorded_refs(sctx, pending_move);
    6609      605694 :         if (ret < 0)
    6610           0 :                 goto out;
    6611             : 
    6612      605694 :         *refs_processed = 1;
    6613     4280293 : out:
    6614     4280293 :         return ret;
    6615             : }
    6616             : 
    6617     4280345 : static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
    6618             : {
                               /*
                                * Finish the inode currently tracked in sctx (sctx->cur_ino).
                                *
                                * Returns 0 right away when sctx->ignore_cur_inode is set. Otherwise
                                * recorded refs are processed first. The rest is skipped when there
                                * is no current inode, when it is marked deleted, or when at_end is
                                * zero and sctx->cmp_key still refers to the current inode. Past
                                * those checks the hole/truncate (regular files only), chown, chmod,
                                * fileattr, verity and capabilities commands are issued as needed,
                                * followed by the pending child directory moves and the utimes.
                                *
                                * Returns 0 on success, otherwise the value of the helper that
                                * failed (presumably a negative errno - confirm against the helpers).
                                */
    6619     4280345 :         int ret = 0;
    6620     4280345 :         struct btrfs_inode_info info;
    6621     4280345 :         u64 left_mode;
    6622     4280345 :         u64 left_uid;
    6623     4280345 :         u64 left_gid;
    6624     4280345 :         u64 left_fileattr;
    6625     4280345 :         u64 right_mode;
    6626     4280345 :         u64 right_uid;
    6627     4280345 :         u64 right_gid;
    6628     4280345 :         u64 right_fileattr;
    6629     4280345 :         int need_chmod = 0;
    6630     4280345 :         int need_chown = 0;
    6631     4280345 :         bool need_fileattr = false;
    6632     4280345 :         int need_truncate = 1;
    6633     4280345 :         int pending_move = 0;
    6634     4280345 :         int refs_processed = 0;
    6635             : 
    6636     4280345 :         if (sctx->ignore_cur_inode)
    6637             :                 return 0;
    6638             : 
    6639     4280318 :         ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
    6640             :                                               &refs_processed);
    6641     4280230 :         if (ret < 0)
    6642           0 :                 goto out;
    6643             : 
    6644             :         /*
    6645             :          * We have processed the refs and thus need to advance send_progress.
    6646             :          * Now, calls to get_cur_xxx will take the updated refs of the current
    6647             :          * inode into account.
    6648             :          *
    6649             :          * On the other hand, if our current inode is a directory and couldn't
    6650             :          * be moved/renamed because its parent was renamed/moved too and it has
    6651             :          * a higher inode number, we can only move/rename our current inode
    6652             :          * after we moved/renamed its parent. Therefore in this case operate on
    6653             :          * the old path (pre move/rename) of our current inode, and the
    6654             :          * move/rename will be performed later.
    6655             :          */
    6656     4280230 :         if (refs_processed && !pending_move)
    6657      605589 :                 sctx->send_progress = sctx->cur_ino + 1;
    6658             : 
                               /* No current inode, or it was deleted: nothing left to send. */
    6659     4280230 :         if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
    6660        1008 :                 goto out;
                               /* Still on an item of the current inode: finish it later. */
    6661     4279222 :         if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
    6662     3270166 :                 goto out;
                               /* "left" values are the inode's attributes in the send root. */
    6663     1009056 :         ret = get_inode_info(sctx->send_root, sctx->cur_ino, &info);
    6664     1009044 :         if (ret < 0)
    6665           0 :                 goto out;
    6666     1009044 :         left_mode = info.mode;
    6667     1009044 :         left_uid = info.uid;
    6668     1009044 :         left_gid = info.gid;
    6669     1009044 :         left_fileattr = info.fileattr;
    6670             : 
                               /*
                                * No parent root to compare against, or the inode is new: always
                                * send the ownership and, except for symlinks, the mode.
                                */
    6671     1009044 :         if (!sctx->parent_root || sctx->cur_inode_new) {
    6672      603875 :                 need_chown = 1;
    6673      603875 :                 if (!S_ISLNK(sctx->cur_inode_mode))
    6674      603573 :                         need_chmod = 1;
    6675      603875 :                 if (sctx->cur_inode_next_write_offset == sctx->cur_inode_size)
    6676      602904 :                         need_truncate = 0;
    6677             :         } else {
                                       /*
                                        * The inode also exists in the parent root ("right"
                                        * values): only send the attributes that differ.
                                        */
    6678      405169 :                 u64 old_size;
    6679             : 
    6680      405169 :                 ret = get_inode_info(sctx->parent_root, sctx->cur_ino, &info);
    6681      405169 :                 if (ret < 0)
    6682           0 :                         goto out;
    6683      405169 :                 old_size = info.size;
    6684      405169 :                 right_mode = info.mode;
    6685      405169 :                 right_uid = info.uid;
    6686      405169 :                 right_gid = info.gid;
    6687      405169 :                 right_fileattr = info.fileattr;
    6688             : 
    6689      405169 :                 if (left_uid != right_uid || left_gid != right_gid)
    6690         392 :                         need_chown = 1;
    6691      405169 :                 if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
    6692           1 :                         need_chmod = 1;
    6693      405169 :                 if (!S_ISLNK(sctx->cur_inode_mode) && left_fileattr != right_fileattr)
    6694         183 :                         need_fileattr = true;
                                       /*
                                        * No truncate if the size is unchanged, or if the file
                                        * grew and cur_inode_next_write_offset already equals the
                                        * new size.
                                        */
    6695      405169 :                 if ((old_size == sctx->cur_inode_size) ||
    6696        2068 :                     (sctx->cur_inode_size > old_size &&
    6697        2068 :                      sctx->cur_inode_next_write_offset == sctx->cur_inode_size))
    6698      403517 :                         need_truncate = 0;
    6699             :         }
    6700             : 
    6701     1009044 :         if (S_ISREG(sctx->cur_inode_mode)) {
    6702     1004688 :                 if (need_send_hole(sctx)) {
                                               /*
                                                * Look up the last extent if it is unknown
                                                * ((u64)-1) or lies before the inode size.
                                                */
    6703      402719 :                         if (sctx->cur_inode_last_extent == (u64)-1 ||
    6704             :                             sctx->cur_inode_last_extent <
    6705      402626 :                             sctx->cur_inode_size) {
    6706         721 :                                 ret = get_last_extent(sctx, (u64)-1);
    6707         721 :                                 if (ret)
    6708           0 :                                         goto out;
    6709             :                         }
    6710      402719 :                         if (sctx->cur_inode_last_extent <
    6711      402719 :                             sctx->cur_inode_size) {
    6712         655 :                                 ret = send_hole(sctx, sctx->cur_inode_size);
    6713         655 :                                 if (ret)
    6714           0 :                                         goto out;
    6715             :                         }
    6716             :                 }
    6717     1004688 :                 if (need_truncate) {
    6718         447 :                         ret = send_truncate(sctx, sctx->cur_ino,
    6719             :                                             sctx->cur_inode_gen,
    6720             :                                             sctx->cur_inode_size);
    6721         447 :                         if (ret < 0)
    6722           0 :                                 goto out;
    6723             :                 }
    6724             :         }
    6725             : 
    6726     1009044 :         if (need_chown) {
    6727      603869 :                 ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen,
    6728             :                                 left_uid, left_gid);
    6729      603696 :                 if (ret < 0)
    6730           0 :                         goto out;
    6731             :         }
    6732     1008871 :         if (need_chmod) {
    6733      603254 :                 ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen,
    6734             :                                 left_mode);
    6735      603014 :                 if (ret < 0)
    6736           0 :                         goto out;
    6737             :         }
    6738     1008631 :         if (need_fileattr) {
    6739         183 :                 ret = send_fileattr(sctx, sctx->cur_ino, sctx->cur_inode_gen,
    6740             :                                     left_fileattr);
    6741         183 :                 if (ret < 0)
    6742           0 :                         goto out;
    6743             :         }
    6744             : 
                               /*
                                * Verity is only processed when the stream protocol allows the
                                * ENABLE_VERITY command and the inode was flagged as needing it.
                                */
    6745     1008631 :         if (proto_cmd_ok(sctx, BTRFS_SEND_C_ENABLE_VERITY)
    6746           0 :             && sctx->cur_inode_needs_verity) {
    6747           0 :                 ret = process_verity(sctx);
    6748           0 :                 if (ret < 0)
    6749           0 :                         goto out;
    6750             :         }
    6751             : 
    6752     1008631 :         ret = send_capabilities(sctx);
    6753     1009255 :         if (ret < 0)
    6754           0 :                 goto out;
    6755             : 
    6756             :         /*
    6757             :          * If other directory inodes depended on our current directory
    6758             :          * inode's move/rename, now do their move/rename operations.
    6759             :          */
    6760     1009255 :         if (!is_waiting_for_move(sctx, sctx->cur_ino)) {
    6761     1008374 :                 ret = apply_children_dir_moves(sctx);
    6762     1007944 :                 if (ret)
    6763           0 :                         goto out;
    6764             :                 /*
    6765             :                  * Need to send utimes every time, no matter if it actually
    6766             :                  * changed between the two trees as we have done changes to
    6767             :                  * the inode before. If our inode is a directory and it's
    6768             :                  * waiting to be moved/renamed, we will send its utimes when
    6769             :                  * it's moved/renamed, therefore we don't need to do it here.
    6770             :                  */
    6771     1007944 :                 sctx->send_progress = sctx->cur_ino + 1;
    6772             : 
    6773             :                 /*
    6774             :                  * If the current inode is a non-empty directory, delay issuing
    6775             :                  * the utimes command for it, as it's very likely we have inodes
    6776             :                  * with a higher number inside it. We want to issue the utimes
    6777             :                  * command only after adding all dentries to it.
    6778             :                  */
    6779     1007944 :                 if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_size > 0)
    6780        1857 :                         ret = cache_dir_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
    6781             :                 else
    6782     1006087 :                         ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
    6783             : 
    6784     1009281 :                 if (ret < 0)
    6785           0 :                         goto out;
    6786             :         }
    6787             : 
                       /*
                        * Common exit, also reached by the early "goto out" paths above: the
                        * dir utimes cache is trimmed whenever ret is 0 at this point.
                        */
    6788     1009375 : out:
    6789     4280549 :         if (!ret)
    6790     4280501 :                 ret = trim_dir_utimes_cache(sctx);
    6791             : 
    6792             :         return ret;
    6793             : }
    6794             : 
    6795     1008786 : static void close_current_inode(struct send_ctx *sctx)
    6796             : {
                               /*
                                * Release sctx->cur_inode with iput() and reset the pointer to
                                * NULL. Does nothing if no inode is currently held. When
                                * sctx->clean_page_cache is set, the page cache range from
                                * sctx->page_cache_clear_start up to the end of the page that
                                * contains i_size is truncated first.
                                */
    6797     1008786 :         u64 i_size;
    6798             : 
    6799     1008786 :         if (sctx->cur_inode == NULL)
    6800             :                 return;
    6801             : 
    6802      601116 :         i_size = i_size_read(sctx->cur_inode);
    6803             : 
    6804             :         /*
    6805             :          * If we are doing an incremental send, we may have extents between the
    6806             :          * last processed extent and the i_size that have not been processed
    6807             :          * because they haven't changed but we may have read some of their pages
    6808             :          * through readahead, see the comments at send_extent_data().
    6809             :          */
    6810      601116 :         if (sctx->clean_page_cache && sctx->page_cache_clear_start < i_size)
    6811          17 :                 truncate_inode_pages_range(&sctx->cur_inode->i_data,
    6812             :                                            sctx->page_cache_clear_start,
    6813          17 :                                            round_up(i_size, PAGE_SIZE) - 1);
    6814             : 
    6815      601116 :         iput(sctx->cur_inode);
    6816      602001 :         sctx->cur_inode = NULL;
    6817             : }
    6818             : 
    6819     1008645 : static int changed_inode(struct send_ctx *sctx,
    6820             :                          enum btrfs_compare_tree_result result)
    6821             : {
                               /*
                                * Start processing the inode item that sctx->cmp_key refers to.
                                *
                                * The previously held inode is closed and the per-inode state in
                                * sctx (cur_ino, cur_inode_gen, cur_inode_new, cur_inode_deleted,
                                * cur_inode_size, cur_inode_mode, ...) is set up from the left
                                * path for NEW/CHANGED results and from the right path for
                                * DELETED results. Inodes that must not be sent are flagged via
                                * sctx->ignore_cur_inode.
                                *
                                * Returns 0 on success, otherwise the value returned by the
                                * helper that failed.
                                */
    6822     1008645 :         int ret = 0;
    6823     1008645 :         struct btrfs_key *key = sctx->cmp_key;
    6824     1008645 :         struct btrfs_inode_item *left_ii = NULL;
    6825     1008645 :         struct btrfs_inode_item *right_ii = NULL;
    6826     1008645 :         u64 left_gen = 0;
    6827     1008645 :         u64 right_gen = 0;
    6828             : 
                               /* Drop whatever inode the previous cur_ino kept open. */
    6829     1008645 :         close_current_inode(sctx);
    6830             : 
    6831     1009332 :         sctx->cur_ino = key->objectid;
    6832     1009332 :         sctx->cur_inode_new_gen = false;
    6833     1009332 :         sctx->cur_inode_last_extent = (u64)-1;
    6834     1009332 :         sctx->cur_inode_next_write_offset = 0;
    6835     1009332 :         sctx->ignore_cur_inode = false;
    6836             : 
    6837             :         /*
    6838             :          * Set send_progress to current inode. This will tell all get_cur_xxx
    6839             :          * functions that the current inode's refs are not updated yet. Later,
    6840             :          * when process_recorded_refs is finished, it is set to cur_ino + 1.
    6841             :          */
    6842     1009332 :         sctx->send_progress = sctx->cur_ino;
    6843             : 
                               /*
                                * NEW and CHANGED items are read from the left path. Any other
                                * result is read from the right path (DELETED is the only other
                                * result handled further below).
                                */
    6844     1009332 :         if (result == BTRFS_COMPARE_TREE_NEW ||
    6845     1009332 :             result == BTRFS_COMPARE_TREE_CHANGED) {
    6846     1009187 :                 left_ii = btrfs_item_ptr(sctx->left_path->nodes[0],
    6847             :                                 sctx->left_path->slots[0],
    6848             :                                 struct btrfs_inode_item);
    6849     1009169 :                 left_gen = btrfs_inode_generation(sctx->left_path->nodes[0],
    6850             :                                 left_ii);
    6851             :         } else {
    6852         145 :                 right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
    6853             :                                 sctx->right_path->slots[0],
    6854             :                                 struct btrfs_inode_item);
    6855         145 :                 right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
    6856             :                                 right_ii);
    6857             :         }
                               /* A CHANGED item exists on both sides: read the right one too. */
    6858     1009353 :         if (result == BTRFS_COMPARE_TREE_CHANGED) {
    6859      405170 :                 right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
    6860             :                                 sctx->right_path->slots[0],
    6861             :                                 struct btrfs_inode_item);
    6862             : 
    6863      405169 :                 right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
    6864             :                                 right_ii);
    6865             : 
    6866             :                 /*
    6867             :                  * The cur_ino = root dir case is special here. We can't treat
    6868             :                  * the inode as deleted+reused because it would generate a
    6869             :                  * stream that tries to delete/mkdir the root dir.
    6870             :                  */
    6871      405168 :                 if (left_gen != right_gen &&
    6872           9 :                     sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
    6873           7 :                         sctx->cur_inode_new_gen = true;
    6874             :         }
    6875             : 
    6876             :         /*
    6877             :          * Normally we do not find inodes with a link count of zero (orphans)
    6878             :          * because the most common case is to create a snapshot and use it
    6879             :          * for a send operation. However other less common use cases involve
    6880             :          * using a subvolume and send it after turning it to RO mode just
    6881             :          * after deleting all hard links of a file while holding an open
    6882             :          * file descriptor against it or turning a RO snapshot into RW mode,
    6883             :          * keep an open file descriptor against a file, delete it and then
    6884             :          * turn the snapshot back to RO mode before using it for a send
    6885             :          * operation. The former is what the receiver operation does.
    6886             :          * Therefore, if we want to send these snapshots soon after they're
    6887             :          * received, we need to handle orphan inodes as well. Moreover, orphans
    6888             :          * can appear not only in the send snapshot but also in the parent
    6889             :          * snapshot. Here are several cases:
    6890             :          *
    6891             :          * Case 1: BTRFS_COMPARE_TREE_NEW
    6892             :          *       |  send snapshot  | action
    6893             :          * --------------------------------
    6894             :          * nlink |        0        | ignore
    6895             :          *
    6896             :          * Case 2: BTRFS_COMPARE_TREE_DELETED
    6897             :          *       | parent snapshot | action
    6898             :          * ----------------------------------
    6899             :          * nlink |        0        | as usual
    6900             :          * Note: No unlinks will be sent because there're no paths for it.
    6901             :          *
    6902             :          * Case 3: BTRFS_COMPARE_TREE_CHANGED
    6903             :          *           |       | parent snapshot | send snapshot | action
    6904             :          * -----------------------------------------------------------------------
    6905             :          * subcase 1 | nlink |        0        |       0       | ignore
    6906             :          * subcase 2 | nlink |       >0        |       0       | new_gen(deletion)
    6907             :          * subcase 3 | nlink |        0        |      >0       | new_gen(creation)
    6908             :          *
    6909             :          */
    6910     1009351 :         if (result == BTRFS_COMPARE_TREE_NEW) {
    6911      604038 :                 if (btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii) == 0) {
    6912           7 :                         sctx->ignore_cur_inode = true;
    6913           7 :                         goto out;
    6914             :                 }
    6915      604006 :                 sctx->cur_inode_gen = left_gen;
    6916      604006 :                 sctx->cur_inode_new = true;
    6917      604006 :                 sctx->cur_inode_deleted = false;
    6918     1207997 :                 sctx->cur_inode_size = btrfs_inode_size(
    6919      604006 :                                 sctx->left_path->nodes[0], left_ii);
    6920     1207966 :                 sctx->cur_inode_mode = btrfs_inode_mode(
    6921      603991 :                                 sctx->left_path->nodes[0], left_ii);
    6922     1207914 :                 sctx->cur_inode_rdev = btrfs_inode_rdev(
    6923      603975 :                                 sctx->left_path->nodes[0], left_ii);
                                       /* No create command is sent for BTRFS_FIRST_FREE_OBJECTID. */
    6924      603939 :                 if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
    6925      603832 :                         ret = send_create_inode_if_needed(sctx);
    6926      405313 :         } else if (result == BTRFS_COMPARE_TREE_DELETED) {
    6927         145 :                 sctx->cur_inode_gen = right_gen;
    6928         145 :                 sctx->cur_inode_new = false;
    6929         145 :                 sctx->cur_inode_deleted = true;
    6930         290 :                 sctx->cur_inode_size = btrfs_inode_size(
    6931         145 :                                 sctx->right_path->nodes[0], right_ii);
    6932         145 :                 sctx->cur_inode_mode = btrfs_inode_mode(
    6933         145 :                                 sctx->right_path->nodes[0], right_ii);
    6934      405168 :         } else if (result == BTRFS_COMPARE_TREE_CHANGED) {
    6935      405168 :                 u32 new_nlinks, old_nlinks;
    6936             : 
    6937      405168 :                 new_nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii);
    6938      405169 :                 old_nlinks = btrfs_inode_nlink(sctx->right_path->nodes[0], right_ii);
                                       /*
                                        * Case 3 of the table above: orphan on both sides is
                                        * ignored, orphan on exactly one side is handled through
                                        * the new_gen path.
                                        */
    6939      405170 :                 if (new_nlinks == 0 && old_nlinks == 0) {
    6940           2 :                         sctx->ignore_cur_inode = true;
    6941           2 :                         goto out;
    6942      405168 :                 } else if (new_nlinks == 0 || old_nlinks == 0) {
    6943           3 :                         sctx->cur_inode_new_gen = 1;
    6944             :                 }
    6945             :                 /*
    6946             :                  * We need to do some special handling in case the inode was
    6947             :                  * reported as changed with a changed generation number. This
    6948             :                  * means that the original inode was deleted and new inode
    6949             :                  * reused the same inum. So we have to treat the old inode as
    6950             :                  * deleted and the new one as new.
    6951             :                  */
    6952      405168 :                 if (sctx->cur_inode_new_gen) {
    6953             :                         /*
    6954             :                          * First, process the inode as if it was deleted.
    6955             :                          */
    6956          10 :                         if (old_nlinks > 0) {
    6957          10 :                                 sctx->cur_inode_gen = right_gen;
    6958          10 :                                 sctx->cur_inode_new = false;
    6959          10 :                                 sctx->cur_inode_deleted = true;
    6960          20 :                                 sctx->cur_inode_size = btrfs_inode_size(
    6961          10 :                                                 sctx->right_path->nodes[0], right_ii);
    6962          20 :                                 sctx->cur_inode_mode = btrfs_inode_mode(
    6963          10 :                                                 sctx->right_path->nodes[0], right_ii);
    6964          10 :                                 ret = process_all_refs(sctx,
    6965             :                                                 BTRFS_COMPARE_TREE_DELETED);
    6966          10 :                                 if (ret < 0)
    6967           0 :                                         goto out;
    6968             :                         }
    6969             : 
    6970             :                         /*
    6971             :                          * Now process the inode as if it was new.
    6972             :                          */
    6973          10 :                         if (new_nlinks > 0) {
    6974           7 :                                 sctx->cur_inode_gen = left_gen;
    6975           7 :                                 sctx->cur_inode_new = true;
    6976           7 :                                 sctx->cur_inode_deleted = false;
    6977          14 :                                 sctx->cur_inode_size = btrfs_inode_size(
    6978           7 :                                                 sctx->left_path->nodes[0],
    6979             :                                                 left_ii);
    6980          14 :                                 sctx->cur_inode_mode = btrfs_inode_mode(
    6981           7 :                                                 sctx->left_path->nodes[0],
    6982             :                                                 left_ii);
    6983          14 :                                 sctx->cur_inode_rdev = btrfs_inode_rdev(
    6984           7 :                                                 sctx->left_path->nodes[0],
    6985             :                                                 left_ii);
    6986           7 :                                 ret = send_create_inode_if_needed(sctx);
    6987           7 :                                 if (ret < 0)
    6988           0 :                                         goto out;
    6989             : 
    6990           7 :                                 ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
    6991           7 :                                 if (ret < 0)
    6992           0 :                                         goto out;
    6993             :                                 /*
    6994             :                                  * Advance send_progress now as we did not get
    6995             :                                  * into process_recorded_refs_if_needed in the
    6996             :                                  * new_gen case.
    6997             :                                  */
    6998           7 :                                 sctx->send_progress = sctx->cur_ino + 1;
    6999             : 
    7000             :                                 /*
    7001             :                                  * Now process all extents and xattrs of the
    7002             :                                  * inode as if they were all new.
    7003             :                                  */
    7004           7 :                                 ret = process_all_extents(sctx);
    7005           7 :                                 if (ret < 0)
    7006           0 :                                         goto out;
    7007           7 :                                 ret = process_all_new_xattrs(sctx);
    7008           7 :                                 if (ret < 0)
    7009           0 :                                         goto out;
    7010             :                         }
    7011             :                 } else {
                                               /* Same inode on both sides: track the left values. */
    7012      405158 :                         sctx->cur_inode_gen = left_gen;
    7013      405158 :                         sctx->cur_inode_new = false;
    7014      405158 :                         sctx->cur_inode_new_gen = false;
    7015      405158 :                         sctx->cur_inode_deleted = false;
    7016      810316 :                         sctx->cur_inode_size = btrfs_inode_size(
    7017      405158 :                                         sctx->left_path->nodes[0], left_ii);
    7018      405159 :                         sctx->cur_inode_mode = btrfs_inode_mode(
    7019      405158 :                                         sctx->left_path->nodes[0], left_ii);
    7020             :                 }
    7021             :         }
    7022             : 
    7023           0 : out:
    7024     1009614 :         return ret;
    7025             : }
    7026             : 
    7027             : /*
    7028             :  * We have to process new refs before deleted refs, but compare_trees gives us
    7029             :  * the new and deleted refs mixed. To fix this, we record the new/deleted refs
    7030             :  * first and later process them in process_recorded_refs.
    7031             :  * For the cur_inode_new_gen case, we skip recording completely because
    7032             :  * changed_inode has already initiated processing of refs. The reason for this
    7033             :  * is that in this case, compare_tree actually compares the refs of 2 different
    7034             :  * inodes. To fix this, process_all_refs is used in changed_inode to handle all
    7035             :  * refs of the right tree as deleted and all refs of the left tree as new.
                        *
                        * Recording is also skipped when cur_ino is BTRFS_FIRST_FREE_OBJECTID.
                        *
                        * Returns -EIO if the compared key does not belong to the current inode,
                        * otherwise the value of the record_*_ref() helper that was called, or 0
                        * if none was called.
    7036             :  */
    7037      608580 : static int changed_ref(struct send_ctx *sctx,
    7038             :                        enum btrfs_compare_tree_result result)
    7039             : {
    7040      608580 :         int ret = 0;
    7041             : 
                               /* A ref item for an inode other than the current one is reported. */
    7042      608580 :         if (sctx->cur_ino != sctx->cmp_key->objectid) {
    7043           0 :                 inconsistent_snapshot_error(sctx, result, "reference");
    7044           0 :                 return -EIO;
    7045             :         }
    7046             : 
    7047      608580 :         if (!sctx->cur_inode_new_gen &&
    7048             :             sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
    7049      608476 :                 if (result == BTRFS_COMPARE_TREE_NEW)
    7050      606279 :                         ret = record_new_ref(sctx);
    7051        2197 :                 else if (result == BTRFS_COMPARE_TREE_DELETED)
    7052        2143 :                         ret = record_deleted_ref(sctx);
    7053          54 :                 else if (result == BTRFS_COMPARE_TREE_CHANGED)
    7054          54 :                         ret = record_changed_ref(sctx);
    7055             :         }
    7056             : 
    7057             :         return ret;
    7058             : }
    7059             : 
    7060             : /*
    7061             :  * Process new/deleted/changed xattrs. We skip processing in the
    7062             :  * cur_inode_new_gen case because changed_inode has already initiated
    7063             :  * processing of xattrs. The reason is the same as in changed_ref.
                        * Xattrs are also skipped when the current inode is marked as deleted.
                        *
                        * Returns -EIO if the compared key does not belong to the current inode,
                        * otherwise the value of the process_*_xattr() helper that was called, or
                        * 0 if none was called.
    7064             :  */
    7065      800371 : static int changed_xattr(struct send_ctx *sctx,
    7066             :                          enum btrfs_compare_tree_result result)
    7067             : {
    7068      800371 :         int ret = 0;
    7069             : 
                               /* An xattr item for an inode other than the current one is reported. */
    7070      800371 :         if (sctx->cur_ino != sctx->cmp_key->objectid) {
    7071           0 :                 inconsistent_snapshot_error(sctx, result, "xattr");
    7072           0 :                 return -EIO;
    7073             :         }
    7074             : 
    7075      800371 :         if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
    7076      800362 :                 if (result == BTRFS_COMPARE_TREE_NEW)
    7077      800289 :                         ret = process_new_xattr(sctx);
    7078          73 :                 else if (result == BTRFS_COMPARE_TREE_DELETED)
    7079          25 :                         ret = process_deleted_xattr(sctx);
    7080          48 :                 else if (result == BTRFS_COMPARE_TREE_CHANGED)
    7081          48 :                         ret = process_changed_xattr(sctx);
    7082             :         }
    7083             : 
    7084             :         return ret;
    7085             : }
    7086             : 
    7087             : /*
    7088             :  * Process new/deleted/changed extents. We skip processing in the
    7089             :  * cur_inode_new_gen case because changed_inode has already initiated
    7090             :  * processing of extents. The reason is the same as in changed_ref.
                        *
                        * Returns 0 when nothing had to be done, otherwise the value returned by
                        * process_extent().
    7091             :  */
    7092      639635 : static int changed_extent(struct send_ctx *sctx,
    7093             :                           enum btrfs_compare_tree_result result)
    7094             : {
    7095      639635 :         int ret = 0;
    7096             : 
    7097             :         /*
    7098             :          * We have found an extent item that changed without the inode item
    7099             :          * having changed. This can happen either after relocation (where the
    7100             :          * disk_bytenr of an extent item is replaced at
    7101             :          * relocation.c:replace_file_extents()) or after deduplication into a
    7102             :          * file in both the parent and send snapshots (where an extent item can
    7103             :          * get modified or replaced with a new one). Note that deduplication
    7104             :          * updates the inode item, but it only changes the iversion (sequence
    7105             :          * field in the inode item) of the inode, so if a file is deduplicated
    7106             :          * the same number of times in both the parent and send snapshots, its
    7107             :          * iversion becomes the same in both snapshots, whence the inode item is
    7108             :          * the same on both snapshots.
    7109             :          */
                               /* Such an extent is silently skipped rather than reported as an error. */
    7110      639635 :         if (sctx->cur_ino != sctx->cmp_key->objectid)
    7111             :                 return 0;
    7112             : 
    7113      639626 :         if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
                                       /* Only NEW and CHANGED extents are processed, from the left path. */
    7114      639170 :                 if (result != BTRFS_COMPARE_TREE_DELETED)
    7115      634574 :                         ret = process_extent(sctx, sctx->left_path,
    7116             :                                         sctx->cmp_key);
    7117             :         }
    7118             : 
    7119             :         return ret;
    7120             : }
    7121             : 
    7122             : static int changed_verity(struct send_ctx *sctx, enum btrfs_compare_tree_result result)
    7123             : {
    7124           0 :         int ret = 0;
    7125             : 
    7126           0 :         if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
    7127           0 :                 if (result == BTRFS_COMPARE_TREE_NEW)
    7128           0 :                         sctx->cur_inode_needs_verity = true;
    7129             :         }
    7130             :         return ret;
    7131             : }
    7132             : 
    7133      408594 : static int dir_changed(struct send_ctx *sctx, u64 dir)
    7134             : {
    7135      408594 :         u64 orig_gen, new_gen;
    7136      408594 :         int ret;
    7137             : 
    7138      408594 :         ret = get_inode_gen(sctx->send_root, dir, &new_gen);
    7139      408596 :         if (ret)
    7140             :                 return ret;
    7141             : 
    7142      408595 :         ret = get_inode_gen(sctx->parent_root, dir, &orig_gen);
    7143      408597 :         if (ret)
    7144             :                 return ret;
    7145             : 
    7146      408597 :         return (orig_gen != new_gen) ? 1 : 0;
    7147             : }
    7148             : 
    7149      408593 : static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
    7150             :                         struct btrfs_key *key)
    7151             : {
    7152      408593 :         struct btrfs_inode_extref *extref;
    7153      408593 :         struct extent_buffer *leaf;
    7154      408593 :         u64 dirid = 0, last_dirid = 0;
    7155      408593 :         unsigned long ptr;
    7156      408593 :         u32 item_size;
    7157      408593 :         u32 cur_offset = 0;
    7158      408593 :         int ref_name_len;
    7159      408593 :         int ret = 0;
    7160             : 
    7161             :         /* Easy case, just check this one dirid */
    7162      408593 :         if (key->type == BTRFS_INODE_REF_KEY) {
    7163      408593 :                 dirid = key->offset;
    7164             : 
    7165      408593 :                 ret = dir_changed(sctx, dirid);
    7166      408597 :                 goto out;
    7167             :         }
    7168             : 
    7169           0 :         leaf = path->nodes[0];
    7170           0 :         item_size = btrfs_item_size(leaf, path->slots[0]);
    7171           0 :         ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
    7172           0 :         while (cur_offset < item_size) {
    7173           0 :                 extref = (struct btrfs_inode_extref *)(ptr +
    7174             :                                                        cur_offset);
    7175           0 :                 dirid = btrfs_inode_extref_parent(leaf, extref);
    7176           0 :                 ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
    7177           0 :                 cur_offset += ref_name_len + sizeof(*extref);
    7178           0 :                 if (dirid == last_dirid)
    7179           0 :                         continue;
    7180           0 :                 ret = dir_changed(sctx, dirid);
    7181           0 :                 if (ret)
    7182             :                         break;
    7183             :                 last_dirid = dirid;
    7184             :         }
    7185           0 : out:
    7186      408597 :         return ret;
    7187             : }
    7188             : 
    7189             : /*
    7190             :  * Updates compare related fields in sctx and simply forwards to the actual
    7191             :  * changed_xxx functions.
    7192             :  */
    7193     5541600 : static int changed_cb(struct btrfs_path *left_path,
    7194             :                       struct btrfs_path *right_path,
    7195             :                       struct btrfs_key *key,
    7196             :                       enum btrfs_compare_tree_result result,
    7197             :                       struct send_ctx *sctx)
    7198             : {
    7199     5541600 :         int ret = 0;
    7200             : 
    7201             :         /*
    7202             :          * We can not hold the commit root semaphore here. This is because in
    7203             :          * the case of sending and receiving to the same filesystem, using a
    7204             :          * pipe, could result in a deadlock:
    7205             :          *
    7206             :          * 1) The task running send blocks on the pipe because it's full;
    7207             :          *
    7208             :          * 2) The task running receive, which is the only consumer of the pipe,
    7209             :          *    is waiting for a transaction commit (for example due to a space
    7210             :          *    reservation when doing a write or triggering a transaction commit
    7211             :          *    when creating a subvolume);
    7212             :          *
    7213             :          * 3) The transaction is waiting to write lock the commit root semaphore,
    7214             :          *    but can not acquire it since it's being held at 1).
    7215             :          *
    7216             :          * Down this call chain we write to the pipe through kernel_write().
    7217             :          * The same type of problem can also happen when sending to a file that
    7218             :          * is stored in the same filesystem - when reserving space for a write
    7219             :          * into the file, we can trigger a transaction commit.
    7220             :          *
    7221             :          * Our caller has supplied us with clones of leaves from the send and
    7222             :          * parent roots, so we're safe here from a concurrent relocation and
    7223             :          * further reallocation of metadata extents while we are here. Below we
    7224             :          * also assert that the leaves are clones.
    7225             :          */
    7226     5541600 :         lockdep_assert_not_held(&sctx->send_root->fs_info->commit_root_sem);
    7227             : 
    7228             :         /*
    7229             :          * We always have a send root, so left_path is never NULL. We will not
    7230             :          * have a leaf when we have reached the end of the send root but have
    7231             :          * not yet reached the end of the parent root.
    7232             :          */
    7233     5541600 :         if (left_path->nodes[0])
    7234     5539428 :                 ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED,
    7235             :                                 &left_path->nodes[0]->bflags));
    7236             :         /*
    7237             :          * When doing a full send we don't have a parent root, so right_path is
    7238             :          * NULL. When doing an incremental send, we may have reached the end of
    7239             :          * the parent root already, so we don't have a leaf at right_path.
    7240             :          */
    7241     5541600 :         if (right_path && right_path->nodes[0])
    7242     2903689 :                 ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED,
    7243             :                                 &right_path->nodes[0]->bflags));
    7244             : 
    7245     5541600 :         if (result == BTRFS_COMPARE_TREE_SAME) {
    7246     1262268 :                 if (key->type == BTRFS_INODE_REF_KEY ||
    7247             :                     key->type == BTRFS_INODE_EXTREF_KEY) {
    7248      408593 :                         ret = compare_refs(sctx, left_path, key);
    7249      408596 :                         if (!ret)
    7250             :                                 return 0;
    7251           2 :                         if (ret < 0)
    7252             :                                 return ret;
    7253      853675 :                 } else if (key->type == BTRFS_EXTENT_DATA_KEY) {
    7254      427301 :                         return maybe_send_hole(sctx, left_path, key);
    7255             :                 } else {
    7256             :                         return 0;
    7257             :                 }
    7258             :                 result = BTRFS_COMPARE_TREE_CHANGED;
    7259             :                 ret = 0;
    7260             :         }
    7261             : 
    7262     4279334 :         sctx->left_path = left_path;
    7263     4279334 :         sctx->right_path = right_path;
    7264     4279334 :         sctx->cmp_key = key;
    7265             : 
    7266     4279334 :         ret = finish_inode_if_needed(sctx, 0);
    7267     4280068 :         if (ret < 0)
    7268           0 :                 goto out;
    7269             : 
    7270             :         /* Ignore non-FS objects */
    7271     4280068 :         if (key->objectid == BTRFS_FREE_INO_OBJECTID ||
    7272             :             key->objectid == BTRFS_FREE_SPACE_OBJECTID)
    7273           0 :                 goto out;
    7274             : 
    7275     4280068 :         if (key->type == BTRFS_INODE_ITEM_KEY) {
    7276     1009002 :                 ret = changed_inode(sctx, result);
    7277     3271066 :         } else if (!sctx->ignore_cur_inode) {
    7278     3271059 :                 if (key->type == BTRFS_INODE_REF_KEY ||
    7279             :                     key->type == BTRFS_INODE_EXTREF_KEY)
    7280      608583 :                         ret = changed_ref(sctx, result);
    7281     2662476 :                 else if (key->type == BTRFS_XATTR_ITEM_KEY)
    7282      800371 :                         ret = changed_xattr(sctx, result);
    7283     1862105 :                 else if (key->type == BTRFS_EXTENT_DATA_KEY)
    7284      639635 :                         ret = changed_extent(sctx, result);
    7285     1222470 :                 else if (key->type == BTRFS_VERITY_DESC_ITEM_KEY &&
    7286           0 :                          key->offset == 0)
    7287           0 :                         ret = changed_verity(sctx, result);
    7288             :         }
    7289             : 
    7290     1222477 : out:
    7291             :         return ret;
    7292             : }
    7293             : 
    7294        2720 : static int search_key_again(const struct send_ctx *sctx,
    7295             :                             struct btrfs_root *root,
    7296             :                             struct btrfs_path *path,
    7297             :                             const struct btrfs_key *key)
    7298             : {
    7299        2720 :         int ret;
    7300             : 
    7301        2720 :         if (!path->need_commit_sem)
    7302        2720 :                 lockdep_assert_held_read(&root->fs_info->commit_root_sem);
    7303             : 
    7304             :         /*
    7305             :          * Roots used for send operations are readonly and no one can add,
    7306             :          * update or remove keys from them, so we should be able to find our
    7307             :          * key again. The only exception is deduplication, which can operate on
    7308             :          * readonly roots and add, update or remove keys to/from them - but at
    7309             :          * the moment we don't allow it to run in parallel with send.
    7310             :          */
    7311        2720 :         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
    7312        2721 :         ASSERT(ret <= 0);
    7313        2721 :         if (ret > 0) {
    7314           0 :                 btrfs_print_tree(path->nodes[path->lowest_level], false);
    7315           0 :                 btrfs_err(root->fs_info,
    7316             : "send: key (%llu %u %llu) not found in %s root %llu, lowest_level %d, slot %d",
    7317             :                           key->objectid, key->type, key->offset,
    7318             :                           (root == sctx->parent_root ? "parent" : "send"),
    7319             :                           root->root_key.objectid, path->lowest_level,
    7320             :                           path->slots[path->lowest_level]);
    7321           0 :                 return -EUCLEAN;
    7322             :         }
    7323             : 
    7324             :         return ret;
    7325             : }
    7326             : 
    7327          87 : static int full_send_tree(struct send_ctx *sctx)
    7328             : {
    7329          87 :         int ret;
    7330          87 :         struct btrfs_root *send_root = sctx->send_root;
    7331          87 :         struct btrfs_key key;
    7332          87 :         struct btrfs_fs_info *fs_info = send_root->fs_info;
    7333          87 :         struct btrfs_path *path;
    7334             : 
    7335          87 :         path = alloc_path_for_send();
    7336          87 :         if (!path)
    7337             :                 return -ENOMEM;
    7338          87 :         path->reada = READA_FORWARD_ALWAYS;
    7339             : 
    7340          87 :         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
    7341          87 :         key.type = BTRFS_INODE_ITEM_KEY;
    7342          87 :         key.offset = 0;
    7343             : 
    7344          87 :         down_read(&fs_info->commit_root_sem);
    7345          87 :         sctx->last_reloc_trans = fs_info->last_reloc_trans;
    7346          87 :         up_read(&fs_info->commit_root_sem);
    7347             : 
    7348          87 :         ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
    7349          87 :         if (ret < 0)
    7350           0 :                 goto out;
    7351          87 :         if (ret)
    7352           0 :                 goto out_finish;
    7353             : 
    7354     1034931 :         while (1) {
    7355     1034931 :                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
    7356             : 
    7357     1034931 :                 ret = changed_cb(path, NULL, &key,
    7358             :                                  BTRFS_COMPARE_TREE_NEW, sctx);
    7359     1034931 :                 if (ret < 0)
    7360           0 :                         goto out;
    7361             : 
    7362     1034931 :                 down_read(&fs_info->commit_root_sem);
    7363     1034931 :                 if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
    7364         465 :                         sctx->last_reloc_trans = fs_info->last_reloc_trans;
    7365         465 :                         up_read(&fs_info->commit_root_sem);
    7366             :                         /*
    7367             :                          * A transaction used for relocating a block group was
    7368             :                          * committed or is about to finish its commit. Release
    7369             :                          * our path (leaf) and restart the search, so that we
    7370             :                          * avoid operating on any file extent items that are
    7371             :                          * stale, with a disk_bytenr that reflects a pre
    7372             :                          * relocation value. This way we avoid as much as
    7373             :                          * possible to fallback to regular writes when checking
    7374             :                          * if we can clone file ranges.
    7375             :                          */
    7376         465 :                         btrfs_release_path(path);
    7377         465 :                         ret = search_key_again(sctx, send_root, path, &key);
    7378         465 :                         if (ret < 0)
    7379           0 :                                 goto out;
    7380             :                 } else {
    7381     1034466 :                         up_read(&fs_info->commit_root_sem);
    7382             :                 }
    7383             : 
    7384     1034931 :                 ret = btrfs_next_item(send_root, path);
    7385     1034931 :                 if (ret < 0)
    7386           0 :                         goto out;
    7387     1034931 :                 if (ret) {
    7388             :                         ret  = 0;
    7389             :                         break;
    7390             :                 }
    7391             :         }
    7392             : 
    7393          87 : out_finish:
    7394          87 :         ret = finish_inode_if_needed(sctx, 1);
    7395             : 
    7396          87 : out:
    7397          87 :         btrfs_free_path(path);
    7398          87 :         return ret;
    7399             : }
    7400             : 
    7401      324124 : static int replace_node_with_clone(struct btrfs_path *path, int level)
    7402             : {
    7403      324124 :         struct extent_buffer *clone;
    7404             : 
    7405      324124 :         clone = btrfs_clone_extent_buffer(path->nodes[level]);
    7406      324124 :         if (!clone)
    7407             :                 return -ENOMEM;
    7408             : 
    7409      324124 :         free_extent_buffer(path->nodes[level]);
    7410      324124 :         path->nodes[level] = clone;
    7411             : 
    7412      324124 :         return 0;
    7413             : }
    7414             : 
    7415      321638 : static int tree_move_down(struct btrfs_path *path, int *level, u64 reada_min_gen)
    7416             : {
    7417      321638 :         struct extent_buffer *eb;
    7418      321638 :         struct extent_buffer *parent = path->nodes[*level];
    7419      321638 :         int slot = path->slots[*level];
    7420      321638 :         const int nritems = btrfs_header_nritems(parent);
    7421      321638 :         u64 reada_max;
    7422      321638 :         u64 reada_done = 0;
    7423             : 
    7424      321638 :         lockdep_assert_held_read(&parent->fs_info->commit_root_sem);
    7425             : 
    7426      321638 :         BUG_ON(*level == 0);
    7427      321638 :         eb = btrfs_read_node_slot(parent, slot);
    7428      321638 :         if (IS_ERR(eb))
    7429           0 :                 return PTR_ERR(eb);
    7430             : 
    7431             :         /*
    7432             :          * Trigger readahead for the next leaves we will process, so that it is
    7433             :          * very likely that when we need them they are already in memory and we
    7434             :          * will not block on disk IO. For nodes we only do readahead for one,
    7435             :          * since the time window between processing nodes is typically larger.
    7436             :          */
    7437      321638 :         reada_max = (*level == 1 ? SZ_128K : eb->fs_info->nodesize);
    7438             : 
    7439    21926776 :         for (slot++; slot < nritems && reada_done < reada_max; slot++) {
    7440    21605138 :                 if (btrfs_node_ptr_generation(parent, slot) > reada_min_gen) {
    7441     1756971 :                         btrfs_readahead_node_child(parent, slot);
    7442     1756971 :                         reada_done += eb->fs_info->nodesize;
    7443             :                 }
    7444             :         }
    7445             : 
    7446      321638 :         path->nodes[*level - 1] = eb;
    7447      321638 :         path->slots[*level - 1] = 0;
    7448      321638 :         (*level)--;
    7449             : 
    7450      321638 :         if (*level == 0)
    7451      320631 :                 return replace_node_with_clone(path, 0);
    7452             : 
    7453             :         return 0;
    7454             : }
    7455             : 
    7456     6182410 : static int tree_move_next_or_upnext(struct btrfs_path *path,
    7457             :                                     int *level, int root_level)
    7458             : {
    7459     6182410 :         int ret = 0;
    7460     6182410 :         int nritems;
    7461     6182410 :         nritems = btrfs_header_nritems(path->nodes[*level]);
    7462             : 
    7463     6182410 :         path->slots[*level]++;
    7464             : 
    7465     6504049 :         while (path->slots[*level] >= nritems) {
    7466      321889 :                 if (*level == root_level) {
    7467         251 :                         path->slots[*level] = nritems - 1;
    7468         251 :                         return -1;
    7469             :                 }
    7470             : 
    7471             :                 /* move upnext */
    7472      321638 :                 path->slots[*level] = 0;
    7473      321638 :                 free_extent_buffer(path->nodes[*level]);
    7474      321639 :                 path->nodes[*level] = NULL;
    7475      321639 :                 (*level)++;
    7476      321639 :                 path->slots[*level]++;
    7477             : 
    7478      321639 :                 nritems = btrfs_header_nritems(path->nodes[*level]);
    7479      321639 :                 ret = 1;
    7480             :         }
    7481             :         return ret;
    7482             : }
    7483             : 
    7484             : /*
    7485             :  * Returns 1 if it had to move up and next. 0 is returned if it moved only next
    7486             :  * or down.
    7487             :  */
    7488     6504049 : static int tree_advance(struct btrfs_path *path,
    7489             :                         int *level, int root_level,
    7490             :                         int allow_down,
    7491             :                         struct btrfs_key *key,
    7492             :                         u64 reada_min_gen)
    7493             : {
    7494     6504049 :         int ret;
    7495             : 
    7496     6504049 :         if (*level == 0 || !allow_down) {
    7497     6182411 :                 ret = tree_move_next_or_upnext(path, level, root_level);
    7498             :         } else {
    7499      321638 :                 ret = tree_move_down(path, level, reada_min_gen);
    7500             :         }
    7501             : 
    7502             :         /*
    7503             :          * Even if we have reached the end of a tree, ret is -1, update the key
    7504             :          * anyway, so that in case we need to restart due to a block group
    7505             :          * relocation, we can assert that the last key of the root node still
    7506             :          * exists in the tree.
    7507             :          */
    7508     6504058 :         if (*level == 0)
    7509     6179050 :                 btrfs_item_key_to_cpu(path->nodes[*level], key,
    7510             :                                       path->slots[*level]);
    7511             :         else
    7512      325008 :                 btrfs_node_key_to_cpu(path->nodes[*level], key,
    7513             :                                       path->slots[*level]);
    7514             : 
    7515     6504057 :         return ret;
    7516             : }
    7517             : 
    7518     1670799 : static int tree_compare_item(struct btrfs_path *left_path,
    7519             :                              struct btrfs_path *right_path,
    7520             :                              char *tmp_buf)
    7521             : {
    7522     1670799 :         int cmp;
    7523     1670799 :         int len1, len2;
    7524     1670799 :         unsigned long off1, off2;
    7525             : 
    7526     1670799 :         len1 = btrfs_item_size(left_path->nodes[0], left_path->slots[0]);
    7527     1670795 :         len2 = btrfs_item_size(right_path->nodes[0], right_path->slots[0]);
    7528     1670794 :         if (len1 != len2)
    7529             :                 return 1;
    7530             : 
    7531     1670719 :         off1 = btrfs_item_ptr_offset(left_path->nodes[0], left_path->slots[0]);
    7532     1670721 :         off2 = btrfs_item_ptr_offset(right_path->nodes[0],
    7533             :                                 right_path->slots[0]);
    7534             : 
    7535     1670718 :         read_extent_buffer(left_path->nodes[0], tmp_buf, off1, len1);
    7536             : 
    7537     1670721 :         cmp = memcmp_extent_buffer(right_path->nodes[0], tmp_buf, off2, len1);
    7538     1670713 :         if (cmp)
    7539      408445 :                 return 1;
    7540             :         return 0;
    7541             : }
    7542             : 
    7543             : /*
    7544             :  * A transaction used for relocating a block group was committed or is about to
    7545             :  * finish its commit. Release our paths and restart the search, so that we are
    7546             :  * not using stale extent buffers:
    7547             :  *
    7548             :  * 1) For levels > 0, we are only holding references of extent buffers, without
    7549             :  *    any locks on them, which does not prevent them from having been relocated
    7550             :  *    and reallocated after the last time we released the commit root semaphore.
    7551             :  *    The exception are the root nodes, for which we always have a clone, see
    7552             :  *    the comment at btrfs_compare_trees();
    7553             :  *
    7554             :  * 2) For leaves, level 0, we are holding copies (clones) of extent buffers, so
    7555             :  *    we are safe from the concurrent relocation and reallocation. However they
    7556             :  *    can have file extent items with a pre relocation disk_bytenr value, so we
    7557             :  *    restart the start from the current commit roots and clone the new leaves so
    7558             :  *    that we get the post relocation disk_bytenr values. Not doing so, could
    7559             :  *    make us clone the wrong data in case there are new extents using the old
    7560             :  *    disk_bytenr that happen to be shared.
    7561             :  */
    7562        1128 : static int restart_after_relocation(struct btrfs_path *left_path,
    7563             :                                     struct btrfs_path *right_path,
    7564             :                                     const struct btrfs_key *left_key,
    7565             :                                     const struct btrfs_key *right_key,
    7566             :                                     int left_level,
    7567             :                                     int right_level,
    7568             :                                     const struct send_ctx *sctx)
    7569             : {
    7570        1128 :         int root_level;
    7571        1128 :         int ret;
    7572             : 
    7573        1128 :         lockdep_assert_held_read(&sctx->send_root->fs_info->commit_root_sem);
    7574             : 
    7575        1128 :         btrfs_release_path(left_path);
    7576        1128 :         btrfs_release_path(right_path);
    7577             : 
    7578             :         /*
    7579             :          * Since keys can not be added or removed to/from our roots because they
    7580             :          * are readonly and we do not allow deduplication to run in parallel
    7581             :          * (which can add, remove or change keys), the layout of the trees should
    7582             :          * not change.
    7583             :          */
    7584        1128 :         left_path->lowest_level = left_level;
    7585        1128 :         ret = search_key_again(sctx, sctx->send_root, left_path, left_key);
    7586        1128 :         if (ret < 0)
    7587             :                 return ret;
    7588             : 
    7589        1128 :         right_path->lowest_level = right_level;
    7590        1128 :         ret = search_key_again(sctx, sctx->parent_root, right_path, right_key);
    7591        1128 :         if (ret < 0)
    7592             :                 return ret;
    7593             : 
    7594             :         /*
    7595             :          * If the lowest level nodes are leaves, clone them so that they can be
    7596             :          * safely used by changed_cb() while not under the protection of the
    7597             :          * commit root semaphore, even if relocation and reallocation happens in
    7598             :          * parallel.
    7599             :          */
    7600        1128 :         if (left_level == 0) {
    7601        1127 :                 ret = replace_node_with_clone(left_path, 0);
    7602        1127 :                 if (ret < 0)
    7603             :                         return ret;
    7604             :         }
    7605             : 
    7606        1128 :         if (right_level == 0) {
    7607         110 :                 ret = replace_node_with_clone(right_path, 0);
    7608         110 :                 if (ret < 0)
    7609             :                         return ret;
    7610             :         }
    7611             : 
    7612             :         /*
    7613             :          * Now clone the root nodes (unless they happen to be the leaves we have
    7614             :          * already cloned). This is to protect against concurrent snapshotting of
    7615             :          * the send and parent roots (see the comment at btrfs_compare_trees()).
    7616             :          */
    7617        1128 :         root_level = btrfs_header_level(sctx->send_root->commit_root);
    7618        1128 :         if (root_level > 0) {
    7619        1128 :                 ret = replace_node_with_clone(left_path, root_level);
    7620        1128 :                 if (ret < 0)
    7621             :                         return ret;
    7622             :         }
    7623             : 
    7624        1128 :         root_level = btrfs_header_level(sctx->parent_root->commit_root);
    7625        1128 :         if (root_level > 0) {
    7626        1128 :                 ret = replace_node_with_clone(right_path, root_level);
    7627        1128 :                 if (ret < 0)
    7628             :                         return ret;
    7629             :         }
    7630             : 
    7631             :         return 0;
    7632             : }
    7633             : 
    7634             : /*
    7635             :  * This function compares two trees and calls changed_cb() for every leaf
    7636             :  * item it visits, reporting it as NEW, DELETED, CHANGED or SAME.
    7637             :  * If shared tree blocks are encountered, whole subtrees are skipped, making
    7638             :  * the compare pretty fast on snapshotted subvolumes.
    7639             :  *
    7640             :  * This currently works on commit roots only. As commit roots are read only,
    7641             :  * the paths skip tree locking. The walk holds fs_info->commit_root_sem for
    7642             :  * reading, releasing it around callbacks, on contention and before rescheduling.
    7643             :  *
    7644             :  * If fs_info->last_reloc_trans advances while comparing, the function calls
    7645             :  * restart_after_relocation() on both paths before it continues the walk.
    7646             :  */
    7647         126 : static int btrfs_compare_trees(struct btrfs_root *left_root,
    7648             :                         struct btrfs_root *right_root, struct send_ctx *sctx)
    7649             : {
    7650         126 :         struct btrfs_fs_info *fs_info = left_root->fs_info;
    7651         126 :         int ret;
    7652         126 :         int cmp;
    7653         126 :         struct btrfs_path *left_path = NULL;
    7654         126 :         struct btrfs_path *right_path = NULL;
    7655         126 :         struct btrfs_key left_key;
    7656         126 :         struct btrfs_key right_key;
    7657         126 :         char *tmp_buf = NULL;
    7658         126 :         int left_root_level;
    7659         126 :         int right_root_level;
    7660         126 :         int left_level;
    7661         126 :         int right_level;
    7662         126 :         int left_end_reached = 0;
    7663         126 :         int right_end_reached = 0;
    7664         126 :         int advance_left = 0;
    7665         126 :         int advance_right = 0;
    7666         126 :         u64 left_blockptr;
    7667         126 :         u64 right_blockptr;
    7668         126 :         u64 left_gen;
    7669         126 :         u64 right_gen;
    7670         126 :         u64 reada_min_gen;
    7671             : 
    7672         126 :         left_path = btrfs_alloc_path();
    7673         125 :         if (!left_path) {
    7674           0 :                 ret = -ENOMEM;
    7675           0 :                 goto out;
    7676             :         }
    7677         125 :         right_path = btrfs_alloc_path();
    7678         125 :         if (!right_path) {
    7679           0 :                 ret = -ENOMEM;
    7680           0 :                 goto out;
    7681             :         }
    7682             : 
    7683         125 :         tmp_buf = kvmalloc(fs_info->nodesize, GFP_KERNEL); /* scratch buffer passed to tree_compare_item() */
    7684         126 :         if (!tmp_buf) {
    7685           0 :                 ret = -ENOMEM;
    7686           0 :                 goto out;
    7687             :         }
    7688             : 
    7689         126 :         left_path->search_commit_root = 1;
    7690         126 :         left_path->skip_locking = 1;
    7691         126 :         right_path->search_commit_root = 1;
    7692         126 :         right_path->skip_locking = 1;
    7693             : 
    7694             :         /*
    7695             :          * Strategy: Go to the first items of both trees. Then do
    7696             :          *
    7697             :          * If both trees are at level 0
    7698             :          *   Compare keys of current items
    7699             :          *     If left < right treat left item as new, advance left tree
    7700             :          *       and repeat
    7701             :          *     If left > right treat right item as deleted, advance right tree
    7702             :          *       and repeat
    7703             :          *     If left == right do deep compare of items, treat as changed if
    7704             :          *       needed, advance both trees and repeat
    7705             :          * If both trees are at the same level but not at level 0
    7706             :          *   Compare keys of current nodes/leaves
    7707             :          *     If left < right advance left tree and repeat
    7708             :          *     If left > right advance right tree and repeat
    7709             :          *     If left == right compare blockptrs of the next nodes/leaves
    7710             :          *       If they match advance both trees but stay at the same level
    7711             :          *         and repeat
    7712             :          *       If they don't match advance both trees while allowing to go
    7713             :          *         deeper and repeat
    7714             :          * If tree levels are different
    7715             :          *   Advance the tree that needs it and repeat
    7716             :          *
    7717             :          * Advancing a tree means:
    7718             :          *   If we are at level 0, try to go to the next slot. If that's not
    7719             :          *   possible, go one level up and repeat. Stop when we found a level
    7720             :          *   where we could go to the next slot. We may at this point be on a
    7721             :          *   node or a leaf.
    7722             :          *
    7723             :          *   If we are not at level 0 and not on shared tree blocks, go one
    7724             :          *   level deeper.
    7725             :          *
    7726             :          *   If we are not at level 0 and on shared tree blocks, go one slot to
    7727             :          *   the right if possible or go up and right.
    7728             :          */
    7729             : 
    7730         126 :         down_read(&fs_info->commit_root_sem);
    7731         126 :         left_level = btrfs_header_level(left_root->commit_root);
    7732         126 :         left_root_level = left_level;
    7733             :         /*
    7734             :          * We clone the root node of the send and parent roots to prevent races
    7735             :          * with snapshot creation of these roots. Snapshot creation COWs the
    7736             :          * root node of a tree, so after the transaction is committed the old
    7737             :          * extent can be reallocated while this send operation is still ongoing.
    7738             :          * So we clone them, under the commit root semaphore, to be race free.
    7739             :          */
    7740         251 :         left_path->nodes[left_level] =
    7741         126 :                         btrfs_clone_extent_buffer(left_root->commit_root);
    7742         125 :         if (!left_path->nodes[left_level]) {
    7743           0 :                 ret = -ENOMEM;
    7744           0 :                 goto out_unlock;
    7745             :         }
    7746             : 
    7747         125 :         right_level = btrfs_header_level(right_root->commit_root);
    7748         125 :         right_root_level = right_level;
    7749         251 :         right_path->nodes[right_level] =
    7750         125 :                         btrfs_clone_extent_buffer(right_root->commit_root);
    7751         126 :         if (!right_path->nodes[right_level]) {
    7752           0 :                 ret = -ENOMEM;
    7753           0 :                 goto out_unlock;
    7754             :         }
    7755             :         /*
    7756             :          * Our right root is the parent root, while the left root is the "send"
    7757             :          * root. We know that all new nodes/leaves in the left root must have
    7758             :          * a generation greater than the right root's generation, so we trigger
    7759             :          * readahead for those nodes and leaves of the left root, as we know we
    7760             :          * will need to read them at some point.
    7761             :          */
    7762         126 :         reada_min_gen = btrfs_header_generation(right_root->commit_root);
    7763             : 
    7764         126 :         if (left_level == 0)
    7765         100 :                 btrfs_item_key_to_cpu(left_path->nodes[left_level],
    7766             :                                 &left_key, left_path->slots[left_level]);
    7767             :         else
    7768          26 :                 btrfs_node_key_to_cpu(left_path->nodes[left_level],
    7769             :                                 &left_key, left_path->slots[left_level]);
    7770         126 :         if (right_level == 0)
    7771         101 :                 btrfs_item_key_to_cpu(right_path->nodes[right_level],
    7772             :                                 &right_key, right_path->slots[right_level]);
    7773             :         else
    7774          25 :                 btrfs_node_key_to_cpu(right_path->nodes[right_level],
    7775             :                                 &right_key, right_path->slots[right_level]);
    7776             : 
    7777         126 :         sctx->last_reloc_trans = fs_info->last_reloc_trans; /* baseline for the relocation check in the loop */
    7778             : 
    7779     4830744 :         while (1) {
    7780     4830744 :                 if (need_resched() ||
    7781             :                     rwsem_is_contended(&fs_info->commit_root_sem)) {
    7782        2783 :                         up_read(&fs_info->commit_root_sem); /* drop the semaphore while yielding */
    7783        2780 :                         cond_resched();
    7784        2780 :                         down_read(&fs_info->commit_root_sem);
    7785             :                 }
    7786             : 
    7787     4830747 :                 if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
    7788        1128 :                         ret = restart_after_relocation(left_path, right_path,
    7789             :                                                        &left_key, &right_key,
    7790             :                                                        left_level, right_level,
    7791             :                                                        sctx);
    7792        1128 :                         if (ret < 0)
    7793           0 :                                 goto out_unlock;
    7794        1128 :                         sctx->last_reloc_trans = fs_info->last_reloc_trans;
    7795             :                 }
    7796             : 
    7797     4830747 :                 if (advance_left && !left_end_reached) {
    7798     4807524 :                         ret = tree_advance(left_path, &left_level,
    7799             :                                         left_root_level,
    7800             :                                         advance_left != ADVANCE_ONLY_NEXT,
    7801             :                                         &left_key, reada_min_gen);
    7802     4807526 :                         if (ret == -1) /* -1 is treated as "end of tree", other negatives as errors */
    7803             :                                 left_end_reached = ADVANCE;
    7804     4807400 :                         else if (ret < 0)
    7805           0 :                                 goto out_unlock;
    7806             :                         advance_left = 0;
    7807             :                 }
    7808     4830749 :                 if (advance_right && !right_end_reached) {
    7809     1696542 :                         ret = tree_advance(right_path, &right_level,
    7810             :                                         right_root_level,
    7811             :                                         advance_right != ADVANCE_ONLY_NEXT,
    7812             :                                         &right_key, reada_min_gen);
    7813     1696541 :                         if (ret == -1) /* -1 is treated as "end of tree", other negatives as errors */
    7814             :                                 right_end_reached = ADVANCE;
    7815     1696415 :                         else if (ret < 0)
    7816           0 :                                 goto out_unlock;
    7817             :                         advance_right = 0;
    7818             :                 }
    7819             : 
    7820     4830748 :                 if (left_end_reached && right_end_reached) {
    7821         126 :                         ret = 0;
    7822         126 :                         goto out_unlock;
    7823     4830622 :                 } else if (left_end_reached) {
    7824        2200 :                         if (right_level == 0) {
    7825        2190 :                                 up_read(&fs_info->commit_root_sem); /* changed_cb() runs without the semaphore held */
    7826        2190 :                                 ret = changed_cb(left_path, right_path,
    7827             :                                                 &right_key,
    7828             :                                                 BTRFS_COMPARE_TREE_DELETED,
    7829             :                                                 sctx);
    7830        2190 :                                 if (ret < 0)
    7831           0 :                                         goto out; /* semaphore already released, skip out_unlock */
    7832        2190 :                                 down_read(&fs_info->commit_root_sem);
    7833             :                         }
    7834        2200 :                         advance_right = ADVANCE;
    7835        2200 :                         continue;
    7836     4828422 :                 } else if (right_end_reached) {
    7837     1758880 :                         if (left_level == 0) {
    7838     1606757 :                                 up_read(&fs_info->commit_root_sem); /* changed_cb() runs without the semaphore held */
    7839     1606757 :                                 ret = changed_cb(left_path, right_path,
    7840             :                                                 &left_key,
    7841             :                                                 BTRFS_COMPARE_TREE_NEW,
    7842             :                                                 sctx);
    7843     1606757 :                                 if (ret < 0)
    7844           0 :                                         goto out; /* semaphore already released, skip out_unlock */
    7845     1606757 :                                 down_read(&fs_info->commit_root_sem);
    7846             :                         }
    7847     1758880 :                         advance_left = ADVANCE;
    7848     1758880 :                         continue;
    7849             :                 }
    7850             : 
    7851     3069542 :                 if (left_level == 0 && right_level == 0) {
    7852     2899313 :                         up_read(&fs_info->commit_root_sem); /* changed_cb() runs without the semaphore held */
    7853     2899313 :                         cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
    7854     2899313 :                         if (cmp < 0) {
    7855     1218116 :                                 ret = changed_cb(left_path, right_path,
    7856             :                                                 &left_key,
    7857             :                                                 BTRFS_COMPARE_TREE_NEW,
    7858             :                                                 sctx);
    7859     1218116 :                                 advance_left = ADVANCE;
    7860     1681197 :                         } else if (cmp > 0) {
    7861       10399 :                                 ret = changed_cb(left_path, right_path,
    7862             :                                                 &right_key,
    7863             :                                                 BTRFS_COMPARE_TREE_DELETED,
    7864             :                                                 sctx);
    7865       10399 :                                 advance_right = ADVANCE;
    7866             :                         } else {
    7867     1670798 :                                 enum btrfs_compare_tree_result result;
    7868             : 
    7869     3341596 :                                 WARN_ON(!extent_buffer_uptodate(left_path->nodes[0]));
    7870     1670798 :                                 ret = tree_compare_item(left_path, right_path,
    7871             :                                                         tmp_buf);
    7872     1670789 :                                 if (ret) /* non-zero from tree_compare_item() is reported as CHANGED */
    7873             :                                         result = BTRFS_COMPARE_TREE_CHANGED;
    7874             :                                 else
    7875     1262267 :                                         result = BTRFS_COMPARE_TREE_SAME;
    7876     1670789 :                                 ret = changed_cb(left_path, right_path,
    7877             :                                                  &left_key, result, sctx);
    7878     1670789 :                                 advance_left = ADVANCE;
    7879     1670789 :                                 advance_right = ADVANCE;
    7880             :                         }
    7881             : 
    7882     2899305 :                         if (ret < 0)
    7883           0 :                                 goto out; /* semaphore already released, skip out_unlock */
    7884     2899305 :                         down_read(&fs_info->commit_root_sem);
    7885      170229 :                 } else if (left_level == right_level) {
    7886        2822 :                         cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
    7887        2822 :                         if (cmp < 0) {
    7888             :                                 advance_left = ADVANCE;
    7889        2652 :                         } else if (cmp > 0) {
    7890             :                                 advance_right = ADVANCE;
    7891             :                         } else {
    7892        2646 :                                 left_blockptr = btrfs_node_blockptr(
    7893        2646 :                                                 left_path->nodes[left_level],
    7894             :                                                 left_path->slots[left_level]);
    7895        2646 :                                 right_blockptr = btrfs_node_blockptr(
    7896        2646 :                                                 right_path->nodes[right_level],
    7897             :                                                 right_path->slots[right_level]);
    7898        2646 :                                 left_gen = btrfs_node_ptr_generation(
    7899        2646 :                                                 left_path->nodes[left_level],
    7900             :                                                 left_path->slots[left_level]);
    7901        2646 :                                 right_gen = btrfs_node_ptr_generation(
    7902        2646 :                                                 right_path->nodes[right_level],
    7903             :                                                 right_path->slots[right_level]);
    7904        2646 :                                 if (left_blockptr == right_blockptr &&
    7905        2646 :                                     left_gen == right_gen) {
    7906             :                                         /*
    7907             :                                          * As we're on a shared block, don't
    7908             :                                          * allow to go deeper.
    7909             :                                          */
    7910             :                                         advance_left = ADVANCE_ONLY_NEXT;
    7911             :                                         advance_right = ADVANCE_ONLY_NEXT;
    7912             :                                 } else {
    7913         961 :                                         advance_left = ADVANCE;
    7914         961 :                                         advance_right = ADVANCE;
    7915             :                                 }
    7916             :                         }
    7917      167407 :                 } else if (left_level < right_level) {
    7918             :                         advance_right = ADVANCE;
    7919             :                 } else {
    7920      156916 :                         advance_left = ADVANCE;
    7921             :                 }
    7922             :         }
    7923             : 
    7924         126 : out_unlock:
    7925         126 :         up_read(&fs_info->commit_root_sem);
    7926         126 : out:
    7927         126 :         btrfs_free_path(left_path);
    7928         126 :         btrfs_free_path(right_path);
    7929         126 :         kvfree(tmp_buf);
    7930         124 :         return ret;
    7931             : }
    7932             : 
    7933         212 : static int send_subvol(struct send_ctx *sctx) /* emit the stream for one subvolume; returns ret from the last step run */
    7934             : {
    7935         212 :         int ret;
    7936             : 
    7937         212 :         if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) {
    7938         212 :                 ret = send_header(sctx);
    7939         212 :                 if (ret < 0)
    7940           0 :                         goto out;
    7941             :         }
    7942             : 
    7943         212 :         ret = send_subvol_begin(sctx);
    7944         211 :         if (ret < 0)
    7945           0 :                 goto out;
    7946             : 
    7947         211 :         if (sctx->parent_root) { /* parent given: compare send root against it */
    7948         124 :                 ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root, sctx);
    7949         124 :                 if (ret < 0)
    7950           0 :                         goto out;
    7951         124 :                 ret = finish_inode_if_needed(sctx, 1); /* NOTE(review): 1 presumably means "at end" - confirm */
    7952         125 :                 if (ret < 0)
    7953           0 :                         goto out;
    7954             :         } else { /* no parent: send the whole tree */
    7955          87 :                 ret = full_send_tree(sctx);
    7956          87 :                 if (ret < 0)
    7957           0 :                         goto out;
    7958             :         }
    7959             : 
    7960          87 : out:
    7961         212 :         free_recorded_refs(sctx); /* runs on success and on every error path */
    7962         211 :         return ret;
    7963             : }
    7964             : 
    7965             : /*
    7966             :  * If orphan cleanup did remove any orphans from a root, it means the tree
    7967             :  * was modified and therefore the commit root is not the same as the current
    7968             :  * root anymore. This is a problem, because send uses the commit root and
    7969             :  * therefore can see inode items that don't exist in the current root anymore,
    7970             :  * and for example make calls to btrfs_iget, which will do tree lookups based
    7971             :  * on the current root and not on the commit root. Those lookups will fail,
    7972             :  * returning a -ESTALE error, and making send fail with that error. So make
    7973             :  * sure a send does not see any orphans we have just removed, and that it will
    7974             :  * see the same inodes regardless of whether a transaction commit happened
    7975             :  * before it started (meaning that the commit root will be the same as the
    7976             :  * current root) or not.
    7977             :  */
    7978         212 : static int ensure_commit_roots_uptodate(struct send_ctx *sctx)
    7979             : {
    7980         212 :         int i;
    7981         212 :         struct btrfs_trans_handle *trans = NULL;
    7982             : 
    7983         224 : again: /* pass 1 runs with trans == NULL, pass 2 with a joined transaction */
    7984         224 :         if (sctx->parent_root &&
    7985         135 :             sctx->parent_root->node != sctx->parent_root->commit_root)
    7986           8 :                 goto commit_trans;
    7987             : 
    7988         544 :         for (i = 0; i < sctx->clone_roots_cnt; i++)
    7989         342 :                 if (sctx->clone_roots[i].root->node !=
    7990         342 :                     sctx->clone_roots[i].root->commit_root)
    7991          14 :                         goto commit_trans;
    7992             : 
    7993         202 :         if (trans) /* joined on pass 1, but pass 2 found every root in sync: just end it */
    7994           0 :                 return btrfs_end_transaction(trans);
    7995             : 
    7996             :         return 0;
    7997             : 
    7998          22 : commit_trans:
    7999             :         /* Use any root, all fs roots will get their commit roots updated. */
    8000          22 :         if (!trans) {
    8001          11 :                 trans = btrfs_join_transaction(sctx->send_root);
    8002          12 :                 if (IS_ERR(trans))
    8003           0 :                         return PTR_ERR(trans);
    8004          12 :                 goto again; /* re-check the roots now that a transaction handle is held */
    8005             :         }
    8006             : 
    8007          11 :         return btrfs_commit_transaction(trans); /* still out of sync on pass 2: commit */
    8008             : }
    8009             : 
    8010             : /*
    8011             :  * Make sure any existing delalloc is flushed for any root used by a send
    8012             :  * operation so that we do not miss any data and we do not race with writeback
    8013             :  * finishing and changing a tree while send is using the tree. This could
    8014             :  * happen if a subvolume is in RW mode, has delalloc, is turned to RO mode and
    8015             :  * a send operation then uses the subvolume.
    8016             :  * After flushing delalloc ensure_commit_roots_uptodate() must be called.
    8017             :  */
    8018         212 : static int flush_delalloc_roots(struct send_ctx *sctx)
    8019             : {
    8020         212 :         struct btrfs_root *root = sctx->parent_root;
    8021         212 :         int ret;
    8022         212 :         int i;
    8023             : 
    8024         212 :         if (root) { /* parent root first, when there is one */
    8025         125 :                 ret = btrfs_start_delalloc_snapshot(root, false);
    8026         126 :                 if (ret)
    8027             :                         return ret;
    8028         126 :                 btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
    8029             :         }
    8030             : 
    8031         554 :         for (i = 0; i < sctx->clone_roots_cnt; i++) { /* then every clone source root */
    8032         341 :                 root = sctx->clone_roots[i].root;
    8033         341 :                 ret = btrfs_start_delalloc_snapshot(root, false);
    8034         341 :                 if (ret)
    8035           0 :                         return ret; /* stop at the first failure; remaining roots are not flushed */
    8036         341 :                 btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
    8037             :         }
    8038             : 
    8039             :         return 0;
    8040             : }
    8041             : 
    8042         465 : static void btrfs_root_dec_send_in_progress(struct btrfs_root* root) /* decrement root->send_in_progress under root_item_lock */
    8043             : {
    8044         465 :         spin_lock(&root->root_item_lock);
    8045         467 :         root->send_in_progress--;
    8046             :         /*
    8047             :          * Not much left to do, we don't know why it's unbalanced and
    8048             :          * can't blindly reset it to 0.
    8049             :          */
    8050         467 :         if (root->send_in_progress < 0) /* negative counter is only logged, not corrected */
    8051           0 :                 btrfs_err(root->fs_info,
    8052             :                           "send_in_progress unbalanced %d root %llu",
    8053             :                           root->send_in_progress, root->root_key.objectid);
    8054         467 :         spin_unlock(&root->root_item_lock);
    8055         467 : }
    8056             : 
    8057           0 : static void dedupe_in_progress_warn(const struct btrfs_root *root) /* log that @root cannot be used for send while dedupe is running on it */
    8058             : {
    8059           0 :         btrfs_warn_rl(root->fs_info, /* NOTE(review): _rl presumably means rate limited - confirm */
    8060             : "cannot use root %llu for send while deduplications on it are in progress (%d in progress)",
    8061             :                       root->root_key.objectid, root->dedupe_in_progress);
    8062           0 : }
    8063             : 
    8064         213 : long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
    8065             : {
    8066         213 :         int ret = 0;
    8067         213 :         struct btrfs_root *send_root = BTRFS_I(inode)->root;
    8068         213 :         struct btrfs_fs_info *fs_info = send_root->fs_info;
    8069         213 :         struct btrfs_root *clone_root;
    8070         213 :         struct send_ctx *sctx = NULL;
    8071         213 :         u32 i;
    8072         213 :         u64 *clone_sources_tmp = NULL;
    8073         213 :         int clone_sources_to_rollback = 0;
    8074         213 :         size_t alloc_size;
    8075         213 :         int sort_clone_roots = 0;
    8076         213 :         struct btrfs_lru_cache_entry *entry;
    8077         213 :         struct btrfs_lru_cache_entry *tmp;
    8078             : 
    8079         213 :         if (!capable(CAP_SYS_ADMIN))
    8080             :                 return -EPERM;
    8081             : 
    8082             :         /*
    8083             :          * The subvolume must remain read-only during send, protect against
    8084             :          * making it RW. This also protects against deletion.
    8085             :          */
    8086         213 :         spin_lock(&send_root->root_item_lock);
    8087         213 :         if (btrfs_root_readonly(send_root) && send_root->dedupe_in_progress) {
    8088           0 :                 dedupe_in_progress_warn(send_root);
    8089           0 :                 spin_unlock(&send_root->root_item_lock);
    8090           0 :                 return -EAGAIN;
    8091             :         }
    8092         213 :         send_root->send_in_progress++;
    8093         213 :         spin_unlock(&send_root->root_item_lock);
    8094             : 
    8095             :         /*
    8096             :          * Userspace tools do the checks and warn the user if it's
    8097             :          * not RO.
    8098             :          */
    8099         213 :         if (!btrfs_root_readonly(send_root)) {
    8100           0 :                 ret = -EPERM;
    8101           0 :                 goto out;
    8102             :         }
    8103             : 
    8104             :         /*
    8105             :          * Check that we don't overflow at later allocations, we request
    8106             :          * clone_sources_count + 1 items, and compare to unsigned long inside
    8107             :          * access_ok. Also set an upper limit for allocation size so this can't
    8108             :          * easily exhaust memory. Max number of clone sources is about 200K.
    8109             :          */
    8110         213 :         if (arg->clone_sources_count > SZ_8M / sizeof(struct clone_root)) {
    8111           0 :                 ret = -EINVAL;
    8112           0 :                 goto out;
    8113             :         }
    8114             : 
    8115         213 :         if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
    8116           0 :                 ret = -EINVAL;
    8117           0 :                 goto out;
    8118             :         }
    8119             : 
    8120         213 :         sctx = kzalloc(sizeof(struct send_ctx), GFP_KERNEL);
    8121         212 :         if (!sctx) {
    8122           0 :                 ret = -ENOMEM;
    8123           0 :                 goto out;
    8124             :         }
    8125             : 
    8126         212 :         INIT_LIST_HEAD(&sctx->new_refs);
    8127         212 :         INIT_LIST_HEAD(&sctx->deleted_refs);
    8128             : 
    8129         212 :         btrfs_lru_cache_init(&sctx->name_cache, SEND_MAX_NAME_CACHE_SIZE);
    8130         212 :         btrfs_lru_cache_init(&sctx->backref_cache, SEND_MAX_BACKREF_CACHE_SIZE);
    8131         212 :         btrfs_lru_cache_init(&sctx->dir_created_cache,
    8132             :                              SEND_MAX_DIR_CREATED_CACHE_SIZE);
    8133             :         /*
    8134             :          * This cache is periodically trimmed to a fixed size elsewhere, see
    8135             :          * cache_dir_utimes() and trim_dir_utimes_cache().
    8136             :          */
    8137         213 :         btrfs_lru_cache_init(&sctx->dir_utimes_cache, 0);
    8138             : 
    8139         213 :         sctx->pending_dir_moves = RB_ROOT;
    8140         213 :         sctx->waiting_dir_moves = RB_ROOT;
    8141         213 :         sctx->orphan_dirs = RB_ROOT;
    8142         213 :         sctx->rbtree_new_refs = RB_ROOT;
    8143         213 :         sctx->rbtree_deleted_refs = RB_ROOT;
    8144             : 
    8145         213 :         sctx->flags = arg->flags;
    8146             : 
    8147         213 :         if (arg->flags & BTRFS_SEND_FLAG_VERSION) {
    8148           0 :                 if (arg->version > BTRFS_SEND_STREAM_VERSION) {
    8149           0 :                         ret = -EPROTO;
    8150           0 :                         goto out;
    8151             :                 }
    8152             :                 /* Zero means "use the highest version" */
    8153           0 :                 sctx->proto = arg->version ?: BTRFS_SEND_STREAM_VERSION;
    8154             :         } else {
    8155         213 :                 sctx->proto = 1;
    8156             :         }
    8157         213 :         if ((arg->flags & BTRFS_SEND_FLAG_COMPRESSED) && sctx->proto < 2) {
    8158           0 :                 ret = -EINVAL;
    8159           0 :                 goto out;
    8160             :         }
    8161             : 
    8162         213 :         sctx->send_filp = fget(arg->send_fd);
    8163         212 :         if (!sctx->send_filp) {
    8164           0 :                 ret = -EBADF;
    8165           0 :                 goto out;
    8166             :         }
    8167             : 
    8168         212 :         sctx->send_root = send_root;
    8169             :         /*
    8170             :          * Unlikely but possible: if the subvolume is marked for deletion but
    8171             :          * its directory entry is not yet removed, send can still be started.
    8172             :          */
    8173         212 :         if (btrfs_root_dead(sctx->send_root)) {
    8174           0 :                 ret = -EPERM;
    8175           0 :                 goto out;
    8176             :         }
    8177             : 
    8178         212 :         sctx->clone_roots_cnt = arg->clone_sources_count;
    8179             : 
    8180         212 :         if (sctx->proto >= 2) {
    8181           0 :                 u32 send_buf_num_pages;
    8182             : 
    8183           0 :                 sctx->send_max_size = BTRFS_SEND_BUF_SIZE_V2;
    8184           0 :                 sctx->send_buf = vmalloc(sctx->send_max_size);
    8185           0 :                 if (!sctx->send_buf) {
    8186           0 :                         ret = -ENOMEM;
    8187           0 :                         goto out;
    8188             :                 }
    8189           0 :                 send_buf_num_pages = sctx->send_max_size >> PAGE_SHIFT;
    8190           0 :                 sctx->send_buf_pages = kcalloc(send_buf_num_pages,
    8191             :                                                sizeof(*sctx->send_buf_pages),
    8192             :                                                GFP_KERNEL);
    8193           0 :                 if (!sctx->send_buf_pages) {
    8194           0 :                         ret = -ENOMEM;
    8195           0 :                         goto out;
    8196             :                 }
    8197           0 :                 for (i = 0; i < send_buf_num_pages; i++) {
    8198           0 :                         sctx->send_buf_pages[i] =
    8199           0 :                                 vmalloc_to_page(sctx->send_buf + (i << PAGE_SHIFT));
    8200             :                 }
    8201             :         } else {
    8202         212 :                 sctx->send_max_size = BTRFS_SEND_BUF_SIZE_V1;
    8203         212 :                 sctx->send_buf = kvmalloc(sctx->send_max_size, GFP_KERNEL);
    8204             :         }
    8205         211 :         if (!sctx->send_buf) {
    8206           0 :                 ret = -ENOMEM;
    8207           0 :                 goto out;
    8208             :         }
    8209             : 
    8210         422 :         sctx->clone_roots = kvcalloc(sizeof(*sctx->clone_roots),
    8211         211 :                                      arg->clone_sources_count + 1,
    8212             :                                      GFP_KERNEL);
    8213         211 :         if (!sctx->clone_roots) {
    8214           0 :                 ret = -ENOMEM;
    8215           0 :                 goto out;
    8216             :         }
    8217             : 
    8218         211 :         alloc_size = array_size(sizeof(*arg->clone_sources),
    8219             :                                 arg->clone_sources_count);
    8220             : 
    8221         211 :         if (arg->clone_sources_count) {
    8222         125 :                 clone_sources_tmp = kvmalloc(alloc_size, GFP_KERNEL);
    8223         126 :                 if (!clone_sources_tmp) {
    8224           0 :                         ret = -ENOMEM;
    8225           0 :                         goto out;
    8226             :                 }
    8227             : 
    8228         126 :                 ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
    8229             :                                 alloc_size);
    8230         126 :                 if (ret) {
    8231           0 :                         ret = -EFAULT;
    8232           0 :                         goto out;
    8233             :                 }
    8234             : 
    8235         254 :                 for (i = 0; i < arg->clone_sources_count; i++) {
    8236         128 :                         clone_root = btrfs_get_fs_root(fs_info,
    8237         128 :                                                 clone_sources_tmp[i], true);
    8238         128 :                         if (IS_ERR(clone_root)) {
    8239           0 :                                 ret = PTR_ERR(clone_root);
    8240           0 :                                 goto out;
    8241             :                         }
    8242         128 :                         spin_lock(&clone_root->root_item_lock);
    8243         128 :                         if (!btrfs_root_readonly(clone_root) ||
    8244             :                             btrfs_root_dead(clone_root)) {
    8245           0 :                                 spin_unlock(&clone_root->root_item_lock);
    8246           0 :                                 btrfs_put_root(clone_root);
    8247           0 :                                 ret = -EPERM;
    8248           0 :                                 goto out;
    8249             :                         }
    8250         128 :                         if (clone_root->dedupe_in_progress) {
    8251           0 :                                 dedupe_in_progress_warn(clone_root);
    8252           0 :                                 spin_unlock(&clone_root->root_item_lock);
    8253           0 :                                 btrfs_put_root(clone_root);
    8254           0 :                                 ret = -EAGAIN;
    8255           0 :                                 goto out;
    8256             :                         }
    8257         128 :                         clone_root->send_in_progress++;
    8258         128 :                         spin_unlock(&clone_root->root_item_lock);
    8259             : 
    8260         128 :                         sctx->clone_roots[i].root = clone_root;
    8261         128 :                         clone_sources_to_rollback = i + 1;
    8262             :                 }
    8263         126 :                 kvfree(clone_sources_tmp);
    8264         126 :                 clone_sources_tmp = NULL;
    8265             :         }
    8266             : 
    8267         211 :         if (arg->parent_root) {
    8268         124 :                 sctx->parent_root = btrfs_get_fs_root(fs_info, arg->parent_root,
    8269             :                                                       true);
    8270         126 :                 if (IS_ERR(sctx->parent_root)) {
    8271           0 :                         ret = PTR_ERR(sctx->parent_root);
    8272           0 :                         goto out;
    8273             :                 }
    8274             : 
    8275         126 :                 spin_lock(&sctx->parent_root->root_item_lock);
    8276         126 :                 sctx->parent_root->send_in_progress++;
    8277         126 :                 if (!btrfs_root_readonly(sctx->parent_root) ||
    8278             :                                 btrfs_root_dead(sctx->parent_root)) {
    8279           0 :                         spin_unlock(&sctx->parent_root->root_item_lock);
    8280           0 :                         ret = -EPERM;
    8281           0 :                         goto out;
    8282             :                 }
    8283         126 :                 if (sctx->parent_root->dedupe_in_progress) {
    8284           0 :                         dedupe_in_progress_warn(sctx->parent_root);
    8285           0 :                         spin_unlock(&sctx->parent_root->root_item_lock);
    8286           0 :                         ret = -EAGAIN;
    8287           0 :                         goto out;
    8288             :                 }
    8289         126 :                 spin_unlock(&sctx->parent_root->root_item_lock);
    8290             :         }
    8291             : 
    8292             :         /*
    8293             :          * Clones from send_root are allowed, but only if the clone source
    8294             :          * is behind the current send position. This is checked while searching
    8295             :          * for possible clone sources.
    8296             :          */
    8297         426 :         sctx->clone_roots[sctx->clone_roots_cnt++].root =
    8298         213 :                 btrfs_grab_root(sctx->send_root);
    8299             : 
    8300             :         /* We do a bsearch later */
    8301         213 :         sort(sctx->clone_roots, sctx->clone_roots_cnt,
    8302             :                         sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
    8303             :                         NULL);
    8304         212 :         sort_clone_roots = 1;
    8305             : 
    8306         212 :         ret = flush_delalloc_roots(sctx);
    8307         213 :         if (ret)
    8308           0 :                 goto out;
    8309             : 
    8310         213 :         ret = ensure_commit_roots_uptodate(sctx);
    8311         212 :         if (ret)
    8312           0 :                 goto out;
    8313             : 
    8314         212 :         ret = send_subvol(sctx);
    8315         211 :         if (ret < 0)
    8316           0 :                 goto out;
    8317             : 
    8318        1509 :         btrfs_lru_cache_for_each_entry_safe(&sctx->dir_utimes_cache, entry, tmp) {
    8319        1296 :                 ret = send_utimes(sctx, entry->key, entry->gen);
    8320        1298 :                 if (ret < 0)
    8321           0 :                         goto out;
    8322        1298 :                 btrfs_lru_cache_remove(&sctx->dir_utimes_cache, entry);
    8323             :         }
    8324             : 
    8325         213 :         if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
    8326         213 :                 ret = begin_cmd(sctx, BTRFS_SEND_C_END);
    8327         213 :                 if (ret < 0)
    8328           0 :                         goto out;
    8329         213 :                 ret = send_cmd(sctx);
    8330         213 :                 if (ret < 0)
    8331           0 :                         goto out;
    8332             :         }
    8333             : 
    8334         213 : out:
    8335         426 :         WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves));
    8336         212 :         while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) {
    8337           0 :                 struct rb_node *n;
    8338           0 :                 struct pending_dir_move *pm;
    8339             : 
    8340           0 :                 n = rb_first(&sctx->pending_dir_moves);
    8341           0 :                 pm = rb_entry(n, struct pending_dir_move, node);
    8342           0 :                 while (!list_empty(&pm->list)) {
    8343           0 :                         struct pending_dir_move *pm2;
    8344             : 
    8345           0 :                         pm2 = list_first_entry(&pm->list,
    8346             :                                                struct pending_dir_move, list);
    8347           0 :                         free_pending_move(sctx, pm2);
    8348             :                 }
    8349           0 :                 free_pending_move(sctx, pm);
    8350             :         }
    8351             : 
    8352         425 :         WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves));
    8353         212 :         while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) {
    8354           0 :                 struct rb_node *n;
    8355           0 :                 struct waiting_dir_move *dm;
    8356             : 
    8357           0 :                 n = rb_first(&sctx->waiting_dir_moves);
    8358           0 :                 dm = rb_entry(n, struct waiting_dir_move, node);
    8359           0 :                 rb_erase(&dm->node, &sctx->waiting_dir_moves);
    8360           0 :                 kfree(dm);
    8361             :         }
    8362             : 
    8363         424 :         WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs));
    8364         212 :         while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) {
    8365           0 :                 struct rb_node *n;
    8366           0 :                 struct orphan_dir_info *odi;
    8367             : 
    8368           0 :                 n = rb_first(&sctx->orphan_dirs);
    8369           0 :                 odi = rb_entry(n, struct orphan_dir_info, node);
    8370           0 :                 free_orphan_dir_info(sctx, odi);
    8371             :         }
    8372             : 
    8373         212 :         if (sort_clone_roots) {
    8374         552 :                 for (i = 0; i < sctx->clone_roots_cnt; i++) {
    8375         339 :                         btrfs_root_dec_send_in_progress(
    8376         339 :                                         sctx->clone_roots[i].root);
    8377         341 :                         btrfs_put_root(sctx->clone_roots[i].root);
    8378             :                 }
    8379             :         } else {
    8380           0 :                 for (i = 0; sctx && i < clone_sources_to_rollback; i++) {
    8381           0 :                         btrfs_root_dec_send_in_progress(
    8382           0 :                                         sctx->clone_roots[i].root);
    8383           0 :                         btrfs_put_root(sctx->clone_roots[i].root);
    8384             :                 }
    8385             : 
    8386           0 :                 btrfs_root_dec_send_in_progress(send_root);
    8387             :         }
    8388         339 :         if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) {
    8389         126 :                 btrfs_root_dec_send_in_progress(sctx->parent_root);
    8390         126 :                 btrfs_put_root(sctx->parent_root);
    8391             :         }
    8392             : 
    8393         213 :         kvfree(clone_sources_tmp);
    8394             : 
    8395         213 :         if (sctx) {
    8396         213 :                 if (sctx->send_filp)
    8397         213 :                         fput(sctx->send_filp);
    8398             : 
    8399         213 :                 kvfree(sctx->clone_roots);
    8400         213 :                 kfree(sctx->send_buf_pages);
    8401         213 :                 kvfree(sctx->send_buf);
    8402         213 :                 kvfree(sctx->verity_descriptor);
    8403             : 
    8404         213 :                 close_current_inode(sctx);
    8405             : 
    8406         213 :                 btrfs_lru_cache_clear(&sctx->name_cache);
    8407         213 :                 btrfs_lru_cache_clear(&sctx->backref_cache);
    8408         213 :                 btrfs_lru_cache_clear(&sctx->dir_created_cache);
    8409         213 :                 btrfs_lru_cache_clear(&sctx->dir_utimes_cache);
    8410             : 
    8411         213 :                 kfree(sctx);
    8412             :         }
    8413             : 
    8414         213 :         return ret;
    8415             : }

Generated by: LCOV version 1.14