LCOV - code coverage report
Current view: top level - fs/btrfs - send.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-djwa @ Mon Jul 31 20:08:17 PDT 2023 Lines: 0 4068 0.0 %
Date: 2023-07-31 20:08:17 Functions: 0 153 0.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (C) 2012 Alexander Block.  All rights reserved.
       4             :  */
       5             : 
       6             : #include <linux/bsearch.h>
       7             : #include <linux/fs.h>
       8             : #include <linux/file.h>
       9             : #include <linux/sort.h>
      10             : #include <linux/mount.h>
      11             : #include <linux/xattr.h>
      12             : #include <linux/posix_acl_xattr.h>
      13             : #include <linux/radix-tree.h>
      14             : #include <linux/vmalloc.h>
      15             : #include <linux/string.h>
      16             : #include <linux/compat.h>
      17             : #include <linux/crc32c.h>
      18             : #include <linux/fsverity.h>
      19             : 
      20             : #include "send.h"
      21             : #include "ctree.h"
      22             : #include "backref.h"
      23             : #include "locking.h"
      24             : #include "disk-io.h"
      25             : #include "btrfs_inode.h"
      26             : #include "transaction.h"
      27             : #include "compression.h"
      28             : #include "xattr.h"
      29             : #include "print-tree.h"
      30             : #include "accessors.h"
      31             : #include "dir-item.h"
      32             : #include "file-item.h"
      33             : #include "ioctl.h"
      34             : #include "verity.h"
      35             : #include "lru_cache.h"
      36             : 
      37             : /*
      38             :  * Maximum number of references an extent can have in order for us to attempt to
      39             :  * issue clone operations instead of write operations. This currently exists to
      40             :  * avoid hitting limitations of the backreference walking code (taking a lot of
      41             :  * time and using too much memory for extents with large number of references).
      42             :  */
      43             : #define SEND_MAX_EXTENT_REFS    1024
      44             : 
      45             : /*
      46             :  * A fs_path is a helper to dynamically build path names with unknown size.
      47             :  * It reallocates the internal buffer on demand.
      48             :  * It allows fast adding of path elements on the right side (normal path) and
      49             :  * fast adding to the left side (reversed path). A reversed path can also be
      50             :  * unreversed if needed.
      51             :  */
struct fs_path {
        union {
                struct {
                        /*
                         * The current path is the byte range [start, end).
                         * *end always holds the NUL terminator.
                         */
                        char *start;
                        char *end;

                        /*
                         * Backing storage. Points at inline_buf after
                         * allocation and at a kmalloc'ed buffer once the
                         * inline space no longer suffices.
                         */
                        char *buf;
                        /* Size of buf in bytes (15 bits). */
                        unsigned short buf_len:15;
                        /*
                         * Set when elements are prepended (path grows
                         * from the end of buf towards its beginning).
                         */
                        unsigned short reversed:1;
                        /* Uses whatever is left of the 256 byte pad. */
                        char inline_buf[];
                };
                /*
                 * Average path length does not exceed 200 bytes, we'll have
                 * better packing in the slab and higher chance to satisfy
                 * an allocation later during send.
                 */
                char pad[256];
        };
};
/* Number of bytes available in fs_path::inline_buf. */
#define FS_PATH_INLINE_SIZE \
        (sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))
      73             : 
      74             : 
/*
 * reused for each extent
 *
 * NOTE(review): the field notes below are inferred from names only, the
 * code that fills them is not in view - confirm before relying on them.
 */
struct clone_root {
        /* Root this entry refers to (see send_ctx::clone_roots). */
        struct btrfs_root *root;
        /* Presumably the inode/offset/length of a reference found in root. */
        u64 ino;
        u64 offset;
        u64 num_bytes;
        /* Presumably set once a usable reference was found in root. */
        bool found_ref;
};
      83             : 
/*
 * NOTE(review): presumably the max number of entries kept in
 * send_ctx::name_cache - confirm at the place where that cache is set up.
 */
#define SEND_MAX_NAME_CACHE_SIZE                        256
      85             : 
      86             : /*
      87             :  * Limit the root_ids array of struct backref_cache_entry to 17 elements.
      88             :  * This makes the size of a cache entry to be exactly 192 bytes on x86_64, which
      89             :  * can be satisfied from the kmalloc-192 slab, without wasting any space.
      90             :  * The most common case is to have a single root for cloning, which corresponds
      91             :  * to the send root. Having the user specify more than 16 clone roots is not
      92             :  * common, and in such rare cases we simply don't use caching if the number of
      93             :  * cloning roots that lead down to a leaf is more than 17.
      94             :  */
      95             : #define SEND_MAX_BACKREF_CACHE_ROOTS                    17
      96             : 
      97             : /*
      98             :  * Max number of entries in the cache.
      99             :  * With SEND_MAX_BACKREF_CACHE_ROOTS as 17, the size in bytes, excluding
     100             :  * maple tree's internal nodes, is 24K.
     101             :  */
     102             : #define SEND_MAX_BACKREF_CACHE_SIZE 128
     103             : 
/*
 * A backref cache entry maps a leaf to a list of IDs of roots from which the
 * leaf is accessible and we can use for clone operations.
 * With SEND_MAX_BACKREF_CACHE_ROOTS as 17, each cache entry is 192 bytes (on
 * x86_64).
 */
struct backref_cache_entry {
        /* Must be the first member, see the static_assert below. */
        struct btrfs_lru_cache_entry entry;
        u64 root_ids[SEND_MAX_BACKREF_CACHE_ROOTS];
        /* Number of valid elements in the root_ids array. */
        int num_roots;
};

/* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */
static_assert(offsetof(struct backref_cache_entry, entry) == 0);
     119             : 
     120             : /*
     121             :  * Max number of entries in the cache that stores directories that were already
     122             :  * created. The cache uses raw struct btrfs_lru_cache_entry entries, so it uses
     123             :  * at most 4096 bytes - sizeof(struct btrfs_lru_cache_entry) is 48 bytes, but
     124             :  * the kmalloc-64 slab is used, so we get 4096 bytes (64 bytes * 64).
     125             :  */
     126             : #define SEND_MAX_DIR_CREATED_CACHE_SIZE                 64
     127             : 
/*
 * Max number of entries in the directory utimes cache
 * (send_ctx::dir_utimes_cache).
 * NOTE(review): what exactly gets cached is not visible here, the name
 * suggests directories with a pending utimes update - confirm at the users.
 * The cache uses raw struct btrfs_lru_cache_entry entries, so it uses
 * at most 4096 bytes - sizeof(struct btrfs_lru_cache_entry) is 48 bytes, but
 * the kmalloc-64 slab is used, so we get 4096 bytes (64 bytes * 64).
 */
#define SEND_MAX_DIR_UTIMES_CACHE_SIZE                  64
     135             : 
/*
 * Context of a send operation, handed around as 'sctx' by the helpers in
 * this file.
 */
struct send_ctx {
        /*
         * NOTE(review): presumably the output side of the stream - the file
         * written to, the current offset in it, and the command buffer with
         * its used and maximum size. Confirm against the write helpers.
         */
        struct file *send_filp;
        loff_t send_off;
        char *send_buf;
        u32 send_size;
        u32 send_max_size;
        /*
         * Whether BTRFS_SEND_A_DATA attribute was already added to current
         * command (since protocol v2, data must be the last attribute).
         */
        bool put_data;
        struct page **send_buf_pages;
        u64 flags;      /* 'flags' member of btrfs_ioctl_send_args is u64 */
        /* Protocol version compatibility requested */
        u32 proto;

        /*
         * Root being sent and, when not NULL, the parent root it is compared
         * against (see need_send_hole() and inconsistent_snapshot_error()).
         */
        struct btrfs_root *send_root;
        struct btrfs_root *parent_root;
        /* Presumably an array of clone_roots_cnt entries - TODO confirm. */
        struct clone_root *clone_roots;
        int clone_roots_cnt;

        /* current state of the compare_tree call */
        struct btrfs_path *left_path;
        struct btrfs_path *right_path;
        struct btrfs_key *cmp_key;

        /*
         * Keep track of the generation of the last transaction that was used
         * for relocating a block group. This is periodically checked in order
         * to detect if a relocation happened since the last check, so that we
         * don't operate on stale extent buffers for nodes (level >= 1) or on
         * stale disk_bytenr values of file extent items.
         */
        u64 last_reloc_trans;

        /*
         * Information about the currently processed inode. In case of deleted
         * inodes, these are the values from the deleted inode.
         */
        u64 cur_ino;
        u64 cur_inode_gen;
        u64 cur_inode_size;
        u64 cur_inode_mode;
        u64 cur_inode_rdev;
        u64 cur_inode_last_extent;
        u64 cur_inode_next_write_offset;
        bool cur_inode_new;
        bool cur_inode_new_gen;
        bool cur_inode_deleted;
        bool ignore_cur_inode;
        bool cur_inode_needs_verity;
        void *verity_descriptor;

        u64 send_progress;

        struct list_head new_refs;
        struct list_head deleted_refs;

        struct btrfs_lru_cache name_cache;

        /*
         * The inode we are currently processing. It's not NULL only when we
         * need to issue write commands for data extents from this inode.
         */
        struct inode *cur_inode;
        struct file_ra_state ra;
        u64 page_cache_clear_start;
        bool clean_page_cache;

        /*
         * We process inodes by their increasing order, so if before an
         * incremental send we reverse the parent/child relationship of
         * directories such that a directory with a lower inode number was
         * the parent of a directory with a higher inode number, and the one
         * becoming the new parent got renamed too, we can't rename/move the
         * directory with lower inode number when we finish processing it - we
         * must process the directory with higher inode number first, then
         * rename/move it and then rename/move the directory with lower inode
         * number. Example follows.
         *
         * Tree state when the first send was performed:
         *
         * .
         * |-- a                   (ino 257)
         *     |-- b               (ino 258)
         *         |
         *         |
         *         |-- c           (ino 259)
         *         |   |-- d       (ino 260)
         *         |
         *         |-- c2          (ino 261)
         *
         * Tree state when the second (incremental) send is performed:
         *
         * .
         * |-- a                   (ino 257)
         *     |-- b               (ino 258)
         *         |-- c2          (ino 261)
         *             |-- d2      (ino 260)
         *                 |-- cc  (ino 259)
         *
         * The sequence of steps that lead to the second state was:
         *
         * mv /a/b/c/d /a/b/c2/d2
         * mv /a/b/c /a/b/c2/d2/cc
         *
         * "c" has lower inode number, but we can't move it (2nd mv operation)
         * before we move "d", which has higher inode number.
         *
         * So we just memorize which move/rename operations must be performed
         * later when their respective parent is processed and moved/renamed.
         */

        /* Indexed by parent directory inode number. */
        struct rb_root pending_dir_moves;

        /*
         * Reverse index, indexed by the inode number of a directory that
         * is waiting for the move/rename of its immediate parent before its
         * own move/rename can be performed.
         */
        struct rb_root waiting_dir_moves;

        /*
         * A directory that is going to be rm'ed might have a child directory
         * which is in the pending directory moves index above. In this case,
         * the directory can only be removed after the move/rename of its child
         * is performed. Example:
         *
         * Parent snapshot:
         *
         * .                        (ino 256)
         * |-- a/                   (ino 257)
         *     |-- b/               (ino 258)
         *         |-- c/           (ino 259)
         *         |   |-- x/       (ino 260)
         *         |
         *         |-- y/           (ino 261)
         *
         * Send snapshot:
         *
         * .                        (ino 256)
         * |-- a/                   (ino 257)
         *     |-- b/               (ino 258)
         *         |-- YY/          (ino 261)
         *              |-- x/      (ino 260)
         *
         * Sequence of steps that lead to the send snapshot:
         * rm -f /a/b/c/foo.txt
         * mv /a/b/y /a/b/YY
         * mv /a/b/c/x /a/b/YY
         * rmdir /a/b/c
         *
         * When the child is processed, its move/rename is delayed until its
         * parent is processed (as explained above), but all other operations
         * like update utimes, chown, chgrp, etc, are performed and the paths
         * that it uses for those operations must use the orphanized name of
         * its parent (the directory we're going to rm later), so we need to
         * memorize that name.
         *
         * Indexed by the inode number of the directory to be deleted.
         */
        struct rb_root orphan_dirs;

        struct rb_root rbtree_new_refs;
        struct rb_root rbtree_deleted_refs;

        /*
         * NOTE(review): backref_cache presumably holds struct
         * backref_cache_entry items, and backref_cache_last_reloc_trans looks
         * like the relocation generation the cached data corresponds to -
         * confirm at the users.
         */
        struct btrfs_lru_cache backref_cache;
        u64 backref_cache_last_reloc_trans;

        struct btrfs_lru_cache dir_created_cache;
        struct btrfs_lru_cache dir_utimes_cache;
};
     309             : 
/*
 * A directory move/rename that had to be postponed.
 * NOTE(review): presumably linked through 'node' into
 * send_ctx::pending_dir_moves, which is indexed by parent directory inode
 * number - confirm at the insertion site.
 */
struct pending_dir_move {
        struct rb_node node;
        struct list_head list;
        u64 parent_ino;
        /* Presumably inode number and generation of the directory to move. */
        u64 ino;
        u64 gen;
        struct list_head update_refs;
};
     318             : 
/*
 * Entry for a directory whose move/rename is being delayed.
 * NOTE(review): presumably linked through 'node' into
 * send_ctx::waiting_dir_moves, which is indexed by the inode number of the
 * waiting directory - confirm at the insertion site.
 */
struct waiting_dir_move {
        struct rb_node node;
        u64 ino;
        /*
         * There might be some directory that could not be removed because it
         * was waiting for this directory inode to be moved first. Therefore
         * after this directory is moved, we can try to rmdir the ino rmdir_ino.
         */
        u64 rmdir_ino;
        u64 rmdir_gen;
        bool orphanized;
};
     331             : 
/*
 * Information about a directory whose removal is being delayed.
 * NOTE(review): presumably linked through 'node' into send_ctx::orphan_dirs,
 * which is indexed by the inode number of the directory to be deleted -
 * confirm at the insertion site.
 */
struct orphan_dir_info {
        struct rb_node node;
        u64 ino;
        u64 gen;
        u64 last_dir_index_offset;
        u64 dir_high_seq_ino;
};
     339             : 
/*
 * NOTE(review): presumably an entry of send_ctx::name_cache, caching the
 * parent and name of an inode - confirm at the users.
 */
struct name_cache_entry {
        /*
         * The key in the entry is an inode number, and the generation matches
         * the inode's generation.
         */
        struct btrfs_lru_cache_entry entry;
        u64 parent_ino;
        u64 parent_gen;
        int ret;
        int need_later_update;
        /* Presumably the number of bytes stored in name[] - TODO confirm. */
        int name_len;
        /* Flexible array, storage is allocated together with the entry. */
        char name[];
};

/* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */
static_assert(offsetof(struct name_cache_entry, entry) == 0);
     356             : 
     357             : #define ADVANCE                                                 1
     358             : #define ADVANCE_ONLY_NEXT                                       -1
     359             : 
/*
 * Kind of difference found for an item. inconsistent_snapshot_error() reports
 * these as "new", "deleted", "updated" and "unchanged" respectively.
 */
enum btrfs_compare_tree_result {
        BTRFS_COMPARE_TREE_NEW,
        BTRFS_COMPARE_TREE_DELETED,
        BTRFS_COMPARE_TREE_CHANGED,
        BTRFS_COMPARE_TREE_SAME,
};
     366             : 
     367             : __cold
     368           0 : static void inconsistent_snapshot_error(struct send_ctx *sctx,
     369             :                                         enum btrfs_compare_tree_result result,
     370             :                                         const char *what)
     371             : {
     372           0 :         const char *result_string;
     373             : 
     374           0 :         switch (result) {
     375             :         case BTRFS_COMPARE_TREE_NEW:
     376             :                 result_string = "new";
     377             :                 break;
     378             :         case BTRFS_COMPARE_TREE_DELETED:
     379             :                 result_string = "deleted";
     380             :                 break;
     381             :         case BTRFS_COMPARE_TREE_CHANGED:
     382             :                 result_string = "updated";
     383             :                 break;
     384             :         case BTRFS_COMPARE_TREE_SAME:
     385             :                 ASSERT(0);
     386             :                 result_string = "unchanged";
     387             :                 break;
     388             :         default:
     389             :                 ASSERT(0);
     390             :                 result_string = "unexpected";
     391             :         }
     392             : 
     393           0 :         btrfs_err(sctx->send_root->fs_info,
     394             :                   "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu",
     395             :                   result_string, what, sctx->cmp_key->objectid,
     396             :                   sctx->send_root->root_key.objectid,
     397             :                   (sctx->parent_root ?
     398             :                    sctx->parent_root->root_key.objectid : 0));
     399           0 : }
     400             : 
     401             : __maybe_unused
     402             : static bool proto_cmd_ok(const struct send_ctx *sctx, int cmd)
     403             : {
     404           0 :         switch (sctx->proto) {
     405             :         case 1:  return cmd <= BTRFS_SEND_C_MAX_V1;
     406             :         case 2:  return cmd <= BTRFS_SEND_C_MAX_V2;
     407           0 :         case 3:  return cmd <= BTRFS_SEND_C_MAX_V3;
     408             :         default: return false;
     409             :         }
     410             : }
     411             : 
     412             : static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
     413             : 
     414             : static struct waiting_dir_move *
     415             : get_waiting_dir_move(struct send_ctx *sctx, u64 ino);
     416             : 
     417             : static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen);
     418             : 
     419           0 : static int need_send_hole(struct send_ctx *sctx)
     420             : {
     421           0 :         return (sctx->parent_root && !sctx->cur_inode_new &&
     422           0 :                 !sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
     423           0 :                 S_ISREG(sctx->cur_inode_mode));
     424             : }
     425             : 
     426           0 : static void fs_path_reset(struct fs_path *p)
     427             : {
     428           0 :         if (p->reversed) {
     429           0 :                 p->start = p->buf + p->buf_len - 1;
     430           0 :                 p->end = p->start;
     431           0 :                 *p->start = 0;
     432             :         } else {
     433           0 :                 p->start = p->buf;
     434           0 :                 p->end = p->start;
     435           0 :                 *p->start = 0;
     436             :         }
     437           0 : }
     438             : 
     439           0 : static struct fs_path *fs_path_alloc(void)
     440             : {
     441           0 :         struct fs_path *p;
     442             : 
     443           0 :         p = kmalloc(sizeof(*p), GFP_KERNEL);
     444           0 :         if (!p)
     445             :                 return NULL;
     446           0 :         p->reversed = 0;
     447           0 :         p->buf = p->inline_buf;
     448           0 :         p->buf_len = FS_PATH_INLINE_SIZE;
     449           0 :         fs_path_reset(p);
     450           0 :         return p;
     451             : }
     452             : 
     453           0 : static struct fs_path *fs_path_alloc_reversed(void)
     454             : {
     455           0 :         struct fs_path *p;
     456             : 
     457           0 :         p = fs_path_alloc();
     458           0 :         if (!p)
     459             :                 return NULL;
     460           0 :         p->reversed = 1;
     461           0 :         fs_path_reset(p);
     462           0 :         return p;
     463             : }
     464             : 
     465           0 : static void fs_path_free(struct fs_path *p)
     466             : {
     467           0 :         if (!p)
     468             :                 return;
     469           0 :         if (p->buf != p->inline_buf)
     470           0 :                 kfree(p->buf);
     471           0 :         kfree(p);
     472             : }
     473             : 
     474             : static int fs_path_len(struct fs_path *p)
     475             : {
     476           0 :         return p->end - p->start;
     477             : }
     478             : 
     479           0 : static int fs_path_ensure_buf(struct fs_path *p, int len)
     480             : {
     481           0 :         char *tmp_buf;
     482           0 :         int path_len;
     483           0 :         int old_buf_len;
     484             : 
     485           0 :         len++;
     486             : 
     487           0 :         if (p->buf_len >= len)
     488             :                 return 0;
     489             : 
     490           0 :         if (len > PATH_MAX) {
     491           0 :                 WARN_ON(1);
     492           0 :                 return -ENOMEM;
     493             :         }
     494             : 
     495           0 :         path_len = p->end - p->start;
     496           0 :         old_buf_len = p->buf_len;
     497             : 
     498             :         /*
     499             :          * Allocate to the next largest kmalloc bucket size, to let
     500             :          * the fast path happen most of the time.
     501             :          */
     502           0 :         len = kmalloc_size_roundup(len);
     503             :         /*
     504             :          * First time the inline_buf does not suffice
     505             :          */
     506           0 :         if (p->buf == p->inline_buf) {
     507           0 :                 tmp_buf = kmalloc(len, GFP_KERNEL);
     508           0 :                 if (tmp_buf)
     509           0 :                         memcpy(tmp_buf, p->buf, old_buf_len);
     510             :         } else {
     511           0 :                 tmp_buf = krealloc(p->buf, len, GFP_KERNEL);
     512             :         }
     513           0 :         if (!tmp_buf)
     514             :                 return -ENOMEM;
     515           0 :         p->buf = tmp_buf;
     516           0 :         p->buf_len = len;
     517             : 
     518           0 :         if (p->reversed) {
     519           0 :                 tmp_buf = p->buf + old_buf_len - path_len - 1;
     520           0 :                 p->end = p->buf + p->buf_len - 1;
     521           0 :                 p->start = p->end - path_len;
     522           0 :                 memmove(p->start, tmp_buf, path_len + 1);
     523             :         } else {
     524           0 :                 p->start = p->buf;
     525           0 :                 p->end = p->start + path_len;
     526             :         }
     527             :         return 0;
     528             : }
     529             : 
     530           0 : static int fs_path_prepare_for_add(struct fs_path *p, int name_len,
     531             :                                    char **prepared)
     532             : {
     533           0 :         int ret;
     534           0 :         int new_len;
     535             : 
     536           0 :         new_len = p->end - p->start + name_len;
     537           0 :         if (p->start != p->end)
     538           0 :                 new_len++;
     539           0 :         ret = fs_path_ensure_buf(p, new_len);
     540           0 :         if (ret < 0)
     541           0 :                 goto out;
     542             : 
     543           0 :         if (p->reversed) {
     544           0 :                 if (p->start != p->end)
     545           0 :                         *--p->start = '/';
     546           0 :                 p->start -= name_len;
     547           0 :                 *prepared = p->start;
     548             :         } else {
     549           0 :                 if (p->start != p->end)
     550           0 :                         *p->end++ = '/';
     551           0 :                 *prepared = p->end;
     552           0 :                 p->end += name_len;
     553           0 :                 *p->end = 0;
     554             :         }
     555             : 
     556           0 : out:
     557           0 :         return ret;
     558             : }
     559             : 
     560           0 : static int fs_path_add(struct fs_path *p, const char *name, int name_len)
     561             : {
     562           0 :         int ret;
     563           0 :         char *prepared;
     564             : 
     565           0 :         ret = fs_path_prepare_for_add(p, name_len, &prepared);
     566           0 :         if (ret < 0)
     567           0 :                 goto out;
     568           0 :         memcpy(prepared, name, name_len);
     569             : 
     570           0 : out:
     571           0 :         return ret;
     572             : }
     573             : 
     574           0 : static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
     575             : {
     576           0 :         int ret;
     577           0 :         char *prepared;
     578             : 
     579           0 :         ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared);
     580           0 :         if (ret < 0)
     581           0 :                 goto out;
     582           0 :         memcpy(prepared, p2->start, p2->end - p2->start);
     583             : 
     584           0 : out:
     585           0 :         return ret;
     586             : }
     587             : 
     /*
      * Append a path component whose bytes live inside an extent buffer.
      *
      * @p:   path to extend
      * @eb:  extent buffer holding the name
      * @off: offset handed to read_extent_buffer() as the start of the name
      * @len: number of bytes to read
      *
      * Returns the value of fs_path_prepare_for_add(): negative on failure, in
      * which case the extent buffer is not read.
      */
     static int fs_path_add_from_extent_buffer(struct fs_path *p,
                                               struct extent_buffer *eb,
                                               unsigned long off, int len)
     {
             char *dst;
             int ret;

             ret = fs_path_prepare_for_add(p, len, &dst);
             if (ret >= 0)
                     read_extent_buffer(eb, dst, off, len);

             return ret;
     }
     604             : 
     605           0 : static int fs_path_copy(struct fs_path *p, struct fs_path *from)
     606             : {
     607           0 :         p->reversed = from->reversed;
     608           0 :         fs_path_reset(p);
     609             : 
     610           0 :         return fs_path_add_path(p, from);
     611             : }
     612             : 
     613           0 : static void fs_path_unreverse(struct fs_path *p)
     614             : {
     615           0 :         char *tmp;
     616           0 :         int len;
     617             : 
     618           0 :         if (!p->reversed)
     619             :                 return;
     620             : 
     621           0 :         tmp = p->start;
     622           0 :         len = p->end - p->start;
     623           0 :         p->start = p->buf;
     624           0 :         p->end = p->start + len;
     625           0 :         memmove(p->start, tmp, len + 1);
     626           0 :         p->reversed = 0;
     627             : }
     628             : 
     629           0 : static struct btrfs_path *alloc_path_for_send(void)
     630             : {
     631           0 :         struct btrfs_path *path;
     632             : 
     633           0 :         path = btrfs_alloc_path();
     634           0 :         if (!path)
     635             :                 return NULL;
     636           0 :         path->search_commit_root = 1;
     637           0 :         path->skip_locking = 1;
     638           0 :         path->need_commit_sem = 1;
     639           0 :         return path;
     640             : }
     641             : 
     642           0 : static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
     643             : {
     644           0 :         int ret;
     645           0 :         u32 pos = 0;
     646             : 
     647           0 :         while (pos < len) {
     648           0 :                 ret = kernel_write(filp, buf + pos, len - pos, off);
     649           0 :                 if (ret < 0)
     650           0 :                         return ret;
     651           0 :                 if (ret == 0)
     652             :                         return -EIO;
     653           0 :                 pos += ret;
     654             :         }
     655             : 
     656             :         return 0;
     657             : }
     658             : 
     659           0 : static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
     660             : {
     661           0 :         struct btrfs_tlv_header *hdr;
     662           0 :         int total_len = sizeof(*hdr) + len;
     663           0 :         int left = sctx->send_max_size - sctx->send_size;
     664             : 
     665           0 :         if (WARN_ON_ONCE(sctx->put_data))
     666             :                 return -EINVAL;
     667             : 
     668           0 :         if (unlikely(left < total_len))
     669             :                 return -EOVERFLOW;
     670             : 
     671           0 :         hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
     672           0 :         put_unaligned_le16(attr, &hdr->tlv_type);
     673           0 :         put_unaligned_le16(len, &hdr->tlv_len);
     674           0 :         memcpy(hdr + 1, data, len);
     675           0 :         sctx->send_size += total_len;
     676             : 
     677           0 :         return 0;
     678             : }
     679             : 
     680             : #define TLV_PUT_DEFINE_INT(bits) \
     681             :         static int tlv_put_u##bits(struct send_ctx *sctx,               \
     682             :                         u##bits attr, u##bits value)                    \
     683             :         {                                                               \
     684             :                 __le##bits __tmp = cpu_to_le##bits(value);              \
     685             :                 return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));  \
     686             :         }
     687             : 
     688             : TLV_PUT_DEFINE_INT(8)
     689           0 : TLV_PUT_DEFINE_INT(32)
     690           0 : TLV_PUT_DEFINE_INT(64)
     691             : 
     692           0 : static int tlv_put_string(struct send_ctx *sctx, u16 attr,
     693             :                           const char *str, int len)
     694             : {
     695           0 :         if (len == -1)
     696           0 :                 len = strlen(str);
     697           0 :         return tlv_put(sctx, attr, str, len);
     698             : }
     699             : 
     700             : static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
     701             :                         const u8 *uuid)
     702             : {
     703           0 :         return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
     704             : }
     705             : 
     706           0 : static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
     707             :                                   struct extent_buffer *eb,
     708             :                                   struct btrfs_timespec *ts)
     709             : {
     710           0 :         struct btrfs_timespec bts;
     711           0 :         read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
     712           0 :         return tlv_put(sctx, attr, &bts, sizeof(bts));
     713             : }
     714             : 
     715             : 
     /*
      * Convenience wrappers around the tlv_put*() helpers.
      *
      * Every macro stores the helper's result in a local variable named 'ret'
      * and jumps to a label named 'tlv_put_failure' when it is negative, so the
      * calling function must provide both.
      */
     #define TLV_PUT(sctx, attrtype, data, attrlen) \
             do { \
                     ret = tlv_put(sctx, attrtype, data, attrlen); \
                     if (ret < 0) \
                             goto tlv_put_failure; \
             } while (0)

     #define TLV_PUT_INT(sctx, attrtype, bits, value) \
             do { \
                     ret = tlv_put_u##bits(sctx, attrtype, value); \
                     if (ret < 0) \
                             goto tlv_put_failure; \
             } while (0)

     /*
      * NOTE(review): TLV_PUT_U16 expands to tlv_put_u16(); no matching
      * TLV_PUT_DEFINE_INT(16) is visible next to the 8/32/64 instantiations, so
      * confirm one exists before using it.
      */
     #define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
     #define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
     #define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
     #define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
     #define TLV_PUT_STRING(sctx, attrtype, str, len) \
             do { \
                     ret = tlv_put_string(sctx, attrtype, str, len); \
                     if (ret < 0) \
                             goto tlv_put_failure; \
             } while (0)
     /* 'p' is parenthesised so any pointer expression can be passed safely. */
     #define TLV_PUT_PATH(sctx, attrtype, p) \
             do { \
                     ret = tlv_put_string(sctx, attrtype, (p)->start, \
                             (p)->end - (p)->start); \
                     if (ret < 0) \
                             goto tlv_put_failure; \
             } while (0)
     #define TLV_PUT_UUID(sctx, attrtype, uuid) \
             do { \
                     ret = tlv_put_uuid(sctx, attrtype, uuid); \
                     if (ret < 0) \
                             goto tlv_put_failure; \
             } while (0)
     #define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
             do { \
                     ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
                     if (ret < 0) \
                             goto tlv_put_failure; \
             } while (0)
     759             : 
     760           0 : static int send_header(struct send_ctx *sctx)
     761             : {
     762           0 :         struct btrfs_stream_header hdr;
     763             : 
     764           0 :         strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
     765           0 :         hdr.version = cpu_to_le32(sctx->proto);
     766           0 :         return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
     767             :                                         &sctx->send_off);
     768             : }
     769             : 
     770             : /*
     771             :  * For each command/item we want to send to userspace, we call this function.
     772             :  */
     773           0 : static int begin_cmd(struct send_ctx *sctx, int cmd)
     774             : {
     775           0 :         struct btrfs_cmd_header *hdr;
     776             : 
     777           0 :         if (WARN_ON(!sctx->send_buf))
     778             :                 return -EINVAL;
     779             : 
     780           0 :         BUG_ON(sctx->send_size);
     781             : 
     782           0 :         sctx->send_size += sizeof(*hdr);
     783           0 :         hdr = (struct btrfs_cmd_header *)sctx->send_buf;
     784           0 :         put_unaligned_le16(cmd, &hdr->cmd);
     785             : 
     786           0 :         return 0;
     787             : }
     788             : 
     789           0 : static int send_cmd(struct send_ctx *sctx)
     790             : {
     791           0 :         int ret;
     792           0 :         struct btrfs_cmd_header *hdr;
     793           0 :         u32 crc;
     794             : 
     795           0 :         hdr = (struct btrfs_cmd_header *)sctx->send_buf;
     796           0 :         put_unaligned_le32(sctx->send_size - sizeof(*hdr), &hdr->len);
     797           0 :         put_unaligned_le32(0, &hdr->crc);
     798             : 
     799           0 :         crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
     800           0 :         put_unaligned_le32(crc, &hdr->crc);
     801             : 
     802           0 :         ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
     803             :                                         &sctx->send_off);
     804             : 
     805           0 :         sctx->send_size = 0;
     806           0 :         sctx->put_data = false;
     807             : 
     808           0 :         return ret;
     809             : }
     810             : 
     811             : /*
     812             :  * Sends a move instruction to user space
     813             :  */
     814           0 : static int send_rename(struct send_ctx *sctx,
     815             :                      struct fs_path *from, struct fs_path *to)
     816             : {
     817           0 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
     818           0 :         int ret;
     819             : 
     820           0 :         btrfs_debug(fs_info, "send_rename %s -> %s", from->start, to->start);
     821             : 
     822           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
     823           0 :         if (ret < 0)
     824           0 :                 goto out;
     825             : 
     826           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
     827           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);
     828             : 
     829           0 :         ret = send_cmd(sctx);
     830             : 
     831           0 : tlv_put_failure:
     832           0 : out:
     833           0 :         return ret;
     834             : }
     835             : 
     836             : /*
     837             :  * Sends a link instruction to user space
     838             :  */
     839           0 : static int send_link(struct send_ctx *sctx,
     840             :                      struct fs_path *path, struct fs_path *lnk)
     841             : {
     842           0 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
     843           0 :         int ret;
     844             : 
     845           0 :         btrfs_debug(fs_info, "send_link %s -> %s", path->start, lnk->start);
     846             : 
     847           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
     848           0 :         if (ret < 0)
     849           0 :                 goto out;
     850             : 
     851           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
     852           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);
     853             : 
     854           0 :         ret = send_cmd(sctx);
     855             : 
     856           0 : tlv_put_failure:
     857           0 : out:
     858           0 :         return ret;
     859             : }
     860             : 
     861             : /*
     862             :  * Sends an unlink instruction to user space
     863             :  */
     864           0 : static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
     865             : {
     866           0 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
     867           0 :         int ret;
     868             : 
     869           0 :         btrfs_debug(fs_info, "send_unlink %s", path->start);
     870             : 
     871           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
     872           0 :         if (ret < 0)
     873           0 :                 goto out;
     874             : 
     875           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
     876             : 
     877           0 :         ret = send_cmd(sctx);
     878             : 
     879           0 : tlv_put_failure:
     880           0 : out:
     881           0 :         return ret;
     882             : }
     883             : 
     884             : /*
     885             :  * Sends a rmdir instruction to user space
     886             :  */
     887           0 : static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
     888             : {
     889           0 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
     890           0 :         int ret;
     891             : 
     892           0 :         btrfs_debug(fs_info, "send_rmdir %s", path->start);
     893             : 
     894           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
     895           0 :         if (ret < 0)
     896           0 :                 goto out;
     897             : 
     898           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
     899             : 
     900           0 :         ret = send_cmd(sctx);
     901             : 
     902           0 : tlv_put_failure:
     903           0 : out:
     904           0 :         return ret;
     905             : }
     906             : 
     907             : struct btrfs_inode_info {
     908             :         u64 size;
     909             :         u64 gen;
     910             :         u64 mode;
     911             :         u64 uid;
     912             :         u64 gid;
     913             :         u64 rdev;
     914             :         u64 fileattr;
     915             :         u64 nlink;
     916             : };
     917             : 
     918             : /*
     919             :  * Helper function to retrieve some fields from an inode item.
     920             :  */
     921           0 : static int get_inode_info(struct btrfs_root *root, u64 ino,
     922             :                           struct btrfs_inode_info *info)
     923             : {
     924           0 :         int ret;
     925           0 :         struct btrfs_path *path;
     926           0 :         struct btrfs_inode_item *ii;
     927           0 :         struct btrfs_key key;
     928             : 
     929           0 :         path = alloc_path_for_send();
     930           0 :         if (!path)
     931             :                 return -ENOMEM;
     932             : 
     933           0 :         key.objectid = ino;
     934           0 :         key.type = BTRFS_INODE_ITEM_KEY;
     935           0 :         key.offset = 0;
     936           0 :         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
     937           0 :         if (ret) {
     938           0 :                 if (ret > 0)
     939           0 :                         ret = -ENOENT;
     940           0 :                 goto out;
     941             :         }
     942             : 
     943           0 :         if (!info)
     944           0 :                 goto out;
     945             : 
     946           0 :         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
     947             :                         struct btrfs_inode_item);
     948           0 :         info->size = btrfs_inode_size(path->nodes[0], ii);
     949           0 :         info->gen = btrfs_inode_generation(path->nodes[0], ii);
     950           0 :         info->mode = btrfs_inode_mode(path->nodes[0], ii);
     951           0 :         info->uid = btrfs_inode_uid(path->nodes[0], ii);
     952           0 :         info->gid = btrfs_inode_gid(path->nodes[0], ii);
     953           0 :         info->rdev = btrfs_inode_rdev(path->nodes[0], ii);
     954           0 :         info->nlink = btrfs_inode_nlink(path->nodes[0], ii);
     955             :         /*
     956             :          * Transfer the unchanged u64 value of btrfs_inode_item::flags, that's
     957             :          * otherwise logically split to 32/32 parts.
     958             :          */
     959           0 :         info->fileattr = btrfs_inode_flags(path->nodes[0], ii);
     960             : 
     961           0 : out:
     962           0 :         btrfs_free_path(path);
     963           0 :         return ret;
     964             : }
     965             : 
     966           0 : static int get_inode_gen(struct btrfs_root *root, u64 ino, u64 *gen)
     967             : {
     968           0 :         int ret;
     969           0 :         struct btrfs_inode_info info = { 0 };
     970             : 
     971           0 :         ASSERT(gen);
     972             : 
     973           0 :         ret = get_inode_info(root, ino, &info);
     974           0 :         *gen = info.gen;
     975           0 :         return ret;
     976             : }
     977             : 
     978             : typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
     979             :                                    struct fs_path *p,
     980             :                                    void *ctx);
     981             : 
     982             : /*
     983             :  * Helper function to iterate the entries in ONE btrfs_inode_ref or
     984             :  * btrfs_inode_extref.
     985             :  * The iterate callback may return a non zero value to stop iteration. This can
     986             :  * be a negative value for error codes or 1 to simply stop it.
     987             :  *
     988             :  * path must point to the INODE_REF or INODE_EXTREF when called.
     989             :  */
     990           0 : static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
     991             :                              struct btrfs_key *found_key, int resolve,
     992             :                              iterate_inode_ref_t iterate, void *ctx)
     993             : {
     994           0 :         struct extent_buffer *eb = path->nodes[0];
     995           0 :         struct btrfs_inode_ref *iref;
     996           0 :         struct btrfs_inode_extref *extref;
     997           0 :         struct btrfs_path *tmp_path;
     998           0 :         struct fs_path *p;
     999           0 :         u32 cur = 0;
    1000           0 :         u32 total;
    1001           0 :         int slot = path->slots[0];
    1002           0 :         u32 name_len;
    1003           0 :         char *start;
    1004           0 :         int ret = 0;
    1005           0 :         int num = 0;
    1006           0 :         int index;
    1007           0 :         u64 dir;
    1008           0 :         unsigned long name_off;
    1009           0 :         unsigned long elem_size;
    1010           0 :         unsigned long ptr;
    1011             : 
    1012           0 :         p = fs_path_alloc_reversed();
    1013           0 :         if (!p)
    1014             :                 return -ENOMEM;
    1015             : 
    1016           0 :         tmp_path = alloc_path_for_send();
    1017           0 :         if (!tmp_path) {
    1018           0 :                 fs_path_free(p);
    1019           0 :                 return -ENOMEM;
    1020             :         }
    1021             : 
    1022             : 
    1023           0 :         if (found_key->type == BTRFS_INODE_REF_KEY) {
    1024           0 :                 ptr = (unsigned long)btrfs_item_ptr(eb, slot,
    1025             :                                                     struct btrfs_inode_ref);
    1026           0 :                 total = btrfs_item_size(eb, slot);
    1027           0 :                 elem_size = sizeof(*iref);
    1028             :         } else {
    1029           0 :                 ptr = btrfs_item_ptr_offset(eb, slot);
    1030           0 :                 total = btrfs_item_size(eb, slot);
    1031           0 :                 elem_size = sizeof(*extref);
    1032             :         }
    1033             : 
    1034           0 :         while (cur < total) {
    1035           0 :                 fs_path_reset(p);
    1036             : 
    1037           0 :                 if (found_key->type == BTRFS_INODE_REF_KEY) {
    1038           0 :                         iref = (struct btrfs_inode_ref *)(ptr + cur);
    1039           0 :                         name_len = btrfs_inode_ref_name_len(eb, iref);
    1040           0 :                         name_off = (unsigned long)(iref + 1);
    1041           0 :                         index = btrfs_inode_ref_index(eb, iref);
    1042           0 :                         dir = found_key->offset;
    1043             :                 } else {
    1044           0 :                         extref = (struct btrfs_inode_extref *)(ptr + cur);
    1045           0 :                         name_len = btrfs_inode_extref_name_len(eb, extref);
    1046           0 :                         name_off = (unsigned long)&extref->name;
    1047           0 :                         index = btrfs_inode_extref_index(eb, extref);
    1048           0 :                         dir = btrfs_inode_extref_parent(eb, extref);
    1049             :                 }
    1050             : 
    1051           0 :                 if (resolve) {
    1052           0 :                         start = btrfs_ref_to_path(root, tmp_path, name_len,
    1053             :                                                   name_off, eb, dir,
    1054           0 :                                                   p->buf, p->buf_len);
    1055           0 :                         if (IS_ERR(start)) {
    1056           0 :                                 ret = PTR_ERR(start);
    1057           0 :                                 goto out;
    1058             :                         }
    1059           0 :                         if (start < p->buf) {
    1060             :                                 /* overflow , try again with larger buffer */
    1061           0 :                                 ret = fs_path_ensure_buf(p,
    1062           0 :                                                 p->buf_len + p->buf - start);
    1063           0 :                                 if (ret < 0)
    1064           0 :                                         goto out;
    1065           0 :                                 start = btrfs_ref_to_path(root, tmp_path,
    1066             :                                                           name_len, name_off,
    1067             :                                                           eb, dir,
    1068           0 :                                                           p->buf, p->buf_len);
    1069           0 :                                 if (IS_ERR(start)) {
    1070           0 :                                         ret = PTR_ERR(start);
    1071           0 :                                         goto out;
    1072             :                                 }
    1073           0 :                                 BUG_ON(start < p->buf);
    1074             :                         }
    1075           0 :                         p->start = start;
    1076             :                 } else {
    1077           0 :                         ret = fs_path_add_from_extent_buffer(p, eb, name_off,
    1078             :                                                              name_len);
    1079           0 :                         if (ret < 0)
    1080           0 :                                 goto out;
    1081             :                 }
    1082             : 
    1083           0 :                 cur += elem_size + name_len;
    1084           0 :                 ret = iterate(num, dir, index, p, ctx);
    1085           0 :                 if (ret)
    1086           0 :                         goto out;
    1087           0 :                 num++;
    1088             :         }
    1089             : 
    1090           0 : out:
    1091           0 :         btrfs_free_path(tmp_path);
    1092           0 :         fs_path_free(p);
    1093           0 :         return ret;
    1094             : }
    1095             : 
    /*
     * Callback invoked by iterate_dir_item() once per entry found in the item.
     *
     * @num:      counter maintained by iterate_dir_item()
     * @di_key:   key stored in the dir item entry, converted to CPU byte order
     * @name:     start of the entry's name; @name_len bytes long
     * @data:     start of the entry's data, located right after the name in
     *            the same buffer; @data_len bytes long
     * @ctx:      opaque pointer passed through from the caller
     */
    typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
                                      const char *name, int name_len,
                                      const char *data, int data_len,
                                      void *ctx);
    1100             : 
    1101             : /*
    1102             :  * Helper function to iterate the entries in ONE btrfs_dir_item.
    1103             :  * The iterate callback may return a non zero value to stop iteration. This can
    1104             :  * be a negative value for error codes or 1 to simply stop it.
    1105             :  *
    1106             :  * path must point to the dir item when called.
    1107             :  */
    1108           0 : static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
    1109             :                             iterate_dir_item_t iterate, void *ctx)
    1110             : {
    1111           0 :         int ret = 0;
    1112           0 :         struct extent_buffer *eb;
    1113           0 :         struct btrfs_dir_item *di;
    1114           0 :         struct btrfs_key di_key;
    1115           0 :         char *buf = NULL;
    1116           0 :         int buf_len;
    1117           0 :         u32 name_len;
    1118           0 :         u32 data_len;
    1119           0 :         u32 cur;
    1120           0 :         u32 len;
    1121           0 :         u32 total;
    1122           0 :         int slot;
    1123           0 :         int num;
    1124             : 
    1125             :         /*
    1126             :          * Start with a small buffer (1 page). If later we end up needing more
    1127             :          * space, which can happen for xattrs on a fs with a leaf size greater
    1128             :          * then the page size, attempt to increase the buffer. Typically xattr
    1129             :          * values are small.
    1130             :          */
    1131           0 :         buf_len = PATH_MAX;
    1132           0 :         buf = kmalloc(buf_len, GFP_KERNEL);
    1133           0 :         if (!buf) {
    1134           0 :                 ret = -ENOMEM;
    1135           0 :                 goto out;
    1136             :         }
    1137             : 
    1138           0 :         eb = path->nodes[0];
    1139           0 :         slot = path->slots[0];
    1140           0 :         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
    1141           0 :         cur = 0;
    1142           0 :         len = 0;
    1143           0 :         total = btrfs_item_size(eb, slot);
    1144             : 
    1145           0 :         num = 0;
    1146           0 :         while (cur < total) {
    1147           0 :                 name_len = btrfs_dir_name_len(eb, di);
    1148           0 :                 data_len = btrfs_dir_data_len(eb, di);
    1149           0 :                 btrfs_dir_item_key_to_cpu(eb, di, &di_key);
    1150             : 
    1151           0 :                 if (btrfs_dir_ftype(eb, di) == BTRFS_FT_XATTR) {
    1152           0 :                         if (name_len > XATTR_NAME_MAX) {
    1153           0 :                                 ret = -ENAMETOOLONG;
    1154           0 :                                 goto out;
    1155             :                         }
    1156           0 :                         if (name_len + data_len >
    1157           0 :                                         BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
    1158           0 :                                 ret = -E2BIG;
    1159           0 :                                 goto out;
    1160             :                         }
    1161             :                 } else {
    1162             :                         /*
    1163             :                          * Path too long
    1164             :                          */
    1165           0 :                         if (name_len + data_len > PATH_MAX) {
    1166           0 :                                 ret = -ENAMETOOLONG;
    1167           0 :                                 goto out;
    1168             :                         }
    1169             :                 }
    1170             : 
    1171           0 :                 if (name_len + data_len > buf_len) {
    1172           0 :                         buf_len = name_len + data_len;
    1173           0 :                         if (is_vmalloc_addr(buf)) {
    1174           0 :                                 vfree(buf);
    1175           0 :                                 buf = NULL;
    1176             :                         } else {
    1177           0 :                                 char *tmp = krealloc(buf, buf_len,
    1178             :                                                 GFP_KERNEL | __GFP_NOWARN);
    1179             : 
    1180           0 :                                 if (!tmp)
    1181           0 :                                         kfree(buf);
    1182           0 :                                 buf = tmp;
    1183             :                         }
    1184           0 :                         if (!buf) {
    1185           0 :                                 buf = kvmalloc(buf_len, GFP_KERNEL);
    1186           0 :                                 if (!buf) {
    1187           0 :                                         ret = -ENOMEM;
    1188           0 :                                         goto out;
    1189             :                                 }
    1190             :                         }
    1191             :                 }
    1192             : 
    1193           0 :                 read_extent_buffer(eb, buf, (unsigned long)(di + 1),
    1194             :                                 name_len + data_len);
    1195             : 
    1196           0 :                 len = sizeof(*di) + name_len + data_len;
    1197           0 :                 di = (struct btrfs_dir_item *)((char *)di + len);
    1198           0 :                 cur += len;
    1199             : 
    1200           0 :                 ret = iterate(num, &di_key, buf, name_len, buf + name_len,
    1201             :                               data_len, ctx);
    1202           0 :                 if (ret < 0)
    1203           0 :                         goto out;
    1204           0 :                 if (ret) {
    1205           0 :                         ret = 0;
    1206           0 :                         goto out;
    1207             :                 }
    1208             : 
    1209           0 :                 num++;
    1210             :         }
    1211             : 
    1212           0 : out:
    1213           0 :         kvfree(buf);
    1214           0 :         return ret;
    1215             : }
    1216             : /* Ref iteration callback: copies path 'p' into the fs_path passed as ctx. */
    1217           0 : static int __copy_first_ref(int num, u64 dir, int index,
    1218             :                             struct fs_path *p, void *ctx)
    1219             : {
    1220           0 :         int ret;
    1221           0 :         struct fs_path *pt = ctx; /* destination supplied by the caller */
    1222             : 
    1223           0 :         ret = fs_path_copy(pt, p);
    1224           0 :         if (ret < 0)
    1225           0 :                 return ret;
    1226             : 
    1227             :         /* We want the first only; NOTE(review): presumably non-zero ends the walk. */
    1228             :         return 1;
    1229             : }
    1230             : /*
    1231             :  * Retrieve the first path of an inode into @path. If an inode has more than
    1232             :  * one ref/hardlink, the others are ignored. Returns 0 on success, 1 if
    1233             :  * btrfs_search_slot_for_read() returned > 0, or a negative errno on failure.
    1234             :  */
    1235           0 : static int get_inode_path(struct btrfs_root *root,
    1236             :                           u64 ino, struct fs_path *path)
    1237             : {
    1238           0 :         int ret;
    1239           0 :         struct btrfs_key key, found_key;
    1240           0 :         struct btrfs_path *p;
    1241             : 
    1242           0 :         p = alloc_path_for_send();
    1243           0 :         if (!p)
    1244             :                 return -ENOMEM;
    1245             : 
    1246           0 :         fs_path_reset(path);
    1247             : 
    1248           0 :         key.objectid = ino;
    1249           0 :         key.type = BTRFS_INODE_REF_KEY;
    1250           0 :         key.offset = 0; /* lowest offset for this inode's ref keys */
    1251             : 
    1252           0 :         ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
    1253           0 :         if (ret < 0)
    1254           0 :                 goto out;
    1255           0 :         if (ret) {
    1256           0 :                 ret = 1; /* collapse any positive result to 1 */
    1257           0 :                 goto out;
    1258             :         }
    1259           0 :         btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
    1260           0 :         if (found_key.objectid != ino ||
    1261           0 :             (found_key.type != BTRFS_INODE_REF_KEY &&
    1262             :              found_key.type != BTRFS_INODE_EXTREF_KEY)) {
    1263           0 :                 ret = -ENOENT; /* found item is not a ref/extref of this inode */
    1264           0 :                 goto out;
    1265             :         }
    1266             : 
    1267           0 :         ret = iterate_inode_ref(root, p, &found_key, 1,
    1268             :                                 __copy_first_ref, path);
    1269           0 :         if (ret < 0)
    1270             :                 goto out;
    1271             :         ret = 0; /* ret >= 0 here: report plain success */
    1272             : 
    1273           0 : out:
    1274           0 :         btrfs_free_path(p);
    1275           0 :         return ret;
    1276             : }
    1277             : /* Per-extent state for find_extent_clone()'s backref walk callbacks. */
    1278             : struct backref_ctx {
    1279             :         struct send_ctx *sctx;
    1280             : 
    1281             :         /* number of usable references found, see iterate_backrefs() */
    1282             :         u64 found;
    1283             : 
    1284             :         /*
    1285             :          * Used for clones found in send_root. Clones found behind cur_objectid
    1286             :          * and cur_offset are not considered as allowed clones.
    1287             :          */
    1288             :         u64 cur_objectid;
    1289             :         u64 cur_offset;
    1290             : 
    1291             :         /* May be truncated in case it's the last extent in a file. */
    1292             :         u64 extent_len;
    1293             : 
    1294             :         /* The bytenr the file extent item we are processing refers to. */
    1295             :         u64 bytenr;
    1296             :         /* The owner (root id) of the data backref for the current extent. */
    1297             :         u64 backref_owner;
    1298             :         /* The offset of the data backref for the current extent. */
    1299             :         u64 backref_offset;
    1300             : };
    1301             : /* bsearch() comparator: @key carries a root id in the pointer value itself. */
    1302           0 : static int __clone_root_cmp_bsearch(const void *key, const void *elt)
    1303             : {
    1304           0 :         u64 root = (u64)(uintptr_t)key; /* not a real pointer, see the callers */
    1305           0 :         const struct clone_root *cr = elt;
    1306             : 
    1307           0 :         if (root < cr->root->root_key.objectid)
    1308             :                 return -1;
    1309           0 :         if (root > cr->root->root_key.objectid)
    1310           0 :                 return 1;
    1311             :         return 0;
    1312             : }
    1313             : /* Orders clone_root entries by ascending root objectid (presumably for sort()). */
    1314           0 : static int __clone_root_cmp_sort(const void *e1, const void *e2)
    1315             : {
    1316           0 :         const struct clone_root *cr1 = e1;
    1317           0 :         const struct clone_root *cr2 = e2;
    1318             : 
    1319           0 :         if (cr1->root->root_key.objectid < cr2->root->root_key.objectid)
    1320             :                 return -1;
    1321           0 :         if (cr1->root->root_key.objectid > cr2->root->root_key.objectid)
    1322           0 :                 return 1;
    1323             :         return 0;
    1324             : }
    1325             : /*
    1326             :  * Called for every backref that is found for the current extent. Results are
    1327             :  * collected in sctx->clone_roots (ino/offset/num_bytes/found_ref). Returns
    1328             :  * BTRFS_ITERATE_EXTENT_INODES_STOP once a candidate covers extent_len, else 0.
    1329             :  */
    1330           0 : static int iterate_backrefs(u64 ino, u64 offset, u64 num_bytes, u64 root_id,
    1331             :                             void *ctx_)
    1332             : {
    1333           0 :         struct backref_ctx *bctx = ctx_;
    1334           0 :         struct clone_root *clone_root;
    1335             : 
    1336             :         /* First check if the root is in the list of accepted clone sources */
    1337           0 :         clone_root = bsearch((void *)(uintptr_t)root_id, bctx->sctx->clone_roots,
    1338           0 :                              bctx->sctx->clone_roots_cnt,
    1339             :                              sizeof(struct clone_root),
    1340             :                              __clone_root_cmp_bsearch);
    1341           0 :         if (!clone_root)
    1342             :                 return 0; /* not an accepted clone source, ignore */
    1343             : 
    1344             :         /* This is our own reference, bail out as we can't clone from it. */
    1345           0 :         if (clone_root->root == bctx->sctx->send_root &&
    1346           0 :             ino == bctx->cur_objectid &&
    1347           0 :             offset == bctx->cur_offset)
    1348             :                 return 0;
    1349             : 
    1350             :         /*
    1351             :          * Make sure we don't consider clones from send_root that are
    1352             :          * behind the current inode/offset.
    1353             :          */
    1354           0 :         if (clone_root->root == bctx->sctx->send_root) {
    1355             :                 /*
    1356             :                  * If the source inode was not yet processed we can't issue a
    1357             :                  * clone operation, as the source extent does not exist yet at
    1358             :                  * the destination of the stream.
    1359             :                  */
    1360           0 :                 if (ino > bctx->cur_objectid)
    1361             :                         return 0;
    1362             :                 /*
    1363             :                  * We clone from the inode currently being sent as long as the
    1364             :                  * source extent is already processed, otherwise we could try
    1365             :                  * to clone from an extent that does not exist yet at the
    1366             :                  * destination of the stream.
    1367             :                  */
    1368           0 :                 if (ino == bctx->cur_objectid &&
    1369           0 :                     offset + bctx->extent_len >
    1370           0 :                     bctx->sctx->cur_inode_next_write_offset)
    1371             :                         return 0;
    1372             :         }
    1373             : 
    1374           0 :         bctx->found++;
    1375           0 :         clone_root->found_ref = true; /* examined by find_extent_clone() */
    1376             : 
    1377             :         /*
    1378             :          * If the given backref refers to a file extent item with a larger
    1379             :          * number of bytes than what we found before, use the new one so that
    1380             :          * we clone more optimally and end up doing less writes and getting
    1381             :          * less exclusive, non-shared extents at the destination.
    1382             :          */
    1383           0 :         if (num_bytes > clone_root->num_bytes) {
    1384           0 :                 clone_root->ino = ino;
    1385           0 :                 clone_root->offset = offset;
    1386           0 :                 clone_root->num_bytes = num_bytes;
    1387             : 
    1388             :                 /*
    1389             :                  * Found a perfect candidate, so there's no need to continue
    1390             :                  * backref walking.
    1391             :                  */
    1392           0 :                 if (num_bytes >= bctx->extent_len)
    1393           0 :                         return BTRFS_ITERATE_EXTENT_INODES_STOP;
    1394             :         }
    1395             : 
    1396             :         return 0;
    1397             : }
    1398             : /* cache_lookup callback: on a hit, returns true and the root ids cached for the leaf. */
    1399           0 : static bool lookup_backref_cache(u64 leaf_bytenr, void *ctx,
    1400             :                                  const u64 **root_ids_ret, int *root_count_ret)
    1401             : {
    1402           0 :         struct backref_ctx *bctx = ctx;
    1403           0 :         struct send_ctx *sctx = bctx->sctx;
    1404           0 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    1405           0 :         const u64 key = leaf_bytenr >> fs_info->sectorsize_bits;
    1406           0 :         struct btrfs_lru_cache_entry *raw_entry;
    1407           0 :         struct backref_cache_entry *entry;
    1408             : 
    1409           0 :         if (btrfs_lru_cache_size(&sctx->backref_cache) == 0)
    1410             :                 return false; /* cache is empty */
    1411             : 
    1412             :         /*
    1413             :          * If relocation happened since we first filled the cache, then we must
    1414             :          * empty the cache and can not use it, because even though we operate on
    1415             :          * read-only roots, their leaves and nodes may have been reallocated and
    1416             :          * now be used for different nodes/leaves of the same tree or some other
    1417             :          * tree.
    1418             :          *
    1419             :          * We are called from iterate_extent_inodes() while either holding a
    1420             :          * transaction handle or holding fs_info->commit_root_sem, so no need
    1421             :          * to take any lock here.
    1422             :          */
    1423           0 :         if (fs_info->last_reloc_trans > sctx->backref_cache_last_reloc_trans) {
    1424           0 :                 btrfs_lru_cache_clear(&sctx->backref_cache);
    1425           0 :                 return false; /* cache was just emptied, so it's a miss */
    1426             :         }
    1427             : 
    1428           0 :         raw_entry = btrfs_lru_cache_lookup(&sctx->backref_cache, key, 0);
    1429           0 :         if (!raw_entry)
    1430             :                 return false; /* miss */
    1431             : 
    1432           0 :         entry = container_of(raw_entry, struct backref_cache_entry, entry);
    1433           0 :         *root_ids_ret = entry->root_ids;
    1434           0 :         *root_count_ret = entry->num_roots;
    1435             : 
    1436           0 :         return true;
    1437             : }
    1438             : /* cache_store callback: caches the ids from root_ids that are clone sources. */
    1439           0 : static void store_backref_cache(u64 leaf_bytenr, const struct ulist *root_ids,
    1440             :                                 void *ctx)
    1441             : {
    1442           0 :         struct backref_ctx *bctx = ctx;
    1443           0 :         struct send_ctx *sctx = bctx->sctx;
    1444           0 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    1445           0 :         struct backref_cache_entry *new_entry;
    1446           0 :         struct ulist_iterator uiter;
    1447           0 :         struct ulist_node *node;
    1448           0 :         int ret;
    1449             : 
    1450             :         /*
    1451             :          * We're called while holding a transaction handle or while holding
    1452             :          * fs_info->commit_root_sem (at iterate_extent_inodes()), so must do a
    1453             :          * NOFS allocation.
    1454             :          */
    1455           0 :         new_entry = kmalloc(sizeof(struct backref_cache_entry), GFP_NOFS);
    1456             :         /* No worries, cache is optional. */
    1457           0 :         if (!new_entry)
    1458           0 :                 return;
    1459             : 
    1460           0 :         new_entry->entry.key = leaf_bytenr >> fs_info->sectorsize_bits;
    1461           0 :         new_entry->entry.gen = 0;
    1462           0 :         new_entry->num_roots = 0;
    1463           0 :         ULIST_ITER_INIT(&uiter);
    1464           0 :         while ((node = ulist_next(root_ids, &uiter)) != NULL) {
    1465           0 :                 const u64 root_id = node->val;
    1466           0 :                 struct clone_root *root;
    1467             : 
    1468           0 :                 root = bsearch((void *)(uintptr_t)root_id, sctx->clone_roots,
    1469           0 :                                sctx->clone_roots_cnt, sizeof(struct clone_root),
    1470             :                                __clone_root_cmp_bsearch);
    1471           0 :                 if (!root)
    1472           0 :                         continue; /* not a clone source, not cached */
    1473             : 
    1474             :                 /* Too many roots, just exit, no worries as caching is optional. */
    1475           0 :                 if (new_entry->num_roots >= SEND_MAX_BACKREF_CACHE_ROOTS) {
    1476           0 :                         kfree(new_entry);
    1477           0 :                         return;
    1478             :                 }
    1479             : 
    1480           0 :                 new_entry->root_ids[new_entry->num_roots] = root_id;
    1481           0 :                 new_entry->num_roots++;
    1482             :         }
    1483             : 
    1484             :         /*
    1485             :          * We may have not added any roots to the new cache entry, which means
    1486             :          * none of the roots is part of the list of roots from which we are
    1487             :          * allowed to clone. Cache the new entry as it's still useful to avoid
    1488             :          * backref walking to determine which roots have a path to the leaf.
    1489             :          *
    1490             :          * Also use GFP_NOFS because we're called while holding a transaction
    1491             :          * handle or while holding fs_info->commit_root_sem.
    1492             :          */
    1493           0 :         ret = btrfs_lru_cache_store(&sctx->backref_cache, &new_entry->entry,
    1494             :                                     GFP_NOFS);
    1495           0 :         ASSERT(ret == 0 || ret == -ENOMEM);
    1496           0 :         if (ret) {
    1497             :                 /* Caching is optional, no worries. */
    1498           0 :                 kfree(new_entry);
    1499           0 :                 return;
    1500             :         }
    1501             : 
    1502             :         /*
    1503             :          * First entry in the cache: snapshot last_reloc_trans for the check in
    1504             :          * lookup_backref_cache(). We hold a transaction handle or
    1505             :          * fs_info->commit_root_sem (see iterate_extent_inodes()), so no lock needed.
    1506             :          */
    1507           0 :         if (btrfs_lru_cache_size(&sctx->backref_cache) == 1)
    1508           0 :                 sctx->backref_cache_last_reloc_trans = fs_info->last_reloc_trans;
    1509             : }
    1510             : /* Backref walk callback: 0 to proceed, -ENOENT to give up, -EUCLEAN on a tree block flag. */
    1511           0 : static int check_extent_item(u64 bytenr, const struct btrfs_extent_item *ei,
    1512             :                              const struct extent_buffer *leaf, void *ctx)
    1513             : {
    1514           0 :         const u64 refs = btrfs_extent_refs(leaf, ei);
    1515           0 :         const struct backref_ctx *bctx = ctx;
    1516           0 :         const struct send_ctx *sctx = bctx->sctx;
    1517             : 
    1518           0 :         if (bytenr == bctx->bytenr) {
    1519           0 :                 const u64 flags = btrfs_extent_flags(leaf, ei);
    1520             : 
    1521           0 :                 if (WARN_ON(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
    1522             :                         return -EUCLEAN; /* bctx->bytenr came from a file extent item */
    1523             : 
    1524             :                 /*
    1525             :                  * If we have only one reference and only the send root as a
    1526             :                  * clone source - meaning no clone roots were given in the
    1527             :                  * struct btrfs_ioctl_send_args passed to the send ioctl - then
    1528             :                  * it's our reference and there's no point in doing backref
    1529             :                  * walking which is expensive, so exit early.
    1530             :                  */
    1531           0 :                 if (refs == 1 && sctx->clone_roots_cnt == 1)
    1532             :                         return -ENOENT;
    1533             :         }
    1534             : 
    1535             :         /*
    1536             :          * Backreference walking (iterate_extent_inodes() below) is currently
    1537             :          * too expensive when an extent has a large number of references, both
    1538             :          * in time spent and used memory. So for now just fallback to write
    1539             :          * operations instead of clone operations when an extent has more than
    1540             :          * a certain amount of references.
    1541             :          */
    1542           0 :         if (refs > SEND_MAX_EXTENT_REFS)
    1543           0 :                 return -ENOENT;
    1544             : 
    1545             :         return 0;
    1546             : }
    1547             : /* skip_data_ref callback: true only for the data backref of our own file extent item. */
    1548           0 : static bool skip_self_data_ref(u64 root, u64 ino, u64 offset, void *ctx)
    1549             : {
    1550           0 :         const struct backref_ctx *bctx = ctx;
    1551             : 
    1552           0 :         if (ino == bctx->cur_objectid &&
    1553           0 :             root == bctx->backref_owner &&
    1554           0 :             offset == bctx->backref_offset)
    1555           0 :                 return true;
    1556             : 
    1557             :         return false;
    1558             : }
    1559             : 
    1560             : /*
    1561             :  * Given an inode, offset and extent item, it finds a good clone for a clone
    1562             :  * instruction and stores it in *found. Returns -ENOENT when none could be
    1563             :  * found. The function makes sure that the returned clone is usable at the
    1564             :  * point where sending is at the moment. This means, that no clones are
    1565             :  * accepted which lie behind the current inode+offset. Note that 0 is also
    1566             :  * returned, with *found left untouched, when data_offset >= ino_size.
    1567             :  * path must point to the file extent item when called.
    1568             :  */
    1569           0 : static int find_extent_clone(struct send_ctx *sctx,
    1570             :                              struct btrfs_path *path,
    1571             :                              u64 ino, u64 data_offset,
    1572             :                              u64 ino_size,
    1573             :                              struct clone_root **found)
    1574             : {
    1575           0 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    1576           0 :         int ret;
    1577           0 :         int extent_type;
    1578           0 :         u64 logical;
    1579           0 :         u64 disk_byte;
    1580           0 :         u64 num_bytes;
    1581           0 :         struct btrfs_file_extent_item *fi;
    1582           0 :         struct extent_buffer *eb = path->nodes[0];
    1583           0 :         struct backref_ctx backref_ctx = { 0 };
    1584           0 :         struct btrfs_backref_walk_ctx backref_walk_ctx = { 0 };
    1585           0 :         struct clone_root *cur_clone_root;
    1586           0 :         int compressed;
    1587           0 :         u32 i;
    1588             : 
    1589             :         /*
    1590             :          * With fallocate we can get prealloc extents beyond the inode's i_size,
    1591             :          * so we don't do anything here because clone operations can not clone
    1592             :          * to a range beyond i_size without increasing the i_size of the
    1593             :          * destination inode.
    1594             :          */
    1595           0 :         if (data_offset >= ino_size)
    1596             :                 return 0;
    1597             : 
    1598           0 :         fi = btrfs_item_ptr(eb, path->slots[0], struct btrfs_file_extent_item);
    1599           0 :         extent_type = btrfs_file_extent_type(eb, fi);
    1600           0 :         if (extent_type == BTRFS_FILE_EXTENT_INLINE)
    1601             :                 return -ENOENT; /* no clone source for inline extents */
    1602             : 
    1603           0 :         disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
    1604           0 :         if (disk_byte == 0)
    1605             :                 return -ENOENT; /* NOTE(review): presumably a hole - confirm */
    1606             : 
    1607           0 :         compressed = btrfs_file_extent_compression(eb, fi);
    1608           0 :         num_bytes = btrfs_file_extent_num_bytes(eb, fi);
    1609           0 :         logical = disk_byte + btrfs_file_extent_offset(eb, fi);
    1610             : 
    1611             :         /*
    1612             :          * Setup the clone roots.
    1613             :          */
    1614           0 :         for (i = 0; i < sctx->clone_roots_cnt; i++) {
    1615           0 :                 cur_clone_root = sctx->clone_roots + i;
    1616           0 :                 cur_clone_root->ino = (u64)-1;
    1617           0 :                 cur_clone_root->offset = 0;
    1618           0 :                 cur_clone_root->num_bytes = 0;
    1619           0 :                 cur_clone_root->found_ref = false;
    1620             :         }
    1621             : 
    1622           0 :         backref_ctx.sctx = sctx;
    1623           0 :         backref_ctx.cur_objectid = ino;
    1624           0 :         backref_ctx.cur_offset = data_offset;
    1625           0 :         backref_ctx.bytenr = disk_byte;
    1626             :         /*
    1627             :          * Use the header owner and not the send root's id, because in case of a
    1628             :          * snapshot we can have shared subtrees.
    1629             :          */
    1630           0 :         backref_ctx.backref_owner = btrfs_header_owner(eb);
    1631           0 :         backref_ctx.backref_offset = data_offset - btrfs_file_extent_offset(eb, fi);
    1632             : 
    1633             :         /*
    1634             :          * The last extent of a file may be too large due to page alignment.
    1635             :          * We need to adjust extent_len in this case so that the checks in
    1636             :          * iterate_backrefs() work.
    1637             :          */
    1638           0 :         if (data_offset + num_bytes >= ino_size)
    1639           0 :                 backref_ctx.extent_len = ino_size - data_offset;
    1640             :         else
    1641           0 :                 backref_ctx.extent_len = num_bytes;
    1642             : 
    1643             :         /*
    1644             :          * Now collect all backrefs.
    1645             :          */
    1646           0 :         backref_walk_ctx.bytenr = disk_byte;
    1647           0 :         if (compressed == BTRFS_COMPRESS_NONE) /* else it stays 0 (zero-initialized) */
    1648           0 :                 backref_walk_ctx.extent_item_pos = btrfs_file_extent_offset(eb, fi);
    1649           0 :         backref_walk_ctx.fs_info = fs_info;
    1650           0 :         backref_walk_ctx.cache_lookup = lookup_backref_cache;
    1651           0 :         backref_walk_ctx.cache_store = store_backref_cache;
    1652           0 :         backref_walk_ctx.indirect_ref_iterator = iterate_backrefs;
    1653           0 :         backref_walk_ctx.check_extent_item = check_extent_item;
    1654           0 :         backref_walk_ctx.user_ctx = &backref_ctx;
    1655             : 
    1656             :         /*
    1657             :          * If we have a single clone root, then it's the send root and we can tell
    1658             :          * the backref walking code to skip our own backref and not resolve it,
    1659             :          * since we can not use it for cloning - the source and destination
    1660             :          * ranges can't overlap and in case the leaf is shared through a subtree
    1661             :          * due to snapshots, we can't use those other roots since they are not
    1662             :          * in the list of clone roots.
    1663             :          */
    1664           0 :         if (sctx->clone_roots_cnt == 1)
    1665           0 :                 backref_walk_ctx.skip_data_ref = skip_self_data_ref;
    1666             : 
    1667           0 :         ret = iterate_extent_inodes(&backref_walk_ctx, true, iterate_backrefs,
    1668             :                                     &backref_ctx);
    1669           0 :         if (ret < 0)
    1670             :                 return ret;
    1671             : 
    1672           0 :         down_read(&fs_info->commit_root_sem);
    1673           0 :         if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
    1674             :                 /*
    1675             :                  * A transaction commit for a transaction in which block group
    1676             :                  * relocation was done just happened.
    1677             :                  * The disk_bytenr of the file extent item we processed is
    1678             :                  * possibly stale, referring to the extent's location before
    1679             :                  * relocation. So act as if we haven't found any clone sources
    1680             :                  * and fallback to write commands, which will read the correct
    1681             :                  * data from the new extent location. Otherwise we will fail
    1682             :                  * below because we haven't found our own back reference or we
    1683             :                  * could be getting incorrect sources in case the old extent
    1684             :                  * was already reallocated after the relocation.
    1685             :                  */
    1686           0 :                 up_read(&fs_info->commit_root_sem);
    1687           0 :                 return -ENOENT;
    1688             :         }
    1689           0 :         up_read(&fs_info->commit_root_sem);
    1690             : 
    1691           0 :         btrfs_debug(fs_info,
    1692             :                     "find_extent_clone: data_offset=%llu, ino=%llu, num_bytes=%llu, logical=%llu",
    1693             :                     data_offset, ino, num_bytes, logical);
    1694             : 
    1695           0 :         if (!backref_ctx.found) {
    1696             :                 btrfs_debug(fs_info, "no clones found");
    1697             :                 return -ENOENT;
    1698             :         }
    1699             : 
    1700             :         cur_clone_root = NULL;
    1701           0 :         for (i = 0; i < sctx->clone_roots_cnt; i++) {
    1702           0 :                 struct clone_root *clone_root = &sctx->clone_roots[i];
    1703             : 
    1704           0 :                 if (!clone_root->found_ref)
    1705           0 :                         continue; /* iterate_backrefs() accepted nothing from this root */
    1706             : 
    1707             :                 /*
    1708             :                  * Choose the root from which we can clone more bytes, to
    1709             :                  * minimize write operations and therefore have more extent
    1710             :                  * sharing at the destination (the same as in the source).
    1711             :                  */
    1712           0 :                 if (!cur_clone_root ||
    1713           0 :                     clone_root->num_bytes > cur_clone_root->num_bytes) {
    1714           0 :                         cur_clone_root = clone_root;
    1715             : 
    1716             :                         /*
    1717             :                          * We found an optimal clone candidate (any inode from
    1718             :                          * any root is fine), so we're done.
    1719             :                          */
    1720           0 :                         if (clone_root->num_bytes >= backref_ctx.extent_len)
    1721             :                                 break;
    1722             :                 }
    1723             :         }
    1724             : 
    1725           0 :         if (cur_clone_root) {
    1726           0 :                 *found = cur_clone_root;
    1727           0 :                 ret = 0;
    1728             :         } else {
    1729             :                 ret = -ENOENT;
    1730             :         }
    1731             : 
    1732             :         return ret;
    1733             : }
    1734             : /*
                     :  * Read the target of symlink inode @ino in @root and add it to @dest.
                     :  *
                     :  * The target is taken from the file extent item at offset 0 of the inode,
                     :  * which must be an inline, uncompressed extent. Its ram_bytes value is used
                     :  * as the length of the data taken from the leaf.
                     :  *
                     :  * Returns the result of fs_path_add_from_extent_buffer() on success,
                     :  * -ENOMEM if no path could be allocated, -EIO if the inode has no file
                     :  * extent item at offset 0, -EUCLEAN if the extent is not inline or is
                     :  * compressed, or the negative value returned by btrfs_search_slot().
                     :  */
    1735           0 : static int read_symlink(struct btrfs_root *root,
    1736             :                         u64 ino,
    1737             :                         struct fs_path *dest)
    1738             : {
    1739           0 :         int ret;
    1740           0 :         struct btrfs_path *path;
    1741           0 :         struct btrfs_key key;
    1742           0 :         struct btrfs_file_extent_item *ei;
    1743           0 :         u8 type;
    1744           0 :         u8 compression;
    1745           0 :         unsigned long off;
    1746           0 :         int len;
    1747             : 
    1748           0 :         path = alloc_path_for_send();
    1749           0 :         if (!path)
    1750             :                 return -ENOMEM;
    1751             : 
    1752           0 :         key.objectid = ino;
    1753           0 :         key.type = BTRFS_EXTENT_DATA_KEY;
    1754           0 :         key.offset = 0;
    1755           0 :         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    1756           0 :         if (ret < 0)
    1757           0 :                 goto out;
    1758           0 :         if (ret) {
    1759             :                 /*
    1760             :                  * An empty symlink inode. Can happen in rare error paths when
    1761             :                  * creating a symlink (transaction committed before the inode
    1762             :                  * eviction handler removed the symlink inode items and a crash
    1763             :                  * happened in between or the subvol was snapshotted in between).
    1764             :                  * Print an informative message to dmesg/syslog so that the user
    1765             :                  * can delete the symlink.
    1766             :                  */
    1767           0 :                 btrfs_err(root->fs_info,
    1768             :                           "Found empty symlink inode %llu at root %llu",
    1769             :                           ino, root->root_key.objectid);
    1770           0 :                 ret = -EIO;
    1771           0 :                 goto out;
    1772             :         }
    1773             : 
    1774           0 :         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
    1775             :                         struct btrfs_file_extent_item);
    1776           0 :         type = btrfs_file_extent_type(path->nodes[0], ei);
    1777           0 :         if (unlikely(type != BTRFS_FILE_EXTENT_INLINE)) {
    1778           0 :                 ret = -EUCLEAN;
    1779           0 :                 btrfs_crit(root->fs_info,
    1780             : "send: found symlink extent that is not inline, ino %llu root %llu extent type %d",
    1781             :                            ino, btrfs_root_id(root), type);
    1782           0 :                 goto out;
    1783             :         }
    1784           0 :         compression = btrfs_file_extent_compression(path->nodes[0], ei);
    1785           0 :         if (unlikely(compression != BTRFS_COMPRESS_NONE)) {
    1786           0 :                 ret = -EUCLEAN;
    1787           0 :                 btrfs_crit(root->fs_info,
    1788             : "send: found symlink extent with compression, ino %llu root %llu compression type %d",
    1789             :                            ino, btrfs_root_id(root), compression);
    1790           0 :                 goto out;
    1791             :         }
    1792             : 
    1793           0 :         off = btrfs_file_extent_inline_start(ei);
    1794           0 :         len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
    1795             : 
    1796           0 :         ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
    1797             : 
    1798           0 : out:
    1799           0 :         btrfs_free_path(path);
    1800           0 :         return ret;
    1801             : }
    1802             : 
    1803             : /*
    1804             :  * Helper function to generate a file name that is unique in the root of
    1805             :  * send_root and parent_root. This is used to generate names for orphan inodes.
                     :  *
                     :  * Candidate names have the form "o<ino>-<gen>-<idx>", with idx starting at 0
                     :  * and incremented each time the candidate already exists as a dir item of
                     :  * BTRFS_FIRST_FREE_OBJECTID in send_root or, when one is set, in parent_root.
                     :  * The first candidate found in neither root is added to @dest.
                     :  *
                     :  * Returns the result of fs_path_add() on success, -ENOMEM if no path could
                     :  * be allocated, or the error encoded in the pointer returned by
                     :  * btrfs_lookup_dir_item().
    1806             :  */
    1807           0 : static int gen_unique_name(struct send_ctx *sctx,
    1808             :                            u64 ino, u64 gen,
    1809             :                            struct fs_path *dest)
    1810             : {
    1811           0 :         int ret = 0;
    1812           0 :         struct btrfs_path *path;
    1813           0 :         struct btrfs_dir_item *di;
    1814           0 :         char tmp[64];
    1815           0 :         int len;
    1816           0 :         u64 idx = 0;
    1817             : 
    1818           0 :         path = alloc_path_for_send();
    1819           0 :         if (!path)
    1820             :                 return -ENOMEM;
    1821             : 
    1822           0 :         while (1) {
    1823           0 :                 struct fscrypt_str tmp_name;
    1824             : 
    1825           0 :                 len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
    1826             :                                 ino, gen, idx);
    1827           0 :                 ASSERT(len < sizeof(tmp));
    1828           0 :                 tmp_name.name = tmp;
    1829           0 :                 tmp_name.len = strlen(tmp);
    1830             : 
    1831           0 :                 di = btrfs_lookup_dir_item(NULL, sctx->send_root,
    1832             :                                 path, BTRFS_FIRST_FREE_OBJECTID,
    1833             :                                 &tmp_name, 0);
    1834           0 :                 btrfs_release_path(path);
    1835           0 :                 if (IS_ERR(di)) {
    1836           0 :                         ret = PTR_ERR(di);
    1837           0 :                         goto out;
    1838             :                 }
    1839           0 :                 if (di) {
    1840             :                         /* not unique, try again */
    1841           0 :                         idx++;
    1842           0 :                         continue;
    1843             :                 }
    1844             : 
    1845           0 :                 if (!sctx->parent_root) {
    1846             :                         /* unique */
    1847             :                         ret = 0;
    1848           0 :                         break;
    1849             :                 }
    1850             : 
    1851           0 :                 di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
    1852             :                                 path, BTRFS_FIRST_FREE_OBJECTID,
    1853             :                                 &tmp_name, 0);
    1854           0 :                 btrfs_release_path(path);
    1855           0 :                 if (IS_ERR(di)) {
    1856           0 :                         ret = PTR_ERR(di);
    1857           0 :                         goto out;
    1858             :                 }
    1859           0 :                 if (di) {
    1860             :                         /* not unique, try again */
    1861           0 :                         idx++;
    1862           0 :                         continue;
    1863             :                 }
    1864             :                 /* unique */
    1865             :                 break;
    1866             :         }
    1867             : 
    1868           0 :         ret = fs_path_add(dest, tmp, strlen(tmp));
    1869             : 
    1870           0 : out:
    1871           0 :         btrfs_free_path(path);
    1872           0 :         return ret;
    1873             : }
    1874             : /*
                     :  * Result of get_cur_inode_state() for an inode number and generation.
                     :  *
                     :  * no_change:   the inode exists with that generation in both the send and
                     :  *              the parent root.
                     :  * will_create: it exists with that generation in the send root only and its
                     :  *              number is not below sctx->send_progress.
                     :  * did_create:  same, but its number is below sctx->send_progress.
                     :  * will_delete: it exists with that generation in the parent root only and
                     :  *              its number is not below sctx->send_progress.
                     :  * did_delete:  same, but its number is below sctx->send_progress.
                     :  */
    1875             : enum inode_state {
    1876             :         inode_state_no_change,
    1877             :         inode_state_will_create,
    1878             :         inode_state_did_create,
    1879             :         inode_state_will_delete,
    1880             :         inode_state_did_delete,
    1881             : };
    1882             : /*
                     :  * Classify inode @ino with generation @gen by comparing what the send root
                     :  * ("left") and the parent root ("right") hold for that inode number.
                     :  *
                     :  * @send_gen:   optional; set to the inode's generation in the send root, or
                     :  *              to 0 if it does not exist there.
                     :  * @parent_gen: optional; set likewise for the parent root. It is not
                     :  *              written at all when there is no parent root.
                     :  *
                     :  * Returns one of the inode_state values, -ENOENT if neither root holds the
                     :  * inode with generation @gen, or another negative value returned by
                     :  * get_inode_info().
                     :  */
    1883           0 : static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen,
    1884             :                                u64 *send_gen, u64 *parent_gen)
    1885             : {
    1886           0 :         int ret;
    1887           0 :         int left_ret;
    1888           0 :         int right_ret;
    1889           0 :         u64 left_gen;
    1890           0 :         u64 right_gen = 0;
    1891           0 :         struct btrfs_inode_info info;
    1892             :         /*
                     :          * A found inode with nlink == 0 is treated as not existing.
                     :          *
                     :          * NOTE(review): info is also read when get_inode_info() returned
                     :          * -ENOENT; whether it fills @info in that case is not visible here.
                     :          * left_ret/right_ret end up as -ENOENT either way in that case -
                     :          * confirm the read itself is harmless.
                     :          */
    1893           0 :         ret = get_inode_info(sctx->send_root, ino, &info);
    1894           0 :         if (ret < 0 && ret != -ENOENT)
    1895           0 :                 goto out;
    1896           0 :         left_ret = (info.nlink == 0) ? -ENOENT : ret;
    1897           0 :         left_gen = info.gen;
    1898           0 :         if (send_gen)
    1899           0 :                 *send_gen = ((left_ret == -ENOENT) ? 0 : info.gen);
    1900             : 
    1901           0 :         if (!sctx->parent_root) {
    1902             :                 right_ret = -ENOENT;
    1903             :         } else {
    1904           0 :                 ret = get_inode_info(sctx->parent_root, ino, &info);
    1905           0 :                 if (ret < 0 && ret != -ENOENT)
    1906           0 :                         goto out;
    1907           0 :                 right_ret = (info.nlink == 0) ? -ENOENT : ret;
    1908           0 :                 right_gen = info.gen;
    1909           0 :                 if (parent_gen)
    1910           0 :                         *parent_gen = ((right_ret == -ENOENT) ? 0 : info.gen);
    1911             :         }
    1912             : 
    1913           0 :         if (!left_ret && !right_ret) {
    1914           0 :                 if (left_gen == gen && right_gen == gen) {
    1915             :                         ret = inode_state_no_change;
    1916           0 :                 } else if (left_gen == gen) {
    1917           0 :                         if (ino < sctx->send_progress)
    1918             :                                 ret = inode_state_did_create;
    1919             :                         else
    1920           0 :                                 ret = inode_state_will_create;
    1921           0 :                 } else if (right_gen == gen) {
    1922           0 :                         if (ino < sctx->send_progress)
    1923             :                                 ret = inode_state_did_delete;
    1924             :                         else
    1925           0 :                                 ret = inode_state_will_delete;
    1926             :                 } else  {
    1927             :                         ret = -ENOENT;
    1928             :                 }
    1929           0 :         } else if (!left_ret) {
    1930           0 :                 if (left_gen == gen) {
    1931           0 :                         if (ino < sctx->send_progress)
    1932             :                                 ret = inode_state_did_create;
    1933             :                         else
    1934           0 :                                 ret = inode_state_will_create;
    1935             :                 } else {
    1936             :                         ret = -ENOENT;
    1937             :                 }
    1938           0 :         } else if (!right_ret) {
    1939           0 :                 if (right_gen == gen) {
    1940           0 :                         if (ino < sctx->send_progress)
    1941             :                                 ret = inode_state_did_delete;
    1942             :                         else
    1943           0 :                                 ret = inode_state_will_delete;
    1944             :                 } else {
    1945             :                         ret = -ENOENT;
    1946             :                 }
    1947             :         } else {
    1948             :                 ret = -ENOENT;
    1949             :         }
    1950             : 
    1951           0 : out:
    1952           0 :         return ret;
    1953             : }
    1954             : /*
                     :  * Tell whether inode @ino with generation @gen exists at the current point
                     :  * of the send, based on get_cur_inode_state().
                     :  *
                     :  * BTRFS_FIRST_FREE_OBJECTID is always reported as existing, without any
                     :  * lookup and without writing @send_gen or @parent_gen. Otherwise both are
                     :  * passed through to get_cur_inode_state().
                     :  *
                     :  * Returns 1 for the states no_change, did_create and will_delete, 0 for the
                     :  * other states, or the negative value returned by get_cur_inode_state()
                     :  * (which includes -ENOENT).
                     :  */
    1955           0 : static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen,
    1956             :                              u64 *send_gen, u64 *parent_gen)
    1957             : {
    1958           0 :         int ret;
    1959             : 
    1960           0 :         if (ino == BTRFS_FIRST_FREE_OBJECTID)
    1961             :                 return 1;
    1962             : 
    1963           0 :         ret = get_cur_inode_state(sctx, ino, gen, send_gen, parent_gen);
    1964           0 :         if (ret < 0)
    1965           0 :                 goto out;
    1966             : 
    1967           0 :         if (ret == inode_state_no_change ||
    1968           0 :             ret == inode_state_did_create ||
    1969             :             ret == inode_state_will_delete)
    1970             :                 ret = 1;
    1971             :         else
    1972             :                 ret = 0;
    1973             : 
    1974             : out:
    1975             :         return ret;
    1976             : }
    1977             : 
    1978             : /*
    1979             :  * Helper function to lookup a dir item in a dir.
                     :  *
                     :  * Looks up the entry @name (of @name_len bytes) in directory @dir of @root
                     :  * and stores the objectid of the key it points to in @found_inode.
                     :  * @found_inode is written on success only.
                     :  *
                     :  * Returns 0 on success, -ENOMEM if no path could be allocated, -ENOENT if
                     :  * there is no such entry or if its key has type BTRFS_ROOT_ITEM_KEY, or the
                     :  * error encoded in the pointer returned by btrfs_lookup_dir_item().
    1980             :  */
    1981           0 : static int lookup_dir_item_inode(struct btrfs_root *root,
    1982             :                                  u64 dir, const char *name, int name_len,
    1983             :                                  u64 *found_inode)
    1984             : {
    1985           0 :         int ret = 0;
    1986           0 :         struct btrfs_dir_item *di;
    1987           0 :         struct btrfs_key key;
    1988           0 :         struct btrfs_path *path;
    1989           0 :         struct fscrypt_str name_str = FSTR_INIT((char *)name, name_len);
    1990             : 
    1991           0 :         path = alloc_path_for_send();
    1992           0 :         if (!path)
    1993             :                 return -ENOMEM;
    1994             : 
    1995           0 :         di = btrfs_lookup_dir_item(NULL, root, path, dir, &name_str, 0);
    1996           0 :         if (IS_ERR_OR_NULL(di)) {
    1997           0 :                 ret = di ? PTR_ERR(di) : -ENOENT;
    1998           0 :                 goto out;
    1999             :         }
    2000           0 :         btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
    2001           0 :         if (key.type == BTRFS_ROOT_ITEM_KEY) {
    2002           0 :                 ret = -ENOENT;
    2003           0 :                 goto out;
    2004             :         }
    2005           0 :         *found_inode = key.objectid;
    2006             : 
    2007           0 : out:
    2008           0 :         btrfs_free_path(path);
    2009           0 :         return ret;
    2010             : }
    2011             : 
    2012             : /*
    2013             :  * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir,
    2014             :  * generation of the parent dir and the name of the dir entry.
                     :  *
                     :  * The search starts at key (ino, BTRFS_INODE_REF_KEY, 0) and accepts the
                     :  * item it lands on only if it belongs to @ino and is an INODE_REF or an
                     :  * INODE_EXTREF item. For an INODE_REF the parent dir is the key offset,
                     :  * for an INODE_EXTREF it is read from the item itself.
                     :  *
                     :  * @dir:     set to the parent dir's inode number, on success only.
                     :  * @dir_gen: optional; if not NULL it is filled by get_inode_gen() for the
                     :  *           parent dir.
                     :  * @name:    the entry's name is added to this path.
                     :  *
                     :  * Returns a negative value on failure: -ENOMEM if no path could be
                     :  * allocated, -ENOENT if no matching ref item was found, or the error of
                     :  * one of the called helpers. On success the return value is that of the
                     :  * last helper called (get_inode_gen() when @dir_gen is set, otherwise
                     :  * fs_path_add_from_extent_buffer()).
    2015             :  */
    2016           0 : static int get_first_ref(struct btrfs_root *root, u64 ino,
    2017             :                          u64 *dir, u64 *dir_gen, struct fs_path *name)
    2018             : {
    2019           0 :         int ret;
    2020           0 :         struct btrfs_key key;
    2021           0 :         struct btrfs_key found_key;
    2022           0 :         struct btrfs_path *path;
    2023           0 :         int len;
    2024           0 :         u64 parent_dir;
    2025             : 
    2026           0 :         path = alloc_path_for_send();
    2027           0 :         if (!path)
    2028             :                 return -ENOMEM;
    2029             : 
    2030           0 :         key.objectid = ino;
    2031           0 :         key.type = BTRFS_INODE_REF_KEY;
    2032           0 :         key.offset = 0;
    2033             : 
    2034           0 :         ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
    2035           0 :         if (ret < 0)
    2036           0 :                 goto out;
    2037           0 :         if (!ret)
    2038           0 :                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
    2039             :                                 path->slots[0]);
    2040           0 :         if (ret || found_key.objectid != ino ||
    2041           0 :             (found_key.type != BTRFS_INODE_REF_KEY &&
    2042             :              found_key.type != BTRFS_INODE_EXTREF_KEY)) {
    2043           0 :                 ret = -ENOENT;
    2044           0 :                 goto out;
    2045             :         }
    2046             : 
    2047           0 :         if (found_key.type == BTRFS_INODE_REF_KEY) {
    2048           0 :                 struct btrfs_inode_ref *iref;
    2049           0 :                 iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
    2050             :                                       struct btrfs_inode_ref);
    2051           0 :                 len = btrfs_inode_ref_name_len(path->nodes[0], iref);
    2052           0 :                 ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
    2053           0 :                                                      (unsigned long)(iref + 1),
    2054             :                                                      len);
    2055           0 :                 parent_dir = found_key.offset;
    2056             :         } else {
    2057           0 :                 struct btrfs_inode_extref *extref;
    2058           0 :                 extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
    2059             :                                         struct btrfs_inode_extref);
    2060           0 :                 len = btrfs_inode_extref_name_len(path->nodes[0], extref);
    2061           0 :                 ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
    2062           0 :                                         (unsigned long)&extref->name, len);
    2063           0 :                 parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
    2064             :         }
    2065           0 :         if (ret < 0)
    2066           0 :                 goto out;
    2067           0 :         btrfs_release_path(path);
    2068             : 
    2069           0 :         if (dir_gen) {
    2070           0 :                 ret = get_inode_gen(root, parent_dir, dir_gen);
    2071           0 :                 if (ret < 0)
    2072           0 :                         goto out;
    2073             :         }
    2074             : 
    2075           0 :         *dir = parent_dir;
    2076             : 
    2077           0 : out:
    2078           0 :         btrfs_free_path(path);
    2079           0 :         return ret;
    2080             : }
    2081             : /*
                     :  * Check whether the entry @name (of @name_len bytes) in directory @dir is
                     :  * the first ref of inode @ino in @root, as reported by get_first_ref().
                     :  *
                     :  * Returns 1 if both the parent dir and the name match, 0 if they do not,
                     :  * -ENOMEM if the temporary path could not be allocated, or the negative
                     :  * value returned by get_first_ref().
                     :  */
    2082           0 : static int is_first_ref(struct btrfs_root *root,
    2083             :                         u64 ino, u64 dir,
    2084             :                         const char *name, int name_len)
    2085             : {
    2086           0 :         int ret;
    2087           0 :         struct fs_path *tmp_name;
    2088           0 :         u64 tmp_dir;
    2089             : 
    2090           0 :         tmp_name = fs_path_alloc();
    2091           0 :         if (!tmp_name)
    2092             :                 return -ENOMEM;
    2093             : 
    2094           0 :         ret = get_first_ref(root, ino, &tmp_dir, NULL, tmp_name);
    2095           0 :         if (ret < 0)
    2096           0 :                 goto out;
    2097             : 
    2098           0 :         if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
    2099           0 :                 ret = 0;
    2100           0 :                 goto out;
    2101             :         }
    2102             : 
    2103           0 :         ret = !memcmp(tmp_name->start, name, name_len);
    2104             : 
    2105           0 : out:
    2106           0 :         fs_path_free(tmp_name);
    2107           0 :         return ret;
    2108             : }
    2109             : 
    2110             : /*
    2111             :  * Used by process_recorded_refs to determine if a new ref would overwrite an
    2112             :  * already existing ref. In case it detects an overwrite, it returns the
    2113             :  * inode/gen in who_ino/who_gen.
    2114             :  * When an overwrite is detected, process_recorded_refs does proper orphanizing
    2115             :  * to make sure later references to the overwritten inode are possible.
    2116             :  * Orphanizing is however only required for the first ref of an inode.
    2117             :  * process_recorded_refs does an additional is_first_ref check to see if
    2118             :  * orphanizing is really required.
                     :  *
                     :  * Returns 1 if an overwrite was detected (who_ino, who_gen and who_mode are
                     :  * then set from the parent root), 0 if not (always the case without a
                     :  * parent root), or a negative value from lookup_dir_item_inode() or
                     :  * get_inode_info().
                     :  *
                     :  * NOTE(review): any return value <= 0 from is_inode_existent(), including
                     :  * negative ones, is turned into 0 here, while did_overwrite_ref() passes
                     :  * such values on - confirm this is intended.
    2119             :  */
    2120           0 : static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
    2121             :                               const char *name, int name_len,
    2122             :                               u64 *who_ino, u64 *who_gen, u64 *who_mode)
    2123             : {
    2124           0 :         int ret;
    2125           0 :         u64 parent_root_dir_gen;
    2126           0 :         u64 other_inode = 0;
    2127           0 :         struct btrfs_inode_info info;
    2128             : 
    2129           0 :         if (!sctx->parent_root)
    2130             :                 return 0;
    2131             : 
    2132           0 :         ret = is_inode_existent(sctx, dir, dir_gen, NULL, &parent_root_dir_gen);
    2133           0 :         if (ret <= 0)
    2134             :                 return 0;
    2135             : 
    2136             :         /*
    2137             :          * If we have a parent root we need to verify that the parent dir was
    2138             :          * not deleted and then re-created, if it was then we have no overwrite
    2139             :          * and we can just unlink this entry.
    2140             :          *
    2141             :          * @parent_root_dir_gen was set to 0 if the inode does not exist in the
    2142             :          * parent root.
    2143             :          */
    2144           0 :         if (sctx->parent_root && dir != BTRFS_FIRST_FREE_OBJECTID &&
    2145           0 :             parent_root_dir_gen != dir_gen)
    2146             :                 return 0;
    2147             : 
    2148           0 :         ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
    2149             :                                     &other_inode);
    2150           0 :         if (ret == -ENOENT)
    2151             :                 return 0;
    2152           0 :         else if (ret < 0)
    2153             :                 return ret;
    2154             : 
    2155             :         /*
    2156             :          * Check if the overwritten ref was already processed. If yes, the ref
    2157             :          * was already unlinked/moved, so we can safely assume that we will not
    2158             :          * overwrite anything at this point in time.
    2159             :          */
    2160           0 :         if (other_inode > sctx->send_progress ||
    2161           0 :             is_waiting_for_move(sctx, other_inode)) {
    2162           0 :                 ret = get_inode_info(sctx->parent_root, other_inode, &info);
    2163           0 :                 if (ret < 0)
    2164             :                         return ret;
    2165             : 
    2166           0 :                 *who_ino = other_inode;
    2167           0 :                 *who_gen = info.gen;
    2168           0 :                 *who_mode = info.mode;
    2169           0 :                 return 1;
    2170             :         }
    2171             : 
    2172             :         return 0;
    2173             : }
    2174             : 
    2175             : /*
    2176             :  * Checks if the ref was overwritten by an already processed inode. This is
    2177             :  * used by __get_cur_name_and_parent to find out if the ref was orphanized and
    2178             :  * thus the orphan name needs to be used.
    2179             :  * process_recorded_refs also uses it to avoid unlinking of refs that were
    2180             :  * overwritten.
                     :  *
                     :  * Returns 1 if the ref was overwritten, 0 if it was not (always the case
                     :  * without a parent root), or a negative value from is_inode_existent(),
                     :  * lookup_dir_item_inode() or get_inode_gen().
    2181             :  */
    2182           0 : static int did_overwrite_ref(struct send_ctx *sctx,
    2183             :                             u64 dir, u64 dir_gen,
    2184             :                             u64 ino, u64 ino_gen,
    2185             :                             const char *name, int name_len)
    2186             : {
    2187           0 :         int ret;
    2188           0 :         u64 ow_inode;
    2189           0 :         u64 ow_gen = 0;
    2190           0 :         u64 send_root_dir_gen;
    2191             : 
    2192           0 :         if (!sctx->parent_root)
    2193             :                 return 0;
    2194             : 
    2195           0 :         ret = is_inode_existent(sctx, dir, dir_gen, &send_root_dir_gen, NULL);
    2196           0 :         if (ret <= 0)
    2197             :                 return ret;
    2198             : 
    2199             :         /*
    2200             :          * @send_root_dir_gen was set to 0 if the inode does not exist in the
    2201             :          * send root.
    2202             :          */
    2203           0 :         if (dir != BTRFS_FIRST_FREE_OBJECTID && send_root_dir_gen != dir_gen)
    2204             :                 return 0;
    2205             : 
    2206             :         /* check if the ref was overwritten by another ref */
    2207           0 :         ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
    2208             :                                     &ow_inode);
    2209           0 :         if (ret == -ENOENT) {
    2210             :                 /* was never and will never be overwritten */
    2211             :                 return 0;
    2212           0 :         } else if (ret < 0) {
    2213             :                 return ret;
    2214             :         }
    2215             : 
    2216           0 :         if (ow_inode == ino) {
    2217           0 :                 ret = get_inode_gen(sctx->send_root, ow_inode, &ow_gen);
    2218           0 :                 if (ret < 0)
    2219             :                         return ret;
    2220             : 
    2221             :                 /* It's the same inode, so no overwrite happened. */
    2222           0 :                 if (ow_gen == ino_gen)
    2223             :                         return 0;
    2224             :         }
    2225             : 
    2226             :         /*
    2227             :          * We know that it is or will be overwritten. Check this now.
    2228             :          * The current inode being processed might have been the one that caused
    2229             :          * inode 'ino' to be orphanized, therefore check if ow_inode matches
    2230             :          * the current inode being processed.
    2231             :          */
    2232           0 :         if (ow_inode < sctx->send_progress)
    2233             :                 return 1;
    2234             : 
    2235           0 :         if (ino != sctx->cur_ino && ow_inode == sctx->cur_ino) {
    2236           0 :                 if (ow_gen == 0) {
    2237           0 :                         ret = get_inode_gen(sctx->send_root, ow_inode, &ow_gen);
    2238           0 :                         if (ret < 0)
    2239             :                                 return ret;
    2240             :                 }
    2241           0 :                 if (ow_gen == sctx->cur_inode_gen)
    2242           0 :                         return 1;
    2243             :         }
    2244             : 
    2245             :         return 0;
    2246             : }
    2247             : 
    2248             : /*
    2249             :  * Same as did_overwrite_ref, but also checks if it is the first ref of an inode
    2250             :  * that got overwritten. This is used by process_recorded_refs to determine
    2251             :  * if it has to use the path as returned by get_cur_path or the orphan name.
                     :  *
                     :  * The first ref of @ino is looked up in the parent root and then handed to
                     :  * did_overwrite_ref() together with @ino and @gen.
                     :  *
                     :  * Returns what did_overwrite_ref() returns for that ref, 0 without a parent
                     :  * root, -ENOMEM if the temporary path could not be allocated, or the
                     :  * negative value returned by get_first_ref().
                     :  *
                     :  * NOTE(review): without a parent root fs_path_free() is reached with a NULL
                     :  * pointer; presumably it accepts that - confirm.
    2252             :  */
    2253           0 : static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
    2254             : {
    2255           0 :         int ret = 0;
    2256           0 :         struct fs_path *name = NULL;
    2257           0 :         u64 dir;
    2258           0 :         u64 dir_gen;
    2259             : 
    2260           0 :         if (!sctx->parent_root)
    2261           0 :                 goto out;
    2262             : 
    2263           0 :         name = fs_path_alloc();
    2264           0 :         if (!name)
    2265             :                 return -ENOMEM;
    2266             : 
    2267           0 :         ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name);
    2268           0 :         if (ret < 0)
    2269           0 :                 goto out;
    2270             : 
    2271           0 :         ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
    2272           0 :                         name->start, fs_path_len(name));
    2273             : 
    2274           0 : out:
    2275           0 :         fs_path_free(name);
    2276           0 :         return ret;
    2277             : }
    2278             : /*
                     :  * Look up the entry for @ino/@gen in sctx->name_cache.
                     :  *
                     :  * Returns the name_cache_entry that embeds the LRU cache entry found by
                     :  * btrfs_lru_cache_lookup(), or NULL if the lookup found nothing.
                     :  */
    2279             : static inline struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
    2280             :                                                          u64 ino, u64 gen)
    2281             : {
    2282           0 :         struct btrfs_lru_cache_entry *entry;
    2283             : 
    2284           0 :         entry = btrfs_lru_cache_lookup(&sctx->name_cache, ino, gen);
    2285           0 :         if (!entry)
    2286             :                 return NULL;
    2287             : 
    2288             :         return container_of(entry, struct name_cache_entry, entry);
    2289             : }
    2290             : 
    2291             : /*
    2292             :  * Used by get_cur_path for each ref up to the root.
    2293             :  * Returns 0 if it succeeded.
    2294             :  * Returns 1 if the inode is not existent or got overwritten. In that case, the
    2295             :  * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
    2296             :  * is returned, parent_ino/parent_gen are not guaranteed to be valid.
    2297             :  * Returns <0 in case of error.
                     :  *
                     :  * The name is appended to dest. The result (name, parent ino/gen and the
                     :  * 0/1 return value) is stored in sctx->name_cache keyed by ino/gen and is
                     :  * replayed on later calls. An entry stored while ino >= sctx->send_progress
                     :  * is flagged need_later_update; it is dropped and recomputed once
                     :  * send_progress has moved past ino.
                     :  *
                     :  * On the "inode does not exist yet" path *parent_ino/*parent_gen are never
                     :  * written by this function, so the values that end up in the cache are
                     :  * whatever the caller's variables held on entry.
                     :  *
                     :  * A failure to allocate or store the cache entry is returned as an error
                     :  * even though dest has already been filled in at that point.
    2298             :  */
    2299           0 : static int __get_cur_name_and_parent(struct send_ctx *sctx,
    2300             :                                      u64 ino, u64 gen,
    2301             :                                      u64 *parent_ino,
    2302             :                                      u64 *parent_gen,
    2303             :                                      struct fs_path *dest)
    2304             : {
    2305           0 :         int ret;
    2306           0 :         int nce_ret;
    2307           0 :         struct name_cache_entry *nce;
    2308             : 
    2309             :         /*
    2310             :          * First check if we already did a call to this function with the same
    2311             :          * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
    2312             :          * return the cached result.
                     :          *
                     :          * An entry is considered stale when it was stored with
                     :          * need_later_update set and ino is now below send_progress; such an
                     :          * entry is removed and the lookup below is redone.
    2313             :          */
    2314           0 :         nce = name_cache_search(sctx, ino, gen);
    2315           0 :         if (nce) {
    2316           0 :                 if (ino < sctx->send_progress && nce->need_later_update) {
    2317           0 :                         btrfs_lru_cache_remove(&sctx->name_cache, &nce->entry);
    2318           0 :                         nce = NULL;
    2319             :                 } else {
    2320           0 :                         *parent_ino = nce->parent_ino;
    2321           0 :                         *parent_gen = nce->parent_gen;
    2322           0 :                         ret = fs_path_add(dest, nce->name, nce->name_len);
    2323           0 :                         if (ret < 0)
    2324           0 :                                 goto out;
    2325           0 :                         ret = nce->ret; /* replay the cached 0/1 result */
    2326           0 :                         goto out;
    2327             :                 }
    2328             :         }
    2329             : 
    2330             :         /*
    2331             :          * If the inode is not existent yet, add the orphan name and return 1.
    2332             :          * This should only happen for the parent dir that we determine in
    2333             :          * record_new_ref_if_needed().
    2334             :          */
    2335           0 :         ret = is_inode_existent(sctx, ino, gen, NULL, NULL);
    2336           0 :         if (ret < 0)
    2337           0 :                 goto out;
    2338             : 
    2339           0 :         if (!ret) {
    2340           0 :                 ret = gen_unique_name(sctx, ino, gen, dest);
    2341           0 :                 if (ret < 0)
    2342           0 :                         goto out;
    2343           0 :                 ret = 1;
    2344           0 :                 goto out_cache;
    2345             :         }
    2346             : 
    2347             :         /*
    2348             :          * Depending on whether the inode was already processed or not, use
    2349             :          * send_root or parent_root for ref lookup.
    2350             :          */
    2351           0 :         if (ino < sctx->send_progress)
    2352           0 :                 ret = get_first_ref(sctx->send_root, ino,
    2353             :                                     parent_ino, parent_gen, dest);
    2354             :         else
    2355           0 :                 ret = get_first_ref(sctx->parent_root, ino,
    2356             :                                     parent_ino, parent_gen, dest);
    2357           0 :         if (ret < 0)
    2358           0 :                 goto out;
    2359             : 
    2360             :         /*
    2361             :          * Check if the ref was overwritten by an inode's ref that was processed
    2362             :          * earlier. If yes, treat as orphan and return 1.
                     :          * In that case the name just looked up is discarded from dest and
                     :          * replaced by the generated unique name.
    2363             :          */
    2364           0 :         ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
    2365           0 :                         dest->start, dest->end - dest->start);
    2366           0 :         if (ret < 0)
    2367           0 :                 goto out;
    2368           0 :         if (ret) {
    2369           0 :                 fs_path_reset(dest);
    2370           0 :                 ret = gen_unique_name(sctx, ino, gen, dest);
    2371           0 :                 if (ret < 0)
    2372           0 :                         goto out;
    2373             :                 ret = 1;
    2374             :         }
    2375             : 
    2376           0 : out_cache:
    2377             :         /*
    2378             :          * Store the result of the lookup in the name cache.
                     :          *
                     :          * The allocation has room for the name plus one extra byte for the
                     :          * NUL terminator that strcpy() copies below; this relies on
                     :          * dest->start being NUL terminated. ret is 0 or 1 here and is saved
                     :          * in the entry so a cache hit can return the same value.
    2379             :          */
    2380           0 :         nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL);
    2381           0 :         if (!nce) {
    2382           0 :                 ret = -ENOMEM;
    2383           0 :                 goto out;
    2384             :         }
    2385             : 
    2386           0 :         nce->entry.key = ino;
    2387           0 :         nce->entry.gen = gen;
    2388           0 :         nce->parent_ino = *parent_ino;
    2389           0 :         nce->parent_gen = *parent_gen;
    2390           0 :         nce->name_len = fs_path_len(dest);
    2391           0 :         nce->ret = ret;
    2392           0 :         strcpy(nce->name, dest->start);
    2393             : 
    2394           0 :         if (ino < sctx->send_progress)
    2395           0 :                 nce->need_later_update = 0;
    2396             :         else
    2397           0 :                 nce->need_later_update = 1;
    2398             : 
    2399           0 :         nce_ret = btrfs_lru_cache_store(&sctx->name_cache, &nce->entry, GFP_KERNEL);
    2400           0 :         if (nce_ret < 0) {
    2401           0 :                 kfree(nce); /* not stored, so still ours to free */
    2402           0 :                 ret = nce_ret; /* the store error replaces the 0/1 result */
    2403             :         }
    2404             : 
    2405           0 : out:
    2406           0 :         return ret;
    2407             : }
    2408             : 
    2409             : /*
    2410             :  * Magic happens here. This function returns the first ref to an inode as it
    2411             :  * would look like while receiving the stream at this point in time.
    2412             :  * We walk the path up to the root. For every inode in between, we check if it
    2413             :  * was already processed/sent. If yes, we continue with the parent as found
    2414             :  * in send_root. If not, we continue with the parent as found in parent_root.
    2415             :  * If we encounter an inode that was deleted at this point in time, we use the
    2416             :  * inode's "orphan" name instead of the real name and stop. Same with new inodes
    2417             :  * that were not created yet and overwritten inodes/refs.
    2418             :  *
    2419             :  * When do we have orphan inodes:
    2420             :  * 1. When an inode is freshly created and thus no valid refs are available yet
    2421             :  * 2. When a directory lost all its refs (deleted) but still has dir items
    2422             :  *    inside which were not processed yet (pending for move/delete). If anyone
    2423             :  *    tried to get the path to the dir items, it would get a path inside that
    2424             :  *    orphan directory.
    2425             :  * 3. When an inode is moved around or gets new links, it may overwrite the ref
    2426             :  *    of an unprocessed inode. If in that case the first ref would be
    2427             :  *    overwritten, the overwritten inode gets "orphanized". Later when we
    2428             :  *    process this overwritten inode, it is restored at a new place by moving
    2429             :  *    the orphan inode.
    2430             :  *
    2431             :  * sctx->send_progress tells this function at which point in time receiving
    2432             :  * would be.
                     :  *
                     :  * dest is reset and rebuilt here. Components are added starting at ino and
                     :  * moving towards the root with dest->reversed set, and fs_path_unreverse()
                     :  * is called on dest when ret is 0 at the end. The walk ends when ino reaches
                     :  * BTRFS_FIRST_FREE_OBJECTID or when an orphan name has been added.
                     :  *
                     :  * Returns 0 on success, -ENOMEM if the temporary name buffer cannot be
                     :  * allocated, or the error returned by the helper that failed.
    2433             :  */
    2434           0 : static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
    2435             :                         struct fs_path *dest)
    2436             : {
    2437           0 :         int ret = 0;
    2438           0 :         struct fs_path *name = NULL;
    2439           0 :         u64 parent_inode = 0;
    2440           0 :         u64 parent_gen = 0;
    2441           0 :         int stop = 0;
    2442             : 
    2443           0 :         name = fs_path_alloc();
    2444           0 :         if (!name) {
    2445           0 :                 ret = -ENOMEM;
    2446           0 :                 goto out;
    2447             :         }
    2448             : 
    2449           0 :         dest->reversed = 1;
    2450           0 :         fs_path_reset(dest);
    2451             : 
    2452           0 :         while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
    2453           0 :                 struct waiting_dir_move *wdm;
    2454             : 
    2455           0 :                 fs_path_reset(name); /* name holds one component per iteration */
    2456             : 
    2457           0 :                 if (is_waiting_for_rm(sctx, ino, gen)) { /* orphan name, then stop */
    2458           0 :                         ret = gen_unique_name(sctx, ino, gen, name);
    2459           0 :                         if (ret < 0)
    2460           0 :                                 goto out;
    2461           0 :                         ret = fs_path_add_path(dest, name);
    2462           0 :                         break; /* ret is checked at out: */
    2463             :                 }
    2464             : 
    2465           0 :                 wdm = get_waiting_dir_move(sctx, ino);
    2466           0 :                 if (wdm && wdm->orphanized) { /* orphan name is the last component */
    2467           0 :                         ret = gen_unique_name(sctx, ino, gen, name);
    2468           0 :                         stop = 1;
    2469           0 :                 } else if (wdm) { /* name and parent as found in parent_root */
    2470           0 :                         ret = get_first_ref(sctx->parent_root, ino,
    2471             :                                             &parent_inode, &parent_gen, name);
    2472             :                 } else {
    2473           0 :                         ret = __get_cur_name_and_parent(sctx, ino, gen,
    2474             :                                                         &parent_inode,
    2475             :                                                         &parent_gen, name);
    2476           0 :                         if (ret) /* 1 = orphan name was used, <0 = error */
    2477           0 :                                 stop = 1;
    2478             :                 }
    2479             : 
    2480           0 :                 if (ret < 0)
    2481           0 :                         goto out;
    2482             : 
    2483           0 :                 ret = fs_path_add_path(dest, name);
    2484           0 :                 if (ret < 0)
    2485           0 :                         goto out;
    2486             : 
    2487           0 :                 ino = parent_inode; /* continue with the parent directory */
    2488           0 :                 gen = parent_gen;
    2489             :         }
    2490             : 
    2491           0 : out:
    2492           0 :         fs_path_free(name);
    2493           0 :         if (!ret)
    2494           0 :                 fs_path_unreverse(dest);
    2495           0 :         return ret;
    2496             : }
    2497             : 
    2498             : /*
    2499             :  * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace, or a
                     :  * BTRFS_SEND_C_SNAPSHOT command when sctx->parent_root is set.
                     :  *
                     :  * The subvolume name is read from the item found in the tree root when
                     :  * searching for (send root objectid, BTRFS_ROOT_BACKREF_KEY, 0); the name
                     :  * bytes are the ones that follow struct btrfs_root_ref in that item.
                     :  * -ENOENT is returned when no such item of the send root is found.
                     :  *
                     :  * Attributes sent: PATH (the name), UUID (received_uuid of the send root
                     :  * unless it is empty, else its uuid) and CTRANSID. With a parent root,
                     :  * CLONE_UUID (chosen the same way from the parent root) and CLONE_CTRANSID
                     :  * are added.
                     :  *
                     :  * Returns -ENOMEM if the path or the name buffer cannot be allocated,
                     :  * otherwise the result of the failing helper or of send_cmd().
                     :  *
                     :  * NOTE(review): namelen comes from the on-disk item and is copied into a
                     :  * BTRFS_PATH_NAME_MAX byte buffer with no bounds check in this function -
                     :  * confirm that the length is validated elsewhere.
                     :  * NOTE(review): tlv_put_failure is presumably the label the TLV_PUT_*
                     :  * macros jump to on error - confirm against their definitions.
    2500             :  */
    2501           0 : static int send_subvol_begin(struct send_ctx *sctx)
    2502             : {
    2503           0 :         int ret;
    2504           0 :         struct btrfs_root *send_root = sctx->send_root;
    2505           0 :         struct btrfs_root *parent_root = sctx->parent_root;
    2506           0 :         struct btrfs_path *path;
    2507           0 :         struct btrfs_key key;
    2508           0 :         struct btrfs_root_ref *ref;
    2509           0 :         struct extent_buffer *leaf;
    2510           0 :         char *name = NULL;
    2511           0 :         int namelen;
    2512             : 
    2513           0 :         path = btrfs_alloc_path();
    2514           0 :         if (!path)
    2515             :                 return -ENOMEM;
    2516             : 
    2517           0 :         name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL);
    2518           0 :         if (!name) {
    2519           0 :                 btrfs_free_path(path);
    2520           0 :                 return -ENOMEM;
    2521             :         }
    2522             : 
    2523           0 :         key.objectid = send_root->root_key.objectid;
    2524           0 :         key.type = BTRFS_ROOT_BACKREF_KEY;
    2525           0 :         key.offset = 0;
    2526             : 
    2527           0 :         ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root,
    2528             :                                 &key, path, 1, 0);
    2529           0 :         if (ret < 0)
    2530           0 :                 goto out;
    2531           0 :         if (ret) { /* positive result is treated as "no item found" */
    2532           0 :                 ret = -ENOENT;
    2533           0 :                 goto out;
    2534             :         }
    2535             : 
    2536           0 :         leaf = path->nodes[0];
    2537           0 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    2538           0 :         if (key.type != BTRFS_ROOT_BACKREF_KEY ||
    2539           0 :             key.objectid != send_root->root_key.objectid) {
    2540           0 :                 ret = -ENOENT; /* the item found is not a backref of the send root */
    2541           0 :                 goto out;
    2542             :         }
    2543           0 :         ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
    2544           0 :         namelen = btrfs_root_ref_name_len(leaf, ref);
    2545           0 :         read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen);
    2546           0 :         btrfs_release_path(path);
    2547             : 
    2548           0 :         if (parent_root) {
    2549           0 :                 ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT);
    2550           0 :                 if (ret < 0)
    2551           0 :                         goto out;
    2552             :         } else {
    2553           0 :                 ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL);
    2554           0 :                 if (ret < 0)
    2555           0 :                         goto out;
    2556             :         }
    2557             : 
    2558           0 :         TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
    2559             : 
    2560           0 :         if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid))
    2561           0 :                 TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
    2562             :                             sctx->send_root->root_item.received_uuid);
    2563             :         else
    2564           0 :                 TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
    2565             :                             sctx->send_root->root_item.uuid);
    2566             : 
    2567           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
    2568             :                     btrfs_root_ctransid(&sctx->send_root->root_item));
    2569           0 :         if (parent_root) {
    2570           0 :                 if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid))
    2571           0 :                         TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
    2572             :                                      parent_root->root_item.received_uuid);
    2573             :                 else
    2574           0 :                         TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
    2575             :                                      parent_root->root_item.uuid);
    2576           0 :                 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
    2577             :                             btrfs_root_ctransid(&sctx->parent_root->root_item));
    2578             :         }
    2579             : 
    2580           0 :         ret = send_cmd(sctx);
    2581             : 
    2582           0 : tlv_put_failure:
    2583           0 : out:
    2584           0 :         btrfs_free_path(path);
    2585           0 :         kfree(name);
    2586           0 :         return ret;
    2587             : }
    2588             : /*
                     :  * Sends a BTRFS_SEND_C_TRUNCATE command for inode ino/gen.
                     :  *
                     :  * The command carries the path computed by get_cur_path() for ino/gen and
                     :  * the given size.
                     :  *
                     :  * Returns -ENOMEM if the path cannot be allocated, a negative value if
                     :  * begin_cmd() or get_cur_path() fails, and otherwise the result of
                     :  * send_cmd().
                     :  * NOTE(review): tlv_put_failure is presumably where the TLV_PUT_* macros
                     :  * jump on error - confirm against their definitions.
                     :  */
    2589           0 : static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size)
    2590             : {
    2591           0 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    2592           0 :         int ret = 0;
    2593           0 :         struct fs_path *p;
    2594             : 
    2595           0 :         btrfs_debug(fs_info, "send_truncate %llu size=%llu", ino, size);
    2596             : 
    2597           0 :         p = fs_path_alloc();
    2598           0 :         if (!p)
    2599             :                 return -ENOMEM;
    2600             : 
    2601           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE);
    2602           0 :         if (ret < 0)
    2603           0 :                 goto out;
    2604             : 
    2605           0 :         ret = get_cur_path(sctx, ino, gen, p);
    2606           0 :         if (ret < 0)
    2607           0 :                 goto out;
    2608           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    2609           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size);
    2610             : 
    2611           0 :         ret = send_cmd(sctx);
    2612             : 
    2613           0 : tlv_put_failure:
    2614           0 : out:
    2615           0 :         fs_path_free(p);
    2616           0 :         return ret;
    2617             : }
    2618             : /*
                     :  * Sends a BTRFS_SEND_C_CHMOD command for inode ino/gen.
                     :  *
                     :  * The command carries the path computed by get_cur_path() for ino/gen and
                     :  * the mode masked with 07777, so any bits above the low twelve are not
                     :  * sent.
                     :  *
                     :  * Returns -ENOMEM if the path cannot be allocated, a negative value if
                     :  * begin_cmd() or get_cur_path() fails, and otherwise the result of
                     :  * send_cmd().
                     :  * NOTE(review): tlv_put_failure is presumably where the TLV_PUT_* macros
                     :  * jump on error - confirm against their definitions.
                     :  */
    2619           0 : static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode)
    2620             : {
    2621           0 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    2622           0 :         int ret = 0;
    2623           0 :         struct fs_path *p;
    2624             : 
    2625           0 :         btrfs_debug(fs_info, "send_chmod %llu mode=%llu", ino, mode);
    2626             : 
    2627           0 :         p = fs_path_alloc();
    2628           0 :         if (!p)
    2629             :                 return -ENOMEM;
    2630             : 
    2631           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD);
    2632           0 :         if (ret < 0)
    2633           0 :                 goto out;
    2634             : 
    2635           0 :         ret = get_cur_path(sctx, ino, gen, p);
    2636           0 :         if (ret < 0)
    2637           0 :                 goto out;
    2638           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    2639           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777);
    2640             : 
    2641           0 :         ret = send_cmd(sctx);
    2642             : 
    2643           0 : tlv_put_failure:
    2644           0 : out:
    2645           0 :         fs_path_free(p);
    2646           0 :         return ret;
    2647             : }
    2648             : /*
                     :  * Sends a BTRFS_SEND_C_FILEATTR command for inode ino/gen.
                     :  *
                     :  * Nothing is sent and 0 is returned when sctx->proto is below 2. Otherwise
                     :  * the command carries the path computed by get_cur_path() for ino/gen and
                     :  * the fileattr value as given.
                     :  *
                     :  * Returns -ENOMEM if the path cannot be allocated, a negative value if
                     :  * begin_cmd() or get_cur_path() fails, and otherwise the result of
                     :  * send_cmd().
                     :  * NOTE(review): tlv_put_failure is presumably where the TLV_PUT_* macros
                     :  * jump on error - confirm against their definitions.
                     :  */
    2649           0 : static int send_fileattr(struct send_ctx *sctx, u64 ino, u64 gen, u64 fileattr)
    2650             : {
    2651           0 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    2652           0 :         int ret = 0;
    2653           0 :         struct fs_path *p;
    2654             : 
    2655           0 :         if (sctx->proto < 2) /* command is only emitted for proto >= 2 */
    2656             :                 return 0;
    2657             : 
    2658           0 :         btrfs_debug(fs_info, "send_fileattr %llu fileattr=%llu", ino, fileattr);
    2659             : 
    2660           0 :         p = fs_path_alloc();
    2661           0 :         if (!p)
    2662             :                 return -ENOMEM;
    2663             : 
    2664           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_FILEATTR);
    2665           0 :         if (ret < 0)
    2666           0 :                 goto out;
    2667             : 
    2668           0 :         ret = get_cur_path(sctx, ino, gen, p);
    2669           0 :         if (ret < 0)
    2670           0 :                 goto out;
    2671           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    2672           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_FILEATTR, fileattr);
    2673             : 
    2674           0 :         ret = send_cmd(sctx);
    2675             : 
    2676           0 : tlv_put_failure:
    2677           0 : out:
    2678           0 :         fs_path_free(p);
    2679           0 :         return ret;
    2680             : }
    2681             : /*
                     :  * Sends a BTRFS_SEND_C_CHOWN command for inode ino/gen.
                     :  *
                     :  * The command carries the path computed by get_cur_path() for ino/gen and
                     :  * the uid and gid as given.
                     :  *
                     :  * Returns -ENOMEM if the path cannot be allocated, a negative value if
                     :  * begin_cmd() or get_cur_path() fails, and otherwise the result of
                     :  * send_cmd().
                     :  * NOTE(review): tlv_put_failure is presumably where the TLV_PUT_* macros
                     :  * jump on error - confirm against their definitions.
                     :  */
    2682           0 : static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid)
    2683             : {
    2684           0 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    2685           0 :         int ret = 0;
    2686           0 :         struct fs_path *p;
    2687             : 
    2688           0 :         btrfs_debug(fs_info, "send_chown %llu uid=%llu, gid=%llu",
    2689             :                     ino, uid, gid);
    2690             : 
    2691           0 :         p = fs_path_alloc();
    2692           0 :         if (!p)
    2693             :                 return -ENOMEM;
    2694             : 
    2695           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN);
    2696           0 :         if (ret < 0)
    2697           0 :                 goto out;
    2698             : 
    2699           0 :         ret = get_cur_path(sctx, ino, gen, p);
    2700           0 :         if (ret < 0)
    2701           0 :                 goto out;
    2702           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    2703           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid);
    2704           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid);
    2705             : 
    2706           0 :         ret = send_cmd(sctx);
    2707             : 
    2708           0 : tlv_put_failure:
    2709           0 : out:
    2710           0 :         fs_path_free(p);
    2711           0 :         return ret;
    2712             : }
    2713             : /*
                     :  * Sends a BTRFS_SEND_C_UTIMES command for inode ino/gen.
                     :  *
                     :  * The inode item of ino is looked up in the send root; if there is no exact
                     :  * match -ENOENT is returned. The command carries the path computed by
                     :  * get_cur_path() plus the atime, mtime and ctime read from that inode item,
                     :  * and additionally the otime when sctx->proto is 2 or higher.
                     :  *
                     :  * Returns -ENOMEM if the fs_path or the btrfs_path cannot be allocated, a
                     :  * negative value if the search, begin_cmd() or get_cur_path() fails, and
                     :  * otherwise the result of send_cmd().
                     :  * NOTE(review): tlv_put_failure is presumably where the TLV_PUT_* macros
                     :  * jump on error - confirm against their definitions.
                     :  */
    2714           0 : static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
    2715             : {
    2716           0 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    2717           0 :         int ret = 0;
    2718           0 :         struct fs_path *p = NULL;
    2719           0 :         struct btrfs_inode_item *ii;
    2720           0 :         struct btrfs_path *path = NULL;
    2721           0 :         struct extent_buffer *eb;
    2722           0 :         struct btrfs_key key;
    2723           0 :         int slot;
    2724             : 
    2725           0 :         btrfs_debug(fs_info, "send_utimes %llu", ino);
    2726             : 
    2727           0 :         p = fs_path_alloc();
    2728           0 :         if (!p)
    2729             :                 return -ENOMEM;
    2730             : 
    2731           0 :         path = alloc_path_for_send();
    2732           0 :         if (!path) {
    2733           0 :                 ret = -ENOMEM;
    2734           0 :                 goto out;
    2735             :         }
    2736             : 
    2737           0 :         key.objectid = ino;
    2738           0 :         key.type = BTRFS_INODE_ITEM_KEY;
    2739           0 :         key.offset = 0;
    2740           0 :         ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
    2741           0 :         if (ret > 0) /* no exact match for the inode item key */
    2742             :                 ret = -ENOENT;
    2743           0 :         if (ret < 0)
    2744           0 :                 goto out;
    2745             : 
    2746           0 :         eb = path->nodes[0];
    2747           0 :         slot = path->slots[0];
    2748           0 :         ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
    2749             : 
    2750           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES);
    2751           0 :         if (ret < 0)
    2752           0 :                 goto out;
    2753             : 
    2754           0 :         ret = get_cur_path(sctx, ino, gen, p);
    2755           0 :         if (ret < 0)
    2756           0 :                 goto out;
    2757           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    2758           0 :         TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime);
    2759           0 :         TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime);
    2760           0 :         TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, &ii->ctime);
    2761           0 :         if (sctx->proto >= 2) /* otime is only part of the command from proto 2 on */
    2762           0 :                 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_OTIME, eb, &ii->otime);
    2763             : 
    2764           0 :         ret = send_cmd(sctx);
    2765             : 
    2766           0 : tlv_put_failure:
    2767           0 : out:
    2768           0 :         fs_path_free(p);
    2769           0 :         btrfs_free_path(path);
    2770           0 :         return ret;
    2771             : }
    2772             : 
    2773             : /*
                     :  * Records dir/gen in sctx->dir_utimes_cache. If an entry for dir/gen is
                     :  * already cached nothing is done. If the entry cannot be allocated or
                     :  * stored, send_utimes() is called for dir/gen right away instead.
                     :  *
    2774             :  * If the cache is full, we can't remove entries from it and do a call to
    2775             :  * send_utimes() for each respective inode, because we might be finishing
    2776             :  * processing an inode that is a directory and it just got renamed, and existing
    2777             :  * entries in the cache may refer to inodes that have the directory in their
    2778             :  * full path - in which case we would generate outdated paths (pre-rename)
    2779             :  * for the inodes that the cache entries point to. Instead of pruning the
    2780             :  * cache when inserting, do it after we finish processing each inode at
    2781             :  * finish_inode_if_needed().
                     :  *
                     :  * Returns 0 when the entry is (or already was) cached, otherwise the result
                     :  * of the fallback send_utimes() call.
    2782             :  */
    2783           0 : static int cache_dir_utimes(struct send_ctx *sctx, u64 dir, u64 gen)
    2784             : {
    2785           0 :         struct btrfs_lru_cache_entry *entry;
    2786           0 :         int ret;
    2787             : 
    2788           0 :         entry = btrfs_lru_cache_lookup(&sctx->dir_utimes_cache, dir, gen);
    2789           0 :         if (entry != NULL)
    2790             :                 return 0;
    2791             : 
    2792             :         /* Caching is optional, don't fail if we can't allocate memory. */
    2793           0 :         entry = kmalloc(sizeof(*entry), GFP_KERNEL);
    2794           0 :         if (!entry)
    2795           0 :                 return send_utimes(sctx, dir, gen);
    2796             : 
    2797           0 :         entry->key = dir;
    2798           0 :         entry->gen = gen;
    2799             : 
    2800           0 :         ret = btrfs_lru_cache_store(&sctx->dir_utimes_cache, entry, GFP_KERNEL);
    2801           0 :         ASSERT(ret != -EEXIST); /* the lookup above found no entry for dir/gen */
    2802           0 :         if (ret) {
    2803           0 :                 kfree(entry); /* not stored, so still ours to free */
    2804           0 :                 return send_utimes(sctx, dir, gen);
    2805             :         }
    2806             : 
    2807             :         return 0;
    2808             : }
    2809             : /*
                     :  * Shrinks sctx->dir_utimes_cache until it holds no more than
                     :  * SEND_MAX_DIR_UTIMES_CACHE_SIZE entries. For each entry returned by
                     :  * btrfs_lru_cache_lru_entry(), send_utimes() is called with its key/gen and
                     :  * the entry is then removed from the cache.
                     :  *
                     :  * Returns 0 on success. If send_utimes() returns non-zero, that value is
                     :  * returned immediately and the entry it was called for stays in the cache.
                     :  */
    2810           0 : static int trim_dir_utimes_cache(struct send_ctx *sctx)
    2811             : {
    2812           0 :         while (btrfs_lru_cache_size(&sctx->dir_utimes_cache) >
    2813             :                SEND_MAX_DIR_UTIMES_CACHE_SIZE) {
    2814           0 :                 struct btrfs_lru_cache_entry *lru;
    2815           0 :                 int ret;
    2816             : 
    2817           0 :                 lru = btrfs_lru_cache_lru_entry(&sctx->dir_utimes_cache);
    2818           0 :                 ASSERT(lru != NULL); /* size is above the limit, so the cache is not empty */
    2819             : 
    2820           0 :                 ret = send_utimes(sctx, lru->key, lru->gen);
    2821           0 :                 if (ret)
    2822           0 :                         return ret;
    2823             : 
    2824           0 :                 btrfs_lru_cache_remove(&sctx->dir_utimes_cache, lru);
    2825             :         }
    2826             : 
    2827             :         return 0;
    2828             : }
    2829             : 
    2830             : /*
    2831             :  * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have
    2832             :  * a valid path yet because we did not process the refs yet. So, the inode
    2833             :  * is created as orphan.
                        *
                        * The command is selected from the inode mode. When @ino is the inode
                        * currently being processed (sctx->cur_ino), generation, mode and rdev are
                        * taken from the send context; otherwise they are read from the send root
                        * with get_inode_info(). The path put into the command is the name
                        * produced by gen_unique_name().
                        *
                        * Returns 0 on success, -ENOMEM if the path cannot be allocated,
                        * -EOPNOTSUPP for an inode type that matches none of the handled modes,
                        * or the negative error returned by one of the helpers.
    2834             :  */
    2835           0 : static int send_create_inode(struct send_ctx *sctx, u64 ino)
    2836             : {
    2837           0 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    2838           0 :         int ret = 0;
    2839           0 :         struct fs_path *p;
    2840           0 :         int cmd;
    2841           0 :         struct btrfs_inode_info info;
    2842           0 :         u64 gen;
    2843           0 :         u64 mode;
    2844           0 :         u64 rdev;
    2845             : 
    2846           0 :         btrfs_debug(fs_info, "send_create_inode %llu", ino);
    2847             : 
    2848           0 :         p = fs_path_alloc();
    2849           0 :         if (!p)
    2850             :                 return -ENOMEM;
    2851             : 
                               /* Only look the inode up when the context does not describe it. */
    2852           0 :         if (ino != sctx->cur_ino) {
    2853           0 :                 ret = get_inode_info(sctx->send_root, ino, &info);
    2854           0 :                 if (ret < 0)
    2855           0 :                         goto out;
    2856           0 :                 gen = info.gen;
    2857           0 :                 mode = info.mode;
    2858           0 :                 rdev = info.rdev;
    2859             :         } else {
    2860           0 :                 gen = sctx->cur_inode_gen;
    2861           0 :                 mode = sctx->cur_inode_mode;
    2862           0 :                 rdev = sctx->cur_inode_rdev;
    2863             :         }
    2864             : 
                               /* Map the file type to the matching send command. */
    2865           0 :         if (S_ISREG(mode)) {
    2866             :                 cmd = BTRFS_SEND_C_MKFILE;
    2867           0 :         } else if (S_ISDIR(mode)) {
    2868             :                 cmd = BTRFS_SEND_C_MKDIR;
    2869           0 :         } else if (S_ISLNK(mode)) {
    2870             :                 cmd = BTRFS_SEND_C_SYMLINK;
    2871           0 :         } else if (S_ISCHR(mode) || S_ISBLK(mode)) {
    2872             :                 cmd = BTRFS_SEND_C_MKNOD;
    2873           0 :         } else if (S_ISFIFO(mode)) {
    2874             :                 cmd = BTRFS_SEND_C_MKFIFO;
    2875           0 :         } else if (S_ISSOCK(mode)) {
    2876             :                 cmd = BTRFS_SEND_C_MKSOCK;
    2877             :         } else {
    2878           0 :                 btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o",
    2879             :                                 (int)(mode & S_IFMT));
    2880           0 :                 ret = -EOPNOTSUPP;
    2881           0 :                 goto out;
    2882             :         }
    2883             : 
    2884           0 :         ret = begin_cmd(sctx, cmd);
    2885           0 :         if (ret < 0)
    2886           0 :                 goto out;
    2887             : 
    2888           0 :         ret = gen_unique_name(sctx, ino, gen, p);
    2889           0 :         if (ret < 0)
    2890           0 :                 goto out;
    2891             : 
                               /*
                                * NOTE(review): the TLV_PUT_* macros are defined outside this view;
                                * presumably they set ret and jump to tlv_put_failure on error.
                                */
    2892           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    2893           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino);
    2894             : 
                               /*
                                * Symlinks additionally carry their target (p is reused for it);
                                * device nodes, fifos and sockets carry rdev and mode.
                                */
    2895           0 :         if (S_ISLNK(mode)) {
    2896           0 :                 fs_path_reset(p);
    2897           0 :                 ret = read_symlink(sctx->send_root, ino, p);
    2898           0 :                 if (ret < 0)
    2899           0 :                         goto out;
    2900           0 :                 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
    2901           0 :         } else if (S_ISCHR(mode) || S_ISBLK(mode) ||
    2902           0 :                    S_ISFIFO(mode) || S_ISSOCK(mode)) {
    2903           0 :                 TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev));
    2904           0 :                 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode);
    2905             :         }
    2906             : 
    2907           0 :         ret = send_cmd(sctx);
    2908           0 :         if (ret < 0)
    2909           0 :                 goto out;
    2910             : 
    2911             : 
                       /* Both labels share one cleanup: the path buffer is freed on every exit. */
    2912           0 : tlv_put_failure:
    2913           0 : out:
    2914           0 :         fs_path_free(p);
    2915           0 :         return ret;
    2916             : }
    2917             : 
                       /*
                        * Remember in sctx->dir_created_cache that directory @dir was created.
                        *
                        * The entry is keyed by @dir with generation 0. This is best effort: an
                        * allocation failure is silently ignored, and if the store fails the entry
                        * is freed again. Nothing is reported to the caller.
                        */
    2918           0 : static void cache_dir_created(struct send_ctx *sctx, u64 dir)
    2919             : {
    2920           0 :         struct btrfs_lru_cache_entry *entry;
    2921           0 :         int ret;
    2922             : 
    2923             :         /* Caching is optional, ignore any failures. */
    2924           0 :         entry = kmalloc(sizeof(*entry), GFP_KERNEL);
    2925           0 :         if (!entry)
    2926             :                 return;
    2927             : 
    2928           0 :         entry->key = dir;
    2929           0 :         entry->gen = 0;
    2930           0 :         ret = btrfs_lru_cache_store(&sctx->dir_created_cache, entry, GFP_KERNEL);
                               /* The entry is still ours when the store fails, so release it. */
    2931           0 :         if (ret < 0)
    2932           0 :                 kfree(entry);
    2933             : }
    2934             : 
    2935             : /*
    2936             :  * We need some special handling for inodes that get processed before the parent
    2937             :  * directory got created. See process_recorded_refs for details.
    2938             :  * This function does the check if we already created the dir out of order.
                        *
                        * The dir_created_cache is consulted first. On a miss, the
                        * BTRFS_DIR_INDEX_KEY items of @dir in the send root are walked; the
                        * directory counts as created as soon as one entry points to something
                        * that is not a root item and whose inode number is below
                        * sctx->send_progress. A positive result is added to the cache.
                        *
                        * Returns 1 if the directory was already created, 0 if not, -ENOMEM if
                        * the path cannot be allocated, or the negative error reported by the
                        * iteration.
    2939             :  */
    2940           0 : static int did_create_dir(struct send_ctx *sctx, u64 dir)
    2941             : {
    2942           0 :         int ret = 0;
    2943           0 :         int iter_ret = 0;
    2944           0 :         struct btrfs_path *path = NULL;
    2945           0 :         struct btrfs_key key;
    2946           0 :         struct btrfs_key found_key;
    2947           0 :         struct btrfs_key di_key;
    2948           0 :         struct btrfs_dir_item *di;
    2949             : 
    2950           0 :         if (btrfs_lru_cache_lookup(&sctx->dir_created_cache, dir, 0))
    2951             :                 return 1;
    2952             : 
    2953           0 :         path = alloc_path_for_send();
    2954           0 :         if (!path)
    2955             :                 return -ENOMEM;
    2956             : 
                               /* Start at the first possible dir index key of @dir. */
    2957           0 :         key.objectid = dir;
    2958           0 :         key.type = BTRFS_DIR_INDEX_KEY;
    2959           0 :         key.offset = 0;
    2960             : 
    2961           0 :         btrfs_for_each_slot(sctx->send_root, &key, &found_key, path, iter_ret) {
    2962           0 :                 struct extent_buffer *eb = path->nodes[0];
    2963             : 
                                       /* Walked past the dir index items of @dir: nothing matched. */
    2964           0 :                 if (found_key.objectid != key.objectid ||
    2965           0 :                     found_key.type != key.type) {
    2966             :                         ret = 0;
    2967             :                         break;
    2968             :                 }
    2969             : 
    2970           0 :                 di = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dir_item);
    2971           0 :                 btrfs_dir_item_key_to_cpu(eb, di, &di_key);
    2972             : 
                                       /*
                                        * An entry for an inode below send_progress (presumably one
                                        * that was already processed) means the dir must exist already.
                                        */
    2973           0 :                 if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
    2974           0 :                     di_key.objectid < sctx->send_progress) {
    2975           0 :                         ret = 1;
    2976           0 :                         cache_dir_created(sctx, dir);
    2977           0 :                         break;
    2978             :                 }
    2979             :         }
    2980             :         /* Catch error found during iteration */
    2981           0 :         if (iter_ret < 0)
    2982           0 :                 ret = iter_ret;
    2983             : 
    2984           0 :         btrfs_free_path(path);
    2985           0 :         return ret;
    2986             : }
    2987             : 
    2988             : /*
    2989             :  * Only creates the inode if it is:
    2990             :  * 1. Not a directory
    2991             :  * 2. Or a directory which was not created already due to out of order
    2992             :  *    directories. See did_create_dir and process_recorded_refs for details.
                        *
                        * Operates on the inode currently described by the send context
                        * (sctx->cur_ino / sctx->cur_inode_mode). A directory that gets created
                        * here is also recorded in the dir_created_cache.
                        *
                        * Returns 0 when the inode was created or creation was not needed, or a
                        * negative error from did_create_dir() or send_create_inode().
    2993             :  */
    2994           0 : static int send_create_inode_if_needed(struct send_ctx *sctx)
    2995             : {
    2996           0 :         int ret;
    2997             : 
    2998           0 :         if (S_ISDIR(sctx->cur_inode_mode)) {
    2999           0 :                 ret = did_create_dir(sctx, sctx->cur_ino);
    3000           0 :                 if (ret < 0)
    3001             :                         return ret;
                                       /* Directory already exists, nothing to send. */
    3002           0 :                 else if (ret > 0)
    3003             :                         return 0;
    3004             :         }
    3005             : 
    3006           0 :         ret = send_create_inode(sctx, sctx->cur_ino);
    3007             : 
    3008           0 :         if (ret == 0 && S_ISDIR(sctx->cur_inode_mode))
    3009           0 :                 cache_dir_created(sctx, sctx->cur_ino);
    3010             : 
    3011             :         return ret;
    3012             : }
    3013             : 
                       /*
                        * One recorded reference (name of an inode inside a directory).
                        *
                        * @list:      links the ref into a list_head such as sctx->new_refs,
                        *             sctx->deleted_refs or a pending move's update_refs
                        * @name:      last path component; set_ref_path() points it into the buffer
                        *             of @full_path, it is not a separate allocation
                        * @full_path: path of the ref, released by recorded_ref_free()
                        * @dir:       inode number of the directory (copied by dup_ref())
                        * @dir_gen:   generation of that directory (copied by dup_ref())
                        * @name_len:  length of @name, computed as full_path->end - name
                        * @node:      rb-tree linkage; cleared at allocation and erased from @root
                        *             on free when it is linked
                        * @root:      the rb-tree @node is erased from in recorded_ref_free()
                        */
    3014             : struct recorded_ref {
    3015             :         struct list_head list;
    3016             :         char *name;
    3017             :         struct fs_path *full_path;
    3018             :         u64 dir;
    3019             :         u64 dir_gen;
    3020             :         int name_len;
    3021             :         struct rb_node node;
    3022             :         struct rb_root *root;
    3023             : };
    3024             : 
                       /*
                        * Allocate a zeroed recorded_ref that is not linked anywhere.
                        *
                        * The rb node is cleared and the list head initialised because
                        * recorded_ref_free() tests RB_EMPTY_NODE() and calls list_del()
                        * unconditionally.
                        *
                        * Returns the new ref, or NULL if the allocation fails.
                        */
    3025           0 : static struct recorded_ref *recorded_ref_alloc(void)
    3026             : {
    3027           0 :         struct recorded_ref *ref;
    3028             : 
    3029           0 :         ref = kzalloc(sizeof(*ref), GFP_KERNEL);
    3030           0 :         if (!ref)
    3031             :                 return NULL;
    3032           0 :         RB_CLEAR_NODE(&ref->node);
    3033           0 :         INIT_LIST_HEAD(&ref->list);
    3034           0 :         return ref;
    3035             : }
    3036             : 
                       /*
                        * Unlink and free a recorded_ref. A NULL @ref is accepted and ignored.
                        *
                        * The ref is erased from ref->root when its rb node is linked, removed
                        * from whatever list it is on, and its full_path is freed together with
                        * the ref itself. ref->name is not freed separately.
                        */
    3037           0 : static void recorded_ref_free(struct recorded_ref *ref)
    3038             : {
    3039           0 :         if (!ref)
    3040             :                 return;
    3041           0 :         if (!RB_EMPTY_NODE(&ref->node))
    3042           0 :                 rb_erase(&ref->node, ref->root);
    3043           0 :         list_del(&ref->list);
    3044           0 :         fs_path_free(ref->full_path);
    3045           0 :         kfree(ref);
    3046             : }
    3047             : 
                       /*
                        * Attach @path to @ref and derive the name fields from it.
                        *
                        * ref->name is set to the last component of the path as returned by
                        * kbasename(), so it points into the path's own buffer, and ref->name_len
                        * is the distance from there to the end of the path. recorded_ref_free()
                        * later frees ref->full_path, i.e. the path handed in here.
                        */
    3048           0 : static void set_ref_path(struct recorded_ref *ref, struct fs_path *path)
    3049             : {
    3050           0 :         ref->full_path = path;
    3051           0 :         ref->name = (char *)kbasename(ref->full_path->start);
    3052           0 :         ref->name_len = ref->full_path->end - ref->name;
    3053           0 : }
    3054             : 
                       /*
                        * Append a partial copy of @ref to the tail of @list.
                        *
                        * Only dir and dir_gen are copied; the new ref comes zeroed from
                        * recorded_ref_alloc(), so it has no path and no name.
                        *
                        * Returns 0 on success or -ENOMEM if the new ref cannot be allocated.
                        */
    3055           0 : static int dup_ref(struct recorded_ref *ref, struct list_head *list)
    3056             : {
    3057           0 :         struct recorded_ref *new;
    3058             : 
    3059           0 :         new = recorded_ref_alloc();
    3060           0 :         if (!new)
    3061             :                 return -ENOMEM;
    3062             : 
    3063           0 :         new->dir = ref->dir;
    3064           0 :         new->dir_gen = ref->dir_gen;
    3065           0 :         list_add_tail(&new->list, list);
    3066           0 :         return 0;
    3067             : }
    3068             : 
                       /*
                        * Free every recorded_ref on the list @head, leaving the list empty.
                        *
                        * The loop always takes the first entry; it terminates because
                        * recorded_ref_free() removes the entry from the list before freeing it.
                        */
    3069             : static void __free_recorded_refs(struct list_head *head)
    3070             : {
    3071           0 :         struct recorded_ref *cur;
    3072             : 
    3073           0 :         while (!list_empty(head)) {
    3074           0 :                 cur = list_entry(head->next, struct recorded_ref, list);
    3075           0 :                 recorded_ref_free(cur);
    3076             :         }
    3077             : }
    3078             : 
                       /* Drop all refs recorded on sctx->new_refs and sctx->deleted_refs. */
    3079           0 : static void free_recorded_refs(struct send_ctx *sctx)
    3080             : {
    3081           0 :         __free_recorded_refs(&sctx->new_refs);
    3082           0 :         __free_recorded_refs(&sctx->deleted_refs);
    3083           0 : }
    3084             : 
    3085             : /*
    3086             :  * Renames/moves a file/dir to its orphan name. Used when the first
    3087             :  * ref of an unprocessed inode gets overwritten and for all non empty
    3088             :  * directories.
                        *
                        * The orphan name is produced by gen_unique_name() for (@ino, @gen) and a
                        * rename from @path to that name is sent. The temporary orphan path is
                        * freed before returning; @path itself is left to the caller.
                        *
                        * Returns the result of send_rename(), -ENOMEM if the orphan path cannot
                        * be allocated, or the negative error from gen_unique_name().
    3089             :  */
    3090           0 : static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
    3091             :                           struct fs_path *path)
    3092             : {
    3093           0 :         int ret;
    3094           0 :         struct fs_path *orphan;
    3095             : 
    3096           0 :         orphan = fs_path_alloc();
    3097           0 :         if (!orphan)
    3098             :                 return -ENOMEM;
    3099             : 
    3100           0 :         ret = gen_unique_name(sctx, ino, gen, orphan);
    3101           0 :         if (ret < 0)
    3102           0 :                 goto out;
    3103             : 
    3104           0 :         ret = send_rename(sctx, path, orphan);
    3105             : 
    3106           0 : out:
    3107           0 :         fs_path_free(orphan);
    3108           0 :         return ret;
    3109             : }
    3110             : 
                       /*
                        * Find or create the orphan_dir_info for (@dir_ino, @dir_gen) in the
                        * sctx->orphan_dirs rb-tree, which is ordered by inode number and then by
                        * generation.
                        *
                        * An existing entry is returned unchanged. A new entry starts with
                        * last_dir_index_offset and dir_high_seq_ino set to 0.
                        *
                        * Returns the entry, or ERR_PTR(-ENOMEM) if a new one cannot be allocated.
                        */
    3111           0 : static struct orphan_dir_info *add_orphan_dir_info(struct send_ctx *sctx,
    3112             :                                                    u64 dir_ino, u64 dir_gen)
    3113             : {
    3114           0 :         struct rb_node **p = &sctx->orphan_dirs.rb_node;
    3115           0 :         struct rb_node *parent = NULL;
    3116           0 :         struct orphan_dir_info *entry, *odi;
    3117             : 
                               /* Descend to the matching entry or to the insertion point. */
    3118           0 :         while (*p) {
    3119           0 :                 parent = *p;
    3120           0 :                 entry = rb_entry(parent, struct orphan_dir_info, node);
    3121           0 :                 if (dir_ino < entry->ino)
    3122           0 :                         p = &(*p)->rb_left;
    3123           0 :                 else if (dir_ino > entry->ino)
    3124           0 :                         p = &(*p)->rb_right;
    3125           0 :                 else if (dir_gen < entry->gen)
    3126           0 :                         p = &(*p)->rb_left;
    3127           0 :                 else if (dir_gen > entry->gen)
    3128           0 :                         p = &(*p)->rb_right;
    3129             :                 else
    3130           0 :                         return entry;
    3131             :         }
    3132             : 
    3133           0 :         odi = kmalloc(sizeof(*odi), GFP_KERNEL);
    3134           0 :         if (!odi)
    3135             :                 return ERR_PTR(-ENOMEM);
    3136           0 :         odi->ino = dir_ino;
    3137           0 :         odi->gen = dir_gen;
    3138           0 :         odi->last_dir_index_offset = 0;
    3139           0 :         odi->dir_high_seq_ino = 0;
    3140             : 
    3141           0 :         rb_link_node(&odi->node, parent, p);
    3142           0 :         rb_insert_color(&odi->node, &sctx->orphan_dirs);
    3143           0 :         return odi;
    3144             : }
    3145             : 
                       /*
                        * Look up the orphan_dir_info for (@dir_ino, @gen) in sctx->orphan_dirs,
                        * comparing the inode number first and the generation second.
                        *
                        * Returns the entry, or NULL if there is none.
                        */
    3146           0 : static struct orphan_dir_info *get_orphan_dir_info(struct send_ctx *sctx,
    3147             :                                                    u64 dir_ino, u64 gen)
    3148             : {
    3149           0 :         struct rb_node *n = sctx->orphan_dirs.rb_node;
    3150           0 :         struct orphan_dir_info *entry;
    3151             : 
    3152           0 :         while (n) {
    3153           0 :                 entry = rb_entry(n, struct orphan_dir_info, node);
    3154           0 :                 if (dir_ino < entry->ino)
    3155           0 :                         n = n->rb_left;
    3156           0 :                 else if (dir_ino > entry->ino)
    3157           0 :                         n = n->rb_right;
    3158           0 :                 else if (gen < entry->gen)
    3159           0 :                         n = n->rb_left;
    3160           0 :                 else if (gen > entry->gen)
    3161           0 :                         n = n->rb_right;
    3162             :                 else
    3163           0 :                         return entry;
    3164             :         }
    3165             :         return NULL;
    3166             : }
    3167             : 
                       /*
                        * Returns 1 if an orphan_dir_info exists for (@dir_ino, @gen) in
                        * sctx->orphan_dirs, 0 otherwise.
                        */
    3168             : static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen)
    3169             : {
    3170           0 :         struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino, gen);
    3171             : 
    3172           0 :         return odi != NULL;
    3173             : }
    3174             : 
                       /*
                        * Remove @odi from sctx->orphan_dirs and free it. A NULL @odi is accepted
                        * and ignored.
                        */
    3175           0 : static void free_orphan_dir_info(struct send_ctx *sctx,
    3176             :                                  struct orphan_dir_info *odi)
    3177             : {
    3178           0 :         if (!odi)
    3179             :                 return;
    3180           0 :         rb_erase(&odi->node, &sctx->orphan_dirs);
    3181           0 :         kfree(odi);
    3182             : }
    3183             : 
    3184             : /*
    3185             :  * Returns 1 if a directory can be removed at this point in time.
    3186             :  * We check this by iterating all dir items and checking if the inode behind
    3187             :  * the dir item was already processed.
                        *
                        * Returns 0 if the directory cannot be removed yet, or a negative error.
                        * The subvolume's top directory (BTRFS_FIRST_FREE_OBJECTID) always yields 0.
                        *
                        * Whenever 0 is returned after the scan started, an orphan_dir_info for
                        * (@dir, @dir_gen) is created or updated with the last dir index offset
                        * looked at and the highest inode number seen. A later call uses
                        * dir_high_seq_ino to bail out early and last_dir_index_offset to resume
                        * the scan. When 1 is returned after a full scan, an existing
                        * orphan_dir_info is removed and freed.
    3188             :  */
    3189           0 : static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen)
    3190             : {
    3191           0 :         int ret = 0;
    3192           0 :         int iter_ret = 0;
    3193           0 :         struct btrfs_root *root = sctx->parent_root;
    3194           0 :         struct btrfs_path *path;
    3195           0 :         struct btrfs_key key;
    3196           0 :         struct btrfs_key found_key;
    3197           0 :         struct btrfs_key loc;
    3198           0 :         struct btrfs_dir_item *di;
    3199           0 :         struct orphan_dir_info *odi = NULL;
    3200           0 :         u64 dir_high_seq_ino = 0;
    3201           0 :         u64 last_dir_index_offset = 0;
    3202             : 
    3203             :         /*
    3204             :          * Don't try to rmdir the top/root subvolume dir.
    3205             :          */
    3206           0 :         if (dir == BTRFS_FIRST_FREE_OBJECTID)
    3207             :                 return 0;
    3208             : 
                               /*
                                * Fast path: an earlier call recorded an inode number in this
                                * directory that is still ahead of the current inode.
                                */
    3209           0 :         odi = get_orphan_dir_info(sctx, dir, dir_gen);
    3210           0 :         if (odi && sctx->cur_ino < odi->dir_high_seq_ino)
    3211             :                 return 0;
    3212             : 
    3213           0 :         path = alloc_path_for_send();
    3214           0 :         if (!path)
    3215             :                 return -ENOMEM;
    3216             : 
    3217           0 :         if (!odi) {
    3218             :                 /*
    3219             :                  * Find the inode number associated with the last dir index
    3220             :                  * entry. This is very likely the inode with the highest number
    3221             :                  * of all inodes that have an entry in the directory. We can
    3222             :                  * then use it to avoid future calls to can_rmdir(), when
    3223             :                  * processing inodes with a lower number, from having to search
    3224             :                  * the parent root b+tree for dir index keys.
    3225             :                  */
    3226           0 :                 key.objectid = dir;
    3227           0 :                 key.type = BTRFS_DIR_INDEX_KEY;
    3228           0 :                 key.offset = (u64)-1;
    3229             : 
    3230           0 :                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    3231           0 :                 if (ret < 0) {
    3232           0 :                         goto out;
    3233           0 :                 } else if (ret > 0) {
    3234             :                         /* Can't happen, the root is never empty. */
    3235           0 :                         ASSERT(path->slots[0] > 0);
    3236           0 :                         if (WARN_ON(path->slots[0] == 0)) {
    3237           0 :                                 ret = -EUCLEAN;
    3238           0 :                                 goto out;
    3239             :                         }
                                               /* No exact match: step back to the preceding item. */
    3240           0 :                         path->slots[0]--;
    3241             :                 }
    3242             : 
    3243           0 :                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
    3244           0 :                 if (key.objectid != dir || key.type != BTRFS_DIR_INDEX_KEY) {
    3245             :                         /* No index keys, dir can be removed. */
    3246           0 :                         ret = 1;
    3247           0 :                         goto out;
    3248             :                 }
    3249             : 
    3250           0 :                 di = btrfs_item_ptr(path->nodes[0], path->slots[0],
    3251             :                                     struct btrfs_dir_item);
    3252           0 :                 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
    3253           0 :                 dir_high_seq_ino = loc.objectid;
                                       /* ret == 0 here makes the tail below record this inode number. */
    3254           0 :                 if (sctx->cur_ino < dir_high_seq_ino) {
    3255           0 :                         ret = 0;
    3256           0 :                         goto out;
    3257             :                 }
    3258             : 
    3259           0 :                 btrfs_release_path(path);
    3260             :         }
    3261             : 
                               /* Scan the dir index items, resuming where a previous call stopped. */
    3262           0 :         key.objectid = dir;
    3263           0 :         key.type = BTRFS_DIR_INDEX_KEY;
    3264           0 :         key.offset = (odi ? odi->last_dir_index_offset : 0);
    3265             : 
    3266           0 :         btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
    3267           0 :                 struct waiting_dir_move *dm;
    3268             : 
    3269           0 :                 if (found_key.objectid != key.objectid ||
    3270           0 :                     found_key.type != key.type)
    3271             :                         break;
    3272             : 
    3273           0 :                 di = btrfs_item_ptr(path->nodes[0], path->slots[0],
    3274             :                                 struct btrfs_dir_item);
    3275           0 :                 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
    3276             : 
    3277           0 :                 dir_high_seq_ino = max(dir_high_seq_ino, loc.objectid);
    3278           0 :                 last_dir_index_offset = found_key.offset;
    3279             : 
                                       /*
                                        * The entry's inode has a pending directory move: tag the
                                        * move with this directory (presumably so the rmdir is
                                        * retried once the move is done) and give up for now.
                                        */
    3280           0 :                 dm = get_waiting_dir_move(sctx, loc.objectid);
    3281           0 :                 if (dm) {
    3282           0 :                         dm->rmdir_ino = dir;
    3283           0 :                         dm->rmdir_gen = dir_gen;
    3284           0 :                         ret = 0;
    3285           0 :                         goto out;
    3286             :                 }
    3287             : 
                                       /* The entry's inode number is still ahead of the current one. */
    3288           0 :                 if (loc.objectid > sctx->cur_ino) {
    3289           0 :                         ret = 0;
    3290           0 :                         goto out;
    3291             :                 }
    3292             :         }
    3293           0 :         if (iter_ret < 0) {
    3294           0 :                 ret = iter_ret;
    3295           0 :                 goto out;
    3296             :         }
                               /* Full scan found no blocker: the cached state is no longer needed. */
    3297           0 :         free_orphan_dir_info(sctx, odi);
    3298             : 
    3299           0 :         ret = 1;
    3300             : 
    3301           0 : out:
    3302           0 :         btrfs_free_path(path);
    3303             : 
                               /* Errors and "can remove" return as is; only ret == 0 updates state. */
    3304           0 :         if (ret)
    3305             :                 return ret;
    3306             : 
    3307           0 :         if (!odi) {
    3308           0 :                 odi = add_orphan_dir_info(sctx, dir, dir_gen);
    3309           0 :                 if (IS_ERR(odi))
    3310           0 :                         return PTR_ERR(odi);
    3311             : 
                                       /*
                                        * NOTE(review): add_orphan_dir_info() already stores dir_gen in
                                        * a new entry, and an existing entry matched on it, so this
                                        * assignment looks redundant.
                                        */
    3312           0 :                 odi->gen = dir_gen;
    3313             :         }
    3314             : 
    3315           0 :         odi->last_dir_index_offset = last_dir_index_offset;
    3316           0 :         odi->dir_high_seq_ino = max(odi->dir_high_seq_ino, dir_high_seq_ino);
    3317             : 
    3318           0 :         return 0;
    3319             : }
    3320             : 
                       /*
                        * Returns 1 if a waiting_dir_move entry exists for @ino in
                        * sctx->waiting_dir_moves, 0 otherwise.
                        */
    3321           0 : static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
    3322             : {
    3323           0 :         struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino);
    3324             : 
    3325           0 :         return entry != NULL;
    3326             : }
    3327             : 
                       /*
                        * Insert a waiting_dir_move for @ino into the sctx->waiting_dir_moves
                        * rb-tree, which is keyed by inode number.
                        *
                        * The new entry starts with rmdir_ino and rmdir_gen set to 0 and records
                        * the @orphanized flag.
                        *
                        * Returns 0 on success, -ENOMEM if the allocation fails, or -EEXIST if an
                        * entry for @ino is already present (the new allocation is freed).
                        */
    3328           0 : static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)
    3329             : {
    3330           0 :         struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
    3331           0 :         struct rb_node *parent = NULL;
    3332           0 :         struct waiting_dir_move *entry, *dm;
    3333             : 
    3334           0 :         dm = kmalloc(sizeof(*dm), GFP_KERNEL);
    3335           0 :         if (!dm)
    3336             :                 return -ENOMEM;
    3337           0 :         dm->ino = ino;
    3338           0 :         dm->rmdir_ino = 0;
    3339           0 :         dm->rmdir_gen = 0;
    3340           0 :         dm->orphanized = orphanized;
    3341             : 
                               /* Find the insertion point; a duplicate inode number is an error. */
    3342           0 :         while (*p) {
    3343           0 :                 parent = *p;
    3344           0 :                 entry = rb_entry(parent, struct waiting_dir_move, node);
    3345           0 :                 if (ino < entry->ino) {
    3346           0 :                         p = &(*p)->rb_left;
    3347           0 :                 } else if (ino > entry->ino) {
    3348           0 :                         p = &(*p)->rb_right;
    3349             :                 } else {
    3350           0 :                         kfree(dm);
    3351           0 :                         return -EEXIST;
    3352             :                 }
    3353             :         }
    3354             : 
    3355           0 :         rb_link_node(&dm->node, parent, p);
    3356           0 :         rb_insert_color(&dm->node, &sctx->waiting_dir_moves);
    3357           0 :         return 0;
    3358             : }
    3359             : 
                       /*
                        * Look up the waiting_dir_move for @ino in sctx->waiting_dir_moves.
                        *
                        * Returns the entry, or NULL if there is none.
                        */
    3360             : static struct waiting_dir_move *
    3361             : get_waiting_dir_move(struct send_ctx *sctx, u64 ino)
    3362             : {
    3363           0 :         struct rb_node *n = sctx->waiting_dir_moves.rb_node;
    3364           0 :         struct waiting_dir_move *entry;
    3365             : 
    3366           0 :         while (n) {
    3367           0 :                 entry = rb_entry(n, struct waiting_dir_move, node);
    3368           0 :                 if (ino < entry->ino)
    3369           0 :                         n = n->rb_left;
    3370           0 :                 else if (ino > entry->ino)
    3371           0 :                         n = n->rb_right;
    3372             :                 else
    3373             :                         return entry;
    3374             :         }
    3375             :         return NULL;
    3376             : }
    3377             : 
                       /*
                        * Remove @dm from sctx->waiting_dir_moves and free it. A NULL @dm is
                        * accepted and ignored.
                        */
    3378           0 : static void free_waiting_dir_move(struct send_ctx *sctx,
    3379             :                                   struct waiting_dir_move *dm)
    3380             : {
    3381           0 :         if (!dm)
    3382             :                 return;
    3383           0 :         rb_erase(&dm->node, &sctx->waiting_dir_moves);
    3384           0 :         kfree(dm);
    3385             : }
    3386             : 
                       /*
                        * Record a pending directory move for inode (@ino, @ino_gen) under
                        * @parent_ino.
                        *
                        * sctx->pending_dir_moves is an rb-tree keyed by parent_ino. The first
                        * move for a given parent_ino becomes a tree node; further moves with the
                        * same parent_ino are chained onto that node's list. The refs on
                        * @deleted_refs and then @new_refs are duplicated with dup_ref() (dir and
                        * dir_gen only) onto the move's update_refs list, and a waiting_dir_move
                        * is added for @ino with @is_orphan as its orphanized flag.
                        *
                        * Returns 0 on success, -ENOMEM on allocation failure, or the error from
                        * add_waiting_dir_move(). On failure the duplicated refs and the move are
                        * freed; the move has not been linked anywhere at that point.
                        */
    3387           0 : static int add_pending_dir_move(struct send_ctx *sctx,
    3388             :                                 u64 ino,
    3389             :                                 u64 ino_gen,
    3390             :                                 u64 parent_ino,
    3391             :                                 struct list_head *new_refs,
    3392             :                                 struct list_head *deleted_refs,
    3393             :                                 const bool is_orphan)
    3394             : {
    3395           0 :         struct rb_node **p = &sctx->pending_dir_moves.rb_node;
    3396           0 :         struct rb_node *parent = NULL;
    3397           0 :         struct pending_dir_move *entry = NULL, *pm;
    3398           0 :         struct recorded_ref *cur;
    3399           0 :         int exists = 0;
    3400           0 :         int ret;
    3401             : 
    3402           0 :         pm = kmalloc(sizeof(*pm), GFP_KERNEL);
    3403           0 :         if (!pm)
    3404             :                 return -ENOMEM;
    3405           0 :         pm->parent_ino = parent_ino;
    3406           0 :         pm->ino = ino;
    3407           0 :         pm->gen = ino_gen;
    3408           0 :         INIT_LIST_HEAD(&pm->list);
    3409           0 :         INIT_LIST_HEAD(&pm->update_refs);
    3410           0 :         RB_CLEAR_NODE(&pm->node);
    3411             : 
                               /*
                                * Look for a node with the same parent_ino. If there is none, p and
                                * parent are left at the insertion point used further down.
                                */
    3412           0 :         while (*p) {
    3413           0 :                 parent = *p;
    3414           0 :                 entry = rb_entry(parent, struct pending_dir_move, node);
    3415           0 :                 if (parent_ino < entry->parent_ino) {
    3416           0 :                         p = &(*p)->rb_left;
    3417           0 :                 } else if (parent_ino > entry->parent_ino) {
    3418           0 :                         p = &(*p)->rb_right;
    3419             :                 } else {
    3420             :                         exists = 1;
    3421             :                         break;
    3422             :                 }
    3423             :         }
    3424             : 
    3425           0 :         list_for_each_entry(cur, deleted_refs, list) {
    3426           0 :                 ret = dup_ref(cur, &pm->update_refs);
    3427           0 :                 if (ret < 0)
    3428           0 :                         goto out;
    3429             :         }
    3430           0 :         list_for_each_entry(cur, new_refs, list) {
    3431           0 :                 ret = dup_ref(cur, &pm->update_refs);
    3432           0 :                 if (ret < 0)
    3433           0 :                         goto out;
    3434             :         }
    3435             : 
    3436           0 :         ret = add_waiting_dir_move(sctx, pm->ino, is_orphan);
    3437           0 :         if (ret)
    3438           0 :                 goto out;
    3439             : 
                               /* Nothing below can fail, so pm is only ever linked on success. */
    3440           0 :         if (exists) {
    3441           0 :                 list_add_tail(&pm->list, &entry->list);
    3442             :         } else {
    3443           0 :                 rb_link_node(&pm->node, parent, p);
    3444           0 :                 rb_insert_color(&pm->node, &sctx->pending_dir_moves);
    3445             :         }
    3446             :         ret = 0;
    3447           0 : out:
    3448           0 :         if (ret) {
    3449             :                 __free_recorded_refs(&pm->update_refs);
    3450           0 :                 kfree(pm);
    3451             :         }
    3452             :         return ret;
    3453             : }
    3454             : 
                       /*
                        * Look up the pending_dir_move tree node for @parent_ino in
                        * sctx->pending_dir_moves.
                        *
                        * Returns the node, or NULL if there is none. Further moves with the same
                        * parent_ino are chained on the returned node's list by
                        * add_pending_dir_move().
                        */
    3455             : static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
    3456             :                                                       u64 parent_ino)
    3457             : {
    3458           0 :         struct rb_node *n = sctx->pending_dir_moves.rb_node;
    3459           0 :         struct pending_dir_move *entry;
    3460             : 
    3461           0 :         while (n) {
    3462           0 :                 entry = rb_entry(n, struct pending_dir_move, node);
    3463           0 :                 if (parent_ino < entry->parent_ino)
    3464           0 :                         n = n->rb_left;
    3465           0 :                 else if (parent_ino > entry->parent_ino)
    3466           0 :                         n = n->rb_right;
    3467             :                 else
    3468             :                         return entry;
    3469             :         }
    3470             :         return NULL;
    3471             : }
    3472             : 
    3473           0 : static int path_loop(struct send_ctx *sctx, struct fs_path *name,
    3474             :                      u64 ino, u64 gen, u64 *ancestor_ino)
    3475             : {
    3476           0 :         int ret = 0;
    3477           0 :         u64 parent_inode = 0;
    3478           0 :         u64 parent_gen = 0;
    3479           0 :         u64 start_ino = ino;
    3480             : 
    3481           0 :         *ancestor_ino = 0;
    3482           0 :         while (ino != BTRFS_FIRST_FREE_OBJECTID) {
    3483           0 :                 fs_path_reset(name);
    3484             : 
    3485           0 :                 if (is_waiting_for_rm(sctx, ino, gen))
    3486             :                         break;
    3487           0 :                 if (is_waiting_for_move(sctx, ino)) {
    3488           0 :                         if (*ancestor_ino == 0)
    3489           0 :                                 *ancestor_ino = ino;
    3490           0 :                         ret = get_first_ref(sctx->parent_root, ino,
    3491             :                                             &parent_inode, &parent_gen, name);
    3492             :                 } else {
    3493           0 :                         ret = __get_cur_name_and_parent(sctx, ino, gen,
    3494             :                                                         &parent_inode,
    3495             :                                                         &parent_gen, name);
    3496           0 :                         if (ret > 0) {
    3497             :                                 ret = 0;
    3498             :                                 break;
    3499             :                         }
    3500             :                 }
    3501           0 :                 if (ret < 0)
    3502             :                         break;
    3503           0 :                 if (parent_inode == start_ino) {
    3504           0 :                         ret = 1;
    3505           0 :                         if (*ancestor_ino == 0)
    3506           0 :                                 *ancestor_ino = ino;
    3507             :                         break;
    3508             :                 }
    3509           0 :                 ino = parent_inode;
    3510           0 :                 gen = parent_gen;
    3511             :         }
    3512           0 :         return ret;
    3513             : }
    3514             : 
    3515           0 : static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
    3516             : {
    3517           0 :         struct fs_path *from_path = NULL;
    3518           0 :         struct fs_path *to_path = NULL;
    3519           0 :         struct fs_path *name = NULL;
    3520           0 :         u64 orig_progress = sctx->send_progress;
    3521           0 :         struct recorded_ref *cur;
    3522           0 :         u64 parent_ino, parent_gen;
    3523           0 :         struct waiting_dir_move *dm = NULL;
    3524           0 :         u64 rmdir_ino = 0;
    3525           0 :         u64 rmdir_gen;
    3526           0 :         u64 ancestor;
    3527           0 :         bool is_orphan;
    3528           0 :         int ret;
    3529             : 
    3530           0 :         name = fs_path_alloc();
    3531           0 :         from_path = fs_path_alloc();
    3532           0 :         if (!name || !from_path) {
    3533           0 :                 ret = -ENOMEM;
    3534           0 :                 goto out;
    3535             :         }
    3536             : 
    3537           0 :         dm = get_waiting_dir_move(sctx, pm->ino);
    3538           0 :         ASSERT(dm);
    3539           0 :         rmdir_ino = dm->rmdir_ino;
    3540           0 :         rmdir_gen = dm->rmdir_gen;
    3541           0 :         is_orphan = dm->orphanized;
    3542           0 :         free_waiting_dir_move(sctx, dm);
    3543             : 
    3544           0 :         if (is_orphan) {
    3545           0 :                 ret = gen_unique_name(sctx, pm->ino,
    3546             :                                       pm->gen, from_path);
    3547             :         } else {
    3548           0 :                 ret = get_first_ref(sctx->parent_root, pm->ino,
    3549             :                                     &parent_ino, &parent_gen, name);
    3550           0 :                 if (ret < 0)
    3551           0 :                         goto out;
    3552           0 :                 ret = get_cur_path(sctx, parent_ino, parent_gen,
    3553             :                                    from_path);
    3554           0 :                 if (ret < 0)
    3555           0 :                         goto out;
    3556           0 :                 ret = fs_path_add_path(from_path, name);
    3557             :         }
    3558           0 :         if (ret < 0)
    3559           0 :                 goto out;
    3560             : 
    3561           0 :         sctx->send_progress = sctx->cur_ino + 1;
    3562           0 :         ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
    3563           0 :         if (ret < 0)
    3564           0 :                 goto out;
    3565           0 :         if (ret) {
    3566           0 :                 LIST_HEAD(deleted_refs);
    3567           0 :                 ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
    3568           0 :                 ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
    3569             :                                            &pm->update_refs, &deleted_refs,
    3570             :                                            is_orphan);
    3571           0 :                 if (ret < 0)
    3572           0 :                         goto out;
    3573           0 :                 if (rmdir_ino) {
    3574           0 :                         dm = get_waiting_dir_move(sctx, pm->ino);
    3575           0 :                         ASSERT(dm);
    3576           0 :                         dm->rmdir_ino = rmdir_ino;
    3577           0 :                         dm->rmdir_gen = rmdir_gen;
    3578             :                 }
    3579           0 :                 goto out;
    3580             :         }
    3581           0 :         fs_path_reset(name);
    3582           0 :         to_path = name;
    3583           0 :         name = NULL;
    3584           0 :         ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
    3585           0 :         if (ret < 0)
    3586           0 :                 goto out;
    3587             : 
    3588           0 :         ret = send_rename(sctx, from_path, to_path);
    3589           0 :         if (ret < 0)
    3590           0 :                 goto out;
    3591             : 
    3592           0 :         if (rmdir_ino) {
    3593           0 :                 struct orphan_dir_info *odi;
    3594           0 :                 u64 gen;
    3595             : 
    3596           0 :                 odi = get_orphan_dir_info(sctx, rmdir_ino, rmdir_gen);
    3597           0 :                 if (!odi) {
    3598             :                         /* already deleted */
    3599           0 :                         goto finish;
    3600             :                 }
    3601           0 :                 gen = odi->gen;
    3602             : 
    3603           0 :                 ret = can_rmdir(sctx, rmdir_ino, gen);
    3604           0 :                 if (ret < 0)
    3605           0 :                         goto out;
    3606           0 :                 if (!ret)
    3607           0 :                         goto finish;
    3608             : 
    3609           0 :                 name = fs_path_alloc();
    3610           0 :                 if (!name) {
    3611           0 :                         ret = -ENOMEM;
    3612           0 :                         goto out;
    3613             :                 }
    3614           0 :                 ret = get_cur_path(sctx, rmdir_ino, gen, name);
    3615           0 :                 if (ret < 0)
    3616           0 :                         goto out;
    3617           0 :                 ret = send_rmdir(sctx, name);
    3618           0 :                 if (ret < 0)
    3619           0 :                         goto out;
    3620             :         }
    3621             : 
    3622           0 : finish:
    3623           0 :         ret = cache_dir_utimes(sctx, pm->ino, pm->gen);
    3624           0 :         if (ret < 0)
    3625           0 :                 goto out;
    3626             : 
    3627             :         /*
    3628             :          * After rename/move, need to update the utimes of both new parent(s)
    3629             :          * and old parent(s).
    3630             :          */
    3631           0 :         list_for_each_entry(cur, &pm->update_refs, list) {
    3632             :                 /*
    3633             :                  * The parent inode might have been deleted in the send snapshot
    3634             :                  */
    3635           0 :                 ret = get_inode_info(sctx->send_root, cur->dir, NULL);
    3636           0 :                 if (ret == -ENOENT) {
    3637           0 :                         ret = 0;
    3638           0 :                         continue;
    3639             :                 }
    3640           0 :                 if (ret < 0)
    3641           0 :                         goto out;
    3642             : 
    3643           0 :                 ret = cache_dir_utimes(sctx, cur->dir, cur->dir_gen);
    3644           0 :                 if (ret < 0)
    3645           0 :                         goto out;
    3646             :         }
    3647             : 
    3648           0 : out:
    3649           0 :         fs_path_free(name);
    3650           0 :         fs_path_free(from_path);
    3651           0 :         fs_path_free(to_path);
    3652           0 :         sctx->send_progress = orig_progress;
    3653             : 
    3654           0 :         return ret;
    3655             : }
    3656             : 
    3657           0 : static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
    3658             : {
    3659           0 :         if (!list_empty(&m->list))
    3660           0 :                 list_del(&m->list);
    3661           0 :         if (!RB_EMPTY_NODE(&m->node))
    3662           0 :                 rb_erase(&m->node, &sctx->pending_dir_moves);
    3663           0 :         __free_recorded_refs(&m->update_refs);
    3664           0 :         kfree(m);
    3665           0 : }
    3666             : 
    3667           0 : static void tail_append_pending_moves(struct send_ctx *sctx,
    3668             :                                       struct pending_dir_move *moves,
    3669             :                                       struct list_head *stack)
    3670             : {
    3671           0 :         if (list_empty(&moves->list)) {
    3672           0 :                 list_add_tail(&moves->list, stack);
    3673             :         } else {
    3674           0 :                 LIST_HEAD(list);
    3675           0 :                 list_splice_init(&moves->list, &list);
    3676           0 :                 list_add_tail(&moves->list, stack);
    3677           0 :                 list_splice_tail(&list, stack);
    3678             :         }
    3679           0 :         if (!RB_EMPTY_NODE(&moves->node)) {
    3680           0 :                 rb_erase(&moves->node, &sctx->pending_dir_moves);
    3681           0 :                 RB_CLEAR_NODE(&moves->node);
    3682             :         }
    3683           0 : }
    3684             : 
    3685           0 : static int apply_children_dir_moves(struct send_ctx *sctx)
    3686             : {
    3687           0 :         struct pending_dir_move *pm;
    3688           0 :         struct list_head stack;
    3689           0 :         u64 parent_ino = sctx->cur_ino;
    3690           0 :         int ret = 0;
    3691             : 
    3692           0 :         pm = get_pending_dir_moves(sctx, parent_ino);
    3693           0 :         if (!pm)
    3694             :                 return 0;
    3695             : 
    3696           0 :         INIT_LIST_HEAD(&stack);
    3697           0 :         tail_append_pending_moves(sctx, pm, &stack);
    3698             : 
    3699           0 :         while (!list_empty(&stack)) {
    3700           0 :                 pm = list_first_entry(&stack, struct pending_dir_move, list);
    3701           0 :                 parent_ino = pm->ino;
    3702           0 :                 ret = apply_dir_move(sctx, pm);
    3703           0 :                 free_pending_move(sctx, pm);
    3704           0 :                 if (ret)
    3705           0 :                         goto out;
    3706           0 :                 pm = get_pending_dir_moves(sctx, parent_ino);
    3707           0 :                 if (pm)
    3708           0 :                         tail_append_pending_moves(sctx, pm, &stack);
    3709             :         }
    3710             :         return 0;
    3711             : 
    3712             : out:
    3713           0 :         while (!list_empty(&stack)) {
    3714           0 :                 pm = list_first_entry(&stack, struct pending_dir_move, list);
    3715           0 :                 free_pending_move(sctx, pm);
    3716             :         }
    3717             :         return ret;
    3718             : }
    3719             : 
    3720             : /*
    3721             :  * We might need to delay a directory rename even when no ancestor directory
    3722             :  * (in the send root) with a higher inode number than ours (sctx->cur_ino) was
    3723             :  * renamed. This happens when we rename a directory to the old name (the name
    3724             :  * in the parent root) of some other unrelated directory that got its rename
    3725             :  * delayed due to some ancestor with a higher inode number that got renamed.
    3726             :  *
    3727             :  * Example:
    3728             :  *
    3729             :  * Parent snapshot:
    3730             :  * .                                       (ino 256)
    3731             :  * |---- a/                                (ino 257)
    3732             :  * |     |---- file                        (ino 260)
    3733             :  * |
    3734             :  * |---- b/                                (ino 258)
    3735             :  * |---- c/                                (ino 259)
    3736             :  *
    3737             :  * Send snapshot:
    3738             :  * .                                       (ino 256)
    3739             :  * |---- a/                                (ino 258)
    3740             :  * |---- x/                                (ino 259)
    3741             :  *       |---- y/                          (ino 257)
    3742             :  *             |----- file                 (ino 260)
    3743             :  *
    3744             :  * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257
    3745             :  * from 'a' to 'x/y' happening first, which in turn depends on the rename of
    3746             :  * inode 259 from 'c' to 'x'. So the order of rename commands the send stream
    3747             :  * must issue is:
    3748             :  *
    3749             :  * 1 - rename 259 from 'c' to 'x'
    3750             :  * 2 - rename 257 from 'a' to 'x/y'
    3751             :  * 3 - rename 258 from 'b' to 'a'
    3752             :  *
    3753             :  * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can
    3754             :  * be done right away and < 0 on error.
    3755             :  */
    3756           0 : static int wait_for_dest_dir_move(struct send_ctx *sctx,
    3757             :                                   struct recorded_ref *parent_ref,
    3758             :                                   const bool is_orphan)
    3759             : {
    3760           0 :         struct btrfs_fs_info *fs_info = sctx->parent_root->fs_info;
    3761           0 :         struct btrfs_path *path;
    3762           0 :         struct btrfs_key key;
    3763           0 :         struct btrfs_key di_key;
    3764           0 :         struct btrfs_dir_item *di;
    3765           0 :         u64 left_gen;
    3766           0 :         u64 right_gen;
    3767           0 :         int ret = 0;
    3768           0 :         struct waiting_dir_move *wdm;
    3769             : 
    3770           0 :         if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
    3771             :                 return 0;
    3772             : 
    3773           0 :         path = alloc_path_for_send();
    3774           0 :         if (!path)
    3775             :                 return -ENOMEM;
    3776             : 
    3777           0 :         key.objectid = parent_ref->dir;
    3778           0 :         key.type = BTRFS_DIR_ITEM_KEY;
    3779           0 :         key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);
    3780             : 
    3781           0 :         ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
    3782           0 :         if (ret < 0) {
    3783           0 :                 goto out;
    3784           0 :         } else if (ret > 0) {
    3785           0 :                 ret = 0;
    3786           0 :                 goto out;
    3787             :         }
    3788             : 
    3789           0 :         di = btrfs_match_dir_item_name(fs_info, path, parent_ref->name,
    3790             :                                        parent_ref->name_len);
    3791           0 :         if (!di) {
    3792           0 :                 ret = 0;
    3793           0 :                 goto out;
    3794             :         }
    3795             :         /*
    3796             :          * di_key.objectid has the number of the inode that has a dentry in the
    3797             :          * parent directory with the same name that sctx->cur_ino is being
    3798             :          * renamed to. We need to check if that inode is in the send root as
    3799             :          * well and if it is currently marked as an inode with a pending rename,
    3800             :          * if it is, we need to delay the rename of sctx->cur_ino as well, so
    3801             :          * that it happens after that other inode is renamed.
    3802             :          */
    3803           0 :         btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
    3804           0 :         if (di_key.type != BTRFS_INODE_ITEM_KEY) {
    3805           0 :                 ret = 0;
    3806           0 :                 goto out;
    3807             :         }
    3808             : 
    3809           0 :         ret = get_inode_gen(sctx->parent_root, di_key.objectid, &left_gen);
    3810           0 :         if (ret < 0)
    3811           0 :                 goto out;
    3812           0 :         ret = get_inode_gen(sctx->send_root, di_key.objectid, &right_gen);
    3813           0 :         if (ret < 0) {
    3814           0 :                 if (ret == -ENOENT)
    3815           0 :                         ret = 0;
    3816           0 :                 goto out;
    3817             :         }
    3818             : 
    3819             :         /* Different inode, no need to delay the rename of sctx->cur_ino */
    3820           0 :         if (right_gen != left_gen) {
    3821           0 :                 ret = 0;
    3822           0 :                 goto out;
    3823             :         }
    3824             : 
    3825           0 :         wdm = get_waiting_dir_move(sctx, di_key.objectid);
    3826           0 :         if (wdm && !wdm->orphanized) {
    3827           0 :                 ret = add_pending_dir_move(sctx,
    3828             :                                            sctx->cur_ino,
    3829             :                                            sctx->cur_inode_gen,
    3830             :                                            di_key.objectid,
    3831             :                                            &sctx->new_refs,
    3832             :                                            &sctx->deleted_refs,
    3833             :                                            is_orphan);
    3834           0 :                 if (!ret)
    3835           0 :                         ret = 1;
    3836             :         }
    3837           0 : out:
    3838           0 :         btrfs_free_path(path);
    3839           0 :         return ret;
    3840             : }
    3841             : 
    3842             : /*
    3843             :  * Check if inode ino2, or any of its ancestors, is inode ino1.
    3844             :  * Return 1 if true, 0 if false and < 0 on error.
    3845             :  */
    3846           0 : static int check_ino_in_path(struct btrfs_root *root,
    3847             :                              const u64 ino1,
    3848             :                              const u64 ino1_gen,
    3849             :                              const u64 ino2,
    3850             :                              const u64 ino2_gen,
    3851             :                              struct fs_path *fs_path)
    3852             : {
    3853           0 :         u64 ino = ino2;
    3854             : 
    3855           0 :         if (ino1 == ino2)
    3856           0 :                 return ino1_gen == ino2_gen;
    3857             : 
    3858           0 :         while (ino > BTRFS_FIRST_FREE_OBJECTID) {
    3859           0 :                 u64 parent;
    3860           0 :                 u64 parent_gen;
    3861           0 :                 int ret;
    3862             : 
    3863           0 :                 fs_path_reset(fs_path);
    3864           0 :                 ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path);
    3865           0 :                 if (ret < 0)
    3866           0 :                         return ret;
    3867           0 :                 if (parent == ino1)
    3868           0 :                         return parent_gen == ino1_gen;
    3869           0 :                 ino = parent;
    3870             :         }
    3871             :         return 0;
    3872             : }
    3873             : 
    3874             : /*
    3875             :  * Check if inode ino1 is an ancestor of inode ino2 in the given root for any
    3876             :  * possible path (in case ino2 is not a directory and has multiple hard links).
    3877             :  * Return 1 if true, 0 if false and < 0 on error.
    3878             :  */
    3879           0 : static int is_ancestor(struct btrfs_root *root,
    3880             :                        const u64 ino1,
    3881             :                        const u64 ino1_gen,
    3882             :                        const u64 ino2,
    3883             :                        struct fs_path *fs_path)
    3884             : {
    3885           0 :         bool free_fs_path = false;
    3886           0 :         int ret = 0;
    3887           0 :         int iter_ret = 0;
    3888           0 :         struct btrfs_path *path = NULL;
    3889           0 :         struct btrfs_key key;
    3890             : 
    3891           0 :         if (!fs_path) {
    3892           0 :                 fs_path = fs_path_alloc();
    3893           0 :                 if (!fs_path)
    3894             :                         return -ENOMEM;
    3895             :                 free_fs_path = true;
    3896             :         }
    3897             : 
    3898           0 :         path = alloc_path_for_send();
    3899           0 :         if (!path) {
    3900           0 :                 ret = -ENOMEM;
    3901           0 :                 goto out;
    3902             :         }
    3903             : 
    3904           0 :         key.objectid = ino2;
    3905           0 :         key.type = BTRFS_INODE_REF_KEY;
    3906           0 :         key.offset = 0;
    3907             : 
    3908           0 :         btrfs_for_each_slot(root, &key, &key, path, iter_ret) {
    3909           0 :                 struct extent_buffer *leaf = path->nodes[0];
    3910           0 :                 int slot = path->slots[0];
    3911           0 :                 u32 cur_offset = 0;
    3912           0 :                 u32 item_size;
    3913             : 
    3914           0 :                 if (key.objectid != ino2)
    3915             :                         break;
    3916           0 :                 if (key.type != BTRFS_INODE_REF_KEY &&
    3917             :                     key.type != BTRFS_INODE_EXTREF_KEY)
    3918             :                         break;
    3919             : 
    3920           0 :                 item_size = btrfs_item_size(leaf, slot);
    3921           0 :                 while (cur_offset < item_size) {
    3922           0 :                         u64 parent;
    3923           0 :                         u64 parent_gen;
    3924             : 
    3925           0 :                         if (key.type == BTRFS_INODE_EXTREF_KEY) {
    3926           0 :                                 unsigned long ptr;
    3927           0 :                                 struct btrfs_inode_extref *extref;
    3928             : 
    3929           0 :                                 ptr = btrfs_item_ptr_offset(leaf, slot);
    3930           0 :                                 extref = (struct btrfs_inode_extref *)
    3931           0 :                                         (ptr + cur_offset);
    3932           0 :                                 parent = btrfs_inode_extref_parent(leaf,
    3933             :                                                                    extref);
    3934           0 :                                 cur_offset += sizeof(*extref);
    3935           0 :                                 cur_offset += btrfs_inode_extref_name_len(leaf,
    3936             :                                                                   extref);
    3937             :                         } else {
    3938           0 :                                 parent = key.offset;
    3939           0 :                                 cur_offset = item_size;
    3940             :                         }
    3941             : 
    3942           0 :                         ret = get_inode_gen(root, parent, &parent_gen);
    3943           0 :                         if (ret < 0)
    3944           0 :                                 goto out;
    3945           0 :                         ret = check_ino_in_path(root, ino1, ino1_gen,
    3946             :                                                 parent, parent_gen, fs_path);
    3947           0 :                         if (ret)
    3948           0 :                                 goto out;
    3949             :                 }
    3950             :         }
    3951           0 :         ret = 0;
    3952           0 :         if (iter_ret < 0)
    3953             :                 ret = iter_ret;
    3954             : 
    3955           0 : out:
    3956           0 :         btrfs_free_path(path);
    3957           0 :         if (free_fs_path)
    3958           0 :                 fs_path_free(fs_path);
    3959             :         return ret;
    3960             : }
    3961             : 
    3962           0 : static int wait_for_parent_move(struct send_ctx *sctx,
    3963             :                                 struct recorded_ref *parent_ref,
    3964             :                                 const bool is_orphan)
    3965             : {
    3966           0 :         int ret = 0;
    3967           0 :         u64 ino = parent_ref->dir;
    3968           0 :         u64 ino_gen = parent_ref->dir_gen;
    3969           0 :         u64 parent_ino_before, parent_ino_after;
    3970           0 :         struct fs_path *path_before = NULL;
    3971           0 :         struct fs_path *path_after = NULL;
    3972           0 :         int len1, len2;
    3973             : 
    3974           0 :         path_after = fs_path_alloc();
    3975           0 :         path_before = fs_path_alloc();
    3976           0 :         if (!path_after || !path_before) {
    3977           0 :                 ret = -ENOMEM;
    3978           0 :                 goto out;
    3979             :         }
    3980             : 
    3981             :         /*
    3982             :          * Our current directory inode may not yet be renamed/moved because some
    3983             :          * ancestor (immediate or not) has to be renamed/moved first. So find if
    3984             :          * such ancestor exists and make sure our own rename/move happens after
    3985             :          * that ancestor is processed to avoid infinite loops while building
    3986             :          * paths (done at get_cur_path()).
    3987             :          */
    3988           0 :         while (ino > BTRFS_FIRST_FREE_OBJECTID) {
    3989           0 :                 u64 parent_ino_after_gen;
    3990             : 
    3991           0 :                 if (is_waiting_for_move(sctx, ino)) {
    3992             :                         /*
    3993             :                          * If the current inode is an ancestor of ino in the
    3994             :                          * parent root, we need to delay the rename of the
    3995             :                          * current inode, otherwise don't delay the rename
    3996             :                          * because we can end up with a circular dependency
    3997             :                          * of renames, resulting in some directories never
    3998             :                          * getting the respective rename operations issued in
    3999             :                          * the send stream or getting into infinite path build
    4000             :                          * loops.
    4001             :                          */
    4002           0 :                         ret = is_ancestor(sctx->parent_root,
    4003             :                                           sctx->cur_ino, sctx->cur_inode_gen,
    4004             :                                           ino, path_before);
    4005           0 :                         if (ret)
    4006             :                                 break;
    4007             :                 }
    4008             : 
    4009           0 :                 fs_path_reset(path_before);
    4010           0 :                 fs_path_reset(path_after);
    4011             : 
    4012           0 :                 ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
    4013             :                                     &parent_ino_after_gen, path_after);
    4014           0 :                 if (ret < 0)
    4015           0 :                         goto out;
    4016           0 :                 ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
    4017             :                                     NULL, path_before);
    4018           0 :                 if (ret < 0 && ret != -ENOENT) {
    4019           0 :                         goto out;
    4020           0 :                 } else if (ret == -ENOENT) {
    4021             :                         ret = 0;
    4022             :                         break;
    4023             :                 }
    4024             : 
    4025           0 :                 len1 = fs_path_len(path_before);
    4026           0 :                 len2 = fs_path_len(path_after);
    4027           0 :                 if (ino > sctx->cur_ino &&
    4028           0 :                     (parent_ino_before != parent_ino_after || len1 != len2 ||
    4029           0 :                      memcmp(path_before->start, path_after->start, len1))) {
    4030           0 :                         u64 parent_ino_gen;
    4031             : 
    4032           0 :                         ret = get_inode_gen(sctx->parent_root, ino, &parent_ino_gen);
    4033           0 :                         if (ret < 0)
    4034           0 :                                 goto out;
    4035           0 :                         if (ino_gen == parent_ino_gen) {
    4036           0 :                                 ret = 1;
    4037           0 :                                 break;
    4038             :                         }
    4039             :                 }
    4040           0 :                 ino = parent_ino_after;
    4041           0 :                 ino_gen = parent_ino_after_gen;
    4042             :         }
    4043             : 
    4044           0 : out:
    4045           0 :         fs_path_free(path_before);
    4046           0 :         fs_path_free(path_after);
    4047             : 
    4048           0 :         if (ret == 1) {
    4049           0 :                 ret = add_pending_dir_move(sctx,
    4050             :                                            sctx->cur_ino,
    4051             :                                            sctx->cur_inode_gen,
    4052             :                                            ino,
    4053             :                                            &sctx->new_refs,
    4054             :                                            &sctx->deleted_refs,
    4055             :                                            is_orphan);
    4056           0 :                 if (!ret)
    4057           0 :                         ret = 1;
    4058             :         }
    4059             : 
    4060           0 :         return ret;
    4061             : }
    4062             : 
    4063           0 : static int update_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
    4064             : {
    4065           0 :         int ret;
    4066           0 :         struct fs_path *new_path;
    4067             : 
    4068             :         /*
                     :          * Rebuild the full path of @ref as the current path of its parent
                     :          * directory (ref->dir, ref->dir_gen) with ref->name appended.
                     :          *
    4069             :          * Our reference's name member points to its full_path member string, so
    4070             :          * we build the result in a new path: ref->name must stay readable until
                     :          * it has been appended. Only after that is the old full_path freed and
                     :          * the new path installed with set_ref_path().
                     :          *
                     :          * Returns 0 on success, -ENOMEM if the new path cannot be allocated, or
                     :          * the negative value returned by get_cur_path() or fs_path_add(). On
                     :          * any failure the new path is freed and this function does not modify
                     :          * @ref.
    4071             :          */
    4072           0 :         new_path = fs_path_alloc();
    4073           0 :         if (!new_path)
    4074             :                 return -ENOMEM;
    4075             : 
    4076           0 :         ret = get_cur_path(sctx, ref->dir, ref->dir_gen, new_path);
    4077           0 :         if (ret < 0) {
    4078           0 :                 fs_path_free(new_path);
    4079           0 :                 return ret;
    4080             :         }
    4081           0 :         ret = fs_path_add(new_path, ref->name, ref->name_len);
    4082           0 :         if (ret < 0) {
    4083           0 :                 fs_path_free(new_path);
    4084           0 :                 return ret;
    4085             :         }
    4086             : 
    4087           0 :         fs_path_free(ref->full_path);
    4088           0 :         set_ref_path(ref, new_path);
    4089             : 
    4090           0 :         return 0;
    4091             : }
    4092             : 
    4093             : /*
    4094             :  * When processing the new references for an inode we may orphanize an existing
    4095             :  * directory inode because its old name conflicts with one of the new references
    4096             :  * of the current inode. Later, when processing another new reference of our
    4097             :  * inode, we might need to orphanize another inode, but the path we have in the
    4098             :  * reference reflects the pre-orphanization name of the directory we previously
    4099             :  * orphanized. For example:
    4100             :  *
    4101             :  * parent snapshot looks like:
    4102             :  *
    4103             :  * .                                     (ino 256)
    4104             :  * |----- f1                             (ino 257)
    4105             :  * |----- f2                             (ino 258)
    4106             :  * |----- d1/                            (ino 259)
    4107             :  *        |----- d2/                     (ino 260)
    4108             :  *
    4109             :  * send snapshot looks like:
    4110             :  *
    4111             :  * .                                     (ino 256)
    4112             :  * |----- d1                             (ino 258)
    4113             :  * |----- f2/                            (ino 259)
    4114             :  *        |----- f2_link/                (ino 260)
    4115             :  *        |       |----- f1              (ino 257)
    4116             :  *        |
    4117             :  *        |----- d2                      (ino 258)
    4118             :  *
    4119             :  * When processing inode 257 we compute the name for inode 259 as "d1", and we
    4120             :  * cache it in the name cache. Later when we start processing inode 258, when
    4121             :  * collecting all its new references we set a full path of "d1/d2" for its new
    4122             :  * reference with name "d2". When we start processing the new references we
    4123             :  * start by processing the new reference with name "d1", and this results in
    4124             :  * orphanizing inode 259, since its old reference causes a conflict. Then we
    4125             :  * move on to the next new reference, with name "d2", and we find out we must
    4126             :  * orphanize inode 260, as its old reference conflicts with ours - but for the
    4127             :  * orphanization we use a source path corresponding to the path we stored in the
    4128             :  * new reference, which is "d1/d2" and not "o259-6-0/d2" - this makes the
    4129             :  * receiver fail since the path component "d1/" no longer exists, it was renamed
    4130             :  * to "o259-6-0/" when processing the previous new reference. So in this case we
    4131             :  * must recompute the path in the new reference and use it for the new
    4132             :  * orphanization operation.
                     :  *
                     :  * The recomputation is done in place on ref->full_path. A reference's name
                     :  * member points into its full_path string, so the name is first copied to a
                     :  * temporary buffer, full_path is reset and rebuilt from the current path of
                     :  * the parent directory (ref->dir, ref->dir_gen) plus the saved name, and
                     :  * finally set_ref_path() updates the reference's base name pointer. The
                     :  * temporary copy is always freed before returning.
                     :  *
                     :  * Returns -ENOMEM if the name copy cannot be allocated, the negative value
                     :  * returned by get_cur_path() or fs_path_add() if either fails, and otherwise
                     :  * the (non-negative) result of fs_path_add(). If a failure happens after the
                     :  * reset, no attempt is made here to restore the previous full_path contents.
    4133             :  */
    4134           0 : static int refresh_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
    4135             : {
    4136           0 :         char *name;
    4137           0 :         int ret;
    4138             : 
    4139           0 :         name = kmemdup(ref->name, ref->name_len, GFP_KERNEL);
    4140           0 :         if (!name)
    4141             :                 return -ENOMEM;
    4142             : 
    4143           0 :         fs_path_reset(ref->full_path);
    4144           0 :         ret = get_cur_path(sctx, ref->dir, ref->dir_gen, ref->full_path);
    4145           0 :         if (ret < 0)
    4146           0 :                 goto out;
    4147             : 
    4148           0 :         ret = fs_path_add(ref->full_path, name, ref->name_len);
    4149           0 :         if (ret < 0)
    4150           0 :                 goto out;
    4151             : 
    4152             :         /* Update the reference's base name pointer. */
    4153           0 :         set_ref_path(ref, ref->full_path);
    4154           0 : out:
    4155           0 :         kfree(name);
    4156           0 :         return ret;
    4157             : }
    4158             : 
    4159             : /*
    4160             :  * This does all the move/link/unlink/rmdir magic.
    4161             :  */
    4162           0 : static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
    4163             : {
    4164           0 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    4165           0 :         int ret = 0;
    4166           0 :         struct recorded_ref *cur;
    4167           0 :         struct recorded_ref *cur2;
    4168           0 :         struct list_head check_dirs;
    4169           0 :         struct fs_path *valid_path = NULL;
    4170           0 :         u64 ow_inode = 0;
    4171           0 :         u64 ow_gen;
    4172           0 :         u64 ow_mode;
    4173           0 :         int did_overwrite = 0;
    4174           0 :         int is_orphan = 0;
    4175           0 :         u64 last_dir_ino_rm = 0;
    4176           0 :         bool can_rename = true;
    4177           0 :         bool orphanized_dir = false;
    4178           0 :         bool orphanized_ancestor = false;
    4179             : 
    4180           0 :         btrfs_debug(fs_info, "process_recorded_refs %llu", sctx->cur_ino);
    4181             : 
    4182             :         /*
    4183             :          * This should never happen as the root dir always has the same ref
    4184             :          * which is always '..'
    4185             :          */
    4186           0 :         BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
    4187           0 :         INIT_LIST_HEAD(&check_dirs);
    4188             : 
    4189           0 :         valid_path = fs_path_alloc();
    4190           0 :         if (!valid_path) {
    4191           0 :                 ret = -ENOMEM;
    4192           0 :                 goto out;
    4193             :         }
    4194             : 
    4195             :         /*
    4196             :          * First, check if the first ref of the current inode was overwritten
    4197             :          * before. If yes, we know that the current inode was already orphanized
    4198             :          * and thus use the orphan name. If not, we can use get_cur_path to
    4199             :          * get the path of the first ref as it would look while receiving at
    4200             :          * this point in time.
    4201             :          * New inodes are always orphan at the beginning, so force to use the
    4202             :          * orphan name in this case.
    4203             :          * The first ref is stored in valid_path and will be updated if it
    4204             :          * gets moved around.
    4205             :          */
    4206           0 :         if (!sctx->cur_inode_new) {
    4207           0 :                 ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
    4208             :                                 sctx->cur_inode_gen);
    4209           0 :                 if (ret < 0)
    4210           0 :                         goto out;
    4211           0 :                 if (ret)
    4212           0 :                         did_overwrite = 1;
    4213             :         }
    4214           0 :         if (sctx->cur_inode_new || did_overwrite) {
    4215           0 :                 ret = gen_unique_name(sctx, sctx->cur_ino,
    4216             :                                 sctx->cur_inode_gen, valid_path);
    4217           0 :                 if (ret < 0)
    4218           0 :                         goto out;
    4219             :                 is_orphan = 1;
    4220             :         } else {
    4221           0 :                 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
    4222             :                                 valid_path);
    4223           0 :                 if (ret < 0)
    4224           0 :                         goto out;
    4225             :         }
    4226             : 
    4227             :         /*
    4228             :          * Before doing any rename and link operations, do a first pass on the
    4229             :          * new references to orphanize any unprocessed inodes that may have a
    4230             :          * reference that conflicts with one of the new references of the current
    4231             :          * inode. This needs to happen first because a new reference may conflict
    4232             :          * with the old reference of a parent directory, so we must make sure
    4233             :          * that the path used for link and rename commands doesn't use an
    4234             :          * orphanized name when an ancestor was not yet orphanized.
    4235             :          *
    4236             :          * Example:
    4237             :          *
    4238             :          * Parent snapshot:
    4239             :          *
    4240             :          * .                                                      (ino 256)
    4241             :          * |----- testdir/                                        (ino 259)
    4242             :          * |          |----- a                                    (ino 257)
    4243             :          * |
    4244             :          * |----- b                                               (ino 258)
    4245             :          *
    4246             :          * Send snapshot:
    4247             :          *
    4248             :          * .                                                      (ino 256)
    4249             :          * |----- testdir_2/                                      (ino 259)
    4250             :          * |          |----- a                                    (ino 260)
    4251             :          * |
    4252             :          * |----- testdir                                         (ino 257)
    4253             :          * |----- b                                               (ino 257)
    4254             :          * |----- b2                                              (ino 258)
    4255             :          *
    4256             :          * Processing the new reference for inode 257 with name "b" may happen
    4257             :          * before processing the new reference with name "testdir". If so, we
    4258             :          * must make sure that by the time we send a link command to create the
    4259             :          * hard link "b", inode 259 was already orphanized, since the generated
    4260             :          * path in "valid_path" already contains the orphanized name for 259.
    4261             :          * We are processing inode 257, so only later when processing 259 we do
    4262             :          * the rename operation to change its temporary (orphanized) name to
    4263             :          * "testdir_2".
    4264             :          */
    4265           0 :         list_for_each_entry(cur, &sctx->new_refs, list) {
    4266           0 :                 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen, NULL, NULL);
    4267           0 :                 if (ret < 0)
    4268           0 :                         goto out;
    4269           0 :                 if (ret == inode_state_will_create)
    4270           0 :                         continue;
    4271             : 
    4272             :                 /*
    4273             :                  * Check if this new ref would overwrite the first ref of another
    4274             :                  * unprocessed inode. If yes, orphanize the overwritten inode.
    4275             :                  * If we find an overwritten ref that is not the first ref,
    4276             :                  * simply unlink it.
    4277             :                  */
    4278           0 :                 ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
    4279           0 :                                 cur->name, cur->name_len,
    4280             :                                 &ow_inode, &ow_gen, &ow_mode);
    4281           0 :                 if (ret < 0)
    4282           0 :                         goto out;
    4283           0 :                 if (ret) {
    4284           0 :                         ret = is_first_ref(sctx->parent_root,
    4285           0 :                                            ow_inode, cur->dir, cur->name,
    4286             :                                            cur->name_len);
    4287           0 :                         if (ret < 0)
    4288           0 :                                 goto out;
    4289           0 :                         if (ret) {
    4290           0 :                                 struct name_cache_entry *nce;
    4291           0 :                                 struct waiting_dir_move *wdm;
    4292             : 
    4293           0 :                                 if (orphanized_dir) {
    4294           0 :                                         ret = refresh_ref_path(sctx, cur);
    4295           0 :                                         if (ret < 0)
    4296           0 :                                                 goto out;
    4297             :                                 }
    4298             : 
    4299           0 :                                 ret = orphanize_inode(sctx, ow_inode, ow_gen,
    4300             :                                                 cur->full_path);
    4301           0 :                                 if (ret < 0)
    4302           0 :                                         goto out;
    4303           0 :                                 if (S_ISDIR(ow_mode))
    4304           0 :                                         orphanized_dir = true;
    4305             : 
    4306             :                                 /*
    4307             :                                  * If ow_inode has its rename operation delayed
    4308             :                                  * make sure that its orphanized name is used in
    4309             :                                  * the source path when performing its rename
    4310             :                                  * operation.
    4311             :                                  */
    4312           0 :                                 wdm = get_waiting_dir_move(sctx, ow_inode);
    4313           0 :                                 if (wdm)
    4314           0 :                                         wdm->orphanized = true;
    4315             : 
    4316             :                                 /*
    4317             :                                  * Make sure we clear our orphanized inode's
    4318             :                                  * name from the name cache. This is because the
    4319             :                                  * inode ow_inode might be an ancestor of some
    4320             :                                  * other inode that will be orphanized as well
    4321             :                                  * later and has an inode number greater than
    4322             :                                  * sctx->send_progress. We need to prevent
    4323             :                                  * future name lookups from using the old name
    4324             :                                  * and get instead the orphan name.
    4325             :                                  */
    4326           0 :                                 nce = name_cache_search(sctx, ow_inode, ow_gen);
    4327           0 :                                 if (nce)
    4328           0 :                                         btrfs_lru_cache_remove(&sctx->name_cache,
    4329             :                                                                &nce->entry);
    4330             : 
    4331             :                                 /*
    4332             :                                  * ow_inode might currently be an ancestor of
    4333             :                                  * cur_ino, therefore compute valid_path (the
    4334             :                                  * current path of cur_ino) again because it
    4335             :                                  * might contain the pre-orphanization name of
    4336             :                                  * ow_inode, which is no longer valid.
    4337             :                                  */
    4338           0 :                                 ret = is_ancestor(sctx->parent_root,
    4339             :                                                   ow_inode, ow_gen,
    4340             :                                                   sctx->cur_ino, NULL);
    4341           0 :                                 if (ret > 0) {
    4342           0 :                                         orphanized_ancestor = true;
    4343           0 :                                         fs_path_reset(valid_path);
    4344           0 :                                         ret = get_cur_path(sctx, sctx->cur_ino,
    4345             :                                                            sctx->cur_inode_gen,
    4346             :                                                            valid_path);
    4347             :                                 }
    4348           0 :                                 if (ret < 0)
    4349           0 :                                         goto out;
    4350             :                         } else {
    4351             :                                 /*
    4352             :                                  * If we previously orphanized a directory that
    4353             :                                  * collided with a new reference that we already
    4354             :                                  * processed, recompute the current path because
    4355             :                                  * that directory may be part of the path.
    4356             :                                  */
    4357           0 :                                 if (orphanized_dir) {
    4358           0 :                                         ret = refresh_ref_path(sctx, cur);
    4359           0 :                                         if (ret < 0)
    4360           0 :                                                 goto out;
    4361             :                                 }
    4362           0 :                                 ret = send_unlink(sctx, cur->full_path);
    4363           0 :                                 if (ret < 0)
    4364           0 :                                         goto out;
    4365             :                         }
    4366             :                 }
    4367             : 
    4368             :         }
    4369             : 
    4370           0 :         list_for_each_entry(cur, &sctx->new_refs, list) {
    4371             :                 /*
    4372             :                  * We may have refs where the parent directory does not exist
    4373             :                  * yet. This happens if the parent directories inum is higher
    4374             :                  * yet. This happens if the parent directory's inum is higher
    4375             :                  * parent directory out of order. But we need to check if this
    4376             :                  * did already happen before due to other refs in the same dir.
    4377             :                  */
    4378           0 :                 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen, NULL, NULL);
    4379           0 :                 if (ret < 0)
    4380           0 :                         goto out;
    4381           0 :                 if (ret == inode_state_will_create) {
    4382           0 :                         ret = 0;
    4383             :                         /*
    4384             :                          * First check if any of the current inode's refs did
    4385             :                          * already create the dir.
    4386             :                          */
    4387           0 :                         list_for_each_entry(cur2, &sctx->new_refs, list) {
    4388           0 :                                 if (cur == cur2)
    4389             :                                         break;
    4390           0 :                                 if (cur2->dir == cur->dir) {
    4391             :                                         ret = 1;
    4392             :                                         break;
    4393             :                                 }
    4394             :                         }
    4395             : 
    4396             :                         /*
    4397             :                          * If that did not happen, check if a previous inode
    4398             :                          * did already create the dir.
    4399             :                          */
    4400           0 :                         if (!ret)
    4401           0 :                                 ret = did_create_dir(sctx, cur->dir);
    4402           0 :                         if (ret < 0)
    4403           0 :                                 goto out;
    4404           0 :                         if (!ret) {
    4405           0 :                                 ret = send_create_inode(sctx, cur->dir);
    4406           0 :                                 if (ret < 0)
    4407           0 :                                         goto out;
    4408           0 :                                 cache_dir_created(sctx, cur->dir);
    4409             :                         }
    4410             :                 }
    4411             : 
    4412           0 :                 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
    4413           0 :                         ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
    4414           0 :                         if (ret < 0)
    4415           0 :                                 goto out;
    4416           0 :                         if (ret == 1) {
    4417           0 :                                 can_rename = false;
    4418           0 :                                 *pending_move = 1;
    4419             :                         }
    4420             :                 }
    4421             : 
    4422           0 :                 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root &&
    4423             :                     can_rename) {
    4424           0 :                         ret = wait_for_parent_move(sctx, cur, is_orphan);
    4425           0 :                         if (ret < 0)
    4426           0 :                                 goto out;
    4427           0 :                         if (ret == 1) {
    4428           0 :                                 can_rename = false;
    4429           0 :                                 *pending_move = 1;
    4430             :                         }
    4431             :                 }
    4432             : 
    4433             :                 /*
    4434             :                  * link/move the ref to the new place. If we have an orphan
    4435             :                  * inode, move it and update valid_path. If not, link or move
    4436             :                  * it depending on the inode mode.
    4437             :                  */
    4438           0 :                 if (is_orphan && can_rename) {
    4439           0 :                         ret = send_rename(sctx, valid_path, cur->full_path);
    4440           0 :                         if (ret < 0)
    4441           0 :                                 goto out;
    4442           0 :                         is_orphan = 0;
    4443           0 :                         ret = fs_path_copy(valid_path, cur->full_path);
    4444           0 :                         if (ret < 0)
    4445           0 :                                 goto out;
    4446           0 :                 } else if (can_rename) {
    4447           0 :                         if (S_ISDIR(sctx->cur_inode_mode)) {
    4448             :                                 /*
    4449             :                                  * Dirs can't be linked, so move it. For moved
    4450             :                                  * dirs, we always have one new and one deleted
    4451             :                                  * ref. The deleted ref is ignored later.
    4452             :                                  */
    4453           0 :                                 ret = send_rename(sctx, valid_path,
    4454             :                                                   cur->full_path);
    4455           0 :                                 if (!ret)
    4456           0 :                                         ret = fs_path_copy(valid_path,
    4457             :                                                            cur->full_path);
    4458           0 :                                 if (ret < 0)
    4459           0 :                                         goto out;
    4460             :                         } else {
    4461             :                                 /*
    4462             :                                  * We might have previously orphanized an inode
    4463             :                                  * which is an ancestor of our current inode,
    4464             :                                  * so our reference's full path, which was
    4465             :                                  * computed before any such orphanizations, must
    4466             :                                  * be updated.
    4467             :                                  */
    4468           0 :                                 if (orphanized_dir) {
    4469           0 :                                         ret = update_ref_path(sctx, cur);
    4470           0 :                                         if (ret < 0)
    4471           0 :                                                 goto out;
    4472             :                                 }
    4473           0 :                                 ret = send_link(sctx, cur->full_path,
    4474             :                                                 valid_path);
    4475           0 :                                 if (ret < 0)
    4476           0 :                                         goto out;
    4477             :                         }
    4478             :                 }
    4479           0 :                 ret = dup_ref(cur, &check_dirs);
    4480           0 :                 if (ret < 0)
    4481           0 :                         goto out;
    4482             :         }
    4483             : 
    4484           0 :         if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) {
    4485             :                 /*
    4486             :                  * Check if we can already rmdir the directory. If not,
    4487             :                  * orphanize it. For every dir item inside that gets deleted
    4488             :                  * later, we do this check again and rmdir it then if possible.
    4489             :                  * See the use of check_dirs for more details.
    4490             :                  */
    4491           0 :                 ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen);
    4492           0 :                 if (ret < 0)
    4493           0 :                         goto out;
    4494           0 :                 if (ret) {
    4495           0 :                         ret = send_rmdir(sctx, valid_path);
    4496           0 :                         if (ret < 0)
    4497           0 :                                 goto out;
    4498           0 :                 } else if (!is_orphan) {
    4499           0 :                         ret = orphanize_inode(sctx, sctx->cur_ino,
    4500             :                                         sctx->cur_inode_gen, valid_path);
    4501           0 :                         if (ret < 0)
    4502           0 :                                 goto out;
    4503             :                         is_orphan = 1;
    4504             :                 }
    4505             : 
    4506           0 :                 list_for_each_entry(cur, &sctx->deleted_refs, list) {
    4507           0 :                         ret = dup_ref(cur, &check_dirs);
    4508           0 :                         if (ret < 0)
    4509           0 :                                 goto out;
    4510             :                 }
    4511           0 :         } else if (S_ISDIR(sctx->cur_inode_mode) &&
    4512           0 :                    !list_empty(&sctx->deleted_refs)) {
    4513             :                 /*
    4514             :                  * We have a moved dir. Add the old parent to check_dirs
    4515             :                  */
    4516           0 :                 cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
    4517             :                                 list);
    4518           0 :                 ret = dup_ref(cur, &check_dirs);
    4519           0 :                 if (ret < 0)
    4520           0 :                         goto out;
    4521           0 :         } else if (!S_ISDIR(sctx->cur_inode_mode)) {
    4522             :                 /*
    4523             :                  * We have a non dir inode. Go through all deleted refs and
    4524             :                  * unlink them if they were not already overwritten by other
    4525             :                  * inodes.
    4526             :                  */
    4527           0 :                 list_for_each_entry(cur, &sctx->deleted_refs, list) {
    4528           0 :                         ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen,
    4529             :                                         sctx->cur_ino, sctx->cur_inode_gen,
    4530           0 :                                         cur->name, cur->name_len);
    4531           0 :                         if (ret < 0)
    4532           0 :                                 goto out;
    4533           0 :                         if (!ret) {
    4534             :                                 /*
    4535             :                                  * If we orphanized any ancestor before, we need
    4536             :                                  * to recompute the full path for deleted names,
    4537             :                                  * since any such path was computed before we
    4538             :                                  * processed any references and orphanized any
    4539             :                                  * ancestor inode.
    4540             :                                  */
    4541           0 :                                 if (orphanized_ancestor) {
    4542           0 :                                         ret = update_ref_path(sctx, cur);
    4543           0 :                                         if (ret < 0)
    4544           0 :                                                 goto out;
    4545             :                                 }
    4546           0 :                                 ret = send_unlink(sctx, cur->full_path);
    4547           0 :                                 if (ret < 0)
    4548           0 :                                         goto out;
    4549             :                         }
    4550           0 :                         ret = dup_ref(cur, &check_dirs);
    4551           0 :                         if (ret < 0)
    4552           0 :                                 goto out;
    4553             :                 }
    4554             :                 /*
    4555             :                  * If the inode is still orphan, unlink the orphan. This may
    4556             :                  * happen when a previous inode did overwrite the first ref
    4557             :                  * of this inode and no new refs were added for the current
    4558             :                  * inode. Unlinking does not mean that the inode is deleted in
    4559             :                  * all cases. There may still be links to this inode in other
    4560             :                  * places.
    4561             :                  */
    4562           0 :                 if (is_orphan) {
    4563           0 :                         ret = send_unlink(sctx, valid_path);
    4564           0 :                         if (ret < 0)
    4565           0 :                                 goto out;
    4566             :                 }
    4567             :         }
    4568             : 
    4569             :         /*
    4570             :          * We did collect all parent dirs where cur_inode was once located. We
    4571             :          * now go through all these dirs and check if they are pending for
    4572             :          * deletion and if it's finally possible to perform the rmdir now.
    4573             :          * We also update the inode stats of the parent dirs here.
    4574             :          */
    4575           0 :         list_for_each_entry(cur, &check_dirs, list) {
    4576             :                 /*
    4577             :                  * In case we had refs into dirs that were not processed yet,
    4578             :                  * we don't need to do the utime and rmdir logic for these dirs.
    4579             :                  * The dir will be processed later.
    4580             :                  */
    4581           0 :                 if (cur->dir > sctx->cur_ino)
    4582           0 :                         continue;
    4583             : 
    4584           0 :                 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen, NULL, NULL);
    4585           0 :                 if (ret < 0)
    4586           0 :                         goto out;
    4587             : 
    4588           0 :                 if (ret == inode_state_did_create ||
    4589           0 :                     ret == inode_state_no_change) {
    4590           0 :                         ret = cache_dir_utimes(sctx, cur->dir, cur->dir_gen);
    4591           0 :                         if (ret < 0)
    4592           0 :                                 goto out;
    4593           0 :                 } else if (ret == inode_state_did_delete &&
    4594           0 :                            cur->dir != last_dir_ino_rm) {
    4595           0 :                         ret = can_rmdir(sctx, cur->dir, cur->dir_gen);
    4596           0 :                         if (ret < 0)
    4597           0 :                                 goto out;
    4598           0 :                         if (ret) {
    4599           0 :                                 ret = get_cur_path(sctx, cur->dir,
    4600             :                                                    cur->dir_gen, valid_path);
    4601           0 :                                 if (ret < 0)
    4602           0 :                                         goto out;
    4603           0 :                                 ret = send_rmdir(sctx, valid_path);
    4604           0 :                                 if (ret < 0)
    4605           0 :                                         goto out;
    4606           0 :                                 last_dir_ino_rm = cur->dir;
    4607             :                         }
    4608             :                 }
    4609             :         }
    4610             : 
    4611             :         ret = 0;
    4612             : 
    4613           0 : out:
    4614           0 :         __free_recorded_refs(&check_dirs);
    4615           0 :         free_recorded_refs(sctx);
    4616           0 :         fs_path_free(valid_path);
    4617           0 :         return ret;
    4618             : }
    4619             : 
    4620           0 : static int rbtree_ref_comp(const void *k, const struct rb_node *node)
    4621             : {
    4622           0 :         const struct recorded_ref *data = k;
    4623           0 :         const struct recorded_ref *ref = rb_entry(node, struct recorded_ref, node);
    4624           0 :         int result;
    4625             : 
    4626           0 :         if (data->dir > ref->dir)
    4627             :                 return 1;
    4628           0 :         if (data->dir < ref->dir)
    4629             :                 return -1;
    4630           0 :         if (data->dir_gen > ref->dir_gen)
    4631             :                 return 1;
    4632           0 :         if (data->dir_gen < ref->dir_gen)
    4633             :                 return -1;
    4634           0 :         if (data->name_len > ref->name_len)
    4635             :                 return 1;
    4636           0 :         if (data->name_len < ref->name_len)
    4637             :                 return -1;
    4638           0 :         result = strcmp(data->name, ref->name);
    4639           0 :         if (result > 0)
    4640             :                 return 1;
    4641           0 :         if (result < 0)
    4642           0 :                 return -1;
    4643             :         return 0;
    4644             : }
    4645             : 
    4646             : static bool rbtree_ref_less(struct rb_node *node, const struct rb_node *parent)
    4647             : {
    4648           0 :         const struct recorded_ref *entry = rb_entry(node, struct recorded_ref, node);
    4649             : 
    4650           0 :         return rbtree_ref_comp(entry, parent) < 0;
    4651             : }
    4652             : 
    4653           0 : static int record_ref_in_tree(struct rb_root *root, struct list_head *refs,
    4654             :                               struct fs_path *name, u64 dir, u64 dir_gen,
    4655             :                               struct send_ctx *sctx)
    4656             : {
    4657           0 :         int ret = 0;
    4658           0 :         struct fs_path *path = NULL;
    4659           0 :         struct recorded_ref *ref = NULL;
    4660             : 
    4661           0 :         path = fs_path_alloc();
    4662           0 :         if (!path) {
    4663           0 :                 ret = -ENOMEM;
    4664           0 :                 goto out;
    4665             :         }
    4666             : 
    4667           0 :         ref = recorded_ref_alloc();
    4668           0 :         if (!ref) {
    4669           0 :                 ret = -ENOMEM;
    4670           0 :                 goto out;
    4671             :         }
    4672             : 
    4673           0 :         ret = get_cur_path(sctx, dir, dir_gen, path);
    4674           0 :         if (ret < 0)
    4675           0 :                 goto out;
    4676           0 :         ret = fs_path_add_path(path, name);
    4677           0 :         if (ret < 0)
    4678           0 :                 goto out;
    4679             : 
    4680           0 :         ref->dir = dir;
    4681           0 :         ref->dir_gen = dir_gen;
    4682           0 :         set_ref_path(ref, path);
    4683           0 :         list_add_tail(&ref->list, refs);
    4684           0 :         rb_add(&ref->node, root, rbtree_ref_less);
    4685           0 :         ref->root = root;
    4686           0 : out:
    4687           0 :         if (ret) {
    4688           0 :                 if (path && (!ref || !ref->full_path))
    4689           0 :                         fs_path_free(path);
    4690           0 :                 recorded_ref_free(ref);
    4691             :         }
    4692           0 :         return ret;
    4693             : }
    4694             : 
    4695           0 : static int record_new_ref_if_needed(int num, u64 dir, int index,
    4696             :                                     struct fs_path *name, void *ctx)
    4697             : {
    4698           0 :         int ret = 0;
    4699           0 :         struct send_ctx *sctx = ctx;
    4700           0 :         struct rb_node *node = NULL;
    4701           0 :         struct recorded_ref data;
    4702           0 :         struct recorded_ref *ref;
    4703           0 :         u64 dir_gen;
    4704             : 
    4705           0 :         ret = get_inode_gen(sctx->send_root, dir, &dir_gen);
    4706           0 :         if (ret < 0)
    4707           0 :                 goto out;
    4708             : 
    4709           0 :         data.dir = dir;
    4710           0 :         data.dir_gen = dir_gen;
    4711           0 :         set_ref_path(&data, name);
    4712           0 :         node = rb_find(&data, &sctx->rbtree_deleted_refs, rbtree_ref_comp);
    4713           0 :         if (node) {
    4714           0 :                 ref = rb_entry(node, struct recorded_ref, node);
    4715           0 :                 recorded_ref_free(ref);
    4716             :         } else {
    4717           0 :                 ret = record_ref_in_tree(&sctx->rbtree_new_refs,
    4718             :                                          &sctx->new_refs, name, dir, dir_gen,
    4719             :                                          sctx);
    4720             :         }
    4721           0 : out:
    4722           0 :         return ret;
    4723             : }
    4724             : 
    4725           0 : static int record_deleted_ref_if_needed(int num, u64 dir, int index,
    4726             :                                         struct fs_path *name, void *ctx)
    4727             : {
    4728           0 :         int ret = 0;
    4729           0 :         struct send_ctx *sctx = ctx;
    4730           0 :         struct rb_node *node = NULL;
    4731           0 :         struct recorded_ref data;
    4732           0 :         struct recorded_ref *ref;
    4733           0 :         u64 dir_gen;
    4734             : 
    4735           0 :         ret = get_inode_gen(sctx->parent_root, dir, &dir_gen);
    4736           0 :         if (ret < 0)
    4737           0 :                 goto out;
    4738             : 
    4739           0 :         data.dir = dir;
    4740           0 :         data.dir_gen = dir_gen;
    4741           0 :         set_ref_path(&data, name);
    4742           0 :         node = rb_find(&data, &sctx->rbtree_new_refs, rbtree_ref_comp);
    4743           0 :         if (node) {
    4744           0 :                 ref = rb_entry(node, struct recorded_ref, node);
    4745           0 :                 recorded_ref_free(ref);
    4746             :         } else {
    4747           0 :                 ret = record_ref_in_tree(&sctx->rbtree_deleted_refs,
    4748             :                                          &sctx->deleted_refs, name, dir,
    4749             :                                          dir_gen, sctx);
    4750             :         }
    4751           0 : out:
    4752           0 :         return ret;
    4753             : }
    4754             : 
    4755           0 : static int record_new_ref(struct send_ctx *sctx)
    4756             : {
    4757           0 :         int ret;
    4758             : 
    4759           0 :         ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
    4760             :                                 sctx->cmp_key, 0, record_new_ref_if_needed, sctx);
    4761           0 :         if (ret < 0)
    4762             :                 goto out;
    4763             :         ret = 0;
    4764             : 
    4765             : out:
    4766           0 :         return ret;
    4767             : }
    4768             : 
    4769           0 : static int record_deleted_ref(struct send_ctx *sctx)
    4770             : {
    4771           0 :         int ret;
    4772             : 
    4773           0 :         ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
    4774             :                                 sctx->cmp_key, 0, record_deleted_ref_if_needed,
    4775             :                                 sctx);
    4776           0 :         if (ret < 0)
    4777             :                 goto out;
    4778             :         ret = 0;
    4779             : 
    4780             : out:
    4781           0 :         return ret;
    4782             : }
    4783             : 
    4784           0 : static int record_changed_ref(struct send_ctx *sctx)
    4785             : {
    4786           0 :         int ret = 0;
    4787             : 
    4788           0 :         ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
    4789             :                         sctx->cmp_key, 0, record_new_ref_if_needed, sctx);
    4790           0 :         if (ret < 0)
    4791           0 :                 goto out;
    4792           0 :         ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
    4793             :                         sctx->cmp_key, 0, record_deleted_ref_if_needed, sctx);
    4794           0 :         if (ret < 0)
    4795             :                 goto out;
    4796             :         ret = 0;
    4797             : 
    4798           0 : out:
    4799           0 :         return ret;
    4800             : }
    4801             : 
    4802             : /*
    4803             :  * Record and process all refs at once. Needed when an inode changes the
    4804             :  * generation number, which means that it was deleted and recreated.
    4805             :  */
    4806           0 : static int process_all_refs(struct send_ctx *sctx,
    4807             :                             enum btrfs_compare_tree_result cmd)
    4808             : {
    4809           0 :         int ret = 0;
    4810           0 :         int iter_ret = 0;
    4811           0 :         struct btrfs_root *root;
    4812           0 :         struct btrfs_path *path;
    4813           0 :         struct btrfs_key key;
    4814           0 :         struct btrfs_key found_key;
    4815           0 :         iterate_inode_ref_t cb;
    4816           0 :         int pending_move = 0;
    4817             : 
    4818           0 :         path = alloc_path_for_send();
    4819           0 :         if (!path)
    4820             :                 return -ENOMEM;
    4821             : 
    4822           0 :         if (cmd == BTRFS_COMPARE_TREE_NEW) {
    4823           0 :                 root = sctx->send_root;
    4824           0 :                 cb = record_new_ref_if_needed;
    4825           0 :         } else if (cmd == BTRFS_COMPARE_TREE_DELETED) {
    4826           0 :                 root = sctx->parent_root;
    4827           0 :                 cb = record_deleted_ref_if_needed;
    4828             :         } else {
    4829           0 :                 btrfs_err(sctx->send_root->fs_info,
    4830             :                                 "Wrong command %d in process_all_refs", cmd);
    4831           0 :                 ret = -EINVAL;
    4832           0 :                 goto out;
    4833             :         }
    4834             : 
    4835           0 :         key.objectid = sctx->cmp_key->objectid;
    4836           0 :         key.type = BTRFS_INODE_REF_KEY;
    4837           0 :         key.offset = 0;
    4838           0 :         btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
    4839           0 :                 if (found_key.objectid != key.objectid ||
    4840           0 :                     (found_key.type != BTRFS_INODE_REF_KEY &&
    4841             :                      found_key.type != BTRFS_INODE_EXTREF_KEY))
    4842             :                         break;
    4843             : 
    4844           0 :                 ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx);
    4845           0 :                 if (ret < 0)
    4846           0 :                         goto out;
    4847             :         }
    4848             :         /* Catch error found during iteration */
    4849           0 :         if (iter_ret < 0) {
    4850           0 :                 ret = iter_ret;
    4851           0 :                 goto out;
    4852             :         }
    4853           0 :         btrfs_release_path(path);
    4854             : 
    4855             :         /*
    4856             :          * We don't actually care about pending_move as we are simply
    4857             :          * re-creating this inode and will be rename'ing it into place once we
    4858             :          * rename the parent directory.
    4859             :          */
    4860           0 :         ret = process_recorded_refs(sctx, &pending_move);
    4861           0 : out:
    4862           0 :         btrfs_free_path(path);
    4863           0 :         return ret;
    4864             : }
    4865             : 
    4866           0 : static int send_set_xattr(struct send_ctx *sctx,
    4867             :                           struct fs_path *path,
    4868             :                           const char *name, int name_len,
    4869             :                           const char *data, int data_len)
    4870             : {
    4871           0 :         int ret = 0;
    4872             : 
    4873           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
    4874           0 :         if (ret < 0)
    4875           0 :                 goto out;
    4876             : 
    4877           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
    4878           0 :         TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
    4879           0 :         TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len);
    4880             : 
    4881           0 :         ret = send_cmd(sctx);
    4882             : 
    4883           0 : tlv_put_failure:
    4884           0 : out:
    4885           0 :         return ret;
    4886             : }
    4887             : 
    4888           0 : static int send_remove_xattr(struct send_ctx *sctx,
    4889             :                           struct fs_path *path,
    4890             :                           const char *name, int name_len)
    4891             : {
    4892           0 :         int ret = 0;
    4893             : 
    4894           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR);
    4895           0 :         if (ret < 0)
    4896           0 :                 goto out;
    4897             : 
    4898           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
    4899           0 :         TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
    4900             : 
    4901           0 :         ret = send_cmd(sctx);
    4902             : 
    4903           0 : tlv_put_failure:
    4904           0 : out:
    4905           0 :         return ret;
    4906             : }
    4907             : 
    4908           0 : static int __process_new_xattr(int num, struct btrfs_key *di_key,
    4909             :                                const char *name, int name_len, const char *data,
    4910             :                                int data_len, void *ctx)
    4911             : {
    4912           0 :         int ret;
    4913           0 :         struct send_ctx *sctx = ctx;
    4914           0 :         struct fs_path *p;
    4915           0 :         struct posix_acl_xattr_header dummy_acl;
    4916             : 
    4917             :         /* Capabilities are emitted by finish_inode_if_needed */
    4918           0 :         if (!strncmp(name, XATTR_NAME_CAPS, name_len))
    4919             :                 return 0;
    4920             : 
    4921           0 :         p = fs_path_alloc();
    4922           0 :         if (!p)
    4923             :                 return -ENOMEM;
    4924             : 
    4925             :         /*
    4926             :          * This hack is needed because empty acls are stored as zero byte
    4927             :          * data in xattrs. Problem with that is, that receiving these zero byte
    4928             :          * acls will fail later. To fix this, we send a dummy acl list that
    4929             :          * only contains the version number and no entries.
    4930             :          */
    4931           0 :         if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) ||
    4932           0 :             !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) {
    4933           0 :                 if (data_len == 0) {
    4934           0 :                         dummy_acl.a_version =
    4935             :                                         cpu_to_le32(POSIX_ACL_XATTR_VERSION);
    4936           0 :                         data = (char *)&dummy_acl;
    4937           0 :                         data_len = sizeof(dummy_acl);
    4938             :                 }
    4939             :         }
    4940             : 
    4941           0 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
    4942           0 :         if (ret < 0)
    4943           0 :                 goto out;
    4944             : 
    4945           0 :         ret = send_set_xattr(sctx, p, name, name_len, data, data_len);
    4946             : 
    4947           0 : out:
    4948           0 :         fs_path_free(p);
    4949           0 :         return ret;
    4950             : }
    4951             : 
    4952           0 : static int __process_deleted_xattr(int num, struct btrfs_key *di_key,
    4953             :                                    const char *name, int name_len,
    4954             :                                    const char *data, int data_len, void *ctx)
    4955             : {
    4956           0 :         int ret;
    4957           0 :         struct send_ctx *sctx = ctx;
    4958           0 :         struct fs_path *p;
    4959             : 
    4960           0 :         p = fs_path_alloc();
    4961           0 :         if (!p)
    4962             :                 return -ENOMEM;
    4963             : 
    4964           0 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
    4965           0 :         if (ret < 0)
    4966           0 :                 goto out;
    4967             : 
    4968           0 :         ret = send_remove_xattr(sctx, p, name, name_len);
    4969             : 
    4970           0 : out:
    4971           0 :         fs_path_free(p);
    4972           0 :         return ret;
    4973             : }
    4974             : 
    4975           0 : static int process_new_xattr(struct send_ctx *sctx)
    4976             : {
    4977           0 :         int ret = 0;
    4978             : 
    4979           0 :         ret = iterate_dir_item(sctx->send_root, sctx->left_path,
    4980             :                                __process_new_xattr, sctx);
    4981             : 
    4982           0 :         return ret;
    4983             : }
    4984             : 
    4985           0 : static int process_deleted_xattr(struct send_ctx *sctx)
    4986             : {
    4987           0 :         return iterate_dir_item(sctx->parent_root, sctx->right_path,
    4988             :                                 __process_deleted_xattr, sctx);
    4989             : }
    4990             : 
    4991             : struct find_xattr_ctx {
    4992             :         const char *name;
    4993             :         int name_len;
    4994             :         int found_idx;
    4995             :         char *found_data;
    4996             :         int found_data_len;
    4997             : };
    4998             : 
    4999           0 : static int __find_xattr(int num, struct btrfs_key *di_key, const char *name,
    5000             :                         int name_len, const char *data, int data_len, void *vctx)
    5001             : {
    5002           0 :         struct find_xattr_ctx *ctx = vctx;
    5003             : 
    5004           0 :         if (name_len == ctx->name_len &&
    5005           0 :             strncmp(name, ctx->name, name_len) == 0) {
    5006           0 :                 ctx->found_idx = num;
    5007           0 :                 ctx->found_data_len = data_len;
    5008           0 :                 ctx->found_data = kmemdup(data, data_len, GFP_KERNEL);
    5009           0 :                 if (!ctx->found_data)
    5010             :                         return -ENOMEM;
    5011           0 :                 return 1;
    5012             :         }
    5013             :         return 0;
    5014             : }
    5015             : 
    5016           0 : static int find_xattr(struct btrfs_root *root,
    5017             :                       struct btrfs_path *path,
    5018             :                       struct btrfs_key *key,
    5019             :                       const char *name, int name_len,
    5020             :                       char **data, int *data_len)
    5021             : {
    5022           0 :         int ret;
    5023           0 :         struct find_xattr_ctx ctx;
    5024             : 
    5025           0 :         ctx.name = name;
    5026           0 :         ctx.name_len = name_len;
    5027           0 :         ctx.found_idx = -1;
    5028           0 :         ctx.found_data = NULL;
    5029           0 :         ctx.found_data_len = 0;
    5030             : 
    5031           0 :         ret = iterate_dir_item(root, path, __find_xattr, &ctx);
    5032           0 :         if (ret < 0)
    5033             :                 return ret;
    5034             : 
    5035           0 :         if (ctx.found_idx == -1)
    5036             :                 return -ENOENT;
    5037           0 :         if (data) {
    5038           0 :                 *data = ctx.found_data;
    5039           0 :                 *data_len = ctx.found_data_len;
    5040             :         } else {
    5041           0 :                 kfree(ctx.found_data);
    5042             :         }
    5043           0 :         return ctx.found_idx;
    5044             : }
    5045             : 
    5046             : 
    /*
     * iterate_dir_item() callback used by process_changed_xattr() while walking
     * the xattrs of the send root (left side).
     *
     * For the xattr (@name, @data) it looks up the same name in the parent
     * root (right side) and calls __process_new_xattr() when the xattr is
     * either absent there (-ENOENT) or present with different data (length or
     * content differs).  An identical xattr yields 0.  Any other negative
     * result from find_xattr() is returned unchanged.
     */
    5047           0 : static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
    5048             :                                        const char *name, int name_len,
    5049             :                                        const char *data, int data_len,
    5050             :                                        void *ctx)
    5051             : {
    5052           0 :         int ret;
    5053           0 :         struct send_ctx *sctx = ctx;
    5054           0 :         char *found_data = NULL;
    5055           0 :         int found_data_len  = 0;
    5056             : 
    5057           0 :         ret = find_xattr(sctx->parent_root, sctx->right_path,
    5058             :                          sctx->cmp_key, name, name_len, &found_data,
    5059             :                          &found_data_len);
    5060           0 :         if (ret == -ENOENT) {
                                       /* Not present in the parent: treat as a new xattr. */
    5061           0 :                 ret = __process_new_xattr(num, di_key, name, name_len, data,
    5062             :                                           data_len, ctx);
    5063           0 :         } else if (ret >= 0) {
                                       /* Present in both: only act if the value changed. */
    5064           0 :                 if (data_len != found_data_len ||
    5065           0 :                     memcmp(data, found_data, data_len)) {
    5066           0 :                         ret = __process_new_xattr(num, di_key, name, name_len,
    5067             :                                                   data, data_len, ctx);
    5068             :                 } else {
    5069             :                         ret = 0;
    5070             :                 }
    5071             :         }
    5072             : 
                               /* found_data stays NULL unless find_xattr() handed a buffer back. */
    5073           0 :         kfree(found_data);
    5074           0 :         return ret;
    5075             : }
    5076             : 
    /*
     * iterate_dir_item() callback used by process_changed_xattr() while walking
     * the xattrs of the parent root (right side).
     *
     * Looks up @name in the send root (left side).  If it is missing there
     * (-ENOENT) the xattr is handed to __process_deleted_xattr(); if it exists
     * the result is 0.  Other negative results from find_xattr() are returned
     * unchanged.
     */
    5077           0 : static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
    5078             :                                            const char *name, int name_len,
    5079             :                                            const char *data, int data_len,
    5080             :                                            void *ctx)
    5081             : {
    5082           0 :         int ret;
    5083           0 :         struct send_ctx *sctx = ctx;
    5084             : 
                               /* NULL data/data_len: only existence matters, find_xattr() frees the value. */
    5085           0 :         ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key,
    5086             :                          name, name_len, NULL, NULL);
    5087           0 :         if (ret == -ENOENT)
    5088           0 :                 ret = __process_deleted_xattr(num, di_key, name, name_len, data,
    5089             :                                               data_len, ctx);
    5090           0 :         else if (ret >= 0)
    5091             :                 ret = 0;
    5092             : 
    5093           0 :         return ret;
    5094             : }
    5095             : 
    /*
     * Handle an xattr item that differs between the send root and the parent
     * root, in two passes:
     *   1. walk the send root's entries (left_path) with
     *      __process_changed_new_xattr, which handles new or modified xattrs;
     *   2. walk the parent root's entries (right_path) with
     *      __process_changed_deleted_xattr, which handles xattrs that no
     *      longer exist in the send root.
     *
     * The second pass is skipped if the first one fails.  Returns the result
     * of the last iterate_dir_item() call that ran.
     */
    5096           0 : static int process_changed_xattr(struct send_ctx *sctx)
    5097             : {
    5098           0 :         int ret = 0;
    5099             : 
    5100           0 :         ret = iterate_dir_item(sctx->send_root, sctx->left_path,
    5101             :                         __process_changed_new_xattr, sctx);
    5102           0 :         if (ret < 0)
    5103           0 :                 goto out;
    5104           0 :         ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
    5105             :                         __process_changed_deleted_xattr, sctx);
    5106             : 
    5107           0 : out:
    5108           0 :         return ret;
    5109             : }
    5110             : 
    /*
     * Run __process_new_xattr over every xattr item of the inode identified by
     * sctx->cmp_key->objectid in the send root.
     *
     * A private path is allocated for the walk and freed before returning.
     * Returns -ENOMEM if the path cannot be allocated, a negative value from
     * iterate_dir_item() or from the slot iteration itself, and otherwise the
     * last value assigned to ret (0 when the walk leaves the inode's xattr
     * items or finds none).
     */
    5111           0 : static int process_all_new_xattrs(struct send_ctx *sctx)
    5112             : {
    5113           0 :         int ret = 0;
    5114           0 :         int iter_ret = 0;
    5115           0 :         struct btrfs_root *root;
    5116           0 :         struct btrfs_path *path;
    5117           0 :         struct btrfs_key key;
    5118           0 :         struct btrfs_key found_key;
    5119             : 
    5120           0 :         path = alloc_path_for_send();
    5121           0 :         if (!path)
    5122             :                 return -ENOMEM;
    5123             : 
    5124           0 :         root = sctx->send_root;
    5125             : 
                               /* Start at the lowest possible xattr key for this inode. */
    5126           0 :         key.objectid = sctx->cmp_key->objectid;
    5127           0 :         key.type = BTRFS_XATTR_ITEM_KEY;
    5128           0 :         key.offset = 0;
    5129           0 :         btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
                                       /* Stop at the first key of another inode or another item type. */
    5130           0 :                 if (found_key.objectid != key.objectid ||
    5131           0 :                     found_key.type != key.type) {
    5132             :                         ret = 0;
    5133             :                         break;
    5134             :                 }
    5135             : 
    5136           0 :                 ret = iterate_dir_item(root, path, __process_new_xattr, sctx);
    5137           0 :                 if (ret < 0)
    5138             :                         break;
    5139             :         }
    5140             :         /* Catch error found during iteration */
    5141           0 :         if (iter_ret < 0)
    5142           0 :                 ret = iter_ret;
    5143             : 
    5144           0 :         btrfs_free_path(path);
    5145           0 :         return ret;
    5146             : }
    5147             : 
    /*
     * Emit a BTRFS_SEND_C_ENABLE_VERITY command for the file at @path, built
     * from the fs-verity descriptor @desc.
     *
     * The command carries the path, the hash algorithm, the block size
     * (computed as 1 << log_blocksize), the salt (salt_size bytes) and the
     * signature (sig_size bytes).
     *
     * Returns the result of begin_cmd() if that fails, otherwise the result of
     * send_cmd().  The tlv_put_failure label is presumably the jump target of
     * the TLV_PUT* macros on failure (they are defined outside this chunk).
     */
    5148             : static int send_verity(struct send_ctx *sctx, struct fs_path *path,
    5149             :                        struct fsverity_descriptor *desc)
    5150             : {
    5151             :         int ret;
    5152             : 
    5153             :         ret = begin_cmd(sctx, BTRFS_SEND_C_ENABLE_VERITY);
    5154             :         if (ret < 0)
    5155             :                 goto out;
    5156             : 
    5157             :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
    5158             :         TLV_PUT_U8(sctx, BTRFS_SEND_A_VERITY_ALGORITHM,
    5159             :                         le8_to_cpu(desc->hash_algorithm));
                               /* The descriptor stores log2 of the block size; the stream gets the size itself. */
    5160             :         TLV_PUT_U32(sctx, BTRFS_SEND_A_VERITY_BLOCK_SIZE,
    5161             :                         1U << le8_to_cpu(desc->log_blocksize));
    5162             :         TLV_PUT(sctx, BTRFS_SEND_A_VERITY_SALT_DATA, desc->salt,
    5163             :                         le8_to_cpu(desc->salt_size));
    5164             :         TLV_PUT(sctx, BTRFS_SEND_A_VERITY_SIG_DATA, desc->signature,
    5165             :                         le32_to_cpu(desc->sig_size));
    5166             : 
    5167             :         ret = send_cmd(sctx);
    5168             : 
    5169             : tlv_put_failure:
    5170             : out:
    5171             :         return ret;
    5172             : }
    5173             : 
    /*
     * Read the fs-verity descriptor of the current inode (sctx->cur_ino in the
     * send root) and emit it through send_verity().
     *
     * Steps:
     *   1. get the inode with btrfs_iget();
     *   2. call btrfs_get_verity_descriptor() with a NULL buffer; the return
     *      value is used as the descriptor size below;
     *   3. reject sizes above FS_VERITY_MAX_DESCRIPTOR_SIZE with -EMSGSIZE;
     *   4. allocate sctx->verity_descriptor on first use (maximum size, via
     *      kvmalloc) -- it is kept in sctx and not freed here;
     *   5. read the descriptor into that buffer, build the current path and
     *      send the command.
     *
     * Returns a negative errno from any failing step, otherwise the result of
     * send_verity().  The inode reference is dropped on every path after a
     * successful btrfs_iget().
     */
    5174           0 : static int process_verity(struct send_ctx *sctx)
    5175             : {
    5176           0 :         int ret = 0;
    5177           0 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    5178           0 :         struct inode *inode;
    5179           0 :         struct fs_path *p;
    5180             : 
    5181           0 :         inode = btrfs_iget(fs_info->sb, sctx->cur_ino, sctx->send_root);
    5182           0 :         if (IS_ERR(inode))
    5183           0 :                 return PTR_ERR(inode);
    5184             : 
                               /* Size query: no buffer supplied, ret is reused as the size further down. */
    5185           0 :         ret = btrfs_get_verity_descriptor(inode, NULL, 0);
    5186           0 :         if (ret < 0)
    5187           0 :                 goto iput;
    5188             : 
    5189             :         if (ret > FS_VERITY_MAX_DESCRIPTOR_SIZE) {
    5190             :                 ret = -EMSGSIZE;
    5191             :                 goto iput;
    5192             :         }
                               /* Lazily allocated once per send context and then reused. */
    5193             :         if (!sctx->verity_descriptor) {
    5194             :                 sctx->verity_descriptor = kvmalloc(FS_VERITY_MAX_DESCRIPTOR_SIZE,
    5195             :                                                    GFP_KERNEL);
    5196             :                 if (!sctx->verity_descriptor) {
    5197             :                         ret = -ENOMEM;
    5198             :                         goto iput;
    5199             :                 }
    5200             :         }
    5201             : 
    5202             :         ret = btrfs_get_verity_descriptor(inode, sctx->verity_descriptor, ret);
    5203             :         if (ret < 0)
    5204             :                 goto iput;
    5205             : 
    5206             :         p = fs_path_alloc();
    5207             :         if (!p) {
    5208             :                 ret = -ENOMEM;
    5209             :                 goto iput;
    5210             :         }
    5211             :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
    5212             :         if (ret < 0)
    5213             :                 goto free_path;
    5214             : 
    5215             :         ret = send_verity(sctx, p, sctx->verity_descriptor);
                               /* The goto below is redundant: free_path is the next statement anyway. */
    5216             :         if (ret < 0)
    5217             :                 goto free_path;
    5218             : 
    5219             : free_path:
    5220             :         fs_path_free(p);
    5221             : iput:
    5222           0 :         iput(inode);
    5223           0 :         return ret;
    5224             : }
    5225             : 
    /*
     * Largest data chunk used per command by callers such as send_hole():
     * the send buffer size minus SZ_16K.  The 16K are presumably headroom for
     * the command header and the non-data attributes (path, offset, ...) --
     * TODO confirm against where send_max_size is set up.
     */
    5226             : static inline u64 max_send_read_size(const struct send_ctx *sctx)
    5227             : {
    5228           0 :         return sctx->send_max_size - SZ_16K;
    5229             : }
    5230             : 
    /*
     * Append the header of a BTRFS_SEND_A_DATA attribute to the send buffer
     * and advance sctx->send_size past it.  The @len payload bytes are NOT
     * written here: the caller copies them in right after the header and
     * advances send_size itself.
     *
     * Protocol >= 2 writes only the 16-bit attribute type; older protocols
     * write a full btrfs_tlv_header with type and @len.
     *
     * Returns 0 on success, -EINVAL (with a one-time warning) if a data
     * attribute was already started (sctx->put_data set), or -EOVERFLOW if
     * header plus @len do not fit in the remaining buffer space.  Note that
     * sctx->put_data is set before the space check, so it remains set on the
     * -EOVERFLOW path.
     */
    5231           0 : static int put_data_header(struct send_ctx *sctx, u32 len)
    5232             : {
    5233           0 :         if (WARN_ON_ONCE(sctx->put_data))
    5234             :                 return -EINVAL;
    5235           0 :         sctx->put_data = true;
    5236           0 :         if (sctx->proto >= 2) {
    5237             :                 /*
    5238             :                  * Since v2, the data attribute header doesn't include a length,
    5239             :                  * it is implicitly to the end of the command.
    5240             :                  */
    5241           0 :                 if (sctx->send_max_size - sctx->send_size < sizeof(__le16) + len)
    5242             :                         return -EOVERFLOW;
    5243           0 :                 put_unaligned_le16(BTRFS_SEND_A_DATA, sctx->send_buf + sctx->send_size);
    5244           0 :                 sctx->send_size += sizeof(__le16);
    5245             :         } else {
    5246           0 :                 struct btrfs_tlv_header *hdr;
    5247             : 
    5248           0 :                 if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len)
    5249             :                         return -EOVERFLOW;
    5250           0 :                 hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size);
                                       /* Unaligned stores: the header sits at an arbitrary buffer offset. */
    5251           0 :                 put_unaligned_le16(BTRFS_SEND_A_DATA, &hdr->tlv_type);
    5252           0 :                 put_unaligned_le16(len, &hdr->tlv_len);
    5253           0 :                 sctx->send_size += sizeof(*hdr);
    5254             :         }
    5255             :         return 0;
    5256             : }
    5257             : 
    /*
     * Append a data attribute holding @len bytes of the current inode's file
     * content, starting at file offset @offset, to the send buffer.
     *
     * The header is written by put_data_header(); the payload is then copied
     * page by page out of sctx->cur_inode's mapping, advancing
     * sctx->send_size as it goes.
     *
     * Returns 0 on success, the error from put_data_header(), -ENOMEM if a
     * page cannot be obtained, or -EIO if a page is still not uptodate after
     * a read attempt.  On error the buffer may already contain the header and
     * part of the payload.
     */
    5258           0 : static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
    5259             : {
    5260           0 :         struct btrfs_root *root = sctx->send_root;
    5261           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5262           0 :         struct page *page;
    5263           0 :         pgoff_t index = offset >> PAGE_SHIFT;
    5264           0 :         pgoff_t last_index;
    5265           0 :         unsigned pg_offset = offset_in_page(offset);
    5266           0 :         int ret;
    5267             : 
    5268           0 :         ret = put_data_header(sctx, len);
    5269           0 :         if (ret)
    5270             :                 return ret;
    5271             : 
                               /* Index of the page that holds the last requested byte. */
    5272           0 :         last_index = (offset + len - 1) >> PAGE_SHIFT;
    5273             : 
    5274           0 :         while (index <= last_index) {
                                       /* Only the first page can start mid-page; pg_offset is reset to 0 below. */
    5275           0 :                 unsigned cur_len = min_t(unsigned, len,
    5276             :                                          PAGE_SIZE - pg_offset);
    5277             : 
    5278           0 :                 page = find_lock_page(sctx->cur_inode->i_mapping, index);
    5279           0 :                 if (!page) {
                                               /* Nothing found: start readahead for the rest of the range, then get/create the page. */
    5280           0 :                         page_cache_sync_readahead(sctx->cur_inode->i_mapping,
    5281             :                                                   &sctx->ra, NULL, index,
    5282           0 :                                                   last_index + 1 - index);
    5283             : 
    5284           0 :                         page = find_or_create_page(sctx->cur_inode->i_mapping,
    5285             :                                                    index, GFP_KERNEL);
    5286           0 :                         if (!page) {
    5287             :                                 ret = -ENOMEM;
    5288             :                                 break;
    5289             :                         }
    5290             :                 }
    5291             : 
    5292           0 :                 if (PageReadahead(page))
    5293           0 :                         page_cache_async_readahead(sctx->cur_inode->i_mapping,
    5294             :                                                    &sctx->ra, NULL, page_folio(page),
    5295           0 :                                                    index, last_index + 1 - index);
    5296             : 
    5297           0 :                 if (!PageUptodate(page)) {
                                               /*
                                                * NOTE(review): the page is re-locked after the read, so
                                                * btrfs_read_folio() presumably returns with it unlocked --
                                                * confirm.  Its return value is ignored; success is judged
                                                * by the PageUptodate() re-check.
                                                */
    5298           0 :                         btrfs_read_folio(NULL, page_folio(page));
    5299           0 :                         lock_page(page);
    5300           0 :                         if (!PageUptodate(page)) {
    5301           0 :                                 unlock_page(page);
    5302           0 :                                 btrfs_err(fs_info,
    5303             :                         "send: IO error at offset %llu for inode %llu root %llu",
    5304             :                                         page_offset(page), sctx->cur_ino,
    5305             :                                         sctx->send_root->root_key.objectid);
    5306           0 :                                 put_page(page);
    5307           0 :                                 ret = -EIO;
    5308           0 :                                 break;
    5309             :                         }
    5310             :                 }
    5311             : 
    5312           0 :                 memcpy_from_page(sctx->send_buf + sctx->send_size, page,
    5313             :                                  pg_offset, cur_len);
    5314           0 :                 unlock_page(page);
    5315           0 :                 put_page(page);
    5316           0 :                 index++;
    5317           0 :                 pg_offset = 0;
    5318           0 :                 len -= cur_len;
    5319           0 :                 sctx->send_size += cur_len;
    5320             :         }
    5321             : 
    5322             :         return ret;
    5323             : }
    5324             : 
    5325             : /*
    5326             :  * Read some bytes from the current inode/file and send a write command to
    5327             :  * user space.
    5328             :  */
    /*
     * The BTRFS_SEND_C_WRITE command carries the current inode's path, the
     * file @offset and @len bytes of file data appended by put_file_data().
     *
     * Returns -ENOMEM if the path cannot be allocated, a negative value from
     * begin_cmd(), get_cur_path() or put_file_data(), otherwise the result of
     * send_cmd().  The path is freed on every exit after allocation.
     */
    5329           0 : static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
    5330             : {
    5331           0 :         struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
    5332           0 :         int ret = 0;
    5333           0 :         struct fs_path *p;
    5334             : 
    5335           0 :         p = fs_path_alloc();
    5336           0 :         if (!p)
    5337             :                 return -ENOMEM;
    5338             : 
                               /* NOTE(review): len is u32 but is printed with %d. */
    5339           0 :         btrfs_debug(fs_info, "send_write offset=%llu, len=%d", offset, len);
    5340             : 
    5341           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
    5342           0 :         if (ret < 0)
    5343           0 :                 goto out;
    5344             : 
    5345           0 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
    5346           0 :         if (ret < 0)
    5347           0 :                 goto out;
    5348             : 
    5349           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    5350           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
                               /* The data attribute is added last, after path and offset. */
    5351           0 :         ret = put_file_data(sctx, offset, len);
    5352           0 :         if (ret < 0)
    5353           0 :                 goto out;
    5354             : 
    5355           0 :         ret = send_cmd(sctx);
    5356             : 
    5357           0 : tlv_put_failure:
    5358           0 : out:
    5359           0 :         fs_path_free(p);
    5360           0 :         return ret;
    5361             : }
    5362             : 
    5363             : /*
    5364             :  * Send a clone command to user space.
    5365             :  */
    /*
     * The BTRFS_SEND_C_CLONE command tells the receiver to clone @len bytes
     * into the current inode at @offset from the source described by
     * @clone_root (root, inode number and offset).
     *
     * The single fs_path @p is used twice: first for the destination path
     * (current inode), then -- after that path has been put into the command
     * -- for the path of the clone source inode.
     *
     * Returns -ENOMEM if the path cannot be allocated, a negative value from
     * any failing helper, otherwise the result of send_cmd().
     */
    5366           0 : static int send_clone(struct send_ctx *sctx,
    5367             :                       u64 offset, u32 len,
    5368             :                       struct clone_root *clone_root)
    5369             : {
    5370           0 :         int ret = 0;
    5371           0 :         struct fs_path *p;
    5372           0 :         u64 gen;
    5373             : 
                               /* NOTE(review): len is u32 but is printed with %d. */
    5374           0 :         btrfs_debug(sctx->send_root->fs_info,
    5375             :                     "send_clone offset=%llu, len=%d, clone_root=%llu, clone_inode=%llu, clone_offset=%llu",
    5376             :                     offset, len, clone_root->root->root_key.objectid,
    5377             :                     clone_root->ino, clone_root->offset);
    5378             : 
    5379           0 :         p = fs_path_alloc();
    5380           0 :         if (!p)
    5381             :                 return -ENOMEM;
    5382             : 
    5383           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE);
    5384           0 :         if (ret < 0)
    5385           0 :                 goto out;
    5386             : 
    5387           0 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
    5388           0 :         if (ret < 0)
    5389           0 :                 goto out;
    5390             : 
    5391           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
                               /* The clone length goes out as a 64-bit attribute although len is u32 here. */
    5392           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len);
    5393           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    5394             : 
                               /*
                                * From here on p is reused for the clone source path.  A source in
                                * the send root is resolved with get_cur_path() (using the inode's
                                * generation); a source in another root uses get_inode_path().
                                */
    5395           0 :         if (clone_root->root == sctx->send_root) {
    5396           0 :                 ret = get_inode_gen(sctx->send_root, clone_root->ino, &gen);
    5397           0 :                 if (ret < 0)
    5398           0 :                         goto out;
    5399           0 :                 ret = get_cur_path(sctx, clone_root->ino, gen, p);
    5400             :         } else {
    5401           0 :                 ret = get_inode_path(clone_root->root, clone_root->ino, p);
    5402             :         }
    5403           0 :         if (ret < 0)
    5404           0 :                 goto out;
    5405             : 
    5406             :         /*
    5407             :          * If the parent we're using has a received_uuid set then use that as
    5408             :          * our clone source as that is what we will look for when doing a
    5409             :          * receive.
    5410             :          *
    5411             :          * This covers the case that we create a snapshot off of a received
    5412             :          * subvolume and then use that as the parent and try to receive on a
    5413             :          * different host.
    5414             :          */
    5415           0 :         if (!btrfs_is_empty_uuid(clone_root->root->root_item.received_uuid))
    5416           0 :                 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
    5417             :                              clone_root->root->root_item.received_uuid);
    5418             :         else
    5419           0 :                 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
    5420             :                              clone_root->root->root_item.uuid);
    5421           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
    5422             :                     btrfs_root_ctransid(&clone_root->root->root_item));
    5423           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
    5424           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
    5425             :                         clone_root->offset);
    5426             : 
    5427           0 :         ret = send_cmd(sctx);
    5428             : 
    5429           0 : tlv_put_failure:
    5430           0 : out:
    5431           0 :         fs_path_free(p);
    5432           0 :         return ret;
    5433             : }
    5434             : 
    5435             : /*
    5436             :  * Send an update extent command to user space.
    5437             :  */
    /*
     * The BTRFS_SEND_C_UPDATE_EXTENT command carries the current inode's
     * path, the file @offset and @len (as a 64-bit size attribute), with no
     * file data.  send_hole() uses it when BTRFS_SEND_FLAG_NO_FILE_DATA is
     * set.
     *
     * Returns -ENOMEM if the path cannot be allocated, a negative value from
     * begin_cmd() or get_cur_path(), otherwise the result of send_cmd().
     */
    5438           0 : static int send_update_extent(struct send_ctx *sctx,
    5439             :                               u64 offset, u32 len)
    5440             : {
    5441           0 :         int ret = 0;
    5442           0 :         struct fs_path *p;
    5443             : 
    5444           0 :         p = fs_path_alloc();
    5445           0 :         if (!p)
    5446             :                 return -ENOMEM;
    5447             : 
    5448           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT);
    5449           0 :         if (ret < 0)
    5450           0 :                 goto out;
    5451             : 
    5452           0 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
    5453           0 :         if (ret < 0)
    5454           0 :                 goto out;
    5455             : 
    5456           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    5457           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
    5458           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
    5459             : 
    5460           0 :         ret = send_cmd(sctx);
    5461             : 
    5462           0 : tlv_put_failure:
    5463           0 : out:
    5464           0 :         fs_path_free(p);
    5465           0 :         return ret;
    5466             : }
    5467             : 
    /*
     * Represent the hole between sctx->cur_inode_last_extent and @end in the
     * send stream.
     *
     * Nothing is sent if the hole starts at or beyond the inode size, and @end
     * is clamped to the inode size.  With BTRFS_SEND_FLAG_NO_FILE_DATA a
     * single update-extent command is sent; otherwise the hole is emitted as
     * a series of BTRFS_SEND_C_WRITE commands whose data is all zeroes, each
     * at most max_send_read_size() bytes long.
     *
     * Returns 0 on success or a negative errno.  When the loop exits (normally
     * or through a break on error) sctx->cur_inode_next_write_offset is set
     * to the offset reached so far; a jump to tlv_put_failure skips that
     * update.
     */
    5468           0 : static int send_hole(struct send_ctx *sctx, u64 end)
    5469             : {
    5470           0 :         struct fs_path *p = NULL;
    5471           0 :         u64 read_size = max_send_read_size(sctx);
    5472           0 :         u64 offset = sctx->cur_inode_last_extent;
    5473           0 :         int ret = 0;
    5474             : 
    5475             :         /*
    5476             :          * A hole that starts at EOF or beyond it. Since we do not yet support
    5477             :          * fallocate (for extent preallocation and hole punching), sending a
    5478             :          * write of zeroes starting at EOF or beyond would later require issuing
    5479             :          * a truncate operation which would undo the write and achieve nothing.
    5480             :          */
    5481           0 :         if (offset >= sctx->cur_inode_size)
    5482             :                 return 0;
    5483             : 
    5484             :         /*
    5485             :          * Don't go beyond the inode's i_size due to prealloc extents that start
    5486             :          * after the i_size.
    5487             :          */
    5488           0 :         end = min_t(u64, end, sctx->cur_inode_size);
    5489             : 
                               /*
                                * NOTE(review): end - offset is u64 but send_update_extent() takes a
                                * u32 length, so a hole of 4 GiB or more would be narrowed -- confirm
                                * whether callers bound the hole size.
                                */
    5490           0 :         if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
    5491           0 :                 return send_update_extent(sctx, offset, end - offset);
    5492             : 
    5493           0 :         p = fs_path_alloc();
    5494           0 :         if (!p)
    5495             :                 return -ENOMEM;
    5496           0 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
    5497           0 :         if (ret < 0)
    5498           0 :                 goto tlv_put_failure;
    5499           0 :         while (offset < end) {
                                       /* One write command per chunk of at most read_size bytes. */
    5500           0 :                 u64 len = min(end - offset, read_size);
    5501             : 
    5502           0 :                 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
    5503           0 :                 if (ret < 0)
    5504             :                         break;
    5505           0 :                 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
    5506           0 :                 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
                                       /* len (u64) is passed as u32; it is bounded by read_size above. */
    5507           0 :                 ret = put_data_header(sctx, len);
    5508           0 :                 if (ret < 0)
    5509             :                         break;
                                       /* The payload of a hole is simply zeroes written straight into the buffer. */
    5510           0 :                 memset(sctx->send_buf + sctx->send_size, 0, len);
    5511           0 :                 sctx->send_size += len;
    5512           0 :                 ret = send_cmd(sctx);
    5513           0 :                 if (ret < 0)
    5514             :                         break;
    5515           0 :                 offset += len;
    5516             :         }
    5517           0 :         sctx->cur_inode_next_write_offset = offset;
    5518           0 : tlv_put_failure:
    5519           0 :         fs_path_free(p);
    5520           0 :         return ret;
    5521             : }
    5522             : 
    /*
     * Send the inline file extent item at @path (leaf path->nodes[0], slot
     * path->slots[0]) as a BTRFS_SEND_C_ENCODED_WRITE command, copying the
     * inline bytes out of the leaf as they are stored.
     *
     * @offset is the file offset being sent and @len caps the unencoded file
     * length reported to the receiver.  The command carries the path, file
     * offset, unencoded file length, unencoded length (ram_bytes), unencoded
     * offset (offset - key.offset), the compression type and finally the
     * inline data.
     *
     * Returns a negative errno on failure, otherwise the result of send_cmd().
     *
     * NOTE(review): the inode obtained with btrfs_iget() is not used in this
     * function other than being released with iput() -- presumably it is held
     * only to pin the inode; confirm.
     */
    5523           0 : static int send_encoded_inline_extent(struct send_ctx *sctx,
    5524             :                                       struct btrfs_path *path, u64 offset,
    5525             :                                       u64 len)
    5526             : {
    5527           0 :         struct btrfs_root *root = sctx->send_root;
    5528           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5529           0 :         struct inode *inode;
    5530           0 :         struct fs_path *fspath;
    5531           0 :         struct extent_buffer *leaf = path->nodes[0];
    5532           0 :         struct btrfs_key key;
    5533           0 :         struct btrfs_file_extent_item *ei;
    5534           0 :         u64 ram_bytes;
    5535           0 :         size_t inline_size;
    5536           0 :         int ret;
    5537             : 
    5538           0 :         inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root);
    5539           0 :         if (IS_ERR(inode))
    5540           0 :                 return PTR_ERR(inode);
    5541             : 
    5542           0 :         fspath = fs_path_alloc();
    5543           0 :         if (!fspath) {
                                       /* Reaches fs_path_free(NULL) at out; presumably NULL-safe -- confirm. */
    5544           0 :                 ret = -ENOMEM;
    5545           0 :                 goto out;
    5546             :         }
    5547             : 
    5548           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_ENCODED_WRITE);
    5549           0 :         if (ret < 0)
    5550           0 :                 goto out;
    5551             : 
    5552           0 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
    5553           0 :         if (ret < 0)
    5554           0 :                 goto out;
    5555             : 
    5556           0 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    5557           0 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
    5558           0 :         ram_bytes = btrfs_file_extent_ram_bytes(leaf, ei);
    5559           0 :         inline_size = btrfs_file_extent_inline_item_len(leaf, path->slots[0]);
    5560             : 
    5561           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, fspath);
    5562           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
                               /* Bytes from @offset to the end of the extent, capped at @len. */
    5563           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_FILE_LEN,
    5564             :                     min(key.offset + ram_bytes - offset, len));
    5565           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_LEN, ram_bytes);
    5566           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_OFFSET, offset - key.offset);
                               /* A non-negative result is the compression value put into the stream. */
    5567           0 :         ret = btrfs_encoded_io_compression_from_extent(fs_info,
    5568             :                                 btrfs_file_extent_compression(leaf, ei));
    5569           0 :         if (ret < 0)
    5570           0 :                 goto out;
    5571           0 :         TLV_PUT_U32(sctx, BTRFS_SEND_A_COMPRESSION, ret);
    5572             : 
    5573           0 :         ret = put_data_header(sctx, inline_size);
    5574           0 :         if (ret < 0)
    5575           0 :                 goto out;
                               /* Copy the inline payload straight from the leaf into the send buffer. */
    5576           0 :         read_extent_buffer(leaf, sctx->send_buf + sctx->send_size,
    5577             :                            btrfs_file_extent_inline_start(ei), inline_size);
    5578           0 :         sctx->send_size += inline_size;
    5579             : 
    5580           0 :         ret = send_cmd(sctx);
    5581             : 
    5582           0 : tlv_put_failure:
    5583           0 : out:
    5584           0 :         fs_path_free(fspath);
    5585           0 :         iput(inode);
    5586           0 :         return ret;
    5587             : }
    5588             : /*
                     :  * Send the file extent item at path->slots[0] of path->nodes[0] as one
                     :  * BTRFS_SEND_C_ENCODED_WRITE command. The caller in this file only uses it
                     :  * for compressed, non-inline extents.
                     :  *
                     :  * The command attributes (PATH, FILE_OFFSET, UNENCODED_FILE_LEN,
                     :  * UNENCODED_LEN, UNENCODED_OFFSET, COMPRESSION, ENCRYPTION) are built in
                     :  * sctx->send_buf. Then disk_num_bytes bytes are read with
                     :  * btrfs_encoded_read_regular_fill_pages() into the send buffer pages at
                     :  * the next page boundary, and the attribute part and the data part are
                     :  * written to sctx->send_filp with two consecutive write_buf() calls.
                     :  *
                     :  * @sctx:   send context; sctx->cur_ino and sctx->cur_inode_gen select the
                     :  *          inode and its path.
                     :  * @path:   its current leaf and slot are read as a btrfs_file_extent_item.
                     :  * @offset: file offset used for the FILE_OFFSET and UNENCODED_OFFSET
                     :  *          attributes and passed to the encoded read.
                     :  * @len:    only used as an upper bound for the UNENCODED_FILE_LEN attribute.
                     :  *
                     :  * Return: 0 on success or a negative errno: the btrfs_iget() error,
                     :  * -ENOMEM if the fs_path cannot be allocated, -EOVERFLOW if the page
                     :  * aligned data does not fit in the send buffer, or an error from one of
                     :  * the helpers called here.
                     :  *
                     :  * NOTE(review): nothing in this function jumps to tlv_put_failure
                     :  * explicitly; presumably the TLV_PUT_*() macros do so (and assign ret) on
                     :  * failure - confirm against their definitions.
                     :  */
    5589           0 : static int send_encoded_extent(struct send_ctx *sctx, struct btrfs_path *path,
    5590             :                                u64 offset, u64 len)
    5591             : {
    5592           0 :         struct btrfs_root *root = sctx->send_root;
    5593           0 :         struct btrfs_fs_info *fs_info = root->fs_info;
    5594           0 :         struct inode *inode;
    5595           0 :         struct fs_path *fspath;
    5596           0 :         struct extent_buffer *leaf = path->nodes[0];
    5597           0 :         struct btrfs_key key;
    5598           0 :         struct btrfs_file_extent_item *ei;
    5599           0 :         u64 disk_bytenr, disk_num_bytes;
    5600           0 :         u32 data_offset;
    5601           0 :         struct btrfs_cmd_header *hdr;
    5602           0 :         u32 crc;
    5603           0 :         int ret;
    5604             :         /* The inode is passed to the encoded read below; iput() at 'out' drops it. */
    5605           0 :         inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root);
    5606           0 :         if (IS_ERR(inode))
    5607           0 :                 return PTR_ERR(inode);
    5608             : 
    5609           0 :         fspath = fs_path_alloc();
    5610           0 :         if (!fspath) {
    5611           0 :                 ret = -ENOMEM;
    5612           0 :                 goto out;
    5613             :         }
    5614             : 
    5615           0 :         ret = begin_cmd(sctx, BTRFS_SEND_C_ENCODED_WRITE);
    5616           0 :         if (ret < 0)
    5617           0 :                 goto out;
    5618             : 
    5619           0 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
    5620           0 :         if (ret < 0)
    5621           0 :                 goto out;
    5622             : 
    5623           0 :         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
    5624           0 :         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
    5625           0 :         disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei);
    5626           0 :         disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, ei);
    5627             :         /*
                     :          * UNENCODED_FILE_LEN is the number of bytes from @offset to the end
                     :          * of this extent item, capped at @len. UNENCODED_OFFSET is @offset
                     :          * relative to the item's key offset plus the item's extent offset.
                     :          */
    5628           0 :         TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, fspath);
    5629           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
    5630           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_FILE_LEN,
    5631             :                     min(key.offset + btrfs_file_extent_num_bytes(leaf, ei) - offset,
    5632             :                         len));
    5633           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_LEN,
    5634             :                     btrfs_file_extent_ram_bytes(leaf, ei));
    5635           0 :         TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_OFFSET,
    5636             :                     offset - key.offset + btrfs_file_extent_offset(leaf, ei));
    5637           0 :         ret = btrfs_encoded_io_compression_from_extent(fs_info,
    5638             :                                 btrfs_file_extent_compression(leaf, ei));
    5639           0 :         if (ret < 0)
    5640           0 :                 goto out;
    5641           0 :         TLV_PUT_U32(sctx, BTRFS_SEND_A_COMPRESSION, ret);
    5642           0 :         TLV_PUT_U32(sctx, BTRFS_SEND_A_ENCRYPTION, 0);
    5643             : 
    5644           0 :         ret = put_data_header(sctx, disk_num_bytes);
    5645           0 :         if (ret < 0)
    5646           0 :                 goto out;
    5647             : 
    5648             :         /*
    5649             :          * We want to do I/O directly into the send buffer, so get the next page
    5650             :          * boundary in the send buffer. This means that there may be a gap
    5651             :          * between the beginning of the command and the file data.
    5652             :          */
    5653           0 :         data_offset = PAGE_ALIGN(sctx->send_size);
    5654           0 :         if (data_offset > sctx->send_max_size ||
    5655           0 :             sctx->send_max_size - data_offset < disk_num_bytes) {
    5656           0 :                 ret = -EOVERFLOW;
    5657           0 :                 goto out;
    5658             :         }
    5659             : 
    5660             :         /*
    5661             :          * Note that send_buf is a mapping of send_buf_pages, so this is really
    5662             :          * reading into send_buf.
    5663             :          */
    5664           0 :         ret = btrfs_encoded_read_regular_fill_pages(BTRFS_I(inode), offset,
    5665             :                                                     disk_bytenr, disk_num_bytes,
    5666           0 :                                                     sctx->send_buf_pages +
    5667           0 :                                                     (data_offset >> PAGE_SHIFT));
    5668           0 :         if (ret)
    5669           0 :                 goto out;
    5670             :         /*
                     :          * Fill in the command header by hand. The length covers what is
                     :          * already in the buffer after the header plus the data, but not the
                     :          * gap before data_offset. The CRC is computed with hdr->crc zeroed,
                     :          * over those same two pieces of the buffer.
                     :          */
    5671           0 :         hdr = (struct btrfs_cmd_header *)sctx->send_buf;
    5672           0 :         hdr->len = cpu_to_le32(sctx->send_size + disk_num_bytes - sizeof(*hdr));
    5673           0 :         hdr->crc = 0;
    5674           0 :         crc = btrfs_crc32c(0, sctx->send_buf, sctx->send_size);
    5675           0 :         crc = btrfs_crc32c(crc, sctx->send_buf + data_offset, disk_num_bytes);
    5676           0 :         hdr->crc = cpu_to_le32(crc);
    5677             :         /*
                     :          * Write the two pieces back to back, skipping the gap. The command
                     :          * state in sctx is reset whether or not the writes succeed.
                     :          */
    5678           0 :         ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
    5679             :                         &sctx->send_off);
    5680           0 :         if (!ret) {
    5681           0 :                 ret = write_buf(sctx->send_filp, sctx->send_buf + data_offset,
    5682             :                                 disk_num_bytes, &sctx->send_off);
    5683             :         }
    5684           0 :         sctx->send_size = 0;
    5685           0 :         sctx->put_data = false;
    5686             : 
    5687           0 : tlv_put_failure:
    5688           0 : out:
    5689           0 :         fs_path_free(fspath);
    5690           0 :         iput(inode);
    5691           0 :         return ret;
    5692             : }
    5693             : /*
                     :  * Send the data of file range [@offset, @offset + @len) of the inode being
                     :  * processed (sctx->cur_ino), choosing one of three ways:
                     :  *
                     :  * 1) With BTRFS_SEND_FLAG_NO_FILE_DATA, hand the range to
                     :  *    send_update_extent() and return its result.
                     :  * 2) With BTRFS_SEND_FLAG_COMPRESSED and a compressed extent item, hand the
                     :  *    range to send_encoded_inline_extent() or send_encoded_extent(), but
                     :  *    only if the stored size (inline item length or disk_num_bytes) is not
                     :  *    larger than @len.
                     :  * 3) Otherwise call send_write() on consecutive pieces of at most
                     :  *    max_send_read_size(sctx) bytes, opening sctx->cur_inode first if it is
                     :  *    not open yet, and afterwards optionally drop the pages that were read
                     :  *    from the page cache.
                     :  *
                     :  * @path: its current leaf and slot are read as a btrfs_file_extent_item
                     :  *        (only inspected in case 2).
                     :  *
                     :  * Return: 0 on success of case 3, a negative errno from btrfs_iget() or
                     :  * send_write(), or whatever the helper used in cases 1 and 2 returns.
                     :  */
    5694           0 : static int send_extent_data(struct send_ctx *sctx, struct btrfs_path *path,
    5695             :                             const u64 offset, const u64 len)
    5696             : {
    5697           0 :         const u64 end = offset + len;
    5698           0 :         struct extent_buffer *leaf = path->nodes[0];
    5699           0 :         struct btrfs_file_extent_item *ei;
    5700           0 :         u64 read_size = max_send_read_size(sctx);
    5701           0 :         u64 sent = 0;
    5702             : 
    5703           0 :         if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
    5704           0 :                 return send_update_extent(sctx, offset, len);
    5705             : 
    5706           0 :         ei = btrfs_item_ptr(leaf, path->slots[0],
    5707             :                             struct btrfs_file_extent_item);
    5708           0 :         if ((sctx->flags & BTRFS_SEND_FLAG_COMPRESSED) &&
    5709             :             btrfs_file_extent_compression(leaf, ei) != BTRFS_COMPRESS_NONE) {
    5710           0 :                 bool is_inline = (btrfs_file_extent_type(leaf, ei) ==
    5711             :                                   BTRFS_FILE_EXTENT_INLINE);
    5712             : 
    5713             :                 /*
    5714             :                  * Send the compressed extent unless the compressed data is
    5715             :                  * larger than the decompressed data. This can happen if we're
    5716             :                  * not sending the entire extent, either because it has been
    5717             :                  * partially overwritten/truncated or because this is a part of
    5718             :                  * the extent that we couldn't clone in clone_range().
    5719             :                  */
    5720           0 :                 if (is_inline &&
    5721           0 :                     btrfs_file_extent_inline_item_len(leaf,
    5722             :                                                       path->slots[0]) <= len) {
    5723           0 :                         return send_encoded_inline_extent(sctx, path, offset,
    5724             :                                                           len);
    5725           0 :                 } else if (!is_inline &&
    5726             :                            btrfs_file_extent_disk_num_bytes(leaf, ei) <= len) {
    5727           0 :                         return send_encoded_extent(sctx, path, offset, len);
    5728             :                 }
    5729             :         }
    5730             :         /* Lazily open the inode and initialize its readahead state. */
    5731           0 :         if (sctx->cur_inode == NULL) {
    5732           0 :                 struct btrfs_root *root = sctx->send_root;
    5733             : 
    5734           0 :                 sctx->cur_inode = btrfs_iget(root->fs_info->sb, sctx->cur_ino, root);
    5735           0 :                 if (IS_ERR(sctx->cur_inode)) {
    5736           0 :                         int err = PTR_ERR(sctx->cur_inode);
    5737             : 
    5738           0 :                         sctx->cur_inode = NULL;
    5739           0 :                         return err;
    5740             :                 }
    5741           0 :                 memset(&sctx->ra, 0, sizeof(struct file_ra_state));
    5742           0 :                 file_ra_state_init(&sctx->ra, sctx->cur_inode->i_mapping);
    5743             : 
    5744             :                 /*
    5745             :                  * It's very likely there are no pages from this inode in the page
    5746             :                  * cache, so after reading extents and sending their data, we clean
    5747             :                  * the page cache to avoid trashing the page cache (adding pressure
    5748             :                  * to the page cache and forcing eviction of other data more useful
    5749             :                  * for applications).
    5750             :                  *
    5751             :                  * We decide if we should clean the page cache simply by checking
    5752             :                  * if the inode's mapping nrpages is 0 when we first open it, and
    5753             :                  * not by using something like filemap_range_has_page() before
    5754             :                  * reading an extent because when we ask the readahead code to
    5755             :                  * read a given file range, it may (and almost always does) read
    5756             :                  * pages from beyond that range (see the documentation for
    5757             :                  * page_cache_sync_readahead()), so it would not be reliable,
    5758             :                  * because after reading the first extent future calls to
    5759             :                  * filemap_range_has_page() would return true because the readahead
    5760             :                  * on the previous extent resulted in reading pages of the current
    5761             :                  * extent as well.
    5762             :                  */
    5763           0 :                 sctx->clean_page_cache = (sctx->cur_inode->i_mapping->nrpages == 0);
    5764           0 :                 sctx->page_cache_clear_start = round_down(offset, PAGE_SIZE);
    5765             :         }
    5766             :         /* Call send_write() on consecutive pieces of at most read_size bytes. */
    5767           0 :         while (sent < len) {
    5768           0 :                 u64 size = min(len - sent, read_size);
    5769           0 :                 int ret;
    5770             : 
    5771           0 :                 ret = send_write(sctx, offset + sent, size);
    5772           0 :                 if (ret < 0)
    5773           0 :                         return ret;
    5774           0 :                 sent += size;
    5775             :         }
    5776             : 
    5777           0 :         if (sctx->clean_page_cache && PAGE_ALIGNED(end)) {
    5778             :                 /*
    5779             :                  * Always operate only on ranges that are a multiple of the page
    5780             :                  * size. This is not only to prevent zeroing parts of a page in
    5781             :                  * the case of subpage sector size, but also to guarantee we evict
    5782             :                  * pages, as passing a range that is smaller than page size does
    5783             :                  * not evict the respective page (only zeroes part of its content).
    5784             :                  *
    5785             :                  * Always start from the end offset of the last range cleared.
    5786             :                  * This is because the readahead code may (and very often does)
    5787             :                  * read pages beyond the range we request for readahead. So if
    5788             :                  * we have an extent layout like this:
    5789             :                  *
    5790             :                  *            [ extent A ] [ extent B ] [ extent C ]
    5791             :                  *
    5792             :                  * When we ask page_cache_sync_readahead() to read extent A, it
    5793             :                  * may also trigger reads for pages of extent B. If we are doing
    5794             :                  * an incremental send and extent B has not changed between the
    5795             :                  * parent and send snapshots, some or all of its pages may end
    5796             :                  * up being read and placed in the page cache. So when truncating
    5797             :                  * the page cache we always start from the end offset of the
    5798             :                  * previously processed extent up to the end of the current
    5799             :                  * extent.
                     :                  *
                     :                  * If the end offset is not page aligned, nothing is truncated
                     :                  * now and page_cache_clear_start is left unchanged.
    5800             :                  */
    5801           0 :                 truncate_inode_pages_range(&sctx->cur_inode->i_data,
    5802           0 :                                            sctx->page_cache_clear_start,
    5803           0 :                                            end - 1);
    5804           0 :                 sctx->page_cache_clear_start = end;
    5805             :         }
    5806             : 
    5807             :         return 0;
    5808             : }
    5809             : 
    5810             : /*
    5811             :  * Search for a capability xattr related to sctx->cur_ino. If the capability is
    5812             :  * found, call send_set_xattr function to emit it.
    5813             :  *
                     :  * The lookup is done in sctx->send_root for the name XATTR_NAME_CAPS. The
                     :  * xattr value is copied out of the leaf into a temporary buffer before it
                     :  * is passed to send_set_xattr(). The buffer, the fs_path and the btrfs path
                     :  * allocated here are all released before returning.
                     :  *
    5814             :  * Return 0 if there isn't a capability, or when the capability was emitted
    5815             :  * successfully, or < 0 if an error occurred.
    5816             :  */
    5817           0 : static int send_capabilities(struct send_ctx *sctx)
    5818             : {
    5819           0 :         struct fs_path *fspath = NULL;
    5820           0 :         struct btrfs_path *path;
    5821           0 :         struct btrfs_dir_item *di;
    5822           0 :         struct extent_buffer *leaf;
    5823           0 :         unsigned long data_ptr;
    5824           0 :         char *buf = NULL;
    5825           0 :         int buf_len;
    5826           0 :         int ret = 0;
    5827             : 
    5828           0 :         path = alloc_path_for_send();
    5829           0 :         if (!path)
    5830             :                 return -ENOMEM;
    5831             : 
    5832           0 :         di = btrfs_lookup_xattr(NULL, sctx->send_root, path, sctx->cur_ino,
    5833             :                                 XATTR_NAME_CAPS, strlen(XATTR_NAME_CAPS), 0);
    5834           0 :         if (!di) {
    5835             :                 /* There is no xattr for this inode */
    5836           0 :                 goto out;
    5837           0 :         } else if (IS_ERR(di)) {
    5838           0 :                 ret = PTR_ERR(di);
    5839           0 :                 goto out;
    5840             :         }
    5841             : 
    5842           0 :         leaf = path->nodes[0];
    5843           0 :         buf_len = btrfs_dir_data_len(leaf, di);
    5844             :         /* Both allocations are checked together; 'out' releases whichever succeeded. */
    5845           0 :         fspath = fs_path_alloc();
    5846           0 :         buf = kmalloc(buf_len, GFP_KERNEL);
    5847           0 :         if (!fspath || !buf) {
    5848           0 :                 ret = -ENOMEM;
    5849           0 :                 goto out;
    5850             :         }
    5851             : 
    5852           0 :         ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
    5853           0 :         if (ret < 0)
    5854           0 :                 goto out;
    5855             :         /* The value bytes follow the dir item header and the xattr name in the leaf. */
    5856           0 :         data_ptr = (unsigned long)(di + 1) + btrfs_dir_name_len(leaf, di);
    5857           0 :         read_extent_buffer(leaf, buf, data_ptr, buf_len);
    5858             : 
    5859           0 :         ret = send_set_xattr(sctx, fspath, XATTR_NAME_CAPS,
    5860             :                         strlen(XATTR_NAME_CAPS), buf, buf_len);
    5861           0 : out:
    5862           0 :         kfree(buf);
    5863           0 :         fs_path_free(fspath);
    5864           0 :         btrfs_free_path(path);
    5865           0 :         return ret;
    5866             : }
    5867             : 
    5868           0 : static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
    5869             :                        struct clone_root *clone_root, const u64 disk_byte,
    5870             :                        u64 data_offset, u64 offset, u64 len)
    5871             : {
    5872           0 :         struct btrfs_path *path;
    5873           0 :         struct btrfs_key key;
    5874           0 :         int ret;
    5875           0 :         struct btrfs_inode_info info;
    5876           0 :         u64 clone_src_i_size = 0;
    5877             : 
    5878             :         /*
    5879             :          * Prevent cloning from a zero offset with a length matching the sector
    5880             :          * size because in some scenarios this will make the receiver fail.
    5881             :          *
    5882             :          * For example, if in the source filesystem the extent at offset 0
    5883             :          * has a length of sectorsize and it was written using direct IO, then
    5884             :          * it can never be an inline extent (even if compression is enabled).
    5885             :          * Then this extent can be cloned in the original filesystem to a non
    5886             :          * zero file offset, but it may not be possible to clone in the
    5887             :          * destination filesystem because it can be inlined due to compression
    5888             :          * on the destination filesystem (as the receiver's write operations are
    5889             :          * always done using buffered IO). The same happens when the original
    5890             :          * filesystem does not have compression enabled but the destination
    5891             :          * filesystem has.
    5892             :          */
    5893           0 :         if (clone_root->offset == 0 &&
    5894           0 :             len == sctx->send_root->fs_info->sectorsize)
    5895           0 :                 return send_extent_data(sctx, dst_path, offset, len);
    5896             : 
    5897           0 :         path = alloc_path_for_send();
    5898           0 :         if (!path)
    5899             :                 return -ENOMEM;
    5900             : 
    5901             :         /*
    5902             :          * There are inodes that have extents that lie beyond their i_size. Don't
    5903             :          * accept clones from these extents.
    5904             :          */
    5905           0 :         ret = get_inode_info(clone_root->root, clone_root->ino, &info);
    5906           0 :         btrfs_release_path(path);
    5907           0 :         if (ret < 0)
    5908           0 :                 goto out;
    5909           0 :         clone_src_i_size = info.size;
    5910             : 
    5911             :         /*
    5912             :          * We can't send a clone operation for the entire range if we find
    5913             :          * extent items in the respective range in the source file that
    5914             :          * refer to different extents or if we find holes.
    5915             :          * So check for that and do a mix of clone and regular write/copy
    5916             :          * operations if needed.
    5917             :          *
    5918             :          * Example:
    5919             :          *
    5920             :          * mkfs.btrfs -f /dev/sda
    5921             :          * mount /dev/sda /mnt
    5922             :          * xfs_io -f -c "pwrite -S 0xaa 0K 100K" /mnt/foo
    5923             :          * cp --reflink=always /mnt/foo /mnt/bar
    5924             :          * xfs_io -c "pwrite -S 0xbb 50K 50K" /mnt/foo
    5925             :          * btrfs subvolume snapshot -r /mnt /mnt/snap
    5926             :          *
    5927             :          * If when we send the snapshot and we are processing file bar (which
    5928             :          * has a higher inode number than foo) we blindly send a clone operation
    5929             :          * for the [0, 100K[ range from foo to bar, the receiver ends up getting
    5930             :          * a file bar that matches the content of file foo - iow, doesn't match
    5931             :          * the content from bar in the original filesystem.
    5932             :          */
    5933           0 :         key.objectid = clone_root->ino;
    5934           0 :         key.type = BTRFS_EXTENT_DATA_KEY;
    5935           0 :         key.offset = clone_root->offset;
    5936           0 :         ret = btrfs_search_slot(NULL, clone_root->root, &key, path, 0, 0);
    5937           0 :         if (ret < 0)
    5938           0 :                 goto out;
    5939           0 :         if (ret > 0 && path->slots[0] > 0) {
    5940           0 :                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
    5941           0 :                 if (key.objectid == clone_root->ino &&
    5942           0 :                     key.type == BTRFS_EXTENT_DATA_KEY)
    5943           0 :                         path->slots[0]--;
    5944             :         }
    5945             : 
    5946           0 :         while (true) {
    5947           0 :                 struct extent_buffer *leaf = path->nodes[0];
    5948           0 :                 int slot = path->slots[0];
    5949           0 :                 struct btrfs_file_extent_item *ei;
    5950           0 :                 u8 type;
    5951           0 :                 u64 ext_len;
    5952           0 :                 u64 clone_len;
    5953           0 :                 u64 clone_data_offset;
    5954           0 :                 bool crossed_src_i_size = false;
    5955             : 
    5956           0 :                 if (slot >= btrfs_header_nritems(leaf)) {
    5957           0 :                         ret = btrfs_next_leaf(clone_root->root, path);
    5958           0 :                         if (ret < 0)
    5959           0 :                                 goto out;
    5960           0 :                         else if (ret > 0)
    5961             :                                 break;
    5962           0 :                         continue;
    5963             :                 }
    5964             : 
    5965           0 :                 btrfs_item_key_to_cpu(leaf, &key, slot);
    5966             : 
    5967             :                 /*
    5968             :                  * We might have an implicit trailing hole (NO_HOLES feature
    5969             :                  * enabled). We deal with it after leaving this loop.
    5970             :                  */
    5971           0 :                 if (key.objectid != clone_root->ino ||
    5972           0 :                     key.type != BTRFS_EXTENT_DATA_KEY)
    5973             :                         break;
    5974             : 
    5975           0 :                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
    5976           0 :                 type = btrfs_file_extent_type(leaf, ei);
    5977           0 :                 if (type == BTRFS_FILE_EXTENT_INLINE) {
    5978           0 :                         ext_len = btrfs_file_extent_ram_bytes(leaf, ei);
    5979           0 :                         ext_len = PAGE_ALIGN(ext_len);
    5980             :                 } else {
    5981           0 :                         ext_len = btrfs_file_extent_num_bytes(leaf, ei);
    5982             :                 }
    5983             : 
    5984           0 :                 if (key.offset + ext_len <= clone_root->offset)
    5985           0 :                         goto next;
    5986             : 
    5987           0 :                 if (key.offset > clone_root->offset) {
    5988             :                         /* Implicit hole, NO_HOLES feature enabled. */
    5989           0 :                         u64 hole_len = key.offset - clone_root->offset;
    5990             : 
    5991           0 :                         if (hole_len > len)
    5992             :                                 hole_len = len;
    5993           0 :                         ret = send_extent_data(sctx, dst_path, offset,
    5994             :                                                hole_len);
    5995           0 :                         if (ret < 0)
    5996           0 :                                 goto out;
    5997             : 
    5998           0 :                         len -= hole_len;
    5999           0 :                         if (len == 0)
    6000             :                                 break;
    6001           0 :                         offset += hole_len;
    6002           0 :                         clone_root->offset += hole_len;
    6003           0 :                         data_offset += hole_len;
    6004             :                 }
    6005             : 
    6006           0 :                 if (key.offset >= clone_root->offset + len)
    6007             :                         break;
    6008             : 
    6009           0 :                 if (key.offset >= clone_src_i_size)
    6010             :                         break;
    6011             : 
    6012           0 :                 if (key.offset + ext_len > clone_src_i_size) {
    6013           0 :                         ext_len = clone_src_i_size - key.offset;
    6014           0 :                         crossed_src_i_size = true;
    6015             :                 }
    6016             : 
    6017           0 :                 clone_data_offset = btrfs_file_extent_offset(leaf, ei);
    6018           0 :                 if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) {
    6019           0 :                         clone_root->offset = key.offset;
    6020           0 :                         if (clone_data_offset < data_offset &&
    6021           0 :                                 clone_data_offset + ext_len > data_offset) {
    6022           0 :                                 u64 extent_offset;
    6023             : 
    6024           0 :                                 extent_offset = data_offset - clone_data_offset;
    6025           0 :                                 ext_len -= extent_offset;
    6026           0 :                                 clone_data_offset += extent_offset;
    6027           0 :                                 clone_root->offset += extent_offset;
    6028             :                         }
    6029             :                 }
    6030             : 
    6031           0 :                 clone_len = min_t(u64, ext_len, len);
    6032             : 
    6033           0 :                 if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte &&
    6034             :                     clone_data_offset == data_offset) {
    6035           0 :                         const u64 src_end = clone_root->offset + clone_len;
    6036           0 :                         const u64 sectorsize = SZ_64K;
    6037             : 
    6038             :                         /*
    6039             :                          * We can't clone the last block, when its size is not
    6040             :                          * sector size aligned, into the middle of a file. If we
    6041             :                          * do so, the receiver will get a failure (-EINVAL) when
    6042             :                          * trying to clone or will silently corrupt the data in
    6043             :                          * the destination file if it's on a kernel without the
    6044             :                          * fix introduced by commit ac765f83f1397646
    6045             :                          * ("Btrfs: fix data corruption due to cloning of eof
    6046             :                          * block").
    6047             :                          *
    6048             :                          * So issue a clone of the aligned down range plus a
    6049             :                          * regular write for the eof block, if we hit that case.
    6050             :                          *
    6051             :                          * Also, we use the maximum possible sector size, 64K,
    6052             :                          * because we don't know what's the sector size of the
    6053             :                          * filesystem that receives the stream, so we have to
    6054             :                          * assume the largest possible sector size.
    6055             :                          */
    6056           0 :                         if (src_end == clone_src_i_size &&
    6057           0 :                             !IS_ALIGNED(src_end, sectorsize) &&
    6058           0 :                             offset + clone_len < sctx->cur_inode_size) {
    6059           0 :                                 u64 slen;
    6060             : 
    6061           0 :                                 slen = ALIGN_DOWN(src_end - clone_root->offset,
    6062             :                                                   sectorsize);
    6063           0 :                                 if (slen > 0) {
    6064           0 :                                         ret = send_clone(sctx, offset, slen,
    6065             :                                                          clone_root);
    6066           0 :                                         if (ret < 0)
    6067           0 :                                                 goto out;
    6068             :                                 }
    6069           0 :                                 ret = send_extent_data(sctx, dst_path,
    6070             :                                                        offset + slen,
    6071             :                                                        clone_len - slen);
    6072             :                         } else {
    6073           0 :                                 ret = send_clone(sctx, offset, clone_len,
    6074             :                                                  clone_root);
    6075             :                         }
    6076           0 :                 } else if (crossed_src_i_size && clone_len < len) {
    6077             :                         /*
    6078             :                          * If we are at i_size of the clone source inode and we
    6079             :                          * can not clone from it, terminate the loop. This is
    6080             :                          * to avoid sending two write operations, one with a
    6081             :                          * length matching clone_len and the final one after
    6082             :                          * this loop with a length of len - clone_len.
    6083             :                          *
    6084             :                          * When using encoded writes (BTRFS_SEND_FLAG_COMPRESSED
    6085             :                          * was passed to the send ioctl), this helps avoid
    6086             :                          * sending an encoded write for an offset that is not
    6087             :                          * sector size aligned, in case the i_size of the source
    6088             :                          * inode is not sector size aligned. That will make the
    6089             :                          * receiver fallback to decompression of the data and
    6090             :                          * writing it using regular buffered IO, therefore while
    6091             :                          * not incorrect, it's not optimal due to decompression
    6092             :                          * and possible re-compression at the receiver.
    6093             :                          */
    6094             :                         break;
    6095             :                 } else {
    6096           0 :                         ret = send_extent_data(sctx, dst_path, offset,
    6097             :                                                clone_len);
    6098             :                 }
    6099             : 
    6100           0 :                 if (ret < 0)
    6101           0 :                         goto out;
    6102             : 
    6103           0 :                 len -= clone_len;
    6104           0 :                 if (len == 0)
    6105             :                         break;
    6106           0 :                 offset += clone_len;
    6107           0 :                 clone_root->offset += clone_len;
    6108             : 
    6109             :                 /*
    6110             :                  * If we are cloning from the file we are currently processing,
    6111             :                  * and using the send root as the clone root, we must stop once
    6112             :                  * the current clone offset reaches the current eof of the file
    6113             :                  * at the receiver, otherwise we would issue an invalid clone
    6114             :                  * operation (source range going beyond eof) and cause the
    6115             :                  * receiver to fail. So if we reach the current eof, bail out
    6116             :                  * and fallback to a regular write.
    6117             :                  */
    6118           0 :                 if (clone_root->root == sctx->send_root &&
    6119           0 :                     clone_root->ino == sctx->cur_ino &&
    6120           0 :                     clone_root->offset >= sctx->cur_inode_next_write_offset)
    6121             :                         break;
    6122             : 
    6123           0 :                 data_offset += clone_len;
    6124           0 : next:
    6125           0 :                 path->slots[0]++;
    6126             :         }
    6127             : 
    6128           0 :         if (len > 0)
    6129           0 :                 ret = send_extent_data(sctx, dst_path, offset, len);
    6130             :         else
    6131             :                 ret = 0;
    6132           0 : out:
    6133           0 :         btrfs_free_path(path);
    6134           0 :         return ret;
    6135             : }
    6136             : 
    6137           0 : static int send_write_or_clone(struct send_ctx *sctx,
    6138             :                                struct btrfs_path *path,
    6139             :                                struct btrfs_key *key,
    6140             :                                struct clone_root *clone_root)
    6141             : {
    6142           0 :         int ret = 0;
    6143           0 :         u64 offset = key->offset;
    6144           0 :         u64 end;
    6145           0 :         u64 bs = sctx->send_root->fs_info->sb->s_blocksize;
    6146             : 
    6147           0 :         end = min_t(u64, btrfs_file_extent_end(path), sctx->cur_inode_size);
    6148           0 :         if (offset >= end)
    6149             :                 return 0;
    6150             : 
    6151           0 :         if (clone_root && IS_ALIGNED(end, bs)) {
    6152           0 :                 struct btrfs_file_extent_item *ei;
    6153           0 :                 u64 disk_byte;
    6154           0 :                 u64 data_offset;
    6155             : 
    6156           0 :                 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
    6157             :                                     struct btrfs_file_extent_item);
    6158           0 :                 disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
    6159           0 :                 data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
    6160           0 :                 ret = clone_range(sctx, path, clone_root, disk_byte,
    6161             :                                   data_offset, offset, end - offset);
    6162             :         } else {
    6163           0 :                 ret = send_extent_data(sctx, path, offset, end - offset);
    6164             :         }
    6165           0 :         sctx->cur_inode_next_write_offset = end;
    6166           0 :         return ret;
    6167             : }
    6168             : 
    6169           0 : static int is_extent_unchanged(struct send_ctx *sctx,
    6170             :                                struct btrfs_path *left_path,
    6171             :                                struct btrfs_key *ekey)
    6172             : {
    6173           0 :         int ret = 0;
    6174           0 :         struct btrfs_key key;
    6175           0 :         struct btrfs_path *path = NULL;
    6176           0 :         struct extent_buffer *eb;
    6177           0 :         int slot;
    6178           0 :         struct btrfs_key found_key;
    6179           0 :         struct btrfs_file_extent_item *ei;
    6180           0 :         u64 left_disknr;
    6181           0 :         u64 right_disknr;
    6182           0 :         u64 left_offset;
    6183           0 :         u64 right_offset;
    6184           0 :         u64 left_offset_fixed;
    6185           0 :         u64 left_len;
    6186           0 :         u64 right_len;
    6187           0 :         u64 left_gen;
    6188           0 :         u64 right_gen;
    6189           0 :         u8 left_type;
    6190           0 :         u8 right_type;
    6191             : 
    6192           0 :         path = alloc_path_for_send();
    6193           0 :         if (!path)
    6194             :                 return -ENOMEM;
    6195             : 
    6196           0 :         eb = left_path->nodes[0];
    6197           0 :         slot = left_path->slots[0];
    6198           0 :         ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
    6199           0 :         left_type = btrfs_file_extent_type(eb, ei);
    6200             : 
    6201           0 :         if (left_type != BTRFS_FILE_EXTENT_REG) {
    6202           0 :                 ret = 0;
    6203           0 :                 goto out;
    6204             :         }
    6205           0 :         left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
    6206           0 :         left_len = btrfs_file_extent_num_bytes(eb, ei);
    6207           0 :         left_offset = btrfs_file_extent_offset(eb, ei);
    6208           0 :         left_gen = btrfs_file_extent_generation(eb, ei);
    6209             : 
    6210             :         /*
    6211             :          * Following comments will refer to these graphics. L is the left
    6212             :          * extents which we are checking at the moment. 1-8 are the right
    6213             :          * extents that we iterate.
    6214             :          *
    6215             :          *       |-----L-----|
    6216             :          * |-1-|-2a-|-3-|-4-|-5-|-6-|
    6217             :          *
    6218             :          *       |-----L-----|
    6219             :          * |--1--|-2b-|...(same as above)
    6220             :          *
    6221             :          * Alternative situation. Happens on files where extents got split.
    6222             :          *       |-----L-----|
    6223             :          * |-----------7-----------|-6-|
    6224             :          *
    6225             :          * Alternative situation. Happens on files which got larger.
    6226             :          *       |-----L-----|
    6227             :          * |-8-|
    6228             :          * Nothing follows after 8.
    6229             :          */
    6230             : 
    6231           0 :         key.objectid = ekey->objectid;
    6232           0 :         key.type = BTRFS_EXTENT_DATA_KEY;
    6233           0 :         key.offset = ekey->offset;
    6234           0 :         ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
    6235           0 :         if (ret < 0)
    6236           0 :                 goto out;
    6237           0 :         if (ret) {
    6238           0 :                 ret = 0;
    6239           0 :                 goto out;
    6240             :         }
    6241             : 
    6242             :         /*
    6243             :          * Handle special case where the right side has no extents at all.
    6244             :          */
    6245           0 :         eb = path->nodes[0];
    6246           0 :         slot = path->slots[0];
    6247           0 :         btrfs_item_key_to_cpu(eb, &found_key, slot);
    6248           0 :         if (found_key.objectid != key.objectid ||
    6249           0 :             found_key.type != key.type) {
    6250             :                 /* If we're a hole then just pretend nothing changed */
    6251           0 :                 ret = (left_disknr) ? 0 : 1;
    6252           0 :                 goto out;
    6253             :         }
    6254             : 
    6255             :         /*
    6256             :          * We're now on 2a, 2b or 7.
    6257             :          */
    6258           0 :         key = found_key;
    6259           0 :         while (key.offset < ekey->offset + left_len) {
    6260           0 :                 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
    6261           0 :                 right_type = btrfs_file_extent_type(eb, ei);
    6262           0 :                 if (right_type != BTRFS_FILE_EXTENT_REG &&
    6263             :                     right_type != BTRFS_FILE_EXTENT_INLINE) {
    6264           0 :                         ret = 0;
    6265           0 :                         goto out;
    6266             :                 }
    6267             : 
    6268           0 :                 if (right_type == BTRFS_FILE_EXTENT_INLINE) {
    6269           0 :                         right_len = btrfs_file_extent_ram_bytes(eb, ei);
    6270           0 :                         right_len = PAGE_ALIGN(right_len);
    6271             :                 } else {
    6272           0 :                         right_len = btrfs_file_extent_num_bytes(eb, ei);
    6273             :                 }
    6274             : 
    6275             :                 /*
    6276             :                  * Are we at extent 8? If yes, we know the extent is changed.
    6277             :                  * This may only happen on the first iteration.
    6278             :                  */
    6279           0 :                 if (found_key.offset + right_len <= ekey->offset) {
    6280             :                         /* If we're a hole just pretend nothing changed */
    6281           0 :                         ret = (left_disknr) ? 0 : 1;
    6282           0 :                         goto out;
    6283             :                 }
    6284             : 
    6285             :                 /*
    6286             :                  * We just wanted to see if when we have an inline extent, what
    6287             :                  * follows it is a regular extent (wanted to check the above
    6288             :                  * condition for inline extents too). This should normally not
    6289             :                  * happen but it's possible for example when we have an inline
    6290             :                  * compressed extent representing data with a size matching
    6291             :                  * the page size (currently the same as sector size).
    6292             :                  */
    6293           0 :                 if (right_type == BTRFS_FILE_EXTENT_INLINE) {
    6294           0 :                         ret = 0;
    6295           0 :                         goto out;
    6296             :                 }
    6297             : 
    6298           0 :                 right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
    6299           0 :                 right_offset = btrfs_file_extent_offset(eb, ei);
    6300           0 :                 right_gen = btrfs_file_extent_generation(eb, ei);
    6301             : 
    6302           0 :                 left_offset_fixed = left_offset;
    6303           0 :                 if (key.offset < ekey->offset) {
    6304             :                         /* Fix the right offset for 2a and 7. */
    6305           0 :                         right_offset += ekey->offset - key.offset;
    6306             :                 } else {
    6307             :                         /* Fix the left offset for all behind 2a and 2b */
    6308           0 :                         left_offset_fixed += key.offset - ekey->offset;
    6309             :                 }
    6310             : 
    6311             :                 /*
    6312             :                  * Check if we have the same extent.
    6313             :                  */
    6314           0 :                 if (left_disknr != right_disknr ||
    6315           0 :                     left_offset_fixed != right_offset ||
    6316             :                     left_gen != right_gen) {
    6317           0 :                         ret = 0;
    6318           0 :                         goto out;
    6319             :                 }
    6320             : 
    6321             :                 /*
    6322             :                  * Go to the next extent.
    6323             :                  */
    6324           0 :                 ret = btrfs_next_item(sctx->parent_root, path);
    6325           0 :                 if (ret < 0)
    6326           0 :                         goto out;
    6327           0 :                 if (!ret) {
    6328           0 :                         eb = path->nodes[0];
    6329           0 :                         slot = path->slots[0];
    6330           0 :                         btrfs_item_key_to_cpu(eb, &found_key, slot);
    6331             :                 }
    6332           0 :                 if (ret || found_key.objectid != key.objectid ||
    6333           0 :                     found_key.type != key.type) {
    6334           0 :                         key.offset += right_len;
    6335           0 :                         break;
    6336             :                 }
    6337           0 :                 if (found_key.offset != key.offset + right_len) {
    6338           0 :                         ret = 0;
    6339           0 :                         goto out;
    6340             :                 }
    6341           0 :                 key = found_key;
    6342             :         }
    6343             : 
    6344             :         /*
    6345             :          * We're now behind the left extent (treat as unchanged) or at the end
    6346             :          * of the right side (treat as changed).
    6347             :          */
    6348           0 :         if (key.offset >= ekey->offset + left_len)
    6349             :                 ret = 1;
    6350             :         else
    6351           0 :                 ret = 0;
    6352             : 
    6353             : 
    6354           0 : out:
    6355           0 :         btrfs_free_path(path);
    6356           0 :         return ret;
    6357             : }
    6358             : 
    6359           0 : static int get_last_extent(struct send_ctx *sctx, u64 offset)
    6360             : {
    6361           0 :         struct btrfs_path *path;
    6362           0 :         struct btrfs_root *root = sctx->send_root;
    6363           0 :         struct btrfs_key key;
    6364           0 :         int ret;
    6365             : 
    6366           0 :         path = alloc_path_for_send();
    6367           0 :         if (!path)
    6368             :                 return -ENOMEM;
    6369             : 
    6370           0 :         sctx->cur_inode_last_extent = 0;
    6371             : 
    6372           0 :         key.objectid = sctx->cur_ino;
    6373           0 :         key.type = BTRFS_EXTENT_DATA_KEY;
    6374           0 :         key.offset = offset;
    6375           0 :         ret = btrfs_search_slot_for_read(root, &key, path, 0, 1);
    6376           0 :         if (ret < 0)
    6377           0 :                 goto out;
    6378           0 :         ret = 0;
    6379           0 :         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
    6380           0 :         if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY)
    6381           0 :                 goto out;
    6382             : 
    6383           0 :         sctx->cur_inode_last_extent = btrfs_file_extent_end(path);
    6384           0 : out:
    6385           0 :         btrfs_free_path(path);
    6386           0 :         return ret;
    6387             : }
    6388             : 
    6389           0 : static int range_is_hole_in_parent(struct send_ctx *sctx,
    6390             :                                    const u64 start,
    6391             :                                    const u64 end)
    6392             : {
    6393           0 :         struct btrfs_path *path;
    6394           0 :         struct btrfs_key key;
    6395           0 :         struct btrfs_root *root = sctx->parent_root;
    6396           0 :         u64 search_start = start;
    6397           0 :         int ret;
    6398             : 
    6399           0 :         path = alloc_path_for_send();
    6400           0 :         if (!path)
    6401             :                 return -ENOMEM;
    6402             : 
    6403           0 :         key.objectid = sctx->cur_ino;
    6404           0 :         key.type = BTRFS_EXTENT_DATA_KEY;
    6405           0 :         key.offset = search_start;
    6406           0 :         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    6407           0 :         if (ret < 0)
    6408           0 :                 goto out;
    6409           0 :         if (ret > 0 && path->slots[0] > 0)
    6410           0 :                 path->slots[0]--;
    6411             : 
    6412           0 :         while (search_start < end) {
    6413           0 :                 struct extent_buffer *leaf = path->nodes[0];
    6414           0 :                 int slot = path->slots[0];
    6415           0 :                 struct btrfs_file_extent_item *fi;
    6416           0 :                 u64 extent_end;
    6417             : 
    6418           0 :                 if (slot >= btrfs_header_nritems(leaf)) {
    6419           0 :                         ret = btrfs_next_leaf(root, path);
    6420           0 :                         if (ret < 0)
    6421           0 :                                 goto out;
    6422           0 :                         else if (ret > 0)
    6423             :                                 break;
    6424           0 :                         continue;
    6425             :                 }
    6426             : 
    6427           0 :                 btrfs_item_key_to_cpu(leaf, &key, slot);
    6428           0 :                 if (key.objectid < sctx->cur_ino ||
    6429           0 :                     key.type < BTRFS_EXTENT_DATA_KEY)
    6430           0 :                         goto next;
    6431           0 :                 if (key.objectid > sctx->cur_ino ||
    6432           0 :                     key.type > BTRFS_EXTENT_DATA_KEY ||
    6433           0 :                     key.offset >= end)
    6434             :                         break;
    6435             : 
    6436           0 :                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
    6437           0 :                 extent_end = btrfs_file_extent_end(path);
    6438           0 :                 if (extent_end <= start)
    6439           0 :                         goto next;
    6440           0 :                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) {
    6441           0 :                         search_start = extent_end;
    6442           0 :                         goto next;
    6443             :                 }
    6444           0 :                 ret = 0;
    6445           0 :                 goto out;
    6446           0 : next:
    6447           0 :                 path->slots[0]++;
    6448             :         }
    6449             :         ret = 1;
    6450           0 : out:
    6451           0 :         btrfs_free_path(path);
    6452           0 :         return ret;
    6453             : }
    6454             : 
    6455           0 : static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
    6456             :                            struct btrfs_key *key)
    6457             : {
    6458           0 :         int ret = 0;
    6459             : 
    6460           0 :         if (sctx->cur_ino != key->objectid || !need_send_hole(sctx))
    6461             :                 return 0;
    6462             : 
    6463           0 :         if (sctx->cur_inode_last_extent == (u64)-1) {
    6464           0 :                 ret = get_last_extent(sctx, key->offset - 1);
    6465           0 :                 if (ret)
    6466             :                         return ret;
    6467             :         }
    6468             : 
    6469           0 :         if (path->slots[0] == 0 &&
    6470           0 :             sctx->cur_inode_last_extent < key->offset) {
    6471             :                 /*
    6472             :                  * We might have skipped entire leafs that contained only
    6473             :                  * file extent items for our current inode. These leafs have
    6474             :                  * a generation number smaller (older) than the one in the
    6475             :                  * current leaf and the leaf our last extent came from, and
    6476             :                  * are located between these 2 leafs.
    6477             :                  */
    6478           0 :                 ret = get_last_extent(sctx, key->offset - 1);
    6479           0 :                 if (ret)
    6480             :                         return ret;
    6481             :         }
    6482             : 
    6483           0 :         if (sctx->cur_inode_last_extent < key->offset) {
    6484           0 :                 ret = range_is_hole_in_parent(sctx,
    6485             :                                               sctx->cur_inode_last_extent,
    6486             :                                               key->offset);
    6487           0 :                 if (ret < 0)
    6488             :                         return ret;
    6489           0 :                 else if (ret == 0)
    6490           0 :                         ret = send_hole(sctx, key->offset);
    6491             :                 else
    6492             :                         ret = 0;
    6493             :         }
    6494           0 :         sctx->cur_inode_last_extent = btrfs_file_extent_end(path);
    6495           0 :         return ret;
    6496             : }
    6497             : 
    6498           0 : static int process_extent(struct send_ctx *sctx,
    6499             :                           struct btrfs_path *path,
    6500             :                           struct btrfs_key *key)
    6501             : {
    6502           0 :         struct clone_root *found_clone = NULL;
    6503           0 :         int ret = 0;
    6504             : 
    6505           0 :         if (S_ISLNK(sctx->cur_inode_mode))
    6506             :                 return 0;
    6507             : 
    6508           0 :         if (sctx->parent_root && !sctx->cur_inode_new) {
    6509           0 :                 ret = is_extent_unchanged(sctx, path, key);
    6510           0 :                 if (ret < 0)
    6511           0 :                         goto out;
    6512           0 :                 if (ret) {
    6513           0 :                         ret = 0;
    6514           0 :                         goto out_hole;
    6515             :                 }
    6516             :         } else {
    6517           0 :                 struct btrfs_file_extent_item *ei;
    6518           0 :                 u8 type;
    6519             : 
    6520           0 :                 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
    6521             :                                     struct btrfs_file_extent_item);
    6522           0 :                 type = btrfs_file_extent_type(path->nodes[0], ei);
    6523           0 :                 if (type == BTRFS_FILE_EXTENT_PREALLOC ||
    6524             :                     type == BTRFS_FILE_EXTENT_REG) {
    6525             :                         /*
    6526             :                          * The send spec does not have a prealloc command yet,
    6527             :                          * so just leave a hole for prealloc'ed extents until
    6528             :                          * we have enough commands queued up to justify rev'ing
    6529             :                          * the send spec.
    6530             :                          */
    6531           0 :                         if (type == BTRFS_FILE_EXTENT_PREALLOC) {
    6532           0 :                                 ret = 0;
    6533           0 :                                 goto out;
    6534             :                         }
    6535             : 
    6536             :                         /* Have a hole, just skip it. */
    6537           0 :                         if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) {
    6538           0 :                                 ret = 0;
    6539           0 :                                 goto out;
    6540             :                         }
    6541             :                 }
    6542             :         }
    6543             : 
    6544           0 :         ret = find_extent_clone(sctx, path, key->objectid, key->offset,
    6545             :                         sctx->cur_inode_size, &found_clone);
    6546           0 :         if (ret != -ENOENT && ret < 0)
    6547           0 :                 goto out;
    6548             : 
    6549           0 :         ret = send_write_or_clone(sctx, path, key, found_clone);
    6550           0 :         if (ret)
    6551           0 :                 goto out;
    6552           0 : out_hole:
    6553           0 :         ret = maybe_send_hole(sctx, path, key);
    6554             : out:
    6555             :         return ret;
    6556             : }
    6557             : 
    6558           0 : static int process_all_extents(struct send_ctx *sctx)
    6559             : {
    6560           0 :         int ret = 0;
    6561           0 :         int iter_ret = 0;
    6562           0 :         struct btrfs_root *root;
    6563           0 :         struct btrfs_path *path;
    6564           0 :         struct btrfs_key key;
    6565           0 :         struct btrfs_key found_key;
    6566             : 
    6567           0 :         root = sctx->send_root;
    6568           0 :         path = alloc_path_for_send();
    6569           0 :         if (!path)
    6570             :                 return -ENOMEM;
    6571             : 
    6572           0 :         key.objectid = sctx->cmp_key->objectid;
    6573           0 :         key.type = BTRFS_EXTENT_DATA_KEY;
    6574           0 :         key.offset = 0;
    6575           0 :         btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
    6576           0 :                 if (found_key.objectid != key.objectid ||
    6577           0 :                     found_key.type != key.type) {
    6578             :                         ret = 0;
    6579             :                         break;
    6580             :                 }
    6581             : 
    6582           0 :                 ret = process_extent(sctx, path, &found_key);
    6583           0 :                 if (ret < 0)
    6584             :                         break;
    6585             :         }
    6586             :         /* Catch error found during iteration */
    6587           0 :         if (iter_ret < 0)
    6588           0 :                 ret = iter_ret;
    6589             : 
    6590           0 :         btrfs_free_path(path);
    6591           0 :         return ret;
    6592             : }
    6593             : 
    6594           0 : static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end,
    6595             :                                            int *pending_move,
    6596             :                                            int *refs_processed)
    6597             : {
    6598           0 :         int ret = 0;
    6599             : 
    6600           0 :         if (sctx->cur_ino == 0)
    6601           0 :                 goto out;
    6602           0 :         if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
    6603           0 :             sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY)
    6604           0 :                 goto out;
    6605           0 :         if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
    6606           0 :                 goto out;
    6607             : 
    6608           0 :         ret = process_recorded_refs(sctx, pending_move);
    6609           0 :         if (ret < 0)
    6610           0 :                 goto out;
    6611             : 
    6612           0 :         *refs_processed = 1;
    6613           0 : out:
    6614           0 :         return ret;
    6615             : }
    6616             : 
                       /*
                        * Finish the current inode (sctx->cur_ino): process its recorded refs if
                        * needed and then send the trailing per-inode commands.
                        *
                        * Returns 0 right away if sctx->ignore_cur_inode is set. After the ref
                        * processing step, everything else is skipped when there is no current
                        * inode (cur_ino == 0), when the current inode is deleted, or when at_end
                        * is zero and sctx->cmp_key still has the current inode as its objectid.
                        *
                        * Otherwise the following is done, in this order: for regular files a
                        * hole up to the inode size (if needed) and a truncate, then chown, chmod,
                        * fileattr, verity, capabilities, the directory moves handled by
                        * apply_children_dir_moves() and finally utimes (sent or cached).
                        *
                        * Returns 0 on success, otherwise the non-zero return value of the helper
                        * that failed. Whenever the out label is reached with ret == 0,
                        * trim_dir_utimes_cache() is called and its return value is returned.
                        */
    6617           0 : static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
    6618             : {
    6619           0 :         int ret = 0;
    6620           0 :         struct btrfs_inode_info info;
    6621           0 :         u64 left_mode;
    6622           0 :         u64 left_uid;
    6623           0 :         u64 left_gid;
    6624           0 :         u64 left_fileattr;
    6625           0 :         u64 right_mode;
    6626           0 :         u64 right_uid;
    6627           0 :         u64 right_gid;
    6628           0 :         u64 right_fileattr;
    6629           0 :         int need_chmod = 0;
    6630           0 :         int need_chown = 0;
    6631           0 :         bool need_fileattr = false;
    6632           0 :         int need_truncate = 1;
    6633           0 :         int pending_move = 0;
    6634           0 :         int refs_processed = 0;
    6635             : 
    6636           0 :         if (sctx->ignore_cur_inode)
    6637             :                 return 0;
    6638             : 
    6639           0 :         ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
    6640             :                                               &refs_processed);
    6641           0 :         if (ret < 0)
    6642           0 :                 goto out;
    6643             : 
    6644             :         /*
    6645             :          * We have processed the refs and thus need to advance send_progress.
    6646             :          * Now, calls to get_cur_xxx will take the updated refs of the current
    6647             :          * inode into account.
    6648             :          *
    6649             :          * On the other hand, if our current inode is a directory and couldn't
    6650             :          * be moved/renamed because its parent was renamed/moved too and it has
    6651             :          * a higher inode number, we can only move/rename our current inode
    6652             :          * after we moved/renamed its parent. Therefore in this case operate on
    6653             :          * the old path (pre move/rename) of our current inode, and the
    6654             :          * move/rename will be performed later.
    6655             :          */
    6656           0 :         if (refs_processed && !pending_move)
    6657           0 :                 sctx->send_progress = sctx->cur_ino + 1;
    6658             : 
    6659           0 :         if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
    6660           0 :                 goto out;
    6661           0 :         if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
    6662           0 :                 goto out;
    6663           0 :         ret = get_inode_info(sctx->send_root, sctx->cur_ino, &info);
    6664           0 :         if (ret < 0)
    6665           0 :                 goto out;
    6666           0 :         left_mode = info.mode;
    6667           0 :         left_uid = info.uid;
    6668           0 :         left_gid = info.gid;
    6669           0 :         left_fileattr = info.fileattr;
    6670             : 
                               /*
                                * No parent root or a new inode: always chown, chmod unless the
                                * inode is a symlink, and skip the truncate when
                                * cur_inode_next_write_offset already equals the inode size.
                                * need_fileattr is never set on this path.
                                */
    6671           0 :         if (!sctx->parent_root || sctx->cur_inode_new) {
    6672           0 :                 need_chown = 1;
    6673           0 :                 if (!S_ISLNK(sctx->cur_inode_mode))
    6674           0 :                         need_chmod = 1;
    6675           0 :                 if (sctx->cur_inode_next_write_offset == sctx->cur_inode_size)
    6676           0 :                         need_truncate = 0;
    6677             :         } else {
    6678           0 :                 u64 old_size;
    6679             : 
                                       /*
                                        * Existing inode with a parent root: only send the attributes
                                        * that differ from what get_inode_info() reports for the same
                                        * inode number in the parent root.
                                        */
    6680           0 :                 ret = get_inode_info(sctx->parent_root, sctx->cur_ino, &info);
    6681           0 :                 if (ret < 0)
    6682           0 :                         goto out;
    6683           0 :                 old_size = info.size;
    6684           0 :                 right_mode = info.mode;
    6685           0 :                 right_uid = info.uid;
    6686           0 :                 right_gid = info.gid;
    6687           0 :                 right_fileattr = info.fileattr;
    6688             : 
    6689           0 :                 if (left_uid != right_uid || left_gid != right_gid)
    6690           0 :                         need_chown = 1;
    6691           0 :                 if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
    6692           0 :                         need_chmod = 1;
    6693           0 :                 if (!S_ISLNK(sctx->cur_inode_mode) && left_fileattr != right_fileattr)
    6694           0 :                         need_fileattr = true;
                                       /*
                                        * No truncate if the size is unchanged, or if the inode grew
                                        * and cur_inode_next_write_offset already equals the new size.
                                        */
    6695           0 :                 if ((old_size == sctx->cur_inode_size) ||
    6696           0 :                     (sctx->cur_inode_size > old_size &&
    6697           0 :                      sctx->cur_inode_next_write_offset == sctx->cur_inode_size))
    6698           0 :                         need_truncate = 0;
    6699             :         }
    6700             : 
    6701           0 :         if (S_ISREG(sctx->cur_inode_mode)) {
    6702           0 :                 if (need_send_hole(sctx)) {
                                               /*
                                                * Refresh cur_inode_last_extent through
                                                * get_last_extent() when it is still unset
                                                * ((u64)-1) or below the inode size, then send a
                                                * hole if it is still below the inode size.
                                                */
    6703           0 :                         if (sctx->cur_inode_last_extent == (u64)-1 ||
    6704             :                             sctx->cur_inode_last_extent <
    6705           0 :                             sctx->cur_inode_size) {
    6706           0 :                                 ret = get_last_extent(sctx, (u64)-1);
    6707           0 :                                 if (ret)
    6708           0 :                                         goto out;
    6709             :                         }
    6710           0 :                         if (sctx->cur_inode_last_extent <
    6711           0 :                             sctx->cur_inode_size) {
    6712           0 :                                 ret = send_hole(sctx, sctx->cur_inode_size);
    6713           0 :                                 if (ret)
    6714           0 :                                         goto out;
    6715             :                         }
    6716             :                 }
    6717           0 :                 if (need_truncate) {
    6718           0 :                         ret = send_truncate(sctx, sctx->cur_ino,
    6719             :                                             sctx->cur_inode_gen,
    6720             :                                             sctx->cur_inode_size);
    6721           0 :                         if (ret < 0)
    6722           0 :                                 goto out;
    6723             :                 }
    6724             :         }
    6725             : 
    6726           0 :         if (need_chown) {
    6727           0 :                 ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen,
    6728             :                                 left_uid, left_gid);
    6729           0 :                 if (ret < 0)
    6730           0 :                         goto out;
    6731             :         }
    6732           0 :         if (need_chmod) {
    6733           0 :                 ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen,
    6734             :                                 left_mode);
    6735           0 :                 if (ret < 0)
    6736           0 :                         goto out;
    6737             :         }
    6738           0 :         if (need_fileattr) {
    6739           0 :                 ret = send_fileattr(sctx, sctx->cur_ino, sctx->cur_inode_gen,
    6740             :                                     left_fileattr);
    6741           0 :                 if (ret < 0)
    6742           0 :                         goto out;
    6743             :         }
    6744             : 
                               /*
                                * Verity is only processed when proto_cmd_ok() accepts
                                * BTRFS_SEND_C_ENABLE_VERITY and cur_inode_needs_verity is set.
                                */
    6745           0 :         if (proto_cmd_ok(sctx, BTRFS_SEND_C_ENABLE_VERITY)
    6746           0 :             && sctx->cur_inode_needs_verity) {
    6747           0 :                 ret = process_verity(sctx);
    6748           0 :                 if (ret < 0)
    6749           0 :                         goto out;
    6750             :         }
    6751             : 
    6752           0 :         ret = send_capabilities(sctx);
    6753           0 :         if (ret < 0)
    6754           0 :                 goto out;
    6755             : 
    6756             :         /*
    6757             :          * If other directory inodes depended on our current directory
    6758             :          * inode's move/rename, now do their move/rename operations.
    6759             :          */
    6760           0 :         if (!is_waiting_for_move(sctx, sctx->cur_ino)) {
    6761           0 :                 ret = apply_children_dir_moves(sctx);
    6762           0 :                 if (ret)
    6763           0 :                         goto out;
    6764             :                 /*
    6765             :                  * Need to send that every time, no matter if it actually
    6766             :                  * changed between the two trees as we have done changes to
    6767             :                  * the inode before. If our inode is a directory and it's
    6768             :                  * waiting to be moved/renamed, we will send its utimes when
    6769             :                  * it's moved/renamed, therefore we don't need to do it here.
    6770             :                  */
    6771           0 :                 sctx->send_progress = sctx->cur_ino + 1;
    6772             : 
    6773             :                 /*
    6774             :                  * If the current inode is a non-empty directory, delay issuing
    6775             :                  * the utimes command for it, as it's very likely we have inodes
    6776             :                  * with a higher number inside it. We want to issue the utimes
    6777             :                  * command only after adding all dentries to it.
    6778             :                  */
    6779           0 :                 if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_size > 0)
    6780           0 :                         ret = cache_dir_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
    6781             :                 else
    6782           0 :                         ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
    6783             : 
    6784           0 :                 if (ret < 0)
    6785           0 :                         goto out;
    6786             :         }
    6787             : 
    6788           0 : out:
                               /* trim_dir_utimes_cache() only runs when ret is 0 at this point. */
    6789           0 :         if (!ret)
    6790           0 :                 ret = trim_dir_utimes_cache(sctx);
    6791             : 
    6792             :         return ret;
    6793             : }
    6794             : 
                       /*
                        * Release sctx->cur_inode, if there is one, and reset it to NULL.
                        *
                        * Before the iput(), if sctx->clean_page_cache is set and
                        * sctx->page_cache_clear_start is below the inode's i_size, the byte range
                        * [page_cache_clear_start, round_up(i_size, PAGE_SIZE) - 1] of the
                        * inode's mapping is passed to truncate_inode_pages_range().
                        */
    6795           0 : static void close_current_inode(struct send_ctx *sctx)
    6796             : {
    6797           0 :         u64 i_size;
    6798             : 
    6799           0 :         if (sctx->cur_inode == NULL)
    6800             :                 return;
    6801             : 
    6802           0 :         i_size = i_size_read(sctx->cur_inode);
    6803             : 
    6804             :         /*
    6805             :          * If we are doing an incremental send, we may have extents between the
    6806             :          * last processed extent and the i_size that have not been processed
    6807             :          * because they haven't changed but we may have read some of their pages
    6808             :          * through readahead, see the comments at send_extent_data().
    6809             :          */
    6810           0 :         if (sctx->clean_page_cache && sctx->page_cache_clear_start < i_size)
    6811           0 :                 truncate_inode_pages_range(&sctx->cur_inode->i_data,
    6812             :                                            sctx->page_cache_clear_start,
    6813           0 :                                            round_up(i_size, PAGE_SIZE) - 1);
    6814             : 
    6815           0 :         iput(sctx->cur_inode);
    6816           0 :         sctx->cur_inode = NULL;
    6817             : }
    6818             : 
                       /*
                        * Start processing the inode item at sctx->cmp_key.
                        *
                        * Closes the previously opened inode and resets the per-inode state in
                        * sctx (cur_ino, cur_inode_new_gen, cur_inode_last_extent,
                        * cur_inode_next_write_offset, ignore_cur_inode and send_progress). Then,
                        * depending on result, cur_inode_gen, cur_inode_new, cur_inode_deleted,
                        * cur_inode_size and cur_inode_mode (plus cur_inode_rdev for new inodes)
                        * are filled in from the inode item in the left path (send snapshot)
                        * and/or the right path (parent snapshot).
                        *
                        * For BTRFS_COMPARE_TREE_CHANGED, when the generations differ (root dir
                        * excluded) or exactly one of the two link counts is zero, the inode is
                        * handled as a deletion of the old inode followed by the creation of the
                        * new one, with all refs, extents and xattrs processed right here.
                        *
                        * Returns 0 on success (also when the inode ends up being ignored),
                        * otherwise the negative return value of the helper that failed.
                        */
    6819           0 : static int changed_inode(struct send_ctx *sctx,
    6820             :                          enum btrfs_compare_tree_result result)
    6821             : {
    6822           0 :         int ret = 0;
    6823           0 :         struct btrfs_key *key = sctx->cmp_key;
    6824           0 :         struct btrfs_inode_item *left_ii = NULL;
    6825           0 :         struct btrfs_inode_item *right_ii = NULL;
    6826           0 :         u64 left_gen = 0;
    6827           0 :         u64 right_gen = 0;
    6828             : 
    6829           0 :         close_current_inode(sctx);
    6830             : 
    6831           0 :         sctx->cur_ino = key->objectid;
    6832           0 :         sctx->cur_inode_new_gen = false;
    6833           0 :         sctx->cur_inode_last_extent = (u64)-1;
    6834           0 :         sctx->cur_inode_next_write_offset = 0;
    6835           0 :         sctx->ignore_cur_inode = false;
    6836             : 
    6837             :         /*
    6838             :          * Set send_progress to current inode. This will tell all get_cur_xxx
    6839             :          * functions that the current inode's refs are not updated yet. Later,
    6840             :          * when process_recorded_refs is finished, it is set to cur_ino + 1.
    6841             :          */
    6842           0 :         sctx->send_progress = sctx->cur_ino;
    6843             : 
                               /*
                                * NEW and CHANGED read the left inode item, anything else reads
                                * the right one. CHANGED additionally reads the right item below.
                                */
    6844           0 :         if (result == BTRFS_COMPARE_TREE_NEW ||
    6845           0 :             result == BTRFS_COMPARE_TREE_CHANGED) {
    6846           0 :                 left_ii = btrfs_item_ptr(sctx->left_path->nodes[0],
    6847             :                                 sctx->left_path->slots[0],
    6848             :                                 struct btrfs_inode_item);
    6849           0 :                 left_gen = btrfs_inode_generation(sctx->left_path->nodes[0],
    6850             :                                 left_ii);
    6851             :         } else {
    6852           0 :                 right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
    6853             :                                 sctx->right_path->slots[0],
    6854             :                                 struct btrfs_inode_item);
    6855           0 :                 right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
    6856             :                                 right_ii);
    6857             :         }
    6858           0 :         if (result == BTRFS_COMPARE_TREE_CHANGED) {
    6859           0 :                 right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
    6860             :                                 sctx->right_path->slots[0],
    6861             :                                 struct btrfs_inode_item);
    6862             : 
    6863           0 :                 right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
    6864             :                                 right_ii);
    6865             : 
    6866             :                 /*
    6867             :                  * The cur_ino = root dir case is special here. We can't treat
    6868             :                  * the inode as deleted+reused because it would generate a
    6869             :                  * stream that tries to delete/mkdir the root dir.
    6870             :                  */
    6871           0 :                 if (left_gen != right_gen &&
    6872           0 :                     sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
    6873           0 :                         sctx->cur_inode_new_gen = true;
    6874             :         }
    6875             : 
    6876             :         /*
    6877             :          * Normally we do not find inodes with a link count of zero (orphans)
    6878             :          * because the most common case is to create a snapshot and use it
    6879             :          * for a send operation. However other less common use cases involve
    6880             :          * using a subvolume and send it after turning it to RO mode just
    6881             :          * after deleting all hard links of a file while holding an open
    6882             :          * file descriptor against it or turning a RO snapshot into RW mode,
    6883             :          * keep an open file descriptor against a file, delete it and then
    6884             :          * turn the snapshot back to RO mode before using it for a send
    6885             :          * operation. The former is what the receiver operation does.
    6886             :          * Therefore, if we want to send these snapshots soon after they're
    6887             :          * received, we need to handle orphan inodes as well. Moreover, orphans
    6888             :          * can appear not only in the send snapshot but also in the parent
    6889             :          * snapshot. Here are several cases:
    6890             :          *
    6891             :          * Case 1: BTRFS_COMPARE_TREE_NEW
    6892             :          *       |  send snapshot  | action
    6893             :          * --------------------------------
    6894             :          * nlink |        0        | ignore
    6895             :          *
    6896             :          * Case 2: BTRFS_COMPARE_TREE_DELETED
    6897             :          *       | parent snapshot | action
    6898             :          * ----------------------------------
    6899             :          * nlink |        0        | as usual
    6900             :          * Note: No unlinks will be sent because there are no paths for it.
    6901             :          *
    6902             :          * Case 3: BTRFS_COMPARE_TREE_CHANGED
    6903             :          *           |       | parent snapshot | send snapshot | action
    6904             :          * -----------------------------------------------------------------------
    6905             :          * subcase 1 | nlink |        0        |       0       | ignore
    6906             :          * subcase 2 | nlink |       >0        |       0       | new_gen(deletion)
    6907             :          * subcase 3 | nlink |        0        |      >0       | new_gen(creation)
    6908             :          *
    6909             :          */
    6910           0 :         if (result == BTRFS_COMPARE_TREE_NEW) {
    6911           0 :                 if (btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii) == 0) {
    6912           0 :                         sctx->ignore_cur_inode = true;
    6913           0 :                         goto out;
    6914             :                 }
    6915           0 :                 sctx->cur_inode_gen = left_gen;
    6916           0 :                 sctx->cur_inode_new = true;
    6917           0 :                 sctx->cur_inode_deleted = false;
    6918           0 :                 sctx->cur_inode_size = btrfs_inode_size(
    6919           0 :                                 sctx->left_path->nodes[0], left_ii);
    6920           0 :                 sctx->cur_inode_mode = btrfs_inode_mode(
    6921           0 :                                 sctx->left_path->nodes[0], left_ii);
    6922           0 :                 sctx->cur_inode_rdev = btrfs_inode_rdev(
    6923           0 :                                 sctx->left_path->nodes[0], left_ii);
                                       /*
                                        * send_create_inode_if_needed() is skipped for
                                        * BTRFS_FIRST_FREE_OBJECTID (the root dir, see above).
                                        */
    6924           0 :                 if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
    6925           0 :                         ret = send_create_inode_if_needed(sctx);
    6926           0 :         } else if (result == BTRFS_COMPARE_TREE_DELETED) {
    6927           0 :                 sctx->cur_inode_gen = right_gen;
    6928           0 :                 sctx->cur_inode_new = false;
    6929           0 :                 sctx->cur_inode_deleted = true;
    6930           0 :                 sctx->cur_inode_size = btrfs_inode_size(
    6931           0 :                                 sctx->right_path->nodes[0], right_ii);
    6932           0 :                 sctx->cur_inode_mode = btrfs_inode_mode(
    6933           0 :                                 sctx->right_path->nodes[0], right_ii);
    6934           0 :         } else if (result == BTRFS_COMPARE_TREE_CHANGED) {
    6935           0 :                 u32 new_nlinks, old_nlinks;
    6936             : 
    6937           0 :                 new_nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii);
    6938           0 :                 old_nlinks = btrfs_inode_nlink(sctx->right_path->nodes[0], right_ii);
    6939           0 :                 if (new_nlinks == 0 && old_nlinks == 0) {
    6940           0 :                         sctx->ignore_cur_inode = true;
    6941           0 :                         goto out;
    6942           0 :                 } else if (new_nlinks == 0 || old_nlinks == 0) {
                                               /*
                                                * NOTE(review): this flag is assigned true/false
                                                * everywhere else in this function; 1 is
                                                * presumably equivalent here - confirm the
                                                * field type.
                                                */
    6943           0 :                         sctx->cur_inode_new_gen = 1;
    6944             :                 }
    6945             :                 /*
    6946             :                  * We need to do some special handling in case the inode was
    6947             :                  * reported as changed with a changed generation number. This
    6948             :                  * means that the original inode was deleted and new inode
    6949             :                  * reused the same inum. So we have to treat the old inode as
    6950             :                  * deleted and the new one as new.
    6951             :                  */
    6952           0 :                 if (sctx->cur_inode_new_gen) {
    6953             :                         /*
    6954             :                          * First, process the inode as if it was deleted.
    6955             :                          */
    6956           0 :                         if (old_nlinks > 0) {
    6957           0 :                                 sctx->cur_inode_gen = right_gen;
    6958           0 :                                 sctx->cur_inode_new = false;
    6959           0 :                                 sctx->cur_inode_deleted = true;
    6960           0 :                                 sctx->cur_inode_size = btrfs_inode_size(
    6961           0 :                                                 sctx->right_path->nodes[0], right_ii);
    6962           0 :                                 sctx->cur_inode_mode = btrfs_inode_mode(
    6963           0 :                                                 sctx->right_path->nodes[0], right_ii);
    6964           0 :                                 ret = process_all_refs(sctx,
    6965             :                                                 BTRFS_COMPARE_TREE_DELETED);
    6966           0 :                                 if (ret < 0)
    6967           0 :                                         goto out;
    6968             :                         }
    6969             : 
    6970             :                         /*
    6971             :                          * Now process the inode as if it was new.
    6972             :                          */
    6973           0 :                         if (new_nlinks > 0) {
    6974           0 :                                 sctx->cur_inode_gen = left_gen;
    6975           0 :                                 sctx->cur_inode_new = true;
    6976           0 :                                 sctx->cur_inode_deleted = false;
    6977           0 :                                 sctx->cur_inode_size = btrfs_inode_size(
    6978           0 :                                                 sctx->left_path->nodes[0],
    6979             :                                                 left_ii);
    6980           0 :                                 sctx->cur_inode_mode = btrfs_inode_mode(
    6981           0 :                                                 sctx->left_path->nodes[0],
    6982             :                                                 left_ii);
    6983           0 :                                 sctx->cur_inode_rdev = btrfs_inode_rdev(
    6984           0 :                                                 sctx->left_path->nodes[0],
    6985             :                                                 left_ii);
    6986           0 :                                 ret = send_create_inode_if_needed(sctx);
    6987           0 :                                 if (ret < 0)
    6988           0 :                                         goto out;
    6989             : 
    6990           0 :                                 ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
    6991           0 :                                 if (ret < 0)
    6992           0 :                                         goto out;
    6993             :                                 /*
    6994             :                                  * Advance send_progress now as we did not get
    6995             :                                  * into process_recorded_refs_if_needed in the
    6996             :                                  * new_gen case.
    6997             :                                  */
    6998           0 :                                 sctx->send_progress = sctx->cur_ino + 1;
    6999             : 
    7000             :                                 /*
    7001             :                                  * Now process all extents and xattrs of the
    7002             :                                  * inode as if they were all new.
    7003             :                                  */
    7004           0 :                                 ret = process_all_extents(sctx);
    7005           0 :                                 if (ret < 0)
    7006           0 :                                         goto out;
    7007           0 :                                 ret = process_all_new_xattrs(sctx);
    7008           0 :                                 if (ret < 0)
    7009           0 :                                         goto out;
    7010             :                         }
    7011             :                 } else {
                                               /*
                                                * Same inode on both sides: take its state from
                                                * the left (send snapshot) inode item.
                                                */
    7012           0 :                         sctx->cur_inode_gen = left_gen;
    7013           0 :                         sctx->cur_inode_new = false;
    7014           0 :                         sctx->cur_inode_new_gen = false;
    7015           0 :                         sctx->cur_inode_deleted = false;
    7016           0 :                         sctx->cur_inode_size = btrfs_inode_size(
    7017           0 :                                         sctx->left_path->nodes[0], left_ii);
    7018           0 :                         sctx->cur_inode_mode = btrfs_inode_mode(
    7019           0 :                                         sctx->left_path->nodes[0], left_ii);
    7020             :                 }
    7021             :         }
    7022             : 
    7023           0 : out:
    7024           0 :         return ret;
    7025             : }
    7026             : 
    7027             : /*
    7028             :  * We have to process new refs before deleted refs, but compare_trees gives us
    7029             :  * the new and deleted refs mixed. To fix this, we record the new/deleted refs
    7030             :  * first and later process them in process_recorded_refs.
    7031             :  * For the cur_inode_new_gen case, we skip recording completely because
    7032             :  * changed_inode did already initiate processing of refs. The reason for this is
    7033             :  * that in this case, compare_tree actually compares the refs of 2 different
    7034             :  * inodes. To fix this, process_all_refs is used in changed_inode to handle all
    7035             :  * refs of the right tree as deleted and all refs of the left tree as new.
                        *
                        * Recording is also skipped when the current inode is
                        * BTRFS_FIRST_FREE_OBJECTID.
                        *
                        * Returns -EIO (after calling inconsistent_snapshot_error()) if the key
                        * being compared does not belong to the current inode, otherwise the
                        * return value of the record_*_ref() helper that was called, or 0 if
                        * nothing was recorded.
    7036             :  */
    7037           0 : static int changed_ref(struct send_ctx *sctx,
    7038             :                        enum btrfs_compare_tree_result result)
    7039             : {
    7040           0 :         int ret = 0;
    7041             : 
    7042           0 :         if (sctx->cur_ino != sctx->cmp_key->objectid) {
    7043           0 :                 inconsistent_snapshot_error(sctx, result, "reference");
    7044           0 :                 return -EIO;
    7045             :         }
    7046             : 
    7047           0 :         if (!sctx->cur_inode_new_gen &&
    7048             :             sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
    7049           0 :                 if (result == BTRFS_COMPARE_TREE_NEW)
    7050           0 :                         ret = record_new_ref(sctx);
    7051           0 :                 else if (result == BTRFS_COMPARE_TREE_DELETED)
    7052           0 :                         ret = record_deleted_ref(sctx);
    7053           0 :                 else if (result == BTRFS_COMPARE_TREE_CHANGED)
    7054           0 :                         ret = record_changed_ref(sctx);
    7055             :         }
    7056             : 
    7057             :         return ret;
    7058             : }
    7059             : 
    7060             : /*
    7061             :  * Process new/deleted/changed xattrs. We skip processing in the
    7062             :  * cur_inode_new_gen case because changed_inode did already initiate processing
    7063             :  * of xattrs. The reason is the same as in changed_ref.
                        *
                        * Processing is also skipped when the current inode is deleted.
                        *
                        * Returns -EIO (after calling inconsistent_snapshot_error()) if the key
                        * being compared does not belong to the current inode, otherwise the
                        * return value of the process_*_xattr() helper that was called, or 0 if
                        * nothing was processed.
    7064             :  */
    7065           0 : static int changed_xattr(struct send_ctx *sctx,
    7066             :                          enum btrfs_compare_tree_result result)
    7067             : {
    7068           0 :         int ret = 0;
    7069             : 
    7070           0 :         if (sctx->cur_ino != sctx->cmp_key->objectid) {
    7071           0 :                 inconsistent_snapshot_error(sctx, result, "xattr");
    7072           0 :                 return -EIO;
    7073             :         }
    7074             : 
    7075           0 :         if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
    7076           0 :                 if (result == BTRFS_COMPARE_TREE_NEW)
    7077           0 :                         ret = process_new_xattr(sctx);
    7078           0 :                 else if (result == BTRFS_COMPARE_TREE_DELETED)
    7079           0 :                         ret = process_deleted_xattr(sctx);
    7080           0 :                 else if (result == BTRFS_COMPARE_TREE_CHANGED)
    7081           0 :                         ret = process_changed_xattr(sctx);
    7082             :         }
    7083             : 
    7084             :         return ret;
    7085             : }
    7086             : 
    7087             : /*
    7088             :  * Process new/deleted/changed extents. We skip processing in the
    7089             :  * cur_inode_new_gen case because changed_inode did already initiate processing
    7090             :  * of extents. The reason is the same as in changed_ref
                        *
                        * Processing is also skipped when the current inode is deleted and when
                        * result is BTRFS_COMPARE_TREE_DELETED. Unlike changed_ref and
                        * changed_xattr, a key that does not belong to the current inode is not an
                        * error here: 0 is returned (see the comment in the function body).
                        *
                        * Returns 0 if nothing was processed, otherwise the return value of
                        * process_extent() called with the left path and sctx->cmp_key.
    7091             :  */
    7092           0 : static int changed_extent(struct send_ctx *sctx,
    7093             :                           enum btrfs_compare_tree_result result)
    7094             : {
    7095           0 :         int ret = 0;
    7096             : 
    7097             :         /*
    7098             :          * We have found an extent item that changed without the inode item
    7099             :          * having changed. This can happen either after relocation (where the
    7100             :          * disk_bytenr of an extent item is replaced at
    7101             :          * relocation.c:replace_file_extents()) or after deduplication into a
    7102             :          * file in both the parent and send snapshots (where an extent item can
    7103             :          * get modified or replaced with a new one). Note that deduplication
    7104             :          * updates the inode item, but it only changes the iversion (sequence
    7105             :          * field in the inode item) of the inode, so if a file is deduplicated
    7106             :          * the same number of times in both the parent and send snapshots, its
    7107             :          * iversion becomes the same in both snapshots, whence the inode item is
    7108             :          * the same on both snapshots.
    7109             :          */
    7110           0 :         if (sctx->cur_ino != sctx->cmp_key->objectid)
    7111             :                 return 0;
    7112             : 
    7113           0 :         if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
    7114           0 :                 if (result != BTRFS_COMPARE_TREE_DELETED)
    7115           0 :                         ret = process_extent(sctx, sctx->left_path,
    7116             :                                         sctx->cmp_key);
    7117             :         }
    7118             : 
    7119             :         return ret;
    7120             : }
    7121             : 
                       /*
                        * Handle a verity descriptor item reported by changed_cb().
                        *
                        * Sets sctx->cur_inode_needs_verity when the item is new
                        * (BTRFS_COMPARE_TREE_NEW) and the current inode is neither a new generation
                        * nor deleted. Every other combination is ignored.
                        *
                        * Always returns 0.
                        */
    7122             : static int changed_verity(struct send_ctx *sctx, enum btrfs_compare_tree_result result)
    7123             : {
    7124           0 :         int ret = 0;
    7125             : 
    7126           0 :         if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
    7127           0 :                 if (result == BTRFS_COMPARE_TREE_NEW)
    7128           0 :                         sctx->cur_inode_needs_verity = true;
    7129             :         }
    7130             :         return ret;
    7131             : }
    7132             : 
                       /*
                        * Check whether inode @dir has a different generation in the send root than
                        * in the parent root.
                        *
                        * Returns 1 if the two generations differ, 0 if they are equal, or the
                        * non-zero value returned by get_inode_gen() if either lookup fails.
                        */
    7133           0 : static int dir_changed(struct send_ctx *sctx, u64 dir)
    7134             : {
    7135           0 :         u64 orig_gen, new_gen;
    7136           0 :         int ret;
    7137             : 
    7138           0 :         ret = get_inode_gen(sctx->send_root, dir, &new_gen);
    7139           0 :         if (ret)
    7140             :                 return ret;
    7141             : 
    7142           0 :         ret = get_inode_gen(sctx->parent_root, dir, &orig_gen);
    7143           0 :         if (ret)
    7144             :                 return ret;
    7145             : 
    7146           0 :         return (orig_gen != new_gen) ? 1 : 0;
    7147             : }
    7148             : 
                       /*
                        * Check whether any parent directory named by the inode ref item at @path
                        * has a different generation in the send root than in the parent root.
                        *
                        * For a BTRFS_INODE_REF_KEY the only parent directory is key->offset. For any
                        * other key type the item is walked as a sequence of struct
                        * btrfs_inode_extref entries, each carrying its own parent directory id.
                        *
                        * Returns the first non-zero value produced by dir_changed() (1 when a
                        * directory generation differs, otherwise its error value), or 0 if every
                        * checked directory is unchanged.
                        */
    7149           0 : static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
    7150             :                         struct btrfs_key *key)
    7151             : {
    7152           0 :         struct btrfs_inode_extref *extref;
    7153           0 :         struct extent_buffer *leaf;
    7154           0 :         u64 dirid = 0, last_dirid = 0;
    7155           0 :         unsigned long ptr;
    7156           0 :         u32 item_size;
    7157           0 :         u32 cur_offset = 0;
    7158           0 :         int ref_name_len;
    7159           0 :         int ret = 0;
    7160             : 
    7161             :         /* Easy case, just check this one dirid */
    7162           0 :         if (key->type == BTRFS_INODE_REF_KEY) {
    7163           0 :                 dirid = key->offset;
    7164             : 
    7165           0 :                 ret = dir_changed(sctx, dirid);
    7166           0 :                 goto out;
    7167             :         }
    7168             : 
    7169           0 :         leaf = path->nodes[0];
    7170           0 :         item_size = btrfs_item_size(leaf, path->slots[0]);
    7171           0 :         ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
    7172           0 :         while (cur_offset < item_size) {
    7173           0 :                 extref = (struct btrfs_inode_extref *)(ptr +
    7174             :                                                        cur_offset);
    7175           0 :                 dirid = btrfs_inode_extref_parent(leaf, extref);
    7176           0 :                 ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
                                       /*
                                        * Step over the fixed header plus the name before any 'continue', so
                                        * the loop always makes progress.
                                        */
    7177           0 :                 cur_offset += ref_name_len + sizeof(*extref);
                                       /* Same directory as the previous entry: already checked. */
    7178           0 :                 if (dirid == last_dirid)
    7179           0 :                         continue;
    7180           0 :                 ret = dir_changed(sctx, dirid);
    7181           0 :                 if (ret)
    7182             :                         break;
    7183             :                 last_dirid = dirid;
    7184             :         }
    7185           0 : out:
    7186           0 :         return ret;
    7187             : }
    7188             : 
    7189             : /*
    7190             :  * Updates compare related fields in sctx and simply forwards to the actual
    7191             :  * changed_xxx functions.
    7192             :  */
                       /*
                        * @left_path:  path in the send root; its leaf may be missing once the end
                        *              of the send root was reached
                        * @right_path: path in the parent root, NULL for a full send
                        * @key:        key of the item being reported
                        * @result:     how the item differs between the two trees
                        * @sctx:       send context, receives the paths and the key
                        *
                        * Return value: propagated from whichever helper ran last; 0 when a
                        * BTRFS_COMPARE_TREE_SAME item needs no processing.
                        */
    7193           0 : static int changed_cb(struct btrfs_path *left_path,
    7194             :                       struct btrfs_path *right_path,
    7195             :                       struct btrfs_key *key,
    7196             :                       enum btrfs_compare_tree_result result,
    7197             :                       struct send_ctx *sctx)
    7198             : {
    7199           0 :         int ret = 0;
    7200             : 
    7201             :         /*
    7202             :          * We can not hold the commit root semaphore here. This is because in
    7203             :          * the case of sending and receiving to the same filesystem, using a
    7204             :          * pipe, could result in a deadlock:
    7205             :          *
    7206             :          * 1) The task running send blocks on the pipe because it's full;
    7207             :          *
    7208             :          * 2) The task running receive, which is the only consumer of the pipe,
    7209             :          *    is waiting for a transaction commit (for example due to a space
    7210             :          *    reservation when doing a write or triggering a transaction commit
    7211             :          *    when creating a subvolume);
    7212             :          *
    7213             :          * 3) The transaction is waiting to write lock the commit root semaphore,
    7214             :          *    but can not acquire it since it's being held at 1).
    7215             :          *
    7216             :          * Down this call chain we write to the pipe through kernel_write().
    7217             :          * The same type of problem can also happen when sending to a file that
    7218             :          * is stored in the same filesystem - when reserving space for a write
    7219             :          * into the file, we can trigger a transaction commit.
    7220             :          *
    7221             :          * Our caller has supplied us with clones of leaves from the send and
    7222             :          * parent roots, so we're safe here from a concurrent relocation and
    7223             :          * further reallocation of metadata extents while we are here. Below we
    7224             :          * also assert that the leaves are clones.
    7225             :          */
    7226           0 :         lockdep_assert_not_held(&sctx->send_root->fs_info->commit_root_sem);
    7227             : 
    7228             :         /*
    7229             :          * We always have a send root, so left_path is never NULL. We will not
    7230             :          * have a leaf when we have reached the end of the send root but have
    7231             :          * not yet reached the end of the parent root.
    7232             :          */
    7233           0 :         if (left_path->nodes[0])
    7234           0 :                 ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED,
    7235             :                                 &left_path->nodes[0]->bflags));
    7236             :         /*
    7237             :          * When doing a full send we don't have a parent root, so right_path is
    7238             :          * NULL. When doing an incremental send, we may have reached the end of
    7239             :          * the parent root already, so we don't have a leaf at right_path.
    7240             :          */
    7241           0 :         if (right_path && right_path->nodes[0])
    7242           0 :                 ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED,
    7243             :                                 &right_path->nodes[0]->bflags));
    7244             : 
                               /*
                                * Identical items are normally skipped. The exceptions are ref items
                                * whose parent directory has a different generation (handled below as
                                * if the ref had changed) and file extent items, which are passed to
                                * maybe_send_hole().
                                */
    7245           0 :         if (result == BTRFS_COMPARE_TREE_SAME) {
    7246           0 :                 if (key->type == BTRFS_INODE_REF_KEY ||
    7247             :                     key->type == BTRFS_INODE_EXTREF_KEY) {
    7248           0 :                         ret = compare_refs(sctx, left_path, key);
    7249           0 :                         if (!ret)
    7250             :                                 return 0;
    7251           0 :                         if (ret < 0)
    7252             :                                 return ret;
    7253           0 :                 } else if (key->type == BTRFS_EXTENT_DATA_KEY) {
    7254           0 :                         return maybe_send_hole(sctx, left_path, key);
    7255             :                 } else {
    7256             :                         return 0;
    7257             :                 }
                                       /* compare_refs() returned > 0: process the ref as changed. */
    7258             :                 result = BTRFS_COMPARE_TREE_CHANGED;
    7259             :                 ret = 0;
    7260             :         }
    7261             : 
                               /* Publish the comparison state that the changed_xxx helpers read. */
    7262           0 :         sctx->left_path = left_path;
    7263           0 :         sctx->right_path = right_path;
    7264           0 :         sctx->cmp_key = key;
    7265             : 
    7266           0 :         ret = finish_inode_if_needed(sctx, 0);
    7267           0 :         if (ret < 0)
    7268           0 :                 goto out;
    7269             : 
    7270             :         /* Ignore non-FS objects */
    7271           0 :         if (key->objectid == BTRFS_FREE_INO_OBJECTID ||
    7272             :             key->objectid == BTRFS_FREE_SPACE_OBJECTID)
    7273           0 :                 goto out;
    7274             : 
                               /*
                                * Inode items are always dispatched. All other item types are only
                                * dispatched while sctx->ignore_cur_inode is not set.
                                */
    7275           0 :         if (key->type == BTRFS_INODE_ITEM_KEY) {
    7276           0 :                 ret = changed_inode(sctx, result);
    7277           0 :         } else if (!sctx->ignore_cur_inode) {
    7278           0 :                 if (key->type == BTRFS_INODE_REF_KEY ||
    7279             :                     key->type == BTRFS_INODE_EXTREF_KEY)
    7280           0 :                         ret = changed_ref(sctx, result);
    7281           0 :                 else if (key->type == BTRFS_XATTR_ITEM_KEY)
    7282           0 :                         ret = changed_xattr(sctx, result);
    7283           0 :                 else if (key->type == BTRFS_EXTENT_DATA_KEY)
    7284           0 :                         ret = changed_extent(sctx, result);
    7285           0 :                 else if (key->type == BTRFS_VERITY_DESC_ITEM_KEY &&
    7286           0 :                          key->offset == 0)
    7287           0 :                         ret = changed_verity(sctx, result);
    7288             :         }
    7289             : 
    7290           0 : out:
    7291             :         return ret;
    7292             : }
    7293             : 
                       /*
                        * Look up @key in @root again, expecting an exact match.
                        *
                        * When @path is not flagged need_commit_sem, the caller is expected to hold
                        * commit_root_sem for reading (checked with lockdep).
                        *
                        * Returns the result of btrfs_search_slot() when it is <= 0. A positive
                        * result is reported as a missing key: the node at path->lowest_level is
                        * printed, an error is logged and -EUCLEAN is returned.
                        */
    7294           0 : static int search_key_again(const struct send_ctx *sctx,
    7295             :                             struct btrfs_root *root,
    7296             :                             struct btrfs_path *path,
    7297             :                             const struct btrfs_key *key)
    7298             : {
    7299           0 :         int ret;
    7300             : 
    7301           0 :         if (!path->need_commit_sem)
    7302           0 :                 lockdep_assert_held_read(&root->fs_info->commit_root_sem);
    7303             : 
    7304             :         /*
    7305             :          * Roots used for send operations are readonly and no one can add,
    7306             :          * update or remove keys from them, so we should be able to find our
    7307             :          * key again. The only exception is deduplication, which can operate on
    7308             :          * readonly roots and add, update or remove keys to/from them - but at
    7309             :          * the moment we don't allow it to run in parallel with send.
    7310             :          */
    7311           0 :         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
                               /*
                                * NOTE(review): presumably ASSERT() can be compiled out, which would be
                                * why the same condition is also handled explicitly below - confirm.
                                */
    7312           0 :         ASSERT(ret <= 0);
    7313           0 :         if (ret > 0) {
    7314           0 :                 btrfs_print_tree(path->nodes[path->lowest_level], false);
    7315           0 :                 btrfs_err(root->fs_info,
    7316             : "send: key (%llu %u %llu) not found in %s root %llu, lowest_level %d, slot %d",
    7317             :                           key->objectid, key->type, key->offset,
    7318             :                           (root == sctx->parent_root ? "parent" : "send"),
    7319             :                           root->root_key.objectid, path->lowest_level,
    7320             :                           path->slots[path->lowest_level]);
    7321           0 :                 return -EUCLEAN;
    7322             :         }
    7323             : 
    7324             :         return ret;
    7325             : }
    7326             : 
                       /*
                        * Walk the items of the send root, starting the search at the inode item
                        * key of BTRFS_FIRST_FREE_OBJECTID, and report each item to changed_cb() as
                        * BTRFS_COMPARE_TREE_NEW. No parent path is passed.
                        *
                        * After each item fs_info->last_reloc_trans is compared with the value
                        * remembered in sctx; if it advanced, the path is released and the current
                        * key is searched again before moving on.
                        *
                        * Returns -ENOMEM if the path cannot be allocated, a negative value from
                        * the first helper that fails, or otherwise the return value of the final
                        * finish_inode_if_needed() call.
                        */
    7327           0 : static int full_send_tree(struct send_ctx *sctx)
    7328             : {
    7329           0 :         int ret;
    7330           0 :         struct btrfs_root *send_root = sctx->send_root;
    7331           0 :         struct btrfs_key key;
    7332           0 :         struct btrfs_fs_info *fs_info = send_root->fs_info;
    7333           0 :         struct btrfs_path *path;
    7334             : 
    7335           0 :         path = alloc_path_for_send();
    7336           0 :         if (!path)
    7337             :                 return -ENOMEM;
    7338           0 :         path->reada = READA_FORWARD_ALWAYS;
    7339             : 
    7340           0 :         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
    7341           0 :         key.type = BTRFS_INODE_ITEM_KEY;
    7342           0 :         key.offset = 0;
    7343             : 
                               /* Remember the relocation counter; the loop below compares against it. */
    7344           0 :         down_read(&fs_info->commit_root_sem);
    7345           0 :         sctx->last_reloc_trans = fs_info->last_reloc_trans;
    7346           0 :         up_read(&fs_info->commit_root_sem);
    7347             : 
    7348           0 :         ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
    7349           0 :         if (ret < 0)
    7350           0 :                 goto out;
                               /*
                                * NOTE(review): presumably a positive return means no item exists at
                                * or after the start key, so there is nothing to walk - confirm.
                                */
    7351           0 :         if (ret)
    7352           0 :                 goto out_finish;
    7353             : 
    7354           0 :         while (1) {
    7355           0 :                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
    7356             : 
    7357           0 :                 ret = changed_cb(path, NULL, &key,
    7358             :                                  BTRFS_COMPARE_TREE_NEW, sctx);
    7359           0 :                 if (ret < 0)
    7360           0 :                         goto out;
    7361             : 
    7362           0 :                 down_read(&fs_info->commit_root_sem);
    7363           0 :                 if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
    7364           0 :                         sctx->last_reloc_trans = fs_info->last_reloc_trans;
    7365           0 :                         up_read(&fs_info->commit_root_sem);
    7366             :                         /*
    7367             :                          * A transaction used for relocating a block group was
    7368             :                          * committed or is about to finish its commit. Release
    7369             :                          * our path (leaf) and restart the search, so that we
    7370             :                          * avoid operating on any file extent items that are
    7371             :                          * stale, with a disk_bytenr that reflects a pre
    7372             :                          * relocation value. This way we avoid as much as
    7373             :                          * possible to fallback to regular writes when checking
    7374             :                          * if we can clone file ranges.
    7375             :                          */
    7376           0 :                         btrfs_release_path(path);
    7377           0 :                         ret = search_key_again(sctx, send_root, path, &key);
    7378           0 :                         if (ret < 0)
    7379           0 :                                 goto out;
    7380             :                 } else {
    7381           0 :                         up_read(&fs_info->commit_root_sem);
    7382             :                 }
    7383             : 
    7384           0 :                 ret = btrfs_next_item(send_root, path);
    7385           0 :                 if (ret < 0)
    7386           0 :                         goto out;
                                       /* A positive return ends the walk without being an error. */
    7387           0 :                 if (ret) {
    7388             :                         ret  = 0;
    7389             :                         break;
    7390             :                 }
    7391             :         }
    7392             : 
    7393           0 : out_finish:
    7394           0 :         ret = finish_inode_if_needed(sctx, 1);
    7395             : 
    7396           0 : out:
    7397           0 :         btrfs_free_path(path);
    7398           0 :         return ret;
    7399             : }
    7400             : 
                       /*
                        * Replace the extent buffer at path->nodes[level] with a clone of itself.
                        *
                        * The original buffer is passed to free_extent_buffer() only after the clone
                        * was created, so on failure the path is left untouched.
                        *
                        * Returns 0 on success or -ENOMEM if the clone could not be created.
                        */
    7401           0 : static int replace_node_with_clone(struct btrfs_path *path, int level)
    7402             : {
    7403           0 :         struct extent_buffer *clone;
    7404             : 
    7405           0 :         clone = btrfs_clone_extent_buffer(path->nodes[level]);
    7406           0 :         if (!clone)
    7407             :                 return -ENOMEM;
    7408             : 
    7409           0 :         free_extent_buffer(path->nodes[level]);
    7410           0 :         path->nodes[level] = clone;
    7411             : 
    7412           0 :         return 0;
    7413             : }
    7414             : 
                       /*
                        * Descend one level in @path: read the child referenced by the current slot
                        * of the node at *level, store it at *level - 1 with slot 0 and decrement
                        * *level.
                        *
                        * Readahead is requested for the following slots of the parent whose node
                        * pointer generation is greater than @reada_min_gen, up to a byte budget.
                        * If the new level is 0, the leaf is replaced with a clone.
                        *
                        * The commit root semaphore must be held for reading. Must not be called
                        * with *level == 0.
                        *
                        * Returns 0 on success, the error encoded by btrfs_read_node_slot(), or the
                        * return value of replace_node_with_clone().
                        */
    7415           0 : static int tree_move_down(struct btrfs_path *path, int *level, u64 reada_min_gen)
    7416             : {
    7417           0 :         struct extent_buffer *eb;
    7418           0 :         struct extent_buffer *parent = path->nodes[*level];
    7419           0 :         int slot = path->slots[*level];
    7420           0 :         const int nritems = btrfs_header_nritems(parent);
    7421           0 :         u64 reada_max;
    7422           0 :         u64 reada_done = 0;
    7423             : 
    7424           0 :         lockdep_assert_held_read(&parent->fs_info->commit_root_sem);
    7425             : 
    7426           0 :         BUG_ON(*level == 0);
    7427           0 :         eb = btrfs_read_node_slot(parent, slot);
    7428           0 :         if (IS_ERR(eb))
    7429           0 :                 return PTR_ERR(eb);
    7430             : 
    7431             :         /*
    7432             :          * Trigger readahead for the next leaves we will process, so that it is
    7433             :          * very likely that when we need them they are already in memory and we
    7434             :          * will not block on disk IO. For nodes we only do readahead for one,
    7435             :          * since the time window between processing nodes is typically larger.
    7436             :          */
    7437           0 :         reada_max = (*level == 1 ? SZ_128K : eb->fs_info->nodesize);
    7438             : 
                               /* Start at the slot after the child just read; stop once the budget is used. */
    7439           0 :         for (slot++; slot < nritems && reada_done < reada_max; slot++) {
    7440           0 :                 if (btrfs_node_ptr_generation(parent, slot) > reada_min_gen) {
    7441           0 :                         btrfs_readahead_node_child(parent, slot);
    7442           0 :                         reada_done += eb->fs_info->nodesize;
    7443             :                 }
    7444             :         }
    7445             : 
    7446           0 :         path->nodes[*level - 1] = eb;
    7447           0 :         path->slots[*level - 1] = 0;
    7448           0 :         (*level)--;
    7449             : 
    7450           0 :         if (*level == 0)
    7451           0 :                 return replace_node_with_clone(path, 0);
    7452             : 
    7453             :         return 0;
    7454             : }
    7455             : 
                       /*
                        * Advance to the next slot at *level, climbing towards the root for as long
                        * as the slot runs past the last item of the current node.
                        *
                        * Each time it climbs, the exhausted node is freed and cleared from the
                        * path, its slot is reset to 0, *level is incremented and the slot of the
                        * node above is advanced.
                        *
                        * Returns 0 if the next slot is in the same node, 1 if it had to climb at
                        * least one level, or -1 if the node at @root_level is exhausted (its slot
                        * is then left on the last item).
                        */
    7456           0 : static int tree_move_next_or_upnext(struct btrfs_path *path,
    7457             :                                     int *level, int root_level)
    7458             : {
    7459           0 :         int ret = 0;
    7460           0 :         int nritems;
    7461           0 :         nritems = btrfs_header_nritems(path->nodes[*level]);
    7462             : 
    7463           0 :         path->slots[*level]++;
    7464             : 
    7465           0 :         while (path->slots[*level] >= nritems) {
    7466           0 :                 if (*level == root_level) {
                                               /* Nowhere left to climb: keep the slot valid and report the end. */
    7467           0 :                         path->slots[*level] = nritems - 1;
    7468           0 :                         return -1;
    7469             :                 }
    7470             : 
    7471             :                 /* move upnext */
    7472           0 :                 path->slots[*level] = 0;
    7473           0 :                 free_extent_buffer(path->nodes[*level]);
    7474           0 :                 path->nodes[*level] = NULL;
    7475           0 :                 (*level)++;
    7476           0 :                 path->slots[*level]++;
    7477             : 
    7478           0 :                 nritems = btrfs_header_nritems(path->nodes[*level]);
    7479           0 :                 ret = 1;
    7480             :         }
    7481             :         return ret;
    7482             : }
    7483             : 
    7484             : /*
    7485             :  * Returns 1 if it had to move up and next. 0 is returned if it moved only next
    7486             :  * or down. -1 means the end of the tree was reached; other negatives are errors.
    7487             :  */
                       /*
                        * Advance @path by one step and store the key at the new position in @key.
                        *
                        * @path:          path to advance, updated in place
                        * @level:         current level, updated in place
                        * @root_level:    level of the root node, passed to
                        *                 tree_move_next_or_upnext()
                        * @allow_down:    when zero, never descend; move next/up instead
                        * @key:           receives the item key (level 0) or node key (level > 0)
                        *                 found at the resulting position
                        * @reada_min_gen: passed to tree_move_down() for its readahead filter
                        */
    7488           0 : static int tree_advance(struct btrfs_path *path,
    7489             :                         int *level, int root_level,
    7490             :                         int allow_down,
    7491             :                         struct btrfs_key *key,
    7492             :                         u64 reada_min_gen)
    7493             : {
    7494           0 :         int ret;
    7495             : 
                               /* Leaves have nothing below them, so they always move next/up. */
    7496           0 :         if (*level == 0 || !allow_down) {
    7497           0 :                 ret = tree_move_next_or_upnext(path, level, root_level);
    7498             :         } else {
    7499           0 :                 ret = tree_move_down(path, level, reada_min_gen);
    7500             :         }
    7501             : 
    7502             :         /*
    7503             :          * Even if we have reached the end of a tree, ret is -1, update the key
    7504             :          * anyway, so that in case we need to restart due to a block group
    7505             :          * relocation, we can assert that the last key of the root node still
    7506             :          * exists in the tree.
    7507             :          */
    7508           0 :         if (*level == 0)
    7509           0 :                 btrfs_item_key_to_cpu(path->nodes[*level], key,
    7510             :                                       path->slots[*level]);
    7511             :         else
    7512           0 :                 btrfs_node_key_to_cpu(path->nodes[*level], key,
    7513             :                                       path->slots[*level]);
    7514             : 
    7515           0 :         return ret;
    7516             : }
    7517             : 
                       /*
                        * Compare the data of the items at the current leaf slots of @left_path and
                        * @right_path.
                        *
                        * @tmp_buf is scratch space that receives a copy of the left item's data,
                        * so it must be able to hold at least that many bytes.
                        *
                        * Returns 0 if both items have the same size and identical bytes, 1
                        * otherwise.
                        */
    7518           0 : static int tree_compare_item(struct btrfs_path *left_path,
    7519             :                              struct btrfs_path *right_path,
    7520             :                              char *tmp_buf)
    7521             : {
    7522           0 :         int cmp;
    7523           0 :         int len1, len2;
    7524           0 :         unsigned long off1, off2;
    7525             : 
    7526           0 :         len1 = btrfs_item_size(left_path->nodes[0], left_path->slots[0]);
    7527           0 :         len2 = btrfs_item_size(right_path->nodes[0], right_path->slots[0]);
                               /* Different sizes can never match, so skip the byte comparison. */
    7528           0 :         if (len1 != len2)
    7529             :                 return 1;
    7530             : 
    7531           0 :         off1 = btrfs_item_ptr_offset(left_path->nodes[0], left_path->slots[0]);
    7532           0 :         off2 = btrfs_item_ptr_offset(right_path->nodes[0],
    7533             :                                 right_path->slots[0]);
    7534             : 
                               /* Copy the left item out, then compare the right item against the copy. */
    7535           0 :         read_extent_buffer(left_path->nodes[0], tmp_buf, off1, len1);
    7536             : 
    7537           0 :         cmp = memcmp_extent_buffer(right_path->nodes[0], tmp_buf, off2, len1);
    7538           0 :         if (cmp)
    7539           0 :                 return 1;
    7540             :         return 0;
    7541             : }
    7542             : 
    7543             : /*
    7544             :  * A transaction used for relocating a block group was committed or is about to
    7545             :  * finish its commit. Release our paths and restart the search, so that we are
    7546             :  * not using stale extent buffers:
    7547             :  *
    7548             :  * 1) For levels > 0, we are only holding references of extent buffers, without
    7549             :  *    any locks on them, which does not prevent them from having been relocated
    7550             :  *    and reallocated after the last time we released the commit root semaphore.
    7551             :  *    The exception are the root nodes, for which we always have a clone, see
    7552             :  *    the comment at btrfs_compare_trees();
    7553             :  *
    7554             :  * 2) For leaves, level 0, we are holding copies (clones) of extent buffers, so
    7555             :  *    we are safe from the concurrent relocation and reallocation. However they
    7556             :  *    can have file extent items with a pre relocation disk_bytenr value, so we
    7557             :  *    restart the search from the current commit roots and clone the new leaves so
    7558             :  *    that we get the post relocation disk_bytenr values. Not doing so, could
    7559             :  *    make us clone the wrong data in case there are new extents using the old
    7560             :  *    disk_bytenr that happen to be shared.
    7561             :  */
                       /*
                        * Release both paths and search for the saved keys again, stopping at the
                        * saved levels, then re-create the clones the comparison relies on.
                        *
                        * @left_path, @right_path:   paths in the send and parent roots
                        * @left_key, @right_key:     keys the paths pointed at before the restart
                        * @left_level, @right_level: levels the paths were positioned at
                        * @sctx:                     send context providing the two roots
                        *
                        * The commit root semaphore must be held for reading.
                        *
                        * Returns 0 on success or the negative value of the first failing
                        * search_key_again() or replace_node_with_clone() call.
                        */
    7562           0 : static int restart_after_relocation(struct btrfs_path *left_path,
    7563             :                                     struct btrfs_path *right_path,
    7564             :                                     const struct btrfs_key *left_key,
    7565             :                                     const struct btrfs_key *right_key,
    7566             :                                     int left_level,
    7567             :                                     int right_level,
    7568             :                                     const struct send_ctx *sctx)
    7569             : {
    7570           0 :         int root_level;
    7571           0 :         int ret;
    7572             : 
    7573           0 :         lockdep_assert_held_read(&sctx->send_root->fs_info->commit_root_sem);
    7574             : 
    7575           0 :         btrfs_release_path(left_path);
    7576           0 :         btrfs_release_path(right_path);
    7577             : 
    7578             :         /*
    7579             :          * Since keys can not be added or removed to/from our roots because they
    7580             :          * are readonly and we do not allow deduplication to run in parallel
    7581             :          * (which can add, remove or change keys), the layout of the trees should
    7582             :          * not change.
    7583             :          */
                               /* lowest_level makes each search stop at the level the path was at. */
    7584           0 :         left_path->lowest_level = left_level;
    7585           0 :         ret = search_key_again(sctx, sctx->send_root, left_path, left_key);
    7586           0 :         if (ret < 0)
    7587             :                 return ret;
    7588             : 
    7589           0 :         right_path->lowest_level = right_level;
    7590           0 :         ret = search_key_again(sctx, sctx->parent_root, right_path, right_key);
    7591           0 :         if (ret < 0)
    7592             :                 return ret;
    7593             : 
    7594             :         /*
    7595             :          * If the lowest level nodes are leaves, clone them so that they can be
    7596             :          * safely used by changed_cb() while not under the protection of the
    7597             :          * commit root semaphore, even if relocation and reallocation happens in
    7598             :          * parallel.
    7599             :          */
    7600           0 :         if (left_level == 0) {
    7601           0 :                 ret = replace_node_with_clone(left_path, 0);
    7602           0 :                 if (ret < 0)
    7603             :                         return ret;
    7604             :         }
    7605             : 
    7606           0 :         if (right_level == 0) {
    7607           0 :                 ret = replace_node_with_clone(right_path, 0);
    7608           0 :                 if (ret < 0)
    7609             :                         return ret;
    7610             :         }
    7611             : 
    7612             :         /*
    7613             :          * Now clone the root nodes (unless they happen to be the leaves we have
    7614             :          * already cloned). This is to protect against concurrent snapshotting of
    7615             :          * the send and parent roots (see the comment at btrfs_compare_trees()).
    7616             :          */
    7617           0 :         root_level = btrfs_header_level(sctx->send_root->commit_root);
    7618           0 :         if (root_level > 0) {
    7619           0 :                 ret = replace_node_with_clone(left_path, root_level);
    7620           0 :                 if (ret < 0)
    7621             :                         return ret;
    7622             :         }
    7623             : 
    7624           0 :         root_level = btrfs_header_level(sctx->parent_root->commit_root);
    7625           0 :         if (root_level > 0) {
    7626           0 :                 ret = replace_node_with_clone(right_path, root_level);
    7627           0 :                 if (ret < 0)
    7628             :                         return ret;
    7629             :         }
    7630             : 
    7631             :         return 0;
    7632             : }
    7633             : 
    7634             : /*
    7635             :  * This function compares two trees and calls the provided callback for
    7636             :  * every changed/new/deleted item it finds.
    7637             :  * If shared tree blocks are encountered, whole subtrees are skipped, making
    7638             :  * the compare pretty fast on snapshotted subvolumes.
    7639             :  *
    7640             :  * This currently works on commit roots only. As commit roots are read only,
    7641             :  * we don't do any locking. The commit roots are protected with transactions.
    7642             :  * Transactions are ended and rejoined when a commit is tried in between.
    7643             :  *
    7644             :  * This function checks for modifications done to the trees while comparing.
    7645             :  * If it detects a change, it aborts immediately.
    7646             :  */
    7647           0 : static int btrfs_compare_trees(struct btrfs_root *left_root,
    7648             :                         struct btrfs_root *right_root, struct send_ctx *sctx)
    7649             : { /* Walk both commit roots in key order, reporting items through changed_cb(); returns 0 when both trees are exhausted, else a negative error. */
    7650           0 :         struct btrfs_fs_info *fs_info = left_root->fs_info;
    7651           0 :         int ret;
    7652           0 :         int cmp;
    7653           0 :         struct btrfs_path *left_path = NULL; /* cursor into left_root (the "send" root, see below) */
    7654           0 :         struct btrfs_path *right_path = NULL; /* cursor into right_root (the parent root, see below) */
    7655           0 :         struct btrfs_key left_key;
    7656           0 :         struct btrfs_key right_key;
    7657           0 :         char *tmp_buf = NULL; /* nodesize scratch buffer passed to tree_compare_item() */
    7658           0 :         int left_root_level; /* level of the root node, sampled once under commit_root_sem */
    7659           0 :         int right_root_level;
    7660           0 :         int left_level;
    7661           0 :         int right_level;
    7662           0 :         int left_end_reached = 0; /* nonzero once tree_advance() returned -1 for this tree */
    7663           0 :         int right_end_reached = 0;
    7664           0 :         int advance_left = 0; /* pending step: 0, ADVANCE or ADVANCE_ONLY_NEXT */
    7665           0 :         int advance_right = 0;
    7666           0 :         u64 left_blockptr;
    7667           0 :         u64 right_blockptr;
    7668           0 :         u64 left_gen;
    7669           0 :         u64 right_gen;
    7670           0 :         u64 reada_min_gen; /* right root generation, forwarded to tree_advance() */
    7671             : 
    7672           0 :         left_path = btrfs_alloc_path();
    7673           0 :         if (!left_path) {
    7674           0 :                 ret = -ENOMEM;
    7675           0 :                 goto out; /* commit_root_sem is not held yet */
    7676             :         }
    7677           0 :         right_path = btrfs_alloc_path();
    7678           0 :         if (!right_path) {
    7679           0 :                 ret = -ENOMEM;
    7680           0 :                 goto out;
    7681             :         }
    7682             : 
    7683           0 :         tmp_buf = kvmalloc(fs_info->nodesize, GFP_KERNEL);
    7684           0 :         if (!tmp_buf) {
    7685           0 :                 ret = -ENOMEM;
    7686           0 :                 goto out;
    7687             :         }
    7688             : 
    7689           0 :         left_path->search_commit_root = 1;
    7690           0 :         left_path->skip_locking = 1;
    7691           0 :         right_path->search_commit_root = 1;
    7692           0 :         right_path->skip_locking = 1;
    7693             : 
    7694             :         /*
    7695             :          * Strategy: Go to the first items of both trees. Then do
    7696             :          *
    7697             :          * If both trees are at level 0
    7698             :          *   Compare keys of current items
    7699             :          *     If left < right treat left item as new, advance left tree
    7700             :          *       and repeat
    7701             :          *     If left > right treat right item as deleted, advance right tree
    7702             :          *       and repeat
    7703             :          *     If left == right do deep compare of items, treat as changed if
    7704             :          *       needed, advance both trees and repeat
    7705             :          * If both trees are at the same level but not at level 0
    7706             :          *   Compare keys of current nodes/leaves
    7707             :          *     If left < right advance left tree and repeat
    7708             :          *     If left > right advance right tree and repeat
    7709             :          *     If left == right compare blockptrs of the next nodes/leaves
    7710             :          *       If they match advance both trees but stay at the same level
    7711             :          *         and repeat
    7712             :          *       If they don't match advance both trees while allowing to go
    7713             :          *         deeper and repeat
    7714             :          * If tree levels are different
    7715             :          *   Advance the tree that needs it and repeat
    7716             :          *
    7717             :          * Advancing a tree means:
    7718             :          *   If we are at level 0, try to go to the next slot. If that's not
    7719             :          *   possible, go one level up and repeat. Stop when we found a level
    7720             :          *   where we could go to the next slot. We may at this point be on a
    7721             :          *   node or a leaf.
    7722             :          *
    7723             :          *   If we are not at level 0 and not on shared tree blocks, go one
    7724             :          *   level deeper.
    7725             :          *
    7726             :          *   If we are not at level 0 and on shared tree blocks, go one slot to
    7727             :          *   the right if possible or go up and right.
    7728             :          */
    7729             : 
    7730           0 :         down_read(&fs_info->commit_root_sem);
    7731           0 :         left_level = btrfs_header_level(left_root->commit_root);
    7732           0 :         left_root_level = left_level;
    7733             :         /*
    7734             :          * We clone the root node of the send and parent roots to prevent races
    7735             :          * with snapshot creation of these roots. Snapshot creation COWs the
    7736             :          * root node of a tree, so after the transaction is committed the old
    7737             :          * extent can be reallocated while this send operation is still ongoing.
    7738             :          * So we clone them, under the commit root semaphore, to be race free.
    7739             :          */
    7740           0 :         left_path->nodes[left_level] =
    7741           0 :                         btrfs_clone_extent_buffer(left_root->commit_root);
    7742           0 :         if (!left_path->nodes[left_level]) {
    7743           0 :                 ret = -ENOMEM;
    7744           0 :                 goto out_unlock;
    7745             :         }
    7746             : 
    7747           0 :         right_level = btrfs_header_level(right_root->commit_root);
    7748           0 :         right_root_level = right_level;
    7749           0 :         right_path->nodes[right_level] =
    7750           0 :                         btrfs_clone_extent_buffer(right_root->commit_root);
    7751           0 :         if (!right_path->nodes[right_level]) {
    7752           0 :                 ret = -ENOMEM;
    7753           0 :                 goto out_unlock;
    7754             :         }
    7755             :         /*
    7756             :          * Our right root is the parent root, while the left root is the "send"
    7757             :          * root. We know that all new nodes/leaves in the left root must have
    7758             :          * a generation greater than the right root's generation, so we trigger
    7759             :          * readahead for those nodes and leaves of the left root, as we know we
    7760             :          * will need to read them at some point.
    7761             :          */
    7762           0 :         reada_min_gen = btrfs_header_generation(right_root->commit_root);
    7763             : 
    7764           0 :         if (left_level == 0)
    7765           0 :                 btrfs_item_key_to_cpu(left_path->nodes[left_level],
    7766             :                                 &left_key, left_path->slots[left_level]);
    7767             :         else
    7768           0 :                 btrfs_node_key_to_cpu(left_path->nodes[left_level],
    7769             :                                 &left_key, left_path->slots[left_level]);
    7770           0 :         if (right_level == 0)
    7771           0 :                 btrfs_item_key_to_cpu(right_path->nodes[right_level],
    7772             :                                 &right_key, right_path->slots[right_level]);
    7773             :         else
    7774           0 :                 btrfs_node_key_to_cpu(right_path->nodes[right_level],
    7775             :                                 &right_key, right_path->slots[right_level]);
    7776             : 
    7777           0 :         sctx->last_reloc_trans = fs_info->last_reloc_trans; /* baseline for the change check at the top of the loop */
    7778             : 
    7779           0 :         while (1) {
    7780           0 :                 if (need_resched() ||
    7781             :                     rwsem_is_contended(&fs_info->commit_root_sem)) {
    7782           0 :                         up_read(&fs_info->commit_root_sem); /* briefly drop the semaphore so waiters or the scheduler can run */
    7783           0 :                         cond_resched();
    7784           0 :                         down_read(&fs_info->commit_root_sem);
    7785             :                 }
    7786             : 
    7787           0 :                 if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
    7788           0 :                         ret = restart_after_relocation(left_path, right_path,
    7789             :                                                        &left_key, &right_key,
    7790             :                                                        left_level, right_level,
    7791             :                                                        sctx);
    7792           0 :                         if (ret < 0)
    7793           0 :                                 goto out_unlock;
    7794           0 :                         sctx->last_reloc_trans = fs_info->last_reloc_trans; /* remember the value we restarted for */
    7795             :                 }
    7796             : 
    7797           0 :                 if (advance_left && !left_end_reached) {
    7798           0 :                         ret = tree_advance(left_path, &left_level,
    7799             :                                         left_root_level,
    7800             :                                         advance_left != ADVANCE_ONLY_NEXT,
    7801             :                                         &left_key, reada_min_gen);
    7802           0 :                         if (ret == -1) /* -1 marks the end of the tree; other negatives are errors */
    7803             :                                 left_end_reached = ADVANCE;
    7804           0 :                         else if (ret < 0)
    7805           0 :                                 goto out_unlock;
    7806             :                         advance_left = 0;
    7807             :                 }
    7808           0 :                 if (advance_right && !right_end_reached) {
    7809           0 :                         ret = tree_advance(right_path, &right_level,
    7810             :                                         right_root_level,
    7811             :                                         advance_right != ADVANCE_ONLY_NEXT,
    7812             :                                         &right_key, reada_min_gen);
    7813           0 :                         if (ret == -1) /* -1 marks the end of the tree; other negatives are errors */
    7814             :                                 right_end_reached = ADVANCE;
    7815           0 :                         else if (ret < 0)
    7816           0 :                                 goto out_unlock;
    7817             :                         advance_right = 0;
    7818             :                 }
    7819             : 
    7820           0 :                 if (left_end_reached && right_end_reached) { /* both trees exhausted: done */
    7821           0 :                         ret = 0;
    7822           0 :                         goto out_unlock;
    7823           0 :                 } else if (left_end_reached) { /* only right items remain: report leaf items as deleted */
    7824           0 :                         if (right_level == 0) {
    7825           0 :                                 up_read(&fs_info->commit_root_sem); /* changed_cb() is called without commit_root_sem */
    7826           0 :                                 ret = changed_cb(left_path, right_path,
    7827             :                                                 &right_key,
    7828             :                                                 BTRFS_COMPARE_TREE_DELETED,
    7829             :                                                 sctx);
    7830           0 :                                 if (ret < 0)
    7831           0 :                                         goto out; /* semaphore already released */
    7832           0 :                                 down_read(&fs_info->commit_root_sem);
    7833             :                         }
    7834           0 :                         advance_right = ADVANCE;
    7835           0 :                         continue;
    7836           0 :                 } else if (right_end_reached) { /* only left items remain: report leaf items as new */
    7837           0 :                         if (left_level == 0) {
    7838           0 :                                 up_read(&fs_info->commit_root_sem);
    7839           0 :                                 ret = changed_cb(left_path, right_path,
    7840             :                                                 &left_key,
    7841             :                                                 BTRFS_COMPARE_TREE_NEW,
    7842             :                                                 sctx);
    7843           0 :                                 if (ret < 0)
    7844           0 :                                         goto out; /* semaphore already released */
    7845           0 :                                 down_read(&fs_info->commit_root_sem);
    7846             :                         }
    7847           0 :                         advance_left = ADVANCE;
    7848           0 :                         continue;
    7849             :                 }
    7850             : 
    7851           0 :                 if (left_level == 0 && right_level == 0) { /* both cursors are on leaf items */
    7852           0 :                         up_read(&fs_info->commit_root_sem);
    7853           0 :                         cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
    7854           0 :                         if (cmp < 0) {
    7855           0 :                                 ret = changed_cb(left_path, right_path,
    7856             :                                                 &left_key,
    7857             :                                                 BTRFS_COMPARE_TREE_NEW,
    7858             :                                                 sctx);
    7859           0 :                                 advance_left = ADVANCE;
    7860           0 :                         } else if (cmp > 0) {
    7861           0 :                                 ret = changed_cb(left_path, right_path,
    7862             :                                                 &right_key,
    7863             :                                                 BTRFS_COMPARE_TREE_DELETED,
    7864             :                                                 sctx);
    7865           0 :                                 advance_right = ADVANCE;
    7866             :                         } else {
    7867           0 :                                 enum btrfs_compare_tree_result result;
    7868             : 
    7869           0 :                                 WARN_ON(!extent_buffer_uptodate(left_path->nodes[0]));
    7870           0 :                                 ret = tree_compare_item(left_path, right_path,
    7871             :                                                         tmp_buf);
    7872           0 :                                 if (ret) /* nonzero from tree_compare_item() is reported as CHANGED */
    7873             :                                         result = BTRFS_COMPARE_TREE_CHANGED;
    7874             :                                 else
    7875           0 :                                         result = BTRFS_COMPARE_TREE_SAME;
    7876           0 :                                 ret = changed_cb(left_path, right_path,
    7877             :                                                  &left_key, result, sctx);
    7878           0 :                                 advance_left = ADVANCE;
    7879           0 :                                 advance_right = ADVANCE;
    7880             :                         }
    7881             : 
    7882           0 :                         if (ret < 0)
    7883           0 :                                 goto out; /* semaphore was dropped at the top of this branch */
    7884           0 :                         down_read(&fs_info->commit_root_sem);
    7885           0 :                 } else if (left_level == right_level) { /* same non-zero level: compare node keys */
    7886           0 :                         cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
    7887           0 :                         if (cmp < 0) {
    7888             :                                 advance_left = ADVANCE;
    7889           0 :                         } else if (cmp > 0) {
    7890             :                                 advance_right = ADVANCE;
    7891             :                         } else {
    7892           0 :                                 left_blockptr = btrfs_node_blockptr(
    7893           0 :                                                 left_path->nodes[left_level],
    7894             :                                                 left_path->slots[left_level]);
    7895           0 :                                 right_blockptr = btrfs_node_blockptr(
    7896           0 :                                                 right_path->nodes[right_level],
    7897             :                                                 right_path->slots[right_level]);
    7898           0 :                                 left_gen = btrfs_node_ptr_generation(
    7899           0 :                                                 left_path->nodes[left_level],
    7900             :                                                 left_path->slots[left_level]);
    7901           0 :                                 right_gen = btrfs_node_ptr_generation(
    7902           0 :                                                 right_path->nodes[right_level],
    7903             :                                                 right_path->slots[right_level]);
    7904           0 :                                 if (left_blockptr == right_blockptr &&
    7905           0 :                                     left_gen == right_gen) {
    7906             :                                         /*
    7907             :                                          * As we're on a shared block, don't
    7908             :                                          * allow to go deeper.
    7909             :                                          */
    7910             :                                         advance_left = ADVANCE_ONLY_NEXT;
    7911             :                                         advance_right = ADVANCE_ONLY_NEXT;
    7912             :                                 } else {
    7913           0 :                                         advance_left = ADVANCE;
    7914           0 :                                         advance_right = ADVANCE;
    7915             :                                 }
    7916             :                         }
    7917           0 :                 } else if (left_level < right_level) { /* right cursor is at the higher level: step it */
    7918             :                         advance_right = ADVANCE;
    7919             :                 } else { /* left cursor is at the higher level: step it */
    7920           0 :                         advance_left = ADVANCE;
    7921             :                 }
    7922             :         }
    7923             : 
    7924           0 : out_unlock: /* exit path taken while commit_root_sem is held */
    7925           0 :         up_read(&fs_info->commit_root_sem);
    7926           0 : out: /* exit path taken when the semaphore is not held */
    7927           0 :         btrfs_free_path(left_path);
    7928           0 :         btrfs_free_path(right_path);
    7929           0 :         kvfree(tmp_buf);
    7930           0 :         return ret;
    7931             : }
    7932             : 
    7933           0 : static int send_subvol(struct send_ctx *sctx) /* Run the send steps for sctx in order; returns the first negative result, else the last call's result. */
    7934             : {
    7935           0 :         int ret;
    7936             : 
    7937           0 :         if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) { /* header is skipped when the flag is set */
    7938           0 :                 ret = send_header(sctx);
    7939           0 :                 if (ret < 0)
    7940           0 :                         goto out;
    7941             :         }
    7942             : 
    7943           0 :         ret = send_subvol_begin(sctx);
    7944           0 :         if (ret < 0)
    7945           0 :                 goto out;
    7946             : 
    7947           0 :         if (sctx->parent_root) { /* a parent root is set: compare send_root against it */
    7948           0 :                 ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root, sctx);
    7949           0 :                 if (ret < 0)
    7950           0 :                         goto out;
    7951           0 :                 ret = finish_inode_if_needed(sctx, 1);
    7952           0 :                 if (ret < 0)
    7953           0 :                         goto out;
    7954             :         } else { /* no parent root: take the full_send_tree() path */
    7955           0 :                 ret = full_send_tree(sctx);
    7956           0 :                 if (ret < 0)
    7957           0 :                         goto out;
    7958             :         }
    7959             : 
    7960           0 : out: /* reached on success and on failure */
    7961           0 :         free_recorded_refs(sctx);
    7962           0 :         return ret;
    7963             : }
    7964             : 
    7965             : /*
    7966             :  * If orphan cleanup did remove any orphans from a root, it means the tree
    7967             :  * was modified and therefore the commit root is not the same as the current
    7968             :  * root anymore. This is a problem, because send uses the commit root and
    7969             :  * therefore can see inode items that don't exist in the current root anymore,
    7970             :  * and for example make calls to btrfs_iget, which will do tree lookups based
    7971             :  * on the current root and not on the commit root. Those lookups will fail,
    7972             :  * returning a -ESTALE error, and making send fail with that error. So make
    7973             :  * sure a send does not see any orphans we have just removed, and that it will
    7974             :  * see the same inodes regardless of whether a transaction commit happened
    7975             :  * before it started (meaning that the commit root will be the same as the
    7976             :  * current root) or not.
    7977             :  */
    7978           0 : static int ensure_commit_roots_uptodate(struct send_ctx *sctx) /* Returns 0 if no checked root differs from its commit root, else the result of joining/ending/committing a transaction. */
    7979             : {
    7980           0 :         int i;
    7981           0 :         struct btrfs_trans_handle *trans = NULL; /* joined lazily, only once a root is found out of date */
    7982             : 
    7983           0 : again: /* run a second time, after joining the transaction, to re-check the roots */
    7984           0 :         if (sctx->parent_root &&
    7985           0 :             sctx->parent_root->node != sctx->parent_root->commit_root)
    7986           0 :                 goto commit_trans;
    7987             : 
    7988           0 :         for (i = 0; i < sctx->clone_roots_cnt; i++) /* NOTE(review): sctx->send_root is not checked directly; presumably it is covered via clone_roots - confirm */
    7989           0 :                 if (sctx->clone_roots[i].root->node !=
    7990           0 :                     sctx->clone_roots[i].root->commit_root)
    7991           0 :                         goto commit_trans;
    7992             : 
    7993           0 :         if (trans) /* joined on the first pass, but the re-check found nothing to commit */
    7994           0 :                 return btrfs_end_transaction(trans);
    7995             : 
    7996             :         return 0;
    7997             : 
    7998           0 : commit_trans:
    7999             :         /* Use any root, all fs roots will get their commit roots updated. */
    8000           0 :         if (!trans) {
    8001           0 :                 trans = btrfs_join_transaction(sctx->send_root);
    8002           0 :                 if (IS_ERR(trans))
    8003           0 :                         return PTR_ERR(trans);
    8004           0 :                 goto again;
    8005             :         }
    8006             : 
    8007           0 :         return btrfs_commit_transaction(trans); /* still out of date after joining: commit */
    8008             : }
    8009             : 
    8010             : /*
    8011             :  * Make sure any existing delalloc is flushed for any root used by a send
    8012             :  * operation so that we do not miss any data and we do not race with writeback
    8013             :  * finishing and changing a tree while send is using the tree. This could
    8014             :  * happen if a subvolume is in RW mode, has delalloc, is turned to RO mode and
    8015             :  * a send operation then uses the subvolume.
    8016             :  * After flushing delalloc ensure_commit_roots_uptodate() must be called.
    8017             :  */
    8018           0 : static int flush_delalloc_roots(struct send_ctx *sctx) /* Returns 0, or the first nonzero result of btrfs_start_delalloc_snapshot(). */
    8019             : {
    8020           0 :         struct btrfs_root *root = sctx->parent_root;
    8021           0 :         int ret;
    8022           0 :         int i;
    8023             : 
    8024           0 :         if (root) { /* parent_root may be unset; then only the clone roots are handled */
    8025           0 :                 ret = btrfs_start_delalloc_snapshot(root, false);
    8026           0 :                 if (ret)
    8027             :                         return ret;
    8028           0 :                 btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
    8029             :         }
    8030             : 
    8031           0 :         for (i = 0; i < sctx->clone_roots_cnt; i++) { /* same two steps for every clone root */
    8032           0 :                 root = sctx->clone_roots[i].root;
    8033           0 :                 ret = btrfs_start_delalloc_snapshot(root, false);
    8034           0 :                 if (ret)
    8035           0 :                         return ret; /* stop at the first failure; remaining roots are not visited */
    8036           0 :                 btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
    8037             :         }
    8038             : 
    8039             :         return 0;
    8040             : }
    8041             : 
    8042           0 : static void btrfs_root_dec_send_in_progress(struct btrfs_root* root) /* Decrement root->send_in_progress under root_item_lock and log an error if it went negative. */
    8043             : {
    8044           0 :         spin_lock(&root->root_item_lock);
    8045           0 :         root->send_in_progress--;
    8046             :         /*
    8047             :          * Not much left to do, we don't know why it's unbalanced and
    8048             :          * can't blindly reset it to 0.
    8049             :          */
    8050           0 :         if (root->send_in_progress < 0) /* the negative value is left in place, only reported */
    8051           0 :                 btrfs_err(root->fs_info,
    8052             :                           "send_in_progress unbalanced %d root %llu",
    8053             :                           root->send_in_progress, root->root_key.objectid);
    8054           0 :         spin_unlock(&root->root_item_lock);
    8055           0 : }
    8056             : 
    8057           0 : static void dedupe_in_progress_warn(const struct btrfs_root *root) /* Emit a btrfs_warn_rl() message naming the root's objectid and its dedupe_in_progress count; takes no lock itself. */
    8058             : {
    8059           0 :         btrfs_warn_rl(root->fs_info,
    8060             : "cannot use root %llu for send while deduplications on it are in progress (%d in progress)",
    8061             :                       root->root_key.objectid, root->dedupe_in_progress);
    8062           0 : }
    8063             : 
    8064           0 : long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
    8065             : {
    8066           0 :         int ret = 0;
    8067           0 :         struct btrfs_root *send_root = BTRFS_I(inode)->root;
    8068           0 :         struct btrfs_fs_info *fs_info = send_root->fs_info;
    8069           0 :         struct btrfs_root *clone_root;
    8070           0 :         struct send_ctx *sctx = NULL;
    8071           0 :         u32 i;
    8072           0 :         u64 *clone_sources_tmp = NULL;
    8073           0 :         int clone_sources_to_rollback = 0;
    8074           0 :         size_t alloc_size;
    8075           0 :         int sort_clone_roots = 0;
    8076           0 :         struct btrfs_lru_cache_entry *entry;
    8077           0 :         struct btrfs_lru_cache_entry *tmp;
    8078             : 
    8079           0 :         if (!capable(CAP_SYS_ADMIN))
    8080             :                 return -EPERM;
    8081             : 
    8082             :         /*
    8083             :          * The subvolume must remain read-only during send, protect against
    8084             :          * making it RW. This also protects against deletion.
    8085             :          */
    8086           0 :         spin_lock(&send_root->root_item_lock);
    8087           0 :         if (btrfs_root_readonly(send_root) && send_root->dedupe_in_progress) {
    8088           0 :                 dedupe_in_progress_warn(send_root);
    8089           0 :                 spin_unlock(&send_root->root_item_lock);
    8090           0 :                 return -EAGAIN;
    8091             :         }
    8092           0 :         send_root->send_in_progress++;
    8093           0 :         spin_unlock(&send_root->root_item_lock);
    8094             : 
    8095             :         /*
    8096             :          * Userspace tools do the checks and warn the user if it's
    8097             :          * not RO.
    8098             :          */
    8099           0 :         if (!btrfs_root_readonly(send_root)) {
    8100           0 :                 ret = -EPERM;
    8101           0 :                 goto out;
    8102             :         }
    8103             : 
    8104             :         /*
    8105             :          * Check that we don't overflow at later allocations, we request
    8106             :          * clone_sources_count + 1 items, and compare to unsigned long inside
    8107             :          * access_ok. Also set an upper limit for allocation size so this can't
    8108             :          * easily exhaust memory. Max number of clone sources is about 200K.
    8109             :          */
    8110           0 :         if (arg->clone_sources_count > SZ_8M / sizeof(struct clone_root)) {
    8111           0 :                 ret = -EINVAL;
    8112           0 :                 goto out;
    8113             :         }
    8114             : 
    8115           0 :         if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
    8116           0 :                 ret = -EINVAL;
    8117           0 :                 goto out;
    8118             :         }
    8119             : 
    8120           0 :         sctx = kzalloc(sizeof(struct send_ctx), GFP_KERNEL);
    8121           0 :         if (!sctx) {
    8122           0 :                 ret = -ENOMEM;
    8123           0 :                 goto out;
    8124             :         }
    8125             : 
    8126           0 :         INIT_LIST_HEAD(&sctx->new_refs);
    8127           0 :         INIT_LIST_HEAD(&sctx->deleted_refs);
    8128             : 
    8129           0 :         btrfs_lru_cache_init(&sctx->name_cache, SEND_MAX_NAME_CACHE_SIZE);
    8130           0 :         btrfs_lru_cache_init(&sctx->backref_cache, SEND_MAX_BACKREF_CACHE_SIZE);
    8131           0 :         btrfs_lru_cache_init(&sctx->dir_created_cache,
    8132             :                              SEND_MAX_DIR_CREATED_CACHE_SIZE);
    8133             :         /*
    8134             :          * This cache is periodically trimmed to a fixed size elsewhere, see
    8135             :          * cache_dir_utimes() and trim_dir_utimes_cache().
    8136             :          */
    8137           0 :         btrfs_lru_cache_init(&sctx->dir_utimes_cache, 0);
    8138             : 
    8139           0 :         sctx->pending_dir_moves = RB_ROOT;
    8140           0 :         sctx->waiting_dir_moves = RB_ROOT;
    8141           0 :         sctx->orphan_dirs = RB_ROOT;
    8142           0 :         sctx->rbtree_new_refs = RB_ROOT;
    8143           0 :         sctx->rbtree_deleted_refs = RB_ROOT;
    8144             : 
    8145           0 :         sctx->flags = arg->flags;
    8146             : 
    8147           0 :         if (arg->flags & BTRFS_SEND_FLAG_VERSION) {
    8148           0 :                 if (arg->version > BTRFS_SEND_STREAM_VERSION) {
    8149           0 :                         ret = -EPROTO;
    8150           0 :                         goto out;
    8151             :                 }
    8152             :                 /* Zero means "use the highest version" */
    8153           0 :                 sctx->proto = arg->version ?: BTRFS_SEND_STREAM_VERSION;
    8154             :         } else {
    8155           0 :                 sctx->proto = 1;
    8156             :         }
    8157           0 :         if ((arg->flags & BTRFS_SEND_FLAG_COMPRESSED) && sctx->proto < 2) {
    8158           0 :                 ret = -EINVAL;
    8159           0 :                 goto out;
    8160             :         }
    8161             : 
    8162           0 :         sctx->send_filp = fget(arg->send_fd);
    8163           0 :         if (!sctx->send_filp) {
    8164           0 :                 ret = -EBADF;
    8165           0 :                 goto out;
    8166             :         }
    8167             : 
    8168           0 :         sctx->send_root = send_root;
    8169             :         /*
    8170             :          * Unlikely but possible, if the subvolume is marked for deletion but
    8171             :          * is slow to remove the directory entry, send can still be started
    8172             :          */
    8173           0 :         if (btrfs_root_dead(sctx->send_root)) {
    8174           0 :                 ret = -EPERM;
    8175           0 :                 goto out;
    8176             :         }
    8177             : 
    8178           0 :         sctx->clone_roots_cnt = arg->clone_sources_count;
    8179             : 
    8180           0 :         if (sctx->proto >= 2) {
    8181           0 :                 u32 send_buf_num_pages;
    8182             : 
    8183           0 :                 sctx->send_max_size = BTRFS_SEND_BUF_SIZE_V2;
    8184           0 :                 sctx->send_buf = vmalloc(sctx->send_max_size);
    8185           0 :                 if (!sctx->send_buf) {
    8186           0 :                         ret = -ENOMEM;
    8187           0 :                         goto out;
    8188             :                 }
    8189           0 :                 send_buf_num_pages = sctx->send_max_size >> PAGE_SHIFT;
    8190           0 :                 sctx->send_buf_pages = kcalloc(send_buf_num_pages,
    8191             :                                                sizeof(*sctx->send_buf_pages),
    8192             :                                                GFP_KERNEL);
    8193           0 :                 if (!sctx->send_buf_pages) {
    8194           0 :                         ret = -ENOMEM;
    8195           0 :                         goto out;
    8196             :                 }
    8197           0 :                 for (i = 0; i < send_buf_num_pages; i++) {
    8198           0 :                         sctx->send_buf_pages[i] =
    8199           0 :                                 vmalloc_to_page(sctx->send_buf + (i << PAGE_SHIFT));
    8200             :                 }
    8201             :         } else {
    8202           0 :                 sctx->send_max_size = BTRFS_SEND_BUF_SIZE_V1;
    8203           0 :                 sctx->send_buf = kvmalloc(sctx->send_max_size, GFP_KERNEL);
    8204             :         }
    8205           0 :         if (!sctx->send_buf) {
    8206           0 :                 ret = -ENOMEM;
    8207           0 :                 goto out;
    8208             :         }
    8209             : 
    8210           0 :         sctx->clone_roots = kvcalloc(sizeof(*sctx->clone_roots),
    8211           0 :                                      arg->clone_sources_count + 1,
    8212             :                                      GFP_KERNEL);
    8213           0 :         if (!sctx->clone_roots) {
    8214           0 :                 ret = -ENOMEM;
    8215           0 :                 goto out;
    8216             :         }
    8217             : 
    8218           0 :         alloc_size = array_size(sizeof(*arg->clone_sources),
    8219             :                                 arg->clone_sources_count);
    8220             : 
    8221           0 :         if (arg->clone_sources_count) {
    8222           0 :                 clone_sources_tmp = kvmalloc(alloc_size, GFP_KERNEL);
    8223           0 :                 if (!clone_sources_tmp) {
    8224           0 :                         ret = -ENOMEM;
    8225           0 :                         goto out;
    8226             :                 }
    8227             : 
    8228           0 :                 ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
    8229             :                                 alloc_size);
    8230           0 :                 if (ret) {
    8231           0 :                         ret = -EFAULT;
    8232           0 :                         goto out;
    8233             :                 }
    8234             : 
    8235           0 :                 for (i = 0; i < arg->clone_sources_count; i++) {
    8236           0 :                         clone_root = btrfs_get_fs_root(fs_info,
    8237           0 :                                                 clone_sources_tmp[i], true);
    8238           0 :                         if (IS_ERR(clone_root)) {
    8239           0 :                                 ret = PTR_ERR(clone_root);
    8240           0 :                                 goto out;
    8241             :                         }
    8242           0 :                         spin_lock(&clone_root->root_item_lock);
    8243           0 :                         if (!btrfs_root_readonly(clone_root) ||
    8244             :                             btrfs_root_dead(clone_root)) {
    8245           0 :                                 spin_unlock(&clone_root->root_item_lock);
    8246           0 :                                 btrfs_put_root(clone_root);
    8247           0 :                                 ret = -EPERM;
    8248           0 :                                 goto out;
    8249             :                         }
    8250           0 :                         if (clone_root->dedupe_in_progress) {
    8251           0 :                                 dedupe_in_progress_warn(clone_root);
    8252           0 :                                 spin_unlock(&clone_root->root_item_lock);
    8253           0 :                                 btrfs_put_root(clone_root);
    8254           0 :                                 ret = -EAGAIN;
    8255           0 :                                 goto out;
    8256             :                         }
    8257           0 :                         clone_root->send_in_progress++;
    8258           0 :                         spin_unlock(&clone_root->root_item_lock);
    8259             : 
    8260           0 :                         sctx->clone_roots[i].root = clone_root;
    8261           0 :                         clone_sources_to_rollback = i + 1;
    8262             :                 }
    8263           0 :                 kvfree(clone_sources_tmp);
    8264           0 :                 clone_sources_tmp = NULL;
    8265             :         }
    8266             : 
    8267           0 :         if (arg->parent_root) {
    8268           0 :                 sctx->parent_root = btrfs_get_fs_root(fs_info, arg->parent_root,
    8269             :                                                       true);
    8270           0 :                 if (IS_ERR(sctx->parent_root)) {
    8271           0 :                         ret = PTR_ERR(sctx->parent_root);
    8272           0 :                         goto out;
    8273             :                 }
    8274             : 
    8275           0 :                 spin_lock(&sctx->parent_root->root_item_lock);
    8276           0 :                 sctx->parent_root->send_in_progress++;
    8277           0 :                 if (!btrfs_root_readonly(sctx->parent_root) ||
    8278             :                                 btrfs_root_dead(sctx->parent_root)) {
    8279           0 :                         spin_unlock(&sctx->parent_root->root_item_lock);
    8280           0 :                         ret = -EPERM;
    8281           0 :                         goto out;
    8282             :                 }
    8283           0 :                 if (sctx->parent_root->dedupe_in_progress) {
    8284           0 :                         dedupe_in_progress_warn(sctx->parent_root);
    8285           0 :                         spin_unlock(&sctx->parent_root->root_item_lock);
    8286           0 :                         ret = -EAGAIN;
    8287           0 :                         goto out;
    8288             :                 }
    8289           0 :                 spin_unlock(&sctx->parent_root->root_item_lock);
    8290             :         }
    8291             : 
    8292             :         /*
    8293             :          * Clones from send_root are allowed, but only if the clone source
    8294             :          * is behind the current send position. This is checked while searching
    8295             :          * for possible clone sources.
    8296             :          */
    8297           0 :         sctx->clone_roots[sctx->clone_roots_cnt++].root =
    8298           0 :                 btrfs_grab_root(sctx->send_root);
    8299             : 
    8300             :         /* We do a bsearch later */
    8301           0 :         sort(sctx->clone_roots, sctx->clone_roots_cnt,
    8302             :                         sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
    8303             :                         NULL);
    8304           0 :         sort_clone_roots = 1;
    8305             : 
    8306           0 :         ret = flush_delalloc_roots(sctx);
    8307           0 :         if (ret)
    8308           0 :                 goto out;
    8309             : 
    8310           0 :         ret = ensure_commit_roots_uptodate(sctx);
    8311           0 :         if (ret)
    8312           0 :                 goto out;
    8313             : 
    8314           0 :         ret = send_subvol(sctx);
    8315           0 :         if (ret < 0)
    8316           0 :                 goto out;
    8317             : 
    8318           0 :         btrfs_lru_cache_for_each_entry_safe(&sctx->dir_utimes_cache, entry, tmp) {
    8319           0 :                 ret = send_utimes(sctx, entry->key, entry->gen);
    8320           0 :                 if (ret < 0)
    8321           0 :                         goto out;
    8322           0 :                 btrfs_lru_cache_remove(&sctx->dir_utimes_cache, entry);
    8323             :         }
    8324             : 
    8325           0 :         if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
    8326           0 :                 ret = begin_cmd(sctx, BTRFS_SEND_C_END);
    8327           0 :                 if (ret < 0)
    8328           0 :                         goto out;
    8329           0 :                 ret = send_cmd(sctx);
    8330           0 :                 if (ret < 0)
    8331           0 :                         goto out;
    8332             :         }
    8333             : 
    8334           0 : out:
    8335           0 :         WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves));
    8336           0 :         while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) {
    8337           0 :                 struct rb_node *n;
    8338           0 :                 struct pending_dir_move *pm;
    8339             : 
    8340           0 :                 n = rb_first(&sctx->pending_dir_moves);
    8341           0 :                 pm = rb_entry(n, struct pending_dir_move, node);
    8342           0 :                 while (!list_empty(&pm->list)) {
    8343           0 :                         struct pending_dir_move *pm2;
    8344             : 
    8345           0 :                         pm2 = list_first_entry(&pm->list,
    8346             :                                                struct pending_dir_move, list);
    8347           0 :                         free_pending_move(sctx, pm2);
    8348             :                 }
    8349           0 :                 free_pending_move(sctx, pm);
    8350             :         }
    8351             : 
    8352           0 :         WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves));
    8353           0 :         while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) {
    8354           0 :                 struct rb_node *n;
    8355           0 :                 struct waiting_dir_move *dm;
    8356             : 
    8357           0 :                 n = rb_first(&sctx->waiting_dir_moves);
    8358           0 :                 dm = rb_entry(n, struct waiting_dir_move, node);
    8359           0 :                 rb_erase(&dm->node, &sctx->waiting_dir_moves);
    8360           0 :                 kfree(dm);
    8361             :         }
    8362             : 
    8363           0 :         WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs));
    8364           0 :         while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) {
    8365           0 :                 struct rb_node *n;
    8366           0 :                 struct orphan_dir_info *odi;
    8367             : 
    8368           0 :                 n = rb_first(&sctx->orphan_dirs);
    8369           0 :                 odi = rb_entry(n, struct orphan_dir_info, node);
    8370           0 :                 free_orphan_dir_info(sctx, odi);
    8371             :         }
    8372             : 
    8373           0 :         if (sort_clone_roots) {
    8374           0 :                 for (i = 0; i < sctx->clone_roots_cnt; i++) {
    8375           0 :                         btrfs_root_dec_send_in_progress(
    8376           0 :                                         sctx->clone_roots[i].root);
    8377           0 :                         btrfs_put_root(sctx->clone_roots[i].root);
    8378             :                 }
    8379             :         } else {
    8380           0 :                 for (i = 0; sctx && i < clone_sources_to_rollback; i++) {
    8381           0 :                         btrfs_root_dec_send_in_progress(
    8382           0 :                                         sctx->clone_roots[i].root);
    8383           0 :                         btrfs_put_root(sctx->clone_roots[i].root);
    8384             :                 }
    8385             : 
    8386           0 :                 btrfs_root_dec_send_in_progress(send_root);
    8387             :         }
    8388           0 :         if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) {
    8389           0 :                 btrfs_root_dec_send_in_progress(sctx->parent_root);
    8390           0 :                 btrfs_put_root(sctx->parent_root);
    8391             :         }
    8392             : 
    8393           0 :         kvfree(clone_sources_tmp);
    8394             : 
    8395           0 :         if (sctx) {
    8396           0 :                 if (sctx->send_filp)
    8397           0 :                         fput(sctx->send_filp);
    8398             : 
    8399           0 :                 kvfree(sctx->clone_roots);
    8400           0 :                 kfree(sctx->send_buf_pages);
    8401           0 :                 kvfree(sctx->send_buf);
    8402           0 :                 kvfree(sctx->verity_descriptor);
    8403             : 
    8404           0 :                 close_current_inode(sctx);
    8405             : 
    8406           0 :                 btrfs_lru_cache_clear(&sctx->name_cache);
    8407           0 :                 btrfs_lru_cache_clear(&sctx->backref_cache);
    8408           0 :                 btrfs_lru_cache_clear(&sctx->dir_created_cache);
    8409           0 :                 btrfs_lru_cache_clear(&sctx->dir_utimes_cache);
    8410             : 
    8411           0 :                 kfree(sctx);
    8412             :         }
    8413             : 
    8414           0 :         return ret;
    8415             : }

Generated by: LCOV version 1.14