LCOV - code coverage report
Current view: top level - fs/btrfs - extent_map.c (source / functions)
Test:         fstests of 6.5.0-rc3-achx @ Mon Jul 31 20:08:12 PDT 2023
Date:         2023-07-31 20:08:12

                    Hit    Total    Coverage
Lines:              475      501      94.8 %
Functions:           25       26      96.2 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : 
       3             : #include <linux/err.h>
       4             : #include <linux/slab.h>
       5             : #include <linux/spinlock.h>
       6             : #include "messages.h"
       7             : #include "ctree.h"
       8             : #include "volumes.h"
       9             : #include "extent_map.h"
      10             : #include "compression.h"
      11             : #include "btrfs_inode.h"
      12             : 
      13             : 
      14             : static struct kmem_cache *extent_map_cache;
      15             : 
      16          11 : int __init extent_map_init(void)
      17             : {
      18          11 :         extent_map_cache = kmem_cache_create("btrfs_extent_map",
      19             :                         sizeof(struct extent_map), 0,
      20             :                         SLAB_MEM_SPREAD, NULL);
      21          11 :         if (!extent_map_cache)
      22           0 :                 return -ENOMEM;
      23             :         return 0;
      24             : }
      25             : 
      26           0 : void __cold extent_map_exit(void)
      27             : {
      28           0 :         kmem_cache_destroy(extent_map_cache);
      29           0 : }
      30             : 
      31             : /*
      32             :  * Initialize the extent tree @tree.  Should be called for each new inode or
      33             :  * other user of the extent_map interface.
      34             :  */
      35     3848221 : void extent_map_tree_init(struct extent_map_tree *tree)
      36             : {
      37     3848221 :         tree->map = RB_ROOT_CACHED;
      38     3848221 :         INIT_LIST_HEAD(&tree->modified_extents);
      39     3848221 :         rwlock_init(&tree->lock);
      40     3846931 : }
      41             : 
      42             : /*
      43             :  * Allocate a new extent_map structure.  The new structure is returned with a
      44             :  * reference count of one and needs to be freed using free_extent_map()
      45             :  */
      46    76134586 : struct extent_map *alloc_extent_map(void)
      47             : {
      48    76134586 :         struct extent_map *em;
      49    76134586 :         em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS);
      50    76150847 :         if (!em)
      51             :                 return NULL;
      52    76150847 :         RB_CLEAR_NODE(&em->rb_node);
      53    76150847 :         em->compress_type = BTRFS_COMPRESS_NONE;
      54    76150847 :         refcount_set(&em->refs, 1);
      55    76150847 :         INIT_LIST_HEAD(&em->list);
      56    76150847 :         return em;
      57             : }
      58             : 
      59             : /*
       60             :  * Drop the reference count on @em by one and free the structure if the reference
      61             :  * count hits zero.
      62             :  */
      63   310825316 : void free_extent_map(struct extent_map *em)
      64             : {
      65   310825316 :         if (!em)
      66             :                 return;
      67   299432925 :         if (refcount_dec_and_test(&em->refs)) {
      68    76154127 :                 WARN_ON(extent_map_in_tree(em));
      69    76154127 :                 WARN_ON(!list_empty(&em->list));
      70   152308254 :                 if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
      71       22204 :                         kfree(em->map_lookup);
      72    76154127 :                 kmem_cache_free(extent_map_cache, em);
      73             :         }
      74             : }
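
/*
 * Illustrative sketch (not part of extent_map.c): the reference counting
 * contract of the two helpers above.  alloc_extent_map() hands back a map
 * with a reference count of one and free_extent_map() tolerates NULL, so a
 * caller that never inserts the map into a tree only drops that single
 * reference.  The helper name below is hypothetical; it assumes the
 * declarations from extent_map.h.
 */
static int example_alloc_and_release(void)
{
        struct extent_map *em = alloc_extent_map();

        if (!em)
                return -ENOMEM;         /* the GFP_NOFS allocation failed */

        em->start = 0;                  /* fill in the mapping as needed */
        em->len = 4096;

        free_extent_map(em);            /* refs go 1 -> 0, structure is freed */
        return 0;
}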
      75             : 
      76             : /* Do the math around the end of an extent, handling wrapping. */
      77             : static u64 range_end(u64 start, u64 len)
      78             : {
      79   196453062 :         if (start + len < start)
      80      166943 :                 return (u64)-1;
      81             :         return start + len;
      82             : }
      83             : 
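/*
 * Editorial note (not in the source file): tree_insert() links @em into the
 * rb-tree keyed by em->start and returns -EEXIST if any existing entry
 * overlaps [em->start, em->start + em->len); besides the keyed descent it
 * also checks the neighbouring entries so that an overlap with the tail of
 * @em's range is caught as well.
 */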
      84    20492317 : static int tree_insert(struct rb_root_cached *root, struct extent_map *em)
      85             : {
      86    20492317 :         struct rb_node **p = &root->rb_root.rb_node;
      87    20492317 :         struct rb_node *parent = NULL;
      88    20492317 :         struct extent_map *entry = NULL;
      89    20492317 :         struct rb_node *orig_parent = NULL;
      90    20492317 :         u64 end = range_end(em->start, em->len);
      91    20492317 :         bool leftmost = true;
      92             : 
      93   337132189 :         while (*p) {
      94   316640585 :                 parent = *p;
      95   316640585 :                 entry = rb_entry(parent, struct extent_map, rb_node);
      96             : 
      97   316640585 :                 if (em->start < entry->start) {
      98    20563515 :                         p = &(*p)->rb_left;
      99   296077070 :                 } else if (em->start >= extent_map_end(entry)) {
     100   296076357 :                         p = &(*p)->rb_right;
     101   296076357 :                         leftmost = false;
     102             :                 } else {
     103             :                         return -EEXIST;
     104             :                 }
     105             :         }
     106             : 
     107             :         orig_parent = parent;
     108    34559468 :         while (parent && em->start >= extent_map_end(entry)) {
     109    14067692 :                 parent = rb_next(parent);
     110    14067692 :                 entry = rb_entry(parent, struct extent_map, rb_node);
     111             :         }
     112    20491776 :         if (parent)
     113     8479888 :                 if (end > entry->start && em->start < extent_map_end(entry))
     114             :                         return -EEXIST;
     115             : 
     116             :         parent = orig_parent;
     117             :         entry = rb_entry(parent, struct extent_map, rb_node);
     118    25510694 :         while (parent && em->start < entry->start) {
     119     5027836 :                 parent = rb_prev(parent);
     120     5027836 :                 entry = rb_entry(parent, struct extent_map, rb_node);
     121             :         }
     122    20482858 :         if (parent)
     123    19041895 :                 if (end > entry->start && em->start < extent_map_end(entry))
     124             :                         return -EEXIST;
     125             : 
     126    20482858 :         rb_link_node(&em->rb_node, orig_parent, p);
     127    20482858 :         rb_insert_color_cached(&em->rb_node, root, leftmost);
     128    20482858 :         return 0;
     129             : }
     130             : 
     131             : /*
     132             :  * Search through the tree for an extent_map with a given offset.  If it can't
     133             :  * be found, try to find some neighboring extents
     134             :  */
     135   175983490 : static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
     136             :                                      struct rb_node **prev_or_next_ret)
     137             : {
     138   175983490 :         struct rb_node *n = root->rb_node;
     139   175983490 :         struct rb_node *prev = NULL;
     140   175983490 :         struct rb_node *orig_prev = NULL;
     141   175983490 :         struct extent_map *entry;
     142   175983490 :         struct extent_map *prev_entry = NULL;
     143             : 
     144   175983490 :         ASSERT(prev_or_next_ret);
     145             : 
     146  1239182285 :         while (n) {
     147  1208697476 :                 entry = rb_entry(n, struct extent_map, rb_node);
     148  1208697476 :                 prev = n;
     149  1208697476 :                 prev_entry = entry;
     150             : 
     151  1208697476 :                 if (offset < entry->start)
     152   148476664 :                         n = n->rb_left;
     153  1060220812 :                 else if (offset >= extent_map_end(entry))
     154   914722131 :                         n = n->rb_right;
     155             :                 else
     156   145498681 :                         return n;
     157             :         }
     158             : 
     159             :         orig_prev = prev;
     160    65261650 :         while (prev && offset >= extent_map_end(prev_entry)) {
     161    14585401 :                 prev = rb_next(prev);
     162    14585401 :                 prev_entry = rb_entry(prev, struct extent_map, rb_node);
     163             :         }
     164             : 
     165             :         /*
     166             :          * Previous extent map found, return as in this case the caller does not
     167             :          * care about the next one.
     168             :          */
     169    30483966 :         if (prev) {
     170     5606881 :                 *prev_or_next_ret = prev;
     171     5606881 :                 return NULL;
     172             :         }
     173             : 
     174             :         prev = orig_prev;
     175             :         prev_entry = rb_entry(prev, struct extent_map, rb_node);
     176    24877132 :         while (prev && offset < prev_entry->start) {
     177           0 :                 prev = rb_prev(prev);
     178           0 :                 prev_entry = rb_entry(prev, struct extent_map, rb_node);
     179             :         }
     180    24877132 :         *prev_or_next_ret = prev;
     181             : 
     182    24877132 :         return NULL;
     183             : }
     184             : 
     185             : /* Check to see if two extent_map structs are adjacent and safe to merge. */
     186    22408960 : static int mergable_maps(struct extent_map *prev, struct extent_map *next)
     187             : {
     188    44817920 :         if (test_bit(EXTENT_FLAG_PINNED, &prev->flags))
     189             :                 return 0;
     190             : 
     191             :         /*
     192             :          * don't merge compressed extents, we need to know their
     193             :          * actual size
     194             :          */
     195    44103116 :         if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags))
     196             :                 return 0;
     197             : 
     198    43518470 :         if (test_bit(EXTENT_FLAG_LOGGING, &prev->flags) ||
     199    21759235 :             test_bit(EXTENT_FLAG_LOGGING, &next->flags))
     200             :                 return 0;
     201             : 
     202             :         /*
     203             :          * We don't want to merge stuff that hasn't been written to the log yet
     204             :          * since it may not reflect exactly what is on disk, and that would be
     205             :          * bad.
     206             :          */
     207    21647776 :         if (!list_empty(&prev->list) || !list_empty(&next->list))
     208             :                 return 0;
     209             : 
     210    10488143 :         ASSERT(next->block_start != EXTENT_MAP_DELALLOC &&
     211             :                prev->block_start != EXTENT_MAP_DELALLOC);
     212             : 
     213    10488143 :         if (prev->map_lookup || next->map_lookup)
     214       19011 :                 ASSERT(test_bit(EXTENT_FLAG_FS_MAPPING, &prev->flags) &&
     215             :                        test_bit(EXTENT_FLAG_FS_MAPPING, &next->flags));
     216             : 
     217    10488143 :         if (extent_map_end(prev) == next->start &&
     218     7297481 :             prev->flags == next->flags &&
     219     3974039 :             prev->map_lookup == next->map_lookup &&
     220     2963930 :             ((next->block_start == EXTENT_MAP_HOLE &&
     221     3955382 :               prev->block_start == EXTENT_MAP_HOLE) ||
     222           0 :              (next->block_start == EXTENT_MAP_INLINE &&
     223     1289710 :               prev->block_start == EXTENT_MAP_INLINE) ||
     224      991452 :              (next->block_start < EXTENT_MAP_LAST_BYTE - 1 &&
     225             :               next->block_start == extent_map_block_end(prev)))) {
     226     2718784 :                 return 1;
     227             :         }
     228             :         return 0;
     229             : }
     230             : 
     231    17510714 : static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
     232             : {
     233    17510714 :         struct extent_map *merge = NULL;
     234    17510714 :         struct rb_node *rb;
     235             : 
     236             :         /*
     237             :          * We can't modify an extent map that is in the tree and that is being
     238             :          * used by another task, as it can cause that other task to see it in
     239             :          * inconsistent state during the merging. We always have 1 reference for
     240             :          * the tree and 1 for this task (which is unpinning the extent map or
     241             :          * clearing the logging flag), so anything > 2 means it's being used by
     242             :          * other tasks too.
     243             :          */
     244    17510714 :         if (refcount_read(&em->refs) > 2)
     245             :                 return;
     246             : 
     247    17500843 :         if (em->start != 0) {
     248    16160023 :                 rb = rb_prev(&em->rb_node);
     249    16159979 :                 if (rb)
     250    16094371 :                         merge = rb_entry(rb, struct extent_map, rb_node);
     251    16159979 :                 if (rb && mergable_maps(merge, em)) {
     252     2672819 :                         em->start = merge->start;
     253     2672819 :                         em->orig_start = merge->orig_start;
     254     2672819 :                         em->len += merge->len;
     255     2672819 :                         em->block_len += merge->block_len;
     256     2672819 :                         em->block_start = merge->block_start;
     257     2672819 :                         em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start;
     258     2672819 :                         em->mod_start = merge->mod_start;
     259     2672819 :                         em->generation = max(em->generation, merge->generation);
     260     2672819 :                         set_bit(EXTENT_FLAG_MERGED, &em->flags);
     261             : 
     262     2672819 :                         rb_erase_cached(&merge->rb_node, &tree->map);
     263     2672819 :                         RB_CLEAR_NODE(&merge->rb_node);
     264     2672819 :                         free_extent_map(merge);
     265             :                 }
     266             :         }
     267             : 
     268    17500281 :         rb = rb_next(&em->rb_node);
     269    17500494 :         if (rb)
     270     6314623 :                 merge = rb_entry(rb, struct extent_map, rb_node);
     271    17500494 :         if (rb && mergable_maps(em, merge)) {
     272       45965 :                 em->len += merge->len;
     273       45965 :                 em->block_len += merge->block_len;
     274       45965 :                 rb_erase_cached(&merge->rb_node, &tree->map);
     275       45965 :                 RB_CLEAR_NODE(&merge->rb_node);
     276       45965 :                 em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
     277       45965 :                 em->generation = max(em->generation, merge->generation);
     278       45965 :                 set_bit(EXTENT_FLAG_MERGED, &em->flags);
     279       45965 :                 free_extent_map(merge);
     280             :         }
     281             : }
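
/*
 * Worked example for the merge rules above (illustrative only): a map for
 * [0, 64K) with block_start 1M and a map for [64K, 128K) with block_start
 * 1M + 64K have identical flags, are neither pinned, compressed nor being
 * logged, sit on no modified list, and the second map's block_start equals
 * extent_map_block_end() of the first, so mergable_maps() returns 1 and
 * try_merge_map() folds them into a single map covering [0, 128K).
 */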
     282             : 
     283             : /*
     284             :  * Unpin an extent from the cache.
     285             :  *
     286             :  * @tree:       tree to unpin the extent in
     287             :  * @start:      logical offset in the file
     288             :  * @len:        length of the extent
     289             :  * @gen:        generation that this extent has been modified in
     290             :  *
     291             :  * Called after an extent has been written to disk properly.  Set the generation
     292             :  * to the generation that actually added the file item to the inode so we know
     293             :  * we need to sync this extent when we call fsync().
     294             :  */
     295     3410426 : int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
     296             :                        u64 gen)
     297             : {
     298     3410426 :         int ret = 0;
     299     3410426 :         struct extent_map *em;
     300     3410426 :         bool prealloc = false;
     301             : 
     302     3410426 :         write_lock(&tree->lock);
     303     3410430 :         em = lookup_extent_mapping(tree, start, len);
     304             : 
     305     6820890 :         WARN_ON(!em || em->start != start);
     306             : 
     307     3410447 :         if (!em)
     308           0 :                 goto out;
     309             : 
     310     3410447 :         em->generation = gen;
     311     3410447 :         clear_bit(EXTENT_FLAG_PINNED, &em->flags);
     312     3410451 :         em->mod_start = em->start;
     313     3410451 :         em->mod_len = em->len;
     314             : 
     315     6820902 :         if (test_bit(EXTENT_FLAG_FILLING, &em->flags)) {
     316      285573 :                 prealloc = true;
     317      285573 :                 clear_bit(EXTENT_FLAG_FILLING, &em->flags);
     318             :         }
     319             : 
     320     3410452 :         try_merge_map(tree, em);
     321             : 
     322     3410375 :         if (prealloc) {
     323      285574 :                 em->mod_start = em->start;
     324      285574 :                 em->mod_len = em->len;
     325             :         }
     326             : 
     327     3410375 :         free_extent_map(em);
     328     3410420 : out:
     329     3410420 :         write_unlock(&tree->lock);
     330     3410326 :         return ret;
     331             : 
     332             : }
     333             : 
     334      330306 : void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
     335             : {
     336      330306 :         lockdep_assert_held_write(&tree->lock);
     337             : 
     338      330306 :         clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
     339      330338 :         if (extent_map_in_tree(em))
     340      330340 :                 try_merge_map(tree, em);
     341      330335 : }
     342             : 
     343    26997658 : static inline void setup_extent_mapping(struct extent_map_tree *tree,
     344             :                                         struct extent_map *em,
     345             :                                         int modified)
     346             : {
     347    26997658 :         refcount_inc(&em->refs);
     348    27003532 :         em->mod_start = em->start;
     349    27003532 :         em->mod_len = em->len;
     350             : 
     351    27003532 :         if (modified)
     352    13233277 :                 list_move(&em->list, &tree->modified_extents);
     353             :         else
     354    13770255 :                 try_merge_map(tree, em);
     355    26999910 : }
     356             : 
     357       22201 : static void extent_map_device_set_bits(struct extent_map *em, unsigned bits)
     358             : {
     359       22201 :         struct map_lookup *map = em->map_lookup;
     360       22201 :         u64 stripe_size = em->orig_block_len;
     361       22201 :         int i;
     362             : 
     363       51183 :         for (i = 0; i < map->num_stripes; i++) {
     364       28982 :                 struct btrfs_io_stripe *stripe = &map->stripes[i];
     365       28982 :                 struct btrfs_device *device = stripe->dev;
     366             : 
     367       28982 :                 set_extent_bit(&device->alloc_state, stripe->physical,
     368       28982 :                                stripe->physical + stripe_size - 1,
     369             :                                bits | EXTENT_NOWAIT, NULL);
     370             :         }
     371       22201 : }
     372             : 
     373       44405 : static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits)
     374             : {
     375       44405 :         struct map_lookup *map = em->map_lookup;
     376       44405 :         u64 stripe_size = em->orig_block_len;
     377       44405 :         int i;
     378             : 
     379      102374 :         for (i = 0; i < map->num_stripes; i++) {
     380       57969 :                 struct btrfs_io_stripe *stripe = &map->stripes[i];
     381       57969 :                 struct btrfs_device *device = stripe->dev;
     382             : 
     383       57969 :                 __clear_extent_bit(&device->alloc_state, stripe->physical,
     384       57969 :                                    stripe->physical + stripe_size - 1,
     385             :                                    bits | EXTENT_NOWAIT,
     386             :                                    NULL, NULL);
     387             :         }
     388       44405 : }
     389             : 
     390             : /*
     391             :  * Add new extent map to the extent tree
     392             :  *
     393             :  * @tree:       tree to insert new map in
     394             :  * @em:         map to insert
     395             :  * @modified:   indicate whether the given @em should be added to the
     396             :  *              modified list, which indicates the extent needs to be logged
     397             :  *
     398             :  * Insert @em into @tree or perform a simple forward/backward merge with
     399             :  * existing mappings.  The extent_map struct passed in will be inserted
     400             :  * into the tree directly, with an additional reference taken, or a
     401             :  * reference dropped if the merge attempt was successful.
     402             :  */
     403    20492010 : int add_extent_mapping(struct extent_map_tree *tree,
     404             :                        struct extent_map *em, int modified)
     405             : {
     406    20492010 :         int ret = 0;
     407             : 
     408    20492010 :         lockdep_assert_held_write(&tree->lock);
     409             : 
     410    20492010 :         ret = tree_insert(&tree->map, em);
     411    20489155 :         if (ret)
     412        9275 :                 goto out;
     413             : 
     414    20479880 :         setup_extent_mapping(tree, em, modified);
     415    40951860 :         if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) {
     416       22201 :                 extent_map_device_set_bits(em, CHUNK_ALLOCATED);
     417       22201 :                 extent_map_device_clear_bits(em, CHUNK_TRIMMED);
     418             :         }
     419    20453729 : out:
     420    20485205 :         return ret;
     421             : }
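
/*
 * Illustrative sketch (not part of extent_map.c): inserting a map.
 * add_extent_mapping() asserts that the tree lock is held for writing and,
 * on success, takes its own reference in setup_extent_mapping(), so the
 * caller still drops the reference it got from alloc_extent_map().  The
 * helper name below is hypothetical.
 */
static int example_insert(struct extent_map_tree *tree, struct extent_map *em)
{
        int ret;

        write_lock(&tree->lock);
        ret = add_extent_mapping(tree, em, 0); /* 0: not put on the modified list */
        write_unlock(&tree->lock);

        /* Drop the caller's reference; on success the tree keeps its own. */
        free_extent_map(em);

        return ret;                            /* 0, or -EEXIST on overlap */
}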
     422             : 
     423             : static struct extent_map *
     424   175960745 : __lookup_extent_mapping(struct extent_map_tree *tree,
     425             :                         u64 start, u64 len, int strict)
     426             : {
     427   175960745 :         struct extent_map *em;
     428   175960745 :         struct rb_node *rb_node;
     429   175960745 :         struct rb_node *prev_or_next = NULL;
     430   175960745 :         u64 end = range_end(start, len);
     431             : 
     432   175960745 :         rb_node = __tree_search(&tree->map.rb_root, start, &prev_or_next);
     433   175969599 :         if (!rb_node) {
     434    30481512 :                 if (prev_or_next)
     435             :                         rb_node = prev_or_next;
     436             :                 else
     437             :                         return NULL;
     438             :         }
     439             : 
     440   164161813 :         em = rb_entry(rb_node, struct extent_map, rb_node);
     441             : 
     442   319509600 :         if (strict && !(end > em->start && start < extent_map_end(em)))
     443             :                 return NULL;
     444             : 
     445   145699670 :         refcount_inc(&em->refs);
     446   145699670 :         return em;
     447             : }
     448             : 
     449             : /*
     450             :  * Lookup extent_map that intersects @start + @len range.
     451             :  *
     452             :  * @tree:       tree to lookup in
     453             :  * @start:      byte offset to start the search
     454             :  * @len:        length of the lookup range
     455             :  *
     456             :  * Find and return the first extent_map struct in @tree that intersects the
     457             :  * [start, len] range.  There may be additional objects in the tree that
      458             :  * [start, start + len) range.  There may be additional objects in the tree that
     459             :  * additional lookups are needed.
     460             :  */
     461   139109880 : struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
     462             :                                          u64 start, u64 len)
     463             : {
     464   142520310 :         return __lookup_extent_mapping(tree, start, len, 1);
     465             : }
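
/*
 * Illustrative sketch (not part of extent_map.c): a read-side lookup.
 * lookup_extent_mapping() returns the first overlapping map with an extra
 * reference taken, or NULL, so the caller drops that reference when it is
 * done with the map.  The helper name below is hypothetical.
 */
static u64 example_block_start(struct extent_map_tree *tree, u64 start, u64 len)
{
        struct extent_map *em;
        u64 block_start = (u64)-1;

        read_lock(&tree->lock);
        em = lookup_extent_mapping(tree, start, len);
        read_unlock(&tree->lock);

        if (em) {
                block_start = em->block_start;
                free_extent_map(em);    /* drop the lookup reference */
        }
        return block_start;
}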
     466             : 
     467             : /*
     468             :  * Find a nearby extent map intersecting @start + @len (not an exact search).
     469             :  *
     470             :  * @tree:       tree to lookup in
     471             :  * @start:      byte offset to start the search
     472             :  * @len:        length of the lookup range
     473             :  *
     474             :  * Find and return the first extent_map struct in @tree that intersects the
      475             :  * [start, start + len) range.
     476             :  *
     477             :  * If one can't be found, any nearby extent may be returned
     478             :  */
     479     4002584 : struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
     480             :                                          u64 start, u64 len)
     481             : {
     482     4002584 :         return __lookup_extent_mapping(tree, start, len, 0);
     483             : }
     484             : 
     485             : /*
     486             :  * Remove an extent_map from the extent tree.
     487             :  *
     488             :  * @tree:       extent tree to remove from
     489             :  * @em:         extent map being removed
     490             :  *
     491             :  * Remove @em from @tree.  No reference counts are dropped, and no checks
     492             :  * are done to see if the range is in use.
     493             :  */
     494    17761518 : void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
     495             : {
     496    17761518 :         lockdep_assert_held_write(&tree->lock);
     497             : 
     498    17761518 :         WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
     499    17761518 :         rb_erase_cached(&em->rb_node, &tree->map);
     500    17761299 :         if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
     501    17761294 :                 list_del_init(&em->list);
     502    35521472 :         if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
     503       22204 :                 extent_map_device_clear_bits(em, CHUNK_ALLOCATED);
     504    17760736 :         RB_CLEAR_NODE(&em->rb_node);
     505    17760736 : }
     506             : 
     507     6519409 : static void replace_extent_mapping(struct extent_map_tree *tree,
     508             :                                    struct extent_map *cur,
     509             :                                    struct extent_map *new,
     510             :                                    int modified)
     511             : {
     512     6519409 :         lockdep_assert_held_write(&tree->lock);
     513             : 
     514     6519409 :         WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags));
     515     6519409 :         ASSERT(extent_map_in_tree(cur));
     516     6519409 :         if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags))
     517     6519367 :                 list_del_init(&cur->list);
     518     6519382 :         rb_replace_node_cached(&cur->rb_node, &new->rb_node, &tree->map);
     519     6519301 :         RB_CLEAR_NODE(&cur->rb_node);
     520             : 
     521     6519301 :         setup_extent_mapping(tree, new, modified);
     522     6519329 : }
     523             : 
     524             : static struct extent_map *next_extent_map(const struct extent_map *em)
     525             : {
     526     6394232 :         struct rb_node *next;
     527             : 
     528     6394232 :         next = rb_next(&em->rb_node);
     529     6394220 :         if (!next)
     530           2 :                 return NULL;
     531             :         return container_of(next, struct extent_map, rb_node);
     532             : }
     533             : 
     534             : static struct extent_map *prev_extent_map(struct extent_map *em)
     535             : {
     536        8901 :         struct rb_node *prev;
     537             : 
     538        8901 :         prev = rb_prev(&em->rb_node);
     539        8901 :         if (!prev)
     540        1134 :                 return NULL;
     541             :         return container_of(prev, struct extent_map, rb_node);
     542             : }
     543             : 
     544             : /*
     545             :  * Helper for btrfs_get_extent.  Given an existing extent in the tree,
     546             :  * the existing extent is the nearest extent to map_start,
     547             :  * and an extent that you want to insert, deal with overlap and insert
     548             :  * the best fitted new extent into the tree.
     549             :  */
     550        8903 : static noinline int merge_extent_mapping(struct extent_map_tree *em_tree,
     551             :                                          struct extent_map *existing,
     552             :                                          struct extent_map *em,
     553             :                                          u64 map_start)
     554             : {
     555        8903 :         struct extent_map *prev;
     556        8903 :         struct extent_map *next;
     557        8903 :         u64 start;
     558        8903 :         u64 end;
     559        8903 :         u64 start_diff;
     560             : 
     561       17806 :         BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
     562             : 
     563        8903 :         if (existing->start > map_start) {
     564        8901 :                 next = existing;
     565        8901 :                 prev = prev_extent_map(next);
     566             :         } else {
     567           2 :                 prev = existing;
     568           2 :                 next = next_extent_map(prev);
     569             :         }
     570             : 
     571        8903 :         start = prev ? extent_map_end(prev) : em->start;
     572        8903 :         start = max_t(u64, start, em->start);
     573        8903 :         end = next ? next->start : extent_map_end(em);
     574        8903 :         end = min_t(u64, end, extent_map_end(em));
     575        8903 :         start_diff = start - em->start;
     576        8903 :         em->start = start;
     577        8903 :         em->len = end - start;
     578        9503 :         if (em->block_start < EXTENT_MAP_LAST_BYTE &&
     579         600 :             !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
     580         600 :                 em->block_start += start_diff;
     581         600 :                 em->block_len = em->len;
     582             :         }
     583        8903 :         return add_extent_mapping(em_tree, em, 0);
     584             : }
     585             : 
     586             : /*
     587             :  * Add extent mapping into em_tree.
     588             :  *
     589             :  * @fs_info:  the filesystem
     590             :  * @em_tree:  extent tree into which we want to insert the extent mapping
     591             :  * @em_in:    extent we are inserting
     592             :  * @start:    start of the logical range btrfs_get_extent() is requesting
     593             :  * @len:      length of the logical range btrfs_get_extent() is requesting
     594             :  *
     595             :  * Note that @em_in's range may be different from [start, start+len),
      596             :  * but they must overlap.
     597             :  *
     598             :  * Insert @em_in into @em_tree. In case there is an overlapping range, handle
     599             :  * the -EEXIST by either:
     600             :  * a) Returning the existing extent in @em_in if @start is within the
     601             :  *    existing em.
     602             :  * b) Merge the existing extent with @em_in passed in.
     603             :  *
     604             :  * Return 0 on success, otherwise -EEXIST.
     605             :  *
     606             :  */
     607     8357551 : int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
     608             :                              struct extent_map_tree *em_tree,
     609             :                              struct extent_map **em_in, u64 start, u64 len)
     610             : {
     611     8357551 :         int ret;
     612     8357551 :         struct extent_map *em = *em_in;
     613             : 
     614             :         /*
     615             :          * Tree-checker should have rejected any inline extent with non-zero
     616             :          * file offset. Here just do a sanity check.
     617             :          */
     618     8357551 :         if (em->block_start == EXTENT_MAP_INLINE)
     619             :                 ASSERT(em->start == 0);
     620             : 
     621     8357551 :         ret = add_extent_mapping(em_tree, em, 0);
     622             :         /* it is possible that someone inserted the extent into the tree
     623             :          * while we had the lock dropped.  It is also possible that
     624             :          * an overlapping map exists in the tree
     625             :          */
     626     8351134 :         if (ret == -EEXIST) {
     627        9275 :                 struct extent_map *existing;
     628             : 
     629        9275 :                 ret = 0;
     630             : 
     631        9275 :                 existing = search_extent_mapping(em_tree, start, len);
     632             : 
     633        9275 :                 trace_btrfs_handle_em_exist(fs_info, existing, em, start, len);
     634             : 
     635             :                 /*
     636             :                  * existing will always be non-NULL, since there must be
     637             :                  * extent causing the -EEXIST.
     638             :                  */
     639        9649 :                 if (start >= existing->start &&
     640             :                     start < extent_map_end(existing)) {
     641         372 :                         free_extent_map(em);
     642         372 :                         *em_in = existing;
     643         372 :                         ret = 0;
     644             :                 } else {
     645        8903 :                         u64 orig_start = em->start;
     646        8903 :                         u64 orig_len = em->len;
     647             : 
     648             :                         /*
     649             :                          * The existing extent map is the one nearest to
     650             :                          * the [start, start + len) range which overlaps
     651             :                          */
     652        8903 :                         ret = merge_extent_mapping(em_tree, existing,
     653             :                                                    em, start);
     654        8903 :                         if (ret) {
     655           0 :                                 free_extent_map(em);
     656           0 :                                 *em_in = NULL;
     657           0 :                                 WARN_ONCE(ret,
     658             : "unexpected error %d: merge existing(start %llu len %llu) with em(start %llu len %llu)\n",
     659             :                                           ret, existing->start, existing->len,
     660             :                                           orig_start, orig_len);
     661             :                         }
     662        8903 :                         free_extent_map(existing);
     663             :                 }
     664             :         }
     665             : 
     666     8351134 :         ASSERT(ret == 0 || ret == -EEXIST);
     667     8351134 :         return ret;
     668             : }
     669             : 
     670             : /*
     671             :  * Drop all extent maps from a tree in the fastest possible way, rescheduling
     672             :  * if needed. This avoids searching the tree, from the root down to the first
     673             :  * extent map, before each deletion.
     674             :  */
     675     7779933 : static void drop_all_extent_maps_fast(struct extent_map_tree *tree)
     676             : {
     677     7779933 :         write_lock(&tree->lock);
     678    16720144 :         while (!RB_EMPTY_ROOT(&tree->map.rb_root)) {
     679     8939742 :                 struct extent_map *em;
     680     8939742 :                 struct rb_node *node;
     681             : 
     682     8939742 :                 node = rb_first_cached(&tree->map);
     683     8939742 :                 em = rb_entry(node, struct extent_map, rb_node);
     684     8939742 :                 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
     685     8939898 :                 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
     686     8939980 :                 remove_extent_mapping(tree, em);
     687     8939755 :                 free_extent_map(em);
     688     8939822 :                 cond_resched_rwlock_write(&tree->lock);
     689             :         }
     690     7781431 :         write_unlock(&tree->lock);
     691     7781125 : }
     692             : 
     693             : /*
     694             :  * Drop all extent maps in a given range.
     695             :  *
     696             :  * @inode:       The target inode.
     697             :  * @start:       Start offset of the range.
     698             :  * @end:         End offset of the range (inclusive value).
     699             :  * @skip_pinned: Indicate if pinned extent maps should be ignored or not.
     700             :  *
     701             :  * This drops all the extent maps that intersect the given range [@start, @end].
      702             :  * Extent maps that partially overlap the range, extending before its start or
      703             :  * beyond its end, are split.
     704             :  * The caller should have locked an appropriate file range in the inode's io
     705             :  * tree before calling this function.
     706             :  */
     707    37232697 : void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
     708             :                                  bool skip_pinned)
     709             : {
     710    37232697 :         struct extent_map *split;
     711    37232697 :         struct extent_map *split2;
     712    37232697 :         struct extent_map *em;
     713    37232697 :         struct extent_map_tree *em_tree = &inode->extent_tree;
     714    37232697 :         u64 len = end - start + 1;
     715             : 
     716    37232697 :         WARN_ON(end < start);
     717    37232697 :         if (end == (u64)-1) {
     718     7967846 :                 if (start == 0 && !skip_pinned) {
     719     7780282 :                         drop_all_extent_maps_fast(em_tree);
     720     7780282 :                         return;
     721             :                 }
     722             :                 len = (u64)-1;
     723             :         } else {
     724             :                 /* Make end offset exclusive for use in the loop below. */
     725    29264851 :                 end++;
     726             :         }
     727             : 
     728             :         /*
     729             :          * It's ok if we fail to allocate the extent maps, see the comment near
     730             :          * the bottom of the loop below. We only need two spare extent maps in
     731             :          * the worst case, where the first extent map that intersects our range
     732             :          * starts before the range and the last extent map that intersects our
     733             :          * range ends after our range (and they might be the same extent map),
     734             :          * because we need to split those two extent maps at the boundaries.
     735             :          */
     736    29452415 :         split = alloc_extent_map();
     737    29452051 :         split2 = alloc_extent_map();
     738             : 
     739    29452560 :         write_lock(&em_tree->lock);
     740    29453018 :         em = lookup_extent_mapping(em_tree, start, len);
     741             : 
     742    44730230 :         while (em) {
     743             :                 /* extent_map_end() returns exclusive value (last byte + 1). */
     744    15278477 :                 const u64 em_end = extent_map_end(em);
     745    15278477 :                 struct extent_map *next_em = NULL;
     746    15278477 :                 u64 gen;
     747    15278477 :                 unsigned long flags;
     748    15278477 :                 bool modified;
     749    15278477 :                 bool compressed;
     750             : 
     751    15278477 :                 if (em_end < end) {
     752     6394230 :                         next_em = next_extent_map(em);
     753     6206328 :                         if (next_em) {
     754     6206328 :                                 if (next_em->start < end)
     755     6153785 :                                         refcount_inc(&next_em->refs);
     756             :                                 else
     757             :                                         next_em = NULL;
     758             :                         }
     759             :                 }
     760             : 
     761    16610957 :                 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
     762          51 :                         start = em_end;
     763          51 :                         if (end != (u64)-1)
     764             :                                 len = start + len - em_end;
     765          51 :                         goto next;
     766             :                 }
     767             : 
     768    15278421 :                 flags = em->flags;
     769    15278421 :                 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
     770             :                 /*
     771             :                  * In case we split the extent map, we want to preserve the
     772             :                  * EXTENT_FLAG_LOGGING flag on our extent map, but we don't want
     773             :                  * it on the new extent maps.
     774             :                  */
     775    15278550 :                 clear_bit(EXTENT_FLAG_LOGGING, &flags);
     776    15279352 :                 modified = !list_empty(&em->list);
     777             : 
     778             :                 /*
     779             :                  * The extent map does not cross our target range, so no need to
     780             :                  * split it, we can remove it directly.
     781             :                  */
     782    15279352 :                 if (em->start >= start && em_end <= end)
     783     8760114 :                         goto remove_em;
     784             : 
     785     6519238 :                 gen = em->generation;
     786     6519238 :                 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
     787             : 
     788     6519238 :                 if (em->start < start) {
     789     4796197 :                         if (!split) {
     790           0 :                                 split = split2;
     791           0 :                                 split2 = NULL;
     792           0 :                                 if (!split)
     793           0 :                                         goto remove_em;
     794             :                         }
     795     4796197 :                         split->start = em->start;
     796     4796197 :                         split->len = start - em->start;
     797             : 
     798     4796197 :                         if (em->block_start < EXTENT_MAP_LAST_BYTE) {
     799     3003727 :                                 split->orig_start = em->orig_start;
     800     3003727 :                                 split->block_start = em->block_start;
     801             : 
     802     3003727 :                                 if (compressed)
     803          78 :                                         split->block_len = em->block_len;
     804             :                                 else
     805     3003649 :                                         split->block_len = split->len;
     806     3003727 :                                 split->orig_block_len = max(split->block_len,
     807             :                                                 em->orig_block_len);
     808     3003727 :                                 split->ram_bytes = em->ram_bytes;
     809             :                         } else {
     810     1792470 :                                 split->orig_start = split->start;
     811     1792470 :                                 split->block_len = 0;
     812     1792470 :                                 split->block_start = em->block_start;
     813     1792470 :                                 split->orig_block_len = 0;
     814     1792470 :                                 split->ram_bytes = split->len;
     815             :                         }
     816             : 
     817     4796197 :                         split->generation = gen;
     818     4796197 :                         split->flags = flags;
     819     4796197 :                         split->compress_type = em->compress_type;
     820     4796197 :                         replace_extent_mapping(em_tree, em, split, modified);
     821     4796099 :                         free_extent_map(split);
     822     4796099 :                         split = split2;
     823     4796099 :                         split2 = NULL;
     824             :                 }
     825     6519172 :                 if (em_end > end) {
     826     4954117 :                         if (!split) {
     827           0 :                                 split = split2;
     828           0 :                                 split2 = NULL;
     829           0 :                                 if (!split)
     830           0 :                                         goto remove_em;
     831             :                         }
     832     4954117 :                         split->start = start + len;
     833     4954117 :                         split->len = em_end - (start + len);
     834     4954117 :                         split->block_start = em->block_start;
     835     4954117 :                         split->flags = flags;
     836     4954117 :                         split->compress_type = em->compress_type;
     837     4954117 :                         split->generation = gen;
     838             : 
     839     4954117 :                         if (em->block_start < EXTENT_MAP_LAST_BYTE) {
     840     3161855 :                                 split->orig_block_len = max(em->block_len,
     841             :                                                     em->orig_block_len);
     842             : 
     843     3161855 :                                 split->ram_bytes = em->ram_bytes;
     844     3161855 :                                 if (compressed) {
     845         174 :                                         split->block_len = em->block_len;
     846         174 :                                         split->orig_start = em->orig_start;
     847             :                                 } else {
     848     3161681 :                                         const u64 diff = start + len - em->start;
     849             : 
     850     3161681 :                                         split->block_len = split->len;
     851     3161681 :                                         split->block_start += diff;
     852     3161681 :                                         split->orig_start = em->orig_start;
     853             :                                 }
     854             :                         } else {
     855     1792262 :                                 split->ram_bytes = split->len;
     856     1792262 :                                 split->orig_start = split->start;
     857     1792262 :                                 split->block_len = 0;
     858     1792262 :                                 split->orig_block_len = 0;
     859             :                         }
     860             : 
     861     4954117 :                         if (extent_map_in_tree(em)) {
     862     1723036 :                                 replace_extent_mapping(em_tree, em, split,
     863             :                                                        modified);
     864             :                         } else {
     865     3231081 :                                 int ret;
     866             : 
     867     3231081 :                                 ret = add_extent_mapping(em_tree, split,
     868             :                                                          modified);
     869             :                                 /* Logic error, shouldn't happen. */
     870     3231085 :                                 ASSERT(ret == 0);
     871     3231085 :                                 if (WARN_ON(ret != 0) && modified)
     872           0 :                                         btrfs_set_inode_full_sync(inode);
     873             :                         }
     874     4954096 :                         free_extent_map(split);
     875     4954096 :                         split = NULL;
     876             :                 }
     877     1565055 : remove_em:
     878    15279236 :                 if (extent_map_in_tree(em)) {
     879             :                         /*
     880             :                          * If the extent map is still in the tree it means that
     881             :                          * either of the following is true:
     882             :                          *
     883             :                          * 1) It fits entirely in our range (doesn't end beyond
     884             :                          *    it or starts before it);
     885             :                          *
     886             :                          * 2) It starts before our range and/or ends after our
     887             :                          *    range, and we were not able to allocate the extent
     888             :                          *    maps for split operations, @split and @split2.
     889             :                          *
     890             :                          * If we are at case 2) then we just remove the entire
     891             :                          * extent map - this is fine since if anyone needs it to
     892             :                          * access the subranges outside our range, will just
     893             :                          * load it again from the subvolume tree's file extent
     894             :                          * item. However if the extent map was in the list of
     895             :                          * modified extents, then we must mark the inode for a
     896             :                          * full fsync, otherwise a fast fsync will miss this
     897             :                          * extent if it's new and needs to be logged.
     898             :                          */
     899     8760029 :                         if ((em->start < start || em_end > end) && modified) {
     900           0 :                                 ASSERT(!split);
     901           0 :                                 btrfs_set_inode_full_sync(inode);
     902             :                         }
     903     8760029 :                         remove_extent_mapping(em_tree, em);
     904             :                 }
     905             : 
     906             :                 /*
     907             :                  * Once for the tree reference (we replaced or removed the
     908             :                  * extent map from the tree).
     909             :                  */
     910    15277550 :                 free_extent_map(em);
     911    15277797 : next:
     912             :                 /* Once for us (for our lookup reference). */
     913    15277797 :                 free_extent_map(em);
     914             : 
     915    15277212 :                 em = next_em;
     916             :         }
     917             : 
     918    29452029 :         write_unlock(&em_tree->lock);
     919             : 
     920    29451807 :         free_extent_map(split);
     921    29452348 :         free_extent_map(split2);
     922             : }
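
/*
 * Worked example for the range drop above (illustrative only): with a
 * single map covering [0, 128K) in the tree, a call with start == 32K and
 * end == 96K - 1 splits off a front piece for [0, 32K) and a tail piece
 * for [96K, 128K), removes the original map and leaves [32K, 96K)
 * unmapped.  A call with start == 0, end == (u64)-1 and skip_pinned ==
 * false instead takes the drop_all_extent_maps_fast() path and empties
 * the whole tree.
 */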
     923             : 
     924             : /*
     925             :  * Replace a range in the inode's extent map tree with a new extent map.
     926             :  *
     927             :  * @inode:      The target inode.
     928             :  * @new_em:     The new extent map to add to the inode's extent map tree.
     929             :  * @modified:   Indicate if the new extent map should be added to the list of
     930             :  *              modified extents (for fast fsync tracking).
     931             :  *
     932             :  * Drops all the extent maps in the inode's extent map tree that intersect the
     933             :  * range of the new extent map and adds the new extent map to the tree.
     934             :  * The caller should have locked an appropriate file range in the inode's io
     935             :  * tree before calling this function.
     936             :  */
     937     8872588 : int btrfs_replace_extent_map_range(struct btrfs_inode *inode,
     938             :                                    struct extent_map *new_em,
     939             :                                    bool modified)
     940             : {
     941     8872588 :         const u64 end = new_em->start + new_em->len - 1;
     942     8872588 :         struct extent_map_tree *tree = &inode->extent_tree;
     943     8872588 :         int ret;
     944             : 
     945     8872588 :         ASSERT(!extent_map_in_tree(new_em));
     946             : 
     947             :         /*
     948             :          * The caller has locked an appropriate file range in the inode's io
     949             :          * tree, but getting -EEXIST when adding the new extent map can still
     950             :          * happen in case there are extents that partially cover the range, and
     951             :          * this is due to two tasks operating on different parts of the extent.
     952             :          * See commit 18e83ac75bfe67 ("Btrfs: fix unexpected EEXIST from
     953             :          * btrfs_get_extent") for an example and details.
     954             :          */
     955     8872588 :         do {
     956     8872588 :                 btrfs_drop_extent_map_range(inode, new_em->start, end, false);
     957     8873758 :                 write_lock(&tree->lock);
     958     8873943 :                 ret = add_extent_mapping(tree, new_em, modified);
     959     8873001 :                 write_unlock(&tree->lock);
     960     8872905 :         } while (ret == -EEXIST);
     961             : 
     962     8872905 :         return ret;
     963             : }
     964             : 
     965             : /*
     966             :  * Split off the first pre bytes from the extent_map at [start, start + len],
     967             :  * and set the block_start for it to new_logical.
     968             :  *
     969             :  * This function is used when an ordered_extent needs to be split.
     970             :  */
     971         266 : int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
     972             :                      u64 new_logical)
     973             : {
     974         266 :         struct extent_map_tree *em_tree = &inode->extent_tree;
     975         266 :         struct extent_map *em;
     976         266 :         struct extent_map *split_pre = NULL;
     977         266 :         struct extent_map *split_mid = NULL;
     978         266 :         int ret = 0;
     979         266 :         unsigned long flags;
     980             : 
     981         266 :         ASSERT(pre != 0);
     982         266 :         ASSERT(pre < len);
     983             : 
     984         266 :         split_pre = alloc_extent_map();
     985         266 :         if (!split_pre)
     986             :                 return -ENOMEM;
     987         266 :         split_mid = alloc_extent_map();
     988         266 :         if (!split_mid) {
     989           0 :                 ret = -ENOMEM;
     990           0 :                 goto out_free_pre;
     991             :         }
     992             : 
     993         266 :         lock_extent(&inode->io_tree, start, start + len - 1, NULL);
     994         266 :         write_lock(&em_tree->lock);
     995         266 :         em = lookup_extent_mapping(em_tree, start, len);
     996         266 :         if (!em) {
     997           0 :                 ret = -EIO;
     998           0 :                 goto out_unlock;
     999             :         }
    1000             : 
    1001         266 :         ASSERT(em->len == len);
    1002         266 :         ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
    1003         266 :         ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE);
    1004         266 :         ASSERT(test_bit(EXTENT_FLAG_PINNED, &em->flags));
    1005         266 :         ASSERT(!test_bit(EXTENT_FLAG_LOGGING, &em->flags));
    1006         266 :         ASSERT(!list_empty(&em->list));
    1007             : 
    1008         266 :         flags = em->flags;
    1009         266 :         clear_bit(EXTENT_FLAG_PINNED, &em->flags);
    1010             : 
     1011             :         /* First, replace the em with a new extent_map starting from em->start. */
    1012         266 :         split_pre->start = em->start;
    1013         266 :         split_pre->len = pre;
    1014         266 :         split_pre->orig_start = split_pre->start;
    1015         266 :         split_pre->block_start = new_logical;
    1016         266 :         split_pre->block_len = split_pre->len;
    1017         266 :         split_pre->orig_block_len = split_pre->block_len;
    1018         266 :         split_pre->ram_bytes = split_pre->len;
    1019         266 :         split_pre->flags = flags;
    1020         266 :         split_pre->compress_type = em->compress_type;
    1021         266 :         split_pre->generation = em->generation;
    1022             : 
    1023         266 :         replace_extent_mapping(em_tree, em, split_pre, 1);
    1024             : 
    1025             :         /*
    1026             :          * Now we only have an extent_map at:
    1027             :          *     [em->start, em->start + pre]
    1028             :          */
    1029             : 
    1030             :         /* Insert the middle extent_map. */
    1031         266 :         split_mid->start = em->start + pre;
    1032         266 :         split_mid->len = em->len - pre;
    1033         266 :         split_mid->orig_start = split_mid->start;
    1034         266 :         split_mid->block_start = em->block_start + pre;
    1035         266 :         split_mid->block_len = split_mid->len;
    1036         266 :         split_mid->orig_block_len = split_mid->block_len;
    1037         266 :         split_mid->ram_bytes = split_mid->len;
    1038         266 :         split_mid->flags = flags;
    1039         266 :         split_mid->compress_type = em->compress_type;
    1040         266 :         split_mid->generation = em->generation;
    1041         266 :         add_extent_mapping(em_tree, split_mid, 1);
    1042             : 
    1043             :         /* Once for us */
    1044         266 :         free_extent_map(em);
    1045             :         /* Once for the tree */
    1046         266 :         free_extent_map(em);
    1047             : 
    1048         266 : out_unlock:
    1049         266 :         write_unlock(&em_tree->lock);
    1050         266 :         unlock_extent(&inode->io_tree, start, start + len - 1, NULL);
    1051         266 :         free_extent_map(split_mid);
    1052         266 : out_free_pre:
    1053         266 :         free_extent_map(split_pre);
    1054         266 :         return ret;
    1055             : }
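
/*
 * Worked example for split_extent_map() above (illustrative only): for a
 * pinned, uncompressed map covering [start, start + len) with pre == 16K,
 * the map is replaced by one for [start, start + 16K) whose block_start is
 * new_logical, followed by one for [start + 16K, start + len) whose
 * block_start is the original block_start + 16K.  Both pieces inherit the
 * original flags, compress_type and generation.
 */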

Generated by: LCOV version 1.14