Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 :
3 : #include <linux/jiffies.h>
4 : #include <linux/kernel.h>
5 : #include <linux/ktime.h>
6 : #include <linux/list.h>
7 : #include <linux/math64.h>
8 : #include <linux/sizes.h>
9 : #include <linux/workqueue.h>
10 : #include "ctree.h"
11 : #include "block-group.h"
12 : #include "discard.h"
13 : #include "free-space-cache.h"
14 : #include "fs.h"
15 :
16 : /*
17 : * This contains the logic to handle async discard.
18 : *
19 : * Async discard manages trimming of free space outside of transaction commit.
20 : * Discarding is done by managing the block_groups on an LRU list based on free
21 : * space recency. Two passes are used: the first prioritizes discarding extents,
22 : * the second gives bitmap trimming the best opportunity to coalesce.
23 : * The block_groups are maintained on multiple lists to allow for multiple
24 : * passes with different discard filter requirements. A delayed work item is
25 : * used to manage discarding with timeout determined by a max of the delay
26 : * incurred by the iops rate limit, the byte rate limit, and the max delay of
27 : * BTRFS_DISCARD_MAX_DELAY_MSEC.
28 : *
29 : * Note, this only keeps track of block_groups that are explicitly for data.
30 : * Mixed block_groups are not supported.
31 : *
32 : * The first list is special to manage discarding of fully free block groups.
33 : * This is necessary because we issue a final trim for a fully free block group
34 : * after forgetting it. When a block group becomes unused, instead of directly
35 : * being added to the unused_bgs list, we add it to this first list. Then
36 : * from there, if it becomes fully discarded, we place it onto the unused_bgs
37 : * list.
38 : *
39 : * The in-memory free space cache serves as the backing state for discard.
40 : * Consequently, there is no persistence. We opt to load all the block groups
41 : * in as not discarded, so a clean mount degenerates to the same state as
42 : * mounting after a crash.
43 : *
44 : * As the free space cache uses bitmaps, there exists a tradeoff between
45 : * ease/efficiency for find_free_extent() and the accuracy of discard state.
46 : * Here we opt to let untrimmed regions merge with everything while only letting
47 : * trimmed regions merge with other trimmed regions. This can cause
48 : * overtrimming, but the coalescing benefit seems to be worth it. Additionally,
49 : * bitmap state is tracked as a whole. If we're able to fully trim a bitmap,
50 : * the trimmed flag is set on the bitmap. Otherwise, if an allocation comes in,
51 : * this resets the state and we will retry trimming the whole bitmap. This is a
52 : * tradeoff between discard state accuracy and the cost of accounting.
53 : */
54 :
55 : /* This is an initial delay to give some chance for block reuse */
56 : #define BTRFS_DISCARD_DELAY (120ULL * NSEC_PER_SEC)
57 : #define BTRFS_DISCARD_UNUSED_DELAY (10ULL * NSEC_PER_SEC)
58 :
59 : #define BTRFS_DISCARD_MIN_DELAY_MSEC (1UL)
60 : #define BTRFS_DISCARD_MAX_DELAY_MSEC (1000UL)
61 : #define BTRFS_DISCARD_MAX_IOPS (1000U)
62 :
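The header comment above describes the work item timeout as the max of the iops-derived delay, the byte rate delay, and the block group's eligibility timeout. A minimal sketch of that combination follows; it is not part of discard.c, the helper name and parameters are hypothetical, and it ignores the override/elapsed-time adjustment applied when rescheduling:

/*
 * Illustrative sketch only (not part of discard.c): combine the iops delay,
 * the byte rate delay and the per-block-group eligibility timeout into a
 * single work item delay, in ns.
 */
static u64 example_discard_delay_ns(u32 iops_limit, u32 kbps_limit,
                                    u64 prev_discard_bytes, u64 bg_timeout_ns)
{
        u64 delay_ms = 0;
        u64 delay_ns;

        /* Base delay derived from the iops limit, clamped like delay_ms is. */
        if (iops_limit)
                delay_ms = clamp_t(u64, MSEC_PER_SEC / iops_limit,
                                   BTRFS_DISCARD_MIN_DELAY_MSEC,
                                   BTRFS_DISCARD_MAX_DELAY_MSEC);
        delay_ns = delay_ms * NSEC_PER_MSEC;

        /* Byte rate limit: time the previous discard would take at kbps_limit. */
        if (kbps_limit && prev_discard_bytes)
                delay_ns = max(delay_ns,
                               div64_u64(prev_discard_bytes * NSEC_PER_SEC,
                                         (u64)kbps_limit * SZ_1K));

        /* Never run before the block group becomes eligible. */
        return max(delay_ns, bg_timeout_ns);
}
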
63 : /* Monotonically decreasing minimum length filters after index 0 */
64 : static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {
65 : 0,
66 : BTRFS_ASYNC_DISCARD_MAX_FILTER,
67 : BTRFS_ASYNC_DISCARD_MIN_FILTER
68 : };
69 :
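As a rough usage sketch of these filters (not part of discard.c; the helper name is hypothetical), this is how a freed region size would map to a discard index, mirroring the loop in btrfs_discard_check_filter() below:

/*
 * Illustrative only: return the first (largest-filter) list index whose
 * minimum length the freed region satisfies.
 */
static int example_discard_index_for_size(u64 bytes)
{
        int i;

        for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS; i++) {
                if (bytes >= discard_minlen[i])
                        return i;
        }

        /*
         * Smaller than every filter: the real code simply does not move the
         * block group; returning the last index here is only for illustration.
         */
        return BTRFS_NR_DISCARD_LISTS - 1;
}
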
70 : static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
71 : struct btrfs_block_group *block_group)
72 : {
73 155313 : return &discard_ctl->discard_list[block_group->discard_index];
74 : }
75 :
76 : /*
77 : * Determine if async discard should be running.
78 : *
79 : * @discard_ctl: discard control
80 : *
81 : * Check if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
82 : */
83 1761946 : static bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
84 : {
85 1761946 : struct btrfs_fs_info *fs_info = container_of(discard_ctl,
86 : struct btrfs_fs_info,
87 : discard_ctl);
88 :
89 3523880 : return (!(fs_info->sb->s_flags & SB_RDONLY) &&
90 1761934 : test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
91 : }
92 :
93 157061 : static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
94 : struct btrfs_block_group *block_group)
95 : {
96 157061 : lockdep_assert_held(&discard_ctl->lock);
97 157061 : if (!btrfs_run_discard_work(discard_ctl))
98 : return;
99 :
100 155313 : if (list_empty(&block_group->discard_list) ||
101 153945 : block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
102 1517 : if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
103 1333 : block_group->discard_index = BTRFS_DISCARD_INDEX_START;
104 1517 : block_group->discard_eligible_time = (ktime_get_ns() +
105 : BTRFS_DISCARD_DELAY);
106 1517 : block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
107 : }
108 155313 : if (list_empty(&block_group->discard_list))
109 1368 : btrfs_get_block_group(block_group);
110 :
111 155313 : list_move_tail(&block_group->discard_list,
112 : get_discard_list(discard_ctl, block_group));
113 : }
114 :
115 3666883 : static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
116 : struct btrfs_block_group *block_group)
117 : {
118 3666883 : if (!btrfs_is_block_group_data_only(block_group))
119 : return;
120 :
121 157060 : spin_lock(&discard_ctl->lock);
122 157060 : __add_to_discard_list(discard_ctl, block_group);
123 157060 : spin_unlock(&discard_ctl->lock);
124 : }
125 :
126 44520 : static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
127 : struct btrfs_block_group *block_group)
128 : {
129 44520 : bool queued;
130 :
131 44520 : spin_lock(&discard_ctl->lock);
132 :
133 44520 : queued = !list_empty(&block_group->discard_list);
134 :
135 44520 : if (!btrfs_run_discard_work(discard_ctl)) {
136 9278 : spin_unlock(&discard_ctl->lock);
137 9278 : return;
138 : }
139 :
140 35242 : list_del_init(&block_group->discard_list);
141 :
142 35242 : block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
143 35242 : block_group->discard_eligible_time = (ktime_get_ns() +
144 : BTRFS_DISCARD_UNUSED_DELAY);
145 35242 : block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
146 35242 : if (!queued)
147 803 : btrfs_get_block_group(block_group);
148 35242 : list_add_tail(&block_group->discard_list,
149 : &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);
150 :
151 35242 : spin_unlock(&discard_ctl->lock);
152 : }
153 :
154 935 : static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
155 : struct btrfs_block_group *block_group)
156 : {
157 935 : bool running = false;
158 935 : bool queued = false;
159 :
160 935 : spin_lock(&discard_ctl->lock);
161 :
162 935 : if (block_group == discard_ctl->block_group) {
163 267 : running = true;
164 267 : discard_ctl->block_group = NULL;
165 : }
166 :
167 935 : block_group->discard_eligible_time = 0;
168 935 : queued = !list_empty(&block_group->discard_list);
169 935 : list_del_init(&block_group->discard_list);
170 : /*
171 : * If the block group is currently running in the discard workfn, we
172 : * don't want to deref it, since it's still being used by the workfn.
173 : * The workfn will notice this case and deref the block group when it is
174 : * finished.
175 : */
176 935 : if (queued && !running)
177 571 : btrfs_put_block_group(block_group);
178 :
179 935 : spin_unlock(&discard_ctl->lock);
180 :
181 935 : return running;
182 : }
183 :
184 : /*
185 : * Find block_group that's up next for discarding.
186 : *
187 : * @discard_ctl: discard control
188 : * @now: current time
189 : *
190 : * Iterate over the discard lists to find the next block_group up for
191 : * discarding, checking the discard_eligible_time of each block_group.
192 : */
193 1536087 : static struct btrfs_block_group *find_next_block_group(
194 : struct btrfs_discard_ctl *discard_ctl,
195 : u64 now)
196 : {
197 1536087 : struct btrfs_block_group *ret_block_group = NULL, *block_group;
198 1536087 : int i;
199 :
200 6136561 : for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
201 4603357 : struct list_head *discard_list = &discard_ctl->discard_list[i];
202 :
203 4603357 : if (!list_empty(discard_list)) {
204 235325 : block_group = list_first_entry(discard_list,
205 : struct btrfs_block_group,
206 : discard_list);
207 :
208 235325 : if (!ret_block_group)
209 158365 : ret_block_group = block_group;
210 :
211 235325 : if (ret_block_group->discard_eligible_time < now)
212 : break;
213 :
214 232442 : if (ret_block_group->discard_eligible_time >
215 232442 : block_group->discard_eligible_time)
216 23648 : ret_block_group = block_group;
217 : }
218 : }
219 :
220 1536087 : return ret_block_group;
221 : }
222 :
223 : /*
224 : * Look up next block group and set it for use.
225 : *
226 : * @discard_ctl: discard control
227 : * @discard_state: the discard_state of the block_group after state management
228 : * @discard_index: the discard_index of the block_group after state management
229 : * @now: time when discard was invoked, in ns
230 : *
231 : * Wrap find_next_block_group() and set the block_group to be in use.
232 : * @discard_state's control flow is managed here. Variables related to
233 : * @discard_state are reset here as needed (e.g. @discard_cursor). @discard_state
234 : * and @discard_index are remembered as they may change while we're discarding,
235 : * but we want the discard to execute in the context determined here.
236 : */
237 46429 : static struct btrfs_block_group *peek_discard_list(
238 : struct btrfs_discard_ctl *discard_ctl,
239 : enum btrfs_discard_state *discard_state,
240 : int *discard_index, u64 now)
241 : {
242 46429 : struct btrfs_block_group *block_group;
243 :
244 46429 : spin_lock(&discard_ctl->lock);
245 46491 : again:
246 46491 : block_group = find_next_block_group(discard_ctl, now);
247 :
248 46491 : if (block_group && now >= block_group->discard_eligible_time) {
249 4486 : if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
250 1086 : block_group->used != 0) {
251 62 : if (btrfs_is_block_group_data_only(block_group)) {
252 1 : __add_to_discard_list(discard_ctl, block_group);
253 : } else {
254 61 : list_del_init(&block_group->discard_list);
255 61 : btrfs_put_block_group(block_group);
256 : }
257 62 : goto again;
258 : }
259 4424 : if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
260 269 : block_group->discard_cursor = block_group->start;
261 269 : block_group->discard_state = BTRFS_DISCARD_EXTENTS;
262 : }
263 4424 : discard_ctl->block_group = block_group;
264 : }
265 46429 : if (block_group) {
266 46315 : *discard_state = block_group->discard_state;
267 46315 : *discard_index = block_group->discard_index;
268 : }
269 46429 : spin_unlock(&discard_ctl->lock);
270 :
271 46429 : return block_group;
272 : }
273 :
274 : /*
275 : * Update a block group's filters.
276 : *
277 : * @block_group: block group of interest
278 : * @bytes: recently freed region size after coalescing
279 : *
280 : * Async discard maintains multiple lists with progressively smaller filters
281 : * to prioritize discarding based on size. Should a free space region that
282 : * matches a larger filter be returned to the free_space_cache, prioritize that
283 : * discard by moving @block_group to the proper filter list.
284 : */
285 3704935 : void btrfs_discard_check_filter(struct btrfs_block_group *block_group,
286 : u64 bytes)
287 : {
288 3704935 : struct btrfs_discard_ctl *discard_ctl;
289 :
290 3704935 : if (!block_group ||
291 3704935 : !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
292 : return;
293 :
294 3695537 : discard_ctl = &block_group->fs_info->discard_ctl;
295 :
296 3695537 : if (block_group->discard_index > BTRFS_DISCARD_INDEX_START &&
297 24678 : bytes >= discard_minlen[block_group->discard_index - 1]) {
298 38 : int i;
299 :
300 38 : remove_from_discard_list(discard_ctl, block_group);
301 :
302 76 : for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS;
303 0 : i++) {
304 38 : if (bytes >= discard_minlen[i]) {
305 38 : block_group->discard_index = i;
306 38 : add_to_discard_list(discard_ctl, block_group);
307 38 : break;
308 : }
309 : }
310 : }
311 : }
312 :
313 : /*
314 : * Move a block group along the discard lists.
315 : *
316 : * @discard_ctl: discard control
317 : * @block_group: block_group of interest
318 : *
319 : * Increment @block_group's discard_index. If it falls off the list, let it be.
320 : * Otherwise add it back to the appropriate list.
321 : */
322 133 : static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
323 : struct btrfs_block_group *block_group)
324 : {
325 133 : block_group->discard_index++;
326 133 : if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) {
327 33 : block_group->discard_index = 1;
328 33 : return;
329 : }
330 :
331 100 : add_to_discard_list(discard_ctl, block_group);
332 : }
333 :
334 : /*
335 : * Remove a block_group from the discard lists.
336 : *
337 : * @discard_ctl: discard control
338 : * @block_group: block_group of interest
339 : *
340 : * Remove @block_group from the discard lists. If necessary, wait on the
341 : * current work and then reschedule the delayed work.
342 : */
343 630 : void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
344 : struct btrfs_block_group *block_group)
345 : {
346 630 : if (remove_from_discard_list(discard_ctl, block_group)) {
347 0 : cancel_delayed_work_sync(&discard_ctl->work);
348 0 : btrfs_discard_schedule_work(discard_ctl, true);
349 : }
350 630 : }
351 :
352 : /*
353 : * Handle queuing the block_groups.
354 : *
355 : * @discard_ctl: discard control
356 : * @block_group: block_group of interest
357 : *
358 : * Maintain the LRU order of the discard lists.
359 : */
360 3720663 : void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
361 : struct btrfs_block_group *block_group)
362 : {
363 3720663 : if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
364 : return;
365 :
366 3711265 : if (block_group->used == 0)
367 44520 : add_to_discard_unused_list(discard_ctl, block_group);
368 : else
369 3666745 : add_to_discard_list(discard_ctl, block_group);
370 :
371 3711265 : if (!delayed_work_pending(&discard_ctl->work))
372 1264509 : btrfs_discard_schedule_work(discard_ctl, false);
373 : }
374 :
375 1514050 : static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
376 : u64 now, bool override)
377 : {
378 1514050 : struct btrfs_block_group *block_group;
379 :
380 1514050 : if (!btrfs_run_discard_work(discard_ctl))
381 : return;
382 2779527 : if (!override && delayed_work_pending(&discard_ctl->work))
383 : return;
384 :
385 1489596 : block_group = find_next_block_group(discard_ctl, now);
386 1489596 : if (block_group) {
387 111988 : u64 delay = discard_ctl->delay_ms * NSEC_PER_MSEC;
388 111988 : u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit);
389 :
390 : /*
391 : * A single delayed workqueue item is responsible for
392 : * discarding, so we can manage the bytes rate limit by keeping
393 : * track of the previous discard.
394 : */
395 111988 : if (kbps_limit && discard_ctl->prev_discard) {
396 0 : u64 bps_limit = ((u64)kbps_limit) * SZ_1K;
397 0 : u64 bps_delay = div64_u64(discard_ctl->prev_discard *
398 : NSEC_PER_SEC, bps_limit);
399 :
400 0 : delay = max(delay, bps_delay);
401 : }
402 :
403 : /*
404 : * This timeout is to hopefully prevent immediate discarding
405 : * in a recently allocated block group.
406 : */
407 111988 : if (now < block_group->discard_eligible_time) {
408 107663 : u64 bg_timeout = block_group->discard_eligible_time - now;
409 :
410 107663 : delay = max(delay, bg_timeout);
411 : }
412 :
413 111988 : if (override && discard_ctl->prev_discard) {
414 288 : u64 elapsed = now - discard_ctl->prev_discard_time;
415 :
416 288 : if (delay > elapsed)
417 221 : delay -= elapsed;
418 : else
419 : delay = 0;
420 : }
421 :
422 111988 : mod_delayed_work(discard_ctl->discard_workers,
423 : &discard_ctl->work, nsecs_to_jiffies(delay));
424 : }
425 : }
426 :
427 : /*
428 : * Responsible for scheduling the discard work.
429 : *
430 : * @discard_ctl: discard control
431 : * @override: override the current timer
432 : *
433 : * Discards are issued by a delayed workqueue item. @override is used to
434 : * update the current delay as the baseline delay interval is reevaluated on
435 : * transaction commit. This is also maxed with any other rate limit.
436 : */
437 1509627 : void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
438 : bool override)
439 : {
440 1509627 : const u64 now = ktime_get_ns();
441 :
442 1509627 : spin_lock(&discard_ctl->lock);
443 1509627 : __btrfs_discard_schedule_work(discard_ctl, now, override);
444 1509627 : spin_unlock(&discard_ctl->lock);
445 1509627 : }
446 :
447 : /*
448 : * Determine next step of a block_group.
449 : *
450 : * @discard_ctl: discard control
451 : * @block_group: block_group of interest
452 : *
453 : * Determine the next step for a block group after it's finished going through
454 : * a pass on a discard list. If it is unused and fully trimmed, we can mark it
455 : * unused and send it to the unused_bgs path. Otherwise, pass it onto the
456 : * appropriate filter list or let it fall off.
457 : */
458 267 : static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
459 : struct btrfs_block_group *block_group)
460 : {
461 267 : remove_from_discard_list(discard_ctl, block_group);
462 :
463 267 : if (block_group->used == 0) {
464 134 : if (btrfs_is_free_space_trimmed(block_group))
465 134 : btrfs_mark_bg_unused(block_group);
466 : else
467 0 : add_to_discard_unused_list(discard_ctl, block_group);
468 : } else {
469 133 : btrfs_update_discard_index(discard_ctl, block_group);
470 : }
471 267 : }
472 :
473 : /*
474 : * Discard work queue callback
475 : *
476 : * @work: work
477 : *
478 : * Find the next block_group to start discarding and then discard a single
479 : * region. It does this in a two-pass fashion: first extents and second
480 : * bitmaps. Completely discarded block groups are sent to the unused_bgs path.
481 : */
482 46429 : static void btrfs_discard_workfn(struct work_struct *work)
483 : {
484 46429 : struct btrfs_discard_ctl *discard_ctl;
485 46429 : struct btrfs_block_group *block_group;
486 46429 : enum btrfs_discard_state discard_state;
487 46429 : int discard_index = 0;
488 46429 : u64 trimmed = 0;
489 46429 : u64 minlen = 0;
490 46429 : u64 now = ktime_get_ns();
491 :
492 46429 : discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);
493 :
494 46429 : block_group = peek_discard_list(discard_ctl, &discard_state,
495 : &discard_index, now);
496 46429 : if (!block_group || !btrfs_run_discard_work(discard_ctl))
497 42006 : return;
498 46314 : if (now < block_group->discard_eligible_time) {
499 41891 : btrfs_discard_schedule_work(discard_ctl, false);
500 41891 : return;
501 : }
502 :
503 : /* Perform discarding */
504 4423 : minlen = discard_minlen[discard_index];
505 :
506 4423 : if (discard_state == BTRFS_DISCARD_BITMAPS) {
507 1512 : u64 maxlen = 0;
508 :
509 : /*
510 : * Use the previous level's minimum discard length as the max
511 : * length filter. In the case something is added to make a
512 : * region go beyond the max filter, the entire bitmap is set
513 : * back to BTRFS_TRIM_STATE_UNTRIMMED.
514 : */
515 1512 : if (discard_index != BTRFS_DISCARD_INDEX_UNUSED)
516 1378 : maxlen = discard_minlen[discard_index - 1];
517 :
518 1512 : btrfs_trim_block_group_bitmaps(block_group, &trimmed,
519 : block_group->discard_cursor,
520 : btrfs_block_group_end(block_group),
521 : minlen, maxlen, true);
522 1512 : discard_ctl->discard_bitmap_bytes += trimmed;
523 : } else {
524 2911 : btrfs_trim_block_group_extents(block_group, &trimmed,
525 : block_group->discard_cursor,
526 : btrfs_block_group_end(block_group),
527 : minlen, true);
528 2911 : discard_ctl->discard_extent_bytes += trimmed;
529 : }
530 :
531 : /* Determine next steps for a block_group */
532 4423 : if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
533 535 : if (discard_state == BTRFS_DISCARD_BITMAPS) {
534 267 : btrfs_finish_discard_pass(discard_ctl, block_group);
535 : } else {
536 268 : block_group->discard_cursor = block_group->start;
537 268 : spin_lock(&discard_ctl->lock);
538 268 : if (block_group->discard_state !=
539 : BTRFS_DISCARD_RESET_CURSOR)
540 268 : block_group->discard_state =
541 : BTRFS_DISCARD_BITMAPS;
542 268 : spin_unlock(&discard_ctl->lock);
543 : }
544 : }
545 :
546 4423 : now = ktime_get_ns();
547 4423 : spin_lock(&discard_ctl->lock);
548 4423 : discard_ctl->prev_discard = trimmed;
549 4423 : discard_ctl->prev_discard_time = now;
550 : /*
551 : * If the block group was removed from the discard list while it was
552 : * running in this workfn, then we didn't deref it, since this function
553 : * still owned that reference. But we set the discard_ctl->block_group
554 : * back to NULL, so we can use that condition to know that now we need
555 : * to deref the block_group.
556 : */
557 4423 : if (discard_ctl->block_group == NULL)
558 267 : btrfs_put_block_group(block_group);
559 4423 : discard_ctl->block_group = NULL;
560 4423 : __btrfs_discard_schedule_work(discard_ctl, now, false);
561 4423 : spin_unlock(&discard_ctl->lock);
562 : }
563 :
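A compact summary of the progression a block group follows as the workfn above (together with peek_discard_list()) repeatedly processes it; this only restates what the code does, in one place:

/*
 * Illustrative only:
 *
 *   BTRFS_DISCARD_RESET_CURSOR -> cursor reset to block_group->start,
 *                                 state becomes BTRFS_DISCARD_EXTENTS
 *   BTRFS_DISCARD_EXTENTS      -> trim free extents, one region per work run,
 *                                 advancing discard_cursor
 *   cursor reaches end         -> cursor reset, state becomes
 *                                 BTRFS_DISCARD_BITMAPS
 *   BTRFS_DISCARD_BITMAPS      -> trim bitmap regions, one region per work run
 *   cursor reaches end         -> btrfs_finish_discard_pass()
 */
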
564 : /*
565 : * Recalculate the base delay.
566 : *
567 : * @discard_ctl: discard control
568 : *
569 : * Recalculate the base delay: derive it from the configured iops_limit
570 : * (MSEC_PER_SEC / iops_limit) and clamp between BTRFS_DISCARD_MIN_DELAY_MSEC
571 : * and BTRFS_DISCARD_MAX_DELAY_MSEC; an unset iops_limit means no base delay.
572 : */
573 203227 : void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
574 : {
575 203227 : s32 discardable_extents;
576 203227 : s64 discardable_bytes;
577 203227 : u32 iops_limit;
578 203227 : unsigned long min_delay = BTRFS_DISCARD_MIN_DELAY_MSEC;
579 203227 : unsigned long delay;
580 :
581 203227 : discardable_extents = atomic_read(&discard_ctl->discardable_extents);
582 203227 : if (!discardable_extents)
583 : return;
584 :
585 31151 : spin_lock(&discard_ctl->lock);
586 :
587 : /*
588 : * The following is to fix a potential -1 discrepancy that we're not
589 : * sure how to reproduce. But given that this is the only place that
590 : * utilizes these numbers and this is only called from
591 : * btrfs_finish_extent_commit(), which is synchronized, we can correct it
592 : * here.
593 : */
594 31151 : if (discardable_extents < 0)
595 6 : atomic_add(-discardable_extents,
596 : &discard_ctl->discardable_extents);
597 :
598 31151 : discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes);
599 31151 : if (discardable_bytes < 0)
600 1 : atomic64_add(-discardable_bytes,
601 : &discard_ctl->discardable_bytes);
602 :
603 31151 : if (discardable_extents <= 0) {
604 6 : spin_unlock(&discard_ctl->lock);
605 6 : return;
606 : }
607 :
608 31145 : iops_limit = READ_ONCE(discard_ctl->iops_limit);
609 :
610 31145 : if (iops_limit) {
611 31145 : delay = MSEC_PER_SEC / iops_limit;
612 : } else {
613 : /*
614 : * Unset iops_limit means go as fast as possible, so allow a
615 : * delay of 0.
616 : */
617 : delay = 0;
618 : min_delay = 0;
619 : }
620 :
621 31145 : delay = clamp(delay, min_delay, BTRFS_DISCARD_MAX_DELAY_MSEC);
622 31145 : discard_ctl->delay_ms = delay;
623 :
624 31145 : spin_unlock(&discard_ctl->lock);
625 : }
626 :
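A brief worked example of the calculation above (illustrative only, using the constants defined at the top of the file):

/*
 * With the default iops_limit of BTRFS_DISCARD_MAX_IOPS (1000),
 * delay = MSEC_PER_SEC / 1000 = 1 ms, which is already the clamp floor.
 * With iops_limit = 10, delay = 100 ms. With iops_limit = 0, delay_ms
 * stays 0 and pacing is left to kbps_limit (if set) and the per block
 * group eligibility times.
 */
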
627 : /*
628 : * Propagate discard counters.
629 : *
630 : * @block_group: block_group of interest
631 : *
632 : * Propagate deltas of counters up to the discard_ctl. It maintains a current
633 : * counter and a previous counter, passing the delta up to the global stat.
634 : * The current counter value then becomes the previous counter value.
635 : */
636 8065330 : void btrfs_discard_update_discardable(struct btrfs_block_group *block_group)
637 : {
638 8065330 : struct btrfs_free_space_ctl *ctl;
639 8065330 : struct btrfs_discard_ctl *discard_ctl;
640 8065330 : s32 extents_delta;
641 8065330 : s64 bytes_delta;
642 :
643 8065330 : if (!block_group ||
644 8065330 : !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) ||
645 : !btrfs_is_block_group_data_only(block_group))
646 : return;
647 :
648 4041244 : ctl = block_group->free_space_ctl;
649 4041244 : discard_ctl = &block_group->fs_info->discard_ctl;
650 :
651 4041244 : lockdep_assert_held(&ctl->tree_lock);
652 4041244 : extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] -
653 4041244 : ctl->discardable_extents[BTRFS_STAT_PREV];
654 4041244 : if (extents_delta) {
655 226090 : atomic_add(extents_delta, &discard_ctl->discardable_extents);
656 226091 : ctl->discardable_extents[BTRFS_STAT_PREV] =
657 226091 : ctl->discardable_extents[BTRFS_STAT_CURR];
658 : }
659 :
660 4041245 : bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] -
661 4041245 : ctl->discardable_bytes[BTRFS_STAT_PREV];
662 4041245 : if (bytes_delta) {
663 1900423 : atomic64_add(bytes_delta, &discard_ctl->discardable_bytes);
664 1900424 : ctl->discardable_bytes[BTRFS_STAT_PREV] =
665 1900424 : ctl->discardable_bytes[BTRFS_STAT_CURR];
666 : }
667 : }
668 :
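A minimal, generic sketch of the CURR/PREV delta pattern used above (not part of discard.c; the helper name is hypothetical):

/*
 * Illustrative only: push the change since the last update into a global
 * counter, then remember the current value as the new baseline.
 */
static void example_propagate_delta(atomic64_t *global, s64 *prev, s64 curr)
{
        s64 delta = curr - *prev;

        if (delta) {
                atomic64_add(delta, global);
                *prev = curr;
        }
}
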
669 : /*
670 : * Punt unused_bgs list to discard lists.
671 : *
672 : * @fs_info: fs_info of interest
673 : *
674 : * The unused_bgs list needs to be punted to the discard lists because the
675 : * order of operations is changed. In the normal synchronous discard path, the
676 : * block groups are trimmed via a single large trim in transaction commit. This
677 : * is ultimately what we are trying to avoid with asynchronous discard. Thus,
678 : * it must be done before going down the unused_bgs path.
679 : */
680 3161 : void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
681 : {
682 3161 : struct btrfs_block_group *block_group, *next;
683 :
684 3161 : spin_lock(&fs_info->unused_bgs_lock);
685 : /* We enabled async discard, so punt all to the queue */
686 12434 : list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
687 : bg_list) {
688 9273 : list_del_init(&block_group->bg_list);
689 9273 : btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
690 : /*
691 : * This put is for the get done by btrfs_mark_bg_unused.
692 : * Queueing discard incremented it for discard's reference.
693 : */
694 9273 : btrfs_put_block_group(block_group);
695 : }
696 3161 : spin_unlock(&fs_info->unused_bgs_lock);
697 3161 : }
698 :
699 : /*
700 : * Purge discard lists.
701 : *
702 : * @discard_ctl: discard control
703 : *
704 : * If we are disabling async discard, we may have intercepted block groups that
705 : * are completely free and ready for the unused_bgs path. As discarding will
706 : * now happen in transaction commit or not at all, we can safely mark the
707 : * corresponding block groups as unused and they will be sent on their merry
708 : * way to the unused_bgs list.
709 : */
710 3234 : static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
711 : {
712 3234 : struct btrfs_block_group *block_group, *next;
713 3234 : int i;
714 :
715 3234 : spin_lock(&discard_ctl->lock);
716 16170 : for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
717 10974 : list_for_each_entry_safe(block_group, next,
718 : &discard_ctl->discard_list[i],
719 : discard_list) {
720 1272 : list_del_init(&block_group->discard_list);
721 1272 : spin_unlock(&discard_ctl->lock);
722 1272 : if (block_group->used == 0)
723 249 : btrfs_mark_bg_unused(block_group);
724 1272 : spin_lock(&discard_ctl->lock);
725 1272 : btrfs_put_block_group(block_group);
726 : }
727 : }
728 3234 : spin_unlock(&discard_ctl->lock);
729 3234 : }
730 :
731 3177 : void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
732 : {
733 3177 : if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
734 16 : btrfs_discard_cleanup(fs_info);
735 16 : return;
736 : }
737 :
738 3161 : btrfs_discard_punt_unused_bgs_list(fs_info);
739 :
740 3161 : set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
741 : }
742 :
743 17 : void btrfs_discard_stop(struct btrfs_fs_info *fs_info)
744 : {
745 17 : clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
746 17 : }
747 :
748 3467 : void btrfs_discard_init(struct btrfs_fs_info *fs_info)
749 : {
750 3467 : struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
751 3467 : int i;
752 :
753 3467 : spin_lock_init(&discard_ctl->lock);
754 3467 : INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn);
755 :
756 17335 : for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++)
757 10401 : INIT_LIST_HEAD(&discard_ctl->discard_list[i]);
758 :
759 3467 : discard_ctl->prev_discard = 0;
760 3467 : discard_ctl->prev_discard_time = 0;
761 3467 : atomic_set(&discard_ctl->discardable_extents, 0);
762 3467 : atomic64_set(&discard_ctl->discardable_bytes, 0);
763 3467 : discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE;
764 3467 : discard_ctl->delay_ms = BTRFS_DISCARD_MAX_DELAY_MSEC;
765 3467 : discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
766 3467 : discard_ctl->kbps_limit = 0;
767 3467 : discard_ctl->discard_extent_bytes = 0;
768 3467 : discard_ctl->discard_bitmap_bytes = 0;
769 3467 : atomic64_set(&discard_ctl->discard_bytes_saved, 0);
770 3467 : }
771 :
772 3234 : void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
773 : {
774 3234 : btrfs_discard_stop(fs_info);
775 3234 : cancel_delayed_work_sync(&fs_info->discard_ctl.work);
776 3234 : btrfs_discard_purge_list(&fs_info->discard_ctl);
777 3234 : }