LCOV - code coverage report
Current view: top level - fs/btrfs - discard.c (source / functions)
Test: fstests of 6.5.0-rc3-djwx @ Mon Jul 31 20:08:22 PDT 2023
Date: 2023-07-31 20:08:22

                  Hit   Total   Coverage
Lines:            289     296     97.6 %
Functions:         23      23    100.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : 
       3             : #include <linux/jiffies.h>
       4             : #include <linux/kernel.h>
       5             : #include <linux/ktime.h>
       6             : #include <linux/list.h>
       7             : #include <linux/math64.h>
       8             : #include <linux/sizes.h>
       9             : #include <linux/workqueue.h>
      10             : #include "ctree.h"
      11             : #include "block-group.h"
      12             : #include "discard.h"
      13             : #include "free-space-cache.h"
      14             : #include "fs.h"
      15             : 
      16             : /*
      17             :  * This contains the logic to handle async discard.
      18             :  *
      19             :  * Async discard manages trimming of free space outside of transaction commit.
      20             :  * Discarding is done by managing the block_groups on a LRU list based on free
      21             :  * space recency.  Two passes are used to first prioritize discarding extents
      22             :  * and then to give trimming in the bitmaps the best opportunity to coalesce.
      23             :  * The block_groups are maintained on multiple lists to allow for multiple
      24             :  * passes with different discard filter requirements.  A delayed work item is
      25             :  * used to manage discarding with timeout determined by a max of the delay
      26             :  * incurred by the iops rate limit, the byte rate limit, and the max delay of
      27             :  * BTRFS_DISCARD_MAX_DELAY_MSEC.
      28             :  *
      29             :  * Note, this only keeps track of block_groups that are explicitly for data.
      30             :  * Mixed block_groups are not supported.
      31             :  *
      32             :  * The first list is special to manage discarding of fully free block groups.
      33             :  * This is necessary because we issue a final trim for a fully free block group
      34             :  * after forgetting it.  When a block group becomes unused, instead of directly
      35             :  * being added to the unused_bgs list, we add it to this first list.  Then
      36             :  * from there, if it becomes fully discarded, we place it onto the unused_bgs
      37             :  * list.
      38             :  *
      39             :  * The in-memory free space cache serves as the backing state for discard.
      40             :  * Consequently this means there is no persistence.  We opt to load all the
      41             :  * block groups in as not discarded, so the mount case degenerates to the
      42             :  * crashing case.
      43             :  *
      44             :  * As the free space cache uses bitmaps, there exists a tradeoff between
      45             :  * ease/efficiency for find_free_extent() and the accuracy of discard state.
      46             :  * Here we opt to let untrimmed regions merge with everything while only letting
      47             :  * trimmed regions merge with other trimmed regions.  This can cause
      48             :  * overtrimming, but the coalescing benefit seems to be worth it.  Additionally,
      49             :  * bitmap state is tracked as a whole.  If we're able to fully trim a bitmap,
      50             :  * the trimmed flag is set on the bitmap.  Otherwise, if an allocation comes in,
      51             :  * this resets the state and we will retry trimming the whole bitmap.  This is a
      52             :  * tradeoff between discard state accuracy and the cost of accounting.
      53             :  */
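
A minimal userspace model of the whole-bitmap trim state described in the last
paragraph above. The names are stand-ins, not the kernel's (the real states live
in free-space-cache.h): fully trimming a bitmap marks it trimmed as a whole, and
any allocation from it resets the state so the entire bitmap is retried.

#include <stdio.h>

/* Stand-in trim states; the kernel tracks these per free-space bitmap. */
enum trim_state { UNTRIMMED, TRIMMED };

struct bitmap {
        enum trim_state state;
        unsigned long free_bits;        /* bits currently free */
};

/* A full trim pass: only when every free bit is trimmed does the bitmap
 * as a whole flip to TRIMMED. */
static void trim_bitmap(struct bitmap *b)
{
        b->state = TRIMMED;
}

/* An allocation re-dirties the bitmap: state is tracked as a whole, so we
 * conservatively fall back to UNTRIMMED and later retry the full bitmap. */
static void alloc_from_bitmap(struct bitmap *b, unsigned long bits)
{
        b->free_bits -= bits;
        b->state = UNTRIMMED;
}

int main(void)
{
        struct bitmap b = { UNTRIMMED, 512 };

        trim_bitmap(&b);
        printf("after trim:  %s\n", b.state == TRIMMED ? "trimmed" : "untrimmed");
        alloc_from_bitmap(&b, 8);
        printf("after alloc: %s (%lu free bits to retry)\n",
               b.state == TRIMMED ? "trimmed" : "untrimmed", b.free_bits);
        return 0;
}
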
      54             : 
      55             : /* This is an initial delay to give some chance for block reuse */
      56             : #define BTRFS_DISCARD_DELAY             (120ULL * NSEC_PER_SEC)
      57             : #define BTRFS_DISCARD_UNUSED_DELAY      (10ULL * NSEC_PER_SEC)
      58             : 
      59             : #define BTRFS_DISCARD_MIN_DELAY_MSEC    (1UL)
      60             : #define BTRFS_DISCARD_MAX_DELAY_MSEC    (1000UL)
      61             : #define BTRFS_DISCARD_MAX_IOPS          (1000U)
      62             : 
      63             : /* Monotonically decreasing minimum length filters after index 0 */
      64             : static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {
      65             :         0,
      66             :         BTRFS_ASYNC_DISCARD_MAX_FILTER,
      67             :         BTRFS_ASYNC_DISCARD_MIN_FILTER
      68             : };
      69             : 
      70             : static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
      71             :                                           struct btrfs_block_group *block_group)
      72             : {
      73      172443 :         return &discard_ctl->discard_list[block_group->discard_index];
      74             : }
      75             : 
      76             : /*
      77             :  * Determine if async discard should be running.
      78             :  *
      79             :  * @discard_ctl: discard control
      80             :  *
      81             :  * Check if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
      82             :  */
      83     1853173 : static bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
      84             : {
      85     1853173 :         struct btrfs_fs_info *fs_info = container_of(discard_ctl,
      86             :                                                      struct btrfs_fs_info,
      87             :                                                      discard_ctl);
      88             : 
      89     3706334 :         return (!(fs_info->sb->s_flags & SB_RDONLY) &&
      90     1853161 :                 test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
      91             : }
      92             : 
      93      174184 : static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
      94             :                                   struct btrfs_block_group *block_group)
      95             : {
      96      174184 :         lockdep_assert_held(&discard_ctl->lock);
      97      174184 :         if (!btrfs_run_discard_work(discard_ctl))
      98             :                 return;
      99             : 
     100      172443 :         if (list_empty(&block_group->discard_list) ||
     101      171095 :             block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
     102        1535 :                 if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
     103        1366 :                         block_group->discard_index = BTRFS_DISCARD_INDEX_START;
     104        1535 :                 block_group->discard_eligible_time = (ktime_get_ns() +
     105             :                                                       BTRFS_DISCARD_DELAY);
     106        1535 :                 block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
     107             :         }
     108      172443 :         if (list_empty(&block_group->discard_list))
     109        1348 :                 btrfs_get_block_group(block_group);
     110             : 
     111      172443 :         list_move_tail(&block_group->discard_list,
     112             :                        get_discard_list(discard_ctl, block_group));
     113             : }
     114             : 
     115     3772803 : static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
     116             :                                 struct btrfs_block_group *block_group)
     117             : {
     118     3772803 :         if (!btrfs_is_block_group_data_only(block_group))
     119             :                 return;
     120             : 
     121      174181 :         spin_lock(&discard_ctl->lock);
     122      174181 :         __add_to_discard_list(discard_ctl, block_group);
     123      174181 :         spin_unlock(&discard_ctl->lock);
     124             : }
     125             : 
     126       51553 : static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
     127             :                                        struct btrfs_block_group *block_group)
     128             : {
     129       51553 :         bool queued;
     130             : 
     131       51553 :         spin_lock(&discard_ctl->lock);
     132             : 
     133       51553 :         queued = !list_empty(&block_group->discard_list);
     134             : 
     135       51553 :         if (!btrfs_run_discard_work(discard_ctl)) {
     136       12995 :                 spin_unlock(&discard_ctl->lock);
     137       12995 :                 return;
     138             :         }
     139             : 
     140       38558 :         list_del_init(&block_group->discard_list);
     141             : 
     142       38558 :         block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
     143       38558 :         block_group->discard_eligible_time = (ktime_get_ns() +
     144             :                                               BTRFS_DISCARD_UNUSED_DELAY);
     145       38558 :         block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
     146       38558 :         if (!queued)
     147         849 :                 btrfs_get_block_group(block_group);
     148       38558 :         list_add_tail(&block_group->discard_list,
     149             :                       &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);
     150             : 
     151       38558 :         spin_unlock(&discard_ctl->lock);
     152             : }
     153             : 
     154         957 : static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
     155             :                                      struct btrfs_block_group *block_group)
     156             : {
     157         957 :         bool running = false;
     158         957 :         bool queued = false;
     159             : 
     160         957 :         spin_lock(&discard_ctl->lock);
     161             : 
     162         957 :         if (block_group == discard_ctl->block_group) {
     163         264 :                 running = true;
     164         264 :                 discard_ctl->block_group = NULL;
     165             :         }
     166             : 
     167         957 :         block_group->discard_eligible_time = 0;
     168         957 :         queued = !list_empty(&block_group->discard_list);
     169         957 :         list_del_init(&block_group->discard_list);
     170             :         /*
     171             :          * If the block group is currently running in the discard workfn, we
     172             :          * don't want to deref it, since it's still being used by the workfn.
     173             :          * The workfn will notice this case and deref the block group when it is
     174             :          * finished.
     175             :          */
     176         957 :         if (queued && !running)
     177         604 :                 btrfs_put_block_group(block_group);
     178             : 
     179         957 :         spin_unlock(&discard_ctl->lock);
     180             : 
     181         957 :         return running;
     182             : }
     183             : 
     184             : /*
     185             :  * Find block_group that's up next for discarding.
     186             :  *
     187             :  * @discard_ctl:  discard control
     188             :  * @now:          current time
     189             :  *
     190             :  * Iterate over the discard lists to find the next block_group up for
     191             :  * discarding, checking the discard_eligible_time of each block_group.
     192             :  */
     193     1598689 : static struct btrfs_block_group *find_next_block_group(
     194             :                                         struct btrfs_discard_ctl *discard_ctl,
     195             :                                         u64 now)
     196             : {
     197     1598689 :         struct btrfs_block_group *ret_block_group = NULL, *block_group;
     198     1598689 :         int i;
     199             : 
     200     6386318 :         for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
     201     4790715 :                 struct list_head *discard_list = &discard_ctl->discard_list[i];
     202             : 
     203     4790715 :                 if (!list_empty(discard_list)) {
     204      166880 :                         block_group = list_first_entry(discard_list,
     205             :                                                        struct btrfs_block_group,
     206             :                                                        discard_list);
     207             : 
     208      166880 :                         if (!ret_block_group)
     209      132323 :                                 ret_block_group = block_group;
     210             : 
     211      166880 :                         if (ret_block_group->discard_eligible_time < now)
     212             :                                 break;
     213             : 
     214      163794 :                         if (ret_block_group->discard_eligible_time >
     215      163794 :                             block_group->discard_eligible_time)
     216       13249 :                                 ret_block_group = block_group;
     217             :                 }
     218             :         }
     219             : 
     220     1598689 :         return ret_block_group;
     221             : }
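
A standalone sketch of the selection loop above, with hypothetical eligibility
times: the scan keeps the earliest list head seen so far and breaks early once
that candidate is already eligible, so here list 1's head (eligible at 500,
now 600) wins and list 2's head is never compared.

#include <stdint.h>
#include <stdio.h>

#define NR_LISTS 3
#define EMPTY UINT64_MAX        /* marks an empty discard list */

int main(void)
{
        /* eligibility time (ns) of each list's head block group */
        uint64_t head[NR_LISTS] = { 900, 500, 700 };
        uint64_t now = 600, best = EMPTY;
        int best_i = -1;

        for (int i = 0; i < NR_LISTS; i++) {
                if (head[i] == EMPTY)
                        continue;
                if (best_i < 0) {
                        best = head[i];
                        best_i = i;
                }
                /* current best is already eligible: stop scanning */
                if (best < now)
                        break;
                if (best > head[i]) {
                        best = head[i];
                        best_i = i;
                }
        }
        printf("picked list %d, eligible at %llu (now %llu)\n",
               best_i, (unsigned long long)best, (unsigned long long)now);
        return 0;
}
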
     222             : 
     223             : /*
     224             :  * Look up next block group and set it for use.
     225             :  *
     226             :  * @discard_ctl:   discard control
     227             :  * @discard_state: the discard_state of the block_group after state management
     228             :  * @discard_index: the discard_index of the block_group after state management
     229             :  * @now:           time when discard was invoked, in ns
     230             :  *
     231             :  * Wrap find_next_block_group() and set the block_group to be in use.
     232             :  * @discard_state's control flow is managed here.  Variables related to
     233             :  * @discard_state are reset here as needed (eg. @discard_cursor).  @discard_state
     234             :  * @discard_state are reset here as needed (e.g. @discard_cursor).  @discard_state
     235             :  * and @discard_index are remembered as they may change while we're discarding,
     236             :  */
     237       32755 : static struct btrfs_block_group *peek_discard_list(
     238             :                                         struct btrfs_discard_ctl *discard_ctl,
     239             :                                         enum btrfs_discard_state *discard_state,
     240             :                                         int *discard_index, u64 now)
     241             : {
     242       32755 :         struct btrfs_block_group *block_group;
     243             : 
     244       32755 :         spin_lock(&discard_ctl->lock);
     245       32826 : again:
     246       32826 :         block_group = find_next_block_group(discard_ctl, now);
     247             : 
     248       32826 :         if (block_group && now >= block_group->discard_eligible_time) {
     249        4239 :                 if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
     250        1214 :                     block_group->used != 0) {
     251          71 :                         if (btrfs_is_block_group_data_only(block_group)) {
     252           3 :                                 __add_to_discard_list(discard_ctl, block_group);
     253             :                         } else {
     254          68 :                                 list_del_init(&block_group->discard_list);
     255          68 :                                 btrfs_put_block_group(block_group);
     256             :                         }
     257          71 :                         goto again;
     258             :                 }
     259        4168 :                 if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
     260         265 :                         block_group->discard_cursor = block_group->start;
     261         265 :                         block_group->discard_state = BTRFS_DISCARD_EXTENTS;
     262             :                 }
     263        4168 :                 discard_ctl->block_group = block_group;
     264             :         }
     265       32755 :         if (block_group) {
     266       32639 :                 *discard_state = block_group->discard_state;
     267       32639 :                 *discard_index = block_group->discard_index;
     268             :         }
     269       32755 :         spin_unlock(&discard_ctl->lock);
     270             : 
     271       32755 :         return block_group;
     272             : }
     273             : 
     274             : /*
     275             :  * Update a block group's filters.
     276             :  *
     277             :  * @block_group:  block group of interest
     278             :  * @bytes:        recently freed region size after coalescing
     279             :  *
     280             :  * Async discard maintains multiple lists with progressively smaller filters
     281             :  * to prioritize discarding based on size.  Should a free space that matches
     282             :  * a larger filter be returned to the free_space_cache, prioritize that discard
     283             :  * by moving @block_group to the proper filter.
     284             :  */
     285     3817814 : void btrfs_discard_check_filter(struct btrfs_block_group *block_group,
     286             :                                 u64 bytes)
     287             : {
     288     3817814 :         struct btrfs_discard_ctl *discard_ctl;
     289             : 
     290     3817814 :         if (!block_group ||
     291     3817814 :             !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
     292             :                 return;
     293             : 
     294     3804700 :         discard_ctl = &block_group->fs_info->discard_ctl;
     295             : 
     296     3804700 :         if (block_group->discard_index > BTRFS_DISCARD_INDEX_START &&
     297       20885 :             bytes >= discard_minlen[block_group->discard_index - 1]) {
     298          36 :                 int i;
     299             : 
     300          36 :                 remove_from_discard_list(discard_ctl, block_group);
     301             : 
     302          72 :                 for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS;
     303           0 :                      i++) {
     304          36 :                         if (bytes >= discard_minlen[i]) {
     305          36 :                                 block_group->discard_index = i;
     306          36 :                                 add_to_discard_list(discard_ctl, block_group);
     307          36 :                                 break;
     308             :                         }
     309             :                 }
     310             :         }
     311             : }
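
A sketch of the size-to-list mapping done by the loop above, under assumed
filter values (the real BTRFS_ASYNC_DISCARD_MAX_FILTER / MIN_FILTER constants
are defined in free-space-cache.h): the freed region is requeued on the list
with the largest filter it satisfies.

#include <stdio.h>

#define NR_LISTS 3

int main(void)
{
        /* index 0 is the unused-bg list; 1 and 2 hold shrinking filters.
         * Values are illustrative, not the kernel's. */
        unsigned long long minlen[NR_LISTS] = {
                0,
                1024 * 1024,    /* assumed BTRFS_ASYNC_DISCARD_MAX_FILTER */
                32 * 1024,      /* assumed BTRFS_ASYNC_DISCARD_MIN_FILTER */
        };
        unsigned long long freed = 256 * 1024;  /* 256 KiB region coalesced */

        /* first list (largest filter) whose minimum the region meets */
        for (int i = 1; i < NR_LISTS; i++) {
                if (freed >= minlen[i]) {
                        printf("requeue on discard list %d\n", i);
                        break;
                }
        }
        return 0;
}
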
     312             : 
     313             : /*
     314             :  * Move a block group along the discard lists.
     315             :  *
     316             :  * @discard_ctl: discard control
     317             :  * @block_group: block_group of interest
     318             :  *
     319             :  * Increment @block_group's discard_index.  If it falls off the list, let it be.
     320             :  * Otherwise add it back to the appropriate list.
     321             :  */
     322         123 : static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
     323             :                                        struct btrfs_block_group *block_group)
     324             : {
     325         123 :         block_group->discard_index++;
     326         123 :         if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) {
     327          30 :                 block_group->discard_index = 1;
     328          30 :                 return;
     329             :         }
     330             : 
     331          93 :         add_to_discard_list(discard_ctl, block_group);
     332             : }
     333             : 
     334             : /*
     335             :  * Remove a block_group from the discard lists.
     336             :  *
     337             :  * @discard_ctl: discard control
     338             :  * @block_group: block_group of interest
     339             :  *
     340             :  * Remove @block_group from the discard lists.  If necessary, wait on the
     341             :  * current work and then reschedule the delayed work.
     342             :  */
     343         657 : void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
     344             :                                struct btrfs_block_group *block_group)
     345             : {
     346         657 :         if (remove_from_discard_list(discard_ctl, block_group)) {
     347           0 :                 cancel_delayed_work_sync(&discard_ctl->work);
     348           0 :                 btrfs_discard_schedule_work(discard_ctl, true);
     349             :         }
     350         657 : }
     351             : 
     352             : /*
     353             :  * Handles queuing the block_groups.
     354             :  *
     355             :  * @discard_ctl: discard control
     356             :  * @block_group: block_group of interest
     357             :  *
     358             :  * Maintain the LRU order of the discard lists.
     359             :  */
     360     3837341 : void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
     361             :                               struct btrfs_block_group *block_group)
     362             : {
     363     3837341 :         if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
     364             :                 return;
     365             : 
     366     3824227 :         if (block_group->used == 0)
     367       51553 :                 add_to_discard_unused_list(discard_ctl, block_group);
     368             :         else
     369     3772674 :                 add_to_discard_list(discard_ctl, block_group);
     370             : 
     371     3824228 :         if (!delayed_work_pending(&discard_ctl->work))
     372     1359178 :                 btrfs_discard_schedule_work(discard_ctl, false);
     373             : }
     374             : 
     375     1594797 : static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
     376             :                                           u64 now, bool override)
     377             : {
     378     1594797 :         struct btrfs_block_group *block_group;
     379             : 
     380     1594797 :         if (!btrfs_run_discard_work(discard_ctl))
     381             :                 return;
     382     2932289 :         if (!override && delayed_work_pending(&discard_ctl->work))
     383             :                 return;
     384             : 
     385     1565863 :         block_group = find_next_block_group(discard_ctl, now);
     386     1565863 :         if (block_group) {
     387       99613 :                 u64 delay = discard_ctl->delay_ms * NSEC_PER_MSEC;
     388       99613 :                 u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit);
     389             : 
     390             :                 /*
     391             :                  * A single delayed workqueue item is responsible for
     392             :                  * discarding, so we can manage the bytes rate limit by keeping
     393             :                  * track of the previous discard.
     394             :                  */
     395       99613 :                 if (kbps_limit && discard_ctl->prev_discard) {
     396           0 :                         u64 bps_limit = ((u64)kbps_limit) * SZ_1K;
     397           0 :                         u64 bps_delay = div64_u64(discard_ctl->prev_discard *
     398             :                                                   NSEC_PER_SEC, bps_limit);
     399             : 
     400           0 :                         delay = max(delay, bps_delay);
     401             :                 }
     402             : 
     403             :                 /*
     404             :                  * This timeout is to hopefully prevent immediate discarding
     405             :                  * in a recently allocated block group.
     406             :                  */
     407       99613 :                 if (now < block_group->discard_eligible_time) {
     408       95549 :                         u64 bg_timeout = block_group->discard_eligible_time - now;
     409             : 
     410       95549 :                         delay = max(delay, bg_timeout);
     411             :                 }
     412             : 
     413       99613 :                 if (override && discard_ctl->prev_discard) {
     414         664 :                         u64 elapsed = now - discard_ctl->prev_discard_time;
     415             : 
     416         664 :                         if (delay > elapsed)
     417         526 :                                 delay -= elapsed;
     418             :                         else
     419             :                                 delay = 0;
     420             :                 }
     421             : 
     422       99613 :                 mod_delayed_work(discard_ctl->discard_workers,
     423             :                                  &discard_ctl->work, nsecs_to_jiffies(delay));
     424             :         }
     425             : }
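
A worked example of the byte-rate throttle above, as a standalone model with a
hypothetical kbps_limit: the previous discard's size is converted into the time
it would take at the configured rate, and the larger of that and the baseline
delay wins.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC  1000000000ULL
#define NSEC_PER_MSEC 1000000ULL
#define SZ_1K         1024ULL

int main(void)
{
        uint64_t delay = 1 * NSEC_PER_MSEC;          /* baseline delay_ms = 1 */
        uint64_t kbps_limit = 100 * 1024;            /* hypothetical: 100 MiB/s */
        uint64_t prev_discard = 64ULL * 1024 * 1024; /* 64 MiB trimmed last run */

        uint64_t bps_limit = kbps_limit * SZ_1K;
        uint64_t bps_delay = prev_discard * NSEC_PER_SEC / bps_limit;

        if (bps_delay > delay)  /* delay = max(delay, bps_delay) */
                delay = bps_delay;

        /* 64 MiB at 100 MiB/s -> 640 ms until the next discard */
        printf("throttled delay: %llu ms\n",
               (unsigned long long)(delay / NSEC_PER_MSEC));
        return 0;
}
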
     426             : 
     427             : /*
     428             :  * Responsible for scheduling the discard work.
     429             :  *
     430             :  * @discard_ctl:  discard control
     431             :  * @override:     override the current timer
     432             :  *
     433             :  * Discards are issued by a delayed workqueue item.  @override is used to
     434             :  * update the current delay as the baseline delay interval is reevaluated on
     435             :  * transaction commit.  This is also maxed with any other rate limit.
     436             :  */
     437     1590630 : void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
     438             :                                  bool override)
     439             : {
     440     1590630 :         const u64 now = ktime_get_ns();
     441             : 
     442     1590630 :         spin_lock(&discard_ctl->lock);
     443     1590630 :         __btrfs_discard_schedule_work(discard_ctl, now, override);
     444     1590630 :         spin_unlock(&discard_ctl->lock);
     445     1590630 : }
     446             : 
     447             : /*
     448             :  * Determine next step of a block_group.
     449             :  *
     450             :  * @discard_ctl: discard control
     451             :  * @block_group: block_group of interest
     452             :  *
     453             :  * Determine the next step for a block group after it's finished going through
     454             :  * a pass on a discard list.  If it is unused and fully trimmed, we can mark it
     455             :  * unused and send it to the unused_bgs path.  Otherwise, pass it onto the
     456             :  * appropriate filter list or let it fall off.
     457             :  */
     458         264 : static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
     459             :                                       struct btrfs_block_group *block_group)
     460             : {
     461         264 :         remove_from_discard_list(discard_ctl, block_group);
     462             : 
     463         264 :         if (block_group->used == 0) {
     464         141 :                 if (btrfs_is_free_space_trimmed(block_group))
     465         141 :                         btrfs_mark_bg_unused(block_group);
     466             :                 else
     467           0 :                         add_to_discard_unused_list(discard_ctl, block_group);
     468             :         } else {
     469         123 :                 btrfs_update_discard_index(discard_ctl, block_group);
     470             :         }
     471         264 : }
     472             : 
     473             : /*
     474             :  * Discard work queue callback
     475             :  *
     476             :  * @work: work
     477             :  *
     478             :  * Find the next block_group to start discarding and then discard a single
     479             :  * region.  It does this in a two-pass fashion: first extents and second
     480             :  * bitmaps.  Completely discarded block groups are sent to the unused_bgs path.
     481             :  */
     482       32755 : static void btrfs_discard_workfn(struct work_struct *work)
     483             : {
     484       32755 :         struct btrfs_discard_ctl *discard_ctl;
     485       32755 :         struct btrfs_block_group *block_group;
     486       32755 :         enum btrfs_discard_state discard_state;
     487       32755 :         int discard_index = 0;
     488       32755 :         u64 trimmed = 0;
     489       32755 :         u64 minlen = 0;
     490       32755 :         u64 now = ktime_get_ns();
     491             : 
     492       32755 :         discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);
     493             : 
     494       32755 :         block_group = peek_discard_list(discard_ctl, &discard_state,
     495             :                                         &discard_index, now);
     496       32755 :         if (!block_group || !btrfs_run_discard_work(discard_ctl))
     497       28588 :                 return;
     498       32638 :         if (now < block_group->discard_eligible_time) {
     499       28471 :                 btrfs_discard_schedule_work(discard_ctl, false);
     500       28471 :                 return;
     501             :         }
     502             : 
     503             :         /* Perform discarding */
     504        4167 :         minlen = discard_minlen[discard_index];
     505             : 
     506        4167 :         if (discard_state == BTRFS_DISCARD_BITMAPS) {
     507        1671 :                 u64 maxlen = 0;
     508             : 
     509             :                 /*
      510             :                  * Use the previous level's minimum discard length as the max
     511             :                  * length filter.  In the case something is added to make a
     512             :                  * region go beyond the max filter, the entire bitmap is set
     513             :                  * back to BTRFS_TRIM_STATE_UNTRIMMED.
     514             :                  */
     515        1671 :                 if (discard_index != BTRFS_DISCARD_INDEX_UNUSED)
     516        1530 :                         maxlen = discard_minlen[discard_index - 1];
     517             : 
     518        1671 :                 btrfs_trim_block_group_bitmaps(block_group, &trimmed,
     519             :                                        block_group->discard_cursor,
     520             :                                        btrfs_block_group_end(block_group),
     521             :                                        minlen, maxlen, true);
     522        1671 :                 discard_ctl->discard_bitmap_bytes += trimmed;
     523             :         } else {
     524        2496 :                 btrfs_trim_block_group_extents(block_group, &trimmed,
     525             :                                        block_group->discard_cursor,
     526             :                                        btrfs_block_group_end(block_group),
     527             :                                        minlen, true);
     528        2496 :                 discard_ctl->discard_extent_bytes += trimmed;
     529             :         }
     530             : 
     531             :         /* Determine next steps for a block_group */
     532        4167 :         if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
     533         528 :                 if (discard_state == BTRFS_DISCARD_BITMAPS) {
     534         264 :                         btrfs_finish_discard_pass(discard_ctl, block_group);
     535             :                 } else {
     536         264 :                         block_group->discard_cursor = block_group->start;
     537         264 :                         spin_lock(&discard_ctl->lock);
     538         264 :                         if (block_group->discard_state !=
     539             :                             BTRFS_DISCARD_RESET_CURSOR)
     540         264 :                                 block_group->discard_state =
     541             :                                                         BTRFS_DISCARD_BITMAPS;
     542         264 :                         spin_unlock(&discard_ctl->lock);
     543             :                 }
     544             :         }
     545             : 
     546        4167 :         now = ktime_get_ns();
     547        4167 :         spin_lock(&discard_ctl->lock);
     548        4167 :         discard_ctl->prev_discard = trimmed;
     549        4167 :         discard_ctl->prev_discard_time = now;
     550             :         /*
     551             :          * If the block group was removed from the discard list while it was
     552             :          * running in this workfn, then we didn't deref it, since this function
     553             :          * still owned that reference. But we set the discard_ctl->block_group
     554             :          * back to NULL, so we can use that condition to know that now we need
     555             :          * to deref the block_group.
     556             :          */
     557        4167 :         if (discard_ctl->block_group == NULL)
     558         264 :                 btrfs_put_block_group(block_group);
     559        4167 :         discard_ctl->block_group = NULL;
     560        4167 :         __btrfs_discard_schedule_work(discard_ctl, now, false);
     561        4167 :         spin_unlock(&discard_ctl->lock);
     562             : }
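
A toy model of the two-pass progression the workfn drives: each invocation
trims one region from the cursor onward; when the cursor reaches the end of the
block group, the extents pass rolls over into the bitmaps pass, and the end of
the bitmaps pass finishes the block group. All names and sizes here are
stand-ins.

#include <stdio.h>

/* stand-ins for the BTRFS_DISCARD_* states */
enum state { EXTENTS, BITMAPS, DONE };

struct bg {
        enum state state;
        unsigned long long start, len, cursor;
};

/* one workfn invocation: discard one region and advance the cursor */
static void discard_step(struct bg *bg, unsigned long long step)
{
        bg->cursor += step;
        if (bg->cursor < bg->start + bg->len)
                return;
        /* pass complete: extents roll over to bitmaps, bitmaps finish */
        bg->cursor = bg->start;
        bg->state = (bg->state == EXTENTS) ? BITMAPS : DONE;
}

int main(void)
{
        struct bg bg = { EXTENTS, 0, 4, 0 };    /* 4 regions per pass */
        unsigned long long steps = 0;

        while (bg.state != DONE) {
                discard_step(&bg, 1);
                steps++;
        }
        printf("fully discarded after %llu single-region steps\n", steps);
        return 0;
}
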
     563             : 
     564             : /*
     565             :  * Recalculate the base delay.
     566             :  *
     567             :  * @discard_ctl: discard control
     568             :  *
     569             :  * Recalculate the base delay which is based off the total number of
     570             :  * discardable_extents.  Clamp this between the lower_limit (iops_limit or 1ms)
     571             :  * and the upper_limit (BTRFS_DISCARD_MAX_DELAY_MSEC).
     572             :  */
     573      202981 : void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
     574             : {
     575      202981 :         s32 discardable_extents;
     576      202981 :         s64 discardable_bytes;
     577      202981 :         u32 iops_limit;
     578      202981 :         unsigned long min_delay = BTRFS_DISCARD_MIN_DELAY_MSEC;
     579      202981 :         unsigned long delay;
     580             : 
     581      202981 :         discardable_extents = atomic_read(&discard_ctl->discardable_extents);
     582      202981 :         if (!discardable_extents)
     583             :                 return;
     584             : 
     585       32618 :         spin_lock(&discard_ctl->lock);
     586             : 
     587             :         /*
     588             :          * The following is to fix a potential -1 discrepancy that we're not
     589             :          * sure how to reproduce. But given that this is the only place that
      590             :          * utilizes these numbers and this is only called from
      591             :          * btrfs_finish_extent_commit(), which is synchronized, we can correct
      592             :          * it here.
     593             :          */
     594       32618 :         if (discardable_extents < 0)
     595           7 :                 atomic_add(-discardable_extents,
     596             :                            &discard_ctl->discardable_extents);
     597             : 
     598       32618 :         discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes);
     599       32618 :         if (discardable_bytes < 0)
     600           3 :                 atomic64_add(-discardable_bytes,
     601             :                              &discard_ctl->discardable_bytes);
     602             : 
     603       32618 :         if (discardable_extents <= 0) {
     604           7 :                 spin_unlock(&discard_ctl->lock);
     605           7 :                 return;
     606             :         }
     607             : 
     608       32611 :         iops_limit = READ_ONCE(discard_ctl->iops_limit);
     609             : 
     610       32611 :         if (iops_limit) {
     611       32611 :                 delay = MSEC_PER_SEC / iops_limit;
     612             :         } else {
     613             :                 /*
     614             :                  * Unset iops_limit means go as fast as possible, so allow a
     615             :                  * delay of 0.
     616             :                  */
     617             :                 delay = 0;
     618             :                 min_delay = 0;
     619             :         }
     620             : 
     621       32611 :         delay = clamp(delay, min_delay, BTRFS_DISCARD_MAX_DELAY_MSEC);
     622       32611 :         discard_ctl->delay_ms = delay;
     623             : 
     624       32611 :         spin_unlock(&discard_ctl->lock);
     625             : }
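
The base-delay arithmetic above, worked for a few iops_limit values using the
constants from the top of this file: delay = MSEC_PER_SEC / iops_limit, clamped
to [1, 1000] ms, so the default limit of 1000 iops yields the 1 ms floor.

#include <stdio.h>

#define MSEC_PER_SEC   1000UL
#define MIN_DELAY_MSEC 1UL      /* BTRFS_DISCARD_MIN_DELAY_MSEC */
#define MAX_DELAY_MSEC 1000UL   /* BTRFS_DISCARD_MAX_DELAY_MSEC */

static unsigned long clamp_ul(unsigned long v, unsigned long lo,
                              unsigned long hi)
{
        return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
        unsigned long limits[] = { 1000, 10, 2000 };    /* iops_limit samples */

        for (int i = 0; i < 3; i++) {
                unsigned long delay = MSEC_PER_SEC / limits[i];

                delay = clamp_ul(delay, MIN_DELAY_MSEC, MAX_DELAY_MSEC);
                printf("iops_limit=%lu -> delay_ms=%lu\n", limits[i], delay);
        }
        return 0;
}
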
     626             : 
     627             : /*
     628             :  * Propagate discard counters.
     629             :  *
     630             :  * @block_group: block_group of interest
     631             :  *
     632             :  * Propagate deltas of counters up to the discard_ctl.  It maintains a current
      633             :  * counter and a previous counter, passing the delta up to the global stat.
      634             :  * The current counter value then becomes the previous counter value.
     635             :  */
     636     8379952 : void btrfs_discard_update_discardable(struct btrfs_block_group *block_group)
     637             : {
     638     8379952 :         struct btrfs_free_space_ctl *ctl;
     639     8379952 :         struct btrfs_discard_ctl *discard_ctl;
     640     8379952 :         s32 extents_delta;
     641     8379952 :         s64 bytes_delta;
     642             : 
     643     8379952 :         if (!block_group ||
     644     8379952 :             !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) ||
     645             :             !btrfs_is_block_group_data_only(block_group))
     646             :                 return;
     647             : 
     648     4183117 :         ctl = block_group->free_space_ctl;
     649     4183117 :         discard_ctl = &block_group->fs_info->discard_ctl;
     650             : 
     651     4183117 :         lockdep_assert_held(&ctl->tree_lock);
     652     4183117 :         extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] -
     653     4183117 :                         ctl->discardable_extents[BTRFS_STAT_PREV];
     654     4183117 :         if (extents_delta) {
     655      248001 :                 atomic_add(extents_delta, &discard_ctl->discardable_extents);
     656      248001 :                 ctl->discardable_extents[BTRFS_STAT_PREV] =
     657      248001 :                         ctl->discardable_extents[BTRFS_STAT_CURR];
     658             :         }
     659             : 
     660     4183117 :         bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] -
     661     4183117 :                       ctl->discardable_bytes[BTRFS_STAT_PREV];
     662     4183117 :         if (bytes_delta) {
     663     1933233 :                 atomic64_add(bytes_delta, &discard_ctl->discardable_bytes);
     664     1933235 :                 ctl->discardable_bytes[BTRFS_STAT_PREV] =
     665     1933235 :                         ctl->discardable_bytes[BTRFS_STAT_CURR];
     666             :         }
     667             : }
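
A minimal model of the CURR/PREV delta pattern above: local stats accumulate in
the CURR slot, only the delta against the last value pushed up (PREV) is added
to the global counter, and PREV then catches up. Names are stand-ins for the
kernel's.

#include <stdio.h>

enum { CURR, PREV };

static long global_extents;   /* stands in for discard_ctl->discardable_extents */

static void update_discardable(long stat[2])
{
        long delta = stat[CURR] - stat[PREV];

        if (delta) {
                global_extents += delta;
                stat[PREV] = stat[CURR];
        }
}

int main(void)
{
        long stat[2] = { 0, 0 };

        stat[CURR] = 5;         /* five extents became discardable */
        update_discardable(stat);
        stat[CURR] = 3;         /* two of them were allocated or trimmed */
        update_discardable(stat);
        printf("global = %ld (5, then -2)\n", global_extents);
        return 0;
}
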
     668             : 
     669             : /*
     670             :  * Punt unused_bgs list to discard lists.
     671             :  *
     672             :  * @fs_info: fs_info of interest
     673             :  *
     674             :  * The unused_bgs list needs to be punted to the discard lists because the
     675             :  * order of operations is changed.  In the normal synchronous discard path, the
     676             :  * block groups are trimmed via a single large trim in transaction commit.  This
     677             :  * is ultimately what we are trying to avoid with asynchronous discard.  Thus,
      678             :  * the punting must be done before going down the unused_bgs path.
     679             :  */
     680        3165 : void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
     681             : {
     682        3165 :         struct btrfs_block_group *block_group, *next;
     683             : 
     684        3165 :         spin_lock(&fs_info->unused_bgs_lock);
     685             :         /* We enabled async discard, so punt all to the queue */
     686       16154 :         list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
     687             :                                  bg_list) {
     688       12989 :                 list_del_init(&block_group->bg_list);
     689       12989 :                 btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
     690             :                 /*
     691             :                  * This put is for the get done by btrfs_mark_bg_unused.
     692             :                  * Queueing discard incremented it for discard's reference.
     693             :                  */
     694       12989 :                 btrfs_put_block_group(block_group);
     695             :         }
     696        3165 :         spin_unlock(&fs_info->unused_bgs_lock);
     697        3165 : }
     698             : 
     699             : /*
     700             :  * Purge discard lists.
     701             :  *
     702             :  * @discard_ctl: discard control
     703             :  *
     704             :  * If we are disabling async discard, we may have intercepted block groups that
     705             :  * are completely free and ready for the unused_bgs path.  As discarding will
     706             :  * now happen in transaction commit or not at all, we can safely mark the
     707             :  * corresponding block groups as unused and they will be sent on their merry
     708             :  * way to the unused_bgs list.
     709             :  */
     710        3237 : static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
     711             : {
     712        3237 :         struct btrfs_block_group *block_group, *next;
     713        3237 :         int i;
     714             : 
     715        3237 :         spin_lock(&discard_ctl->lock);
     716       16185 :         for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
     717       10972 :                 list_for_each_entry_safe(block_group, next,
     718             :                                          &discard_ctl->discard_list[i],
     719             :                                          discard_list) {
     720        1261 :                         list_del_init(&block_group->discard_list);
     721        1261 :                         spin_unlock(&discard_ctl->lock);
     722        1261 :                         if (block_group->used == 0)
     723         253 :                                 btrfs_mark_bg_unused(block_group);
     724        1261 :                         spin_lock(&discard_ctl->lock);
     725        1261 :                         btrfs_put_block_group(block_group);
     726             :                 }
     727             :         }
     728        3237 :         spin_unlock(&discard_ctl->lock);
     729        3237 : }
     730             : 
     731        3181 : void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
     732             : {
     733        3181 :         if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
     734          16 :                 btrfs_discard_cleanup(fs_info);
     735          16 :                 return;
     736             :         }
     737             : 
     738        3165 :         btrfs_discard_punt_unused_bgs_list(fs_info);
     739             : 
     740        3165 :         set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
     741             : }
     742             : 
     743          17 : void btrfs_discard_stop(struct btrfs_fs_info *fs_info)
     744             : {
     745          17 :         clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
     746          17 : }
     747             : 
     748        3472 : void btrfs_discard_init(struct btrfs_fs_info *fs_info)
     749             : {
     750        3472 :         struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
     751        3472 :         int i;
     752             : 
     753        3472 :         spin_lock_init(&discard_ctl->lock);
     754        3472 :         INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn);
     755             : 
     756       17360 :         for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++)
     757       10416 :                 INIT_LIST_HEAD(&discard_ctl->discard_list[i]);
     758             : 
     759        3472 :         discard_ctl->prev_discard = 0;
     760        3472 :         discard_ctl->prev_discard_time = 0;
     761        3472 :         atomic_set(&discard_ctl->discardable_extents, 0);
     762        3472 :         atomic64_set(&discard_ctl->discardable_bytes, 0);
     763        3472 :         discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE;
     764        3472 :         discard_ctl->delay_ms = BTRFS_DISCARD_MAX_DELAY_MSEC;
     765        3472 :         discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
     766        3472 :         discard_ctl->kbps_limit = 0;
     767        3472 :         discard_ctl->discard_extent_bytes = 0;
     768        3472 :         discard_ctl->discard_bitmap_bytes = 0;
     769        3472 :         atomic64_set(&discard_ctl->discard_bytes_saved, 0);
     770        3472 : }
     771             : 
     772        3237 : void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
     773             : {
     774        3237 :         btrfs_discard_stop(fs_info);
     775        3237 :         cancel_delayed_work_sync(&fs_info->discard_ctl.work);
     776        3237 :         btrfs_discard_purge_list(&fs_info->discard_ctl);
     777        3237 : }

Generated by: LCOV version 1.14