LCOV - code coverage report
Current view: top level - fs/xfs/libxfs - xfs_defer.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-achx @ Mon Jul 31 20:08:12 PDT 2023 Lines: 312 352 88.6 %
Date: 2023-07-31 20:08:12 Functions: 21 22 95.5 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0+
       2             : /*
       3             :  * Copyright (C) 2016 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <darrick.wong@oracle.com>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_log_format.h"
      11             : #include "xfs_trans_resv.h"
      12             : #include "xfs_mount.h"
      13             : #include "xfs_defer.h"
      14             : #include "xfs_trans.h"
      15             : #include "xfs_buf_item.h"
      16             : #include "xfs_inode.h"
      17             : #include "xfs_inode_item.h"
      18             : #include "xfs_trace.h"
      19             : #include "xfs_icache.h"
      20             : #include "xfs_log.h"
      21             : #include "xfs_rmap.h"
      22             : #include "xfs_refcount.h"
      23             : #include "xfs_bmap.h"
      24             : #include "xfs_alloc.h"
      25             : #include "xfs_buf.h"
      26             : #include "xfs_da_format.h"
      27             : #include "xfs_da_btree.h"
      28             : #include "xfs_attr.h"
      29             : #include "xfs_swapext.h"
      30             : 
      31             : static struct kmem_cache        *xfs_defer_pending_cache;
      32             : 
      33             : /*
      34             :  * Deferred Operations in XFS
      35             :  *
      36             :  * Due to the way locking rules work in XFS, certain transactions (block
      37             :  * mapping and unmapping, typically) have permanent reservations so that
      38             :  * we can roll the transaction to adhere to AG locking order rules and
      39             :  * to unlock buffers between metadata updates.  Prior to rmap/reflink,
      40             :  * the mapping code had a mechanism to perform these deferrals for
      41             :  * extents that were going to be freed; this code makes that facility
      42             :  * more generic.
      43             :  *
      44             :  * When adding the reverse mapping and reflink features, it became
      45             :  * necessary to perform complex remapping multi-transactions to comply
      46             :  * with AG locking order rules, and to be able to spread a single
      47             :  * refcount update operation (an operation on an n-block extent can
      48             :  * update as many as n records!) among multiple transactions.  XFS can
      49             :  * roll a transaction to facilitate this, but using this facility
      50             :  * requires us to log "intent" items in case log recovery needs to
      51             :  * redo the operation, and to log "done" items to indicate that redo
      52             :  * is not necessary.
      53             :  *
      54             :  * Deferred work is tracked in xfs_defer_pending items.  Each pending
      55             :  * item tracks one type of deferred work.  Incoming work items (which
      56             :  * have not yet had an intent logged) are attached to a pending item
      57             :  * on the dop_intake list, where they wait for the caller to finish
      58             :  * the deferred operations.
      59             :  *
      60             :  * Finishing a set of deferred operations is an involved process.  To
      61             :  * start, we define "rolling a deferred-op transaction" as follows:
      62             :  *
      63             :  * > For each xfs_defer_pending item on the dop_intake list,
      64             :  *   - Sort the work items in AG order.  XFS locking
      65             :  *     order rules require us to lock buffers in AG order.
      66             :  *   - Create a log intent item for that type.
      67             :  *   - Attach it to the pending item.
      68             :  *   - Move the pending item from the dop_intake list to the
      69             :  *     dop_pending list.
      70             :  * > Roll the transaction.
      71             :  *
      72             :  * NOTE: To avoid exceeding the transaction reservation, we limit the
      73             :  * number of items that we attach to a given xfs_defer_pending.
      74             :  *
      75             :  * The actual finishing process looks like this:
      76             :  *
      77             :  * > For each xfs_defer_pending in the dop_pending list,
      78             :  *   - Roll the deferred-op transaction as above.
      79             :  *   - Create a log done item for that type, and attach it to the
      80             :  *     log intent item.
      81             :  *   - For each work item attached to the log intent item,
      82             :  *     * Perform the described action.
      83             :  *     * Attach the work item to the log done item.
      84             :  *     * If the result of doing the work was -EAGAIN, ->finish work
      85             :  *       wants a new transaction.  See the "Requesting a Fresh
      86             :  *       Transaction while Finishing Deferred Work" section below for
      87             :  *       details.
      88             :  *
      89             :  * The key here is that we must log an intent item for all pending
      90             :  * work items every time we roll the transaction, and that we must log
      91             :  * a done item as soon as the work is completed.  With this mechanism
      92             :  * we can perform complex remapping operations, chaining intent items
      93             :  * as needed.
      94             :  *
      95             :  * Requesting a Fresh Transaction while Finishing Deferred Work
      96             :  *
      97             :  * If ->finish_item decides that it needs a fresh transaction to
      98             :  * finish the work, it must ask its caller (xfs_defer_finish) for a
      99             :  * continuation.  The most likely cause of this circumstance are the
     100             :  * refcount adjust functions deciding that they've logged enough items
     101             :  * to be at risk of exceeding the transaction reservation.
     102             :  *
     103             :  * To get a fresh transaction, we want to log the existing log done
     104             :  * item to prevent the log intent item from replaying, immediately log
     105             :  * a new log intent item with the unfinished work items, roll the
     106             :  * transaction, and re-call ->finish_item wherever it left off.  The
     107             :  * log done item and the new log intent item must be in the same
     108             :  * transaction or atomicity cannot be guaranteed; defer_finish ensures
     109             :  * that this happens.
     110             :  *
     111             :  * This requires some coordination between ->finish_item and
     112             :  * defer_finish.  Upon deciding to request a new transaction,
     113             :  * ->finish_item should update the current work item to reflect the
     114             :  * unfinished work.  Next, it should reset the log done item's list
     115             :  * count to the number of items finished, and return -EAGAIN.
     116             :  * defer_finish sees the -EAGAIN, logs the new log intent item
     117             :  * with the remaining work items, and leaves the xfs_defer_pending
     118             :  * item at the head of the dop_work queue.  Then it rolls the
     119             :  * transaction and picks up processing where it left off.  It is
     120             :  * required that ->finish_item must be careful to leave enough
     121             :  * transaction reservation to fit the new log intent item.
     122             :  *
     123             :  * This is an example of remapping the extent (E, E+B) into file X at
     124             :  * offset A and dealing with the extent (C, C+B) already being mapped
     125             :  * there:
     126             :  * +-------------------------------------------------+
     127             :  * | Unmap file X startblock C offset A length B     | t0
     128             :  * | Intent to reduce refcount for extent (C, B)     |
     129             :  * | Intent to remove rmap (X, C, A, B)              |
     130             :  * | Intent to free extent (D, 1) (bmbt block)       |
     131             :  * | Intent to map (X, A, B) at startblock E         |
     132             :  * +-------------------------------------------------+
     133             :  * | Map file X startblock E offset A length B       | t1
     134             :  * | Done mapping (X, E, A, B)                       |
     135             :  * | Intent to increase refcount for extent (E, B)   |
     136             :  * | Intent to add rmap (X, E, A, B)                 |
     137             :  * +-------------------------------------------------+
     138             :  * | Reduce refcount for extent (C, B)               | t2
     139             :  * | Done reducing refcount for extent (C, 9)        |
     140             :  * | Intent to reduce refcount for extent (C+9, B-9) |
     141             :  * | (ran out of space after 9 refcount updates)     |
     142             :  * +-------------------------------------------------+
     143             :  * | Reduce refcount for extent (C+9, B+9)           | t3
     144             :  * | Done reducing refcount for extent (C+9, B-9)    |
     145             :  * | Increase refcount for extent (E, B)             |
     146             :  * | Done increasing refcount for extent (E, B)      |
     147             :  * | Intent to free extent (C, B)                    |
     148             :  * | Intent to free extent (F, 1) (refcountbt block) |
     149             :  * | Intent to remove rmap (F, 1, REFC)              |
     150             :  * +-------------------------------------------------+
     151             :  * | Remove rmap (X, C, A, B)                        | t4
     152             :  * | Done removing rmap (X, C, A, B)                 |
     153             :  * | Add rmap (X, E, A, B)                           |
     154             :  * | Done adding rmap (X, E, A, B)                   |
     155             :  * | Remove rmap (F, 1, REFC)                        |
     156             :  * | Done removing rmap (F, 1, REFC)                 |
     157             :  * +-------------------------------------------------+
     158             :  * | Free extent (C, B)                              | t5
     159             :  * | Done freeing extent (C, B)                      |
     160             :  * | Free extent (D, 1)                              |
     161             :  * | Done freeing extent (D, 1)                      |
     162             :  * | Free extent (F, 1)                              |
     163             :  * | Done freeing extent (F, 1)                      |
     164             :  * +-------------------------------------------------+
     165             :  *
     166             :  * If we should crash before t2 commits, log recovery replays
     167             :  * the following intent items:
     168             :  *
     169             :  * - Intent to reduce refcount for extent (C, B)
     170             :  * - Intent to remove rmap (X, C, A, B)
     171             :  * - Intent to free extent (D, 1) (bmbt block)
     172             :  * - Intent to increase refcount for extent (E, B)
     173             :  * - Intent to add rmap (X, E, A, B)
     174             :  *
     175             :  * In the process of recovering, it should also generate and take care
     176             :  * of these intent items:
     177             :  *
     178             :  * - Intent to free extent (C, B)
     179             :  * - Intent to free extent (F, 1) (refcountbt block)
     180             :  * - Intent to remove rmap (F, 1, REFC)
     181             :  *
     182             :  * Note that the continuation requested between t2 and t3 is likely to
     183             :  * reoccur.
     184             :  */
     185             : 
     186             : static const struct xfs_defer_op_type *defer_op_types[] = {
     187             :         [XFS_DEFER_OPS_TYPE_BMAP]       = &xfs_bmap_update_defer_type,
     188             :         [XFS_DEFER_OPS_TYPE_REFCOUNT]   = &xfs_refcount_update_defer_type,
     189             :         [XFS_DEFER_OPS_TYPE_RMAP]       = &xfs_rmap_update_defer_type,
     190             :         [XFS_DEFER_OPS_TYPE_FREE]       = &xfs_extent_free_defer_type,
     191             :         [XFS_DEFER_OPS_TYPE_AGFL_FREE]  = &xfs_agfl_free_defer_type,
     192             :         [XFS_DEFER_OPS_TYPE_ATTR]       = &xfs_attr_defer_type,
     193             :         [XFS_DEFER_OPS_TYPE_SWAPEXT]    = &xfs_swapext_defer_type,
     194             : };
     195             : 
     196             : /*
     197             :  * Ensure there's a log intent item associated with this deferred work item if
     198             :  * the operation must be restarted on crash.  Returns 1 if there's a log item;
     199             :  * 0 if there isn't; or a negative errno.
     200             :  */
     201             : static int
     202  1290335106 : xfs_defer_create_intent(
     203             :         struct xfs_trans                *tp,
     204             :         struct xfs_defer_pending        *dfp,
     205             :         bool                            sort)
     206             : {
     207  1290335106 :         const struct xfs_defer_op_type  *ops = defer_op_types[dfp->dfp_type];
     208  1290313122 :         struct xfs_log_item             *lip;
     209             : 
     210  1290313122 :         if (dfp->dfp_intent)
     211             :                 return 1;
     212             : 
     213  1290412010 :         lip = ops->create_intent(tp, &dfp->dfp_work, dfp->dfp_count, sort);
     214  1290935974 :         if (!lip)
     215             :                 return 0;
     216  1105756380 :         if (IS_ERR(lip))
     217           0 :                 return PTR_ERR(lip);
     218             : 
     219  1105756380 :         dfp->dfp_intent = lip;
     220  1105756380 :         return 1;
     221             : }
     222             : 
     223             : /*
     224             :  * For each pending item in the intake list, log its intent item and the
     225             :  * associated extents, then add the entire intake list to the end of
     226             :  * the pending list.
     227             :  *
     228             :  * Returns 1 if at least one log item was associated with the deferred work;
     229             :  * 0 if there are no log items; or a negative errno.
     230             :  */
     231             : static int
     232  1290512248 : xfs_defer_create_intents(
     233             :         struct xfs_trans                *tp)
     234             : {
     235  1290512248 :         struct xfs_defer_pending        *dfp;
     236  1290512248 :         int                             ret = 0;
     237             : 
     238  2478653419 :         list_for_each_entry(dfp, &tp->t_dfops, dfp_list) {
     239  1187802631 :                 int                     ret2;
     240             : 
     241  1187802631 :                 trace_xfs_defer_create_intent(tp->t_mountp, dfp);
     242  1187485182 :                 ret2 = xfs_defer_create_intent(tp, dfp, true);
     243  1188141171 :                 if (ret2 < 0)
     244           0 :                         return ret2;
     245  1188141171 :                 ret |= ret2;
     246             :         }
     247             :         return ret;
     248             : }
     249             : 
     250             : /* Abort all the intents that were committed. */
     251             : STATIC void
     252        4777 : xfs_defer_trans_abort(
     253             :         struct xfs_trans                *tp,
     254             :         struct list_head                *dop_pending)
     255             : {
     256        4777 :         struct xfs_defer_pending        *dfp;
     257        4777 :         const struct xfs_defer_op_type  *ops;
     258             : 
     259        4777 :         trace_xfs_defer_trans_abort(tp, _RET_IP_);
     260             : 
     261             :         /* Abort intent items that don't have a done item. */
     262       12574 :         list_for_each_entry(dfp, dop_pending, dfp_list) {
     263        7797 :                 ops = defer_op_types[dfp->dfp_type];
     264        7799 :                 trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
     265        7799 :                 if (dfp->dfp_intent && !dfp->dfp_done) {
     266        3232 :                         ops->abort_intent(dfp->dfp_intent);
     267        3232 :                         dfp->dfp_intent = NULL;
     268             :                 }
     269             :         }
     270        4777 : }
     271             : 
     272             : /*
     273             :  * Capture resources that the caller said not to release ("held") when the
     274             :  * transaction commits.  Caller is responsible for zero-initializing @dres.
     275             :  */
     276             : static int
     277  1318429961 : xfs_defer_save_resources(
     278             :         struct xfs_defer_resources      *dres,
     279             :         struct xfs_trans                *tp)
     280             : {
     281  1318429961 :         struct xfs_buf_log_item         *bli;
     282  1318429961 :         struct xfs_inode_log_item       *ili;
     283  1318429961 :         struct xfs_log_item             *lip;
     284             : 
     285  1318429961 :         BUILD_BUG_ON(NBBY * sizeof(dres->dr_ordered) < XFS_DEFER_OPS_NR_BUFS);
     286             : 
     287  7415906468 :         list_for_each_entry(lip, &tp->t_items, li_trans) {
     288  6097073134 :                 switch (lip->li_type) {
     289  2512909548 :                 case XFS_LI_BUF:
     290  2512909548 :                         bli = container_of(lip, struct xfs_buf_log_item,
     291             :                                            bli_item);
     292  2512909548 :                         if (bli->bli_flags & XFS_BLI_HOLD) {
     293     4189240 :                                 if (dres->dr_bufs >= XFS_DEFER_OPS_NR_BUFS) {
     294           0 :                                         ASSERT(0);
     295           0 :                                         return -EFSCORRUPTED;
     296             :                                 }
     297     4189240 :                                 if (bli->bli_flags & XFS_BLI_ORDERED)
     298       73981 :                                         dres->dr_ordered |=
     299       73981 :                                                         (1U << dres->dr_bufs);
     300             :                                 else
     301     4115259 :                                         xfs_trans_dirty_buf(tp, bli->bli_buf);
     302     4190849 :                                 dres->dr_bp[dres->dr_bufs++] = bli->bli_buf;
     303             :                         }
     304             :                         break;
     305  1821962817 :                 case XFS_LI_INODE:
     306  1821962817 :                         ili = container_of(lip, struct xfs_inode_log_item,
     307             :                                            ili_item);
     308  1821962817 :                         if (ili->ili_lock_flags == 0) {
     309  1795426350 :                                 if (dres->dr_inos >= XFS_DEFER_OPS_NR_INODES) {
     310           0 :                                         ASSERT(0);
     311           0 :                                         return -EFSCORRUPTED;
     312             :                                 }
     313  1795426350 :                                 xfs_trans_log_inode(tp, ili->ili_inode,
     314             :                                                     XFS_ILOG_CORE);
     315  1795847707 :                                 dres->dr_ip[dres->dr_inos++] = ili->ili_inode;
     316             :                         }
     317             :                         break;
     318             :                 default:
     319             :                         break;
     320             :                 }
     321             :         }
     322             : 
     323             :         return 0;
     324             : }
     325             : 
     326             : /* Attach the held resources to the transaction. */
     327             : static void
     328  1318847768 : xfs_defer_restore_resources(
     329             :         struct xfs_trans                *tp,
     330             :         struct xfs_defer_resources      *dres)
     331             : {
     332  1318847768 :         unsigned short                  i;
     333             : 
     334             :         /* Rejoin the joined inodes. */
     335  3114663018 :         for (i = 0; i < dres->dr_inos; i++)
     336  1795662235 :                 xfs_trans_ijoin(tp, dres->dr_ip[i], 0);
     337             : 
     338             :         /* Rejoin the buffers and dirty them so the log moves forward. */
     339  1323179273 :         for (i = 0; i < dres->dr_bufs; i++) {
     340     4189739 :                 xfs_trans_bjoin(tp, dres->dr_bp[i]);
     341     4191349 :                 if (dres->dr_ordered & (1U << i))
     342       73980 :                         xfs_trans_ordered_buf(tp, dres->dr_bp[i]);
     343     4191351 :                 xfs_trans_bhold(tp, dres->dr_bp[i]);
     344             :         }
     345  1318989534 : }
     346             : 
     347             : /* Roll a transaction so we can do some deferred op processing. */
     348             : STATIC int
     349  1318541862 : xfs_defer_trans_roll(
     350             :         struct xfs_trans                **tpp)
     351             : {
     352  1318541862 :         struct xfs_defer_resources      dres = { };
     353  1318541862 :         int                             error;
     354             : 
     355  1318541862 :         error = xfs_defer_save_resources(&dres, *tpp);
     356  1318827114 :         if (error)
     357             :                 return error;
     358             : 
     359  1318822325 :         trace_xfs_defer_trans_roll(*tpp, _RET_IP_);
     360             : 
     361             :         /*
     362             :          * Roll the transaction.  Rolling always given a new transaction (even
     363             :          * if committing the old one fails!) to hand back to the caller, so we
     364             :          * join the held resources to the new transaction so that we always
     365             :          * return with the held resources joined to @tpp, no matter what
     366             :          * happened.
     367             :          */
     368  1318446544 :         error = xfs_trans_roll(tpp);
     369             : 
     370  1318910876 :         xfs_defer_restore_resources(*tpp, &dres);
     371             : 
     372  1318956444 :         if (error)
     373         234 :                 trace_xfs_defer_trans_roll_error(*tpp, error);
     374             :         return error;
     375             : }
     376             : 
     377             : /*
     378             :  * Free up any items left in the list.
     379             :  */
     380             : static void
     381        9898 : xfs_defer_cancel_list(
     382             :         struct xfs_mount                *mp,
     383             :         struct list_head                *dop_list)
     384             : {
     385        9898 :         struct xfs_defer_pending        *dfp;
     386        9898 :         struct xfs_defer_pending        *pli;
     387        9898 :         struct list_head                *pwi;
     388        9898 :         struct list_head                *n;
     389        9898 :         const struct xfs_defer_op_type  *ops;
     390             : 
     391             :         /*
     392             :          * Free the pending items.  Caller should already have arranged
     393             :          * for the intent items to be released.
     394             :          */
     395       18050 :         list_for_each_entry_safe(dfp, pli, dop_list, dfp_list) {
     396        8153 :                 ops = defer_op_types[dfp->dfp_type];
     397        8152 :                 trace_xfs_defer_cancel_list(mp, dfp);
     398        8152 :                 list_del(&dfp->dfp_list);
     399       11780 :                 list_for_each_safe(pwi, n, &dfp->dfp_work) {
     400        3628 :                         list_del(pwi);
     401        3628 :                         dfp->dfp_count--;
     402        3628 :                         trace_xfs_defer_cancel_item(mp, dfp, pwi);
     403        3628 :                         ops->cancel_item(pwi);
     404             :                 }
     405        8152 :                 ASSERT(dfp->dfp_count == 0);
     406        8152 :                 kmem_cache_free(xfs_defer_pending_cache, dfp);
     407             :         }
     408        9897 : }
     409             : 
     410             : /*
     411             :  * Prevent a log intent item from pinning the tail of the log by logging a
     412             :  * done item to release the intent item; and then log a new intent item.
     413             :  * The caller should provide a fresh transaction and roll it after we're done.
     414             :  */
     415             : static int
     416  1147660031 : xfs_defer_relog(
     417             :         struct xfs_trans                **tpp,
     418             :         struct list_head                *dfops)
     419             : {
     420  1147660031 :         struct xlog                     *log = (*tpp)->t_mountp->m_log;
     421  1147660031 :         struct xfs_defer_pending        *dfp;
     422  1147660031 :         xfs_lsn_t                       threshold_lsn = NULLCOMMITLSN;
     423             : 
     424             : 
     425  1147660031 :         ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES);
     426             : 
     427  2968595544 :         list_for_each_entry(dfp, dfops, dfp_list) {
     428             :                 /*
     429             :                  * If the log intent item for this deferred op is not a part of
     430             :                  * the current log checkpoint, relog the intent item to keep
     431             :                  * the log tail moving forward.  We're ok with this being racy
     432             :                  * because an incorrect decision means we'll be a little slower
     433             :                  * at pushing the tail.
     434             :                  */
     435  3592398641 :                 if (dfp->dfp_intent == NULL ||
     436  1770945735 :                     xfs_log_item_in_current_chkpt(dfp->dfp_intent))
     437  1820857800 :                         continue;
     438             : 
     439             :                 /*
     440             :                  * Figure out where we need the tail to be in order to maintain
     441             :                  * the minimum required free space in the log.  Only sample
     442             :                  * the log threshold once per call.
     443             :                  */
     444      595106 :                 if (threshold_lsn == NULLCOMMITLSN) {
     445      564274 :                         threshold_lsn = xlog_grant_push_threshold(log, 0);
     446      564275 :                         if (threshold_lsn == NULLCOMMITLSN)
     447             :                                 break;
     448             :                 }
     449       77714 :                 if (XFS_LSN_CMP(dfp->dfp_intent->li_lsn, threshold_lsn) >= 0)
     450        4845 :                         continue;
     451             : 
     452       72869 :                 trace_xfs_defer_relog_intent((*tpp)->t_mountp, dfp);
     453       72869 :                 XFS_STATS_INC((*tpp)->t_mountp, defer_relog);
     454       72869 :                 dfp->dfp_intent = xfs_trans_item_relog(dfp->dfp_intent, *tpp);
     455             :         }
     456             : 
     457  1147646020 :         if ((*tpp)->t_flags & XFS_TRANS_DIRTY)
     458       81301 :                 return xfs_defer_trans_roll(tpp);
     459             :         return 0;
     460             : }
     461             : 
     462             : /*
     463             :  * Log an intent-done item for the first pending intent, and finish the work
     464             :  * items.
     465             :  */
     466             : static int
     467  1290884912 : xfs_defer_finish_one(
     468             :         struct xfs_trans                *tp,
     469             :         struct xfs_defer_pending        *dfp)
     470             : {
     471  1290884912 :         const struct xfs_defer_op_type  *ops = defer_op_types[dfp->dfp_type];
     472  1290898970 :         struct xfs_btree_cur            *state = NULL;
     473  1290898970 :         struct list_head                *li, *n;
     474  1290898970 :         int                             error;
     475             : 
     476  1290898970 :         trace_xfs_defer_pending_finish(tp->t_mountp, dfp);
     477             : 
     478  1290826383 :         dfp->dfp_done = ops->create_done(tp, dfp->dfp_intent, dfp->dfp_count);
     479  2551970761 :         list_for_each_safe(li, n, &dfp->dfp_work) {
     480  1363676120 :                 list_del(li);
     481  1363527701 :                 dfp->dfp_count--;
     482  1363527701 :                 trace_xfs_defer_finish_item(tp->t_mountp, dfp, li);
     483  1363437475 :                 error = ops->finish_item(tp, dfp->dfp_done, li, &state);
     484  1363709832 :                 if (error == -EAGAIN) {
     485   102731703 :                         int             ret;
     486             : 
     487             :                         /*
     488             :                          * Caller wants a fresh transaction; put the work item
     489             :                          * back on the list and log a new log intent item to
     490             :                          * replace the old one.  See "Requesting a Fresh
     491             :                          * Transaction while Finishing Deferred Work" above.
     492             :                          */
     493   102731703 :                         list_add(li, &dfp->dfp_work);
     494   102721688 :                         dfp->dfp_count++;
     495   102721688 :                         dfp->dfp_done = NULL;
     496   102721688 :                         dfp->dfp_intent = NULL;
     497   102721688 :                         ret = xfs_defer_create_intent(tp, dfp, false);
     498   102733533 :                         if (ret < 0)
     499           0 :                                 error = ret;
     500             :                 }
     501             : 
     502  1363711662 :                 if (error)
     503   102739272 :                         goto out;
     504             :         }
     505             : 
     506             :         /* Done with the dfp, free it. */
     507  1188294641 :         list_del(&dfp->dfp_list);
     508  1188169020 :         kmem_cache_free(xfs_defer_pending_cache, dfp);
     509  1188271609 :         tp->t_dfops_nr--;
     510  1188271609 :         tp->t_dfops_finished++;
     511  1291010881 : out:
     512  1291010881 :         if (ops->finish_cleanup)
     513   597712322 :                 ops->finish_cleanup(tp, state, error);
     514  1291063797 :         return error;
     515             : }
     516             : 
     517             : /*
     518             :  * Finish all the pending work.  This involves logging intent items for
     519             :  * any work items that wandered in since the last transaction roll (if
     520             :  * one has even happened), rolling the transaction, and finishing the
     521             :  * work items in the first item on the logged-and-pending list.
     522             :  *
     523             :  * If an inode is provided, relog it to the new transaction.
     524             :  */
int
xfs_defer_finish_noroll(
	struct xfs_trans		**tp)
{
	struct xfs_defer_pending	*dfp = NULL;
	int				error = 0;
	LIST_HEAD(dop_pending);

	/* Deferred work requires a permanent (rollable) log reservation. */
	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);

	trace_xfs_defer_finish(*tp, _RET_IP_);

	/* Until we run out of pending work to finish... */
	while (!list_empty(&dop_pending) || !list_empty(&(*tp)->t_dfops)) {
		/*
		 * Deferred items that are created in the process of finishing
		 * other deferred work items should be queued at the head of
		 * the pending list, which puts them ahead of the deferred work
		 * that was created by the caller.  This keeps the number of
		 * pending work items to a minimum, which decreases the amount
		 * of time that any one intent item can stick around in memory,
		 * pinning the log tail.
		 */
		int has_intents = xfs_defer_create_intents(*tp);

		/* Move freshly queued work to the head of the pending list. */
		list_splice_init(&(*tp)->t_dfops, &dop_pending);

		/* Track the high-water mark of queued dfops. */
		(*tp)->t_dfops_nr_max = max((*tp)->t_dfops_nr,
					    (*tp)->t_dfops_nr_max);

		if (has_intents < 0) {
			error = has_intents;
			goto out_shutdown;
		}
		/*
		 * Roll if we logged any new intents, or if the previous
		 * iteration only partially finished a dfp (dfp != NULL after
		 * an -EAGAIN from xfs_defer_finish_one below).
		 */
		if (has_intents || dfp) {
			error = xfs_defer_trans_roll(tp);
			if (error)
				goto out_shutdown;

			/* Relog intent items to keep the log moving. */
			error = xfs_defer_relog(tp, &dop_pending);
			if (error)
				goto out_shutdown;
		}

		/*
		 * Finish (possibly partially, signalled by -EAGAIN) the
		 * oldest pending item; anything else fatal shuts us down.
		 */
		dfp = list_first_entry(&dop_pending, struct xfs_defer_pending,
				       dfp_list);
		error = xfs_defer_finish_one(*tp, dfp);
		if (error && error != -EAGAIN)
			goto out_shutdown;
	}

	trace_xfs_defer_finish_done(*tp, _RET_IP_);
	return 0;

out_shutdown:
	/* Abort logged intents, shut down, and discard all pending work. */
	xfs_defer_trans_abort(*tp, &dop_pending);
	xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE);
	trace_xfs_defer_finish_error(*tp, error);
	xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending);
	(*tp)->t_dfops_nr = 0;
	xfs_defer_cancel(*tp);
	return error;
}
     589             : 
     590             : int
     591   185376121 : xfs_defer_finish(
     592             :         struct xfs_trans        **tp)
     593             : {
     594   185376121 :         int                     error;
     595             : 
     596             :         /*
     597             :          * Finish and roll the transaction once more to avoid returning to the
     598             :          * caller with a dirty transaction.
     599             :          */
     600   185376121 :         error = xfs_defer_finish_noroll(tp);
     601   185350053 :         if (error)
     602             :                 return error;
     603   185348469 :         if ((*tp)->t_flags & XFS_TRANS_DIRTY) {
     604   171093343 :                 error = xfs_defer_trans_roll(tp);
     605   171115877 :                 if (error) {
     606           7 :                         xfs_force_shutdown((*tp)->t_mountp,
     607             :                                            SHUTDOWN_CORRUPT_INCORE);
     608           7 :                         return error;
     609             :                 }
     610             :         }
     611             : 
     612             :         /* Reset LOWMODE now that we've finished all the dfops. */
     613   185370996 :         ASSERT(list_empty(&(*tp)->t_dfops));
     614   185370996 :         (*tp)->t_flags &= ~XFS_TRANS_LOWMODE;
     615   185370996 :         return 0;
     616             : }
     617             : 
     618             : void
     619        5120 : xfs_defer_cancel(
     620             :         struct xfs_trans        *tp)
     621             : {
     622        5120 :         struct xfs_mount        *mp = tp->t_mountp;
     623             : 
     624        5120 :         trace_xfs_defer_cancel(tp, _RET_IP_);
     625        5120 :         xfs_defer_cancel_list(mp, &tp->t_dfops);
     626        5121 :         tp->t_dfops_nr = 0;
     627        5121 : }
     628             : 
     629             : /* Add an item for later deferred processing. */
     630             : void
     631  1260797991 : xfs_defer_add(
     632             :         struct xfs_trans                *tp,
     633             :         enum xfs_defer_ops_type         type,
     634             :         struct list_head                *li)
     635             : {
     636  1260797991 :         struct xfs_defer_pending        *dfp = NULL;
     637  1260797991 :         const struct xfs_defer_op_type  *ops = defer_op_types[type];
     638             : 
     639  1260692852 :         ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
     640  1260692852 :         BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX);
     641             : 
     642             :         /*
     643             :          * Add the item to a pending item at the end of the intake list.
     644             :          * If the last pending item has the same type, reuse it.  Else,
     645             :          * create a new pending item at the end of the intake list.
     646             :          */
     647  1260692852 :         if (!list_empty(&tp->t_dfops)) {
     648   392465514 :                 dfp = list_last_entry(&tp->t_dfops,
     649             :                                 struct xfs_defer_pending, dfp_list);
     650   392465514 :                 if (dfp->dfp_type != type ||
     651    97555365 :                     (ops->max_items && dfp->dfp_count >= ops->max_items))
     652             :                         dfp = NULL;
     653             :         }
     654    72620307 :         if (!dfp) {
     655  1188072545 :                 dfp = kmem_cache_zalloc(xfs_defer_pending_cache,
     656             :                                 GFP_NOFS | __GFP_NOFAIL);
     657  1188188993 :                 dfp->dfp_type = type;
     658  1188188993 :                 dfp->dfp_intent = NULL;
     659  1188188993 :                 dfp->dfp_done = NULL;
     660  1188188993 :                 dfp->dfp_count = 0;
     661  1188188993 :                 INIT_LIST_HEAD(&dfp->dfp_work);
     662  1188188993 :                 list_add_tail(&dfp->dfp_list, &tp->t_dfops);
     663  1187966653 :                 tp->t_dfops_nr++;
     664             :         }
     665             : 
     666  1260586960 :         list_add_tail(li, &dfp->dfp_work);
     667  1260676792 :         trace_xfs_defer_add_item(tp->t_mountp, dfp, li);
     668  1260669897 :         dfp->dfp_count++;
     669  1260669897 : }
     670             : 
     671             : /*
     672             :  * Move deferred ops from one transaction to another and reset the source to
     673             :  * initial state. This is primarily used to carry state forward across
     674             :  * transaction rolls with pending dfops.
     675             :  */
     676             : void
     677  1330906080 : xfs_defer_move(
     678             :         struct xfs_trans        *dtp,
     679             :         struct xfs_trans        *stp)
     680             : {
     681  1330906080 :         list_splice_init(&stp->t_dfops, &dtp->t_dfops);
     682  1330906080 :         dtp->t_dfops_nr += stp->t_dfops_nr;
     683  1330906080 :         dtp->t_dfops_nr_max = stp->t_dfops_nr_max;
     684  1330906080 :         dtp->t_dfops_finished = stp->t_dfops_finished;
     685  1330906080 :         stp->t_dfops_nr = 0;
     686  1330906080 :         stp->t_dfops_nr_max = 0;
     687  1330906080 :         stp->t_dfops_finished = 0;
     688             : 
     689             :         /*
     690             :          * Low free space mode was historically controlled by a dfops field.
     691             :          * This meant that low mode state potentially carried across multiple
     692             :          * transaction rolls. Transfer low mode on a dfops move to preserve
     693             :          * that behavior.
     694             :          */
     695  1330906080 :         dtp->t_flags |= (stp->t_flags & XFS_TRANS_LOWMODE);
     696  1330906080 :         stp->t_flags &= ~XFS_TRANS_LOWMODE;
     697  1330906080 : }
     698             : 
     699             : /*
     700             :  * Prepare a chain of fresh deferred ops work items to be completed later.  Log
     701             :  * recovery requires the ability to put off until later the actual finishing
     702             :  * work so that it can process unfinished items recovered from the log in
     703             :  * correct order.
     704             :  *
     705             :  * Create and log intent items for all the work that we're capturing so that we
     706             :  * can be assured that the items will get replayed if the system goes down
     707             :  * before log recovery gets a chance to finish the work it put off.  The entire
     708             :  * deferred ops state is transferred to the capture structure and the
     709             :  * transaction is then ready for the caller to commit it.  If there are no
     710             :  * intent items to capture, this function returns NULL.
     711             :  *
     712             :  * If capture_ip is not NULL, the capture structure will obtain an extra
     713             :  * reference to the inode.
     714             :  */
static struct xfs_defer_capture *
xfs_defer_ops_capture(
	struct xfs_trans		*tp)
{
	struct xfs_defer_capture	*dfc;
	unsigned short			i;
	int				error;

	/* No deferred work at all: tell the caller to just commit. */
	if (list_empty(&tp->t_dfops))
		return NULL;

	/* Log intent items so replay can redo the work after a crash. */
	error = xfs_defer_create_intents(tp);
	if (error < 0)
		return ERR_PTR(error);

	/* Create an object to capture the defer ops. */
	dfc = kmem_zalloc(sizeof(*dfc), KM_NOFS);
	INIT_LIST_HEAD(&dfc->dfc_list);
	INIT_LIST_HEAD(&dfc->dfc_dfops);

	/* Move the dfops chain and transaction state to the capture struct. */
	list_splice_init(&tp->t_dfops, &dfc->dfc_dfops);
	dfc->dfc_tpflags = tp->t_flags & XFS_TRANS_LOWMODE;
	tp->t_flags &= ~XFS_TRANS_LOWMODE;

	/* Capture the remaining block reservations along with the dfops. */
	dfc->dfc_blkres = tp->t_blk_res - tp->t_blk_res_used;
	dfc->dfc_rtxres = tp->t_rtx_res - tp->t_rtx_res_used;

	/* Preserve the log reservation size. */
	dfc->dfc_logres = tp->t_log_res;

	/* Stash the held inodes/buffers so the continuation can relock them. */
	error = xfs_defer_save_resources(&dfc->dfc_held, tp);
	if (error) {
		/*
		 * Resource capture should never fail, but if it does, we
		 * still have to shut down the log and release things
		 * properly.
		 */
		xfs_force_shutdown(tp->t_mountp, SHUTDOWN_CORRUPT_INCORE);
	}

	/*
	 * Grab extra references to the inodes and buffers because callers are
	 * expected to release their held references after we commit the
	 * transaction.
	 */
	for (i = 0; i < dfc->dfc_held.dr_inos; i++) {
		ASSERT(xfs_isilocked(dfc->dfc_held.dr_ip[i], XFS_ILOCK_EXCL));
		ihold(VFS_I(dfc->dfc_held.dr_ip[i]));
	}

	for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
		xfs_buf_hold(dfc->dfc_held.dr_bp[i]);

	return dfc;
}
     772             : 
     773             : /* Release all resources that we used to capture deferred ops. */
     774             : void
     775           0 : xfs_defer_ops_capture_free(
     776             :         struct xfs_mount                *mp,
     777             :         struct xfs_defer_capture        *dfc)
     778             : {
     779           0 :         unsigned short                  i;
     780             : 
     781           0 :         xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
     782             : 
     783           0 :         for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
     784           0 :                 xfs_buf_relse(dfc->dfc_held.dr_bp[i]);
     785             : 
     786           0 :         for (i = 0; i < dfc->dfc_held.dr_inos; i++)
     787           0 :                 xfs_irele(dfc->dfc_held.dr_ip[i]);
     788             : 
     789           0 :         kmem_free(dfc);
     790           0 : }
     791             : 
     792             : /*
     793             :  * Capture any deferred ops and commit the transaction.  This is the last step
     794             :  * needed to finish a log intent item that we recovered from the log.  If any
     795             :  * of the deferred ops operate on an inode, the caller must pass in that inode
     796             :  * so that the reference can be transferred to the capture structure.  The
     797             :  * caller must hold ILOCK_EXCL on the inode, and must unlock it before calling
     798             :  * xfs_defer_ops_continue.
     799             :  */
     800             : int
     801        5215 : xfs_defer_ops_capture_and_commit(
     802             :         struct xfs_trans                *tp,
     803             :         struct list_head                *capture_list)
     804             : {
     805        5215 :         struct xfs_mount                *mp = tp->t_mountp;
     806        5215 :         struct xfs_defer_capture        *dfc;
     807        5215 :         int                             error;
     808             : 
     809             :         /* If we don't capture anything, commit transaction and exit. */
     810        5215 :         dfc = xfs_defer_ops_capture(tp);
     811        5215 :         if (IS_ERR(dfc)) {
     812           0 :                 xfs_trans_cancel(tp);
     813           0 :                 return PTR_ERR(dfc);
     814             :         }
     815        5215 :         if (!dfc)
     816        3920 :                 return xfs_trans_commit(tp);
     817             : 
     818             :         /* Commit the transaction and add the capture structure to the list. */
     819        1295 :         error = xfs_trans_commit(tp);
     820        1295 :         if (error) {
     821           0 :                 xfs_defer_ops_capture_free(mp, dfc);
     822           0 :                 return error;
     823             :         }
     824             : 
     825        1295 :         list_add_tail(&dfc->dfc_list, capture_list);
     826        1295 :         return 0;
     827             : }
     828             : 
     829             : /*
     830             :  * Attach a chain of captured deferred ops to a new transaction and free the
     831             :  * capture structure.  If an inode was captured, it will be passed back to the
     832             :  * caller with ILOCK_EXCL held and joined to the transaction with lockflags==0.
     833             :  * The caller now owns the inode reference.
     834             :  */
void
xfs_defer_ops_continue(
	struct xfs_defer_capture	*dfc,
	struct xfs_trans		*tp,
	struct xfs_defer_resources	*dres)
{
	unsigned int			i;

	/* Requires a fresh permanent transaction with nothing logged yet. */
	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
	ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));

	/* Lock the captured resources to the new transaction. */
	if (dfc->dfc_held.dr_inos > 2) {
		/* Sort into canonical lock order before taking the ILOCKs. */
		xfs_sort_inodes(dfc->dfc_held.dr_ip, dfc->dfc_held.dr_inos);
		xfs_lock_inodes(dfc->dfc_held.dr_ip, dfc->dfc_held.dr_inos,
				XFS_ILOCK_EXCL);
	} else if (dfc->dfc_held.dr_inos == 2)
		/* Exactly two inodes have a dedicated ordered-lock helper. */
		xfs_lock_two_inodes(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL,
				    dfc->dfc_held.dr_ip[1], XFS_ILOCK_EXCL);
	else if (dfc->dfc_held.dr_inos == 1)
		xfs_ilock(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL);

	for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
		xfs_buf_lock(dfc->dfc_held.dr_bp[i]);

	/* Join the captured resources to the new transaction. */
	xfs_defer_restore_resources(tp, &dfc->dfc_held);
	memcpy(dres, &dfc->dfc_held, sizeof(struct xfs_defer_resources));
	/*
	 * Clear dr_bufs in the caller's copy since the buffers were joined
	 * to the transaction above.
	 * NOTE(review): presumably this prevents a double release via
	 * xfs_defer_resources_rele — confirm against callers.
	 */
	dres->dr_bufs = 0;

	/* Move captured dfops chain and state to the transaction. */
	list_splice_init(&dfc->dfc_dfops, &tp->t_dfops);
	tp->t_flags |= dfc->dfc_tpflags;

	/* The capture structure has been fully drained; free it. */
	kmem_free(dfc);
}
     871             : 
     872             : /* Release the resources captured and continued during recovery. */
     873             : void
     874        1295 : xfs_defer_resources_rele(
     875             :         struct xfs_defer_resources      *dres)
     876             : {
     877        1295 :         unsigned short                  i;
     878             : 
     879        2025 :         for (i = 0; i < dres->dr_inos; i++) {
     880         730 :                 xfs_iunlock(dres->dr_ip[i], XFS_ILOCK_EXCL);
     881         730 :                 xfs_irele(dres->dr_ip[i]);
     882         730 :                 dres->dr_ip[i] = NULL;
     883             :         }
     884             : 
     885        1295 :         for (i = 0; i < dres->dr_bufs; i++) {
     886           0 :                 xfs_buf_relse(dres->dr_bp[i]);
     887           0 :                 dres->dr_bp[i] = NULL;
     888             :         }
     889             : 
     890        1295 :         dres->dr_inos = 0;
     891        1295 :         dres->dr_bufs = 0;
     892        1295 :         dres->dr_ordered = 0;
     893        1295 : }
     894             : 
     895             : static inline int __init
     896          50 : xfs_defer_init_cache(void)
     897             : {
     898          50 :         xfs_defer_pending_cache = kmem_cache_create("xfs_defer_pending",
     899             :                         sizeof(struct xfs_defer_pending),
     900             :                         0, 0, NULL);
     901             : 
     902          50 :         return xfs_defer_pending_cache != NULL ? 0 : -ENOMEM;
     903             : }
     904             : 
     905             : static inline void
     906             : xfs_defer_destroy_cache(void)
     907             : {
     908          49 :         kmem_cache_destroy(xfs_defer_pending_cache);
     909          49 :         xfs_defer_pending_cache = NULL;
     910             : }
     911             : 
     912             : /* Set up caches for deferred work items. */
     913             : int __init
     914          50 : xfs_defer_init_item_caches(void)
     915             : {
     916          50 :         int                             error;
     917             : 
     918          50 :         error = xfs_defer_init_cache();
     919          50 :         if (error)
     920             :                 return error;
     921          50 :         error = xfs_rmap_intent_init_cache();
     922          50 :         if (error)
     923           0 :                 goto err;
     924          50 :         error = xfs_refcount_intent_init_cache();
     925          50 :         if (error)
     926           0 :                 goto err;
     927          50 :         error = xfs_bmap_intent_init_cache();
     928          50 :         if (error)
     929           0 :                 goto err;
     930          50 :         error = xfs_extfree_intent_init_cache();
     931          50 :         if (error)
     932           0 :                 goto err;
     933          50 :         error = xfs_attr_intent_init_cache();
     934          50 :         if (error)
     935           0 :                 goto err;
     936          50 :         error = xfs_swapext_intent_init_cache();
     937          50 :         if (error)
     938           0 :                 goto err;
     939             : 
     940             :         return 0;
     941           0 : err:
     942           0 :         xfs_defer_destroy_item_caches();
     943           0 :         return error;
     944             : }
     945             : 
/* Destroy all the deferred work item caches, if they've been allocated. */
void
xfs_defer_destroy_item_caches(void)
{
	/*
	 * Tear the caches down in the exact reverse of the order
	 * xfs_defer_init_item_caches() created them; destroying an
	 * unallocated cache is tolerated, so this also serves as the
	 * unwind path for a partially failed init.
	 */
	xfs_swapext_intent_destroy_cache();
	xfs_attr_intent_destroy_cache();
	xfs_extfree_intent_destroy_cache();
	xfs_bmap_intent_destroy_cache();
	xfs_refcount_intent_destroy_cache();
	xfs_rmap_intent_destroy_cache();
	xfs_defer_destroy_cache();
}

Generated by: LCOV version 1.14