LCOV - code coverage report
Current view: top level - fs/xfs - xfs_mount.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-achx @ Mon Jul 31 20:08:12 PDT 2023 Lines: 498 556 89.6 %
Date: 2023-07-31 20:08:12 Functions: 26 26 100.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
       4             :  * All Rights Reserved.
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_log_format.h"
      11             : #include "xfs_trans_resv.h"
      12             : #include "xfs_bit.h"
      13             : #include "xfs_sb.h"
      14             : #include "xfs_mount.h"
      15             : #include "xfs_inode.h"
      16             : #include "xfs_dir2.h"
      17             : #include "xfs_ialloc.h"
      18             : #include "xfs_alloc.h"
      19             : #include "xfs_rtalloc.h"
      20             : #include "xfs_bmap.h"
      21             : #include "xfs_trans.h"
      22             : #include "xfs_trans_priv.h"
      23             : #include "xfs_log.h"
      24             : #include "xfs_log_priv.h"
      25             : #include "xfs_error.h"
      26             : #include "xfs_quota.h"
      27             : #include "xfs_fsops.h"
      28             : #include "xfs_icache.h"
      29             : #include "xfs_sysfs.h"
      30             : #include "xfs_rmap_btree.h"
      31             : #include "xfs_refcount_btree.h"
      32             : #include "xfs_reflink.h"
      33             : #include "xfs_extent_busy.h"
      34             : #include "xfs_health.h"
      35             : #include "xfs_trace.h"
      36             : #include "xfs_ag.h"
      37             : #include "scrub/stats.h"
      38             : 
      39             : static DEFINE_MUTEX(xfs_uuid_table_mutex);
      40             : static int xfs_uuid_table_size;
      41             : static uuid_t *xfs_uuid_table;
      42             : 
      43             : void
      44          49 : xfs_uuid_table_free(void)
      45             : {
      46          49 :         if (xfs_uuid_table_size == 0)
      47             :                 return;
      48          49 :         kmem_free(xfs_uuid_table);
      49          49 :         xfs_uuid_table = NULL;
      50          49 :         xfs_uuid_table_size = 0;
      51             : }
      52             : 
      53             : /*
      54             :  * See if the UUID is unique among mounted XFS filesystems.
      55             :  * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
      56             :  */
      57             : STATIC int
      58       60880 : xfs_uuid_mount(
      59             :         struct xfs_mount        *mp)
      60             : {
      61       60880 :         uuid_t                  *uuid = &mp->m_sb.sb_uuid;
      62       60880 :         int                     hole, i;
      63             : 
      64             :         /* Publish UUID in struct super_block */
      65       60880 :         uuid_copy(&mp->m_super->s_uuid, uuid);
      66             : 
      67       60880 :         if (xfs_has_nouuid(mp))
      68             :                 return 0;
      69             : 
      70       60865 :         if (uuid_is_null(uuid)) {
      71           0 :                 xfs_warn(mp, "Filesystem has null UUID - can't mount");
      72           0 :                 return -EINVAL;
      73             :         }
      74             : 
      75       60865 :         mutex_lock(&xfs_uuid_table_mutex);
      76      275304 :         for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) {
      77      153590 :                 if (uuid_is_null(&xfs_uuid_table[i])) {
      78      107949 :                         hole = i;
      79      107949 :                         continue;
      80             :                 }
      81       45641 :                 if (uuid_equal(uuid, &xfs_uuid_table[i]))
      82          16 :                         goto out_duplicate;
      83             :         }
      84             : 
      85       60849 :         if (hole < 0) {
      86         284 :                 xfs_uuid_table = krealloc(xfs_uuid_table,
      87         142 :                         (xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
      88             :                         GFP_KERNEL | __GFP_NOFAIL);
      89         142 :                 hole = xfs_uuid_table_size++;
      90             :         }
      91       60849 :         xfs_uuid_table[hole] = *uuid;
      92       60849 :         mutex_unlock(&xfs_uuid_table_mutex);
      93             : 
      94       60849 :         return 0;
      95             : 
      96             :  out_duplicate:
      97          16 :         mutex_unlock(&xfs_uuid_table_mutex);
      98          16 :         xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid);
      99          16 :         return -EINVAL;
     100             : }
     101             : 
     102             : STATIC void
     103       60874 : xfs_uuid_unmount(
     104             :         struct xfs_mount        *mp)
     105             : {
     106       60874 :         uuid_t                  *uuid = &mp->m_sb.sb_uuid;
     107       60874 :         int                     i;
     108             : 
     109       60874 :         if (xfs_has_nouuid(mp))
     110             :                 return;
     111             : 
     112       60859 :         mutex_lock(&xfs_uuid_table_mutex);
     113      170813 :         for (i = 0; i < xfs_uuid_table_size; i++) {
     114      109954 :                 if (uuid_is_null(&xfs_uuid_table[i]))
     115       42906 :                         continue;
     116       67048 :                 if (!uuid_equal(uuid, &xfs_uuid_table[i]))
     117        6189 :                         continue;
     118       60859 :                 memset(&xfs_uuid_table[i], 0, sizeof(uuid_t));
     119       60859 :                 break;
     120             :         }
     121       60859 :         ASSERT(i < xfs_uuid_table_size);
     122       60859 :         mutex_unlock(&xfs_uuid_table_mutex);
     123             : }
     124             : 
     125             : /*
     126             :  * Check size of device based on the (data/realtime) block count.
     127             :  * Note: this check is used by the growfs code as well as mount.
     128             :  */
     129             : int
     130      123656 : xfs_sb_validate_fsb_count(
     131             :         xfs_sb_t        *sbp,
     132             :         uint64_t        nblocks)
     133             : {
     134      123656 :         ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
     135      123656 :         ASSERT(sbp->sb_blocklog >= BBSHIFT);
     136             : 
     137             :         /* Limited by ULONG_MAX of page cache index */
     138      123656 :         if (nblocks >> (PAGE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
     139             :                 return -EFBIG;
     140      123656 :         return 0;
     141             : }
     142             : 
     143             : /*
     144             :  * xfs_readsb
     145             :  *
     146             :  * Does the initial read of the superblock.
     147             :  */
     148             : int
     149       61590 : xfs_readsb(
     150             :         struct xfs_mount *mp,
     151             :         int             flags)
     152             : {
     153       61590 :         unsigned int    sector_size;
     154       61590 :         struct xfs_buf  *bp;
     155       61590 :         struct xfs_sb   *sbp = &mp->m_sb;
     156       61590 :         int             error;
     157       61590 :         int             loud = !(flags & XFS_MFSI_QUIET);
     158       61590 :         const struct xfs_buf_ops *buf_ops;
     159             : 
     160       61590 :         ASSERT(mp->m_sb_bp == NULL);
     161       61590 :         ASSERT(mp->m_ddev_targp != NULL);
     162             : 
     163             :         /*
     164             :          * For the initial read, we must guess at the sector
     165             :          * size based on the block device.  It's enough to
     166             :          * get the sb_sectsize out of the superblock and
     167             :          * then reread with the proper length.
     168             :          * We don't verify it yet, because it may not be complete.
     169             :          */
     170       61590 :         sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
     171       61590 :         buf_ops = NULL;
     172             : 
     173             :         /*
     174             :          * Allocate a (locked) buffer to hold the superblock. This will be kept
     175             :          * around at all times to optimize access to the superblock. Therefore,
     176             :          * set XBF_NO_IOACCT to make sure it doesn't hold the buftarg count
     177             :          * elevated.
     178             :          */
     179      123168 : reread:
     180      123168 :         error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
     181      123168 :                                       BTOBB(sector_size), XBF_NO_IOACCT, &bp,
     182             :                                       buf_ops);
     183      123168 :         if (error) {
     184         585 :                 if (loud)
     185         585 :                         xfs_warn(mp, "SB validate failed with error %d.", error);
     186             :                 /* bad CRC means corrupted metadata */
     187         585 :                 if (error == -EFSBADCRC)
     188          21 :                         error = -EFSCORRUPTED;
     189         585 :                 return error;
     190             :         }
     191             : 
     192             :         /*
     193             :          * Initialize the mount structure from the superblock.
     194             :          */
     195      122583 :         xfs_sb_from_disk(sbp, bp->b_addr);
     196             : 
     197             :         /*
     198             :          * If we haven't validated the superblock, do so now before we try
     199             :          * to check the sector size and reread the superblock appropriately.
     200             :          */
     201      122583 :         if (sbp->sb_magicnum != XFS_SB_MAGIC) {
     202           1 :                 if (loud)
     203           1 :                         xfs_warn(mp, "Invalid superblock magic number");
     204           1 :                 error = -EINVAL;
     205           1 :                 goto release_buf;
     206             :         }
     207             : 
     208             :         /*
     209             :          * We must be able to do sector-sized and sector-aligned IO.
     210             :          */
     211      122582 :         if (sector_size > sbp->sb_sectsize) {
     212           0 :                 if (loud)
     213           0 :                         xfs_warn(mp, "device supports %u byte sectors (not %u)",
     214             :                                 sector_size, sbp->sb_sectsize);
     215           0 :                 error = -ENOSYS;
     216           0 :                 goto release_buf;
     217             :         }
     218             : 
     219      122582 :         if (buf_ops == NULL) {
     220             :                 /*
     221             :                  * Re-read the superblock so the buffer is correctly sized,
     222             :                  * and properly verified.
     223             :                  */
     224       61578 :                 xfs_buf_relse(bp);
     225       61578 :                 sector_size = sbp->sb_sectsize;
     226       61578 :                 buf_ops = loud ? &xfs_sb_buf_ops : &xfs_sb_quiet_buf_ops;
     227       61578 :                 goto reread;
     228             :         }
     229             : 
     230       61004 :         mp->m_features |= xfs_sb_version_to_features(sbp);
     231       61004 :         xfs_reinit_percpu_counters(mp);
     232             : 
     233             :         /* no need to be quiet anymore, so reset the buf ops */
     234       61004 :         bp->b_ops = &xfs_sb_buf_ops;
     235             : 
     236       61004 :         mp->m_sb_bp = bp;
     237       61004 :         xfs_buf_unlock(bp);
     238       61004 :         return 0;
     239             : 
     240           1 : release_buf:
     241           1 :         xfs_buf_relse(bp);
     242           1 :         return error;
     243             : }
     244             : 
     245             : /*
     246             :  * If the sunit/swidth change would move the precomputed root inode value, we
     247             :  * must reject the ondisk change because repair will stumble over that.
     248             :  * However, we allow the mount to proceed because we never rejected this
     249             :  * combination before.  Returns true to update the sb, false otherwise.
     250             :  */
     251             : static inline int
     252          95 : xfs_check_new_dalign(
     253             :         struct xfs_mount        *mp,
     254             :         int                     new_dalign,
     255             :         bool                    *update_sb)
     256             : {
     257          95 :         struct xfs_sb           *sbp = &mp->m_sb;
     258          95 :         xfs_ino_t               calc_ino;
     259             : 
     260          95 :         calc_ino = xfs_ialloc_calc_rootino(mp, new_dalign);
     261          95 :         trace_xfs_check_new_dalign(mp, new_dalign, calc_ino);
     262             : 
     263          95 :         if (sbp->sb_rootino == calc_ino) {
     264          52 :                 *update_sb = true;
     265          52 :                 return 0;
     266             :         }
     267             : 
     268          43 :         xfs_warn(mp,
     269             : "Cannot change stripe alignment; would require moving root inode.");
     270             : 
     271             :         /*
     272             :          * XXX: Next time we add a new incompat feature, this should start
     273             :          * returning -EINVAL to fail the mount.  Until then, spit out a warning
     274             :          * that we're ignoring the administrator's instructions.
     275             :          */
     276          43 :         xfs_warn(mp, "Skipping superblock stripe alignment update.");
     277          43 :         *update_sb = false;
     278          43 :         return 0;
     279             : }
     280             : 
     281             : /*
     282             :  * If we were provided with new sunit/swidth values as mount options, make sure
     283             :  * that they pass basic alignment and superblock feature checks, and convert
     284             :  * them into the same units (FSB) that everything else expects.  This step
     285             :  * /must/ be done before computing the inode geometry.
     286             :  */
     287             : STATIC int
     288       60890 : xfs_validate_new_dalign(
     289             :         struct xfs_mount        *mp)
     290             : {
     291       60890 :         if (mp->m_dalign == 0)
     292             :                 return 0;
     293             : 
     294             :         /*
     295             :          * If stripe unit and stripe width are not multiples
     296             :          * of the fs blocksize turn off alignment.
     297             :          */
     298         116 :         if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
     299         106 :             (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
     300          10 :                 xfs_warn(mp,
     301             :         "alignment check failed: sunit/swidth vs. blocksize(%d)",
     302             :                         mp->m_sb.sb_blocksize);
     303          10 :                 return -EINVAL;
     304             :         }
     305             : 
     306             :         /*
     307             :          * Convert the stripe unit and width to FSBs.
     308             :          */
     309         106 :         mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
     310         106 :         if (mp->m_dalign && (mp->m_sb.sb_agblocks % mp->m_dalign)) {
     311           0 :                 xfs_warn(mp,
     312             :         "alignment check failed: sunit/swidth vs. agsize(%d)",
     313             :                         mp->m_sb.sb_agblocks);
     314           0 :                 return -EINVAL;
     315             :         }
     316             : 
     317         106 :         if (!mp->m_dalign) {
     318           0 :                 xfs_warn(mp,
     319             :         "alignment check failed: sunit(%d) less than bsize(%d)",
     320             :                         mp->m_dalign, mp->m_sb.sb_blocksize);
     321           0 :                 return -EINVAL;
     322             :         }
     323             : 
     324         106 :         mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
     325             : 
     326         106 :         if (!xfs_has_dalign(mp)) {
     327           0 :                 xfs_warn(mp,
     328             : "cannot change alignment: superblock does not support data alignment");
     329           0 :                 return -EINVAL;
     330             :         }
     331             : 
     332             :         return 0;
     333             : }
     334             : 
     335             : /* Update alignment values based on mount options and sb values. */
     336             : STATIC int
     337       60880 : xfs_update_alignment(
     338             :         struct xfs_mount        *mp)
     339             : {
     340       60880 :         struct xfs_sb           *sbp = &mp->m_sb;
     341             : 
     342       60880 :         if (mp->m_dalign) {
     343         106 :                 bool            update_sb;
     344         106 :                 int             error;
     345             : 
     346         106 :                 if (sbp->sb_unit == mp->m_dalign &&
     347             :                     sbp->sb_width == mp->m_swidth)
     348          54 :                         return 0;
     349             : 
     350          95 :                 error = xfs_check_new_dalign(mp, mp->m_dalign, &update_sb);
     351          95 :                 if (error || !update_sb)
     352          43 :                         return error;
     353             : 
     354          52 :                 sbp->sb_unit = mp->m_dalign;
     355          52 :                 sbp->sb_width = mp->m_swidth;
     356          52 :                 mp->m_update_sb = true;
     357       60774 :         } else if (!xfs_has_noalign(mp) && xfs_has_dalign(mp)) {
     358        5416 :                 mp->m_dalign = sbp->sb_unit;
     359        5416 :                 mp->m_swidth = sbp->sb_width;
     360             :         }
     361             : 
     362             :         return 0;
     363             : }
     364             : 
     365             : /*
     366             :  * precalculate the low space thresholds for dynamic speculative preallocation.
     367             :  */
     368             : void
     369       61617 : xfs_set_low_space_thresholds(
     370             :         struct xfs_mount        *mp)
     371             : {
     372       61617 :         uint64_t                dblocks = mp->m_sb.sb_dblocks;
     373       61617 :         uint64_t                rtexts = mp->m_sb.sb_rextents;
     374       61617 :         int                     i;
     375             : 
     376       61617 :         do_div(dblocks, 100);
     377       61617 :         do_div(rtexts, 100);
     378             : 
     379      369702 :         for (i = 0; i < XFS_LOWSP_MAX; i++) {
     380      308085 :                 mp->m_low_space[i] = dblocks * (i + 1);
     381      308085 :                 mp->m_low_rtexts[i] = rtexts * (i + 1);
     382             :         }
     383       61617 : }
     384             : 
     385             : /*
     386             :  * Check that the data (and log if separate) is an ok size.
     387             :  */
     388             : STATIC int
     389       60864 : xfs_check_sizes(
     390             :         struct xfs_mount *mp)
     391             : {
     392       60864 :         struct xfs_buf  *bp;
     393       60864 :         xfs_daddr_t     d;
     394       60864 :         int             error;
     395             : 
     396       60864 :         d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
     397       60864 :         if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
     398           0 :                 xfs_warn(mp, "filesystem size mismatch detected");
     399           0 :                 return -EFBIG;
     400             :         }
     401      182592 :         error = xfs_buf_read_uncached(mp->m_ddev_targp,
     402       60864 :                                         d - XFS_FSS_TO_BB(mp, 1),
     403       60864 :                                         XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
     404       60864 :         if (error) {
     405           0 :                 xfs_warn(mp, "last sector read failed");
     406           0 :                 return error;
     407             :         }
     408       60864 :         xfs_buf_relse(bp);
     409             : 
     410       60864 :         if (mp->m_logdev_targp == mp->m_ddev_targp)
     411             :                 return 0;
     412             : 
     413       18189 :         d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
     414       18189 :         if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
     415           0 :                 xfs_warn(mp, "log size mismatch detected");
     416           0 :                 return -EFBIG;
     417             :         }
     418       54567 :         error = xfs_buf_read_uncached(mp->m_logdev_targp,
     419       18189 :                                         d - XFS_FSB_TO_BB(mp, 1),
     420       18189 :                                         XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
     421       18189 :         if (error) {
     422           0 :                 xfs_warn(mp, "log device read failed");
     423           0 :                 return error;
     424             :         }
     425       18189 :         xfs_buf_relse(bp);
     426       18189 :         return 0;
     427             : }
     428             : 
     429             : /*
     430             :  * Clear the quotaflags in memory and in the superblock.
     431             :  */
     432             : int
     433         147 : xfs_mount_reset_sbqflags(
     434             :         struct xfs_mount        *mp)
     435             : {
     436         147 :         mp->m_qflags = 0;
     437             : 
     438             :         /* It is OK to look at sb_qflags in the mount path without m_sb_lock. */
     439         147 :         if (mp->m_sb.sb_qflags == 0)
     440             :                 return 0;
     441         147 :         spin_lock(&mp->m_sb_lock);
     442         147 :         mp->m_sb.sb_qflags = 0;
     443         147 :         spin_unlock(&mp->m_sb_lock);
     444             : 
     445         147 :         if (!xfs_fs_writable(mp, SB_FREEZE_WRITE))
     446             :                 return 0;
     447             : 
     448         147 :         return xfs_sync_sb(mp, false);
     449             : }
     450             : 
     451             : uint64_t
     452          22 : xfs_default_resblks(xfs_mount_t *mp)
     453             : {
     454       58439 :         uint64_t resblks;
     455             : 
     456             :         /*
     457             :          * We default to 5% or 8192 fsbs of space reserved, whichever is
     458             :          * smaller.  This is intended to cover concurrent allocation
     459             :          * transactions when we initially hit enospc. These each require a 4
     460             :          * block reservation. Hence by default we cover roughly 2000 concurrent
     461             :          * allocation reservations.
     462             :          */
     463       58439 :         resblks = mp->m_sb.sb_dblocks;
     464       58439 :         do_div(resblks, 20);
     465       58439 :         resblks = min_t(uint64_t, resblks, 8192);
     466       58439 :         return resblks;
     467             : }
     468             : 
     469             : /* Ensure the summary counts are correct. */
     470             : STATIC int
     471       60702 : xfs_check_summary_counts(
     472             :         struct xfs_mount        *mp)
     473             : {
     474       60702 :         int                     error = 0;
     475             : 
     476             :         /*
     477             :          * The AG0 superblock verifier rejects in-progress filesystems,
     478             :          * so we should never see the flag set this far into mounting.
     479             :          */
     480       60702 :         if (mp->m_sb.sb_inprogress) {
     481           0 :                 xfs_err(mp, "sb_inprogress set after log recovery??");
     482           0 :                 WARN_ON(1);
     483           0 :                 return -EFSCORRUPTED;
     484             :         }
     485             : 
     486             :         /*
     487             :          * Now the log is mounted, we know if it was an unclean shutdown or
     488             :          * not. If it was, with the first phase of recovery has completed, we
     489             :          * have consistent AG blocks on disk. We have not recovered EFIs yet,
     490             :          * but they are recovered transactionally in the second recovery phase
     491             :          * later.
     492             :          *
     493             :          * If the log was clean when we mounted, we can check the summary
     494             :          * counters.  If any of them are obviously incorrect, we can recompute
     495             :          * them from the AGF headers in the next step.
     496             :          */
     497      121404 :         if (xfs_is_clean(mp) &&
     498       93648 :             (mp->m_sb.sb_fdblocks > mp->m_sb.sb_dblocks ||
     499       46813 :              !xfs_verify_icount(mp, mp->m_sb.sb_icount) ||
     500       46813 :              mp->m_sb.sb_ifree > mp->m_sb.sb_icount)) {
     501          33 :                 xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
     502          33 :                 xfs_fs_mark_checked(mp, XFS_SICK_FS_COUNTERS);
     503             :         }
     504             : 
     505             :         /*
     506             :          * We can safely re-initialise incore superblock counters from the
     507             :          * per-ag data. These may not be correct if the filesystem was not
     508             :          * cleanly unmounted, so we waited for recovery to finish before doing
     509             :          * this.
     510             :          *
     511             :          * If the filesystem was cleanly unmounted or the previous check did
     512             :          * not flag anything weird, then we can trust the values in the
     513             :          * superblock to be correct and we don't need to do anything here.
     514             :          * Otherwise, recalculate the summary counters.
     515             :          */
     516      168206 :         if ((xfs_has_lazysbcount(mp) && !xfs_is_clean(mp)) ||
     517             :             xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS)) {
     518       13900 :                 error = xfs_initialize_perag_data(mp, mp->m_sb.sb_agcount);
     519       13900 :                 if (error)
     520             :                         return error;
     521             :         }
     522             : 
     523             :         /*
     524             :          * Older kernels misused sb_frextents to reflect both incore
     525             :          * reservations made by running transactions and the actual count of
     526             :          * free rt extents in the ondisk metadata.  Transactions committed
     527             :          * during runtime can therefore contain a superblock update that
     528             :          * undercounts the number of free rt extents tracked in the rt bitmap.
     529             :          * A clean unmount record will have the correct frextents value since
     530             :          * there can be no other transactions running at that point.
     531             :          *
     532             :          * If we're mounting the rt volume after recovering the log, recompute
     533             :          * frextents from the rtbitmap file to fix the inconsistency.
     534             :          */
     535       76906 :         if (xfs_has_realtime(mp) && !xfs_is_clean(mp)) {
     536        1621 :                 error = xfs_rtalloc_reinit_frextents(mp);
     537        1621 :                 if (error)
     538           0 :                         return error;
     539             :         }
     540             : 
     541             :         return 0;
     542             : }
     543             : 
     544             : static void
     545       60583 : xfs_unmount_check(
     546             :         struct xfs_mount        *mp)
     547             : {
     548      121166 :         if (xfs_is_shutdown(mp))
     549             :                 return;
     550             : 
     551       95024 :         if (percpu_counter_sum(&mp->m_ifree) >
     552       47512 :                         percpu_counter_sum(&mp->m_icount)) {
     553           0 :                 xfs_alert(mp, "ifree/icount mismatch at unmount");
     554           0 :                 xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
     555             :         }
     556             : }
     557             : 
     558             : /*
     559             :  * Flush and reclaim dirty inodes in preparation for unmount. Inodes and
     560             :  * internal inode structures can be sitting in the CIL and AIL at this point,
     561             :  * so we need to unpin them, write them back and/or reclaim them before unmount
     562             :  * can proceed.  In other words, callers are required to have inactivated all
     563             :  * inodes.
     564             :  *
     565             :  * An inode cluster that has been freed can have its buffer still pinned in
     566             :  * memory because the transaction is still sitting in an iclog. The stale inodes
     567             :  * on that buffer will be pinned to the buffer until the transaction hits the
     568             :  * disk and the callbacks run. Pushing the AIL will skip the stale inodes and
     569             :  * may never see the pinned buffer, so nothing will push out the iclog and
     570             :  * unpin the buffer.
     571             :  *
     572             :  * Hence we need to force the log to unpin everything first. However, log
     573             :  * forces don't wait for the discards they issue to complete, so we have to
     574             :  * explicitly wait for them to complete here as well.
     575             :  *
     576             :  * Then we can tell the world we are unmounting so that error handling knows
     577             :  * that the filesystem is going away and we should error out anything that we
     578             :  * have been retrying in the background.  This will prevent never-ending
     579             :  * retries in AIL pushing from hanging the unmount.
     580             :  *
     581             :  * Finally, we can push the AIL to clean all the remaining dirty objects, then
     582             :  * reclaim the remaining inodes that are still in memory at this point in time.
     583             :  */
     584             : static void
     585       60712 : xfs_unmount_flush_inodes(
     586             :         struct xfs_mount        *mp)
     587             : {
     588       60712 :         xfs_log_force(mp, XFS_LOG_SYNC); /* step 1: force the log to unpin everything */
     589       60712 :         xfs_extent_busy_wait_all(mp); /* ...then wait for busy extents and */
     590       60712 :         flush_workqueue(xfs_discard_wq); /* ...for queued discard work to finish */
     591             : 
     592       60712 :         set_bit(XFS_OPSTATE_UNMOUNTING, &mp->m_opstate); /* step 2: tell the world we are unmounting */
     593             : 
     594       60712 :         xfs_ail_push_all_sync(mp->m_ail); /* step 3: push the AIL to clean remaining dirty objects */
     595       60712 :         xfs_inodegc_stop(mp);
     596       60712 :         cancel_delayed_work_sync(&mp->m_reclaim_work);
     597       60712 :         xfs_reclaim_inodes(mp); /* step 4: reclaim the inodes still in memory */
     598       60712 :         xfs_health_unmount(mp);
     599       60712 : }
     600             : 
     601             : static void
     602       60880 : xfs_mount_setup_inode_geom(
     603             :         struct xfs_mount        *mp)
     604             : { /* Fill in the mount's inode geometry structure, M_IGEO(mp). */
     605       60880 :         struct xfs_ino_geometry *igeo = M_IGEO(mp);
     606             : 
     607       60880 :         igeo->attr_fork_offset = xfs_bmap_compute_attr_offset(mp);
     608       61086 :         ASSERT(igeo->attr_fork_offset < XFS_LITINO(mp)); /* offset must be below XFS_LITINO(mp) */
     609             : 
     610       60880 :         xfs_ialloc_setup_geometry(mp);
     611       60880 : }
     612             : 
     613             : /* Compute maximum possible height for per-AG btree types for this fs. */
     614             : static inline void
     615             : xfs_agbtree_compute_maxlevels(
     616             :         struct xfs_mount        *mp)
     617             : {
     618       60880 :         unsigned int            levels;
     619             : 
     620       60880 :         levels = max(mp->m_alloc_maxlevels, M_IGEO(mp)->inobt_maxlevels);
     621       60880 :         levels = max(levels, mp->m_rmap_maxlevels);
     622       60880 :         mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels); /* largest of the alloc, inobt, rmap and refc maxlevels */
     623             : }
     624             : 
     625             : /*
     626             :  * This function does the following on an initial mount of a file system:
     627             :  *      - reads the superblock from disk and init the mount struct
     628             :  *      - if we're a 32-bit kernel, do a size check on the superblock
     629             :  *              so we don't mount terabyte filesystems
     630             :  *      - init mount struct realtime fields
     631             :  *      - allocate inode hash table for fs
     632             :  *      - init directory manager
     633             :  *      - perform recovery and init the log manager
     634             :  */
     635             : int
     636       60890 : xfs_mountfs(
     637             :         struct xfs_mount        *mp)
     638             : { /* Returns 0 on success, else an error code after unwinding through the out_* labels. */
     639       60890 :         struct xfs_sb           *sbp = &(mp->m_sb);
     640       60890 :         struct xfs_inode        *rip;
     641       60890 :         struct xfs_ino_geometry *igeo = M_IGEO(mp);
     642       60890 :         uint64_t                resblks;
     643       60890 :         uint                    quotamount = 0;
     644       60890 :         uint                    quotaflags = 0;
     645       60890 :         int                     error = 0;
     646             : 
     647       60890 :         xfs_sb_mount_common(mp, sbp);
     648             : 
     649             :         /*
     650             :          * Check for mismatched features2 values.  Older kernels read & wrote
     651             :          * into the wrong sb offset for sb_features2 on some platforms due to
     652             :          * xfs_sb_t not being 64bit size aligned when sb_features2 was added,
     653             :          * which made older superblock reading/writing routines swap it as a
     654             :          * 64-bit value.
     655             :          *
     656             :          * For backwards compatibility, we make both slots equal.
     657             :          *
     658             :          * If we detect a mismatched field, we OR the set bits into the existing
     659             :          * features2 field in case it has already been modified; we don't want
     660             :          * to lose any features.  We then update the bad location with the ORed
     661             :          * value so that older kernels will see any features2 flags. The
     662             :          * superblock writeback code ensures the new sb_features2 is copied to
     663             :          * sb_bad_features2 before it is logged or written to disk.
     664             :          */
     665       60890 :         if (xfs_sb_has_mismatched_features2(sbp)) {
     666          22 :                 xfs_warn(mp, "correcting sb_features alignment problem");
     667          22 :                 sbp->sb_features2 |= sbp->sb_bad_features2;
     668          22 :                 mp->m_update_sb = true;
     669             :         }
     670             : 
     671             : 
     672             :         /* always use v2 inodes by default now */
     673       60890 :         if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) {
     674           0 :                 mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT;
     675           0 :                 mp->m_features |= XFS_FEAT_NLINK;
     676           0 :                 mp->m_update_sb = true;
     677             :         }
     678             : 
     679             :         /*
     680             :          * If we were given new sunit/swidth options, do some basic validation
     681             :          * checks and convert the incore dalign and swidth values to the
     682             :          * same units (FSB) that everything else uses.  This /must/ happen
     683             :          * before computing the inode geometry.
     684             :          */
     685       60890 :         error = xfs_validate_new_dalign(mp);
     686       60890 :         if (error)
     687          10 :                 goto out;
     688             : 
     689       60880 :         xfs_alloc_compute_maxlevels(mp);
     690       60880 :         xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
     691       60880 :         xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
     692       60880 :         xfs_mount_setup_inode_geom(mp);
     693       60880 :         xfs_rmapbt_compute_maxlevels(mp);
     694       60880 :         xfs_refcountbt_compute_maxlevels(mp);
     695             : 
     696       60880 :         xfs_agbtree_compute_maxlevels(mp);
     697             : 
     698             :         /*
     699             :          * Check if sb_agblocks is aligned at stripe boundary.  If sb_agblocks
     700             :          * is NOT aligned turn off m_dalign since allocator alignment is within
     701             :          * an ag, therefore ag has to be aligned at stripe boundary.  Note that
     702             :          * we must compute the free space and rmap btree geometry before doing
     703             :          * this.
     704             :          */
     705       60880 :         error = xfs_update_alignment(mp);
     706       60880 :         if (error)
     707           0 :                 goto out;
     708             : 
     709             :         /* enable fail_at_unmount as default */
     710       60880 :         mp->m_fail_unmount = true;
     711             : 
     712       60880 :         error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype,
     713       60880 :                                NULL, mp->m_super->s_id);
     714       60880 :         if (error)
     715           0 :                 goto out;
     716             : 
     717       60880 :         error = xfs_sysfs_init(&mp->m_stats.xs_kobj, &xfs_stats_ktype,
     718             :                                &mp->m_kobj, "stats");
     719       60880 :         if (error)
     720           0 :                 goto out_remove_sysfs;
     721             : 
     722       60880 :         xchk_stats_register(mp->m_scrub_stats, mp->m_debugfs);
     723             : 
     724       60880 :         error = xfs_error_sysfs_init(mp);
     725       60880 :         if (error)
     726           0 :                 goto out_remove_scrub_stats;
     727             : 
     728       60880 :         error = xfs_errortag_init(mp);
     729       60880 :         if (error)
     730           0 :                 goto out_remove_error_sysfs;
     731             : 
     732       60880 :         error = xfs_uuid_mount(mp);
     733       60880 :         if (error)
     734          16 :                 goto out_remove_errortag;
     735             : 
     736             :         /*
     737             :          * Update the preferred write size based on the information from the
     738             :          * on-disk superblock.
     739             :          */
     740       60864 :         mp->m_allocsize_log =
     741       60864 :                 max_t(uint32_t, sbp->sb_blocklog, mp->m_allocsize_log);
     742       60864 :         mp->m_allocsize_blocks = 1U << (mp->m_allocsize_log - sbp->sb_blocklog);
     743             : 
     744             :         /* set the low space thresholds for dynamic preallocation */
     745       60864 :         xfs_set_low_space_thresholds(mp);
     746             : 
     747             :         /*
     748             :          * If enabled, sparse inode chunk alignment is expected to match the
     749             :          * cluster size. Full inode chunk alignment must match the chunk size,
     750             :          * but that is checked on sb read verification...
     751             :          */
     752       60864 :         if (xfs_has_sparseinodes(mp) &&
     753       60669 :             mp->m_sb.sb_spino_align !=
     754       60669 :                         XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) {
     755           0 :                 xfs_warn(mp,
     756             :         "Sparse inode block alignment (%u) must match cluster size (%llu).",
     757             :                          mp->m_sb.sb_spino_align,
     758             :                          XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw));
     759           0 :                 error = -EINVAL;
     760           0 :                 goto out_remove_uuid;
     761             :         }
     762             : 
     763             :         /*
     764             :          * Check that the data (and log if separate) is an ok size.
     765             :          */
     766       60864 :         error = xfs_check_sizes(mp);
     767       60864 :         if (error)
     768           0 :                 goto out_remove_uuid;
     769             : 
     770             :         /*
     771             :          * Initialize realtime fields in the mount structure
     772             :          */
     773       60864 :         error = xfs_rtmount_init(mp);
     774       60864 :         if (error) {
     775         100 :                 xfs_warn(mp, "RT mount failed");
     776         100 :                 goto out_remove_uuid;
     777             :         }
     778             : 
     779             :         /*
     780             :          *  Copies the low order bits of the timestamp and the randomly
     781             :          *  set "sequence" number out of a UUID.
     782             :          */
     783       60764 :         mp->m_fixedfsid[0] =
     784       60764 :                 (get_unaligned_be16(&sbp->sb_uuid.b[8]) << 16) |
     785       60764 :                  get_unaligned_be16(&sbp->sb_uuid.b[4]);
     786       60764 :         mp->m_fixedfsid[1] = get_unaligned_be32(&sbp->sb_uuid.b[0]);
     787             : 
     788       60764 :         error = xfs_da_mount(mp);
     789       60764 :         if (error) {
     790           0 :                 xfs_warn(mp, "Failed dir/attr init: %d", error);
     791           0 :                 goto out_remove_uuid;
     792             :         }
     793             : 
     794             :         /*
     795             :          * Initialize the precomputed transaction reservations values.
     796             :          */
     797       60764 :         xfs_trans_init(mp);
     798             : 
     799             :         /*
     800             :          * Allocate and initialize the per-ag data.
     801             :          */
     802       60764 :         error = xfs_initialize_perag(mp, sbp->sb_agcount, mp->m_sb.sb_dblocks,
     803             :                         &mp->m_maxagi);
     804       60764 :         if (error) {
     805           0 :                 xfs_warn(mp, "Failed per-ag init: %d", error);
     806           0 :                 goto out_free_dir;
     807             :         }
     808             : 
     809       60764 :         if (XFS_IS_CORRUPT(mp, !sbp->sb_logblocks)) {
     810           0 :                 xfs_warn(mp, "no log defined");
     811           0 :                 error = -EFSCORRUPTED;
     812           0 :                 goto out_free_perag;
     813             :         }
     814             : 
     815       60764 :         error = xfs_inodegc_register_shrinker(mp);
     816       60764 :         if (error)
     817           0 :                 goto out_fail_wait;
     818             : 
     819             :         /*
     820             :          * Log's mount-time initialization. The first part of recovery can place
     821             :          * some items on the AIL, to be handled when recovery is finished or
     822             :          * cancelled.
     823             :          */
     824      182292 :         error = xfs_log_mount(mp, mp->m_logdev_targp,
     825       60764 :                               XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
     826       60764 :                               XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
     827       60764 :         if (error) {
     828          42 :                 xfs_warn(mp, "log mount failed");
     829          42 :                 goto out_inodegc_shrinker;
     830             :         }
     831             : 
     832             :         /* Enable background inode inactivation workers. */
     833       60722 :         xfs_inodegc_start(mp);
     834       60722 :         xfs_blockgc_start(mp);
     835             : 
     836             :         /*
     837             :          * Now that we've recovered any pending superblock feature bit
     838             :          * additions, we can finish setting up the attr2 behaviour for the
     839             :          * mount. The noattr2 option overrides the superblock flag, so only
     840             :          * check the superblock feature flag if the mount option is not set.
     841             :          */
     842       60722 :         if (xfs_has_noattr2(mp)) {
     843          10 :                 mp->m_features &= ~XFS_FEAT_ATTR2;
     844       60712 :         } else if (!xfs_has_attr2(mp) &&
     845          33 :                    (mp->m_sb.sb_features2 & XFS_SB_VERSION2_ATTR2BIT)) {
     846          22 :                 mp->m_features |= XFS_FEAT_ATTR2;
     847             :         }
     848             : 
     849             :         /*
     850             :          * Get and sanity-check the root inode.
     851             :          * Save the pointer to it in the mount structure.
     852             :          */
     853       60722 :         error = xfs_iget(mp, NULL, sbp->sb_rootino, XFS_IGET_UNTRUSTED,
     854             :                          XFS_ILOCK_EXCL, &rip);
     855       60722 :         if (error) {
     856          20 :                 xfs_warn(mp,
     857             :                         "Failed to read root inode 0x%llx, error %d",
     858             :                         sbp->sb_rootino, -error);
     859          20 :                 goto out_log_dealloc;
     860             :         }
     861             : 
     862       60702 :         ASSERT(rip != NULL);
     863             : 
     864       60702 :         if (XFS_IS_CORRUPT(mp, !S_ISDIR(VFS_I(rip)->i_mode))) {
     865           0 :                 xfs_warn(mp, "corrupted root inode %llu: not a directory",
     866             :                         (unsigned long long)rip->i_ino);
     867           0 :                 xfs_iunlock(rip, XFS_ILOCK_EXCL);
     868           0 :                 error = -EFSCORRUPTED;
     869           0 :                 goto out_rele_rip;
     870             :         }
     871       60702 :         mp->m_rootip = rip;  /* save it */
     872             : 
     873       60702 :         xfs_iunlock(rip, XFS_ILOCK_EXCL);
     874             : 
     875             :         /*
     876             :          * Initialize realtime inode pointers in the mount structure
     877             :          */
     878       60702 :         error = xfs_rtmount_inodes(mp);
     879       60702 :         if (error) {
     880             :                 /*
     881             :                  * Free up the root inode.
     882             :                  */
     883           0 :                 xfs_warn(mp, "failed to read RT inodes");
     884           0 :                 goto out_rele_rip;
     885             :         }
     886             : 
     887             :         /* Make sure the summary counts are ok. */
     888       60702 :         error = xfs_check_summary_counts(mp);
     889       60702 :         if (error)
     890          11 :                 goto out_rtunmount;
     891             : 
     892             :         /*
     893             :          * If this is a read-only mount defer the superblock updates until
     894             :          * the next remount into writeable mode.  Otherwise we would never
     895             :          * perform the update e.g. for the root filesystem.
     896             :          */
     897       60765 :         if (mp->m_update_sb && !xfs_is_readonly(mp)) {
     898          63 :                 error = xfs_sync_sb(mp, false);
     899          63 :                 if (error) {
     900           0 :                         xfs_warn(mp, "failed to write sb changes");
     901           0 :                         goto out_rtunmount;
     902             :                 }
     903             :         }
     904             : 
     905             :         /*
     906             :          * Initialise the XFS quota management subsystem for this mount
     907             :          */
     908       60691 :         if (XFS_IS_QUOTA_ON(mp)) {
     909       51432 :                 error = xfs_qm_newmount(mp, &quotamount, &quotaflags);
     910       51432 :                 if (error)
     911          15 :                         goto out_rtunmount;
     912             :         } else {
     913             :                 /*
     914             :                  * If a file system had quotas running earlier, but decided to
     915             :                  * mount without -o uquota/pquota/gquota options, revoke the
     916             :                  * quotachecked license.
     917             :                  */
     918        9259 :                 if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
     919         147 :                         xfs_notice(mp, "resetting quota flags");
     920         147 :                         error = xfs_mount_reset_sbqflags(mp);
     921         147 :                         if (error)
     922           0 :                                 goto out_rtunmount;
     923             :                 }
     924             :         }
     925             : 
     926             :         /*
     927             :          * Finish recovering the file system.  This part needed to be delayed
     928             :          * until after the root and real-time bitmap inodes were consistently
     929             :          * read in.  Temporarily create per-AG space reservations for metadata
     930             :          * btree shape changes because space freeing transactions (for inode
     931             :          * inactivation) require the per-AG reservation in lieu of reserving
     932             :          * blocks.
     933             :          */
     934       60676 :         error = xfs_fs_reserve_ag_blocks(mp);
     935       60676 :         if (error && error == -ENOSPC)
     936           0 :                 xfs_warn(mp,
     937             :         "ENOSPC reserving per-AG metadata pool, log recovery may fail.");
     938       60676 :         error = xfs_log_mount_finish(mp); /* overwrites any reservation error from above */
     939       60676 :         xfs_fs_unreserve_ag_blocks(mp);
     940       60676 :         if (error) {
     941           6 :                 xfs_warn(mp, "log mount finish failed");
     942           6 :                 goto out_rtunmount;
     943             :         }
     944             : 
     945             :         /*
     946             :          * Now the log is fully replayed, we can transition to full read-only
     947             :          * mode for read-only mounts. This will sync all the metadata and clean
     948             :          * the log so that the recovery we just performed does not have to be
     949             :          * replayed again on the next mount.
     950             :          *
     951             :          * We use the same quiesce mechanism as the rw->ro remount, as they are
     952             :          * semantically identical operations.
     953             :          */
     954      121340 :         if (xfs_is_readonly(mp) && !xfs_has_norecovery(mp))
     955        2204 :                 xfs_log_clean(mp);
     956             : 
     957             :         /*
     958             :          * Complete the quota initialisation, post-log-replay component.
     959             :          */
     960       60670 :         if (quotamount) {
     961       22593 :                 ASSERT(mp->m_qflags == 0);
     962       22593 :                 mp->m_qflags = quotaflags;
     963             : 
     964       22593 :                 xfs_qm_mount_quotas(mp);
     965             :         }
     966             : 
     967             :         /*
     968             :          * Now we are mounted, reserve a small amount of unused space for
     969             :          * privileged transactions. This is needed so that transaction
     970             :          * space required for critical operations can dip into this pool
     971             :          * when at ENOSPC. This is needed for operations like create with
     972             :          * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
     973             :          * are not allowed to use this reserved space.
     974             :          *
     975             :          * This may drive us straight to ENOSPC on mount, but that implies
     976             :          * we were already there on the last unmount. Warn if this occurs.
     977             :          */
     978      121340 :         if (!xfs_is_readonly(mp)) {
     979       58417 :                 resblks = xfs_default_resblks(mp);
     980       58417 :                 error = xfs_reserve_blocks(mp, &resblks, NULL);
     981       58417 :                 if (error)
     982           0 :                         xfs_warn(mp,
     983             :         "Unable to allocate reserve blocks. Continuing without reserve pool.");
     984             : 
     985             :                 /* Reserve AG blocks for future btree expansion. */
     986       58417 :                 error = xfs_fs_reserve_ag_blocks(mp);
     987       58417 :                 if (error && error != -ENOSPC) /* -ENOSPC is tolerated: fall through to return 0 */
     988          97 :                         goto out_agresv;
     989             :         }
     990             : 
     991             :         return 0;
     992             : 
     993             :  out_agresv:
     994          97 :         xfs_fs_unreserve_ag_blocks(mp);
     995          97 :         xfs_qm_unmount_quotas(mp);
     996         129 :  out_rtunmount:
     997         129 :         xfs_rtunmount_inodes(mp);
     998         129 :  out_rele_rip:
     999         129 :         xfs_irele(rip);
    1000             :         /* Clean out dquots that might be in memory after quotacheck. */
    1001         129 :         xfs_qm_unmount(mp);
    1002             : 
    1003             :         /*
    1004             :          * Inactivate all inodes that might still be in memory after a log
    1005             :          * intent recovery failure so that reclaim can free them.  Metadata
    1006             :          * inodes and the root directory shouldn't need inactivation, but the
    1007             :          * mount failed for some reason, so pull down all the state and flee.
    1008             :          */
    1009         129 :         xfs_inodegc_flush(mp);
    1010             : 
    1011             :         /*
    1012             :          * Flush all inode reclamation work and flush the log.
    1013             :          * We have to do this /after/ rtunmount and qm_unmount because those
    1014             :          * two will have scheduled delayed reclaim for the rt/quota inodes.
    1015             :          *
    1016             :          * This is slightly different from the unmountfs call sequence
    1017             :          * because we could be tearing down a partially set up mount.  In
    1018             :          * particular, if log_mount_finish fails we bail out without calling
    1019             :          * qm_unmount_quotas and therefore rely on qm_unmount to release the
    1020             :          * quota inodes.
    1021             :          */
    1022         129 :         xfs_unmount_flush_inodes(mp);
    1023         149 :  out_log_dealloc:
    1024         149 :         xfs_log_mount_cancel(mp);
    1025         191 :  out_inodegc_shrinker:
    1026         191 :         unregister_shrinker(&mp->m_inodegc_shrinker);
    1027         191 :  out_fail_wait:
    1028         191 :         if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
    1029          59 :                 xfs_buftarg_drain(mp->m_logdev_targp);
    1030         191 :         xfs_buftarg_drain(mp->m_ddev_targp);
    1031         191 :  out_free_perag:
    1032         191 :         xfs_free_perag(mp);
    1033         191 :  out_free_dir:
    1034         191 :         xfs_da_unmount(mp);
    1035         291 :  out_remove_uuid:
    1036         291 :         xfs_uuid_unmount(mp);
    1037         307 :  out_remove_errortag:
    1038         307 :         xfs_errortag_del(mp);
    1039         307 :  out_remove_error_sysfs:
    1040         307 :         xfs_error_sysfs_del(mp);
    1041         307 :  out_remove_scrub_stats:
    1042         307 :         xchk_stats_unregister(mp->m_scrub_stats);
    1043         307 :         xfs_sysfs_del(&mp->m_stats.xs_kobj);
    1044         307 :  out_remove_sysfs:
    1045         307 :         xfs_sysfs_del(&mp->m_kobj);
    1046             :  out:
    1047             :         return error;
    1048             : }
    1049             : 
    1050             : /*
    1051             :  * This flushes out the inodes, dquots and the superblock, unmounts the
    1052             :  * log and makes sure that incore structures are freed.
    1053             :  */
    1054             : void
    1055       60583 : xfs_unmountfs(
    1056             :         struct xfs_mount        *mp)
    1057             : {
    1058       60583 :         uint64_t                resblks;
    1059       60583 :         int                     error;
    1060             : 
    1061             :         /*
    1062             :          * Perform all on-disk metadata updates required to inactivate inodes
    1063             :          * that the VFS evicted earlier in the unmount process.  Freeing inodes
    1064             :          * and discarding CoW fork preallocations can cause shape changes to
    1065             :          * the free inode and refcount btrees, respectively, so we must finish
    1066             :          * this before we discard the metadata space reservations.  Metadata
    1067             :          * inodes and the root directory do not require inactivation.
    1068             :          */
    1069       60583 :         xfs_inodegc_flush(mp);
    1070             : 
    1071       60583 :         xfs_blockgc_stop(mp);
    1072       60583 :         xfs_fs_unreserve_ag_blocks(mp);
    1073       60583 :         xfs_qm_unmount_quotas(mp);
    1074       60583 :         xfs_rtunmount_inodes(mp);
    1075       60583 :         xfs_irele(mp->m_rootip);
    1076             : 
    1077       60583 :         xfs_unmount_flush_inodes(mp);
    1078             : 
    1079       60583 :         xfs_qm_unmount(mp);
    1080             : 
    1081             :         /*
    1082             :          * Unreserve any blocks we have so that when we unmount we don't account
    1083             :          * the reserved free space as used. This is really only necessary for
    1084             :          * lazy superblock counting because it trusts the incore superblock
    1085             :          * counters to be absolutely correct on clean unmount.
    1086             :          *
    1087             :          * We don't bother correcting this elsewhere for lazy superblock
    1088             :          * counting because on mount of an unclean filesystem we reconstruct the
    1089             :          * correct counter value and this is irrelevant.
    1090             :          *
    1091             :          * For non-lazy counter filesystems, this doesn't matter at all because
    1092             :          * we only ever apply deltas to the superblock and hence the incore
    1093             :          * value does not matter....
    1094             :          */
    1095       60583 :         resblks = 0; /* request a reserve pool of zero blocks */
    1096       60583 :         error = xfs_reserve_blocks(mp, &resblks, NULL);
    1097       60583 :         if (error) /* failure is only warned about; unmount continues */
    1098           0 :                 xfs_warn(mp, "Unable to free reserved block pool. "
    1099             :                                 "Freespace may not be correct on next mount.");
    1100       60583 :         xfs_unmount_check(mp);
    1101             : 
    1102       60583 :         xfs_log_unmount(mp);
    1103       60583 :         xfs_da_unmount(mp);
    1104       60583 :         xfs_uuid_unmount(mp);
    1105             : 
    1106             : #if defined(DEBUG)
    1107       60583 :         xfs_errortag_clearall(mp);
    1108             : #endif
    1109       60583 :         unregister_shrinker(&mp->m_inodegc_shrinker);
    1110       60583 :         xfs_free_perag(mp);
    1111             : 
    1112       60583 :         xfs_errortag_del(mp);
    1113       60583 :         xfs_error_sysfs_del(mp);
    1114       60583 :         xchk_stats_unregister(mp->m_scrub_stats);
    1115       60583 :         xfs_sysfs_del(&mp->m_stats.xs_kobj);
    1116       60583 :         xfs_sysfs_del(&mp->m_kobj);
    1117       60583 : }
    1118             : 
    1119             : /*
    1120             :  * Determine whether modifications can proceed. The caller specifies the minimum
    1121             :  * freeze level for which modifications should not be allowed. This allows
    1122             :  * certain operations to proceed while the freeze sequence is in progress, if
    1123             :  * necessary.
    1124             :  */
    1125             : bool
    1126       17566 : xfs_fs_writable(
    1127             :         struct xfs_mount        *mp,
    1128             :         int                     level)
    1129             : {
    1130       17566 :         ASSERT(level > SB_UNFROZEN);
    1131       35055 :         if ((mp->m_super->s_writers.frozen >= level) ||
    1132       17487 :             xfs_is_shutdown(mp) || xfs_is_readonly(mp))
    1133          79 :                 return false;
    1134             : 
    1135             :         return true;
    1136             : }
    1137             : 
    1138             : /* Adjust m_fdblocks or m_frextents. */
    1139             : int
    1140  2363772676 : xfs_mod_freecounter(
    1141             :         struct xfs_mount        *mp,
    1142             :         struct percpu_counter   *counter,
    1143             :         int64_t                 delta,
    1144             :         bool                    rsvd)
    1145             : {
    1146  2363772676 :         int64_t                 lcounter;
    1147  2363772676 :         long long               res_used;
    1148  2363772676 :         uint64_t                set_aside = 0;
    1149  2363772676 :         s32                     batch;
    1150  2363772676 :         bool                    has_resv_pool;
    1151             : 
    1152  2363772676 :         ASSERT(counter == &mp->m_fdblocks || counter == &mp->m_frextents);
    1153  2363772676 :         has_resv_pool = (counter == &mp->m_fdblocks);
    1154  2363772676 :         if (rsvd)
    1155   190063778 :                 ASSERT(has_resv_pool);
    1156             : 
    1157  2363772676 :         if (delta > 0) {
    1158             :                 /*
    1159             :                  * If the reserve pool is depleted, put blocks back into it
    1160             :                  * first. Most of the time the pool is full.
    1161             :                  */
    1162  1166352318 :                 if (likely(!has_resv_pool ||
    1163             :                            mp->m_resblks == mp->m_resblks_avail)) {
    1164  1165817809 :                         percpu_counter_add(counter, delta);
    1165  1165817809 :                         return 0;
    1166             :                 }
    1167             : 
    1168      534509 :                 spin_lock(&mp->m_sb_lock);
    1169      534551 :                 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
    1170             : 
    1171      534551 :                 if (res_used > delta) {
    1172      166375 :                         mp->m_resblks_avail += delta;
    1173             :                 } else {
    1174      368176 :                         delta -= res_used;
    1175      368176 :                         mp->m_resblks_avail = mp->m_resblks;
    1176      368176 :                         percpu_counter_add(counter, delta);
    1177             :                 }
    1178      534551 :                 spin_unlock(&mp->m_sb_lock);
    1179      534551 :                 return 0;
    1180             :         }
    1181             : 
    1182             :         /*
    1183             :          * Taking blocks away, need to be more accurate the closer we
    1184             :          * are to zero.
    1185             :          *
    1186             :          * If the counter has a value of less than 2 * max batch size,
    1187             :          * then make everything serialise as we are real close to
    1188             :          * ENOSPC.
    1189             :          */
    1190  1197420358 :         if (__percpu_counter_compare(counter, 2 * XFS_FDBLOCKS_BATCH,
    1191             :                                      XFS_FDBLOCKS_BATCH) < 0)
    1192             :                 batch = 1;
    1193             :         else
    1194  1165676942 :                 batch = XFS_FDBLOCKS_BATCH;
    1195             : 
    1196             :         /*
    1197             :          * Set aside allocbt blocks because these blocks are tracked as free
    1198             :          * space but not available for allocation. Technically this means that a
    1199             :          * single reservation cannot consume all remaining free space, but the
    1200             :          * ratio of allocbt blocks to usable free blocks should be rather small.
    1201             :          * The tradeoff without this is that filesystems that maintain high
    1202             :          * perag block reservations can over reserve physical block availability
    1203             :          * and fail physical allocation, which leads to much more serious
    1204             :          * problems (i.e. transaction abort, pagecache discards, etc.) than
    1205             :          * slightly premature -ENOSPC.
    1206             :          */
    1207  1196355865 :         if (has_resv_pool)
    1208  1131387289 :                 set_aside = xfs_fdblocks_unavailable(mp);
    1209  1196355865 :         percpu_counter_add_batch(counter, delta, batch);
    1210  1196639074 :         if (__percpu_counter_compare(counter, set_aside,
    1211             :                                      XFS_FDBLOCKS_BATCH) >= 0) {
    1212             :                 /* we had space! */
    1213             :                 return 0;
    1214             :         }
    1215             : 
    1216             :         /*
    1217             :          * lock up the sb for dipping into reserves before releasing the space
    1218             :          * that took us to ENOSPC.
    1219             :          */
    1220    17091951 :         spin_lock(&mp->m_sb_lock);
    1221    17094005 :         percpu_counter_add(counter, -delta);
    1222    17094005 :         if (!has_resv_pool || !rsvd)
    1223    16676572 :                 goto fdblocks_enospc;
    1224             : 
    1225      417433 :         lcounter = (long long)mp->m_resblks_avail + delta;
    1226      417433 :         if (lcounter >= 0) {
    1227      395155 :                 mp->m_resblks_avail = lcounter;
    1228      395155 :                 spin_unlock(&mp->m_sb_lock);
    1229      395155 :                 return 0;
    1230             :         }
    1231       22278 :         xfs_warn_once(mp,
    1232             : "Reserve blocks depleted! Consider increasing reserve pool size.");
    1233             : 
    1234    16698850 : fdblocks_enospc:
    1235    16698850 :         spin_unlock(&mp->m_sb_lock);
    1236    16698850 :         return -ENOSPC;
    1237             : }
    1238             : 
    1239             : /*
    1240             :  * Used to free the superblock along various error paths.
    1241             :  */
    1242             : void
    1243       61014 : xfs_freesb(
    1244             :         struct xfs_mount        *mp)
    1245             : {
    1246       61014 :         struct xfs_buf          *bp = mp->m_sb_bp;
    1247             : 
    1248       61014 :         xfs_buf_lock(bp);
    1249       61014 :         mp->m_sb_bp = NULL;
    1250       61014 :         xfs_buf_relse(bp);
    1251       61014 : }
    1252             : 
    1253             : /*
    1254             :  * If the underlying (data/log/rt) device is readonly, there are some
    1255             :  * operations that cannot proceed.
    1256             :  */
    1257             : int
    1258       36393 : xfs_dev_is_read_only(
    1259             :         struct xfs_mount        *mp,
    1260             :         char                    *message)
    1261             : {
    1262       72760 :         if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
    1263       36367 :             xfs_readonly_buftarg(mp->m_logdev_targp) ||
    1264       36367 :             (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
    1265          26 :                 xfs_notice(mp, "%s required on read-only device.", message);
    1266          26 :                 xfs_notice(mp, "write access unavailable, cannot proceed.");
    1267          26 :                 return -EROFS;
    1268             :         }
    1269             :         return 0;
    1270             : }
    1271             : 
    1272             : /* Force the summary counters to be recalculated at next mount. */
    1273             : void
    1274      353685 : xfs_force_summary_recalc(
    1275             :         struct xfs_mount        *mp)
    1276             : {
    1277      353685 :         if (!xfs_has_lazysbcount(mp))
    1278             :                 return;
    1279             : 
    1280      353678 :         xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
    1281      353722 :         xfs_fs_mark_checked(mp, XFS_SICK_FS_COUNTERS);
    1282             : }
    1283             : 
    1284             : /*
    1285             :  * Enable a log incompat feature flag in the primary superblock.  The caller
    1286             :  * cannot have any other transactions in progress.
    1287             :  */
    1288             : int
    1289      177596 : xfs_add_incompat_log_feature(
    1290             :         struct xfs_mount        *mp,
    1291             :         uint32_t                feature)
    1292             : {
    1293      177596 :         struct xfs_dsb          *dsb;
    1294      177596 :         int                     error;
    1295             : 
    1296      177596 :         ASSERT(hweight32(feature) == 1);
    1297      177596 :         ASSERT(!(feature & XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN));
    1298             : 
    1299             :         /*
    1300             :          * Force the log to disk and kick the background AIL thread to reduce
    1301             :          * the chances that the bwrite will stall waiting for the AIL to unpin
    1302             :          * the primary superblock buffer.  This isn't a data integrity
    1303             :          * operation, so we don't need a synchronous AIL push.
    1304             :          */
    1305      177596 :         error = xfs_log_force(mp, XFS_LOG_SYNC);
    1306      177694 :         if (error)
    1307             :                 return error;
    1308      177693 :         xfs_ail_push_all(mp->m_ail);
    1309             : 
    1310             :         /*
    1311             :          * Lock the primary superblock buffer to serialize all callers that
    1312             :          * are trying to set feature bits.
    1313             :          */
    1314      177692 :         xfs_buf_lock(mp->m_sb_bp);
    1315      177734 :         xfs_buf_hold(mp->m_sb_bp);
    1316             : 
    1317      355468 :         if (xfs_is_shutdown(mp)) {
    1318           2 :                 error = -EIO;
    1319           2 :                 goto rele;
    1320             :         }
    1321             : 
    1322      177732 :         if (xfs_sb_has_incompat_log_feature(&mp->m_sb, feature))
    1323       91758 :                 goto rele;
    1324             : 
    1325             :         /*
    1326             :          * Write the primary superblock to disk immediately, because we need
    1327             :          * the log_incompat bit to be set in the primary super now to protect
    1328             :          * the log items that we're going to commit later.
    1329             :          */
    1330       85974 :         dsb = mp->m_sb_bp->b_addr;
    1331       85974 :         xfs_sb_to_disk(dsb, &mp->m_sb);
    1332       85974 :         dsb->sb_features_log_incompat |= cpu_to_be32(feature);
    1333       85974 :         error = xfs_bwrite(mp->m_sb_bp);
    1334       85974 :         if (error)
    1335         131 :                 goto shutdown;
    1336             : 
    1337             :         /*
    1338             :          * Add the feature bits to the incore superblock before we unlock the
    1339             :          * buffer.
    1340             :          */
    1341       85843 :         xfs_sb_add_incompat_log_features(&mp->m_sb, feature);
    1342       85843 :         xfs_buf_relse(mp->m_sb_bp);
    1343             : 
    1344             :         /* Log the superblock to disk. */
    1345       85843 :         return xfs_sync_sb(mp, false);
    1346             : shutdown:
    1347         131 :         xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
    1348       91891 : rele:
    1349       91891 :         xfs_buf_relse(mp->m_sb_bp);
    1350       91890 :         return error;
    1351             : }
    1352             : 
    1353             : /*
    1354             :  * Clear all the log incompat flags from the superblock.
    1355             :  *
    1356             :  * The caller cannot be in a transaction, must ensure that the log does not
    1357             :  * contain any log items protected by any log incompat bit, and must ensure
    1358             :  * that there are no other threads that depend on the state of the log incompat
    1359             :  * feature flags in the primary super.
    1360             :  *
    1361             :  * Returns true if the superblock is dirty.
    1362             :  */
    1363             : bool
    1364      133934 : xfs_clear_incompat_log_features(
    1365             :         struct xfs_mount        *mp,
    1366             :         uint32_t                features)
    1367             : {
    1368      133934 :         bool                    ret = false;
    1369             : 
    1370      133934 :         if (!xfs_has_crc(mp) ||
    1371       88611 :             !xfs_sb_has_incompat_log_feature(&mp->m_sb, features) ||
    1372             :             xfs_is_shutdown(mp))
    1373             :                 return false;
    1374             : 
    1375             :         /*
    1376             :          * Update the incore superblock.  We synchronize on the primary super
    1377             :          * buffer lock to be consistent with the add function, though at least
    1378             :          * in theory this shouldn't be necessary.
    1379             :          */
    1380       78158 :         xfs_buf_lock(mp->m_sb_bp);
    1381       78158 :         xfs_buf_hold(mp->m_sb_bp);
    1382             : 
    1383       78158 :         if (xfs_sb_has_incompat_log_feature(&mp->m_sb, features)) {
    1384       78158 :                 xfs_sb_remove_incompat_log_features(&mp->m_sb, features);
    1385       78158 :                 ret = true;
    1386             :         }
    1387             : 
    1388       78158 :         xfs_buf_relse(mp->m_sb_bp);
    1389       78158 :         return ret;
    1390             : }
    1391             : 
    1392             : /*
    1393             :  * Update the in-core delayed block counter.
    1394             :  *
    1395             :  * We prefer to update the counter without having to take a spinlock for every
    1396             :  * counter update (i.e. batching).  Each change to delayed allocation
    1397             :  * reservations can easily exceed the default percpu counter
    1398             :  * batching, so we use a larger batch factor here.
    1399             :  *
    1400             :  * Note that we don't currently have any callers requiring fast summation
    1401             :  * (e.g. percpu_counter_read) so we can use a big batch value here.
    1402             :  */
    1403             : #define XFS_DELALLOC_BATCH      (4096)
    1404             : void
    1405   135602994 : xfs_mod_delalloc(
    1406             :         struct xfs_mount        *mp,
    1407             :         int64_t                 delta)
    1408             : {
    1409   135602994 :         percpu_counter_add_batch(&mp->m_delalloc_blks, delta,
    1410             :                         XFS_DELALLOC_BATCH);
    1411   135524817 : }

Generated by: LCOV version 1.14