LCOV - code coverage report
Current view: top level - include/linux - iversion.h (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-achx @ Mon Jul 31 20:08:12 PDT 2023 Lines: 10 11 90.9 %
Date: 2023-07-31 20:08:12 Functions: 0 0 -

          Line data    Source code
       1             : /* SPDX-License-Identifier: GPL-2.0 */
       2             : #ifndef _LINUX_IVERSION_H
       3             : #define _LINUX_IVERSION_H
       4             : 
       5             : #include <linux/fs.h>
       6             : 
       7             : /*
       8             :  * The inode->i_version field:
       9             :  * ---------------------------
      10             :  * The change attribute (i_version) is mandated by NFSv4 and is mostly for
      11             :  * knfsd, but is also used for other purposes (e.g. IMA). The i_version must
      12             :  * appear larger to observers if there was an explicit change to the inode's
      13             :  * data or metadata since it was last queried.
      14             :  *
      15             :  * An explicit change is one that would ordinarily result in a change to the
      16             :  * inode status change time (aka ctime). i_version must appear to change, even
      17             :  * if the ctime does not (since the whole point is to avoid missing updates due
      18             :  * to timestamp granularity). If POSIX or other relevant spec mandates that the
      19             :  * ctime must change due to an operation, then the i_version counter must be
      20             :  * incremented as well.
      21             :  *
      22             :  * Making the i_version update completely atomic with the operation itself would
      23             :  * be prohibitively expensive. Traditionally the kernel has updated the times on
      24             :  * directories after an operation that changes its contents. For regular files,
      25             :  * the ctime is usually updated before the data is copied into the cache for a
      26             :  * write. This means that there is a window of time when an observer can
      27             :  * associate a new timestamp with old file contents. Since the purpose of the
      28             :  * i_version is to allow for better cache coherency, the i_version must always
      29             :  * be updated after the results of the operation are visible. Updating it before
      30             :  * and after a change is also permitted. (Note that no filesystems currently do
      31             :  * this. Fixing that is a work-in-progress).
      32             :  *
      33             :  * Observers see the i_version as a 64-bit number that never decreases. If it
      34             :  * remains the same since it was last checked, then nothing has changed in the
      35             :  * inode. If it's different then something has changed. Observers cannot infer
      36             :  * anything about the nature or magnitude of the changes from the value, only
      37             :  * that the inode has changed in some fashion.
      38             :  *
      39             :  * Not all filesystems properly implement the i_version counter. Subsystems that
      40             :  * want to use i_version field on an inode should first check whether the
      41             :  * filesystem sets the SB_I_VERSION flag (usually via the IS_I_VERSION macro).
      42             :  *
      43             :  * Those that set SB_I_VERSION will automatically have their i_version counter
      44             :  * incremented on writes to normal files. If the SB_I_VERSION is not set, then
      45             :  * the VFS will not touch it on writes, and the filesystem can use it how it
      46             :  * wishes. Note that the filesystem is always responsible for updating the
      47             :  * i_version on namespace changes in directories (mkdir, rmdir, unlink, etc.).
      48             :  * We consider these sorts of filesystems to have a kernel-managed i_version.
      49             :  *
      50             :  * It may be impractical for filesystems to keep i_version updates atomic with
      51             :  * respect to the changes that cause them.  They should, however, guarantee
      52             :  * that i_version updates are never visible before the changes that caused
      53             :  * them.  Also, i_version updates should never be delayed longer than it takes
      54             :  * the original change to reach disk.
      55             :  *
      56             :  * This implementation uses the low bit in the i_version field as a flag to
      57             :  * track when the value has been queried. If it has not been queried since it
      58             :  * was last incremented, we can skip the increment in most cases.
      59             :  *
      60             :  * In the event that we're updating the ctime, we will usually go ahead and
      61             :  * bump the i_version anyway. Since that has to go to stable storage in some
      62             :  * fashion, we might as well increment it as well.
      63             :  *
      64             :  * With this implementation, the value should always appear to observers to
      65             :  * increase over time if the file has changed. It's recommended to use
      66             :  * inode_eq_iversion() helper to compare values.
      67             :  *
      68             :  * Note that some filesystems (e.g. NFS and AFS) just use the field to store
      69             :  * a server-provided value (for the most part). For that reason, those
      70             :  * filesystems do not set SB_I_VERSION. These filesystems are considered to
      71             :  * have a self-managed i_version.
      72             :  *
      73             :  * Persistently storing the i_version
      74             :  * ----------------------------------
      75             :  * Queries of the i_version field are not gated on them hitting the backing
      76             :  * store. It's always possible that the host could crash after allowing
      77             :  * a query of the value but before it has made it to disk.
      78             :  *
      79             :  * To mitigate this problem, filesystems should always use
      80             :  * inode_set_iversion_queried when loading an existing inode from disk. This
      81             :  * ensures that the next attempted inode increment will result in the value
      82             :  * changing.
      83             :  *
      84             :  * Storing the value to disk therefore does not count as a query, so those
      85             :  * filesystems should use inode_peek_iversion to grab the value to be stored.
      86             :  * There is no need to flag the value as having been queried in that case.
      87             :  */
      88             : 
      89             : /*
      90             :  * We borrow the lowest bit in the i_version to use as a flag to tell whether
      91             :  * it has been queried since we last incremented it. If it has, then we must
      92             :  * increment it on the next change. After that, we can clear the flag and
      93             :  * avoid incrementing it again until it has again been queried.
      94             :  */
      95             : #define I_VERSION_QUERIED_SHIFT (1)
      96             : #define I_VERSION_QUERIED       (1ULL << (I_VERSION_QUERIED_SHIFT - 1))
      97             : #define I_VERSION_INCREMENT     (1ULL << I_VERSION_QUERIED_SHIFT)
      98             : 
      99             : /**
     100             :  * inode_set_iversion_raw - set i_version to the specified raw value
     101             :  * @inode: inode to set
     102             :  * @val: new i_version value to set
     103             :  *
     104             :  * Set @inode's i_version field to @val. This function is for use by
     105             :  * filesystems that self-manage the i_version.
     106             :  *
     107             :  * For example, the NFS client stores its NFSv4 change attribute in this way,
     108             :  * and the AFS client stores the data_version from the server here.
     109             :  */
     110             : static inline void
     111             : inode_set_iversion_raw(struct inode *inode, u64 val)
     112             : {
     113  1184061545 :         atomic64_set(&inode->i_version, val);
     114           0 : }
     115             : 
     116             : /**
     117             :  * inode_peek_iversion_raw - grab a "raw" iversion value
     118             :  * @inode: inode from which i_version should be read
     119             :  *
     120             :  * Grab a "raw" inode->i_version value and return it. The i_version is not
     121             :  * flagged or converted in any way. This is mostly used to access a self-managed
     122             :  * i_version.
     123             :  *
     124             :  * With those filesystems, we want to treat the i_version as an entirely
     125             :  * opaque value.
     126             :  */
     127             : static inline u64
     128             : inode_peek_iversion_raw(const struct inode *inode)
     129             : {
     130  8260954480 :         return atomic64_read(&inode->i_version);
     131             : }
     132             : 
     133             : /**
     134             :  * inode_set_max_iversion_raw - update i_version if the new value is larger
     135             :  * @inode: inode to set
     136             :  * @val: new i_version to set
     137             :  *
     138             :  * Some self-managed filesystems (e.g. Ceph) will only update the i_version
     139             :  * value if the new value is larger than the one we already have.
     140             :  */
     141             : static inline void
     142             : inode_set_max_iversion_raw(struct inode *inode, u64 val)
     143             : {
     144             :         u64 cur = inode_peek_iversion_raw(inode);
     145             : 
     146             :         do {
     147             :                 if (cur > val)
     148             :                         break;
     149             :         } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, val));
     150             : }
     151             : 
     152             : /**
     153             :  * inode_set_iversion - set i_version to a particular value
     154             :  * @inode: inode to set
     155             :  * @val: new i_version value to set
     156             :  *
     157             :  * Set @inode's i_version field to @val. This function is for filesystems with
     158             :  * a kernel-managed i_version, for initializing a newly-created inode from
     159             :  * scratch.
     160             :  *
     161             :  * In this case, we do not set the QUERIED flag since we know that this value
     162             :  * has never been queried.
     163             :  */
     164             : static inline void
     165             : inode_set_iversion(struct inode *inode, u64 val)
     166             : {
     167   130173357 :         inode_set_iversion_raw(inode, val << I_VERSION_QUERIED_SHIFT);
     168             : }
     169             : 
     170             : /**
     171             :  * inode_set_iversion_queried - set i_version to a particular value as queried
     172             :  * @inode: inode to set
     173             :  * @val: new i_version value to set
     174             :  *
     175             :  * Set @inode's i_version field to @val, and flag it for increment on the next
     176             :  * change.
     177             :  *
     178             :  * Filesystems that persistently store the i_version on disk should use this
     179             :  * when loading an existing inode from disk.
     180             :  *
     181             :  * When loading in an i_version value from a backing store, we can't be certain
     182             :  * that it wasn't previously viewed before being stored. Thus, we must assume
     183             :  * that it was, to ensure that we don't end up handing out the same value for
     184             :  * different versions of the same inode.
     185             :  */
     186             : static inline void
     187             : inode_set_iversion_queried(struct inode *inode, u64 val)
     188             : {
     189  1054102869 :         inode_set_iversion_raw(inode, (val << I_VERSION_QUERIED_SHIFT) |
     190             :                                 I_VERSION_QUERIED);
     191      214741 : }
     192             : 
     193             : bool inode_maybe_inc_iversion(struct inode *inode, bool force);
     194             : 
     195             : /**
     196             :  * inode_inc_iversion - forcibly increment i_version
     197             :  * @inode: inode that needs to be updated
     198             :  *
     199             :  * Forcibly increment the i_version field. This always results in a change to
     200             :  * the observable value.
     201             :  */
     202             : static inline void
     203             : inode_inc_iversion(struct inode *inode)
     204             : {
     205    64726080 :         inode_maybe_inc_iversion(inode, true);
     206    29933401 : }
     207             : 
     208             : /**
     209             :  * inode_iversion_need_inc - is the i_version in need of being incremented?
     210             :  * @inode: inode to check
     211             :  *
     212             :  * Returns whether the inode->i_version counter needs incrementing on the next
     213             :  * change. Just fetch the value and check the QUERIED flag.
     214             :  */
     215             : static inline bool
     216             : inode_iversion_need_inc(struct inode *inode)
     217             : {
     218   705363295 :         return inode_peek_iversion_raw(inode) & I_VERSION_QUERIED;
     219             : }
     220             : 
     221             : /**
     222             :  * inode_inc_iversion_raw - forcibly increment raw i_version
     223             :  * @inode: inode that needs to be updated
     224             :  *
     225             :  * Forcibly increment the raw i_version field. This always results in a change
     226             :  * to the raw value.
     227             :  *
     228             :  * NFS will use the i_version field to store the value from the server. It
     229             :  * mostly treats it as opaque, but in the case where it holds a write
     230             :  * delegation, it must increment the value itself. This function does that.
     231             :  */
     232             : static inline void
     233             : inode_inc_iversion_raw(struct inode *inode)
     234             : {
     235             :         atomic64_inc(&inode->i_version);
     236             : }
     237             : 
     238             : /**
     239             :  * inode_peek_iversion - read i_version without flagging it to be incremented
     240             :  * @inode: inode from which i_version should be read
     241             :  *
     242             :  * Read the inode i_version counter for an inode without registering it as a
     243             :  * query.
     244             :  *
     245             :  * This is typically used by local filesystems that need to store an i_version
     246             :  * on disk. In that situation, it's not necessary to flag it as having been
     247             :  * viewed, as the result won't be used to gauge changes from that point.
     248             :  */
     249             : static inline u64
     250             : inode_peek_iversion(const struct inode *inode)
     251             : {
     252  4021828384 :         return inode_peek_iversion_raw(inode) >> I_VERSION_QUERIED_SHIFT;
     253             : }
     254             : 
     255             : /*
     256             :  * For filesystems without any sort of change attribute, the best we can
     257             :  * do is fake one up from the ctime:
     258             :  */
     259             : static inline u64 time_to_chattr(struct timespec64 *t)
     260             : {
     261             :         u64 chattr = t->tv_sec;
     262             : 
     263             :         chattr <<= 32;
     264             :         chattr += t->tv_nsec;
     265             :         return chattr;
     266             : }
     267             : 
     268             : u64 inode_query_iversion(struct inode *inode);
     269             : 
     270             : /**
     271             :  * inode_eq_iversion_raw - check whether the raw i_version counter has changed
     272             :  * @inode: inode to check
     273             :  * @old: old value to check against its i_version
     274             :  *
     275             :  * Compare the current raw i_version counter with a previous one. Returns true
     276             :  * if they are the same or false if they are different.
     277             :  */
     278             : static inline bool
     279             : inode_eq_iversion_raw(const struct inode *inode, u64 old)
     280             : {
     281             :         return inode_peek_iversion_raw(inode) == old;
     282             : }
     283             : 
     284             : /**
     285             :  * inode_eq_iversion - check whether the i_version counter has changed
     286             :  * @inode: inode to check
     287             :  * @old: old value to check against its i_version
     288             :  *
     289             :  * Compare an i_version counter with a previous one. Returns true if they are
     290             :  * the same, and false if they are different.
     291             :  *
     292             :  * Note that we don't need to set the QUERIED flag in this case, as the value
     293             :  * in the inode is not being recorded for later use.
     294             :  */
     295             : static inline bool
     296             : inode_eq_iversion(const struct inode *inode, u64 old)
     297             : {
     298   141961610 :         return inode_peek_iversion(inode) == old;
     299             : }
     300             : #endif

Generated by: LCOV version 1.14