Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2000-2006 Silicon Graphics, Inc.
4 : * All Rights Reserved.
5 : */
6 : #include <linux/iversion.h>
7 : #include "xfs.h"
8 : #include "xfs_fs.h"
9 : #include "xfs_shared.h"
10 : #include "xfs_format.h"
11 : #include "xfs_log_format.h"
12 : #include "xfs_trans_resv.h"
13 : #include "xfs_sb.h"
14 : #include "xfs_mount.h"
15 : #include "xfs_inode.h"
16 : #include "xfs_inode_util.h"
17 : #include "xfs_trans.h"
18 : #include "xfs_ialloc.h"
19 : #include "xfs_health.h"
20 : #include "xfs_bmap.h"
21 : #include "xfs_error.h"
22 : #include "xfs_trace.h"
23 : #include "xfs_ag.h"
24 : #include "xfs_iunlink_item.h"
25 : #include "xfs_inode_item.h"
26 : #include "xfs_imeta.h"
27 :
28 : uint16_t
29 1874905 : xfs_flags2diflags(
30 : struct xfs_inode *ip,
31 : unsigned int xflags)
32 : {
33 : /* can't set PREALLOC this way, just preserve it */
34 1874905 : uint16_t di_flags =
35 1874905 : (ip->i_diflags & XFS_DIFLAG_PREALLOC);
36 :
37 1874905 : if (xflags & FS_XFLAG_IMMUTABLE)
38 74 : di_flags |= XFS_DIFLAG_IMMUTABLE;
39 1874905 : if (xflags & FS_XFLAG_APPEND)
40 51 : di_flags |= XFS_DIFLAG_APPEND;
41 1874905 : if (xflags & FS_XFLAG_SYNC)
42 24 : di_flags |= XFS_DIFLAG_SYNC;
43 1874905 : if (xflags & FS_XFLAG_NOATIME)
44 11 : di_flags |= XFS_DIFLAG_NOATIME;
45 1874905 : if (xflags & FS_XFLAG_NODUMP)
46 14 : di_flags |= XFS_DIFLAG_NODUMP;
47 1874905 : if (xflags & FS_XFLAG_NODEFRAG)
48 0 : di_flags |= XFS_DIFLAG_NODEFRAG;
49 1874905 : if (xflags & FS_XFLAG_FILESTREAM)
50 928 : di_flags |= XFS_DIFLAG_FILESTREAM;
51 1874905 : if (S_ISDIR(VFS_I(ip)->i_mode)) {
52 12435 : if (xflags & FS_XFLAG_RTINHERIT)
53 51 : di_flags |= XFS_DIFLAG_RTINHERIT;
54 12435 : if (xflags & FS_XFLAG_NOSYMLINKS)
55 6 : di_flags |= XFS_DIFLAG_NOSYMLINKS;
56 12435 : if (xflags & FS_XFLAG_EXTSZINHERIT)
57 42 : di_flags |= XFS_DIFLAG_EXTSZINHERIT;
58 12435 : if (xflags & FS_XFLAG_PROJINHERIT)
59 1158 : di_flags |= XFS_DIFLAG_PROJINHERIT;
60 1862470 : } else if (S_ISREG(VFS_I(ip)->i_mode)) {
61 1862471 : if (xflags & FS_XFLAG_REALTIME)
62 564806 : di_flags |= XFS_DIFLAG_REALTIME;
63 1862471 : if (xflags & FS_XFLAG_EXTSIZE)
64 57 : di_flags |= XFS_DIFLAG_EXTSIZE;
65 : }
66 :
67 1874905 : return di_flags;
68 : }
69 :
70 : uint64_t
71 1273182 : xfs_flags2diflags2(
72 : struct xfs_inode *ip,
73 : unsigned int xflags)
74 : {
75 1273182 : uint64_t di_flags2 =
76 1273182 : (ip->i_diflags2 & (XFS_DIFLAG2_REFLINK |
77 : XFS_DIFLAG2_BIGTIME |
78 : XFS_DIFLAG2_NREXT64));
79 :
80 1273182 : if (xflags & FS_XFLAG_DAX)
81 24 : di_flags2 |= XFS_DIFLAG2_DAX;
82 1273182 : if (xflags & FS_XFLAG_COWEXTSIZE)
83 274 : di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
84 :
85 1273182 : return di_flags2;
86 : }
87 :
88 : uint32_t
89 69830025541 : xfs_ip2xflags(
90 : struct xfs_inode *ip)
91 : {
92 69830025541 : uint32_t flags = 0;
93 :
94 69830025541 : if (ip->i_diflags & XFS_DIFLAG_ANY) {
95 21614369192 : if (ip->i_diflags & XFS_DIFLAG_REALTIME)
96 9673044820 : flags |= FS_XFLAG_REALTIME;
97 21614369192 : if (ip->i_diflags & XFS_DIFLAG_PREALLOC)
98 10917455328 : flags |= FS_XFLAG_PREALLOC;
99 21614369192 : if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE)
100 364542 : flags |= FS_XFLAG_IMMUTABLE;
101 21614369192 : if (ip->i_diflags & XFS_DIFLAG_APPEND)
102 169 : flags |= FS_XFLAG_APPEND;
103 21614369192 : if (ip->i_diflags & XFS_DIFLAG_SYNC)
104 375474 : flags |= FS_XFLAG_SYNC;
105 21614369192 : if (ip->i_diflags & XFS_DIFLAG_NOATIME)
106 375461 : flags |= FS_XFLAG_NOATIME;
107 21614369192 : if (ip->i_diflags & XFS_DIFLAG_NODUMP)
108 375460 : flags |= FS_XFLAG_NODUMP;
109 21614369192 : if (ip->i_diflags & XFS_DIFLAG_RTINHERIT)
110 5048146781 : flags |= FS_XFLAG_RTINHERIT;
111 21614369192 : if (ip->i_diflags & XFS_DIFLAG_PROJINHERIT)
112 1244 : flags |= FS_XFLAG_PROJINHERIT;
113 21614369192 : if (ip->i_diflags & XFS_DIFLAG_NOSYMLINKS)
114 257126 : flags |= FS_XFLAG_NOSYMLINKS;
115 21614369192 : if (ip->i_diflags & XFS_DIFLAG_EXTSIZE)
116 4110 : flags |= FS_XFLAG_EXTSIZE;
117 21614369192 : if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT)
118 2182 : flags |= FS_XFLAG_EXTSZINHERIT;
119 21614369192 : if (ip->i_diflags & XFS_DIFLAG_NODEFRAG)
120 375414 : flags |= FS_XFLAG_NODEFRAG;
121 21614369192 : if (ip->i_diflags & XFS_DIFLAG_FILESTREAM)
122 14611 : flags |= FS_XFLAG_FILESTREAM;
123 : }
124 :
125 69830025541 : if (ip->i_diflags2 & XFS_DIFLAG2_ANY) {
126 70050316935 : if (ip->i_diflags2 & XFS_DIFLAG2_DAX)
127 86 : flags |= FS_XFLAG_DAX;
128 70050316935 : if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
129 9946 : flags |= FS_XFLAG_COWEXTSIZE;
130 : }
131 :
132 69830025541 : if (xfs_inode_has_attr_fork(ip))
133 70083742502 : flags |= FS_XFLAG_HASATTR;
134 69830025541 : return flags;
135 : }
136 :
137 : #define XFS_PROJID_DEFAULT 0
138 :
139 : prid_t
140 64317817 : xfs_get_initial_prid(struct xfs_inode *dp)
141 : {
142 64317817 : if (dp->i_diflags & XFS_DIFLAG_PROJINHERIT)
143 426 : return dp->i_projid;
144 :
145 : return XFS_PROJID_DEFAULT;
146 : }
147 :
148 : /* Propagate di_flags from a parent inode to a child inode. */
149 : static inline void
150 8973195 : xfs_inode_inherit_flags(
151 : struct xfs_inode *ip,
152 : const struct xfs_inode *pip)
153 : {
154 8973195 : unsigned int di_flags = 0;
155 8973195 : xfs_failaddr_t failaddr;
156 8973195 : umode_t mode = VFS_I(ip)->i_mode;
157 :
158 8973195 : if (S_ISDIR(mode)) {
159 2690862 : if (pip->i_diflags & XFS_DIFLAG_RTINHERIT)
160 2687976 : di_flags |= XFS_DIFLAG_RTINHERIT;
161 2690862 : if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
162 0 : di_flags |= XFS_DIFLAG_EXTSZINHERIT;
163 0 : ip->i_extsize = pip->i_extsize;
164 : }
165 2690862 : if (pip->i_diflags & XFS_DIFLAG_PROJINHERIT)
166 2 : di_flags |= XFS_DIFLAG_PROJINHERIT;
167 6282333 : } else if (S_ISREG(mode)) {
168 6282167 : if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
169 6268522 : xfs_has_realtime(ip->i_mount))
170 6268531 : di_flags |= XFS_DIFLAG_REALTIME;
171 6282167 : if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
172 430 : di_flags |= XFS_DIFLAG_EXTSIZE;
173 430 : ip->i_extsize = pip->i_extsize;
174 : }
175 : }
176 8973195 : if ((pip->i_diflags & XFS_DIFLAG_NOATIME) &&
177 11070 : xfs_inherit_noatime)
178 11070 : di_flags |= XFS_DIFLAG_NOATIME;
179 8973195 : if ((pip->i_diflags & XFS_DIFLAG_NODUMP) &&
180 11070 : xfs_inherit_nodump)
181 11070 : di_flags |= XFS_DIFLAG_NODUMP;
182 8973195 : if ((pip->i_diflags & XFS_DIFLAG_SYNC) &&
183 11070 : xfs_inherit_sync)
184 11070 : di_flags |= XFS_DIFLAG_SYNC;
185 8973195 : if ((pip->i_diflags & XFS_DIFLAG_NOSYMLINKS) &&
186 11072 : xfs_inherit_nosymlinks)
187 0 : di_flags |= XFS_DIFLAG_NOSYMLINKS;
188 8973195 : if ((pip->i_diflags & XFS_DIFLAG_NODEFRAG) &&
189 11070 : xfs_inherit_nodefrag)
190 11070 : di_flags |= XFS_DIFLAG_NODEFRAG;
191 8973195 : if (pip->i_diflags & XFS_DIFLAG_FILESTREAM)
192 4772 : di_flags |= XFS_DIFLAG_FILESTREAM;
193 :
194 8973195 : ip->i_diflags |= di_flags;
195 :
196 : /*
197 : * Inode verifiers on older kernels only check that the extent size
198 : * hint is an integer multiple of the rt extent size on realtime files.
199 : * They did not check the hint alignment on a directory with both
200 : * rtinherit and extszinherit flags set. If the misaligned hint is
201 : * propagated from a directory into a new realtime file, new file
202 : * allocations will fail due to math errors in the rt allocator and/or
203 : * trip the verifiers. Validate the hint settings in the new file so
204 : * that we don't let broken hints propagate.
205 : */
206 8973195 : failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize,
207 : VFS_I(ip)->i_mode, ip->i_diflags);
208 8972803 : if (failaddr) {
209 0 : ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
210 : XFS_DIFLAG_EXTSZINHERIT);
211 0 : ip->i_extsize = 0;
212 : }
213 8972803 : }
214 :
215 : /* Propagate di_flags2 from a parent inode to a child inode. */
216 : static inline void
217 28202289 : xfs_inode_inherit_flags2(
218 : struct xfs_inode *ip,
219 : const struct xfs_inode *pip)
220 : {
221 28202289 : xfs_failaddr_t failaddr;
222 :
223 28202289 : if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) {
224 360 : ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE;
225 360 : ip->i_cowextsize = pip->i_cowextsize;
226 : }
227 28202289 : if (pip->i_diflags2 & XFS_DIFLAG2_DAX)
228 30 : ip->i_diflags2 |= XFS_DIFLAG2_DAX;
229 28202289 : if (pip->i_diflags2 & XFS_DIFLAG2_METADIR)
230 11070 : ip->i_diflags2 |= XFS_DIFLAG2_METADIR;
231 :
232 : /* Don't let invalid cowextsize hints propagate. */
233 28202289 : failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize,
234 : VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2);
235 28202897 : if (failaddr) {
236 0 : ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
237 0 : ip->i_cowextsize = 0;
238 : }
239 28202897 : }
240 :
241 : /* Initialise an inode's attributes. */
242 : void
243 64361261 : xfs_inode_init(
244 : struct xfs_trans *tp,
245 : const struct xfs_icreate_args *args,
246 : struct xfs_inode *ip)
247 : {
248 64361261 : struct xfs_inode *pip = args->pip;
249 64361261 : struct inode *dir = pip ? VFS_I(pip) : NULL;
250 64361261 : struct xfs_mount *mp = tp->t_mountp;
251 64361261 : struct inode *inode = VFS_I(ip);
252 64361261 : unsigned int flags;
253 64361261 : int times = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG |
254 : XFS_ICHGTIME_ACCESS;
255 :
256 64361261 : set_nlink(inode, args->nlink);
257 64360460 : inode->i_rdev = args->rdev;
258 64360460 : ip->i_projid = args->prid;
259 :
260 64360460 : if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) {
261 0 : inode_fsuid_set(inode, args->idmap);
262 0 : inode->i_gid = dir->i_gid;
263 0 : inode->i_mode = args->mode;
264 : } else {
265 64360460 : inode_init_owner(args->idmap, inode, dir, args->mode);
266 : }
267 :
268 : /*
269 : * If the group ID of the new file does not match the effective group
270 : * ID or one of the supplementary group IDs, the S_ISGID bit is cleared
271 : * (and only if the irix_sgid_inherit compatibility variable is set).
272 : */
273 64360717 : if (irix_sgid_inherit && (inode->i_mode & S_ISGID) &&
274 0 : !vfsgid_in_group_p(i_gid_into_vfsgid(args->idmap, inode)))
275 0 : inode->i_mode &= ~S_ISGID;
276 :
277 : /* struct copies */
278 64360717 : if (args->flags & XFS_ICREATE_ARGS_FORCE_UID)
279 566301 : inode->i_uid = args->uid;
280 : else
281 63794416 : ASSERT(uid_eq(inode->i_uid, args->uid));
282 64360717 : if (args->flags & XFS_ICREATE_ARGS_FORCE_GID)
283 565537 : inode->i_gid = args->gid;
284 63795180 : else if (!pip || !XFS_INHERIT_GID(pip))
285 63796249 : ASSERT(gid_eq(inode->i_gid, args->gid));
286 64360717 : if (args->flags & XFS_ICREATE_ARGS_FORCE_MODE)
287 566299 : inode->i_mode = args->mode;
288 :
289 64360717 : ip->i_disk_size = 0;
290 64360717 : ip->i_df.if_nextents = 0;
291 64360717 : ASSERT(ip->i_nblocks == 0);
292 :
293 64360717 : ip->i_extsize = 0;
294 64360717 : ip->i_diflags = 0;
295 :
296 64360717 : if (xfs_has_v3inodes(mp)) {
297 64360204 : inode_set_iversion(inode, 1);
298 64360204 : ip->i_cowextsize = 0;
299 64360204 : times |= XFS_ICHGTIME_CREATE;
300 : }
301 :
302 64360717 : xfs_trans_ichgtime(tp, ip, times);
303 :
304 64360693 : flags = XFS_ILOG_CORE;
305 64360693 : switch (args->mode & S_IFMT) {
306 9016208 : case S_IFIFO:
307 : case S_IFCHR:
308 : case S_IFBLK:
309 : case S_IFSOCK:
310 9016208 : ip->i_df.if_format = XFS_DINODE_FMT_DEV;
311 9016208 : flags |= XFS_ILOG_DEV;
312 9016208 : break;
313 28202471 : case S_IFREG:
314 : case S_IFDIR:
315 28202471 : if (pip && (pip->i_diflags & XFS_DIFLAG_ANY))
316 8972869 : xfs_inode_inherit_flags(ip, pip);
317 28202185 : if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY))
318 28202218 : xfs_inode_inherit_flags2(ip, pip);
319 55343801 : fallthrough;
320 : case S_IFLNK:
321 55343801 : ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
322 55343801 : ip->i_df.if_bytes = 0;
323 55343801 : ip->i_df.if_u1.if_root = NULL;
324 55343801 : break;
325 0 : default:
326 0 : ASSERT(0);
327 : }
328 :
329 : /*
330 : * If we need to create attributes immediately after allocating the
331 : * inode, initialise an empty attribute fork right now. We use the
332 : * default fork offset for attributes here as we don't know exactly what
333 : * size or how many attributes we might be adding. We can do this
334 : * safely here because we know the data fork is completely empty and
335 : * this saves us from needing to run a separate transaction to set the
336 : * fork offset in the immediate future.
337 : */
338 64360009 : if ((args->flags & XFS_ICREATE_ARGS_INIT_XATTRS) &&
339 : xfs_has_attr(mp)) {
340 63647272 : ip->i_forkoff = xfs_default_attroffset(ip) >> 3;
341 63646377 : xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
342 : }
343 :
344 64358685 : xfs_trans_log_inode(tp, ip, flags);
345 64363124 : }
346 :
347 : /*
348 : * In-Core Unlinked List Lookups
349 : * =============================
350 : *
351 : * Every inode is supposed to be reachable from some other piece of metadata
352 : * with the exception of the root directory. Inodes with a connection to a
353 : * file descriptor but not linked from anywhere in the on-disk directory tree
354 : * are collectively known as unlinked inodes, though the filesystem itself
355 : * maintains links to these inodes so that on-disk metadata are consistent.
356 : *
357 : * XFS implements a per-AG on-disk hash table of unlinked inodes. The AGI
358 : * header contains a number of buckets that point to an inode, and each inode
359 : * record has a pointer to the next inode in the hash chain. This
360 : * singly-linked list causes scaling problems in the iunlink remove function
361 : * because we must walk that list to find the inode that points to the inode
362 : * being removed from the unlinked hash bucket list.
363 : *
364 : * Hence we keep an in-memory double linked list to link each inode on an
365 : * unlinked list. Because there are 64 unlinked lists per AGI, keeping pointer
366 : * based lists would require having 64 list heads in the perag, one for each
367 : * list. This is expensive in terms of memory (think millions of AGs) and cache
368 : * misses on lookups. Instead, use the fact that inodes on the unlinked list
369 : * must be referenced at the VFS level to keep them on the list and hence we
370 : * have an existence guarantee for inodes on the unlinked list.
371 : *
372 : * Given we have an existence guarantee, we can use lockless inode cache lookups
373 : * to resolve aginos to xfs inodes. This means we only need 8 bytes per inode
374 : * for the double linked unlinked list, and we don't need any extra locking to
375 : * keep the list safe as all manipulations are done under the AGI buffer lock.
376 : * Keeping the list up to date does not require memory allocation, just finding
377 : * the XFS inode and updating the next/prev unlinked list aginos.
378 : */
379 :
380 : /* Update the prev pointer of the next agino. */
381 : static int
382 80827993 : xfs_iunlink_update_backref(
383 : struct xfs_perag *pag,
384 : xfs_agino_t prev_agino,
385 : xfs_agino_t next_agino)
386 : {
387 80827993 : struct xfs_inode *ip;
388 :
389 : /* No update necessary if we are at the end of the list. */
390 80827993 : if (next_agino == NULLAGINO)
391 : return 0;
392 :
393 15389045 : ip = xfs_iunlink_lookup(pag, next_agino);
394 15388921 : if (!ip) {
395 0 : xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
396 0 : return -EFSCORRUPTED;
397 : }
398 :
399 15388921 : ip->i_prev_unlinked = prev_agino;
400 15388921 : return 0;
401 : }
402 :
403 : /*
404 : * Point the AGI unlinked bucket at an inode and log the results. The caller
405 : * is responsible for validating the old value.
406 : */
407 : STATIC int
408 78059550 : xfs_iunlink_update_bucket(
409 : struct xfs_trans *tp,
410 : struct xfs_perag *pag,
411 : struct xfs_buf *agibp,
412 : unsigned int bucket_index,
413 : xfs_agino_t new_agino)
414 : {
415 78059550 : struct xfs_agi *agi = agibp->b_addr;
416 78059550 : xfs_agino_t old_value;
417 78059550 : int offset;
418 :
419 123969466 : ASSERT(xfs_verify_agino_or_null(pag, new_agino));
420 :
421 78059550 : old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]);
422 78059550 : trace_xfs_iunlink_update_bucket(tp->t_mountp, pag->pag_agno, bucket_index,
423 : old_value, new_agino);
424 :
425 : /*
426 : * We should never find the head of the list already set to the value
427 : * passed in because either we're adding or removing ourselves from the
428 : * head of the list.
429 : */
430 78060185 : if (old_value == new_agino) {
431 0 : xfs_buf_mark_corrupt(agibp);
432 0 : xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
433 0 : return -EFSCORRUPTED;
434 : }
435 :
436 78060185 : agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino);
437 78060185 : offset = offsetof(struct xfs_agi, agi_unlinked) +
438 : (sizeof(xfs_agino_t) * bucket_index);
439 78060185 : xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1);
440 78060185 : return 0;
441 : }
442 :
443 : static int
444 40416019 : xfs_iunlink_insert_inode(
445 : struct xfs_trans *tp,
446 : struct xfs_perag *pag,
447 : struct xfs_buf *agibp,
448 : struct xfs_inode *ip)
449 : {
450 40416019 : struct xfs_mount *mp = tp->t_mountp;
451 40416019 : struct xfs_agi *agi = agibp->b_addr;
452 40416019 : xfs_agino_t next_agino;
453 40416019 : xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
454 40416019 : short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
455 40416019 : int error;
456 :
457 : /*
458 : * Get the index into the agi hash table for the list this inode will
459 : * go on. Make sure the pointer isn't garbage and that this inode
460 : * isn't already on the list.
461 : */
462 40416019 : next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
463 40416019 : if (next_agino == agino ||
464 : !xfs_verify_agino_or_null(pag, next_agino)) {
465 1 : xfs_buf_mark_corrupt(agibp);
466 0 : xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
467 0 : return -EFSCORRUPTED;
468 : }
469 :
470 : /*
471 : * Update the prev pointer in the next inode to point back to this
472 : * inode.
473 : */
474 40417982 : error = xfs_iunlink_update_backref(pag, agino, next_agino);
475 40417788 : if (error)
476 : return error;
477 :
478 40417788 : if (next_agino != NULLAGINO) {
479 : /*
480 : * There is already another inode in the bucket, so point this
481 : * inode to the current head of the list.
482 : */
483 8268783 : error = xfs_iunlink_log_inode(tp, ip, pag, next_agino);
484 8268728 : if (error)
485 : return error;
486 8268728 : ip->i_next_unlinked = next_agino;
487 : }
488 :
489 : /* Point the head of the list to point to this inode. */
490 40417733 : ip->i_prev_unlinked = NULLAGINO;
491 40417733 : return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino);
492 : }
493 :
494 : /*
495 : * This is called when the inode's link count has gone to 0 or we are creating
496 : * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0.
497 : *
498 : * We place the on-disk inode on a list in the AGI. It will be pulled from this
499 : * list when the inode is freed.
500 : */
501 : int
502 40418065 : xfs_iunlink(
503 : struct xfs_trans *tp,
504 : struct xfs_inode *ip)
505 : {
506 40418065 : struct xfs_mount *mp = tp->t_mountp;
507 40418065 : struct xfs_perag *pag;
508 40418065 : struct xfs_buf *agibp;
509 40418065 : int error;
510 :
511 40418065 : ASSERT(VFS_I(ip)->i_nlink == 0);
512 40418065 : ASSERT(VFS_I(ip)->i_mode != 0);
513 40418065 : trace_xfs_iunlink(ip);
514 :
515 40418247 : pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
516 :
517 : /* Get the agi buffer first. It ensures lock ordering on the list. */
518 40418068 : error = xfs_read_agi(pag, tp, &agibp);
519 40417430 : if (error)
520 13 : goto out;
521 :
522 40417417 : error = xfs_iunlink_insert_inode(tp, pag, agibp, ip);
523 40417083 : out:
524 40417083 : xfs_perag_put(pag);
525 40417409 : return error;
526 : }
527 :
528 : static int
529 40419548 : xfs_iunlink_remove_inode(
530 : struct xfs_trans *tp,
531 : struct xfs_perag *pag,
532 : struct xfs_buf *agibp,
533 : struct xfs_inode *ip)
534 : {
535 40419548 : struct xfs_mount *mp = tp->t_mountp;
536 40419548 : struct xfs_agi *agi = agibp->b_addr;
537 40419548 : xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
538 40419548 : xfs_agino_t head_agino;
539 40419548 : short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
540 40419548 : int error;
541 :
542 40419548 : trace_xfs_iunlink_remove(ip);
543 :
544 : /*
545 : * Get the index into the agi hash table for the list this inode will
546 : * go on. Make sure the head pointer isn't garbage.
547 : */
548 40418598 : head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
549 40418598 : if (!xfs_verify_agino(pag, head_agino)) {
550 4683 : XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
551 : agi, sizeof(*agi));
552 0 : xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
553 0 : return -EFSCORRUPTED;
554 : }
555 :
556 : /*
557 : * Set our inode's next_unlinked pointer to NULL and then return
558 : * the old pointer value so that we can update whatever was previous
559 : * to us in the list to point to whatever was next in the list.
560 : */
561 40413915 : error = xfs_iunlink_log_inode(tp, ip, pag, NULLAGINO);
562 40416410 : if (error)
563 : return error;
564 :
565 : /*
566 : * Update the prev pointer in the next inode to point back to previous
567 : * inode in the chain.
568 : */
569 40416196 : error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked,
570 : ip->i_next_unlinked);
571 40411063 : if (error)
572 : return error;
573 :
574 40411063 : if (head_agino != agino) {
575 2769182 : struct xfs_inode *prev_ip;
576 :
577 2769182 : prev_ip = xfs_iunlink_lookup(pag, ip->i_prev_unlinked);
578 2769246 : if (!prev_ip) {
579 0 : xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
580 0 : return -EFSCORRUPTED;
581 : }
582 :
583 2769246 : error = xfs_iunlink_log_inode(tp, prev_ip, pag,
584 : ip->i_next_unlinked);
585 2769082 : prev_ip->i_next_unlinked = ip->i_next_unlinked;
586 : } else {
587 : /* Point the head of the list to the next unlinked inode. */
588 37641881 : error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index,
589 : ip->i_next_unlinked);
590 : }
591 :
592 40417571 : ip->i_next_unlinked = NULLAGINO;
593 40417571 : ip->i_prev_unlinked = 0;
594 40417571 : return error;
595 : }
596 :
/*
 * Take the on-disk inode @ip off the AGI unlinked list of @pag.
 *
 * Returns 0 on success or a negative errno from reading the AGI or from the
 * list removal itself.
 */
int
xfs_iunlink_remove(
	struct xfs_trans	*tp,
	struct xfs_perag	*pag,
	struct xfs_inode	*ip)
{
	struct xfs_buf		*agibp;
	int			error;

	trace_xfs_iunlink_remove(ip);

	/* Grab the AGI buffer first; it provides lock ordering on the list. */
	error = xfs_read_agi(pag, tp, &agibp);
	if (!error)
		error = xfs_iunlink_remove_inode(tp, pag, agibp, ip);

	return error;
}
618 :
619 : /*
620 : * Decrement the link count on an inode & log the change. If this causes the
621 : * link count to go to zero, move the inode to AGI unlinked list so that it can
622 : * be freed when the last active reference goes away via xfs_inactive().
623 : */
624 : int
625 47781259 : xfs_droplink(
626 : struct xfs_trans *tp,
627 : struct xfs_inode *ip)
628 : {
629 47781259 : struct inode *inode = VFS_I(ip);
630 :
631 47781259 : xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
632 :
633 47780439 : if (inode->i_nlink != XFS_NLINK_PINNED)
634 47781976 : drop_nlink(VFS_I(ip));
635 :
636 47780035 : xfs_imeta_droplink(ip);
637 47781699 : xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
638 :
639 47782119 : if (VFS_I(ip)->i_nlink)
640 : return 0;
641 :
642 36465993 : return xfs_iunlink(tp, ip);
643 : }
644 :
645 : /*
646 : * Increment the link count on an inode & log the change.
647 : */
648 : void
649 23048300 : xfs_bumplink(
650 : struct xfs_trans *tp,
651 : struct xfs_inode *ip)
652 : {
653 23048300 : struct inode *inode = VFS_I(ip);
654 :
655 23048300 : xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
656 :
657 23048372 : if (inode->i_nlink != XFS_NLINK_PINNED)
658 23048453 : inc_nlink(VFS_I(ip));
659 :
660 23048314 : xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
661 23048467 : }
662 :
663 : /* Mark an inode free on disk. */
664 : int
665 38021558 : xfs_dir_ifree(
666 : struct xfs_trans *tp,
667 : struct xfs_perag *pag,
668 : struct xfs_inode *ip,
669 : struct xfs_icluster *xic)
670 : {
671 38021558 : int error;
672 :
673 : /*
674 : * Free the inode first so that we guarantee that the AGI lock is going
675 : * to be taken before we remove the inode from the unlinked list. This
676 : * makes the AGI lock -> unlinked list modification order the same as
677 : * used in O_TMPFILE creation.
678 : */
679 38021558 : error = xfs_difree(tp, pag, ip->i_ino, xic);
680 38025262 : if (error)
681 : return error;
682 :
683 38023522 : error = xfs_iunlink_remove(tp, pag, ip);
684 38019391 : if (error)
685 : return error;
686 :
687 : /*
688 : * Free any local-format data sitting around before we reset the
689 : * data fork to extents format. Note that the attr fork data has
690 : * already been freed by xfs_attr_inactive.
691 : */
692 38019391 : if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
693 5345947 : kmem_free(ip->i_df.if_u1.if_data);
694 5346097 : ip->i_df.if_u1.if_data = NULL;
695 5346097 : ip->i_df.if_bytes = 0;
696 : }
697 :
698 38019541 : VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
699 38019541 : ip->i_diflags = 0;
700 38019541 : ip->i_diflags2 = ip->i_mount->m_ino_geo.new_diflags2;
701 38019541 : ip->i_forkoff = 0; /* mark the attr fork not in use */
702 38019541 : ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
703 :
704 : /*
705 : * Bump the generation count so no one will be confused
706 : * by reincarnations of this inode.
707 : */
708 38019541 : VFS_I(ip)->i_generation++;
709 38019541 : xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
710 38019541 : return 0;
711 : }
|