Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4 : * All Rights Reserved.
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_log_format.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_mount.h"
13 : #include "xfs_inode.h"
14 : #include "xfs_acl.h"
15 : #include "xfs_quota.h"
16 : #include "xfs_da_format.h"
17 : #include "xfs_da_btree.h"
18 : #include "xfs_attr.h"
19 : #include "xfs_trans.h"
20 : #include "xfs_trace.h"
21 : #include "xfs_icache.h"
22 : #include "xfs_symlink.h"
23 : #include "xfs_dir2.h"
24 : #include "xfs_iomap.h"
25 : #include "xfs_error.h"
26 : #include "xfs_ioctl.h"
27 : #include "xfs_xattr.h"
28 : #include "xfs_file.h"
29 :
30 : #include <linux/posix_acl.h>
31 : #include <linux/security.h>
32 : #include <linux/iversion.h>
33 : #include <linux/fiemap.h>
34 :
35 : /*
36 : * Directories have different lock order w.r.t. mmap_lock compared to regular
37 : * files. This is due to readdir potentially triggering page faults on a user
38 : * buffer inside filldir(), and this happens with the ilock on the directory
39 : * held. For regular files, the lock order is the other way around - the
40 : * mmap_lock is taken during the page fault, and then we lock the ilock to do
41 : * block mapping. Hence we need a different class for the directory ilock so
42 : * that lockdep can tell them apart.
43 : */
44 : static struct lock_class_key xfs_nondir_ilock_class;
45 : static struct lock_class_key xfs_dir_ilock_class;
46 :
47 : static int
48 : xfs_initxattrs(
49 : struct inode *inode,
50 : const struct xattr *xattr_array,
51 : void *fs_info)
52 : {
53 : const struct xattr *xattr;
54 : struct xfs_inode *ip = XFS_I(inode);
55 : int error = 0;
56 :
57 : for (xattr = xattr_array; xattr->name != NULL; xattr++) {
58 : struct xfs_da_args args = {
59 : .dp = ip,
60 : .attr_filter = XFS_ATTR_SECURE,
61 : .name = xattr->name,
62 : .namelen = strlen(xattr->name),
63 : .value = xattr->value,
64 : .valuelen = xattr->value_len,
65 : .owner = ip->i_ino,
66 : };
67 : error = xfs_attr_change(&args);
68 : if (error < 0)
69 : break;
70 : }
71 : return error;
72 : }
73 :
74 : /*
75 : * Hook in SELinux. This is not quite correct yet, what we really need
76 : * here (as we do for default ACLs) is a mechanism by which creation of
77 : * these attrs can be journalled at inode creation time (along with the
78 : * inode, of course, such that log replay can't cause these to be lost).
79 : */
80 : int
81 1814559 : xfs_inode_init_security(
82 : struct inode *inode,
83 : struct inode *dir,
84 : const struct qstr *qstr)
85 : {
86 1814559 : return security_inode_init_security(inode, dir, qstr,
87 : &xfs_initxattrs, NULL);
88 : }
89 :
90 : static void
91 : xfs_dentry_to_name(
92 : struct xfs_name *namep,
93 : struct dentry *dentry)
94 : {
95 232818204 : namep->name = dentry->d_name.name;
96 232818204 : namep->len = dentry->d_name.len;
97 232818204 : namep->type = XFS_DIR3_FT_UNKNOWN;
98 : }
99 :
100 : static int
101 : xfs_dentry_mode_to_name(
102 : struct xfs_name *namep,
103 : struct dentry *dentry,
104 : int mode)
105 : {
106 531953205 : namep->name = dentry->d_name.name;
107 531953205 : namep->len = dentry->d_name.len;
108 1063950385 : namep->type = xfs_mode_to_ftype(mode);
109 :
110 531997180 : if (unlikely(namep->type == XFS_DIR3_FT_UNKNOWN))
111 21548327 : return -EFSCORRUPTED;
112 :
113 : return 0;
114 : }
115 :
116 : STATIC void
117 0 : xfs_cleanup_inode(
118 : struct inode *dir,
119 : struct inode *inode,
120 : struct dentry *dentry)
121 : {
122 0 : struct xfs_name teardown;
123 :
124 : /* Oh, the horror.
125 : * If we can't add the ACL or we fail in
126 : * xfs_inode_init_security we must back out.
127 : * ENOSPC can hit here, among other things.
128 : */
129 0 : xfs_dentry_to_name(&teardown, dentry);
130 :
131 0 : xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
132 0 : }
133 :
134 : /*
135 : * Check to see if we are likely to need an extended attribute to be added to
136 : * the inode we are about to allocate. This allows the attribute fork to be
137 : * created during the inode allocation, reducing the number of transactions we
138 : * need to do in this fast path.
139 : *
140 : * The security checks are optimistic, but not guaranteed. The two LSMs that
141 : * require xattrs to be added here (selinux and smack) are also the only two
142 : * LSMs that add a sb->s_security structure to the superblock. Hence if security
143 : * is enabled and sb->s_security is set, we have a pretty good idea that we are
144 : * going to be asked to add a security xattr immediately after allocating the
145 : * xfs inode and instantiating the VFS inode.
146 : */
147 : static inline bool
148 : xfs_create_need_xattr(
149 : struct inode *dir,
150 : struct posix_acl *default_acl,
151 : struct posix_acl *acl)
152 : {
153 28097000 : if (acl)
154 : return true;
155 28096274 : if (default_acl)
156 : return true;
157 : #if IS_ENABLED(CONFIG_SECURITY)
158 : if (dir->i_sb->s_security)
159 : return true;
160 : #endif
161 28097377 : if (xfs_has_parent(XFS_I(dir)->i_mount))
162 27946370 : return true;
163 : return false;
164 : }
165 :
166 :
167 : STATIC int
168 29076597 : xfs_generic_create(
169 : struct mnt_idmap *idmap,
170 : struct inode *dir,
171 : struct dentry *dentry,
172 : umode_t mode,
173 : dev_t rdev,
174 : struct file *tmpfile) /* unnamed file */
175 : {
176 29076597 : struct inode *inode;
177 29076597 : struct xfs_inode *ip = NULL;
178 29076597 : struct posix_acl *default_acl, *acl;
179 29076597 : struct xfs_name name;
180 29076597 : int error;
181 :
182 : /*
183 : * Irix uses Missed'em'V split, but doesn't want to see
184 : * the upper 5 bits of (14bit) major.
185 : */
186 29076597 : if (S_ISCHR(mode) || S_ISBLK(mode)) {
187 5959599 : if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
188 : return -EINVAL;
189 : } else {
190 : rdev = 0;
191 : }
192 :
193 29076597 : error = posix_acl_create(dir, &mode, &default_acl, &acl);
194 29076171 : if (error)
195 : return error;
196 :
197 : /* Verify mode is valid also for tmpfile case */
198 29076715 : error = xfs_dentry_mode_to_name(&name, dentry, mode);
199 29073966 : if (unlikely(error))
200 0 : goto out_free_acl;
201 :
202 29073966 : if (!tmpfile) {
203 56193274 : error = xfs_create(idmap, XFS_I(dir), &name, mode, rdev,
204 : xfs_create_need_xattr(dir, default_acl, acl),
205 : &ip);
206 : } else {
207 976966 : bool init_xattrs = false;
208 :
209 : /*
210 : * If this temporary file will be linkable, set up the file
211 : * with an attr fork to receive a parent pointer.
212 : */
213 976966 : if (!(tmpfile->f_flags & O_EXCL) &&
214 976967 : xfs_has_parent(XFS_I(dir)->i_mount))
215 976962 : init_xattrs = true;
216 :
217 976966 : error = xfs_create_tmpfile(idmap, XFS_I(dir), mode,
218 : init_xattrs, &ip);
219 : }
220 29081332 : if (unlikely(error))
221 379517 : goto out_free_acl;
222 :
223 28701815 : inode = VFS_I(ip);
224 :
225 28701815 : error = xfs_inode_init_security(inode, dir, &dentry->d_name);
226 28701815 : if (unlikely(error))
227 : goto out_cleanup_inode;
228 :
229 28701815 : if (default_acl) {
230 26 : error = __xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
231 26 : if (error)
232 0 : goto out_cleanup_inode;
233 : }
234 28701815 : if (acl) {
235 3375 : error = __xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
236 3375 : if (error)
237 0 : goto out_cleanup_inode;
238 : }
239 :
240 28701815 : xfs_setup_iops(ip);
241 :
242 28700697 : if (tmpfile) {
243 : /*
244 : * The VFS requires that any inode fed to d_tmpfile must have
245 : * nlink == 1 so that it can decrement the nlink in d_tmpfile.
246 : * However, we created the temp file with nlink == 0 because
247 : * we're not allowed to put an inode with nlink > 0 on the
248 : * unlinked list. Therefore we have to set nlink to 1 so that
249 : * d_tmpfile can immediately set it back to zero.
250 : */
251 977057 : set_nlink(inode, 1);
252 977064 : d_tmpfile(tmpfile, inode);
253 : } else
254 27723640 : d_instantiate(dentry, inode);
255 :
256 28698706 : xfs_finish_inode_setup(ip);
257 :
258 29075687 : out_free_acl:
259 29075687 : posix_acl_release(default_acl);
260 29075654 : posix_acl_release(acl);
261 29075654 : return error;
262 :
263 0 : out_cleanup_inode:
264 0 : xfs_finish_inode_setup(ip);
265 0 : if (!tmpfile)
266 0 : xfs_cleanup_inode(dir, inode, dentry);
267 0 : xfs_irele(ip);
268 0 : goto out_free_acl;
269 : }
270 :
271 : STATIC int
272 5959665 : xfs_vn_mknod(
273 : struct mnt_idmap *idmap,
274 : struct inode *dir,
275 : struct dentry *dentry,
276 : umode_t mode,
277 : dev_t rdev)
278 : {
279 5959665 : return xfs_generic_create(idmap, dir, dentry, mode, rdev, NULL);
280 : }
281 :
282 : STATIC int
283 15961336 : xfs_vn_create(
284 : struct mnt_idmap *idmap,
285 : struct inode *dir,
286 : struct dentry *dentry,
287 : umode_t mode,
288 : bool flags)
289 : {
290 15961336 : return xfs_generic_create(idmap, dir, dentry, mode, 0, NULL);
291 : }
292 :
293 : STATIC int
294 6180796 : xfs_vn_mkdir(
295 : struct mnt_idmap *idmap,
296 : struct inode *dir,
297 : struct dentry *dentry,
298 : umode_t mode)
299 : {
300 6180796 : return xfs_generic_create(idmap, dir, dentry, mode | S_IFDIR, 0, NULL);
301 : }
302 :
303 : STATIC struct dentry *
304 189907980 : xfs_vn_lookup(
305 : struct inode *dir,
306 : struct dentry *dentry,
307 : unsigned int flags)
308 : {
309 189907980 : struct inode *inode;
310 189907980 : struct xfs_inode *cip;
311 189907980 : struct xfs_name name;
312 189907980 : int error;
313 :
314 189907980 : if (dentry->d_name.len >= MAXNAMELEN)
315 : return ERR_PTR(-ENAMETOOLONG);
316 :
317 189910404 : xfs_dentry_to_name(&name, dentry);
318 189910404 : error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
319 189907438 : if (likely(!error))
320 116777374 : inode = VFS_I(cip);
321 73130064 : else if (likely(error == -ENOENT))
322 : inode = NULL;
323 : else
324 79184 : inode = ERR_PTR(error);
325 189907438 : return d_splice_alias(inode, dentry);
326 : }
327 :
328 : STATIC struct dentry *
329 195894 : xfs_vn_ci_lookup(
330 : struct inode *dir,
331 : struct dentry *dentry,
332 : unsigned int flags)
333 : {
334 195894 : struct xfs_inode *ip;
335 195894 : struct xfs_name xname;
336 195894 : struct xfs_name ci_name;
337 195894 : struct qstr dname;
338 195894 : int error;
339 :
340 195894 : if (dentry->d_name.len >= MAXNAMELEN)
341 : return ERR_PTR(-ENAMETOOLONG);
342 :
343 195894 : xfs_dentry_to_name(&xname, dentry);
344 195894 : error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
345 195894 : if (unlikely(error)) {
346 93224 : if (unlikely(error != -ENOENT))
347 0 : return ERR_PTR(error);
348 : /*
349 : * call d_add(dentry, NULL) here when d_drop_negative_children
350 : * is called in xfs_vn_mknod (ie. allow negative dentries
351 : * with CI filesystems).
352 : */
353 : return NULL;
354 : }
355 :
356 : /* if exact match, just splice and exit */
357 102670 : if (!ci_name.name)
358 14574 : return d_splice_alias(VFS_I(ip), dentry);
359 :
360 : /* else case-insensitive match... */
361 88096 : dname.name = ci_name.name;
362 88096 : dname.len = ci_name.len;
363 88096 : dentry = d_add_ci(dentry, VFS_I(ip), &dname);
364 88096 : kmem_free(ci_name.name);
365 88096 : return dentry;
366 : }
367 :
368 : STATIC int
369 5799359 : xfs_vn_link(
370 : struct dentry *old_dentry,
371 : struct inode *dir,
372 : struct dentry *dentry)
373 : {
374 5799359 : struct inode *inode = d_inode(old_dentry);
375 5799359 : struct xfs_name name;
376 5799359 : int error;
377 :
378 5799359 : error = xfs_dentry_mode_to_name(&name, dentry, inode->i_mode);
379 5799249 : if (unlikely(error))
380 : return error;
381 :
382 5799249 : error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
383 5799395 : if (unlikely(error))
384 : return error;
385 :
386 5775316 : ihold(inode);
387 5775329 : d_instantiate(dentry, inode);
388 5775329 : return 0;
389 : }
390 :
391 : STATIC int
392 42711906 : xfs_vn_unlink(
393 : struct inode *dir,
394 : struct dentry *dentry)
395 : {
396 42711906 : struct xfs_name name;
397 42711906 : int error;
398 :
399 42711906 : xfs_dentry_to_name(&name, dentry);
400 :
401 42711906 : error = xfs_remove(XFS_I(dir), &name, XFS_I(d_inode(dentry)));
402 42712532 : if (error)
403 : return error;
404 :
405 : /*
406 : * With unlink, the VFS makes the dentry "negative": no inode,
407 : * but still hashed. This is incompatible with case-insensitive
408 : * mode, so invalidate (unhash) the dentry in CI-mode.
409 : */
410 40950063 : if (xfs_has_asciici(XFS_M(dir->i_sb)))
411 37640 : d_invalidate(dentry);
412 : return 0;
413 : }
414 :
415 : STATIC int
416 437707876 : xfs_vn_symlink(
417 : struct mnt_idmap *idmap,
418 : struct inode *dir,
419 : struct dentry *dentry,
420 : const char *symname)
421 : {
422 437707876 : struct inode *inode;
423 437707876 : struct xfs_inode *cip = NULL;
424 437707876 : struct xfs_name name;
425 437707876 : int error;
426 437707876 : umode_t mode;
427 :
428 437707876 : mode = S_IFLNK |
429 437707876 : (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
430 437707876 : error = xfs_dentry_mode_to_name(&name, dentry, mode);
431 437754711 : if (unlikely(error))
432 0 : goto out;
433 :
434 437754711 : error = xfs_symlink(idmap, XFS_I(dir), &name, symname, mode, &cip);
435 437742408 : if (unlikely(error))
436 406807741 : goto out;
437 :
438 30934667 : inode = VFS_I(cip);
439 :
440 30934667 : error = xfs_inode_init_security(inode, dir, &dentry->d_name);
441 30934667 : if (unlikely(error))
442 : goto out_cleanup_inode;
443 :
444 30934667 : xfs_setup_iops(cip);
445 :
446 30932087 : d_instantiate(dentry, inode);
447 30931329 : xfs_finish_inode_setup(cip);
448 30931329 : return 0;
449 :
450 : out_cleanup_inode:
451 : xfs_finish_inode_setup(cip);
452 : xfs_cleanup_inode(dir, inode, dentry);
453 : xfs_irele(cip);
454 : out:
455 : return error;
456 : }
457 :
458 : STATIC int
459 29684627 : xfs_vn_rename(
460 : struct mnt_idmap *idmap,
461 : struct inode *odir,
462 : struct dentry *odentry,
463 : struct inode *ndir,
464 : struct dentry *ndentry,
465 : unsigned int flags)
466 : {
467 29684627 : struct inode *new_inode = d_inode(ndentry);
468 29684627 : int omode = 0;
469 29684627 : int error;
470 29684627 : struct xfs_name oname;
471 29684627 : struct xfs_name nname;
472 :
473 29684627 : if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
474 : return -EINVAL;
475 :
476 : /* if we are exchanging files, we need to set i_mode of both files */
477 29684627 : if (flags & RENAME_EXCHANGE)
478 8136301 : omode = d_inode(ndentry)->i_mode;
479 :
480 29684627 : error = xfs_dentry_mode_to_name(&oname, odentry, omode);
481 29684628 : if (omode && unlikely(error))
482 : return error;
483 :
484 29684628 : error = xfs_dentry_mode_to_name(&nname, ndentry,
485 29684628 : d_inode(odentry)->i_mode);
486 29684626 : if (unlikely(error))
487 : return error;
488 :
489 38077739 : return xfs_rename(idmap, XFS_I(odir), &oname,
490 : XFS_I(d_inode(odentry)), XFS_I(ndir), &nname,
491 : new_inode ? XFS_I(new_inode) : NULL, flags);
492 : }
493 :
494 : /*
495 : * careful here - this function can get called recursively, so
496 : * we need to be very careful about how much stack we use.
497 : * uio is kmalloced for this reason...
498 : */
499 : STATIC const char *
500 295571977 : xfs_vn_get_link(
501 : struct dentry *dentry,
502 : struct inode *inode,
503 : struct delayed_call *done)
504 : {
505 295571977 : char *link;
506 295571977 : int error = -ENOMEM;
507 :
508 295571977 : if (!dentry)
509 : return ERR_PTR(-ECHILD);
510 :
511 295551825 : link = kmalloc(XFS_SYMLINK_MAXLEN+1, GFP_KERNEL);
512 295531513 : if (!link)
513 0 : goto out_err;
514 :
515 295531513 : error = xfs_readlink(XFS_I(d_inode(dentry)), link);
516 295639873 : if (unlikely(error))
517 31 : goto out_kfree;
518 :
519 295639842 : set_delayed_call(done, kfree_link, link);
520 295639842 : return link;
521 :
522 : out_kfree:
523 31 : kfree(link);
524 31 : out_err:
525 31 : return ERR_PTR(error);
526 : }
527 :
528 : static uint32_t
529 1507842457 : xfs_stat_blksize(
530 : struct xfs_inode *ip)
531 : {
532 1507842457 : struct xfs_mount *mp = ip->i_mount;
533 :
534 : /*
535 : * If the file blocks are being allocated from a realtime volume, then
536 : * always return the realtime extent size.
537 : */
538 1507842457 : if (XFS_IS_REALTIME_INODE(ip))
539 85998335 : return XFS_FSB_TO_B(mp, xfs_get_extsz_hint(ip));
540 :
541 : /*
542 : * Allow large block sizes to be reported to userspace programs if the
543 : * "largeio" mount option is used.
544 : *
545 : * If compatibility mode is specified, simply return the basic unit of
546 : * caching so that we don't get inefficient read/modify/write I/O from
547 : * user apps. Otherwise....
548 : *
549 : * If the underlying volume is a stripe, then return the stripe width in
550 : * bytes as the recommended I/O size. It is not a stripe and we've set a
551 : * default buffered I/O size, return that, otherwise return the compat
552 : * default.
553 : */
554 1421844122 : if (xfs_has_large_iosize(mp)) {
555 2 : if (mp->m_swidth)
556 0 : return XFS_FSB_TO_B(mp, mp->m_swidth);
557 2 : if (xfs_has_allocsize(mp))
558 0 : return 1U << mp->m_allocsize_log;
559 : }
560 :
561 : return PAGE_SIZE;
562 : }
563 :
564 : STATIC int
565 1510544405 : xfs_vn_getattr(
566 : struct mnt_idmap *idmap,
567 : const struct path *path,
568 : struct kstat *stat,
569 : u32 request_mask,
570 : unsigned int query_flags)
571 : {
572 1510544405 : struct inode *inode = d_inode(path->dentry);
573 1510544405 : struct xfs_inode *ip = XFS_I(inode);
574 1510544405 : struct xfs_mount *mp = ip->i_mount;
575 1510544405 : vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
576 1510773142 : vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
577 :
578 1510661444 : trace_xfs_getattr(ip);
579 :
580 3021300090 : if (xfs_is_shutdown(mp))
581 : return -EIO;
582 :
583 1510615603 : stat->size = XFS_ISIZE(ip);
584 1510615603 : stat->dev = inode->i_sb->s_dev;
585 1510615603 : stat->mode = inode->i_mode;
586 1510615603 : stat->nlink = inode->i_nlink;
587 1510615603 : stat->uid = vfsuid_into_kuid(vfsuid);
588 1510615603 : stat->gid = vfsgid_into_kgid(vfsgid);
589 1510615603 : stat->ino = ip->i_ino;
590 1510615603 : stat->atime = inode->i_atime;
591 1510615603 : stat->mtime = inode->i_mtime;
592 1510615603 : stat->ctime = inode->i_ctime;
593 1510615603 : stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks);
594 :
595 1510615603 : if (xfs_has_v3inodes(mp)) {
596 1510627748 : if (request_mask & STATX_BTIME) {
597 2189913 : stat->result_mask |= STATX_BTIME;
598 2189913 : stat->btime = ip->i_crtime;
599 : }
600 : }
601 :
602 : /*
603 : * Note: If you add another clause to set an attribute flag, please
604 : * update attributes_mask below.
605 : */
606 1510615603 : if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE)
607 378 : stat->attributes |= STATX_ATTR_IMMUTABLE;
608 1510615603 : if (ip->i_diflags & XFS_DIFLAG_APPEND)
609 320 : stat->attributes |= STATX_ATTR_APPEND;
610 1510615603 : if (ip->i_diflags & XFS_DIFLAG_NODUMP)
611 76 : stat->attributes |= STATX_ATTR_NODUMP;
612 :
613 1510615603 : stat->attributes_mask |= (STATX_ATTR_IMMUTABLE |
614 : STATX_ATTR_APPEND |
615 : STATX_ATTR_NODUMP);
616 :
617 1510615603 : switch (inode->i_mode & S_IFMT) {
618 2795284 : case S_IFBLK:
619 : case S_IFCHR:
620 2795284 : stat->blksize = BLKDEV_IOSIZE;
621 2795284 : stat->rdev = inode->i_rdev;
622 2795284 : break;
623 305714186 : case S_IFREG:
624 305714186 : if (request_mask & STATX_DIOALIGN) {
625 0 : struct xfs_buftarg *target = xfs_inode_buftarg(ip);
626 0 : struct block_device *bdev = target->bt_bdev;
627 :
628 0 : stat->result_mask |= STATX_DIOALIGN;
629 0 : stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
630 0 : stat->dio_offset_align = bdev_logical_block_size(bdev);
631 : }
632 1507820319 : fallthrough;
633 : default:
634 1507820319 : stat->blksize = xfs_stat_blksize(ip);
635 1507863615 : stat->rdev = 0;
636 1507863615 : break;
637 : }
638 :
639 : return 0;
640 : }
641 :
642 : static int
643 13150479 : xfs_vn_change_ok(
644 : struct mnt_idmap *idmap,
645 : struct dentry *dentry,
646 : struct iattr *iattr)
647 : {
648 13150479 : struct xfs_mount *mp = XFS_I(d_inode(dentry))->i_mount;
649 :
650 26300958 : if (xfs_is_readonly(mp))
651 : return -EROFS;
652 :
653 26300958 : if (xfs_is_shutdown(mp))
654 : return -EIO;
655 :
656 13148551 : return setattr_prepare(idmap, dentry, iattr);
657 : }
658 :
659 : /*
660 : * Set non-size attributes of an inode.
661 : *
662 : * Caution: The caller of this function is responsible for calling
663 : * setattr_prepare() or otherwise verifying the change is fine.
664 : */
665 : static int
666 7768001 : xfs_setattr_nonsize(
667 : struct mnt_idmap *idmap,
668 : struct dentry *dentry,
669 : struct xfs_inode *ip,
670 : struct iattr *iattr)
671 : {
672 7768001 : xfs_mount_t *mp = ip->i_mount;
673 7768001 : struct inode *inode = VFS_I(ip);
674 7768001 : int mask = iattr->ia_valid;
675 7768001 : xfs_trans_t *tp;
676 7768001 : int error;
677 7768001 : kuid_t uid = GLOBAL_ROOT_UID;
678 7768001 : kgid_t gid = GLOBAL_ROOT_GID;
679 7768001 : struct xfs_dquot *udqp = NULL, *gdqp = NULL;
680 7768001 : struct xfs_dquot *old_udqp = NULL, *old_gdqp = NULL;
681 :
682 7768001 : ASSERT((mask & ATTR_SIZE) == 0);
683 :
684 : /*
685 : * If disk quotas is on, we make sure that the dquots do exist on disk,
686 : * before we start any other transactions. Trying to do this later
687 : * is messy. We don't care to take a readlock to look at the ids
688 : * in inode here, because we can't hold it across the trans_reserve.
689 : * If the IDs do change before we take the ilock, we're covered
690 : * because the i_*dquot fields will get updated anyway.
691 : */
692 7768001 : if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
693 4346778 : uint qflags = 0;
694 :
695 4346778 : if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
696 4346634 : uid = from_vfsuid(idmap, i_user_ns(inode),
697 : iattr->ia_vfsuid);
698 4346634 : qflags |= XFS_QMOPT_UQUOTA;
699 : } else {
700 144 : uid = inode->i_uid;
701 : }
702 4346616 : if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
703 4344165 : gid = from_vfsgid(idmap, i_user_ns(inode),
704 : iattr->ia_vfsgid);
705 4343958 : qflags |= XFS_QMOPT_GQUOTA;
706 : } else {
707 2464 : gid = inode->i_gid;
708 : }
709 :
710 : /*
711 : * We take a reference when we initialize udqp and gdqp,
712 : * so it is important that we never blindly double trip on
713 : * the same variable. See xfs_create() for an example.
714 : */
715 4346409 : ASSERT(udqp == NULL);
716 4346409 : ASSERT(gdqp == NULL);
717 4346409 : error = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_projid,
718 : qflags, &udqp, &gdqp, NULL);
719 4347278 : if (error)
720 : return error;
721 : }
722 :
723 7745830 : error = xfs_trans_alloc_ichange(ip, udqp, gdqp, NULL,
724 : has_capability_noaudit(current, CAP_FOWNER), &tp);
725 7746647 : if (error)
726 294 : goto out_dqrele;
727 :
728 : /*
729 : * Register quota modifications in the transaction. Must be the owner
730 : * or privileged. These IDs could have changed since we last looked at
731 : * them. But, we're assured that if the ownership did change while we
732 : * didn't have the inode locked, inode's dquot(s) would have changed
733 : * also.
734 : */
735 14586312 : if (XFS_IS_UQUOTA_ON(mp) &&
736 6839884 : i_uid_needs_update(idmap, iattr, inode)) {
737 4125614 : ASSERT(udqp);
738 4125614 : old_udqp = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp);
739 : }
740 14582500 : if (XFS_IS_GQUOTA_ON(mp) &&
741 6835990 : i_gid_needs_update(idmap, iattr, inode)) {
742 4123388 : ASSERT(xfs_has_pquotino(mp) || !XFS_IS_PQUOTA_ON(mp));
743 4123388 : ASSERT(gdqp);
744 4123388 : old_gdqp = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp);
745 : }
746 :
747 7746521 : setattr_copy(idmap, inode, iattr);
748 7746338 : xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
749 :
750 7746629 : XFS_STATS_INC(mp, xs_ig_attrchg);
751 :
752 7746629 : if (xfs_has_wsync(mp))
753 314 : xfs_trans_set_sync(tp);
754 7746629 : error = xfs_trans_commit(tp);
755 :
756 : /*
757 : * Release any dquot(s) the inode had kept before chown.
758 : */
759 7746916 : xfs_qm_dqrele(old_udqp);
760 7746925 : xfs_qm_dqrele(old_gdqp);
761 7746941 : xfs_qm_dqrele(udqp);
762 7746945 : xfs_qm_dqrele(gdqp);
763 :
764 7746786 : if (error)
765 : return error;
766 :
767 : /*
768 : * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
769 : * update. We could avoid this with linked transactions
770 : * and passing down the transaction pointer all the way
771 : * to attr_set. No previous user of the generic
772 : * Posix ACL code seems to care about this issue either.
773 : */
774 7746783 : if (mask & ATTR_MODE) {
775 96096 : error = posix_acl_chmod(idmap, dentry, inode->i_mode);
776 96097 : if (error)
777 0 : return error;
778 : }
779 :
780 : return 0;
781 :
782 : out_dqrele:
783 294 : xfs_qm_dqrele(udqp);
784 294 : xfs_qm_dqrele(gdqp);
785 294 : return error;
786 : }
787 :
788 : /*
789 : * Truncate file. Must have write permission and not be a directory.
790 : *
791 : * Caution: The caller of this function is responsible for calling
792 : * setattr_prepare() or otherwise verifying the change is fine.
793 : */
794 : STATIC int
795 5539002 : xfs_setattr_size(
796 : struct mnt_idmap *idmap,
797 : struct dentry *dentry,
798 : struct xfs_inode *ip,
799 : struct iattr *iattr)
800 : {
801 5539002 : struct xfs_mount *mp = ip->i_mount;
802 5539002 : struct inode *inode = VFS_I(ip);
803 5539002 : xfs_off_t oldsize, newsize;
804 5539002 : struct xfs_trans *tp;
805 5539002 : int error;
806 5539002 : uint lock_flags = 0;
807 5539002 : bool did_zeroing = false;
808 :
809 5539002 : ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
810 5539003 : ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
811 5538997 : ASSERT(S_ISREG(inode->i_mode));
812 5538997 : ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
813 : ATTR_MTIME_SET|ATTR_TIMES_SET)) == 0);
814 :
815 5538997 : oldsize = inode->i_size;
816 5538997 : newsize = iattr->ia_size;
817 :
818 : /*
819 : * Short circuit the truncate case for zero length files.
820 : */
821 5538997 : if (newsize == 0 && oldsize == 0 && ip->i_df.if_nextents == 0) {
822 158487 : if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME)))
823 : return 0;
824 :
825 : /*
826 : * Use the regular setattr path to update the timestamps.
827 : */
828 158486 : iattr->ia_valid &= ~ATTR_SIZE;
829 158486 : return xfs_setattr_nonsize(idmap, dentry, ip, iattr);
830 : }
831 :
832 : /*
833 : * Make sure that the dquots are attached to the inode.
834 : */
835 5380510 : error = xfs_qm_dqattach(ip);
836 5380520 : if (error)
837 : return error;
838 :
839 : /*
840 : * Wait for all direct I/O to complete.
841 : */
842 5379136 : inode_dio_wait(inode);
843 :
844 : /*
845 : * File data changes must be complete before we start the transaction to
846 : * modify the inode. This needs to be done before joining the inode to
847 : * the transaction because the inode cannot be unlocked once it is a
848 : * part of the transaction.
849 : *
850 : * Start with zeroing any data beyond EOF that we may expose on file
851 : * extension, or zeroing out the rest of the block on a downward
852 : * truncate.
853 : */
854 5379117 : if (newsize > oldsize) {
855 2713813 : trace_xfs_zero_eof(ip, oldsize, newsize - oldsize);
856 2713821 : error = xfs_zero_range(ip, oldsize, newsize - oldsize,
857 : &did_zeroing);
858 : } else {
859 : /*
860 : * iomap won't detect a dirty page over an unwritten block (or a
861 : * cow block over a hole) and subsequently skips zeroing the
862 : * newly post-EOF portion of the page. Flush the new EOF to
863 : * convert the block before the pagecache truncate.
864 : */
865 2665304 : error = filemap_write_and_wait_range(inode->i_mapping, newsize,
866 : newsize);
867 2665326 : if (error)
868 : return error;
869 2665282 : error = xfs_truncate_page(ip, newsize, &did_zeroing);
870 : }
871 :
872 5379093 : if (error)
873 : return error;
874 :
875 : /*
876 : * We've already locked out new page faults, so now we can safely remove
877 : * pages from the page cache knowing they won't get refaulted until we
878 : * drop the XFS_MMAP_EXCL lock after the extent manipulations are
879 : * complete. The truncate_setsize() call also cleans partial EOF page
880 : * PTEs on extending truncates and hence ensures sub-page block size
881 : * filesystems are correctly handled, too.
882 : *
883 : * We have to do all the page cache truncate work outside the
884 : * transaction context as the "lock" order is page lock->log space
885 : * reservation as defined by extent allocation in the writeback path.
886 : * Hence a truncate can fail with ENOMEM from xfs_trans_alloc(), but
887 : * having already truncated the in-memory version of the file (i.e. made
888 : * user visible changes). There's not much we can do about this, except
889 : * to hope that the caller sees ENOMEM and retries the truncate
890 : * operation.
891 : *
892 : * And we update in-core i_size and truncate page cache beyond newsize
893 : * before writeback the [i_disk_size, newsize] range, so we're
894 : * guaranteed not to write stale data past the new EOF on truncate down.
895 : */
896 5378893 : truncate_setsize(inode, newsize);
897 :
898 : /*
899 : * We are going to log the inode size change in this transaction so
900 : * any previous writes that are beyond the on disk EOF and the new
901 : * EOF that have not been written out need to be written here. If we
902 : * do not write the data out, we expose ourselves to the null files
903 : * problem. Note that this includes any block zeroing we did above;
904 : * otherwise those blocks may not be zeroed after a crash.
905 : */
906 5378893 : if (did_zeroing ||
907 3623213 : (newsize > ip->i_disk_size && oldsize != ip->i_disk_size)) {
908 1841427 : error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
909 : ip->i_disk_size, newsize - 1);
910 1841450 : if (error)
911 : return error;
912 : }
913 :
914 5378387 : error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
915 5378404 : if (error)
916 : return error;
917 :
918 5378371 : lock_flags |= XFS_ILOCK_EXCL;
919 5378371 : xfs_ilock(ip, XFS_ILOCK_EXCL);
920 5378372 : xfs_trans_ijoin(tp, ip, 0);
921 :
922 : /*
923 : * Only change the c/mtime if we are changing the size or we are
924 : * explicitly asked to change it. This handles the semantic difference
925 : * between truncate() and ftruncate() as implemented in the VFS.
926 : *
927 : * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
928 : * special case where we need to update the times despite not having
929 : * these flags set. For all other operations the VFS set these flags
930 : * explicitly if it wants a timestamp update.
931 : */
932 5378354 : if (newsize != oldsize &&
933 5357721 : !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {
934 8326690 : iattr->ia_ctime = iattr->ia_mtime =
935 4163343 : current_time(inode);
936 4163347 : iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
937 : }
938 :
939 : /*
940 : * The first thing we do is set the size to new_size permanently on
941 : * disk. This way we don't have to worry about anyone ever being able
942 : * to look at the data being freed even in the face of a crash.
943 : * What we're getting around here is the case where we free a block, it
944 : * is allocated to another file, it is written to, and then we crash.
945 : * If the new data gets written to the file but the log buffers
946 : * containing the free and reallocation don't, then we'd end up with
947 : * garbage in the blocks being freed. As long as we make the new size
948 : * permanent before actually freeing any blocks it doesn't matter if
949 : * they get written to.
950 : */
951 5378358 : ip->i_disk_size = newsize;
952 5378358 : xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
953 :
954 5378366 : if (newsize <= oldsize) {
955 2665056 : error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize);
956 2665059 : if (error)
957 478 : goto out_trans_cancel;
958 :
959 : /*
960 : * Truncated "down", so we're removing references to old data
961 : * here - if we delay flushing for a long time, we expose
962 : * ourselves unduly to the notorious NULL files problem. So,
963 : * we mark this inode and flush it when the file is closed,
964 : * and do not wait the usual (long) time for writeout.
965 : */
966 2664581 : xfs_iflags_set(ip, XFS_ITRUNCATED);
967 :
968 : /* A truncate down always removes post-EOF blocks. */
969 2664578 : xfs_inode_clear_eofblocks_tag(ip);
970 : }
971 :
972 5377891 : ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID)));
973 5377891 : setattr_copy(idmap, inode, iattr);
974 5377887 : xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
975 :
976 5377892 : XFS_STATS_INC(mp, xs_ig_attrchg);
977 :
978 5377892 : if (xfs_has_wsync(mp))
979 0 : xfs_trans_set_sync(tp);
980 :
981 5377892 : error = xfs_trans_commit(tp);
982 5378371 : out_unlock:
983 5378371 : if (lock_flags)
984 5378371 : xfs_iunlock(ip, lock_flags);
985 5378371 : return error;
986 :
987 : out_trans_cancel:
988 478 : xfs_trans_cancel(tp);
989 478 : goto out_unlock;
990 : }
991 :
/*
 * Validate and apply a size-changing setattr request.
 *
 * Emits the setattr tracepoint, asks xfs_vn_change_ok() whether the change
 * described by @iattr is permitted on @dentry, and only then hands the
 * request to xfs_setattr_size().
 *
 * Returns 0 on success or the error from whichever step failed first.
 */
int
xfs_vn_setattr_size(
	struct mnt_idmap	*idmap,
	struct dentry		*dentry,
	struct iattr		*iattr)
{
	struct xfs_inode	*ip = XFS_I(d_inode(dentry));
	int			error;

	trace_xfs_setattr(ip);

	error = xfs_vn_change_ok(idmap, dentry, iattr);
	if (!error)
		error = xfs_setattr_size(idmap, dentry, ip, iattr);

	return error;
}
1008 :
1009 : STATIC int
1010 11103587 : xfs_vn_setattr(
1011 : struct mnt_idmap *idmap,
1012 : struct dentry *dentry,
1013 : struct iattr *iattr)
1014 : {
1015 11103587 : struct inode *inode = d_inode(dentry);
1016 11103587 : struct xfs_inode *ip = XFS_I(inode);
1017 11103587 : int error;
1018 :
1019 11103587 : if (iattr->ia_valid & ATTR_SIZE) {
1020 3491637 : uint iolock;
1021 :
1022 3491637 : xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
1023 3491642 : iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
1024 :
1025 3491642 : error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
1026 3491629 : if (error) {
1027 0 : xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
1028 0 : return error;
1029 : }
1030 :
1031 3491629 : error = xfs_vn_setattr_size(idmap, dentry, iattr);
1032 3491661 : xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
1033 : } else {
1034 7611950 : trace_xfs_setattr(ip);
1035 :
1036 7611781 : error = xfs_vn_change_ok(idmap, dentry, iattr);
1037 7611122 : if (!error)
1038 7609024 : error = xfs_setattr_nonsize(idmap, dentry, ip, iattr);
1039 : }
1040 :
1041 : return error;
1042 : }
1043 :
1044 : STATIC int
1045 69946083 : xfs_vn_update_time(
1046 : struct inode *inode,
1047 : struct timespec64 *now,
1048 : int flags)
1049 : {
1050 69946083 : struct xfs_inode *ip = XFS_I(inode);
1051 69946083 : struct xfs_mount *mp = ip->i_mount;
1052 69946083 : int log_flags = XFS_ILOG_TIMESTAMP;
1053 69946083 : struct xfs_trans *tp;
1054 69946083 : int error;
1055 :
1056 69946083 : trace_xfs_update_time(ip);
1057 :
1058 69945031 : if (inode->i_sb->s_flags & SB_LAZYTIME) {
1059 48 : if (!((flags & S_VERSION) &&
1060 12 : inode_maybe_inc_iversion(inode, false)))
1061 24 : return generic_update_time(inode, now, flags);
1062 :
1063 : /* Capture the iversion update that just occurred */
1064 : log_flags |= XFS_ILOG_CORE;
1065 : }
1066 :
1067 69945007 : error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
1068 69949803 : if (error)
1069 : return error;
1070 :
1071 69949640 : xfs_ilock(ip, XFS_ILOCK_EXCL);
1072 69949038 : if (flags & S_CTIME)
1073 27631119 : inode->i_ctime = *now;
1074 69949038 : if (flags & S_MTIME)
1075 27947705 : inode->i_mtime = *now;
1076 69949038 : if (flags & S_ATIME)
1077 42001610 : inode->i_atime = *now;
1078 :
1079 69949038 : xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1080 69943680 : xfs_trans_log_inode(tp, ip, log_flags);
1081 69948860 : return xfs_trans_commit(tp);
1082 : }
1083 :
1084 : STATIC int
1085 1072729 : xfs_vn_fiemap(
1086 : struct inode *inode,
1087 : struct fiemap_extent_info *fieinfo,
1088 : u64 start,
1089 : u64 length)
1090 : {
1091 1072729 : int error;
1092 :
1093 1072729 : xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED);
1094 1072728 : if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
1095 510326 : fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR;
1096 510326 : error = iomap_fiemap(inode, fieinfo, start, length,
1097 : &xfs_xattr_iomap_ops);
1098 : } else {
1099 562402 : error = iomap_fiemap(inode, fieinfo, start, length,
1100 : &xfs_read_iomap_ops);
1101 : }
1102 1072727 : xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED);
1103 :
1104 1072726 : return error;
1105 : }
1106 :
1107 : STATIC int
1108 976938 : xfs_vn_tmpfile(
1109 : struct mnt_idmap *idmap,
1110 : struct inode *dir,
1111 : struct file *file,
1112 : umode_t mode)
1113 : {
1114 976938 : int err = xfs_generic_create(idmap, dir, file->f_path.dentry, mode, 0, file);
1115 :
1116 977035 : return finish_open_simple(file, err);
1117 : }
1118 :
/*
 * Inode operations installed by xfs_setup_iops() for regular files and for
 * every inode type that is not a directory or a symlink.
 */
static const struct inode_operations xfs_inode_operations = {
	.get_inode_acl		= xfs_get_acl,
	.set_acl		= xfs_set_acl,
	.getattr		= xfs_vn_getattr,
	.setattr		= xfs_vn_setattr,
	.listxattr		= xfs_vn_listxattr,
	.fiemap			= xfs_vn_fiemap,
	.update_time		= xfs_vn_update_time,
	.fileattr_get		= xfs_fileattr_get,
	.fileattr_set		= xfs_fileattr_set,
};
1130 :
/*
 * Inode operations installed by xfs_setup_iops() for directories when
 * xfs_has_asciici() is false for the mount.
 */
static const struct inode_operations xfs_dir_inode_operations = {
	.create			= xfs_vn_create,
	.lookup			= xfs_vn_lookup,
	.link			= xfs_vn_link,
	.unlink			= xfs_vn_unlink,
	.symlink		= xfs_vn_symlink,
	.mkdir			= xfs_vn_mkdir,
	/*
	 * Yes, XFS uses the same method for rmdir and unlink.
	 *
	 * There are some subtle differences deeper in the code,
	 * but we use S_ISDIR to check for those.
	 */
	.rmdir			= xfs_vn_unlink,
	.mknod			= xfs_vn_mknod,
	.rename			= xfs_vn_rename,
	.get_inode_acl		= xfs_get_acl,
	.set_acl		= xfs_set_acl,
	.getattr		= xfs_vn_getattr,
	.setattr		= xfs_vn_setattr,
	.listxattr		= xfs_vn_listxattr,
	.update_time		= xfs_vn_update_time,
	.tmpfile		= xfs_vn_tmpfile,
	.fileattr_get		= xfs_fileattr_get,
	.fileattr_set		= xfs_fileattr_set,
};
1157 :
/*
 * Inode operations installed by xfs_setup_iops() for directories when
 * xfs_has_asciici() is true for the mount.  The only entry that differs
 * from xfs_dir_inode_operations is .lookup, which uses xfs_vn_ci_lookup.
 */
static const struct inode_operations xfs_dir_ci_inode_operations = {
	.create			= xfs_vn_create,
	.lookup			= xfs_vn_ci_lookup,
	.link			= xfs_vn_link,
	.unlink			= xfs_vn_unlink,
	.symlink		= xfs_vn_symlink,
	.mkdir			= xfs_vn_mkdir,
	/*
	 * Yes, XFS uses the same method for rmdir and unlink.
	 *
	 * There are some subtle differences deeper in the code,
	 * but we use S_ISDIR to check for those.
	 */
	.rmdir			= xfs_vn_unlink,
	.mknod			= xfs_vn_mknod,
	.rename			= xfs_vn_rename,
	.get_inode_acl		= xfs_get_acl,
	.set_acl		= xfs_set_acl,
	.getattr		= xfs_vn_getattr,
	.setattr		= xfs_vn_setattr,
	.listxattr		= xfs_vn_listxattr,
	.update_time		= xfs_vn_update_time,
	.tmpfile		= xfs_vn_tmpfile,
	.fileattr_get		= xfs_fileattr_get,
	.fileattr_set		= xfs_fileattr_set,
};
1184 :
/* Inode operations installed by xfs_setup_iops() for symbolic links. */
static const struct inode_operations xfs_symlink_inode_operations = {
	.get_link		= xfs_vn_get_link,
	.getattr		= xfs_vn_getattr,
	.setattr		= xfs_vn_setattr,
	.listxattr		= xfs_vn_listxattr,
	.update_time		= xfs_vn_update_time,
};
1192 :
1193 : /* Figure out if this file actually supports DAX. */
1194 : static bool
1195 : xfs_inode_supports_dax(
1196 : struct xfs_inode *ip)
1197 : {
1198 : struct xfs_mount *mp = ip->i_mount;
1199 :
1200 : /* Only supported on regular files. */
1201 : if (!S_ISREG(VFS_I(ip)->i_mode))
1202 : return false;
1203 :
1204 : /* Block size must match page size */
1205 : if (mp->m_sb.sb_blocksize != PAGE_SIZE)
1206 : return false;
1207 :
1208 : /* Device has to support DAX too. */
1209 : return xfs_inode_buftarg(ip)->bt_daxdev != NULL;
1210 : }
1211 :
1212 : static bool
1213 : xfs_inode_should_enable_dax(
1214 : struct xfs_inode *ip)
1215 : {
1216 : if (!IS_ENABLED(CONFIG_FS_DAX))
1217 : return false;
1218 : if (xfs_has_dax_never(ip->i_mount))
1219 : return false;
1220 : if (!xfs_inode_supports_dax(ip))
1221 : return false;
1222 : if (xfs_has_dax_always(ip->i_mount))
1223 : return true;
1224 : if (ip->i_diflags2 & XFS_DIFLAG2_DAX)
1225 : return true;
1226 : return false;
1227 : }
1228 :
1229 : void
1230 1056475602 : xfs_diflags_to_iflags(
1231 : struct xfs_inode *ip,
1232 : bool init)
1233 : {
1234 1056475602 : struct inode *inode = VFS_I(ip);
1235 1056475602 : unsigned int xflags = xfs_ip2xflags(ip);
1236 1056463888 : unsigned int flags = 0;
1237 :
1238 1056463888 : ASSERT(!(IS_DAX(inode) && init));
1239 :
1240 1056463888 : if (xflags & FS_XFLAG_IMMUTABLE)
1241 58 : flags |= S_IMMUTABLE;
1242 1056463888 : if (xflags & FS_XFLAG_APPEND)
1243 46 : flags |= S_APPEND;
1244 1056463888 : if (xflags & FS_XFLAG_SYNC)
1245 28 : flags |= S_SYNC;
1246 1056463888 : if (xflags & FS_XFLAG_NOATIME)
1247 14 : flags |= S_NOATIME;
1248 1056463888 : if (init && xfs_inode_should_enable_dax(ip))
1249 : flags |= S_DAX;
1250 :
1251 : /*
1252 : * S_DAX can only be set during inode initialization and is never set by
1253 : * the VFS, so we cannot mask off S_DAX in i_flags.
1254 : */
1255 1056463888 : inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC | S_NOATIME);
1256 1056463888 : inode->i_flags |= flags;
1257 1056463888 : }
1258 :
1259 : /*
1260 : * Initialize the Linux inode.
1261 : *
1262 : * When reading existing inodes from disk this is called directly from xfs_iget,
1263 : * when creating a new inode it is called from xfs_init_new_inode after setting
1264 : * up the inode. These callers have different criteria for clearing XFS_INEW, so
1265 : * leave it up to the caller to deal with unlocking the inode appropriately.
1266 : */
1267 : void
1268 1054862081 : xfs_setup_inode(
1269 : struct xfs_inode *ip)
1270 : {
1271 1054862081 : struct inode *inode = &ip->i_vnode;
1272 1054862081 : gfp_t gfp_mask;
1273 :
1274 1054862081 : inode->i_ino = ip->i_ino;
1275 1054862081 : inode->i_state |= I_NEW;
1276 :
1277 1054862081 : inode_sb_list_add(inode);
1278 : /* make the inode look hashed for the writeback code */
1279 1055895666 : inode_fake_hash(inode);
1280 :
1281 1055895666 : i_size_write(inode, ip->i_disk_size);
1282 1055895666 : xfs_diflags_to_iflags(ip, true);
1283 :
1284 1055799755 : if (S_ISDIR(inode->i_mode)) {
1285 : /*
1286 : * We set the i_rwsem class here to avoid potential races with
1287 : * lockdep_annotate_inode_mutex_key() reinitialising the lock
1288 : * after a filehandle lookup has already found the inode in
1289 : * cache before it has been unlocked via unlock_new_inode().
1290 : */
1291 : lockdep_set_class(&inode->i_rwsem,
1292 : &inode->i_sb->s_type->i_mutex_dir_key);
1293 : lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);
1294 : } else {
1295 1055799755 : lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
1296 : }
1297 :
1298 : /*
1299 : * Ensure all page cache allocations are done from GFP_NOFS context to
1300 : * prevent direct reclaim recursion back into the filesystem and blowing
1301 : * stacks or deadlocking.
1302 : */
1303 1055799755 : gfp_mask = mapping_gfp_mask(inode->i_mapping);
1304 1055799755 : mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS)));
1305 :
1306 : /*
1307 : * If there is no attribute fork no ACL can exist on this inode,
1308 : * and it can't have any file capabilities attached to it either.
1309 : */
1310 1055799755 : if (!xfs_inode_has_attr_fork(ip)) {
1311 8274482 : inode_has_no_xattr(inode);
1312 8274470 : cache_no_acl(inode);
1313 : }
1314 1055799743 : }
1315 :
1316 : void
1317 1055902163 : xfs_setup_iops(
1318 : struct xfs_inode *ip)
1319 : {
1320 1055902163 : struct inode *inode = &ip->i_vnode;
1321 :
1322 1055902163 : switch (inode->i_mode & S_IFMT) {
1323 339029317 : case S_IFREG:
1324 339029317 : inode->i_op = &xfs_inode_operations;
1325 339029317 : inode->i_fop = &xfs_file_operations;
1326 339029317 : if (IS_DAX(inode))
1327 : inode->i_mapping->a_ops = &xfs_dax_aops;
1328 : else
1329 339029317 : inode->i_mapping->a_ops = &xfs_address_space_operations;
1330 : break;
1331 168635187 : case S_IFDIR:
1332 168635187 : if (xfs_has_asciici(XFS_M(inode->i_sb)))
1333 116 : inode->i_op = &xfs_dir_ci_inode_operations;
1334 : else
1335 168635071 : inode->i_op = &xfs_dir_inode_operations;
1336 168635187 : inode->i_fop = &xfs_dir_file_operations;
1337 168635187 : break;
1338 96116838 : case S_IFLNK:
1339 96116838 : inode->i_op = &xfs_symlink_inode_operations;
1340 96116838 : break;
1341 452120821 : default:
1342 452120821 : inode->i_op = &xfs_inode_operations;
1343 452120821 : init_special_inode(inode, inode->i_mode, inode->i_rdev);
1344 452120821 : break;
1345 : }
1346 1055890355 : }
|