Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4 : * All Rights Reserved.
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_log_format.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_mount.h"
13 : #include "xfs_inode.h"
14 : #include "xfs_acl.h"
15 : #include "xfs_quota.h"
16 : #include "xfs_da_format.h"
17 : #include "xfs_da_btree.h"
18 : #include "xfs_attr.h"
19 : #include "xfs_trans.h"
20 : #include "xfs_trace.h"
21 : #include "xfs_icache.h"
22 : #include "xfs_symlink.h"
23 : #include "xfs_dir2.h"
24 : #include "xfs_iomap.h"
25 : #include "xfs_error.h"
26 : #include "xfs_ioctl.h"
27 : #include "xfs_xattr.h"
28 : #include "xfs_file.h"
29 :
30 : #include <linux/posix_acl.h>
31 : #include <linux/security.h>
32 : #include <linux/iversion.h>
33 : #include <linux/fiemap.h>
34 :
35 : /*
36 : * Directories have different lock order w.r.t. mmap_lock compared to regular
37 : * files. This is due to readdir potentially triggering page faults on a user
38 : * buffer inside filldir(), and this happens with the ilock on the directory
39 : * held. For regular files, the lock order is the other way around - the
40 : * mmap_lock is taken during the page fault, and then we lock the ilock to do
41 : * block mapping. Hence we need a different class for the directory ilock so
42 : * that lockdep can tell them apart.
43 : */
44 : static struct lock_class_key xfs_nondir_ilock_class;
45 : static struct lock_class_key xfs_dir_ilock_class;
46 :
47 : static int
48 : xfs_initxattrs(
49 : struct inode *inode,
50 : const struct xattr *xattr_array,
51 : void *fs_info)
52 : {
53 : const struct xattr *xattr;
54 : struct xfs_inode *ip = XFS_I(inode);
55 : int error = 0;
56 :
57 : for (xattr = xattr_array; xattr->name != NULL; xattr++) {
58 : struct xfs_da_args args = {
59 : .dp = ip,
60 : .attr_filter = XFS_ATTR_SECURE,
61 : .name = xattr->name,
62 : .namelen = strlen(xattr->name),
63 : .value = xattr->value,
64 : .valuelen = xattr->value_len,
65 : .owner = ip->i_ino,
66 : };
67 : error = xfs_attr_change(&args);
68 : if (error < 0)
69 : break;
70 : }
71 : return error;
72 : }
73 :
74 : /*
75 : * Hook in SELinux. This is not quite correct yet, what we really need
76 : * here (as we do for default ACLs) is a mechanism by which creation of
77 : * these attrs can be journalled at inode creation time (along with the
78 : * inode, of course, such that log replay can't cause these to be lost).
79 : */
80 : int
81 2936533 : xfs_inode_init_security(
82 : struct inode *inode,
83 : struct inode *dir,
84 : const struct qstr *qstr)
85 : {
86 2936533 : return security_inode_init_security(inode, dir, qstr,
87 : &xfs_initxattrs, NULL);
88 : }
89 :
90 : static void
91 : xfs_dentry_to_name(
92 : struct xfs_name *namep,
93 : struct dentry *dentry)
94 : {
95 269614346 : namep->name = dentry->d_name.name;
96 269614346 : namep->len = dentry->d_name.len;
97 269614346 : namep->type = XFS_DIR3_FT_UNKNOWN;
98 : }
99 :
100 : static int
101 : xfs_dentry_mode_to_name(
102 : struct xfs_name *namep,
103 : struct dentry *dentry,
104 : int mode)
105 : {
106 778647593 : namep->name = dentry->d_name.name;
107 778647593 : namep->len = dentry->d_name.len;
108 1557493418 : namep->type = xfs_mode_to_ftype(mode);
109 :
110 778845825 : if (unlikely(namep->type == XFS_DIR3_FT_UNKNOWN))
111 34562638 : return -EFSCORRUPTED;
112 :
113 : return 0;
114 : }
115 :
116 : STATIC void
117 0 : xfs_cleanup_inode(
118 : struct inode *dir,
119 : struct inode *inode,
120 : struct dentry *dentry)
121 : {
122 0 : struct xfs_name teardown;
123 :
124 : /* Oh, the horror.
125 : * If we can't add the ACL or we fail in
126 : * xfs_inode_init_security we must back out.
127 : * ENOSPC can hit here, among other things.
128 : */
129 0 : xfs_dentry_to_name(&teardown, dentry);
130 :
131 0 : xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
132 0 : }
133 :
134 : /*
135 : * Check to see if we are likely to need an extended attribute to be added to
136 : * the inode we are about to allocate. This allows the attribute fork to be
137 : * created during the inode allocation, reducing the number of transactions we
138 : * need to do in this fast path.
139 : *
140 : * The security checks are optimistic, but not guaranteed. The two LSMs that
141 : * require xattrs to be added here (selinux and smack) are also the only two
142 : * LSMs that add a sb->s_security structure to the superblock. Hence if security
143 : * is enabled and sb->s_security is set, we have a pretty good idea that we are
144 : * going to be asked to add a security xattr immediately after allocating the
145 : * xfs inode and instantiating the VFS inode.
146 : */
147 : static inline bool
148 : xfs_create_need_xattr(
149 : struct inode *dir,
150 : struct posix_acl *default_acl,
151 : struct posix_acl *acl)
152 : {
153 65638904 : if (acl)
154 : return true;
155 65493903 : if (default_acl)
156 : return true;
157 : #if IS_ENABLED(CONFIG_SECURITY)
158 : if (dir->i_sb->s_security)
159 : return true;
160 : #endif
161 65543235 : if (xfs_has_parent(XFS_I(dir)->i_mount))
162 61481456 : return true;
163 : return false;
164 : }
165 :
166 :
167 : STATIC int
168 73154438 : xfs_generic_create(
169 : struct mnt_idmap *idmap,
170 : struct inode *dir,
171 : struct dentry *dentry,
172 : umode_t mode,
173 : dev_t rdev,
174 : struct file *tmpfile) /* unnamed file */
175 : {
176 73154438 : struct inode *inode;
177 73154438 : struct xfs_inode *ip = NULL;
178 73154438 : struct posix_acl *default_acl, *acl;
179 73154438 : struct xfs_name name;
180 73154438 : int error;
181 :
182 : /*
183 : * Irix uses Missed'em'V split, but doesn't want to see
184 : * the upper 5 bits of (14bit) major.
185 : */
186 73154438 : if (S_ISCHR(mode) || S_ISBLK(mode)) {
187 10519917 : if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
188 : return -EINVAL;
189 : } else {
190 : rdev = 0;
191 : }
192 :
193 73154438 : error = posix_acl_create(dir, &mode, &default_acl, &acl);
194 72684622 : if (error)
195 : return error;
196 :
197 : /* Verify mode is valid also for tmpfile case */
198 72710305 : error = xfs_dentry_mode_to_name(&name, dentry, mode);
199 73090998 : if (unlikely(error))
200 0 : goto out_free_acl;
201 :
202 73090998 : if (!tmpfile) {
203 65638904 : error = xfs_create(idmap, XFS_I(dir), &name, mode, rdev,
204 65638904 : xfs_create_need_xattr(dir, default_acl, acl),
205 : &ip);
206 : } else {
207 7452094 : bool init_xattrs = false;
208 :
209 : /*
210 : * If this temporary file will be linkable, set up the file
211 : * with an attr fork to receive a parent pointer.
212 : */
213 7452094 : if (!(tmpfile->f_flags & O_EXCL) &&
214 7453752 : xfs_has_parent(XFS_I(dir)->i_mount))
215 6931059 : init_xattrs = true;
216 :
217 7452094 : error = xfs_create_tmpfile(idmap, XFS_I(dir), mode,
218 : init_xattrs, &ip);
219 : }
220 73881477 : if (unlikely(error))
221 591401 : goto out_free_acl;
222 :
223 73290076 : inode = VFS_I(ip);
224 :
225 73290076 : error = xfs_inode_init_security(inode, dir, &dentry->d_name);
226 73290076 : if (unlikely(error))
227 : goto out_cleanup_inode;
228 :
229 73290076 : if (default_acl) {
230 15610 : error = __xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
231 15610 : if (error)
232 0 : goto out_cleanup_inode;
233 : }
234 73290076 : if (acl) {
235 171115 : error = __xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
236 171115 : if (error)
237 0 : goto out_cleanup_inode;
238 : }
239 :
240 73290076 : xfs_setup_iops(ip);
241 :
242 72778886 : if (tmpfile) {
243 : /*
244 : * The VFS requires that any inode fed to d_tmpfile must have
245 : * nlink == 1 so that it can decrement the nlink in d_tmpfile.
246 : * However, we created the temp file with nlink == 0 because
247 : * we're not allowed to put an inode with nlink > 0 on the
248 : * unlinked list. Therefore we have to set nlink to 1 so that
249 : * d_tmpfile can immediately set it back to zero.
250 : */
251 7556956 : set_nlink(inode, 1);
252 7803266 : d_tmpfile(tmpfile, inode);
253 : } else
254 65221930 : d_instantiate(dentry, inode);
255 :
256 73375370 : xfs_finish_inode_setup(ip);
257 :
258 74099665 : out_free_acl:
259 74099665 : posix_acl_release(default_acl);
260 73544451 : posix_acl_release(acl);
261 73544451 : return error;
262 :
263 0 : out_cleanup_inode:
264 0 : xfs_finish_inode_setup(ip);
265 0 : if (!tmpfile)
266 0 : xfs_cleanup_inode(dir, inode, dentry);
267 0 : xfs_irele(ip);
268 0 : goto out_free_acl;
269 : }
270 :
271 : STATIC int
272 10520138 : xfs_vn_mknod(
273 : struct mnt_idmap *idmap,
274 : struct inode *dir,
275 : struct dentry *dentry,
276 : umode_t mode,
277 : dev_t rdev)
278 : {
279 10520138 : return xfs_generic_create(idmap, dir, dentry, mode, rdev, NULL);
280 : }
281 :
282 : STATIC int
283 43655135 : xfs_vn_create(
284 : struct mnt_idmap *idmap,
285 : struct inode *dir,
286 : struct dentry *dentry,
287 : umode_t mode,
288 : bool flags)
289 : {
290 43655135 : return xfs_generic_create(idmap, dir, dentry, mode, 0, NULL);
291 : }
292 :
293 : STATIC int
294 11724728 : xfs_vn_mkdir(
295 : struct mnt_idmap *idmap,
296 : struct inode *dir,
297 : struct dentry *dentry,
298 : umode_t mode)
299 : {
300 11724728 : return xfs_generic_create(idmap, dir, dentry, mode | S_IFDIR, 0, NULL);
301 : }
302 :
303 : STATIC struct dentry *
304 201674044 : xfs_vn_lookup(
305 : struct inode *dir,
306 : struct dentry *dentry,
307 : unsigned int flags)
308 : {
309 201674044 : struct inode *inode;
310 201674044 : struct xfs_inode *cip;
311 201674044 : struct xfs_name name;
312 201674044 : int error;
313 :
314 201674044 : if (dentry->d_name.len >= MAXNAMELEN)
315 : return ERR_PTR(-ENAMETOOLONG);
316 :
317 201626870 : xfs_dentry_to_name(&name, dentry);
318 201626870 : error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
319 201612744 : if (likely(!error))
320 52430673 : inode = VFS_I(cip);
321 149182071 : else if (likely(error == -ENOENT))
322 : inode = NULL;
323 : else
324 64587 : inode = ERR_PTR(error);
325 201612744 : return d_splice_alias(inode, dentry);
326 : }
327 :
328 : STATIC struct dentry *
329 1077163 : xfs_vn_ci_lookup(
330 : struct inode *dir,
331 : struct dentry *dentry,
332 : unsigned int flags)
333 : {
334 1077163 : struct xfs_inode *ip;
335 1077163 : struct xfs_name xname;
336 1077163 : struct xfs_name ci_name;
337 1077163 : struct qstr dname;
338 1077163 : int error;
339 :
340 1077163 : if (dentry->d_name.len >= MAXNAMELEN)
341 : return ERR_PTR(-ENAMETOOLONG);
342 :
343 1077163 : xfs_dentry_to_name(&xname, dentry);
344 1077163 : error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
345 1077163 : if (unlikely(error)) {
346 512456 : if (unlikely(error != -ENOENT))
347 0 : return ERR_PTR(error);
348 : /*
349 : * call d_add(dentry, NULL) here when d_drop_negative_children
350 : * is called in xfs_vn_mknod (ie. allow negative dentries
351 : * with CI filesystems).
352 : */
353 : return NULL;
354 : }
355 :
356 : /* if exact match, just splice and exit */
357 564707 : if (!ci_name.name)
358 80179 : return d_splice_alias(VFS_I(ip), dentry);
359 :
360 : /* else case-insensitive match... */
361 484528 : dname.name = ci_name.name;
362 484528 : dname.len = ci_name.len;
363 484528 : dentry = d_add_ci(dentry, VFS_I(ip), &dname);
364 484528 : kmem_free(ci_name.name);
365 484528 : return dentry;
366 : }
367 :
368 : STATIC int
369 13027577 : xfs_vn_link(
370 : struct dentry *old_dentry,
371 : struct inode *dir,
372 : struct dentry *dentry)
373 : {
374 13027577 : struct inode *inode = d_inode(old_dentry);
375 13027577 : struct xfs_name name;
376 13027577 : int error;
377 :
378 13027577 : error = xfs_dentry_mode_to_name(&name, dentry, inode->i_mode);
379 13027400 : if (unlikely(error))
380 : return error;
381 :
382 13027400 : error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
383 13028387 : if (unlikely(error))
384 : return error;
385 :
386 12979069 : ihold(inode);
387 12979377 : d_instantiate(dentry, inode);
388 12979377 : return 0;
389 : }
390 :
391 : STATIC int
392 66910313 : xfs_vn_unlink(
393 : struct inode *dir,
394 : struct dentry *dentry)
395 : {
396 66910313 : struct xfs_name name;
397 66910313 : int error;
398 :
399 66910313 : xfs_dentry_to_name(&name, dentry);
400 :
401 66910313 : error = xfs_remove(XFS_I(dir), &name, XFS_I(d_inode(dentry)));
402 66901842 : if (error)
403 : return error;
404 :
405 : /*
406 : * With unlink, the VFS makes the dentry "negative": no inode,
407 : * but still hashed. This is incompatible with case-insensitive
408 : * mode, so invalidate (unhash) the dentry in CI-mode.
409 : */
410 63774380 : if (xfs_has_asciici(XFS_M(dir->i_sb)))
411 207020 : d_invalidate(dentry);
412 : return 0;
413 : }
414 :
415 : STATIC int
416 597361444 : xfs_vn_symlink(
417 : struct mnt_idmap *idmap,
418 : struct inode *dir,
419 : struct dentry *dentry,
420 : const char *symname)
421 : {
422 597361444 : struct inode *inode;
423 597361444 : struct xfs_inode *cip = NULL;
424 597361444 : struct xfs_name name;
425 597361444 : int error;
426 597361444 : umode_t mode;
427 :
428 597361444 : mode = S_IFLNK |
429 597361444 : (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
430 597361444 : error = xfs_dentry_mode_to_name(&name, dentry, mode);
431 597179444 : if (unlikely(error))
432 0 : goto out;
433 :
434 597179444 : error = xfs_symlink(idmap, XFS_I(dir), &name, symname, mode, &cip);
435 597682746 : if (unlikely(error))
436 567268340 : goto out;
437 :
438 30414406 : inode = VFS_I(cip);
439 :
440 30414406 : error = xfs_inode_init_security(inode, dir, &dentry->d_name);
441 30414406 : if (unlikely(error))
442 : goto out_cleanup_inode;
443 :
444 30414406 : xfs_setup_iops(cip);
445 :
446 30304163 : d_instantiate(dentry, inode);
447 30448459 : xfs_finish_inode_setup(cip);
448 30448459 : return 0;
449 :
450 : out_cleanup_inode:
451 : xfs_finish_inode_setup(cip);
452 : xfs_cleanup_inode(dir, inode, dentry);
453 : xfs_irele(cip);
454 : out:
455 : return error;
456 : }
457 :
458 : STATIC int
459 47774317 : xfs_vn_rename(
460 : struct mnt_idmap *idmap,
461 : struct inode *odir,
462 : struct dentry *odentry,
463 : struct inode *ndir,
464 : struct dentry *ndentry,
465 : unsigned int flags)
466 : {
467 47774317 : struct inode *new_inode = d_inode(ndentry);
468 47774317 : int omode = 0;
469 47774317 : int error;
470 47774317 : struct xfs_name oname;
471 47774317 : struct xfs_name nname;
472 :
473 47774317 : if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
474 : return -EINVAL;
475 :
476 : /* if we are exchanging files, we need to set i_mode of both files */
477 47774317 : if (flags & RENAME_EXCHANGE)
478 13211239 : omode = d_inode(ndentry)->i_mode;
479 :
480 47774317 : error = xfs_dentry_mode_to_name(&oname, odentry, omode);
481 47773950 : if (omode && unlikely(error))
482 : return error;
483 :
484 47773950 : error = xfs_dentry_mode_to_name(&nname, ndentry,
485 47773950 : d_inode(odentry)->i_mode);
486 47774033 : if (unlikely(error))
487 : return error;
488 :
489 61674800 : return xfs_rename(idmap, XFS_I(odir), &oname,
490 : XFS_I(d_inode(odentry)), XFS_I(ndir), &nname,
491 : new_inode ? XFS_I(new_inode) : NULL, flags);
492 : }
493 :
494 : /*
495 : * careful here - this function can get called recursively, so
496 : * we need to be very careful about how much stack we use.
497 : * uio is kmalloced for this reason...
498 : */
499 : STATIC const char *
500 280103754 : xfs_vn_get_link(
501 : struct dentry *dentry,
502 : struct inode *inode,
503 : struct delayed_call *done)
504 : {
505 280103754 : char *link;
506 280103754 : int error = -ENOMEM;
507 :
508 280103754 : if (!dentry)
509 : return ERR_PTR(-ECHILD);
510 :
511 280042212 : link = kmalloc(XFS_SYMLINK_MAXLEN+1, GFP_KERNEL);
512 280245906 : if (!link)
513 0 : goto out_err;
514 :
515 280245906 : error = xfs_readlink(XFS_I(d_inode(dentry)), link);
516 279950646 : if (unlikely(error))
517 41 : goto out_kfree;
518 :
519 279950605 : set_delayed_call(done, kfree_link, link);
520 279950605 : return link;
521 :
522 : out_kfree:
523 41 : kfree(link);
524 41 : out_err:
525 41 : return ERR_PTR(error);
526 : }
527 :
528 : static uint32_t
529 1922994537 : xfs_stat_blksize(
530 : struct xfs_inode *ip)
531 : {
532 1922994537 : struct xfs_mount *mp = ip->i_mount;
533 :
534 : /*
535 : * If the file blocks are being allocated from a realtime volume, then
536 : * always return the realtime extent size.
537 : */
538 1922994537 : if (XFS_IS_REALTIME_INODE(ip))
539 174920144 : return XFS_FSB_TO_B(mp, xfs_get_extsz_hint(ip));
540 :
541 : /*
542 : * Allow large block sizes to be reported to userspace programs if the
543 : * "largeio" mount option is used.
544 : *
545 : * If compatibility mode is specified, simply return the basic unit of
546 : * caching so that we don't get inefficient read/modify/write I/O from
547 : * user apps. Otherwise....
548 : *
549 : * If the underlying volume is a stripe, then return the stripe width in
550 : * bytes as the recommended I/O size. It is not a stripe and we've set a
551 : * default buffered I/O size, return that, otherwise return the compat
552 : * default.
553 : */
554 1748074393 : if (xfs_has_large_iosize(mp)) {
555 10 : if (mp->m_swidth)
556 0 : return XFS_FSB_TO_B(mp, mp->m_swidth);
557 10 : if (xfs_has_allocsize(mp))
558 0 : return 1U << mp->m_allocsize_log;
559 : }
560 :
561 : return PAGE_SIZE;
562 : }
563 :
564 : STATIC int
565 1927698767 : xfs_vn_getattr(
566 : struct mnt_idmap *idmap,
567 : const struct path *path,
568 : struct kstat *stat,
569 : u32 request_mask,
570 : unsigned int query_flags)
571 : {
572 1927698767 : struct inode *inode = d_inode(path->dentry);
573 1927698767 : struct xfs_inode *ip = XFS_I(inode);
574 1927698767 : struct xfs_mount *mp = ip->i_mount;
575 1927698767 : vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
576 1926536166 : vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
577 :
578 1926761596 : trace_xfs_getattr(ip);
579 :
580 3854394448 : if (xfs_is_shutdown(mp))
581 : return -EIO;
582 :
583 1927162859 : stat->size = XFS_ISIZE(ip);
584 1927162859 : stat->dev = inode->i_sb->s_dev;
585 1927162859 : stat->mode = inode->i_mode;
586 1927162859 : stat->nlink = inode->i_nlink;
587 1927162859 : stat->uid = vfsuid_into_kuid(vfsuid);
588 1927162859 : stat->gid = vfsgid_into_kgid(vfsgid);
589 1927162859 : stat->ino = ip->i_ino;
590 1927162859 : stat->atime = inode->i_atime;
591 1927162859 : stat->mtime = inode->i_mtime;
592 1927162859 : stat->ctime = inode->i_ctime;
593 1927162859 : stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks);
594 :
595 1927162859 : if (xfs_has_v3inodes(mp)) {
596 1926998163 : if (request_mask & STATX_BTIME) {
597 2474733 : stat->result_mask |= STATX_BTIME;
598 2474733 : stat->btime = ip->i_crtime;
599 : }
600 : }
601 :
602 : /*
603 : * Note: If you add another clause to set an attribute flag, please
604 : * update attributes_mask below.
605 : */
606 1927162859 : if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE)
607 1914 : stat->attributes |= STATX_ATTR_IMMUTABLE;
608 1927162859 : if (ip->i_diflags & XFS_DIFLAG_APPEND)
609 1765 : stat->attributes |= STATX_ATTR_APPEND;
610 1927162859 : if (ip->i_diflags & XFS_DIFLAG_NODUMP)
611 418 : stat->attributes |= STATX_ATTR_NODUMP;
612 :
613 1927162859 : stat->attributes_mask |= (STATX_ATTR_IMMUTABLE |
614 : STATX_ATTR_APPEND |
615 : STATX_ATTR_NODUMP);
616 :
617 1927162859 : switch (inode->i_mode & S_IFMT) {
618 4242132 : case S_IFBLK:
619 : case S_IFCHR:
620 4242132 : stat->blksize = BLKDEV_IOSIZE;
621 4242132 : stat->rdev = inode->i_rdev;
622 4242132 : break;
623 503235394 : case S_IFREG:
624 503235394 : if (request_mask & STATX_DIOALIGN) {
625 0 : struct xfs_buftarg *target = xfs_inode_buftarg(ip);
626 0 : struct block_device *bdev = target->bt_bdev;
627 :
628 0 : stat->result_mask |= STATX_DIOALIGN;
629 0 : stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
630 0 : stat->dio_offset_align = bdev_logical_block_size(bdev);
631 : }
632 1922920727 : fallthrough;
633 : default:
634 1922920727 : stat->blksize = xfs_stat_blksize(ip);
635 1922287856 : stat->rdev = 0;
636 1922287856 : break;
637 : }
638 :
639 : return 0;
640 : }
641 :
642 : static int
643 45814469 : xfs_vn_change_ok(
644 : struct mnt_idmap *idmap,
645 : struct dentry *dentry,
646 : struct iattr *iattr)
647 : {
648 45814469 : struct xfs_mount *mp = XFS_I(d_inode(dentry))->i_mount;
649 :
650 91628938 : if (xfs_is_readonly(mp))
651 : return -EROFS;
652 :
653 91628938 : if (xfs_is_shutdown(mp))
654 : return -EIO;
655 :
656 45812417 : return setattr_prepare(idmap, dentry, iattr);
657 : }
658 :
659 : /*
660 : * Set non-size attributes of an inode.
661 : *
662 : * Caution: The caller of this function is responsible for calling
663 : * setattr_prepare() or otherwise verifying the change is fine.
664 : */
665 : static int
666 29582299 : xfs_setattr_nonsize(
667 : struct mnt_idmap *idmap,
668 : struct dentry *dentry,
669 : struct xfs_inode *ip,
670 : struct iattr *iattr)
671 : {
672 29582299 : xfs_mount_t *mp = ip->i_mount;
673 29582299 : struct inode *inode = VFS_I(ip);
674 29582299 : int mask = iattr->ia_valid;
675 29582299 : xfs_trans_t *tp;
676 29582299 : int error;
677 29582299 : kuid_t uid = GLOBAL_ROOT_UID;
678 29582299 : kgid_t gid = GLOBAL_ROOT_GID;
679 29582299 : struct xfs_dquot *udqp = NULL, *gdqp = NULL;
680 29582299 : struct xfs_dquot *old_udqp = NULL, *old_gdqp = NULL;
681 :
682 29582299 : ASSERT((mask & ATTR_SIZE) == 0);
683 :
684 : /*
685 : * If disk quotas is on, we make sure that the dquots do exist on disk,
686 : * before we start any other transactions. Trying to do this later
687 : * is messy. We don't care to take a readlock to look at the ids
688 : * in inode here, because we can't hold it across the trans_reserve.
689 : * If the IDs do change before we take the ilock, we're covered
690 : * because the i_*dquot fields will get updated anyway.
691 : */
692 29582299 : if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
693 8076041 : uint qflags = 0;
694 :
695 8076041 : if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
696 8075606 : uid = from_vfsuid(idmap, i_user_ns(inode),
697 : iattr->ia_vfsuid);
698 8075606 : qflags |= XFS_QMOPT_UQUOTA;
699 : } else {
700 435 : uid = inode->i_uid;
701 : }
702 8083914 : if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
703 8064939 : gid = from_vfsgid(idmap, i_user_ns(inode),
704 : iattr->ia_vfsgid);
705 8070858 : qflags |= XFS_QMOPT_GQUOTA;
706 : } else {
707 18975 : gid = inode->i_gid;
708 : }
709 :
710 : /*
711 : * We take a reference when we initialize udqp and gdqp,
712 : * so it is important that we never blindly double trip on
713 : * the same variable. See xfs_create() for an example.
714 : */
715 8089833 : ASSERT(udqp == NULL);
716 8089833 : ASSERT(gdqp == NULL);
717 8089833 : error = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_projid,
718 : qflags, &udqp, &gdqp, NULL);
719 8187526 : if (error)
720 : return error;
721 : }
722 :
723 29664323 : error = xfs_trans_alloc_ichange(ip, udqp, gdqp, NULL,
724 29664323 : has_capability_noaudit(current, CAP_FOWNER), &tp);
725 29730774 : if (error)
726 320 : goto out_dqrele;
727 :
728 : /*
729 : * Register quota modifications in the transaction. Must be the owner
730 : * or privileged. These IDs could have changed since we last looked at
731 : * them. But, we're assured that if the ownership did change while we
732 : * didn't have the inode locked, inode's dquot(s) would have changed
733 : * also.
734 : */
735 44145346 : if (XFS_IS_UQUOTA_ON(mp) &&
736 14449932 : i_uid_needs_update(idmap, iattr, inode)) {
737 7804133 : ASSERT(udqp);
738 7804133 : old_udqp = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp);
739 : }
740 44094952 : if (XFS_IS_GQUOTA_ON(mp) &&
741 14405839 : i_gid_needs_update(idmap, iattr, inode)) {
742 7788999 : ASSERT(xfs_has_pquotino(mp) || !XFS_IS_PQUOTA_ON(mp));
743 7788999 : ASSERT(gdqp);
744 7788999 : old_gdqp = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp);
745 : }
746 :
747 29696973 : setattr_copy(idmap, inode, iattr);
748 29680840 : xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
749 :
750 29799866 : XFS_STATS_INC(mp, xs_ig_attrchg);
751 :
752 29806745 : if (xfs_has_wsync(mp))
753 942 : xfs_trans_set_sync(tp);
754 29806745 : error = xfs_trans_commit(tp);
755 :
756 : /*
757 : * Release any dquot(s) the inode had kept before chown.
758 : */
759 29785555 : xfs_qm_dqrele(old_udqp);
760 29733525 : xfs_qm_dqrele(old_gdqp);
761 29748943 : xfs_qm_dqrele(udqp);
762 29759310 : xfs_qm_dqrele(gdqp);
763 :
764 29764793 : if (error)
765 : return error;
766 :
767 : /*
768 : * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
769 : * update. We could avoid this with linked transactions
770 : * and passing down the transaction pointer all the way
771 : * to attr_set. No previous user of the generic
772 : * Posix ACL code seems to care about this issue either.
773 : */
774 29764792 : if (mask & ATTR_MODE) {
775 524916 : error = posix_acl_chmod(idmap, dentry, inode->i_mode);
776 524929 : if (error)
777 0 : return error;
778 : }
779 :
780 : return 0;
781 :
782 : out_dqrele:
783 320 : xfs_qm_dqrele(udqp);
784 320 : xfs_qm_dqrele(gdqp);
785 320 : return error;
786 : }
787 :
788 : /*
789 : * Truncate file. Must have write permission and not be a directory.
790 : *
791 : * Caution: The caller of this function is responsible for calling
792 : * setattr_prepare() or otherwise verifying the change is fine.
793 : */
794 : STATIC int
795 16710265 : xfs_setattr_size(
796 : struct mnt_idmap *idmap,
797 : struct dentry *dentry,
798 : struct xfs_inode *ip,
799 : struct iattr *iattr)
800 : {
801 16710265 : struct xfs_mount *mp = ip->i_mount;
802 16710265 : struct inode *inode = VFS_I(ip);
803 16710265 : xfs_off_t oldsize, newsize;
804 16710265 : struct xfs_trans *tp;
805 16710265 : int error;
806 16710265 : uint lock_flags = 0;
807 16710265 : bool did_zeroing = false;
808 :
809 16710265 : ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
810 16709998 : ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
811 16710123 : ASSERT(S_ISREG(inode->i_mode));
812 16710123 : ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
813 : ATTR_MTIME_SET|ATTR_TIMES_SET)) == 0);
814 :
815 16710123 : oldsize = inode->i_size;
816 16710123 : newsize = iattr->ia_size;
817 :
818 : /*
819 : * Short circuit the truncate case for zero length files.
820 : */
821 16710123 : if (newsize == 0 && oldsize == 0 && ip->i_df.if_nextents == 0) {
822 584994 : if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME)))
823 : return 0;
824 :
825 : /*
826 : * Use the regular setattr path to update the timestamps.
827 : */
828 584989 : iattr->ia_valid &= ~ATTR_SIZE;
829 584989 : return xfs_setattr_nonsize(idmap, dentry, ip, iattr);
830 : }
831 :
832 : /*
833 : * Make sure that the dquots are attached to the inode.
834 : */
835 16125129 : error = xfs_qm_dqattach(ip);
836 16125110 : if (error)
837 : return error;
838 :
839 : /*
840 : * Wait for all direct I/O to complete.
841 : */
842 16123706 : inode_dio_wait(inode);
843 :
844 : /*
845 : * File data changes must be complete before we start the transaction to
846 : * modify the inode. This needs to be done before joining the inode to
847 : * the transaction because the inode cannot be unlocked once it is a
848 : * part of the transaction.
849 : *
850 : * Start with zeroing any data beyond EOF that we may expose on file
851 : * extension, or zeroing out the rest of the block on a downward
852 : * truncate.
853 : */
854 16123775 : if (newsize > oldsize) {
855 9352735 : trace_xfs_zero_eof(ip, oldsize, newsize - oldsize);
856 9352725 : error = xfs_zero_range(ip, oldsize, newsize - oldsize,
857 : &did_zeroing);
858 : } else {
859 : /*
860 : * iomap won't detect a dirty page over an unwritten block (or a
861 : * cow block over a hole) and subsequently skips zeroing the
862 : * newly post-EOF portion of the page. Flush the new EOF to
863 : * convert the block before the pagecache truncate.
864 : */
865 6771040 : error = filemap_write_and_wait_range(inode->i_mapping, newsize,
866 : newsize);
867 6771124 : if (error)
868 : return error;
869 6771070 : error = xfs_truncate_page(ip, newsize, &did_zeroing);
870 : }
871 :
872 16123661 : if (error)
873 : return error;
874 :
875 : /*
876 : * We've already locked out new page faults, so now we can safely remove
877 : * pages from the page cache knowing they won't get refaulted until we
878 : * drop the XFS_MMAP_EXCL lock after the extent manipulations are
879 : * complete. The truncate_setsize() call also cleans partial EOF page
880 : * PTEs on extending truncates and hence ensures sub-page block size
881 : * filesystems are correctly handled, too.
882 : *
883 : * We have to do all the page cache truncate work outside the
884 : * transaction context as the "lock" order is page lock->log space
885 : * reservation as defined by extent allocation in the writeback path.
886 : * Hence a truncate can fail with ENOMEM from xfs_trans_alloc(), but
887 : * having already truncated the in-memory version of the file (i.e. made
888 : * user visible changes). There's not much we can do about this, except
889 : * to hope that the caller sees ENOMEM and retries the truncate
890 : * operation.
891 : *
892 : * And we update in-core i_size and truncate page cache beyond newsize
893 : * before writeback the [i_disk_size, newsize] range, so we're
894 : * guaranteed not to write stale data past the new EOF on truncate down.
895 : */
896 16120766 : truncate_setsize(inode, newsize);
897 :
898 : /*
899 : * We are going to log the inode size change in this transaction so
900 : * any previous writes that are beyond the on disk EOF and the new
901 : * EOF that have not been written out need to be written here. If we
902 : * do not write the data out, we expose ourselves to the null files
903 : * problem. Note that this includes any block zeroing we did above;
904 : * otherwise those blocks may not be zeroed after a crash.
905 : */
906 16120826 : if (did_zeroing ||
907 10278274 : (newsize > ip->i_disk_size && oldsize != ip->i_disk_size)) {
908 6154767 : error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
909 : ip->i_disk_size, newsize - 1);
910 6154898 : if (error)
911 : return error;
912 : }
913 :
914 16120058 : error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
915 16119950 : if (error)
916 : return error;
917 :
918 16119908 : lock_flags |= XFS_ILOCK_EXCL;
919 16119908 : xfs_ilock(ip, XFS_ILOCK_EXCL);
920 16119937 : xfs_trans_ijoin(tp, ip, 0);
921 :
922 : /*
923 : * Only change the c/mtime if we are changing the size or we are
924 : * explicitly asked to change it. This handles the semantic difference
925 : * between truncate() and ftruncate() as implemented in the VFS.
926 : *
927 : * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
928 : * special case where we need to update the times despite not having
929 : * these flags set. For all other operations the VFS set these flags
930 : * explicitly if it wants a timestamp update.
931 : */
932 16120020 : if (newsize != oldsize &&
933 16056653 : !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {
934 24148792 : iattr->ia_ctime = iattr->ia_mtime =
935 12074373 : current_time(inode);
936 12074419 : iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
937 : }
938 :
939 : /*
940 : * The first thing we do is set the size to new_size permanently on
941 : * disk. This way we don't have to worry about anyone ever being able
942 : * to look at the data being freed even in the face of a crash.
943 : * What we're getting around here is the case where we free a block, it
944 : * is allocated to another file, it is written to, and then we crash.
945 : * If the new data gets written to the file but the log buffers
946 : * containing the free and reallocation don't, then we'd end up with
947 : * garbage in the blocks being freed. As long as we make the new size
948 : * permanent before actually freeing any blocks it doesn't matter if
949 : * they get written to.
950 : */
951 16120066 : ip->i_disk_size = newsize;
952 16120066 : xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
953 :
954 16120225 : if (newsize <= oldsize) {
955 6769986 : error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize);
956 6770029 : if (error)
957 584 : goto out_trans_cancel;
958 :
959 : /*
960 : * Truncated "down", so we're removing references to old data
961 : * here - if we delay flushing for a long time, we expose
962 : * ourselves unduly to the notorious NULL files problem. So,
963 : * we mark this inode and flush it when the file is closed,
964 : * and do not wait the usual (long) time for writeout.
965 : */
966 6769445 : xfs_iflags_set(ip, XFS_ITRUNCATED);
967 :
968 : /* A truncate down always removes post-EOF blocks. */
969 6769436 : xfs_inode_clear_eofblocks_tag(ip);
970 : }
971 :
972 16119689 : ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID)));
973 16119689 : setattr_copy(idmap, inode, iattr);
974 16119574 : xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
975 :
976 16119687 : XFS_STATS_INC(mp, xs_ig_attrchg);
977 :
978 16119650 : if (xfs_has_wsync(mp))
979 0 : xfs_trans_set_sync(tp);
980 :
981 16119650 : error = xfs_trans_commit(tp);
982 16120084 : out_unlock:
983 16120084 : if (lock_flags)
984 16120084 : xfs_iunlock(ip, lock_flags);
985 16120084 : return error;
986 :
987 : out_trans_cancel:
988 584 : xfs_trans_cancel(tp);
989 584 : goto out_unlock;
990 : }
991 :
/*
 * Size-changing setattr entry point.
 *
 * Emits the setattr tracepoint, runs the checks in xfs_vn_change_ok() and,
 * only if those succeed, hands the request to xfs_setattr_size().
 *
 * Returns 0 on success or the first error reported by either helper.
 */
int
xfs_vn_setattr_size(
	struct mnt_idmap	*idmap,
	struct dentry		*dentry,
	struct iattr		*iattr)
{
	struct xfs_inode	*ip = XFS_I(d_inode(dentry));
	int			error;

	trace_xfs_setattr(ip);

	error = xfs_vn_change_ok(idmap, dentry, iattr);
	if (!error)
		error = xfs_setattr_size(idmap, dentry, ip, iattr);

	return error;
}
1008 :
1009 : STATIC int
1010 37093081 : xfs_vn_setattr(
1011 : struct mnt_idmap *idmap,
1012 : struct dentry *dentry,
1013 : struct iattr *iattr)
1014 : {
1015 37093081 : struct inode *inode = d_inode(dentry);
1016 37093081 : struct xfs_inode *ip = XFS_I(inode);
1017 37093081 : int error;
1018 :
1019 37093081 : if (iattr->ia_valid & ATTR_SIZE) {
1020 7946237 : uint iolock;
1021 :
1022 7946237 : xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
1023 7946211 : iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
1024 :
1025 7946211 : error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
1026 7946613 : if (error) {
1027 0 : xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
1028 0 : return error;
1029 : }
1030 :
1031 7946613 : error = xfs_vn_setattr_size(idmap, dentry, iattr);
1032 7946504 : xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
1033 : } else {
1034 29146844 : trace_xfs_setattr(ip);
1035 :
1036 29093436 : error = xfs_vn_change_ok(idmap, dentry, iattr);
1037 29033661 : if (!error)
1038 29027837 : error = xfs_setattr_nonsize(idmap, dentry, ip, iattr);
1039 : }
1040 :
1041 : return error;
1042 : }
1043 :
1044 : STATIC int
1045 111612774 : xfs_vn_update_time(
1046 : struct inode *inode,
1047 : struct timespec64 *now,
1048 : int flags)
1049 : {
1050 111612774 : struct xfs_inode *ip = XFS_I(inode);
1051 111612774 : struct xfs_mount *mp = ip->i_mount;
1052 111612774 : int log_flags = XFS_ILOG_TIMESTAMP;
1053 111612774 : struct xfs_trans *tp;
1054 111612774 : int error;
1055 :
1056 111612774 : trace_xfs_update_time(ip);
1057 :
1058 111576528 : if (inode->i_sb->s_flags & SB_LAZYTIME) {
1059 264 : if (!((flags & S_VERSION) &&
1060 66 : inode_maybe_inc_iversion(inode, false)))
1061 132 : return generic_update_time(inode, now, flags);
1062 :
1063 : /* Capture the iversion update that just occurred */
1064 : log_flags |= XFS_ILOG_CORE;
1065 : }
1066 :
1067 111576396 : error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
1068 111563207 : if (error)
1069 : return error;
1070 :
1071 111562616 : xfs_ilock(ip, XFS_ILOCK_EXCL);
1072 111594215 : if (flags & S_CTIME)
1073 50715251 : inode->i_ctime = *now;
1074 111594215 : if (flags & S_MTIME)
1075 51293025 : inode->i_mtime = *now;
1076 111594215 : if (flags & S_ATIME)
1077 60298056 : inode->i_atime = *now;
1078 :
1079 111594215 : xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1080 111610786 : xfs_trans_log_inode(tp, ip, log_flags);
1081 111647773 : return xfs_trans_commit(tp);
1082 : }
1083 :
1084 : STATIC int
1085 1838627 : xfs_vn_fiemap(
1086 : struct inode *inode,
1087 : struct fiemap_extent_info *fieinfo,
1088 : u64 start,
1089 : u64 length)
1090 : {
1091 1838627 : int error;
1092 :
1093 1838627 : xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED);
1094 1838626 : if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
1095 762264 : fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR;
1096 762264 : error = iomap_fiemap(inode, fieinfo, start, length,
1097 : &xfs_xattr_iomap_ops);
1098 : } else {
1099 1076362 : error = iomap_fiemap(inode, fieinfo, start, length,
1100 : &xfs_read_iomap_ops);
1101 : }
1102 1838618 : xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED);
1103 :
1104 1838617 : return error;
1105 : }
1106 :
1107 : STATIC int
1108 7442555 : xfs_vn_tmpfile(
1109 : struct mnt_idmap *idmap,
1110 : struct inode *dir,
1111 : struct file *file,
1112 : umode_t mode)
1113 : {
1114 7442555 : int err = xfs_generic_create(idmap, dir, file->f_path.dentry, mode, 0, file);
1115 :
1116 7405425 : return finish_open_simple(file, err);
1117 : }
1118 :
1119 : static const struct inode_operations xfs_inode_operations = {
1120 : .get_inode_acl = xfs_get_acl,
1121 : .set_acl = xfs_set_acl,
1122 : .getattr = xfs_vn_getattr,
1123 : .setattr = xfs_vn_setattr,
1124 : .listxattr = xfs_vn_listxattr,
1125 : .fiemap = xfs_vn_fiemap,
1126 : .update_time = xfs_vn_update_time,
1127 : .fileattr_get = xfs_fileattr_get,
1128 : .fileattr_set = xfs_fileattr_set,
1129 : };
1130 :
1131 : static const struct inode_operations xfs_dir_inode_operations = {
1132 : .create = xfs_vn_create,
1133 : .lookup = xfs_vn_lookup,
1134 : .link = xfs_vn_link,
1135 : .unlink = xfs_vn_unlink,
1136 : .symlink = xfs_vn_symlink,
1137 : .mkdir = xfs_vn_mkdir,
1138 : /*
1139 : * Yes, XFS uses the same method for rmdir and unlink.
1140 : *
1141 : * There are some subtile differences deeper in the code,
1142 : * but we use S_ISDIR to check for those.
1143 : */
1144 : .rmdir = xfs_vn_unlink,
1145 : .mknod = xfs_vn_mknod,
1146 : .rename = xfs_vn_rename,
1147 : .get_inode_acl = xfs_get_acl,
1148 : .set_acl = xfs_set_acl,
1149 : .getattr = xfs_vn_getattr,
1150 : .setattr = xfs_vn_setattr,
1151 : .listxattr = xfs_vn_listxattr,
1152 : .update_time = xfs_vn_update_time,
1153 : .tmpfile = xfs_vn_tmpfile,
1154 : .fileattr_get = xfs_fileattr_get,
1155 : .fileattr_set = xfs_fileattr_set,
1156 : };
1157 :
1158 : static const struct inode_operations xfs_dir_ci_inode_operations = {
1159 : .create = xfs_vn_create,
1160 : .lookup = xfs_vn_ci_lookup,
1161 : .link = xfs_vn_link,
1162 : .unlink = xfs_vn_unlink,
1163 : .symlink = xfs_vn_symlink,
1164 : .mkdir = xfs_vn_mkdir,
1165 : /*
1166 : * Yes, XFS uses the same method for rmdir and unlink.
1167 : *
1168 : * There are some subtile differences deeper in the code,
1169 : * but we use S_ISDIR to check for those.
1170 : */
1171 : .rmdir = xfs_vn_unlink,
1172 : .mknod = xfs_vn_mknod,
1173 : .rename = xfs_vn_rename,
1174 : .get_inode_acl = xfs_get_acl,
1175 : .set_acl = xfs_set_acl,
1176 : .getattr = xfs_vn_getattr,
1177 : .setattr = xfs_vn_setattr,
1178 : .listxattr = xfs_vn_listxattr,
1179 : .update_time = xfs_vn_update_time,
1180 : .tmpfile = xfs_vn_tmpfile,
1181 : .fileattr_get = xfs_fileattr_get,
1182 : .fileattr_set = xfs_fileattr_set,
1183 : };
1184 :
1185 : static const struct inode_operations xfs_symlink_inode_operations = {
1186 : .get_link = xfs_vn_get_link,
1187 : .getattr = xfs_vn_getattr,
1188 : .setattr = xfs_vn_setattr,
1189 : .listxattr = xfs_vn_listxattr,
1190 : .update_time = xfs_vn_update_time,
1191 : };
1192 :
1193 : /* Figure out if this file actually supports DAX. */
1194 : static bool
1195 1114830824 : xfs_inode_supports_dax(
1196 : struct xfs_inode *ip)
1197 : {
1198 1114830824 : struct xfs_mount *mp = ip->i_mount;
1199 :
1200 : /* Only supported on regular files. */
1201 1114830824 : if (!S_ISREG(VFS_I(ip)->i_mode))
1202 : return false;
1203 :
1204 : /* Block size must match page size */
1205 491688608 : if (mp->m_sb.sb_blocksize != PAGE_SIZE)
1206 : return false;
1207 :
1208 : /* Device has to support DAX too. */
1209 491680971 : return xfs_inode_buftarg(ip)->bt_daxdev != NULL;
1210 : }
1211 :
1212 : static bool
1213 1114770168 : xfs_inode_should_enable_dax(
1214 : struct xfs_inode *ip)
1215 : {
1216 1114770168 : if (!IS_ENABLED(CONFIG_FS_DAX))
1217 : return false;
1218 1114770168 : if (xfs_has_dax_never(ip->i_mount))
1219 : return false;
1220 1114769733 : if (!xfs_inode_supports_dax(ip))
1221 : return false;
1222 0 : if (xfs_has_dax_always(ip->i_mount))
1223 : return true;
1224 0 : if (ip->i_diflags2 & XFS_DIFLAG2_DAX)
1225 0 : return true;
1226 : return false;
1227 : }
1228 :
1229 : void
1230 1116772274 : xfs_diflags_to_iflags(
1231 : struct xfs_inode *ip,
1232 : bool init)
1233 : {
1234 1116772274 : struct inode *inode = VFS_I(ip);
1235 1116772274 : unsigned int xflags = xfs_ip2xflags(ip);
1236 1115908685 : unsigned int flags = 0;
1237 :
1238 1115908685 : ASSERT(!(IS_DAX(inode) && init));
1239 :
1240 1115908685 : if (xflags & FS_XFLAG_IMMUTABLE)
1241 291 : flags |= S_IMMUTABLE;
1242 1115908685 : if (xflags & FS_XFLAG_APPEND)
1243 256 : flags |= S_APPEND;
1244 1115908685 : if (xflags & FS_XFLAG_SYNC)
1245 10875 : flags |= S_SYNC;
1246 1115908685 : if (xflags & FS_XFLAG_NOATIME)
1247 80 : flags |= S_NOATIME;
1248 1115908685 : if (init && xfs_inode_should_enable_dax(ip))
1249 0 : flags |= S_DAX;
1250 :
1251 : /*
1252 : * S_DAX can only be set during inode initialization and is never set by
1253 : * the VFS, so we cannot mask off S_DAX in i_flags.
1254 : */
1255 1115908685 : inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC | S_NOATIME);
1256 1115908685 : inode->i_flags |= flags;
1257 1115908685 : }
1258 :
1259 : /*
1260 : * Initialize the Linux inode.
1261 : *
1262 : * When reading existing inodes from disk this is called directly from xfs_iget,
1263 : * when creating a new inode it is called from xfs_init_new_inode after setting
1264 : * up the inode. These callers have different criteria for clearing XFS_INEW, so
1265 : * leave it up to the caller to deal with unlocking the inode appropriately.
1266 : */
1267 : void
1268 1115033859 : xfs_setup_inode(
1269 : struct xfs_inode *ip)
1270 : {
1271 1115033859 : struct inode *inode = &ip->i_vnode;
1272 1115033859 : gfp_t gfp_mask;
1273 :
1274 1115033859 : inode->i_ino = ip->i_ino;
1275 1115033859 : inode->i_state |= I_NEW;
1276 :
1277 1115033859 : inode_sb_list_add(inode);
1278 : /* make the inode look hashed for the writeback code */
1279 1115877454 : inode_fake_hash(inode);
1280 :
1281 1115877454 : i_size_write(inode, ip->i_disk_size);
1282 1115877454 : xfs_diflags_to_iflags(ip, true);
1283 :
1284 1114698454 : if (S_ISDIR(inode->i_mode)) {
1285 : /*
1286 : * We set the i_rwsem class here to avoid potential races with
1287 : * lockdep_annotate_inode_mutex_key() reinitialising the lock
1288 : * after a filehandle lookup has already found the inode in
1289 : * cache before it has been unlocked via unlock_new_inode().
1290 : */
1291 : lockdep_set_class(&inode->i_rwsem,
1292 : &inode->i_sb->s_type->i_mutex_dir_key);
1293 : lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);
1294 : } else {
1295 1114698454 : lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
1296 : }
1297 :
1298 : /*
1299 : * Ensure all page cache allocations are done from GFP_NOFS context to
1300 : * prevent direct reclaim recursion back into the filesystem and blowing
1301 : * stacks or deadlocking.
1302 : */
1303 1114698454 : gfp_mask = mapping_gfp_mask(inode->i_mapping);
1304 1114698454 : mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS)));
1305 :
1306 : /*
1307 : * If there is no attribute fork no ACL can exist on this inode,
1308 : * and it can't have any file capabilities attached to it either.
1309 : */
1310 1114698454 : if (!xfs_inode_has_attr_fork(ip)) {
1311 33059463 : inode_has_no_xattr(inode);
1312 33013111 : cache_no_acl(inode);
1313 : }
1314 1114652102 : }
1315 :
1316 : void
1317 1114317530 : xfs_setup_iops(
1318 : struct xfs_inode *ip)
1319 : {
1320 1114317530 : struct inode *inode = &ip->i_vnode;
1321 :
1322 1114317530 : switch (inode->i_mode & S_IFMT) {
1323 491262302 : case S_IFREG:
1324 491262302 : inode->i_op = &xfs_inode_operations;
1325 491262302 : inode->i_fop = &xfs_file_operations;
1326 491262302 : if (IS_DAX(inode))
1327 0 : inode->i_mapping->a_ops = &xfs_dax_aops;
1328 : else
1329 491262302 : inode->i_mapping->a_ops = &xfs_address_space_operations;
1330 : break;
1331 179182343 : case S_IFDIR:
1332 179182343 : if (xfs_has_asciici(XFS_M(inode->i_sb)))
1333 694 : inode->i_op = &xfs_dir_ci_inode_operations;
1334 : else
1335 179181649 : inode->i_op = &xfs_dir_inode_operations;
1336 179182343 : inode->i_fop = &xfs_dir_file_operations;
1337 179182343 : break;
1338 88226043 : case S_IFLNK:
1339 88226043 : inode->i_op = &xfs_symlink_inode_operations;
1340 88226043 : break;
1341 355646842 : default:
1342 355646842 : inode->i_op = &xfs_inode_operations;
1343 355646842 : init_special_inode(inode, inode->i_mode, inode->i_rdev);
1344 355646842 : break;
1345 : }
1346 1114310486 : }
|