Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4 : * All Rights Reserved.
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_log_format.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_mount.h"
13 : #include "xfs_inode.h"
14 : #include "xfs_acl.h"
15 : #include "xfs_quota.h"
16 : #include "xfs_da_format.h"
17 : #include "xfs_da_btree.h"
18 : #include "xfs_attr.h"
19 : #include "xfs_trans.h"
20 : #include "xfs_trace.h"
21 : #include "xfs_icache.h"
22 : #include "xfs_symlink.h"
23 : #include "xfs_dir2.h"
24 : #include "xfs_iomap.h"
25 : #include "xfs_error.h"
26 : #include "xfs_ioctl.h"
27 : #include "xfs_xattr.h"
28 : #include "xfs_file.h"
29 : #include "xfs_bmap.h"
30 : #include "xfs_reflink.h"
31 :
32 : #include <linux/posix_acl.h>
33 : #include <linux/security.h>
34 : #include <linux/iversion.h>
35 : #include <linux/fiemap.h>
36 :
37 : /*
38 : * Directories have different lock order w.r.t. mmap_lock compared to regular
39 : * files. This is due to readdir potentially triggering page faults on a user
40 : * buffer inside filldir(), and this happens with the ilock on the directory
41 : * held. For regular files, the lock order is the other way around - the
42 : * mmap_lock is taken during the page fault, and then we lock the ilock to do
43 : * block mapping. Hence we need a different class for the directory ilock so
44 : * that lockdep can tell them apart.
45 : */
/*
 * One lockdep class key for non-directory inodes and one for directories.
 * NOTE(review): the code that attaches these keys to a lock is not in the
 * part of the file shown here.
 */
46 : static struct lock_class_key xfs_nondir_ilock_class;
47 : static struct lock_class_key xfs_dir_ilock_class;
48 :
49 : /*
50 : * Metadata directories and files are not exposed to userspace, which means
51 : * that they never access any of the VFS IO locks and never experience page
52 : * faults. Give them separate locking classes so that lockdep will not
53 : * complain about conflicts that cannot happen.
54 : */
/* One lockdep class key for metadata files and one for metadata directories. */
55 : static struct lock_class_key xfs_metadata_file_ilock_class;
56 : static struct lock_class_key xfs_metadata_dir_ilock_class;
57 :
/*
 * Set every entry of @xattr_array on @inode as an XFS_ATTR_SECURE attribute.
 *
 * The array is walked until an entry with a NULL name is found. Each entry is
 * applied with xfs_attr_change(); the walk stops at the first call that
 * returns a negative value. @fs_info is not used.
 *
 * Returns the value of the last xfs_attr_change() call, or 0 if the array
 * has no entries.
 */
58 : static int
59 : xfs_initxattrs(
60 : struct inode *inode,
61 : const struct xattr *xattr_array,
62 : void *fs_info)
63 : {
64 : const struct xattr *xattr;
65 : struct xfs_inode *ip = XFS_I(inode);
66 : int error = 0;
67 :
68 : for (xattr = xattr_array; xattr->name != NULL; xattr++) {
69 : struct xfs_da_args args = {
70 : .dp = ip,
71 : .attr_filter = XFS_ATTR_SECURE,
72 : .name = xattr->name,
73 : .namelen = strlen(xattr->name),
74 : .value = xattr->value,
75 : .valuelen = xattr->value_len,
76 : .owner = ip->i_ino,
77 : };
78 : error = xfs_attr_change(&args);
79 : if (error < 0)
80 : break;
81 : }
82 : return error;
83 : }
84 :
85 : /*
86 : * Hook in SELinux. This is not quite correct yet, what we really need
87 : * here (as we do for default ACLs) is a mechanism by which creation of
88 : * these attrs can be journalled at inode creation time (along with the
89 : * inode, of course, such that log replay can't cause these to be lost).
90 : */
/*
 * Thin wrapper: hands @inode, @dir and @qstr to security_inode_init_security()
 * with xfs_initxattrs as the callback and NULL as the callback's private
 * data, and returns that function's result unchanged.
 */
91 : int
92 2420822 : xfs_inode_init_security(
93 : struct inode *inode,
94 : struct inode *dir,
95 : const struct qstr *qstr)
96 : {
97 2420822 : return security_inode_init_security(inode, dir, qstr,
98 : &xfs_initxattrs, NULL);
99 : }
100 :
/*
 * Fill @namep with the name pointer and length taken from @dentry->d_name.
 * The file type is set to XFS_DIR3_FT_UNKNOWN; the name bytes are not copied,
 * so @namep points into the dentry.
 */
101 : static void
102 : xfs_dentry_to_name(
103 : struct xfs_name *namep,
104 : struct dentry *dentry)
105 : {
106 269344977 : namep->name = dentry->d_name.name;
107 269344977 : namep->len = dentry->d_name.len;
108 269344977 : namep->type = XFS_DIR3_FT_UNKNOWN;
109 : }
110 :
/*
 * Like xfs_dentry_to_name(), but derive the file type from @mode with
 * xfs_mode_to_ftype().
 *
 * Returns 0 on success, or -EFSCORRUPTED when the derived type is
 * XFS_DIR3_FT_UNKNOWN. @namep has been filled in either way.
 */
111 : static int
112 : xfs_dentry_mode_to_name(
113 : struct xfs_name *namep,
114 : struct dentry *dentry,
115 : int mode)
116 : {
117 536463118 : namep->name = dentry->d_name.name;
118 536463118 : namep->len = dentry->d_name.len;
119 1072950479 : namep->type = xfs_mode_to_ftype(mode);
120 :
121 536487361 : if (unlikely(namep->type == XFS_DIR3_FT_UNKNOWN))
122 22934552 : return -EFSCORRUPTED;
123 :
124 : return 0;
125 : }
126 :
/*
 * Back out a just-created directory entry: build an xfs_name from @dentry and
 * call xfs_remove() to remove @inode from @dir. The result of xfs_remove() is
 * discarded, so a failure here is not reported to the caller.
 */
127 : STATIC void
128 0 : xfs_cleanup_inode(
129 : struct inode *dir,
130 : struct inode *inode,
131 : struct dentry *dentry)
132 : {
133 0 : struct xfs_name teardown;
134 :
135 : /* Oh, the horror.
136 : * If we can't add the ACL or we fail in
137 : * xfs_inode_init_security we must back out.
138 : * ENOSPC can hit here, among other things.
139 : */
140 0 : xfs_dentry_to_name(&teardown, dentry);
141 :
142 0 : xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
143 0 : }
144 :
145 : /*
146 : * Check to see if we are likely to need an extended attribute to be added to
147 : * the inode we are about to allocate. This allows the attribute fork to be
148 : * created during the inode allocation, reducing the number of transactions we
149 : * need to do in this fast path.
150 : *
151 : * The security checks are optimistic, but not guaranteed. The two LSMs that
152 : * require xattrs to be added here (selinux and smack) are also the only two
153 : * LSMs that add a sb->s_security structure to the superblock. Hence if security
154 : * is enabled and sb->s_security is set, we have a pretty good idea that we are
155 : * going to be asked to add a security xattr immediately after allocating the
156 : * xfs inode and instantiating the VFS inode.
157 : */
/*
 * Returns true if any of the following holds, false otherwise:
 *  - @acl or @default_acl is non-NULL;
 *  - CONFIG_SECURITY is enabled and @dir's superblock has s_security set;
 *  - xfs_has_parent() is true for @dir's mount.
 */
158 : static inline bool
159 : xfs_create_need_xattr(
160 : struct inode *dir,
161 : struct posix_acl *default_acl,
162 : struct posix_acl *acl)
163 : {
164 33726351 : if (acl)
165 : return true;
166 33721814 : if (default_acl)
167 : return true;
168 : #if IS_ENABLED(CONFIG_SECURITY)
169 : if (dir->i_sb->s_security)
170 : return true;
171 : #endif
172 33722318 : if (xfs_has_parent(XFS_I(dir)->i_mount))
173 : return true;
174 : return false;
175 : }
176 :
177 :
/*
 * Common inode creation path. In this file it is called by xfs_vn_mknod(),
 * xfs_vn_create() and xfs_vn_mkdir() with @tmpfile == NULL; a non-NULL
 * @tmpfile selects the unnamed-file variant.
 *
 * Steps, in order:
 *  1. Build the creation arguments from @dir/@idmap/@mode and pick the
 *     initial link count: 0 for a tmpfile, 2 for a directory, 1 otherwise.
 *  2. For character/block devices validate @rdev (-EINVAL if invalid); for
 *     every other type force rdev to 0.
 *  3. posix_acl_create() to obtain the default and access ACLs (this may
 *     adjust args.mode, which is passed by pointer).
 *  4. Convert the dentry name, which also validates the mode.
 *  5. Create the inode with xfs_create() or xfs_create_tmpfile(), asking for
 *     xattr initialisation up front when one is expected to be needed.
 *  6. Initialise security xattrs, then apply the default and access ACLs.
 *  7. Set up inode operations, attach the inode to the dentry (d_tmpfile() or
 *     d_instantiate()) and finish inode setup.
 *
 * Returns 0 on success or the error of the first step that failed. Both ACL
 * references are released on every path that reaches out_free_acl.
 */
178 : STATIC int
179 34704003 : xfs_generic_create(
180 : struct mnt_idmap *idmap,
181 : struct inode *dir,
182 : struct dentry *dentry,
183 : umode_t mode,
184 : dev_t rdev,
185 : struct file *tmpfile) /* unnamed file */
186 : {
187 34704003 : struct xfs_icreate_args args = {
188 : .rdev = rdev,
189 : };
190 34704003 : struct inode *inode;
191 34704003 : struct xfs_inode *ip = NULL;
192 34704003 : struct posix_acl *default_acl, *acl;
193 34704003 : struct xfs_name name;
194 34704003 : int error;
195 :
196 34704003 : xfs_icreate_args_inherit(&args, XFS_I(dir), idmap, mode, false);
197 34704937 : if (tmpfile)
198 976902 : args.nlink = 0;
199 33728035 : else if (S_ISDIR(mode))
200 6903724 : args.nlink = 2;
201 : else
202 26824311 : args.nlink = 1;
203 :
204 : /*
205 : * Irix uses Missed'em'V split, but doesn't want to see
206 : * the upper 5 bits of (14bit) major.
207 : */
208 34704937 : if (S_ISCHR(args.mode) || S_ISBLK(args.mode)) {
/*
 * A direct return is fine here: posix_acl_create() has not been called yet,
 * so there are no ACL references to release.
 */
209 6669432 : if (unlikely(!sysv_valid_dev(args.rdev) ||
210 : MAJOR(args.rdev) & ~0x1ff))
211 : return -EINVAL;
212 : } else {
213 28035505 : args.rdev = 0;
214 : }
215 :
216 34704937 : error = posix_acl_create(dir, &args.mode, &default_acl, &acl);
217 34704705 : if (error)
218 : return error;
219 :
220 : /* Verify mode is valid also for tmpfile case */
221 34704851 : error = xfs_dentry_mode_to_name(&name, dentry, args.mode);
222 34702839 : if (unlikely(error))
223 0 : goto out_free_acl;
224 :
225 34702839 : if (!tmpfile) {
226 33726351 : if (xfs_create_need_xattr(dir, default_acl, acl))
227 33571611 : args.flags |= XFS_ICREATE_ARGS_INIT_XATTRS;
228 :
229 33726351 : error = xfs_create(XFS_I(dir), &name, &args, &ip);
230 : } else {
231 : /*
232 : * If this temporary file will be linkable, set up the file
233 : * with an attr fork to receive a parent pointer.
234 : */
235 976488 : if (!(tmpfile->f_flags & O_EXCL) &&
236 976345 : xfs_has_parent(XFS_I(dir)->i_mount))
237 976410 : args.flags |= XFS_ICREATE_ARGS_INIT_XATTRS;
238 :
239 976488 : error = xfs_create_tmpfile(XFS_I(dir), &args, &ip);
240 : }
241 34707364 : if (unlikely(error))
242 467472 : goto out_free_acl;
243 :
244 34239892 : inode = VFS_I(ip);
245 :
246 34239892 : error = xfs_inode_init_security(inode, dir, &dentry->d_name);
247 34239892 : if (unlikely(error))
248 : goto out_cleanup_inode;
249 :
250 34239892 : if (default_acl) {
251 27 : error = __xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
252 27 : if (error)
253 0 : goto out_cleanup_inode;
254 : }
255 34239892 : if (acl) {
256 3415 : error = __xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
257 3415 : if (error)
258 0 : goto out_cleanup_inode;
259 : }
260 :
261 34239892 : xfs_setup_iops(ip);
262 :
263 34238746 : if (tmpfile) {
264 : /*
265 : * The VFS requires that any inode fed to d_tmpfile must have
266 : * nlink == 1 so that it can decrement the nlink in d_tmpfile.
267 : * However, we created the temp file with nlink == 0 because
268 : * we're not allowed to put an inode with nlink > 0 on the
269 : * unlinked list. Therefore we have to set nlink to 1 so that
270 : * d_tmpfile can immediately set it back to zero.
271 : */
272 976561 : set_nlink(inode, 1);
273 976555 : d_tmpfile(tmpfile, inode);
274 : } else
275 33262185 : d_instantiate(dentry, inode);
276 :
277 34238173 : xfs_finish_inode_setup(ip);
278 :
/* Shared exit for success and failure: drop both ACL references. */
279 34702638 : out_free_acl:
280 34702638 : posix_acl_release(default_acl);
281 34703396 : posix_acl_release(acl);
282 34703396 : return error;
283 :
/*
 * Failure after the inode was created: finish inode setup, remove the new
 * directory entry (named files only), drop the inode reference, then fall
 * into the common ACL release with @error still set.
 */
284 0 : out_cleanup_inode:
285 0 : xfs_finish_inode_setup(ip);
286 0 : if (!tmpfile)
287 0 : xfs_cleanup_inode(dir, inode, dentry);
288 0 : xfs_irele(ip);
289 0 : goto out_free_acl;
290 : }
291 :
/*
 * mknod entry point: forwards all arguments to xfs_generic_create() with no
 * tmpfile and returns its result.
 */
292 : STATIC int
293 6669447 : xfs_vn_mknod(
294 : struct mnt_idmap *idmap,
295 : struct inode *dir,
296 : struct dentry *dentry,
297 : umode_t mode,
298 : dev_t rdev)
299 : {
300 6669447 : return xfs_generic_create(idmap, dir, dentry, mode, rdev, NULL);
301 : }
302 :
/*
 * create entry point: calls xfs_generic_create() with rdev 0 and no tmpfile
 * and returns its result. @flags is not used.
 */
303 : STATIC int
304 20156395 : xfs_vn_create(
305 : struct mnt_idmap *idmap,
306 : struct inode *dir,
307 : struct dentry *dentry,
308 : umode_t mode,
309 : bool flags)
310 : {
311 20156395 : return xfs_generic_create(idmap, dir, dentry, mode, 0, NULL);
312 : }
313 :
/*
 * mkdir entry point: ORs S_IFDIR into @mode and calls xfs_generic_create()
 * with rdev 0 and no tmpfile, returning its result.
 */
314 : STATIC int
315 6903688 : xfs_vn_mkdir(
316 : struct mnt_idmap *idmap,
317 : struct inode *dir,
318 : struct dentry *dentry,
319 : umode_t mode)
320 : {
321 6903688 : return xfs_generic_create(idmap, dir, dentry, mode | S_IFDIR, 0, NULL);
322 : }
323 :
/*
 * Look up @dentry's name in @dir.
 *
 * Names whose length is >= MAXNAMELEN are rejected with
 * ERR_PTR(-ENAMETOOLONG). Otherwise the xfs_lookup() result is turned into
 * the argument for d_splice_alias(): the found inode on success, NULL for
 * -ENOENT, or ERR_PTR(error) for any other error. The return value is
 * whatever d_splice_alias() returns. @flags is not used.
 */
324 : STATIC struct dentry *
325 228146016 : xfs_vn_lookup(
326 : struct inode *dir,
327 : struct dentry *dentry,
328 : unsigned int flags)
329 : {
330 228146016 : struct inode *inode;
331 228146016 : struct xfs_inode *cip;
332 228146016 : struct xfs_name name;
333 228146016 : int error;
334 :
335 228146016 : if (dentry->d_name.len >= MAXNAMELEN)
336 : return ERR_PTR(-ENAMETOOLONG);
337 :
338 228152263 : xfs_dentry_to_name(&name, dentry);
339 228152263 : error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
340 228146392 : if (likely(!error))
341 147426940 : inode = VFS_I(cip);
342 80719452 : else if (likely(error == -ENOENT))
343 : inode = NULL;
344 : else
345 73255 : inode = ERR_PTR(error);
346 228146392 : return d_splice_alias(inode, dentry);
347 : }
348 :
/*
 * Lookup variant that also asks xfs_lookup() for the matched name (@ci_name).
 *
 * - Name length >= MAXNAMELEN: ERR_PTR(-ENAMETOOLONG).
 * - Lookup error other than -ENOENT: ERR_PTR(error).
 * - -ENOENT: returns NULL without attaching anything to @dentry (see the
 *   comment in the body).
 * - ci_name.name == NULL (exact match): result of d_splice_alias().
 * - Otherwise: result of d_add_ci() with the returned name; the returned
 *   name buffer is freed with kmem_free() afterwards.
 *
 * @flags is not used.
 */
349 : STATIC struct dentry *
350 195894 : xfs_vn_ci_lookup(
351 : struct inode *dir,
352 : struct dentry *dentry,
353 : unsigned int flags)
354 : {
355 195894 : struct xfs_inode *ip;
356 195894 : struct xfs_name xname;
357 195894 : struct xfs_name ci_name;
358 195894 : struct qstr dname;
359 195894 : int error;
360 :
361 195894 : if (dentry->d_name.len >= MAXNAMELEN)
362 : return ERR_PTR(-ENAMETOOLONG);
363 :
364 195894 : xfs_dentry_to_name(&xname, dentry);
365 195894 : error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
366 195894 : if (unlikely(error)) {
367 93224 : if (unlikely(error != -ENOENT))
368 0 : return ERR_PTR(error);
369 : /*
370 : * call d_add(dentry, NULL) here when d_drop_negative_children
371 : * is called in xfs_vn_mknod (ie. allow negative dentries
372 : * with CI filesystems).
373 : */
374 : return NULL;
375 : }
376 :
377 : /* if exact match, just splice and exit */
378 102670 : if (!ci_name.name)
379 14574 : return d_splice_alias(VFS_I(ip), dentry);
380 :
381 : /* else case-insensitive match... */
382 88096 : dname.name = ci_name.name;
383 88096 : dname.len = ci_name.len;
384 88096 : dentry = d_add_ci(dentry, VFS_I(ip), &dname);
385 88096 : kmem_free(ci_name.name);
386 88096 : return dentry;
387 : }
388 :
/*
 * Create a hard link named by @dentry in @dir to the inode behind
 * @old_dentry.
 *
 * The new name's file type is derived from the existing inode's mode. After
 * xfs_link() succeeds an extra inode reference is taken with ihold() and
 * the inode is attached to @dentry with d_instantiate().
 *
 * Returns 0 on success, or the error from name conversion or xfs_link().
 */
389 : STATIC int
390 6208190 : xfs_vn_link(
391 : struct dentry *old_dentry,
392 : struct inode *dir,
393 : struct dentry *dentry)
394 : {
395 6208190 : struct inode *inode = d_inode(old_dentry);
396 6208190 : struct xfs_name name;
397 6208190 : int error;
398 :
399 6208190 : error = xfs_dentry_mode_to_name(&name, dentry, inode->i_mode);
400 6208184 : if (unlikely(error))
401 : return error;
402 :
403 6208184 : error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
404 6208302 : if (unlikely(error))
405 : return error;
406 :
407 6175096 : ihold(inode);
408 6175096 : d_instantiate(dentry, inode);
409 6175096 : return 0;
410 : }
411 :
/*
 * Remove the entry named by @dentry from @dir via xfs_remove().
 *
 * Returns the xfs_remove() error if it fails. On success returns 0, after
 * invalidating @dentry when the mount has the asciici feature
 * (xfs_has_asciici()).
 */
412 : STATIC int
413 40996820 : xfs_vn_unlink(
414 : struct inode *dir,
415 : struct dentry *dentry)
416 : {
417 40996820 : struct xfs_name name;
418 40996820 : int error;
419 :
420 40996820 : xfs_dentry_to_name(&name, dentry);
421 :
422 40996820 : error = xfs_remove(XFS_I(dir), &name, XFS_I(d_inode(dentry)));
423 40997125 : if (error)
424 : return error;
425 :
426 : /*
427 : * With unlink, the VFS makes the dentry "negative": no inode,
428 : * but still hashed. This is incompatible with case-insensitive
429 : * mode, so invalidate (unhash) the dentry in CI-mode.
430 : */
431 38941302 : if (xfs_has_asciici(XFS_M(dir->i_sb)))
432 37640 : d_invalidate(dentry);
433 : return 0;
434 : }
435 :
/*
 * Create a symbolic link named by @dentry in @dir that points at @symname.
 *
 * The mode is S_IFLNK plus either 0777 masked by the current umask (when
 * irix_symlink_mode is set) or S_IRWXUGO. After xfs_symlink() succeeds the
 * security xattrs are initialised, inode operations are set up, the inode is
 * attached to @dentry and inode setup is finished.
 *
 * If security initialisation fails, inode setup is finished, the new
 * directory entry is removed again and the inode reference is dropped.
 *
 * Returns 0 on success or the error of the step that failed.
 */
436 : STATIC int
437 432335517 : xfs_vn_symlink(
438 : struct mnt_idmap *idmap,
439 : struct inode *dir,
440 : struct dentry *dentry,
441 : const char *symname)
442 : {
443 432335517 : struct inode *inode;
444 432335517 : struct xfs_inode *cip = NULL;
445 432335517 : struct xfs_name name;
446 432335517 : int error;
447 432335517 : umode_t mode;
448 :
449 432335517 : mode = S_IFLNK |
450 432335517 : (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
451 432335517 : error = xfs_dentry_mode_to_name(&name, dentry, mode);
452 432361781 : if (unlikely(error))
453 0 : goto out;
454 :
455 432361781 : error = xfs_symlink(idmap, XFS_I(dir), &name, symname, mode, &cip);
456 432357392 : if (unlikely(error))
457 405222976 : goto out;
458 :
459 27134416 : inode = VFS_I(cip);
460 :
461 27134416 : error = xfs_inode_init_security(inode, dir, &dentry->d_name);
462 27134416 : if (unlikely(error))
463 : goto out_cleanup_inode;
464 :
465 27134416 : xfs_setup_iops(cip);
466 :
467 27134104 : d_instantiate(dentry, inode);
468 27131880 : xfs_finish_inode_setup(cip);
469 27131880 : return 0;
470 :
471 : out_cleanup_inode:
472 : xfs_finish_inode_setup(cip);
473 : xfs_cleanup_inode(dir, inode, dentry);
474 : xfs_irele(cip);
475 : out:
476 : return error;
477 : }
478 :
/*
 * Rename @odentry in @odir to @ndentry in @ndir.
 *
 * Any flag other than RENAME_NOREPLACE, RENAME_EXCHANGE and RENAME_WHITEOUT
 * is rejected with -EINVAL. The new name always takes its file type from the
 * source inode's mode. The old name only gets a real file type for
 * RENAME_EXCHANGE, where it is taken from the target inode's mode.
 *
 * Returns the result of xfs_rename(), or an earlier name-conversion error.
 */
479 : STATIC int
480 31607280 : xfs_vn_rename(
481 : struct mnt_idmap *idmap,
482 : struct inode *odir,
483 : struct dentry *odentry,
484 : struct inode *ndir,
485 : struct dentry *ndentry,
486 : unsigned int flags)
487 : {
488 31607280 : struct inode *new_inode = d_inode(ndentry);
489 31607280 : int omode = 0;
490 31607280 : int error;
491 31607280 : struct xfs_name oname;
492 31607280 : struct xfs_name nname;
493 :
494 31607280 : if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
495 : return -EINVAL;
496 :
497 : /* if we are exchanging files, we need to set i_mode of both files */
498 31607280 : if (flags & RENAME_EXCHANGE)
499 8672729 : omode = d_inode(ndentry)->i_mode;
500 :
/*
 * The conversion error is only honoured when a real mode was supplied.
 * NOTE(review): with omode == 0 the helper presumably reports
 * -EFSCORRUPTED (unknown file type), which is deliberately ignored here -
 * confirm against xfs_mode_to_ftype().
 */
501 31607280 : error = xfs_dentry_mode_to_name(&oname, odentry, omode);
502 31607280 : if (omode && unlikely(error))
503 : return error;
504 :
505 31607280 : error = xfs_dentry_mode_to_name(&nname, ndentry,
506 31607280 : d_inode(odentry)->i_mode);
507 31607277 : if (unlikely(error))
508 : return error;
509 :
510 40533817 : return xfs_rename(idmap, XFS_I(odir), &oname,
511 : XFS_I(d_inode(odentry)), XFS_I(ndir), &nname,
512 : new_inode ? XFS_I(new_inode) : NULL, flags);
513 : }
514 :
515 : /*
516 : * careful here - this function can get called recursively, so
517 : * we need to be very careful about how much stack we use.
518 : * the link buffer is kmalloced for this reason...
519 : */
/*
 * Read the target of the symlink behind @dentry into a freshly allocated
 * buffer of XFS_SYMLINK_MAXLEN + 1 bytes.
 *
 * Returns:
 *  - ERR_PTR(-ECHILD) if @dentry is NULL;
 *  - ERR_PTR(-ENOMEM) if the buffer cannot be allocated;
 *  - ERR_PTR(error) if xfs_readlink() fails (the buffer is freed first);
 *  - otherwise the buffer, with kfree_link registered on @done so that the
 *    buffer is freed through the delayed call.
 *
 * The @inode argument is not used; the inode is taken from @dentry.
 */
520 : STATIC const char *
521 256211769 : xfs_vn_get_link(
522 : struct dentry *dentry,
523 : struct inode *inode,
524 : struct delayed_call *done)
525 : {
526 256211769 : char *link;
527 256211769 : int error = -ENOMEM;
528 :
529 256211769 : if (!dentry)
530 : return ERR_PTR(-ECHILD);
531 :
532 256184254 : link = kmalloc(XFS_SYMLINK_MAXLEN+1, GFP_KERNEL);
533 256251036 : if (!link)
534 0 : goto out_err;
535 :
536 256251036 : error = xfs_readlink(XFS_I(d_inode(dentry)), link);
537 256285634 : if (unlikely(error))
538 37 : goto out_kfree;
539 :
540 256285597 : set_delayed_call(done, kfree_link, link);
541 256285597 : return link;
542 :
543 : out_kfree:
544 37 : kfree(link);
545 37 : out_err:
546 37 : return ERR_PTR(error);
547 : }
548 :
/*
 * Compute the preferred I/O block size, in bytes, that xfs_vn_getattr()
 * reports in stat->blksize for inode @ip.
 *
 * Order of precedence:
 *  1. realtime inode: the extent size hint converted to bytes;
 *  2. large-iosize mount with a stripe width: the stripe width in bytes;
 *  3. large-iosize mount with an allocsize set: 1 << m_allocsize_log;
 *  4. otherwise PAGE_SIZE.
 */
549 : static uint32_t
550 2027750944 : xfs_stat_blksize(
551 : struct xfs_inode *ip)
552 : {
553 2027750944 : struct xfs_mount *mp = ip->i_mount;
554 :
555 : /*
556 : * If the file blocks are being allocated from a realtime volume, then
557 : * always return the realtime extent size.
558 : */
559 2027750944 : if (XFS_IS_REALTIME_INODE(ip))
560 154942897 : return XFS_FSB_TO_B(mp, xfs_get_extsz_hint(ip));
561 :
562 : /*
563 : * Allow large block sizes to be reported to userspace programs if the
564 : * "largeio" mount option is used.
565 : *
566 : * If compatibility mode is specified, simply return the basic unit of
567 : * caching so that we don't get inefficient read/modify/write I/O from
568 : * user apps. Otherwise....
569 : *
570 : * If the underlying volume is a stripe, then return the stripe width in
571 : * bytes as the recommended I/O size. If it is not a stripe and we've set a
572 : * default buffered I/O size, return that, otherwise return the compat
573 : * default.
574 : */
575 1872808047 : if (xfs_has_large_iosize(mp)) {
576 2 : if (mp->m_swidth)
577 0 : return XFS_FSB_TO_B(mp, mp->m_swidth);
578 2 : if (xfs_has_allocsize(mp))
579 0 : return 1U << mp->m_allocsize_log;
580 : }
581 :
582 : return PAGE_SIZE;
583 : }
584 :
/*
 * Fill @stat for the inode behind @path.
 *
 * Returns -EIO if the filesystem is shut down, 0 otherwise.
 *
 * - uid/gid are mapped through @idmap before being stored.
 * - blocks counts both allocated and delayed-allocation blocks, converted
 *   with XFS_FSB_TO_BB().
 * - btime is reported only when the mount has v3 inodes and the caller set
 *   STATX_BTIME in @request_mask.
 * - The immutable/append/nodump attribute bits come from ip->i_diflags.
 * - Block and character devices report BLKDEV_IOSIZE and the device number;
 *   every other type reports xfs_stat_blksize() and rdev 0.
 * - Regular files additionally report direct I/O alignment when
 *   STATX_DIOALIGN is requested.
 *
 * @query_flags is not used.
 */
585 : STATIC int
586 2031004537 : xfs_vn_getattr(
587 : struct mnt_idmap *idmap,
588 : const struct path *path,
589 : struct kstat *stat,
590 : u32 request_mask,
591 : unsigned int query_flags)
592 : {
593 2031004537 : struct inode *inode = d_inode(path->dentry);
594 2031004537 : struct xfs_inode *ip = XFS_I(inode);
595 2031004537 : struct xfs_mount *mp = ip->i_mount;
596 2031004537 : vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
597 2031290311 : vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
598 :
599 2031018395 : trace_xfs_getattr(ip);
600 :
601 4062299164 : if (xfs_is_shutdown(mp))
602 : return -EIO;
603 :
604 2031113356 : stat->size = XFS_ISIZE(ip);
605 2031113356 : stat->dev = inode->i_sb->s_dev;
606 2031113356 : stat->mode = inode->i_mode;
607 2031113356 : stat->nlink = inode->i_nlink;
608 2031113356 : stat->uid = vfsuid_into_kuid(vfsuid);
609 2031113356 : stat->gid = vfsgid_into_kgid(vfsgid);
610 2031113356 : stat->ino = ip->i_ino;
611 2031113356 : stat->atime = inode->i_atime;
612 2031113356 : stat->mtime = inode->i_mtime;
613 2031113356 : stat->ctime = inode->i_ctime;
614 2031113356 : stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks);
615 :
616 2031113356 : if (xfs_has_v3inodes(mp)) {
617 2031218748 : if (request_mask & STATX_BTIME) {
618 2327844 : stat->result_mask |= STATX_BTIME;
619 2327844 : stat->btime = ip->i_crtime;
620 : }
621 : }
622 :
623 : /*
624 : * Note: If you add another clause to set an attribute flag, please
625 : * update attributes_mask below.
626 : */
627 2031113356 : if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE)
628 378 : stat->attributes |= STATX_ATTR_IMMUTABLE;
629 2031113356 : if (ip->i_diflags & XFS_DIFLAG_APPEND)
630 325 : stat->attributes |= STATX_ATTR_APPEND;
631 2031113356 : if (ip->i_diflags & XFS_DIFLAG_NODUMP)
632 76 : stat->attributes |= STATX_ATTR_NODUMP;
633 :
634 2031113356 : stat->attributes_mask |= (STATX_ATTR_IMMUTABLE |
635 : STATX_ATTR_APPEND |
636 : STATX_ATTR_NODUMP);
637 :
638 2031113356 : switch (inode->i_mode & S_IFMT) {
639 3379938 : case S_IFBLK:
640 : case S_IFCHR:
641 3379938 : stat->blksize = BLKDEV_IOSIZE;
642 3379938 : stat->rdev = inode->i_rdev;
643 3379938 : break;
644 403919193 : case S_IFREG:
645 403919193 : if (request_mask & STATX_DIOALIGN) {
646 0 : struct xfs_buftarg *target = xfs_inode_buftarg(ip);
647 0 : struct block_device *bdev = target->bt_bdev;
648 :
649 0 : stat->result_mask |= STATX_DIOALIGN;
650 0 : stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
651 0 : stat->dio_offset_align = bdev_logical_block_size(bdev);
652 : }
/* Regular files share the blksize/rdev handling of the default case. */
653 2027733418 : fallthrough;
654 : default:
655 2027733418 : stat->blksize = xfs_stat_blksize(ip);
656 2027816412 : stat->rdev = 0;
657 2027816412 : break;
658 : }
659 :
660 : return 0;
661 : }
662 :
/*
 * Pre-check for an attribute change on @dentry's inode.
 *
 * Returns -EROFS if the mount is read-only, -EIO if it is shut down, and
 * otherwise the result of setattr_prepare().
 */
663 : static int
664 15103187 : xfs_vn_change_ok(
665 : struct mnt_idmap *idmap,
666 : struct dentry *dentry,
667 : struct iattr *iattr)
668 : {
669 15103187 : struct xfs_mount *mp = XFS_I(d_inode(dentry))->i_mount;
670 :
671 30206374 : if (xfs_is_readonly(mp))
672 : return -EROFS;
673 :
674 30206374 : if (xfs_is_shutdown(mp))
675 : return -EIO;
676 :
677 15101183 : return setattr_prepare(idmap, dentry, iattr);
678 : }
679 :
680 : /*
681 : * Set non-size attributes of an inode.
682 : *
683 : * Caution: The caller of this function is responsible for calling
684 : * setattr_prepare() or otherwise verifying the change is fine.
685 : */
/*
 * @iattr->ia_valid must not contain ATTR_SIZE (asserted below).
 *
 * Steps, in order:
 *  1. If quota is on and the uid or gid is changing, allocate the dquots for
 *     the new IDs with xfs_qm_vop_dqalloc(), before any transaction exists.
 *  2. Allocate the change transaction with xfs_trans_alloc_ichange().
 *  3. Where the uid/gid needs updating and the matching quota type is on,
 *     switch the inode's dquot with xfs_qm_vop_chown(), keeping the old one.
 *  4. Copy the attributes into the VFS inode, log the inode core and commit,
 *     marking the transaction sync when xfs_has_wsync() is true.
 *  5. Release the old and new dquot references.
 *  6. If the mode changed, update the ACL with posix_acl_chmod(); this
 *     happens after the commit (see the XXX note in the body).
 *
 * Returns 0 on success or the error of the first step that failed.
 */
686 : static int
687 8889448 : xfs_setattr_nonsize(
688 : struct mnt_idmap *idmap,
689 : struct dentry *dentry,
690 : struct xfs_inode *ip,
691 : struct iattr *iattr)
692 : {
693 8889448 : xfs_mount_t *mp = ip->i_mount;
694 8889448 : struct inode *inode = VFS_I(ip);
695 8889448 : int mask = iattr->ia_valid;
696 8889448 : xfs_trans_t *tp;
697 8889448 : int error;
698 8889448 : kuid_t uid = GLOBAL_ROOT_UID;
699 8889448 : kgid_t gid = GLOBAL_ROOT_GID;
700 8889448 : struct xfs_dquot *udqp = NULL, *gdqp = NULL;
701 8889448 : struct xfs_dquot *old_udqp = NULL, *old_gdqp = NULL;
702 :
703 8889448 : ASSERT((mask & ATTR_SIZE) == 0);
704 :
705 : /*
706 : * If disk quotas is on, we make sure that the dquots do exist on disk,
707 : * before we start any other transactions. Trying to do this later
708 : * is messy. We don't care to take a readlock to look at the ids
709 : * in inode here, because we can't hold it across the trans_reserve.
710 : * If the IDs do change before we take the ilock, we're covered
711 : * because the i_*dquot fields will get updated anyway.
712 : */
713 8889448 : if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
714 6117718 : uint qflags = 0;
715 :
716 6117718 : if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
717 6117584 : uid = from_vfsuid(idmap, i_user_ns(inode),
718 : iattr->ia_vfsuid);
719 6117584 : qflags |= XFS_QMOPT_UQUOTA;
720 : } else {
721 134 : uid = inode->i_uid;
722 : }
723 6117774 : if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
724 6115056 : gid = from_vfsgid(idmap, i_user_ns(inode),
725 : iattr->ia_vfsgid);
726 6115000 : qflags |= XFS_QMOPT_GQUOTA;
727 : } else {
728 2729 : gid = inode->i_gid;
729 : }
730 :
731 : /*
732 : * We take a reference when we initialize udqp and gdqp,
733 : * so it is important that we never blindly double trip on
734 : * the same variable. See xfs_create() for an example.
735 : */
736 6117718 : ASSERT(udqp == NULL);
737 6117718 : ASSERT(gdqp == NULL);
738 6117718 : error = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_projid,
739 : qflags, &udqp, &gdqp, NULL);
740 6118183 : if (error)
741 : return error;
742 : }
743 :
744 8858940 : error = xfs_trans_alloc_ichange(ip, udqp, gdqp, NULL,
745 : has_capability_noaudit(current, CAP_FOWNER), &tp);
746 8859936 : if (error)
747 268 : goto out_dqrele;
748 :
749 : /*
750 : * Register quota modifications in the transaction. Must be the owner
751 : * or privileged. These IDs could have changed since we last looked at
752 : * them. But, we're assured that if the ownership did change while we
753 : * didn't have the inode locked, inode's dquot(s) would have changed
754 : * also.
755 : */
756 17647615 : if (XFS_IS_UQUOTA_ON(mp) &&
757 8787479 : i_uid_needs_update(idmap, iattr, inode)) {
758 5800873 : ASSERT(udqp);
759 5800873 : old_udqp = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp);
760 : }
761 17643395 : if (XFS_IS_GQUOTA_ON(mp) &&
762 8783449 : i_gid_needs_update(idmap, iattr, inode)) {
763 5798035 : ASSERT(xfs_has_pquotino(mp) || !XFS_IS_PQUOTA_ON(mp));
764 5798035 : ASSERT(gdqp);
765 5798035 : old_gdqp = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp);
766 : }
767 :
768 8859941 : setattr_copy(idmap, inode, iattr);
769 8859471 : xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
770 :
771 8860093 : XFS_STATS_INC(mp, xs_ig_attrchg);
772 :
773 8860093 : if (xfs_has_wsync(mp))
774 314 : xfs_trans_set_sync(tp);
775 8860093 : error = xfs_trans_commit(tp);
776 :
777 : /*
778 : * Release any dquot(s) the inode had kept before chown.
779 : */
/*
 * The dquot references are dropped whether or not the commit succeeded; the
 * commit error is checked only afterwards.
 */
780 8860329 : xfs_qm_dqrele(old_udqp);
781 8860215 : xfs_qm_dqrele(old_gdqp);
782 8860346 : xfs_qm_dqrele(udqp);
783 8860380 : xfs_qm_dqrele(gdqp);
784 :
785 8860338 : if (error)
786 : return error;
787 :
788 : /*
789 : * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
790 : * update. We could avoid this with linked transactions
791 : * and passing down the transaction pointer all the way
792 : * to attr_set. No previous user of the generic
793 : * Posix ACL code seems to care about this issue either.
794 : */
795 8860338 : if (mask & ATTR_MODE) {
796 95344 : error = posix_acl_chmod(idmap, dentry, inode->i_mode);
797 95342 : if (error)
798 0 : return error;
799 : }
800 :
801 : return 0;
802 :
/*
 * Transaction allocation failed: drop any dquot references obtained above
 * and return the error.
 */
803 : out_dqrele:
804 268 : xfs_qm_dqrele(udqp);
805 268 : xfs_qm_dqrele(gdqp);
806 268 : return error;
807 : }
808 :
809 : /*
810 : * Truncate file. Must have write permission and not be a directory.
811 : *
812 : * Caution: The caller of this function is responsible for calling
813 : * setattr_prepare() or otherwise verifying the change is fine.
814 : */
815 : STATIC int
816 6544656 : xfs_setattr_size(
817 : struct mnt_idmap *idmap,
818 : struct dentry *dentry,
819 : struct xfs_inode *ip,
820 : struct iattr *iattr)
821 : {
822 6544656 : struct xfs_mount *mp = ip->i_mount;
823 6544656 : struct inode *inode = VFS_I(ip);
824 6544656 : xfs_off_t oldsize, newsize;
825 6544656 : struct xfs_trans *tp;
826 6544656 : int error;
827 6544656 : uint lock_flags = 0;
828 6544656 : bool did_zeroing = false;
829 :
830 6544656 : ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
831 6544659 : ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
832 6544642 : ASSERT(S_ISREG(inode->i_mode));
833 6544642 : ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
834 : ATTR_MTIME_SET|ATTR_TIMES_SET)) == 0);
835 :
836 6544642 : oldsize = inode->i_size;
837 6544642 : newsize = iattr->ia_size;
838 :
839 : /*
840 : * Short circuit the truncate case for zero length files.
841 : */
842 6544642 : if (newsize == 0 && oldsize == 0 && ip->i_df.if_nextents == 0) {
843 333567 : if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME)))
844 : return 0;
845 :
846 : /*
847 : * Use the regular setattr path to update the timestamps.
848 : */
849 333568 : iattr->ia_valid &= ~ATTR_SIZE;
850 333568 : return xfs_setattr_nonsize(idmap, dentry, ip, iattr);
851 : }
852 :
853 : /*
854 : * Make sure that the dquots are attached to the inode.
855 : */
856 6211075 : error = xfs_qm_dqattach(ip);
857 6211090 : if (error)
858 : return error;
859 :
860 : /*
861 : * Wait for all direct I/O to complete.
862 : */
863 6209810 : inode_dio_wait(inode);
864 :
865 : /*
866 : * File data changes must be complete before we start the transaction to
867 : * modify the inode. This needs to be done before joining the inode to
868 : * the transaction because the inode cannot be unlocked once it is a
869 : * part of the transaction.
870 : *
871 : * Start with zeroing any data beyond EOF that we may expose on file
872 : * extension, or zeroing out the rest of the block on a downward
873 : * truncate.
874 : */
875 6209814 : if (newsize > oldsize) {
876 : /*
877 : * Extending the file size, so COW around the allocation unit
878 : * containing EOF before we zero the new range of the file.
879 : */
880 3137347 : if (xfs_truncate_needs_cow_around(ip, oldsize)) {
881 0 : error = xfs_file_unshare_at(ip, oldsize);
882 0 : if (error)
883 : return error;
884 : }
885 :
886 3137355 : trace_xfs_zero_eof(ip, oldsize, newsize - oldsize);
887 3137345 : error = xfs_zero_range(ip, oldsize, newsize - oldsize,
888 : &did_zeroing);
889 : } else {
890 : /*
891 : * Truncating the file, so COW around the new EOF allocation
892 : * unit before truncation zeroes the part of the EOF block
893 : * after the new EOF.
894 : */
895 3072467 : if (xfs_truncate_needs_cow_around(ip, newsize)) {
896 0 : error = xfs_file_unshare_at(ip, newsize);
897 0 : if (error)
898 : return error;
899 : }
900 :
901 : /*
902 : * iomap won't detect a dirty page over an unwritten block (or a
903 : * cow block over a hole) and subsequently skips zeroing the
904 : * newly post-EOF portion of the page. Flush the new EOF to
905 : * convert the block before the pagecache truncate.
906 : */
907 3072464 : error = filemap_write_and_wait_range(inode->i_mapping, newsize,
908 : newsize);
909 3072480 : if (error)
910 : return error;
911 3072448 : error = xfs_truncate_page(ip, newsize, &did_zeroing);
912 : }
913 :
914 6209776 : if (error)
915 : return error;
916 :
917 : /*
918 : * We've already locked out new page faults, so now we can safely remove
919 : * pages from the page cache knowing they won't get refaulted until we
920 : * drop the XFS_MMAP_EXCL lock after the extent manipulations are
921 : * complete. The truncate_setsize() call also cleans partial EOF page
922 : * PTEs on extending truncates and hence ensures sub-page block size
923 : * filesystems are correctly handled, too.
924 : *
925 : * We have to do all the page cache truncate work outside the
926 : * transaction context as the "lock" order is page lock->log space
927 : * reservation as defined by extent allocation in the writeback path.
928 : * Hence a truncate can fail with ENOMEM from xfs_trans_alloc(), but
929 : * having already truncated the in-memory version of the file (i.e. made
930 : * user visible changes). There's not much we can do about this, except
931 : * to hope that the caller sees ENOMEM and retries the truncate
932 : * operation.
933 : *
934 : * And we update in-core i_size and truncate page cache beyond newsize
935 : * before writeback the [i_disk_size, newsize] range, so we're
936 : * guaranteed not to write stale data past the new EOF on truncate down.
937 : */
938 6209596 : truncate_setsize(inode, newsize);
939 :
940 : /*
941 : * We are going to log the inode size change in this transaction so
942 : * any previous writes that are beyond the on disk EOF and the new
943 : * EOF that have not been written out need to be written here. If we
944 : * do not write the data out, we expose ourselves to the null files
945 : * problem. Note that this includes any block zeroing we did above;
946 : * otherwise those blocks may not be zeroed after a crash.
947 : */
948 6209619 : if (did_zeroing ||
949 4327131 : (newsize > ip->i_disk_size && oldsize != ip->i_disk_size)) {
950 1957661 : error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
951 : ip->i_disk_size, newsize - 1);
952 1957650 : if (error)
953 : return error;
954 : }
955 :
956 6209080 : error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
957 6209101 : if (error)
958 : return error;
959 :
960 6209066 : lock_flags |= XFS_ILOCK_EXCL;
961 6209066 : xfs_ilock(ip, XFS_ILOCK_EXCL);
962 6209063 : xfs_trans_ijoin(tp, ip, 0);
963 :
964 : /*
965 : * Only change the c/mtime if we are changing the size or we are
966 : * explicitly asked to change it. This handles the semantic difference
967 : * between truncate() and ftruncate() as implemented in the VFS.
968 : *
969 : * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
970 : * special case where we need to update the times despite not having
971 : * these flags set. For all other operations the VFS set these flags
972 : * explicitly if it wants a timestamp update.
973 : */
974 6209064 : if (newsize != oldsize &&
975 6188552 : !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {
976 9966333 : iattr->ia_ctime = iattr->ia_mtime =
977 4983167 : current_time(inode);
978 4983166 : iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
979 : }
980 :
981 : /*
982 : * The first thing we do is set the size to new_size permanently on
983 : * disk. This way we don't have to worry about anyone ever being able
984 : * to look at the data being freed even in the face of a crash.
985 : * What we're getting around here is the case where we free a block, it
986 : * is allocated to another file, it is written to, and then we crash.
987 : * If the new data gets written to the file but the log buffers
988 : * containing the free and reallocation don't, then we'd end up with
989 : * garbage in the blocks being freed. As long as we make the new size
990 : * permanent before actually freeing any blocks it doesn't matter if
991 : * they get written to.
992 : */
993 6209063 : ip->i_disk_size = newsize;
994 6209063 : xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
995 :
996 6209066 : if (newsize <= oldsize) {
997 3072228 : error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize);
998 3072228 : if (error)
999 561 : goto out_trans_cancel;
1000 :
1001 : /*
1002 : * Truncated "down", so we're removing references to old data
1003 : * here - if we delay flushing for a long time, we expose
1004 : * ourselves unduly to the notorious NULL files problem. So,
1005 : * we mark this inode and flush it when the file is closed,
1006 : * and do not wait the usual (long) time for writeout.
1007 : */
1008 3071667 : xfs_iflags_set(ip, XFS_ITRUNCATED);
1009 :
1010 : /* A truncate down always removes post-EOF blocks. */
1011 3071667 : xfs_inode_clear_eofblocks_tag(ip);
1012 : }
1013 :
1014 6208506 : ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID)));
1015 6208506 : setattr_copy(idmap, inode, iattr);
1016 6208495 : xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1017 :
1018 6208507 : XFS_STATS_INC(mp, xs_ig_attrchg);
1019 :
1020 6208507 : if (xfs_has_wsync(mp))
1021 0 : xfs_trans_set_sync(tp);
1022 :
1023 6208507 : error = xfs_trans_commit(tp);
1024 6209058 : out_unlock:
1025 6209058 : if (lock_flags)
1026 6209058 : xfs_iunlock(ip, lock_flags);
1027 6209058 : return error;
1028 :
1029 : out_trans_cancel:
1030 561 : xfs_trans_cancel(tp);
1031 561 : goto out_unlock;
1032 : }
1033 :
/*
 * Size-changing setattr helper.
 *
 * Emits the xfs_setattr tracepoint, runs the xfs_vn_change_ok() check on the
 * requested change and, only if that check succeeds, hands the request to
 * xfs_setattr_size().
 *
 * Returns 0 on success or the error reported by either step.
 */
int
xfs_vn_setattr_size(
	struct mnt_idmap	*idmap,
	struct dentry		*dentry,
	struct iattr		*iattr)
{
	struct inode		*inode = d_inode(dentry);
	struct xfs_inode	*ip = XFS_I(inode);
	int			ret;

	trace_xfs_setattr(ip);

	ret = xfs_vn_change_ok(idmap, dentry, iattr);
	if (!ret)
		ret = xfs_setattr_size(idmap, dentry, ip, iattr);
	return ret;
}
1050 :
1051 : STATIC int
1052 12863742 : xfs_vn_setattr(
1053 : struct mnt_idmap *idmap,
1054 : struct dentry *dentry,
1055 : struct iattr *iattr)
1056 : {
1057 12863742 : struct inode *inode = d_inode(dentry);
1058 12863742 : struct xfs_inode *ip = XFS_I(inode);
1059 12863742 : int error;
1060 :
1061 12863742 : if (iattr->ia_valid & ATTR_SIZE) {
1062 4304807 : uint iolock;
1063 :
1064 4304807 : xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
1065 4304813 : iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
1066 :
1067 4304813 : error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
1068 4304810 : if (error) {
1069 0 : xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
1070 0 : return error;
1071 : }
1072 :
1073 4304810 : error = xfs_vn_setattr_size(idmap, dentry, iattr);
1074 4304824 : xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
1075 : } else {
1076 8558935 : trace_xfs_setattr(ip);
1077 :
1078 8558861 : error = xfs_vn_change_ok(idmap, dentry, iattr);
1079 8557954 : if (!error)
1080 8555914 : error = xfs_setattr_nonsize(idmap, dentry, ip, iattr);
1081 : }
1082 :
1083 : return error;
1084 : }
1085 :
1086 : STATIC int
1087 80806440 : xfs_vn_update_time(
1088 : struct inode *inode,
1089 : struct timespec64 *now,
1090 : int flags)
1091 : {
1092 80806440 : struct xfs_inode *ip = XFS_I(inode);
1093 80806440 : struct xfs_mount *mp = ip->i_mount;
1094 80806440 : int log_flags = XFS_ILOG_TIMESTAMP;
1095 80806440 : struct xfs_trans *tp;
1096 80806440 : int error;
1097 :
1098 80806440 : trace_xfs_update_time(ip);
1099 :
1100 80805573 : if (inode->i_sb->s_flags & SB_LAZYTIME) {
1101 48 : if (!((flags & S_VERSION) &&
1102 12 : inode_maybe_inc_iversion(inode, false)))
1103 24 : return generic_update_time(inode, now, flags);
1104 :
1105 : /* Capture the iversion update that just occurred */
1106 : log_flags |= XFS_ILOG_CORE;
1107 : }
1108 :
1109 80805549 : error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
1110 80811258 : if (error)
1111 : return error;
1112 :
1113 80810632 : xfs_ilock(ip, XFS_ILOCK_EXCL);
1114 80810894 : if (flags & S_CTIME)
1115 37444224 : inode->i_ctime = *now;
1116 80810894 : if (flags & S_MTIME)
1117 37773913 : inode->i_mtime = *now;
1118 80810894 : if (flags & S_ATIME)
1119 43036933 : inode->i_atime = *now;
1120 :
1121 80810894 : xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1122 80807031 : xfs_trans_log_inode(tp, ip, log_flags);
1123 80810569 : return xfs_trans_commit(tp);
1124 : }
1125 :
1126 : STATIC int
1127 1388167 : xfs_vn_fiemap(
1128 : struct inode *inode,
1129 : struct fiemap_extent_info *fieinfo,
1130 : u64 start,
1131 : u64 length)
1132 : {
1133 1388167 : int error;
1134 :
1135 1388167 : xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED);
1136 1388167 : if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
1137 666320 : fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR;
1138 666320 : error = iomap_fiemap(inode, fieinfo, start, length,
1139 : &xfs_xattr_iomap_ops);
1140 : } else {
1141 721847 : error = iomap_fiemap(inode, fieinfo, start, length,
1142 : &xfs_read_iomap_ops);
1143 : }
1144 1388169 : xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED);
1145 :
1146 1388166 : return error;
1147 : }
1148 :
1149 : STATIC int
1150 976471 : xfs_vn_tmpfile(
1151 : struct mnt_idmap *idmap,
1152 : struct inode *dir,
1153 : struct file *file,
1154 : umode_t mode)
1155 : {
1156 976471 : int err = xfs_generic_create(idmap, dir, file->f_path.dentry, mode, 0, file);
1157 :
1158 973417 : return finish_open_simple(file, err);
1159 : }
1160 :
1161 : static const struct inode_operations xfs_inode_operations = {
1162 : .get_inode_acl = xfs_get_acl,
1163 : .set_acl = xfs_set_acl,
1164 : .getattr = xfs_vn_getattr,
1165 : .setattr = xfs_vn_setattr,
1166 : .listxattr = xfs_vn_listxattr,
1167 : .fiemap = xfs_vn_fiemap,
1168 : .update_time = xfs_vn_update_time,
1169 : .fileattr_get = xfs_fileattr_get,
1170 : .fileattr_set = xfs_fileattr_set,
1171 : };
1172 :
1173 : static const struct inode_operations xfs_dir_inode_operations = {
1174 : .create = xfs_vn_create,
1175 : .lookup = xfs_vn_lookup,
1176 : .link = xfs_vn_link,
1177 : .unlink = xfs_vn_unlink,
1178 : .symlink = xfs_vn_symlink,
1179 : .mkdir = xfs_vn_mkdir,
1180 : /*
1181 : * Yes, XFS uses the same method for rmdir and unlink.
1182 : *
1183 : * There are some subtile differences deeper in the code,
1184 : * but we use S_ISDIR to check for those.
1185 : */
1186 : .rmdir = xfs_vn_unlink,
1187 : .mknod = xfs_vn_mknod,
1188 : .rename = xfs_vn_rename,
1189 : .get_inode_acl = xfs_get_acl,
1190 : .set_acl = xfs_set_acl,
1191 : .getattr = xfs_vn_getattr,
1192 : .setattr = xfs_vn_setattr,
1193 : .listxattr = xfs_vn_listxattr,
1194 : .update_time = xfs_vn_update_time,
1195 : .tmpfile = xfs_vn_tmpfile,
1196 : .fileattr_get = xfs_fileattr_get,
1197 : .fileattr_set = xfs_fileattr_set,
1198 : };
1199 :
1200 : static const struct inode_operations xfs_dir_ci_inode_operations = {
1201 : .create = xfs_vn_create,
1202 : .lookup = xfs_vn_ci_lookup,
1203 : .link = xfs_vn_link,
1204 : .unlink = xfs_vn_unlink,
1205 : .symlink = xfs_vn_symlink,
1206 : .mkdir = xfs_vn_mkdir,
1207 : /*
1208 : * Yes, XFS uses the same method for rmdir and unlink.
1209 : *
1210 : * There are some subtile differences deeper in the code,
1211 : * but we use S_ISDIR to check for those.
1212 : */
1213 : .rmdir = xfs_vn_unlink,
1214 : .mknod = xfs_vn_mknod,
1215 : .rename = xfs_vn_rename,
1216 : .get_inode_acl = xfs_get_acl,
1217 : .set_acl = xfs_set_acl,
1218 : .getattr = xfs_vn_getattr,
1219 : .setattr = xfs_vn_setattr,
1220 : .listxattr = xfs_vn_listxattr,
1221 : .update_time = xfs_vn_update_time,
1222 : .tmpfile = xfs_vn_tmpfile,
1223 : .fileattr_get = xfs_fileattr_get,
1224 : .fileattr_set = xfs_fileattr_set,
1225 : };
1226 :
1227 : static const struct inode_operations xfs_symlink_inode_operations = {
1228 : .get_link = xfs_vn_get_link,
1229 : .getattr = xfs_vn_getattr,
1230 : .setattr = xfs_vn_setattr,
1231 : .listxattr = xfs_vn_listxattr,
1232 : .update_time = xfs_vn_update_time,
1233 : };
1234 :
1235 : /* Figure out if this file actually supports DAX. */
1236 : static bool
1237 : xfs_inode_supports_dax(
1238 : struct xfs_inode *ip)
1239 : {
1240 : struct xfs_mount *mp = ip->i_mount;
1241 :
1242 : /* Only supported on regular files. */
1243 : if (!S_ISREG(VFS_I(ip)->i_mode))
1244 : return false;
1245 :
1246 : /* Block size must match page size */
1247 : if (mp->m_sb.sb_blocksize != PAGE_SIZE)
1248 : return false;
1249 :
1250 : /* Device has to support DAX too. */
1251 : return xfs_inode_buftarg(ip)->bt_daxdev != NULL;
1252 : }
1253 :
1254 : static bool
1255 : xfs_inode_should_enable_dax(
1256 : struct xfs_inode *ip)
1257 : {
1258 : if (!IS_ENABLED(CONFIG_FS_DAX))
1259 : return false;
1260 : if (xfs_has_dax_never(ip->i_mount))
1261 : return false;
1262 : if (!xfs_inode_supports_dax(ip))
1263 : return false;
1264 : if (xfs_has_dax_always(ip->i_mount))
1265 : return true;
1266 : if (ip->i_diflags2 & XFS_DIFLAG2_DAX)
1267 : return true;
1268 : return false;
1269 : }
1270 :
1271 : void
1272 1076363184 : xfs_diflags_to_iflags(
1273 : struct xfs_inode *ip,
1274 : bool init)
1275 : {
1276 1076363184 : struct inode *inode = VFS_I(ip);
1277 1076363184 : unsigned int xflags = xfs_ip2xflags(ip);
1278 1076375378 : unsigned int flags = 0;
1279 :
1280 1076375378 : ASSERT(!(IS_DAX(inode) && init));
1281 :
1282 1076375378 : if (xflags & FS_XFLAG_IMMUTABLE)
1283 364402 : flags |= S_IMMUTABLE;
1284 1076375378 : if (xflags & FS_XFLAG_APPEND)
1285 49 : flags |= S_APPEND;
1286 1076375378 : if (xflags & FS_XFLAG_SYNC)
1287 375442 : flags |= S_SYNC;
1288 1076375378 : if (xflags & FS_XFLAG_NOATIME)
1289 375431 : flags |= S_NOATIME;
1290 1076375378 : if (init && xfs_inode_should_enable_dax(ip))
1291 : flags |= S_DAX;
1292 :
1293 : /*
1294 : * S_DAX can only be set during inode initialization and is never set by
1295 : * the VFS, so we cannot mask off S_DAX in i_flags.
1296 : */
1297 1076375378 : inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC | S_NOATIME);
1298 1076375378 : inode->i_flags |= flags;
1299 1076375378 : }
1300 :
1301 : /*
1302 : * Initialize the Linux inode.
1303 : *
1304 : * When reading existing inodes from disk this is called directly from xfs_iget,
1305 : * when creating a new inode it is called from xfs_init_new_inode after setting
1306 : * up the inode. These callers have different criteria for clearing XFS_INEW, so
1307 : * leave it up to the caller to deal with unlocking the inode appropriately.
1308 : */
1309 : void
1310 1074296323 : xfs_setup_inode(
1311 : struct xfs_inode *ip)
1312 : {
1313 1074296323 : struct inode *inode = &ip->i_vnode;
1314 1074296323 : gfp_t gfp_mask;
1315 1074296323 : bool is_meta = xfs_is_metadata_inode(ip);
1316 :
1317 1074296323 : inode->i_ino = ip->i_ino;
1318 1074296323 : inode->i_state |= I_NEW;
1319 :
1320 1074296323 : inode_sb_list_add(inode);
1321 : /* make the inode look hashed for the writeback code */
1322 1075693607 : inode_fake_hash(inode);
1323 :
1324 1075693607 : i_size_write(inode, ip->i_disk_size);
1325 1075693607 : xfs_diflags_to_iflags(ip, true);
1326 :
1327 : /*
1328 : * Mark our metadata files as private so that LSMs and the ACL code
1329 : * don't try to add their own metadata or reason about these files,
1330 : * and users cannot ever obtain file handles to them.
1331 : */
1332 1075688011 : if (is_meta) {
1333 375687 : inode->i_flags |= S_PRIVATE;
1334 375687 : inode->i_opflags &= ~IOP_XATTR;
1335 : }
1336 :
1337 1075688011 : if (S_ISDIR(inode->i_mode)) {
1338 : /*
1339 : * We set the i_rwsem class here to avoid potential races with
1340 : * lockdep_annotate_inode_mutex_key() reinitialising the lock
1341 : * after a filehandle lookup has already found the inode in
1342 : * cache before it has been unlocked via unlock_new_inode().
1343 : */
1344 : lockdep_set_class(&inode->i_rwsem,
1345 : &inode->i_sb->s_type->i_mutex_dir_key);
1346 : if (is_meta)
1347 : lockdep_set_class(&ip->i_lock.mr_lock,
1348 : &xfs_metadata_dir_ilock_class);
1349 : else
1350 : lockdep_set_class(&ip->i_lock.mr_lock,
1351 : &xfs_dir_ilock_class);
1352 : } else {
1353 : if (is_meta)
1354 : lockdep_set_class(&ip->i_lock.mr_lock,
1355 : &xfs_metadata_file_ilock_class);
1356 : else
1357 1075688011 : lockdep_set_class(&ip->i_lock.mr_lock,
1358 : &xfs_nondir_ilock_class);
1359 : }
1360 :
1361 : /*
1362 : * Ensure all page cache allocations are done from GFP_NOFS context to
1363 : * prevent direct reclaim recursion back into the filesystem and blowing
1364 : * stacks or deadlocking.
1365 : */
1366 1075688011 : gfp_mask = mapping_gfp_mask(inode->i_mapping);
1367 1075688011 : mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS)));
1368 :
1369 : /*
1370 : * If there is no attribute fork no ACL can exist on this inode,
1371 : * and it can't have any file capabilities attached to it either.
1372 : */
1373 1075688011 : if (!xfs_inode_has_attr_fork(ip)) {
1374 838245 : inode_has_no_xattr(inode);
1375 838246 : cache_no_acl(inode);
1376 : }
1377 1075688012 : }
1378 :
1379 : void
1380 1075659190 : xfs_setup_iops(
1381 : struct xfs_inode *ip)
1382 : {
1383 1075659190 : struct inode *inode = &ip->i_vnode;
1384 :
1385 1075659190 : switch (inode->i_mode & S_IFMT) {
1386 366096930 : case S_IFREG:
1387 366096930 : inode->i_op = &xfs_inode_operations;
1388 366096930 : inode->i_fop = &xfs_file_operations;
1389 366096930 : if (IS_DAX(inode))
1390 : inode->i_mapping->a_ops = &xfs_dax_aops;
1391 : else
1392 366096930 : inode->i_mapping->a_ops = &xfs_address_space_operations;
1393 : break;
1394 161099164 : case S_IFDIR:
1395 161099164 : if (xfs_has_asciici(XFS_M(inode->i_sb)))
1396 346 : inode->i_op = &xfs_dir_ci_inode_operations;
1397 : else
1398 161098818 : inode->i_op = &xfs_dir_inode_operations;
1399 161099164 : inode->i_fop = &xfs_dir_file_operations;
1400 161099164 : break;
1401 89848197 : case S_IFLNK:
1402 89848197 : inode->i_op = &xfs_symlink_inode_operations;
1403 89848197 : break;
1404 458614899 : default:
1405 458614899 : inode->i_op = &xfs_inode_operations;
1406 458614899 : init_special_inode(inode, inode->i_mode, inode->i_rdev);
1407 458614899 : break;
1408 : }
1409 1075678815 : }
|