Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2000-2006 Silicon Graphics, Inc.
4 : * All Rights Reserved.
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_log_format.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_mount.h"
13 : #include "xfs_ag.h"
14 : #include "xfs_inode.h"
15 : #include "xfs_errortag.h"
16 : #include "xfs_error.h"
17 : #include "xfs_icache.h"
18 : #include "xfs_trans.h"
19 : #include "xfs_ialloc.h"
20 : #include "xfs_dir2.h"
21 : #include "xfs_health.h"
22 :
23 : #include <linux/iversion.h>
24 :
25 : /*
26 : * If we are doing readahead on an inode buffer, we might be in log recovery
27 : * reading an inode allocation buffer that hasn't yet been replayed, and hence
28 : * has not had the inode cores stamped into it. Hence for readahead, the buffer
29 : * may be potentially invalid.
30 : *
31 : * If the readahead buffer is invalid, we need to mark it with an error and
32 : * clear the DONE status of the buffer so that a followup read will re-read it
33 : * from disk. We don't report the error otherwise to avoid warnings during log
34 : * recovery and we don't get unnecessary panics on debug kernels. We use EIO here
35 : * because all we want to do is say readahead failed; there is no-one to report
36 : * the error to, so this will distinguish it from a non-ra verifier failure.
37 : * Changes to this readahead error behaviour also need to be reflected in
38 : * xfs_dquot_buf_readahead_verify().
39 : */
40 : static void
41 62450020 : xfs_inode_buf_verify(
42 : struct xfs_buf *bp,
43 : bool readahead)
44 : {
45 62450020 : struct xfs_mount *mp = bp->b_mount;
46 62450020 : int i;
47 62450020 : int ni;
48 :
49 : /*
50 : * Validate the magic number and version of every inode in the buffer
51 : */
52 62450020 : ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
53 2060676849 : for (i = 0; i < ni; i++) {
54 1998229473 : struct xfs_dinode *dip;
55 1998229473 : xfs_agino_t unlinked_ino;
56 1998229473 : int di_ok;
57 :
58 1998229473 : dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
59 1998118948 : unlinked_ino = be32_to_cpu(dip->di_next_unlinked);
60 3996346796 : di_ok = xfs_verify_magic16(bp, dip->di_magic) &&
61 1998232168 : xfs_dinode_good_version(mp, dip->di_version) &&
62 1998228898 : xfs_verify_agino_or_null(bp->b_pag, unlinked_ino);
63 1998232168 : if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
64 : XFS_ERRTAG_ITOBP_INOTOBP))) {
65 2723 : if (readahead) {
66 2331 : bp->b_flags &= ~XBF_DONE;
67 2331 : xfs_buf_ioerror(bp, -EIO);
68 2331 : return;
69 : }
70 :
71 : #ifdef DEBUG
72 392 : xfs_alert(mp,
73 : "bad inode magic/vsn daddr %lld #%d (magic=%x)",
74 : (unsigned long long)xfs_buf_daddr(bp), i,
75 : be16_to_cpu(dip->di_magic));
76 : #endif
77 392 : xfs_buf_verifier_error(bp, -EFSCORRUPTED,
78 : __func__, dip, sizeof(*dip),
79 : NULL);
80 392 : return;
81 : }
82 : }
83 : }
84 :
85 :
86 : static void
87 11851891 : xfs_inode_buf_read_verify(
88 : struct xfs_buf *bp)
89 : {
90 11851891 : xfs_inode_buf_verify(bp, false);
91 11851891 : }
92 :
93 : static void
94 264918 : xfs_inode_buf_readahead_verify(
95 : struct xfs_buf *bp)
96 : {
97 264918 : xfs_inode_buf_verify(bp, true);
98 264918 : }
99 :
100 : static void
101 50333503 : xfs_inode_buf_write_verify(
102 : struct xfs_buf *bp)
103 : {
104 50333503 : xfs_inode_buf_verify(bp, false);
105 50333503 : }
106 :
107 : const struct xfs_buf_ops xfs_inode_buf_ops = {
108 : .name = "xfs_inode",
109 : .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC),
110 : cpu_to_be16(XFS_DINODE_MAGIC) },
111 : .verify_read = xfs_inode_buf_read_verify,
112 : .verify_write = xfs_inode_buf_write_verify,
113 : };
114 :
115 : const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
116 : .name = "xfs_inode_ra",
117 : .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC),
118 : cpu_to_be16(XFS_DINODE_MAGIC) },
119 : .verify_read = xfs_inode_buf_readahead_verify,
120 : .verify_write = xfs_inode_buf_write_verify,
121 : };
122 :
123 :
124 : /*
125 : * This routine is called to map an inode to the buffer containing the on-disk
126 : * version of the inode. It returns a pointer to the buffer containing the
127 : * on-disk inode in the bpp parameter.
128 : */
129 : int
130 883849328 : xfs_imap_to_bp(
131 : struct xfs_mount *mp,
132 : struct xfs_trans *tp,
133 : struct xfs_imap *imap,
134 : struct xfs_buf **bpp)
135 : {
136 883849328 : int error;
137 :
138 883849328 : error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
139 883849328 : imap->im_len, XBF_UNMAPPED, bpp, &xfs_inode_buf_ops);
140 883956051 : if (xfs_metadata_is_sick(error))
141 384 : xfs_agno_mark_sick(mp, xfs_daddr_to_agno(mp, imap->im_blkno),
142 : XFS_SICK_AG_INODES);
143 883956051 : return error;
144 : }
145 :
146 : static inline struct timespec64 xfs_inode_decode_bigtime(uint64_t ts)
147 : {
148 2229791473 : struct timespec64 tv;
149 2229791473 : uint32_t n;
150 :
151 2229791473 : tv.tv_sec = xfs_bigtime_to_unix(div_u64_rem(ts, NSEC_PER_SEC, &n));
152 2229791473 : tv.tv_nsec = n;
153 :
154 2229791473 : return tv;
155 : }
156 :
157 : /* Convert an ondisk timestamp to an incore timestamp. */
158 : struct timespec64
159 2229867079 : xfs_inode_from_disk_ts(
160 : struct xfs_dinode *dip,
161 : const xfs_timestamp_t ts)
162 : {
163 2229867079 : struct timespec64 tv;
164 2229867079 : struct xfs_legacy_timestamp *lts;
165 :
166 4459734158 : if (xfs_dinode_has_bigtime(dip))
167 2229791473 : return xfs_inode_decode_bigtime(be64_to_cpu(ts));
168 :
169 75606 : lts = (struct xfs_legacy_timestamp *)&ts;
170 75606 : tv.tv_sec = (int)be32_to_cpu(lts->t_sec);
171 75606 : tv.tv_nsec = (int)be32_to_cpu(lts->t_nsec);
172 :
173 75606 : return tv;
174 : }
175 :
176 : int
177 479354132 : xfs_inode_from_disk(
178 : struct xfs_inode *ip,
179 : struct xfs_dinode *from)
180 : {
181 479354132 : struct inode *inode = VFS_I(ip);
182 479354132 : int error;
183 479354132 : xfs_failaddr_t fa;
184 :
185 479354132 : ASSERT(ip->i_cowfp == NULL);
186 :
187 479354132 : fa = xfs_dinode_verify(ip->i_mount, ip->i_ino, from);
188 479336612 : if (fa) {
189 198 : xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", from,
190 : sizeof(*from), fa);
191 198 : return -EFSCORRUPTED;
192 : }
193 :
194 : /*
195 : * First get the permanent information that is needed to allocate an
196 : * inode. If the inode is unused, mode is zero and we shouldn't mess
197 : * with the uninitialized part of it.
198 : */
199 479336414 : if (!xfs_has_v3inodes(ip->i_mount))
200 832 : ip->i_flushiter = be16_to_cpu(from->di_flushiter);
201 479336414 : inode->i_generation = be32_to_cpu(from->di_gen);
202 479336414 : inode->i_mode = be16_to_cpu(from->di_mode);
203 479336414 : if (!inode->i_mode)
204 : return 0;
205 :
206 : /*
207 : * Convert v1 inodes immediately to v2 inode format as this is the
208 : * minimum inode version format we support in the rest of the code.
209 : * They will also be unconditionally written back to disk as v2 inodes.
210 : */
211 479339547 : if (unlikely(from->di_version == 1)) {
212 0 : set_nlink(inode, be16_to_cpu(from->di_onlink));
213 0 : ip->i_projid = 0;
214 : } else {
215 958679094 : set_nlink(inode, be32_to_cpu(from->di_nlink));
216 958685376 : ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 |
217 479342688 : be16_to_cpu(from->di_projid_lo);
218 : }
219 :
220 479342688 : i_uid_write(inode, be32_to_cpu(from->di_uid));
221 479348825 : i_gid_write(inode, be32_to_cpu(from->di_gid));
222 :
223 : /*
224 : * Time is signed, so need to convert to signed 32 bit before
225 : * storing in inode timestamp which may be 64 bit. Otherwise
226 : * a time before epoch is converted to a time long after epoch
227 : * on 64 bit systems.
228 : */
229 479342481 : inode->i_atime = xfs_inode_from_disk_ts(from, from->di_atime);
230 479342481 : inode->i_mtime = xfs_inode_from_disk_ts(from, from->di_mtime);
231 479342481 : inode->i_ctime = xfs_inode_from_disk_ts(from, from->di_ctime);
232 :
233 479342481 : ip->i_disk_size = be64_to_cpu(from->di_size);
234 479342481 : ip->i_nblocks = be64_to_cpu(from->di_nblocks);
235 479342481 : ip->i_extsize = be32_to_cpu(from->di_extsize);
236 479342481 : ip->i_forkoff = from->di_forkoff;
237 479342481 : ip->i_diflags = be16_to_cpu(from->di_flags);
238 479342481 : ip->i_next_unlinked = be32_to_cpu(from->di_next_unlinked);
239 :
240 479342481 : if (from->di_dmevmask || from->di_dmstate)
241 1 : xfs_iflags_set(ip, XFS_IPRESERVE_DM_FIELDS);
242 :
243 479327179 : if (xfs_has_v3inodes(ip->i_mount)) {
244 479327023 : inode_set_iversion_queried(inode,
245 479327023 : be64_to_cpu(from->di_changecount));
246 479327023 : ip->i_crtime = xfs_inode_from_disk_ts(from, from->di_crtime);
247 479327023 : ip->i_diflags2 = be64_to_cpu(from->di_flags2);
248 958654046 : ip->i_cowextsize = be32_to_cpu(from->di_cowextsize);
249 : }
250 :
251 479327179 : error = xfs_iformat_data_fork(ip, from);
252 479342297 : if (error)
253 : return error;
254 479342293 : if (from->di_forkoff) {
255 479226198 : error = xfs_iformat_attr_fork(ip, from);
256 479227979 : if (error)
257 0 : goto out_destroy_data_fork;
258 : }
259 479344074 : if (xfs_is_reflink_inode(ip))
260 99701422 : xfs_ifork_init_cow(ip);
261 : return 0;
262 :
263 : out_destroy_data_fork:
264 0 : xfs_idestroy_fork(&ip->i_df);
265 0 : return error;
266 : }
267 :
268 : /* Convert an incore timestamp to an ondisk timestamp. */
269 : static inline xfs_timestamp_t
270 1454675821 : xfs_inode_to_disk_ts(
271 : struct xfs_inode *ip,
272 : const struct timespec64 tv)
273 : {
274 1454675821 : struct xfs_legacy_timestamp *lts;
275 1454675821 : xfs_timestamp_t ts;
276 :
277 1454675821 : if (xfs_inode_has_bigtime(ip))
278 1454602367 : return cpu_to_be64(xfs_inode_encode_bigtime(tv));
279 :
280 73454 : lts = (struct xfs_legacy_timestamp *)&ts;
281 73454 : lts->t_sec = cpu_to_be32(tv.tv_sec);
282 73454 : lts->t_nsec = cpu_to_be32(tv.tv_nsec);
283 :
284 73454 : return ts;
285 : }
286 :
287 : static inline void
288 363669220 : xfs_inode_to_disk_iext_counters(
289 : struct xfs_inode *ip,
290 : struct xfs_dinode *to)
291 : {
292 363669220 : if (xfs_inode_has_large_extent_counts(ip)) {
293 363668912 : to->di_big_nextents = cpu_to_be64(xfs_ifork_nextents(&ip->i_df));
294 727337824 : to->di_big_anextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_af));
295 : /*
296 : * We might be upgrading the inode to use larger extent counters
297 : * than was previously used. Hence zero the unused field.
298 : */
299 363668912 : to->di_nrext64_pad = cpu_to_be16(0);
300 : } else {
301 616 : to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df));
302 616 : to->di_anextents = cpu_to_be16(xfs_ifork_nextents(&ip->i_af));
303 : }
304 363669220 : }
305 :
306 : void
307 363665998 : xfs_inode_to_disk(
308 : struct xfs_inode *ip,
309 : struct xfs_dinode *to,
310 : xfs_lsn_t lsn)
311 : {
312 363665998 : struct inode *inode = VFS_I(ip);
313 :
314 363665998 : to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
315 363665998 : to->di_onlink = 0;
316 :
317 363665998 : to->di_format = xfs_ifork_format(&ip->i_df);
318 363665998 : to->di_uid = cpu_to_be32(i_uid_read(inode));
319 363673327 : to->di_gid = cpu_to_be32(i_gid_read(inode));
320 363672251 : to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff);
321 363672251 : to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16);
322 :
323 363672251 : to->di_atime = xfs_inode_to_disk_ts(ip, inode->i_atime);
324 363672251 : to->di_mtime = xfs_inode_to_disk_ts(ip, inode->i_mtime);
325 363672251 : to->di_ctime = xfs_inode_to_disk_ts(ip, inode->i_ctime);
326 363672251 : to->di_nlink = cpu_to_be32(inode->i_nlink);
327 363672251 : to->di_gen = cpu_to_be32(inode->i_generation);
328 363672251 : to->di_mode = cpu_to_be16(inode->i_mode);
329 :
330 363672251 : to->di_size = cpu_to_be64(ip->i_disk_size);
331 363672251 : to->di_nblocks = cpu_to_be64(ip->i_nblocks);
332 363672251 : to->di_extsize = cpu_to_be32(ip->i_extsize);
333 363672251 : to->di_forkoff = ip->i_forkoff;
334 363672251 : to->di_aformat = xfs_ifork_format(&ip->i_af);
335 363672251 : to->di_flags = cpu_to_be16(ip->i_diflags);
336 :
337 363672251 : if (xfs_has_v3inodes(ip->i_mount)) {
338 363671993 : to->di_version = 3;
339 363671993 : to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
340 363671993 : to->di_crtime = xfs_inode_to_disk_ts(ip, ip->i_crtime);
341 363671993 : to->di_flags2 = cpu_to_be64(ip->i_diflags2);
342 363671993 : to->di_cowextsize = cpu_to_be32(ip->i_cowextsize);
343 363671993 : to->di_ino = cpu_to_be64(ip->i_ino);
344 363671993 : to->di_lsn = cpu_to_be64(lsn);
345 363671993 : memset(to->di_pad2, 0, sizeof(to->di_pad2));
346 363671993 : uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
347 363672252 : to->di_v3_pad = 0;
348 : } else {
349 258 : to->di_version = 2;
350 258 : to->di_flushiter = cpu_to_be16(ip->i_flushiter);
351 516 : memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad));
352 : }
353 :
354 363672510 : xfs_inode_to_disk_iext_counters(ip, to);
355 363669744 : }
356 :
357 : static xfs_failaddr_t
358 751141851 : xfs_dinode_verify_fork(
359 : struct xfs_dinode *dip,
360 : struct xfs_mount *mp,
361 : int whichfork)
362 : {
363 751141851 : xfs_extnum_t di_nextents;
364 751141851 : xfs_extnum_t max_extents;
365 751141851 : mode_t mode = be16_to_cpu(dip->di_mode);
366 751142009 : uint32_t fork_size = XFS_DFORK_SIZE(dip, mp, whichfork);
367 751141851 : uint32_t fork_format = XFS_DFORK_FORMAT(dip, whichfork);
368 :
369 751141851 : di_nextents = xfs_dfork_nextents(dip, whichfork);
370 :
371 : /*
372 : * For fork types that can contain local data, check that the fork
373 : * format matches the size of local data contained within the fork.
374 : *
375 : * For all types, check that when the size says the should be in extent
376 : * or btree format, the inode isn't claiming it is in local format.
377 : */
378 751155702 : if (whichfork == XFS_DATA_FORK) {
379 271933223 : if (S_ISDIR(mode) || S_ISLNK(mode)) {
380 102899721 : if (be64_to_cpu(dip->di_size) <= fork_size &&
381 : fork_format != XFS_DINODE_FMT_LOCAL)
382 6 : return __this_address;
383 : }
384 :
385 271933217 : if (be64_to_cpu(dip->di_size) > fork_size &&
386 : fork_format == XFS_DINODE_FMT_LOCAL)
387 0 : return __this_address;
388 : }
389 :
390 751155696 : switch (fork_format) {
391 554496493 : case XFS_DINODE_FMT_LOCAL:
392 : /*
393 : * No local regular files yet.
394 : */
395 554496493 : if (S_ISREG(mode) && whichfork == XFS_DATA_FORK)
396 4 : return __this_address;
397 554496489 : if (di_nextents)
398 0 : return __this_address;
399 : break;
400 184027287 : case XFS_DINODE_FMT_EXTENTS:
401 184027397 : if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
402 0 : return __this_address;
403 : break;
404 : case XFS_DINODE_FMT_BTREE:
405 25258972 : max_extents = xfs_iext_max_nextents(
406 : xfs_dinode_has_large_extent_counts(dip),
407 : whichfork);
408 12629471 : if (di_nextents > max_extents)
409 0 : return __this_address;
410 : break;
411 1211 : case XFS_DINODE_FMT_RMAP:
412 1211 : if (!xfs_has_rtrmapbt(mp))
413 0 : return __this_address;
414 1211 : if (!(dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_METADIR)))
415 0 : return __this_address;
416 : break;
417 1211 : case XFS_DINODE_FMT_REFCOUNT:
418 1211 : if (!xfs_has_rtreflink(mp))
419 0 : return __this_address;
420 1211 : if (!(dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_METADIR)))
421 0 : return __this_address;
422 : break;
423 : default:
424 8 : return __this_address;
425 : }
426 : return NULL;
427 : }
428 :
429 : static xfs_failaddr_t
430 479348938 : xfs_dinode_verify_forkoff(
431 : struct xfs_dinode *dip,
432 : struct xfs_mount *mp)
433 : {
434 479348938 : if (!dip->di_forkoff)
435 : return NULL;
436 :
437 479235858 : switch (dip->di_format) {
438 207406555 : case XFS_DINODE_FMT_DEV:
439 207406555 : if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3))
440 0 : return __this_address;
441 : break;
442 : case XFS_DINODE_FMT_RMAP:
443 : case XFS_DINODE_FMT_REFCOUNT:
444 2422 : if (!(xfs_has_metadir(mp) && xfs_has_parent(mp)))
445 0 : return __this_address;
446 271829303 : fallthrough;
447 : case XFS_DINODE_FMT_LOCAL: /* fall through ... */
448 : case XFS_DINODE_FMT_EXTENTS: /* fall through ... */
449 : case XFS_DINODE_FMT_BTREE:
450 271829305 : if (dip->di_forkoff >= (XFS_LITINO(mp) >> 3))
451 0 : return __this_address;
452 : break;
453 : default:
454 0 : return __this_address;
455 : }
456 : return NULL;
457 : }
458 :
459 : static xfs_failaddr_t
460 479335254 : xfs_dinode_verify_nrext64(
461 : struct xfs_mount *mp,
462 : struct xfs_dinode *dip)
463 : {
464 958670508 : if (xfs_dinode_has_large_extent_counts(dip)) {
465 479334826 : if (!xfs_has_large_extent_counts(mp))
466 0 : return __this_address;
467 479334826 : if (dip->di_nrext64_pad != 0)
468 0 : return __this_address;
469 428 : } else if (dip->di_version >= 3) {
470 10 : if (dip->di_v3_pad != 0)
471 0 : return __this_address;
472 : }
473 :
474 : return NULL;
475 : }
476 :
477 : /*
478 : * Validate all the picky requirements we have for a file that claims to be
479 : * filesystem metadata.
480 : */
481 : xfs_failaddr_t
482 188679 : xfs_dinode_verify_metadir(
483 : struct xfs_mount *mp,
484 : struct xfs_dinode *dip,
485 : uint16_t mode,
486 : uint16_t flags,
487 : uint64_t flags2)
488 : {
489 188679 : if (!xfs_has_metadir(mp))
490 0 : return __this_address;
491 :
492 : /* V5 filesystem only */
493 188679 : if (dip->di_version < 3)
494 0 : return __this_address;
495 :
496 : /* V3 inode fields that are always zero */
497 188679 : if (dip->di_onlink)
498 0 : return __this_address;
499 188679 : if ((flags2 & XFS_DIFLAG2_NREXT64) && dip->di_nrext64_pad)
500 0 : return __this_address;
501 188679 : if (!(flags2 & XFS_DIFLAG2_NREXT64) && dip->di_flushiter)
502 0 : return __this_address;
503 :
504 : /* Metadata files can only be directories or regular files */
505 188679 : if (!S_ISDIR(mode) && !S_ISREG(mode))
506 0 : return __this_address;
507 :
508 : /* They must have zero access permissions */
509 188679 : if (mode & 0777)
510 0 : return __this_address;
511 :
512 : /* DMAPI event and state masks are zero */
513 188679 : if (dip->di_dmevmask || dip->di_dmstate)
514 0 : return __this_address;
515 :
516 : /* User, group, and project IDs must be zero */
517 188679 : if (dip->di_uid || dip->di_gid ||
518 188679 : dip->di_projid_lo || dip->di_projid_hi)
519 0 : return __this_address;
520 :
521 : /* Immutable, sync, noatime, nodump, and nodefrag flags must be set */
522 188679 : if (!(flags & XFS_DIFLAG_IMMUTABLE))
523 0 : return __this_address;
524 188679 : if (!(flags & XFS_DIFLAG_SYNC))
525 0 : return __this_address;
526 188679 : if (!(flags & XFS_DIFLAG_NOATIME))
527 0 : return __this_address;
528 188679 : if (!(flags & XFS_DIFLAG_NODUMP))
529 0 : return __this_address;
530 188679 : if (!(flags & XFS_DIFLAG_NODEFRAG))
531 0 : return __this_address;
532 :
533 : /* Directories must have nosymlinks flags set */
534 188679 : if (S_ISDIR(mode) && !(flags & XFS_DIFLAG_NOSYMLINKS))
535 0 : return __this_address;
536 :
537 : /* dax flags2 must not be set */
538 188679 : if (flags2 & XFS_DIFLAG2_DAX)
539 0 : return __this_address;
540 :
541 : return NULL;
542 : }
543 :
544 : xfs_failaddr_t
545 479339158 : xfs_dinode_verify(
546 : struct xfs_mount *mp,
547 : xfs_ino_t ino,
548 : struct xfs_dinode *dip)
549 : {
550 479339158 : xfs_failaddr_t fa;
551 479339158 : uint16_t mode;
552 479339158 : uint16_t flags;
553 479339158 : uint64_t flags2;
554 479339158 : uint64_t di_size;
555 479339158 : xfs_extnum_t nextents;
556 479339158 : xfs_extnum_t naextents;
557 479339158 : xfs_filblks_t nblocks;
558 :
559 479339158 : if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
560 0 : return __this_address;
561 :
562 : /* Verify v3 integrity information first */
563 479339158 : if (dip->di_version >= 3) {
564 479339041 : if (!xfs_has_v3inodes(mp))
565 0 : return __this_address;
566 479339041 : if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
567 : XFS_DINODE_CRC_OFF))
568 6 : return __this_address;
569 479319681 : if (be64_to_cpu(dip->di_ino) != ino)
570 606 : return __this_address;
571 479334653 : if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
572 0 : return __this_address;
573 : }
574 :
575 479345847 : if (dip->di_version > 1) {
576 479345847 : if (dip->di_onlink)
577 0 : return __this_address;
578 : } else {
579 0 : if (dip->di_nlink)
580 0 : return __this_address;
581 : }
582 :
583 : /* don't allow invalid i_size */
584 479345847 : di_size = be64_to_cpu(dip->di_size);
585 479345847 : if (di_size & (1ULL << 63))
586 0 : return __this_address;
587 :
588 479345847 : mode = be16_to_cpu(dip->di_mode);
589 479345847 : if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
590 126 : return __this_address;
591 :
592 : /* No zero-length symlinks/dirs. */
593 479343922 : if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
594 16 : return __this_address;
595 :
596 479343906 : fa = xfs_dinode_verify_nrext64(mp, dip);
597 479334054 : if (fa)
598 : return fa;
599 :
600 479341220 : nextents = xfs_dfork_data_extents(dip);
601 479341220 : naextents = xfs_dfork_attr_extents(dip);
602 479341220 : nblocks = be64_to_cpu(dip->di_nblocks);
603 :
604 : /* Fork checks carried over from xfs_iformat_fork */
605 479341220 : if (mode && nextents + naextents > nblocks)
606 0 : return __this_address;
607 :
608 479341220 : if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents)
609 0 : return __this_address;
610 :
611 479341220 : if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
612 0 : return __this_address;
613 :
614 479341220 : flags = be16_to_cpu(dip->di_flags);
615 :
616 479341220 : if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
617 0 : return __this_address;
618 :
619 : /* check for illegal values of forkoff */
620 479341220 : fa = xfs_dinode_verify_forkoff(dip, mp);
621 479325065 : if (fa)
622 : return fa;
623 :
624 : /* Do we have appropriate data fork formats for the mode? */
625 479312730 : switch (mode & S_IFMT) {
626 207412968 : case S_IFIFO:
627 : case S_IFCHR:
628 : case S_IFBLK:
629 : case S_IFSOCK:
630 207412968 : if (dip->di_format != XFS_DINODE_FMT_DEV)
631 32 : return __this_address;
632 : break;
633 271933612 : case S_IFREG:
634 : case S_IFLNK:
635 : case S_IFDIR:
636 271933612 : fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
637 271936193 : if (fa)
638 : return fa;
639 : break;
640 : case 0:
641 : /* Uninitialized inode ok. */
642 : break;
643 : default:
644 0 : return __this_address;
645 : }
646 :
647 479314662 : if (dip->di_forkoff) {
648 479220018 : fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
649 479232827 : if (fa)
650 : return fa;
651 : } else {
652 : /*
653 : * If there is no fork offset, this may be a freshly-made inode
654 : * in a new disk cluster, in which case di_aformat is zeroed.
655 : * Otherwise, such an inode must be in EXTENTS format; this goes
656 : * for freed inodes as well.
657 : */
658 112648 : switch (dip->di_aformat) {
659 : case 0:
660 : case XFS_DINODE_FMT_EXTENTS:
661 112648 : break;
662 : default:
663 0 : return __this_address;
664 : }
665 112648 : if (naextents)
666 0 : return __this_address;
667 : }
668 :
669 : /* extent size hint validation */
670 958654422 : fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
671 : mode, flags);
672 479344261 : if (fa)
673 : return fa;
674 :
675 : /* only version 3 or greater inodes are extensively verified here */
676 479336557 : if (dip->di_version < 3)
677 : return NULL;
678 :
679 479336141 : flags2 = be64_to_cpu(dip->di_flags2);
680 :
681 : /* don't allow reflink/cowextsize if we don't have reflink */
682 479336141 : if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) &&
683 : !xfs_has_reflink(mp))
684 0 : return __this_address;
685 :
686 : /* only regular files get reflink */
687 479336141 : if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG)
688 0 : return __this_address;
689 :
690 : /* don't let reflink and realtime mix */
691 479336141 : if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME) &&
692 563389 : !xfs_has_rtreflink(mp))
693 0 : return __this_address;
694 :
695 : /* COW extent size hint validation */
696 958672282 : fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
697 : mode, flags, flags2);
698 479334543 : if (fa)
699 : return fa;
700 :
701 : /* bigtime iflag can only happen on bigtime filesystems */
702 958673530 : if (xfs_dinode_has_bigtime(dip) &&
703 : !xfs_has_bigtime(mp))
704 0 : return __this_address;
705 :
706 479336765 : if (flags2 & XFS_DIFLAG2_METADIR) {
707 188679 : fa = xfs_dinode_verify_metadir(mp, dip, mode, flags, flags2);
708 188679 : if (fa)
709 0 : return fa;
710 : }
711 :
712 : return NULL;
713 : }
714 :
715 : void
716 340420955 : xfs_dinode_calc_crc(
717 : struct xfs_mount *mp,
718 : struct xfs_dinode *dip)
719 : {
720 340420955 : uint32_t crc;
721 :
722 340420955 : if (dip->di_version < 3)
723 : return;
724 :
725 340420713 : ASSERT(xfs_has_crc(mp));
726 340420713 : crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize,
727 : XFS_DINODE_CRC_OFF);
728 340423924 : dip->di_crc = xfs_end_cksum(crc);
729 : }
730 :
731 : /*
732 : * Validate di_extsize hint.
733 : *
734 : * 1. Extent size hint is only valid for directories and regular files.
735 : * 2. FS_XFLAG_EXTSIZE is only valid for regular files.
736 : * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
737 : * 4. Hint cannot be larger than MAXTEXTLEN.
738 : * 5. Can be changed on directories at any time.
739 : * 6. Hint value of 0 turns off hints, clears inode flags.
740 : * 7. Extent size must be a multiple of the appropriate block size.
741 : * For realtime files, this is the rt extent size.
742 : * 8. For non-realtime files, the extent size hint must be limited
743 : * to half the AG size to avoid alignment extending the extent beyond the
744 : * limits of the AG.
745 : */
746 : xfs_failaddr_t
747 567049663 : xfs_inode_validate_extsize(
748 : struct xfs_mount *mp,
749 : uint32_t extsize,
750 : uint16_t mode,
751 : uint16_t flags)
752 : {
753 567049663 : bool rt_flag;
754 567049663 : bool hint_flag;
755 567049663 : bool inherit_flag;
756 567049663 : uint32_t alloc_unit = 1;
757 :
758 567049663 : rt_flag = (flags & XFS_DIFLAG_REALTIME);
759 567049663 : hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
760 567049663 : inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
761 :
762 : /*
763 : * This comment describes a historic gap in this verifier function.
764 : *
765 : * For a directory with both RTINHERIT and EXTSZINHERIT flags set, this
766 : * function has never checked that the extent size hint is an integer
767 : * multiple of the realtime extent size. Since we allow users to set
768 : * this combination on non-rt filesystems /and/ to change the rt
769 : * extent size when adding a rt device to a filesystem, the net effect
770 : * is that users can configure a filesystem anticipating one rt
771 : * geometry and change their minds later. Directories do not use the
772 : * extent size hint, so this is harmless for them.
773 : *
774 : * If a directory with a misaligned extent size hint is allowed to
775 : * propagate that hint into a new regular realtime file, the result
776 : * is that the inode cluster buffer verifier will trigger a corruption
777 : * shutdown the next time it is run, because the verifier has always
778 : * enforced the alignment rule for regular files.
779 : *
780 : * Because we allow administrators to set a new rt extent size when
781 : * adding a rt section, we cannot add a check to this verifier because
782 : * that will result a new source of directory corruption errors when
783 : * reading an existing filesystem. Instead, we rely on callers to
784 : * decide when alignment checks are appropriate, and fix things up as
785 : * needed.
786 : */
787 :
788 567049663 : if (rt_flag)
789 28000789 : alloc_unit = mp->m_sb.sb_rextsize;
790 :
791 567049663 : if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
792 0 : return __this_address;
793 :
794 567049663 : if (hint_flag && !S_ISREG(mode))
795 0 : return __this_address;
796 :
797 567049663 : if (inherit_flag && !S_ISDIR(mode))
798 0 : return __this_address;
799 :
800 567049663 : if ((hint_flag || inherit_flag) && extsize == 0)
801 0 : return __this_address;
802 :
803 : /* free inodes get flags set to zero but extsize remains */
804 567049663 : if (mode && !(hint_flag || inherit_flag) && extsize != 0)
805 0 : return __this_address;
806 :
807 567049663 : if (extsize % alloc_unit)
808 0 : return __this_address;
809 :
810 567049663 : if (extsize > XFS_MAX_BMBT_EXTLEN)
811 0 : return __this_address;
812 :
813 567049663 : if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
814 0 : return __this_address;
815 :
816 : return NULL;
817 : }
818 :
819 : /*
820 : * Validate di_cowextsize hint.
821 : *
822 : * 1. CoW extent size hint can only be set if reflink is enabled on the fs.
823 : * The inode does not have to have any shared blocks, but it must be a v3.
824 : * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files;
825 : * for a directory, the hint is propagated to new files.
826 : * 3. Can be changed on files & directories at any time.
827 : * 4. Hint value of 0 turns off hints, clears inode flags.
828 : * 5. Extent size must be a multiple of the appropriate block size.
829 : * 6. The extent size hint must be limited to half the AG size to avoid
830 : * alignment extending the extent beyond the limits of the AG.
831 : */
832 : xfs_failaddr_t
833 586272177 : xfs_inode_validate_cowextsize(
834 : struct xfs_mount *mp,
835 : uint32_t cowextsize,
836 : uint16_t mode,
837 : uint16_t flags,
838 : uint64_t flags2)
839 : {
840 586272177 : bool rt_flag;
841 586272177 : bool hint_flag;
842 586272177 : uint32_t alloc_unit = 1;
843 :
844 586272177 : rt_flag = (flags & XFS_DIFLAG_REALTIME);
845 586272177 : hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
846 :
847 : /*
848 : * Similar to extent size hints, a directory can be configured to
849 : * propagate realtime status and a CoW extent size hint to newly
850 : * created files even if there is no realtime device, and the hints on
851 : * disk can become misaligned if the sysadmin changes the rt extent
852 : * size while adding the realtime device.
853 : *
854 : * Therefore, we can only enforce the rextsize alignment check against
855 : * regular realtime files, and rely on callers to decide when alignment
856 : * checks are appropriate, and fix things up as needed.
857 : */
858 :
859 586272177 : if (rt_flag)
860 28000760 : alloc_unit = mp->m_sb.sb_rextsize;
861 :
862 586272177 : if (hint_flag && !xfs_has_reflink(mp))
863 0 : return __this_address;
864 :
865 586272177 : if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
866 0 : return __this_address;
867 :
868 586272177 : if (hint_flag && cowextsize == 0)
869 0 : return __this_address;
870 :
871 : /* free inodes get flags set to zero but cowextsize remains */
872 586272177 : if (mode && !hint_flag && cowextsize != 0)
873 0 : return __this_address;
874 :
875 586272177 : if (cowextsize % alloc_unit)
876 0 : return __this_address;
877 :
878 586272177 : if (cowextsize > XFS_MAX_BMBT_EXTLEN)
879 0 : return __this_address;
880 :
881 586272177 : if (!rt_flag && cowextsize > mp->m_sb.sb_agblocks / 2)
882 0 : return __this_address;
883 :
884 : return NULL;
885 : }
|