Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_defer.h"
13 : #include "xfs_btree.h"
14 : #include "xfs_bit.h"
15 : #include "xfs_log_format.h"
16 : #include "xfs_trans.h"
17 : #include "xfs_sb.h"
18 : #include "xfs_inode.h"
19 : #include "xfs_da_format.h"
20 : #include "xfs_da_btree.h"
21 : #include "xfs_dir2.h"
22 : #include "xfs_attr.h"
23 : #include "xfs_attr_leaf.h"
24 : #include "xfs_attr_sf.h"
25 : #include "xfs_attr_remote.h"
26 : #include "xfs_bmap.h"
27 : #include "xfs_bmap_util.h"
28 : #include "xfs_swapext.h"
29 : #include "xfs_xchgrange.h"
30 : #include "xfs_acl.h"
31 : #include "xfs_parent.h"
32 : #include "scrub/xfs_scrub.h"
33 : #include "scrub/scrub.h"
34 : #include "scrub/common.h"
35 : #include "scrub/trace.h"
36 : #include "scrub/repair.h"
37 : #include "scrub/tempfile.h"
38 : #include "scrub/tempswap.h"
39 : #include "scrub/xfile.h"
40 : #include "scrub/xfarray.h"
41 : #include "scrub/xfblob.h"
42 : #include "scrub/attr.h"
43 : #include "scrub/reap.h"
44 : #include "scrub/attr_repair.h"
45 :
/*
 * Extended Attribute Repair
 * =========================
 *
 * We repair extended attributes by reading the attr leaf blocks looking for
 * attribute entries that look salvageable (name passes verifiers, value can
 * be retrieved, etc).  Each extended attribute worth salvaging is stashed in
 * memory, and the stashed entries are periodically replayed into a temporary
 * file to constrain memory use.  Batching the construction of the temporary
 * extended attribute structure in this fashion reduces lock cycling of the
 * file being repaired and the temporary file.
 *
 * When salvaging completes, the remaining stashed attributes are replayed to
 * the temporary file.  An atomic extent swap is used to commit the new xattr
 * blocks to the file being repaired.  This will disrupt attrmulti cursors.
 */
62 :
63 : struct xrep_xattr_key {
64 : /* Cookie for retrieval of the xattr name. */
65 : xfblob_cookie name_cookie;
66 :
67 : /* Cookie for retrieval of the xattr value. */
68 : xfblob_cookie value_cookie;
69 :
70 : /* XFS_ATTR_* flags */
71 : int flags;
72 :
73 : /* Length of the value and name. */
74 : uint32_t valuelen;
75 : uint16_t namelen;
76 : };
77 :
/*
 * Upper bound (8 pages) on the amount of attr data that may sit in
 * xattr_records/xattr_blobs before it gets written to the temp file.
 */
#define XREP_XATTR_MAX_STASH_BYTES	(PAGE_SIZE * 8)
83 :
84 : struct xrep_xattr {
85 : struct xfs_scrub *sc;
86 :
87 : /* Information for swapping attr forks at the end. */
88 : struct xrep_tempswap tx;
89 :
90 : /* xattr keys */
91 : struct xfarray *xattr_records;
92 :
93 : /* xattr values */
94 : struct xfblob *xattr_blobs;
95 :
96 : /* Number of attributes that we are salvaging. */
97 : unsigned long long attrs_found;
98 :
99 : /* Can we flush stashed attrs to the tempfile? */
100 : bool can_flush;
101 :
102 : /* Did the live update fail, and hence the repair is now out of date? */
103 : bool live_update_aborted;
104 :
105 : /* Lock protecting parent pointer updates */
106 : struct mutex lock;
107 :
108 : /* Fixed-size array of xrep_xattr_pptr structures. */
109 : struct xfarray *pptr_recs;
110 :
111 : /* Blobs containing parent pointer names. */
112 : struct xfblob *pptr_names;
113 :
114 : /* Hook to capture parent pointer updates. */
115 : struct xfs_dir_hook hooks;
116 :
117 : /* xattr key and da args for parent pointer replay. */
118 : struct xfs_parent_scratch pptr_scratch;
119 :
120 : /*
121 : * Scratch buffer for scanning dirents to create pptr xattrs. At the
122 : * very end of the repair, it can also be used to compute the
123 : * lost+found filename if we need to reparent the file.
124 : */
125 : struct xfs_parent_name_irec pptr;
126 : };
127 :
/* Stashed update action: add a parent pointer to the tempfile. */
#define XREP_XATTR_PPTR_ADD	(1)

/* Stashed update action: remove a parent pointer from the tempfile. */
#define XREP_XATTR_PPTR_REMOVE	(2)
133 :
134 : /* A stashed parent pointer update. */
135 : struct xrep_xattr_pptr {
136 : /* Cookie for retrieval of the pptr name. */
137 : xfblob_cookie name_cookie;
138 :
139 : /* Parent pointer attr key. */
140 : xfs_ino_t p_ino;
141 : uint32_t p_gen;
142 :
143 : /* Length of the pptr name. */
144 : uint8_t namelen;
145 :
146 : /* XREP_XATTR_PPTR_{ADD,REMOVE} */
147 : uint8_t action;
148 : };
149 :
150 : /* Set up to recreate the extended attributes. */
151 : int
152 3380064 : xrep_setup_xattr(
153 : struct xfs_scrub *sc)
154 : {
155 3380064 : if (xfs_has_parent(sc->mp))
156 3380077 : xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
157 :
158 3378699 : return xrep_tempfile_create(sc, S_IFREG);
159 : }
160 :
161 : /*
162 : * Decide if we want to salvage this attribute. We don't bother with
163 : * incomplete or oversized keys or values. The @value parameter can be null
164 : * for remote attrs.
165 : */
166 : STATIC int
167 1088373 : xrep_xattr_want_salvage(
168 : struct xrep_xattr *rx,
169 : unsigned int attr_flags,
170 : const void *name,
171 : int namelen,
172 : const void *value,
173 : int valuelen)
174 : {
175 1088373 : if (attr_flags & XFS_ATTR_INCOMPLETE)
176 : return false;
177 1088373 : if (namelen > XATTR_NAME_MAX || namelen <= 0)
178 : return false;
179 1088373 : if (valuelen > XATTR_SIZE_MAX || valuelen < 0)
180 : return false;
181 1088373 : if (attr_flags & XFS_ATTR_PARENT) {
182 159171 : if (!xfs_parent_namecheck(rx->sc->mp, name, namelen,
183 : attr_flags))
184 : return false;
185 159171 : if (!xfs_parent_valuecheck(rx->sc->mp, value, valuelen))
186 0 : return false;
187 : }
188 : return true;
189 : }
190 :
191 : /* Allocate an in-core record to hold xattrs while we rebuild the xattr data. */
192 : STATIC int
193 1088373 : xrep_xattr_salvage_key(
194 : struct xrep_xattr *rx,
195 : int flags,
196 : unsigned char *name,
197 : int namelen,
198 : unsigned char *value,
199 : int valuelen)
200 : {
201 1088373 : struct xrep_xattr_key key = {
202 : .valuelen = valuelen,
203 1088373 : .flags = flags & XFS_ATTR_NSP_ONDISK_MASK,
204 : };
205 1088373 : unsigned int i = 0;
206 1088373 : int error = 0;
207 :
208 1088373 : if (xchk_should_terminate(rx->sc, &error))
209 0 : return error;
210 :
211 : /*
212 : * Truncate the name to the first character that would trip namecheck.
213 : * If we no longer have a name after that, ignore this attribute.
214 : */
215 1088373 : if (flags & XFS_ATTR_PARENT) {
216 159171 : key.namelen = namelen;
217 :
218 159171 : trace_xrep_xattr_salvage_pptr(rx->sc->ip, flags, name,
219 : key.namelen, value, valuelen);
220 : } else {
221 3825533 : while (i < namelen && name[i] != 0)
222 2896331 : i++;
223 929202 : if (i == 0)
224 : return 0;
225 929202 : key.namelen = i;
226 :
227 929202 : trace_xrep_xattr_salvage_rec(rx->sc->ip, flags, name,
228 : key.namelen, valuelen);
229 : }
230 :
231 2176746 : error = xfblob_store(rx->xattr_blobs, &key.name_cookie, name,
232 1088373 : key.namelen);
233 1088373 : if (error)
234 : return error;
235 :
236 1088373 : error = xfblob_store(rx->xattr_blobs, &key.value_cookie, value,
237 : key.valuelen);
238 1088373 : if (error)
239 : return error;
240 :
241 1088373 : error = xfarray_append(rx->xattr_records, &key);
242 1088373 : if (error)
243 : return error;
244 :
245 1088373 : rx->attrs_found++;
246 1088373 : return 0;
247 : }
248 :
249 : /*
250 : * Record a shortform extended attribute key & value for later reinsertion
251 : * into the inode.
252 : */
253 : STATIC int
254 118445 : xrep_xattr_salvage_sf_attr(
255 : struct xrep_xattr *rx,
256 : struct xfs_attr_shortform *sf,
257 : struct xfs_attr_sf_entry *sfe)
258 : {
259 118445 : struct xfs_scrub *sc = rx->sc;
260 118445 : struct xchk_xattr_buf *ab = sc->buf;
261 118445 : unsigned char *name = sfe->nameval;
262 118445 : unsigned char *value = &sfe->nameval[sfe->namelen];
263 :
264 118445 : if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)name - (char *)sf,
265 : sfe->namelen))
266 : return 0;
267 :
268 118445 : if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)value - (char *)sf,
269 118445 : sfe->valuelen))
270 : return 0;
271 :
272 118445 : if (!xrep_xattr_want_salvage(rx, sfe->flags, sfe->nameval,
273 118445 : sfe->namelen, value, sfe->valuelen))
274 : return 0;
275 :
276 118445 : return xrep_xattr_salvage_key(rx, sfe->flags, sfe->nameval,
277 118445 : sfe->namelen, value, sfe->valuelen);
278 : }
279 :
280 : /*
281 : * Record a local format extended attribute key & value for later reinsertion
282 : * into the inode.
283 : */
284 : STATIC int
285 969853 : xrep_xattr_salvage_local_attr(
286 : struct xrep_xattr *rx,
287 : struct xfs_attr_leaf_entry *ent,
288 : unsigned int nameidx,
289 : const char *buf_end,
290 : struct xfs_attr_leaf_name_local *lentry)
291 : {
292 969853 : struct xchk_xattr_buf *ab = rx->sc->buf;
293 969853 : unsigned char *value;
294 969853 : unsigned int valuelen;
295 969853 : unsigned int namesize;
296 :
297 : /*
298 : * Decode the leaf local entry format. If something seems wrong, we
299 : * junk the attribute.
300 : */
301 969853 : value = &lentry->nameval[lentry->namelen];
302 969853 : valuelen = be16_to_cpu(lentry->valuelen);
303 969853 : namesize = xfs_attr_leaf_entsize_local(lentry->namelen, valuelen);
304 969853 : if ((char *)lentry + namesize > buf_end)
305 : return 0;
306 969853 : if (!xrep_xattr_want_salvage(rx, ent->flags, lentry->nameval,
307 : lentry->namelen, value, valuelen))
308 : return 0;
309 969853 : if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize))
310 : return 0;
311 :
312 : /* Try to save this attribute. */
313 969853 : return xrep_xattr_salvage_key(rx, ent->flags, lentry->nameval,
314 969853 : lentry->namelen, value, valuelen);
315 : }
316 :
317 : /*
318 : * Record a remote format extended attribute key & value for later reinsertion
319 : * into the inode.
320 : */
321 : STATIC int
322 75 : xrep_xattr_salvage_remote_attr(
323 : struct xrep_xattr *rx,
324 : struct xfs_attr_leaf_entry *ent,
325 : unsigned int nameidx,
326 : const char *buf_end,
327 : struct xfs_attr_leaf_name_remote *rentry,
328 : unsigned int ent_idx,
329 : struct xfs_buf *leaf_bp)
330 : {
331 75 : struct xfs_da_args args = {
332 75 : .trans = rx->sc->tp,
333 75 : .dp = rx->sc->ip,
334 : .index = ent_idx,
335 75 : .geo = rx->sc->mp->m_attr_geo,
336 75 : .owner = rx->sc->ip->i_ino,
337 : };
338 75 : struct xchk_xattr_buf *ab = rx->sc->buf;
339 75 : unsigned int valuelen;
340 75 : unsigned int namesize;
341 75 : int error;
342 :
343 : /*
344 : * Decode the leaf remote entry format. If something seems wrong, we
345 : * junk the attribute. Note that we should never find a zero-length
346 : * remote attribute value.
347 : */
348 75 : valuelen = be32_to_cpu(rentry->valuelen);
349 75 : namesize = xfs_attr_leaf_entsize_remote(rentry->namelen);
350 75 : if ((char *)rentry + namesize > buf_end)
351 : return 0;
352 150 : if (valuelen == 0 ||
353 75 : !xrep_xattr_want_salvage(rx, ent->flags, rentry->name,
354 : rentry->namelen, NULL, valuelen))
355 0 : return 0;
356 75 : if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize))
357 : return 0;
358 :
359 : /*
360 : * Enlarge the buffer (if needed) to hold the value that we're trying
361 : * to salvage from the old extended attribute data.
362 : */
363 75 : error = xchk_setup_xattr_buf(rx->sc, valuelen);
364 75 : if (error == -ENOMEM)
365 : error = -EDEADLOCK;
366 75 : if (error)
367 0 : return error;
368 :
369 : /* Look up the remote value and stash it for reconstruction. */
370 75 : args.valuelen = valuelen;
371 75 : args.namelen = rentry->namelen;
372 75 : args.name = rentry->name;
373 75 : args.value = ab->value;
374 75 : error = xfs_attr3_leaf_getvalue(leaf_bp, &args);
375 75 : if (error || args.rmtblkno == 0)
376 0 : goto err_free;
377 :
378 75 : error = xfs_attr_rmtval_get(&args);
379 75 : if (error)
380 0 : goto err_free;
381 :
382 : /* Try to save this attribute. */
383 75 : error = xrep_xattr_salvage_key(rx, ent->flags, rentry->name,
384 75 : rentry->namelen, ab->value, valuelen);
385 75 : err_free:
386 : /* remote value was garbage, junk it */
387 75 : if (error == -EFSBADCRC || error == -EFSCORRUPTED)
388 0 : error = 0;
389 : return error;
390 : }
391 :
392 : /* Extract every xattr key that we can from this attr fork block. */
393 : STATIC int
394 42635 : xrep_xattr_recover_leaf(
395 : struct xrep_xattr *rx,
396 : struct xfs_buf *bp)
397 : {
398 42635 : struct xfs_attr3_icleaf_hdr leafhdr;
399 42635 : struct xfs_scrub *sc = rx->sc;
400 42635 : struct xfs_mount *mp = sc->mp;
401 42635 : struct xfs_attr_leafblock *leaf;
402 42635 : struct xfs_attr_leaf_name_local *lentry;
403 42635 : struct xfs_attr_leaf_name_remote *rentry;
404 42635 : struct xfs_attr_leaf_entry *ent;
405 42635 : struct xfs_attr_leaf_entry *entries;
406 42635 : struct xchk_xattr_buf *ab = rx->sc->buf;
407 42635 : char *buf_end;
408 42635 : size_t off;
409 42635 : unsigned int nameidx;
410 42635 : unsigned int hdrsize;
411 42635 : int i;
412 42635 : int error = 0;
413 :
414 42635 : bitmap_zero(ab->usedmap, mp->m_attr_geo->blksize);
415 :
416 : /* Check the leaf header */
417 42635 : leaf = bp->b_addr;
418 42635 : xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
419 42635 : hdrsize = xfs_attr3_leaf_hdr_size(leaf);
420 42635 : xchk_xattr_set_map(sc, ab->usedmap, 0, hdrsize);
421 42635 : entries = xfs_attr3_leaf_entryp(leaf);
422 :
423 42635 : buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize;
424 1012563 : for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) {
425 969928 : if (xchk_should_terminate(sc, &error))
426 0 : return error;
427 :
428 : /* Skip key if it conflicts with something else? */
429 969928 : off = (char *)ent - (char *)leaf;
430 969928 : if (!xchk_xattr_set_map(sc, ab->usedmap, off,
431 : sizeof(xfs_attr_leaf_entry_t)))
432 0 : continue;
433 :
434 : /* Check the name information. */
435 969928 : nameidx = be16_to_cpu(ent->nameidx);
436 969928 : if (nameidx < leafhdr.firstused ||
437 969928 : nameidx >= mp->m_attr_geo->blksize)
438 0 : continue;
439 :
440 969928 : if (ent->flags & XFS_ATTR_LOCAL) {
441 969853 : lentry = xfs_attr3_leaf_name_local(leaf, i);
442 969853 : error = xrep_xattr_salvage_local_attr(rx, ent, nameidx,
443 : buf_end, lentry);
444 : } else {
445 75 : rentry = xfs_attr3_leaf_name_remote(leaf, i);
446 75 : error = xrep_xattr_salvage_remote_attr(rx, ent, nameidx,
447 : buf_end, rentry, i, bp);
448 : }
449 969928 : if (error)
450 0 : return error;
451 : }
452 :
453 : return 0;
454 : }
455 :
456 : /* Try to recover shortform attrs. */
457 : STATIC int
458 79999 : xrep_xattr_recover_sf(
459 : struct xrep_xattr *rx)
460 : {
461 79999 : struct xfs_scrub *sc = rx->sc;
462 79999 : struct xchk_xattr_buf *ab = sc->buf;
463 79999 : struct xfs_attr_shortform *sf;
464 79999 : struct xfs_attr_sf_entry *sfe;
465 79999 : struct xfs_attr_sf_entry *next;
466 79999 : struct xfs_ifork *ifp;
467 79999 : unsigned char *end;
468 79999 : int i;
469 79999 : int error = 0;
470 :
471 79999 : ifp = xfs_ifork_ptr(rx->sc->ip, XFS_ATTR_FORK);
472 :
473 79999 : bitmap_zero(ab->usedmap, ifp->if_bytes);
474 79999 : sf = (struct xfs_attr_shortform *)rx->sc->ip->i_af.if_u1.if_data;
475 79999 : end = (unsigned char *)ifp->if_u1.if_data + ifp->if_bytes;
476 79999 : xchk_xattr_set_map(sc, ab->usedmap, 0, sizeof(sf->hdr));
477 :
478 79999 : sfe = &sf->list[0];
479 79999 : if ((unsigned char *)sfe > end)
480 : return 0;
481 :
482 198444 : for (i = 0; i < sf->hdr.count; i++) {
483 118445 : if (xchk_should_terminate(sc, &error))
484 0 : return error;
485 :
486 118445 : next = xfs_attr_sf_nextentry(sfe);
487 118445 : if ((unsigned char *)next > end)
488 : break;
489 :
490 118445 : if (xchk_xattr_set_map(sc, ab->usedmap,
491 118445 : (char *)sfe - (char *)sf,
492 : sizeof(struct xfs_attr_sf_entry))) {
493 : /*
494 : * No conflicts with the sf entry; let's save this
495 : * attribute.
496 : */
497 118445 : error = xrep_xattr_salvage_sf_attr(rx, sf, sfe);
498 118445 : if (error)
499 0 : return error;
500 : }
501 :
502 118445 : sfe = next;
503 : }
504 :
505 : return 0;
506 : }
507 :
508 : /*
509 : * Try to return a buffer of xattr data for a given physical extent.
510 : *
511 : * Because the buffer cache get function complains if it finds a buffer
512 : * matching the block number but not matching the length, we must be careful to
513 : * look for incore buffers (up to the maximum length of a remote value) that
514 : * could be hiding anywhere in the physical range. If we find an incore
515 : * buffer, we can pass that to the caller. Optionally, read a single block and
516 : * pass that back.
517 : *
518 : * Note the subtlety that remote attr value blocks for which there is no incore
519 : * buffer will be passed to the callback one block at a time. These buffers
520 : * will not have any ops attached and must be staled to prevent aliasing with
521 : * multiblock buffers once we drop the ILOCK.
522 : */
523 : STATIC int
524 43793 : xrep_xattr_find_buf(
525 : struct xfs_mount *mp,
526 : xfs_fsblock_t fsbno,
527 : xfs_extlen_t max_len,
528 : bool can_read,
529 : struct xfs_buf **bpp)
530 : {
531 87586 : struct xrep_bufscan scan = {
532 43793 : .daddr = XFS_FSB_TO_DADDR(mp, fsbno),
533 43793 : .max_sectors = xrep_bufscan_max_sectors(mp, max_len),
534 43793 : .daddr_step = XFS_FSB_TO_BB(mp, 1),
535 : };
536 43793 : struct xfs_buf *bp;
537 :
538 43793 : while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
539 43793 : *bpp = bp;
540 43793 : return 0;
541 : }
542 :
543 0 : if (!can_read) {
544 0 : *bpp = NULL;
545 0 : return 0;
546 : }
547 :
548 0 : return xfs_buf_read(mp->m_ddev_targp, scan.daddr, XFS_FSB_TO_BB(mp, 1),
549 : XBF_TRYLOCK, bpp, NULL);
550 : }
551 :
552 : /*
553 : * Deal with a buffer that we found during our walk of the attr fork.
554 : *
555 : * Attribute leaf and node blocks are simple -- they're a single block, so we
556 : * can walk them one at a time and we never have to worry about discontiguous
557 : * multiblock buffers like we do for directories.
558 : *
559 : * Unfortunately, remote attr blocks add a lot of complexity here. Each disk
560 : * block is totally self contained, in the sense that the v5 header provides no
561 : * indication that there could be more data in the next block. The incore
562 : * buffers can span multiple blocks, though they never cross extent records.
563 : * However, they don't necessarily start or end on an extent record boundary.
564 : * Therefore, we need a special buffer find function to walk the buffer cache
565 : * for us.
566 : *
567 : * The caller must hold the ILOCK on the file being repaired. We use
568 : * XBF_TRYLOCK here to skip any locked buffer on the assumption that we don't
569 : * own the block and don't want to hang the system on a potentially garbage
570 : * buffer.
571 : */
572 : STATIC int
573 43793 : xrep_xattr_recover_block(
574 : struct xrep_xattr *rx,
575 : xfs_dablk_t dabno,
576 : xfs_fsblock_t fsbno,
577 : xfs_extlen_t max_len,
578 : xfs_extlen_t *actual_len)
579 : {
580 43793 : struct xfs_da_blkinfo *info;
581 43793 : struct xfs_buf *bp;
582 43793 : int error;
583 :
584 43793 : error = xrep_xattr_find_buf(rx->sc->mp, fsbno, max_len, true, &bp);
585 43793 : if (error)
586 : return error;
587 43793 : info = bp->b_addr;
588 43793 : *actual_len = XFS_BB_TO_FSB(rx->sc->mp, bp->b_length);
589 :
590 43793 : trace_xrep_xattr_recover_leafblock(rx->sc->ip, dabno,
591 43793 : be16_to_cpu(info->magic));
592 :
593 : /*
594 : * If the buffer has the right magic number for an attr leaf block and
595 : * passes a structure check (we don't care about checksums), salvage
596 : * as much as we can from the block. */
597 86428 : if (info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC) &&
598 85270 : xrep_buf_verify_struct(bp, &xfs_attr3_leaf_buf_ops) &&
599 42635 : xfs_attr3_leaf_header_check(bp, rx->sc->ip->i_ino) == NULL)
600 42635 : error = xrep_xattr_recover_leaf(rx, bp);
601 :
602 : /*
603 : * If the buffer didn't already have buffer ops set, it was read in by
604 : * the _find_buf function and could very well be /part/ of a multiblock
605 : * remote block. Mark it stale so that it doesn't hang around in
606 : * memory to cause problems.
607 : */
608 43793 : if (bp->b_ops == NULL)
609 0 : xfs_buf_stale(bp);
610 :
611 43793 : xfs_buf_relse(bp);
612 43793 : return error;
613 : }
614 :
615 : /* Insert one xattr key/value. */
616 : STATIC int
617 1088373 : xrep_xattr_insert_rec(
618 : struct xrep_xattr *rx,
619 : const struct xrep_xattr_key *key)
620 : {
621 1088373 : struct xfs_da_args args = {
622 1088373 : .dp = rx->sc->tempip,
623 1088373 : .attr_filter = key->flags,
624 : .attr_flags = XATTR_CREATE,
625 1088373 : .namelen = key->namelen,
626 1088373 : .valuelen = key->valuelen,
627 : .op_flags = XFS_DA_OP_NOTIME,
628 1088373 : .owner = rx->sc->ip->i_ino,
629 : };
630 1088373 : struct xchk_xattr_buf *ab = rx->sc->buf;
631 1088373 : int error;
632 :
633 1088373 : if (key->flags & XFS_ATTR_PARENT)
634 159171 : args.op_flags |= XFS_DA_OP_NVLOOKUP;
635 :
636 : /*
637 : * Grab pointers to the scrub buffer so that we can use them to insert
638 : * attrs into the temp file.
639 : */
640 1088373 : args.name = ab->name;
641 1088373 : args.value = ab->value;
642 :
643 : /*
644 : * The attribute name is stored near the end of the in-core buffer,
645 : * though we reserve one more byte to ensure null termination.
646 : */
647 1088373 : ab->name[XATTR_NAME_MAX] = 0;
648 :
649 1088373 : error = xfblob_load(rx->xattr_blobs, key->name_cookie, ab->name,
650 1088373 : key->namelen);
651 1088372 : if (error)
652 : return error;
653 :
654 1088373 : error = xfblob_free(rx->xattr_blobs, key->name_cookie);
655 1088373 : if (error)
656 : return error;
657 :
658 1088373 : error = xfblob_load(rx->xattr_blobs, key->value_cookie, args.value,
659 1088373 : key->valuelen);
660 1088373 : if (error)
661 : return error;
662 :
663 1088373 : error = xfblob_free(rx->xattr_blobs, key->value_cookie);
664 1088373 : if (error)
665 : return error;
666 :
667 1088373 : ab->name[key->namelen] = 0;
668 :
669 1088373 : if (key->flags & XFS_ATTR_PARENT)
670 159171 : trace_xrep_xattr_insert_pptr(rx->sc->tempip, key->flags,
671 159171 : ab->name, key->namelen, ab->value,
672 159171 : key->valuelen);
673 : else
674 929202 : trace_xrep_xattr_insert_rec(rx->sc->tempip, key->flags,
675 929202 : ab->name, key->namelen, key->valuelen);
676 :
677 : /*
678 : * xfs_attr_set creates and commits its own transaction. If the attr
679 : * already exists, we'll just drop it during the rebuild.
680 : */
681 1088373 : error = xfs_attr_set(&args);
682 1088373 : if (error == -EEXIST)
683 0 : error = 0;
684 :
685 : return error;
686 : }
687 :
688 : /*
689 : * Periodically flush salvaged attributes to the temporary file. This is done
690 : * to reduce the memory requirements of the xattr rebuild because files can
691 : * contain millions of attributes.
692 : */
693 : STATIC int
694 119424 : xrep_xattr_flush_stashed(
695 : struct xrep_xattr *rx)
696 : {
697 119424 : xfarray_idx_t array_cur;
698 119424 : int error;
699 :
700 : /*
701 : * Entering this function, the scrub context has a reference to the
702 : * inode being repaired, the temporary file, and a scrub transaction
703 : * that we use during xattr salvaging to avoid livelocking if there
704 : * are cycles in the xattr structures. We hold ILOCK_EXCL on both
705 : * the inode being repaired, though it is not ijoined to the scrub
706 : * transaction.
707 : *
708 : * To constrain kernel memory use, we occasionally flush salvaged
709 : * xattrs from the xfarray and xfblob structures into the temporary
710 : * file in preparation for swapping the xattr structures at the end.
711 : * Updating the temporary file requires a transaction, so we commit the
712 : * scrub transaction and drop the two ILOCKs so that xfs_attr_set can
713 : * allocate whatever transaction it wants.
714 : *
715 : * We still hold IOLOCK_EXCL on the inode being repaired, which
716 : * prevents anyone from modifying the damaged xattr data while we
717 : * repair it.
718 : */
719 119424 : error = xrep_trans_commit(rx->sc);
720 119424 : if (error)
721 : return error;
722 119424 : xchk_iunlock(rx->sc, XFS_ILOCK_EXCL);
723 :
724 : /*
725 : * Take the IOLOCK of the temporary file while we modify xattrs. This
726 : * isn't strictly required because the temporary file is never revealed
727 : * to userspace, but we follow the same locking rules. We still hold
728 : * sc->ip's IOLOCK.
729 : */
730 119424 : error = xrep_tempfile_iolock_polled(rx->sc);
731 119424 : if (error)
732 : return error;
733 :
734 : /* Add all the salvaged attrs to the temporary file. */
735 1207797 : foreach_xfarray_idx(rx->xattr_records, array_cur) {
736 1088373 : struct xrep_xattr_key key;
737 :
738 1088373 : error = xfarray_load(rx->xattr_records, array_cur, &key);
739 1088373 : if (error)
740 0 : return error;
741 :
742 1088373 : error = xrep_xattr_insert_rec(rx, &key);
743 1088373 : if (error)
744 0 : return error;
745 : }
746 :
747 : /* Empty out both arrays now that we've added the entries. */
748 119424 : xfarray_truncate(rx->xattr_records);
749 119424 : xfblob_truncate(rx->xattr_blobs);
750 :
751 119424 : xrep_tempfile_iounlock(rx->sc);
752 :
753 : /* Recreate the salvage transaction and relock the inode. */
754 119424 : error = xchk_trans_alloc(rx->sc, 0);
755 119424 : if (error)
756 : return error;
757 119424 : xchk_ilock(rx->sc, XFS_ILOCK_EXCL);
758 119424 : return 0;
759 : }
760 :
761 : /* Decide if we've stashed too much xattr data in memory. */
762 : static inline bool
763 43793 : xrep_xattr_want_flush_stashed(
764 : struct xrep_xattr *rx)
765 : {
766 43793 : unsigned long long bytes;
767 :
768 43793 : if (!rx->can_flush)
769 : return false;
770 :
771 43793 : bytes = xfarray_bytes(rx->xattr_records) +
772 43793 : xfblob_bytes(rx->xattr_blobs);
773 43793 : return bytes > XREP_XATTR_MAX_STASH_BYTES;
774 : }
775 :
776 : /*
777 : * Did we observe rename changing parent pointer xattrs while we were flushing
778 : * salvaged attrs?
779 : */
780 : static inline bool
781 16 : xrep_xattr_saw_pptr_conflict(
782 : struct xrep_xattr *rx)
783 : {
784 16 : bool ret;
785 :
786 16 : ASSERT(rx->can_flush);
787 :
788 16 : if (!xfs_has_parent(rx->sc->mp))
789 : return false;
790 :
791 16 : ASSERT(xfs_isilocked(rx->sc->ip, XFS_ILOCK_EXCL));
792 :
793 16 : mutex_lock(&rx->lock);
794 16 : ret = xfarray_bytes(rx->pptr_recs) > 0;
795 16 : mutex_unlock(&rx->lock);
796 :
797 16 : return ret;
798 : }
799 :
800 : /*
801 : * Reset the entire repair state back to initial conditions, now that we've
802 : * detected a parent pointer update to the attr structure while we were
803 : * flushing salvaged attrs. See the locking notes in dir_repair.c for more
804 : * information on why this is all necessary.
805 : */
806 : STATIC int
807 0 : xrep_xattr_full_reset(
808 : struct xrep_xattr *rx)
809 : {
810 0 : struct xfs_scrub *sc = rx->sc;
811 0 : struct xfs_attr_sf_hdr *hdr;
812 0 : struct xfs_ifork *ifp = &sc->tempip->i_af;
813 0 : int error;
814 :
815 0 : trace_xrep_xattr_full_reset(sc->ip, sc->tempip);
816 :
817 : /* The temporary file's data fork had better not be in btree format. */
818 0 : if (sc->tempip->i_df.if_format == XFS_DINODE_FMT_BTREE) {
819 0 : ASSERT(0);
820 0 : return -EIO;
821 : }
822 :
823 : /*
824 : * We begin in transaction context with sc->ip ILOCKed but not joined
825 : * to the transaction. To reset to the initial state, we must hold
826 : * sc->ip's ILOCK to prevent rename from updating parent pointer
827 : * information and the tempfile's ILOCK to clear its contents.
828 : */
829 0 : xchk_iunlock(rx->sc, XFS_ILOCK_EXCL);
830 0 : xrep_tempfile_ilock_both(sc);
831 0 : xfs_trans_ijoin(sc->tp, sc->ip, 0);
832 0 : xfs_trans_ijoin(sc->tp, sc->tempip, 0);
833 :
834 : /*
835 : * Free all the blocks of the attr fork of the temp file, and reset
836 : * it back to local format.
837 : */
838 0 : if (xfs_ifork_has_extents(&sc->tempip->i_af)) {
839 0 : error = xrep_reap_ifork(sc, sc->tempip, XFS_ATTR_FORK);
840 0 : if (error)
841 : return error;
842 :
843 0 : ASSERT(ifp->if_bytes == 0);
844 0 : ifp->if_format = XFS_DINODE_FMT_LOCAL;
845 0 : xfs_idata_realloc(sc->tempip, sizeof(*hdr), XFS_ATTR_FORK);
846 : }
847 :
848 : /* Reinitialize the attr fork to an empty shortform structure. */
849 0 : hdr = (struct xfs_attr_sf_hdr *)ifp->if_u1.if_data;
850 0 : memset(hdr, 0, sizeof(*hdr));
851 0 : hdr->totsize = cpu_to_be16(sizeof(*hdr));
852 0 : xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE | XFS_ILOG_ADATA);
853 :
854 : /*
855 : * Roll this transaction to commit our reset ondisk. The tempfile
856 : * should no longer be joined to the transaction, so we drop its ILOCK.
857 : * This should leave us in transaction context with sc->ip ILOCKed but
858 : * not joined to the transaction.
859 : */
860 0 : error = xrep_roll_trans(sc);
861 0 : if (error)
862 : return error;
863 0 : xrep_tempfile_iunlock(sc);
864 :
865 : /*
866 : * Erase any accumulated parent pointer updates now that we've erased
867 : * the tempfile's attr fork. We're resetting the entire repair state
868 : * back to where we were initially, except now we won't flush salvaged
869 : * xattrs until the very end.
870 : */
871 0 : mutex_lock(&rx->lock);
872 0 : xfarray_truncate(rx->pptr_recs);
873 0 : xfblob_truncate(rx->pptr_names);
874 0 : mutex_unlock(&rx->lock);
875 :
876 0 : rx->can_flush = false;
877 0 : rx->attrs_found = 0;
878 :
879 0 : ASSERT(xfarray_bytes(rx->xattr_records) == 0);
880 0 : ASSERT(xfblob_bytes(rx->xattr_blobs) == 0);
881 : return 0;
882 : }
883 :
884 : /* Extract as many attribute keys and values as we can. */
885 : STATIC int
886 39409 : xrep_xattr_recover(
887 : struct xrep_xattr *rx)
888 : {
889 39409 : struct xfs_bmbt_irec got;
890 39409 : struct xfs_scrub *sc = rx->sc;
891 39409 : struct xfs_da_geometry *geo = sc->mp->m_attr_geo;
892 39409 : xfs_fileoff_t offset;
893 39409 : xfs_extlen_t len;
894 39409 : xfs_dablk_t dabno;
895 39409 : int nmap;
896 39409 : int error;
897 :
898 39409 : restart:
899 : /*
900 : * Iterate each xattr leaf block in the attr fork to scan them for any
901 : * attributes that we might salvage.
902 : */
903 39409 : for (offset = 0;
904 121512 : offset < XFS_MAX_FILEOFF;
905 82103 : offset = got.br_startoff + got.br_blockcount) {
906 82103 : nmap = 1;
907 82103 : error = xfs_bmapi_read(sc->ip, offset, XFS_MAX_FILEOFF - offset,
908 : &got, &nmap, XFS_BMAPI_ATTRFORK);
909 82103 : if (error)
910 0 : return error;
911 82103 : if (nmap != 1)
912 : return -EFSCORRUPTED;
913 82103 : if (!xfs_bmap_is_written_extent(&got))
914 39454 : continue;
915 :
916 42649 : for (dabno = round_up(got.br_startoff, geo->fsbcount);
917 86442 : dabno < got.br_startoff + got.br_blockcount;
918 43793 : dabno += len) {
919 43793 : xfs_fileoff_t curr_offset = dabno - got.br_startoff;
920 43793 : xfs_extlen_t maxlen;
921 :
922 43793 : if (xchk_should_terminate(rx->sc, &error))
923 0 : return error;
924 :
925 43793 : maxlen = min_t(xfs_filblks_t, INT_MAX,
926 : got.br_blockcount - curr_offset);
927 87586 : error = xrep_xattr_recover_block(rx, dabno,
928 43793 : curr_offset + got.br_startblock,
929 : maxlen, &len);
930 43793 : if (error)
931 0 : return error;
932 :
933 43793 : if (xrep_xattr_want_flush_stashed(rx)) {
934 16 : error = xrep_xattr_flush_stashed(rx);
935 16 : if (error)
936 0 : return error;
937 :
938 16 : if (xrep_xattr_saw_pptr_conflict(rx)) {
939 0 : error = xrep_xattr_full_reset(rx);
940 0 : if (error)
941 0 : return error;
942 :
943 0 : goto restart;
944 : }
945 : }
946 : }
947 : }
948 :
949 : return 0;
950 : }
951 :
952 : /*
953 : * Reset the extended attribute fork to a state where we can start re-adding
954 : * the salvaged attributes.
955 : */
956 : STATIC int
957 196994 : xrep_xattr_fork_remove(
958 : struct xfs_scrub *sc,
959 : struct xfs_inode *ip)
960 : {
961 196994 : struct xfs_attr_sf_hdr *hdr;
962 196994 : struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_ATTR_FORK);
963 :
964 : /*
965 : * If the data fork is in btree format, we can't change di_forkoff
966 : * because we could run afoul of the rule that the data fork isn't
967 : * supposed to be in btree format if there's enough space in the fork
968 : * that it could have used extents format. Instead, reinitialize the
969 : * attr fork to have a shortform structure with zero attributes.
970 : */
971 196994 : if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE) {
972 0 : ifp->if_format = XFS_DINODE_FMT_LOCAL;
973 0 : xfs_idata_realloc(ip, (int)sizeof(*hdr) - ifp->if_bytes,
974 : XFS_ATTR_FORK);
975 0 : hdr = (struct xfs_attr_sf_hdr *)ifp->if_u1.if_data;
976 0 : hdr->count = 0;
977 0 : hdr->totsize = cpu_to_be16(sizeof(*hdr));
978 0 : xfs_trans_log_inode(sc->tp, ip,
979 : XFS_ILOG_CORE | XFS_ILOG_ADATA);
980 0 : return 0;
981 : }
982 :
983 : /* If we still have attr fork extents, something's wrong. */
984 196994 : if (ifp->if_nextents != 0) {
985 0 : struct xfs_iext_cursor icur;
986 0 : struct xfs_bmbt_irec irec;
987 0 : unsigned int i = 0;
988 :
989 0 : xfs_emerg(sc->mp,
990 : "inode 0x%llx attr fork still has %llu attr extents, format %d?!",
991 : ip->i_ino, ifp->if_nextents, ifp->if_format);
992 0 : for_each_xfs_iext(ifp, &icur, &irec) {
993 0 : xfs_err(sc->mp,
994 : "[%u]: startoff %llu startblock %llu blockcount %llu state %u",
995 : i++, irec.br_startoff,
996 : irec.br_startblock, irec.br_blockcount,
997 : irec.br_state);
998 : }
999 0 : ASSERT(0);
1000 0 : return -EFSCORRUPTED;
1001 : }
1002 :
1003 196994 : xfs_attr_fork_remove(ip, sc->tp);
1004 196994 : return 0;
1005 : }
1006 :
1007 : /*
1008 : * Free all the attribute fork blocks of the file being repaired and delete the
1009 : * fork. The caller must ILOCK the scrub file and join it to the transaction.
1010 : * This function returns with the inode joined to a clean transaction.
1011 : */
1012 : int
1013 1 : xrep_xattr_reset_fork(
1014 : struct xfs_scrub *sc)
1015 : {
1016 1 : int error;
1017 :
1018 1 : trace_xrep_xattr_reset_fork(sc->ip, sc->ip);
1019 :
1020 : /* Unmap all the attr blocks. */
1021 1 : if (xfs_ifork_has_extents(&sc->ip->i_af)) {
1022 0 : error = xrep_reap_ifork(sc, sc->ip, XFS_ATTR_FORK);
1023 0 : if (error)
1024 : return error;
1025 : }
1026 :
1027 1 : error = xrep_xattr_fork_remove(sc, sc->ip);
1028 1 : if (error)
1029 : return error;
1030 :
1031 1 : return xfs_trans_roll_inode(&sc->tp, sc->ip);
1032 : }
1033 :
1034 : /*
1035 : * Free all the attribute fork blocks of the temporary file and delete the attr
1036 : * fork. The caller must ILOCK the tempfile and join it to the transaction.
1037 : * This function returns with the inode joined to a clean scrub transaction.
1038 : */
1039 : int
1040 196995 : xrep_xattr_reset_tempfile_fork(
1041 : struct xfs_scrub *sc)
1042 : {
1043 196995 : int error;
1044 :
1045 196995 : trace_xrep_xattr_reset_fork(sc->ip, sc->tempip);
1046 :
1047 : /*
1048 : * Wipe out the attr fork of the temp file so that regular inode
1049 : * inactivation won't trip over the corrupt attr fork.
1050 : */
1051 196995 : if (xfs_ifork_has_extents(&sc->tempip->i_af)) {
1052 40834 : error = xrep_reap_ifork(sc, sc->tempip, XFS_ATTR_FORK);
1053 40834 : if (error)
1054 : return error;
1055 : }
1056 :
1057 196995 : error = xrep_xattr_fork_remove(sc, sc->tempip);
1058 196993 : if (error)
1059 : return error;
1060 :
1061 196993 : return xrep_tempfile_roll_trans(sc);
1062 : }
1063 :
1064 : /*
1065 : * Find all the extended attributes for this inode by scraping them out of the
1066 : * attribute key blocks by hand, and flushing them into the temp file.
1067 : * When we're done, free the staging memory before swapping the xattr
1068 : * structures to reduce memory usage.
1069 : */
1070 : STATIC int
1071 119408 : xrep_xattr_salvage_attributes(
1072 : struct xrep_xattr *rx)
1073 : {
1074 119408 : struct xfs_inode *ip = rx->sc->ip;
1075 119408 : int error;
1076 :
1077 : /* Short format xattrs are easy! */
1078 119408 : if (rx->sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL) {
1079 79999 : error = xrep_xattr_recover_sf(rx);
1080 79999 : if (error)
1081 : return error;
1082 :
1083 79999 : return xrep_xattr_flush_stashed(rx);
1084 : }
1085 :
1086 : /*
1087 : * For non-inline xattr structures, the salvage function scans the
1088 : * buffer cache looking for potential attr leaf blocks. The scan
1089 : * requires the ability to lock any buffer found and runs independently
1090 : * of any transaction <-> buffer item <-> buffer linkage. Therefore,
1091 : * roll the transaction to ensure there are no buffers joined. We hold
1092 : * the ILOCK independently of the transaction.
1093 : */
1094 39409 : error = xfs_trans_roll(&rx->sc->tp);
1095 39409 : if (error)
1096 : return error;
1097 :
1098 39409 : error = xfs_iread_extents(rx->sc->tp, ip, XFS_ATTR_FORK);
1099 39409 : if (error)
1100 : return error;
1101 :
1102 39409 : error = xrep_xattr_recover(rx);
1103 39409 : if (error)
1104 : return error;
1105 :
1106 39409 : return xrep_xattr_flush_stashed(rx);
1107 : }
1108 :
1109 : /*
1110 : * Add this stashed incore parent pointer to the temporary file.
1111 : * The caller must hold the tempdir's IOLOCK, must not hold any ILOCKs, and
1112 : * must not be in transaction context.
1113 : */
1114 : STATIC int
1115 0 : xrep_xattr_replay_pptr_update(
1116 : struct xrep_xattr *rx,
1117 : const struct xrep_xattr_pptr *pptr)
1118 : {
1119 0 : struct xfs_scrub *sc = rx->sc;
1120 0 : int error;
1121 :
1122 0 : rx->pptr.p_ino = pptr->p_ino;
1123 0 : rx->pptr.p_gen = pptr->p_gen;
1124 0 : rx->pptr.p_namelen = pptr->namelen;
1125 0 : xfs_parent_irec_hashname(sc->mp, &rx->pptr);
1126 :
1127 0 : switch (pptr->action) {
1128 0 : case XREP_XATTR_PPTR_ADD:
1129 : /* Create parent pointer. */
1130 0 : trace_xrep_xattr_replay_parentadd(sc->tempip, &rx->pptr);
1131 :
1132 0 : error = xfs_parent_set(sc->tempip, sc->ip->i_ino, &rx->pptr,
1133 : &rx->pptr_scratch);
1134 0 : if (error) {
1135 0 : ASSERT(error != -EEXIST);
1136 0 : return error;
1137 : }
1138 : break;
1139 0 : case XREP_XATTR_PPTR_REMOVE:
1140 : /* Remove parent pointer. */
1141 0 : trace_xrep_xattr_replay_parentremove(sc->tempip, &rx->pptr);
1142 :
1143 0 : error = xfs_parent_unset(sc->tempip, sc->ip->i_ino, &rx->pptr,
1144 : &rx->pptr_scratch);
1145 0 : if (error) {
1146 0 : ASSERT(error != -ENOATTR);
1147 0 : return error;
1148 : }
1149 : break;
1150 0 : default:
1151 0 : ASSERT(0);
1152 0 : return -EIO;
1153 : }
1154 :
1155 : return 0;
1156 : }
1157 :
1158 : /*
1159 : * Flush stashed parent pointer updates that have been recorded by the scanner.
1160 : * This is done to reduce the memory requirements of the parent pointer
1161 : * rebuild, since files can have a lot of hardlinks and the fs can be busy.
1162 : *
1163 : * Caller must not hold transactions or ILOCKs. Caller must hold the tempfile
1164 : * IOLOCK.
1165 : */
1166 : STATIC int
1167 119407 : xrep_xattr_replay_pptr_updates(
1168 : struct xrep_xattr *rx)
1169 : {
1170 119407 : xfarray_idx_t array_cur;
1171 119407 : int error;
1172 :
1173 119407 : mutex_lock(&rx->lock);
1174 238814 : foreach_xfarray_idx(rx->pptr_recs, array_cur) {
1175 0 : struct xrep_xattr_pptr pptr;
1176 :
1177 0 : error = xfarray_load(rx->pptr_recs, array_cur, &pptr);
1178 0 : if (error)
1179 0 : goto out_unlock;
1180 :
1181 0 : error = xfblob_load(rx->pptr_names, pptr.name_cookie,
1182 0 : rx->pptr.p_name, pptr.namelen);
1183 0 : if (error)
1184 0 : goto out_unlock;
1185 0 : rx->pptr.p_name[MAXNAMELEN - 1] = 0;
1186 0 : mutex_unlock(&rx->lock);
1187 :
1188 0 : error = xrep_xattr_replay_pptr_update(rx, &pptr);
1189 0 : if (error)
1190 0 : return error;
1191 :
1192 0 : mutex_lock(&rx->lock);
1193 : }
1194 :
1195 : /* Empty out both arrays now that we've added the entries. */
1196 119407 : xfarray_truncate(rx->pptr_recs);
1197 119407 : xfblob_truncate(rx->pptr_names);
1198 119407 : mutex_unlock(&rx->lock);
1199 119407 : return 0;
1200 : out_unlock:
1201 0 : mutex_unlock(&rx->lock);
1202 0 : return error;
1203 : }
1204 :
1205 : /*
1206 : * Remember that we want to create a parent pointer in the tempfile. These
1207 : * stashed actions will be replayed later.
1208 : */
1209 : STATIC int
1210 0 : xrep_xattr_stash_parentadd(
1211 : struct xrep_xattr *rx,
1212 : const struct xfs_name *name,
1213 : const struct xfs_inode *dp)
1214 : {
1215 0 : struct xrep_xattr_pptr pptr = {
1216 : .action = XREP_XATTR_PPTR_ADD,
1217 0 : .namelen = name->len,
1218 0 : .p_ino = dp->i_ino,
1219 0 : .p_gen = VFS_IC(dp)->i_generation,
1220 : };
1221 0 : int error;
1222 :
1223 0 : trace_xrep_xattr_stash_parentadd(rx->sc->tempip, dp, name);
1224 :
1225 0 : error = xfblob_store(rx->pptr_names, &pptr.name_cookie, name->name,
1226 0 : name->len);
1227 0 : if (error)
1228 : return error;
1229 :
1230 0 : return xfarray_append(rx->pptr_recs, &pptr);
1231 : }
1232 :
1233 : /*
1234 : * Remember that we want to remove a parent pointer from the tempfile. These
1235 : * stashed actions will be replayed later.
1236 : */
1237 : STATIC int
1238 0 : xrep_xattr_stash_parentremove(
1239 : struct xrep_xattr *rx,
1240 : const struct xfs_name *name,
1241 : const struct xfs_inode *dp)
1242 : {
1243 0 : struct xrep_xattr_pptr pptr = {
1244 : .action = XREP_XATTR_PPTR_REMOVE,
1245 0 : .namelen = name->len,
1246 0 : .p_ino = dp->i_ino,
1247 0 : .p_gen = VFS_IC(dp)->i_generation,
1248 : };
1249 0 : int error;
1250 :
1251 0 : trace_xrep_xattr_stash_parentremove(rx->sc->tempip, dp, name);
1252 :
1253 0 : error = xfblob_store(rx->pptr_names, &pptr.name_cookie, name->name,
1254 0 : name->len);
1255 0 : if (error)
1256 : return error;
1257 :
1258 0 : return xfarray_append(rx->pptr_recs, &pptr);
1259 : }
1260 :
1261 : /*
1262 : * Capture dirent updates being made by other threads. We will have to replay
1263 : * the parent pointer updates before swapping attr forks.
1264 : */
1265 : STATIC int
1266 84461 : xrep_xattr_live_dirent_update(
1267 : struct notifier_block *nb,
1268 : unsigned long action,
1269 : void *data)
1270 : {
1271 84461 : struct xfs_dir_update_params *p = data;
1272 84461 : struct xrep_xattr *rx;
1273 84461 : struct xfs_scrub *sc;
1274 84461 : int error;
1275 :
1276 84461 : rx = container_of(nb, struct xrep_xattr, hooks.dirent_hook.nb);
1277 84461 : sc = rx->sc;
1278 :
1279 : /*
1280 : * This thread updated a dirent that points to the file that we're
1281 : * repairing, so stash the update for replay against the temporary
1282 : * file.
1283 : */
1284 84461 : if (p->ip->i_ino != sc->ip->i_ino)
1285 : return NOTIFY_DONE;
1286 :
1287 0 : mutex_lock(&rx->lock);
1288 0 : if (p->delta > 0)
1289 0 : error = xrep_xattr_stash_parentadd(rx, p->name, p->dp);
1290 : else
1291 0 : error = xrep_xattr_stash_parentremove(rx, p->name, p->dp);
1292 0 : if (error)
1293 0 : rx->live_update_aborted = true;
1294 0 : mutex_unlock(&rx->lock);
1295 0 : return NOTIFY_DONE;
1296 : }
1297 :
1298 : /*
1299 : * Prepare both inodes' attribute forks for extent swapping. Promote the
1300 : * tempfile from short format to leaf format, and if the file being repaired
1301 : * has a short format attr fork, turn it into an empty extent list.
1302 : */
1303 : STATIC int
1304 40834 : xrep_xattr_swap_prep(
1305 : struct xfs_scrub *sc,
1306 : bool temp_local,
1307 : bool ip_local)
1308 : {
1309 40834 : int error;
1310 :
1311 : /*
1312 : * If the tempfile's attributes are in shortform format, convert that
1313 : * to a single leaf extent so that we can use the atomic extent swap.
1314 : */
1315 40834 : if (temp_local) {
1316 771 : struct xfs_da_args args = {
1317 771 : .dp = sc->tempip,
1318 771 : .geo = sc->mp->m_attr_geo,
1319 : .whichfork = XFS_ATTR_FORK,
1320 771 : .trans = sc->tp,
1321 : .total = 1,
1322 771 : .owner = sc->ip->i_ino,
1323 : };
1324 :
1325 771 : error = xfs_attr_shortform_to_leaf(&args);
1326 771 : if (error)
1327 0 : return error;
1328 :
1329 : /*
1330 : * Roll the deferred log items to get us back to a clean
1331 : * transaction.
1332 : */
1333 771 : error = xfs_defer_finish(&sc->tp);
1334 771 : if (error)
1335 : return error;
1336 : }
1337 :
1338 : /*
1339 : * If the file being repaired had a shortform attribute fork, convert
1340 : * that to an empty extent list in preparation for the atomic extent
1341 : * swap.
1342 : */
1343 40834 : if (ip_local) {
1344 1 : struct xfs_ifork *ifp;
1345 :
1346 1 : ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
1347 :
1348 1 : xfs_idestroy_fork(ifp);
1349 1 : ifp->if_format = XFS_DINODE_FMT_EXTENTS;
1350 1 : ifp->if_nextents = 0;
1351 1 : ifp->if_bytes = 0;
1352 1 : ifp->if_u1.if_root = NULL;
1353 1 : ifp->if_height = 0;
1354 :
1355 1 : xfs_trans_log_inode(sc->tp, sc->ip,
1356 : XFS_ILOG_CORE | XFS_ILOG_ADATA);
1357 : }
1358 :
1359 : return 0;
1360 : }
1361 :
1362 : /* Swap the temporary file's attribute fork with the one being repaired. */
1363 : int
1364 196985 : xrep_xattr_swap(
1365 : struct xfs_scrub *sc,
1366 : struct xrep_tempswap *tx)
1367 : {
1368 196985 : bool ip_local, temp_local;
1369 196985 : int error = 0;
1370 :
1371 196985 : ip_local = sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL;
1372 196985 : temp_local = sc->tempip->i_af.if_format == XFS_DINODE_FMT_LOCAL;
1373 :
1374 : /*
1375 : * If the both files have a local format attr fork and the rebuilt
1376 : * xattr data would fit in the repaired file's attr fork, just copy
1377 : * the contents from the tempfile and declare ourselves done.
1378 : */
1379 196985 : if (ip_local && temp_local) {
1380 156158 : int forkoff;
1381 156158 : int newsize;
1382 :
1383 156158 : newsize = xfs_attr_sf_totsize(sc->tempip);
1384 156158 : forkoff = xfs_attr_shortform_bytesfit(sc->ip, newsize);
1385 156146 : if (forkoff > 0) {
1386 156146 : sc->ip->i_forkoff = forkoff;
1387 156146 : xrep_tempfile_copyout_local(sc, XFS_ATTR_FORK);
1388 156146 : return 0;
1389 : }
1390 : }
1391 :
1392 : /* Otherwise, make sure both attr forks are in block-mapping mode. */
1393 40827 : error = xrep_xattr_swap_prep(sc, temp_local, ip_local);
1394 40834 : if (error)
1395 : return error;
1396 :
1397 40834 : return xrep_tempswap_contents(sc, tx);
1398 : }
1399 :
1400 : /*
1401 : * Finish replaying stashed parent pointer updates, allocate a transaction for
1402 : * swapping extents, and take the ILOCKs of both files before we commit the new
1403 : * extended attribute structure.
1404 : */
1405 : STATIC int
1406 119407 : xrep_xattr_finalize_tempfile(
1407 : struct xrep_xattr *rx)
1408 : {
1409 119407 : struct xfs_scrub *sc = rx->sc;
1410 119407 : int error;
1411 :
1412 119407 : if (!xfs_has_parent(sc->mp))
1413 0 : return xrep_tempswap_trans_alloc(sc, XFS_ATTR_FORK, &rx->tx);
1414 :
1415 119407 : error = xrep_xattr_replay_pptr_updates(rx);
1416 119407 : if (error)
1417 : return error;
1418 :
1419 119407 : error = xrep_tempswap_trans_alloc(sc, XFS_ATTR_FORK, &rx->tx);
1420 119407 : if (error)
1421 : return error;
1422 :
1423 : /*
1424 : * We rely on the caller's hold on @sc->ip's IOLOCK_EXCL to quiesce all
1425 : * possible parent pointer updates during the time when we did not hold
1426 : * the ILOCK. There should not be any pptr updates to replay, but
1427 : * check anyway.
1428 : */
1429 119407 : if (xfarray_length(rx->pptr_recs) != 0) {
1430 0 : ASSERT(xfarray_length(rx->pptr_recs) == 0);
1431 0 : return -EFSCORRUPTED;
1432 : }
1433 :
1434 : return 0;
1435 : }
1436 :
1437 : /*
1438 : * Swap the new extended attribute data (which we created in the tempfile) into
1439 : * the file being repaired.
1440 : */
1441 : STATIC int
1442 119408 : xrep_xattr_rebuild_tree(
1443 : struct xrep_xattr *rx)
1444 : {
1445 119408 : struct xfs_scrub *sc = rx->sc;
1446 119408 : int error;
1447 :
1448 : /*
1449 : * If we didn't find any attributes to salvage, repair the file by
1450 : * zapping its attr fork.
1451 : */
1452 119408 : if (rx->attrs_found == 0) {
1453 1 : xfs_trans_ijoin(sc->tp, sc->ip, 0);
1454 1 : error = xrep_xattr_reset_fork(sc);
1455 1 : if (error)
1456 : return error;
1457 :
1458 1 : goto forget_acls;
1459 : }
1460 :
1461 119407 : trace_xrep_xattr_rebuild_tree(sc->ip, sc->tempip);
1462 :
1463 : /*
1464 : * Commit the repair transaction and drop the ILOCKs so that we can use
1465 : * the atomic extent swap helper functions to compute the correct
1466 : * resource reservations.
1467 : *
1468 : * We still hold IOLOCK_EXCL (aka i_rwsem) which will prevent xattr
1469 : * modifications, but there's nothing to prevent userspace from reading
1470 : * the attributes until we're ready for the swap operation. Reads will
1471 : * return -EIO without shutting down the fs, so we're ok with that.
1472 : */
1473 119407 : error = xrep_trans_commit(sc);
1474 119407 : if (error)
1475 : return error;
1476 :
1477 119407 : xchk_iunlock(sc, XFS_ILOCK_EXCL);
1478 :
1479 : /*
1480 : * Take the IOLOCK on the temporary file so that we can run xattr
1481 : * operations with the same locks held as we would for a normal file.
1482 : * We still hold sc->ip's IOLOCK.
1483 : */
1484 119407 : error = xrep_tempfile_iolock_polled(rx->sc);
1485 119407 : if (error)
1486 : return error;
1487 :
1488 : /*
1489 : * Allocate transaction, lock inodes, and make sure that we've replayed
1490 : * all the stashed parent pointer updates to the temp file. After this
1491 : * point, we're ready to swapext.
1492 : */
1493 119407 : error = xrep_xattr_finalize_tempfile(rx);
1494 119407 : if (error)
1495 : return error;
1496 :
1497 : /*
1498 : * Exchange the blocks mapped by the tempfile's attr fork with the file
1499 : * being repaired. The old attr blocks will then be attached to the
1500 : * tempfile, so reap its attr fork.
1501 : */
1502 119407 : error = xrep_xattr_swap(sc, &rx->tx);
1503 119407 : if (error)
1504 : return error;
1505 :
1506 119407 : error = xrep_xattr_reset_tempfile_fork(sc);
1507 119407 : if (error)
1508 : return error;
1509 :
1510 119407 : forget_acls:
1511 : /* Invalidate cached ACLs now that we've reloaded all the xattrs. */
1512 119408 : xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_FILE);
1513 119407 : xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_DEFAULT);
1514 119407 : return 0;
1515 : }
1516 :
1517 : /* Tear down all the incore scan stuff we created. */
1518 : STATIC void
1519 119407 : xrep_xattr_teardown(
1520 : struct xrep_xattr *rx)
1521 : {
1522 119407 : if (xfs_has_parent(rx->sc->mp))
1523 119407 : xfs_dir_hook_del(rx->sc->mp, &rx->hooks);
1524 119408 : if (rx->pptr_names)
1525 119408 : xfblob_destroy(rx->pptr_names);
1526 119405 : if (rx->pptr_recs)
1527 119405 : xfarray_destroy(rx->pptr_recs);
1528 119406 : xfblob_destroy(rx->xattr_blobs);
1529 119407 : xfarray_destroy(rx->xattr_records);
1530 119405 : mutex_destroy(&rx->lock);
1531 119406 : kfree(rx);
1532 119407 : }
1533 :
1534 : /* Set up the filesystem scan so we can regenerate extended attributes. */
1535 : STATIC int
1536 119408 : xrep_xattr_setup_scan(
1537 : struct xfs_scrub *sc,
1538 : struct xrep_xattr **rxp)
1539 : {
1540 119408 : struct xrep_xattr *rx;
1541 119408 : char *descr;
1542 119408 : int max_len;
1543 119408 : int error;
1544 :
1545 119408 : rx = kzalloc(sizeof(struct xrep_xattr), XCHK_GFP_FLAGS);
1546 119408 : if (!rx)
1547 : return -ENOMEM;
1548 119408 : rx->sc = sc;
1549 119408 : rx->can_flush = true;
1550 :
1551 119408 : mutex_init(&rx->lock);
1552 :
1553 : /*
1554 : * Allocate enough memory to handle loading local attr values from the
1555 : * xfblob data while flushing stashed attrs to the temporary file.
1556 : * We only realloc the buffer when salvaging remote attr values.
1557 : */
1558 119408 : max_len = xfs_attr_leaf_entsize_local_max(sc->mp->m_attr_geo->blksize);
1559 119408 : error = xchk_setup_xattr_buf(rx->sc, max_len);
1560 119408 : if (error == -ENOMEM)
1561 : error = -EDEADLOCK;
1562 119408 : if (error)
1563 0 : goto out_rx;
1564 :
1565 : /* Set up some staging for salvaged attribute keys and values */
1566 119408 : descr = xchk_xfile_ino_descr(sc, "xattr keys");
1567 119408 : error = xfarray_create(descr, 0, sizeof(struct xrep_xattr_key),
1568 : &rx->xattr_records);
1569 119408 : kfree(descr);
1570 119408 : if (error)
1571 0 : goto out_rx;
1572 :
1573 119408 : descr = xchk_xfile_ino_descr(sc, "xattr names");
1574 119408 : error = xfblob_create(descr, &rx->xattr_blobs);
1575 119408 : kfree(descr);
1576 119408 : if (error)
1577 0 : goto out_keys;
1578 :
1579 119408 : if (xfs_has_parent(sc->mp)) {
1580 119408 : ASSERT(sc->flags & XCHK_FSGATES_DIRENTS);
1581 :
1582 119408 : descr = xchk_xfile_ino_descr(sc,
1583 : "xattr retained parent pointer entries");
1584 119408 : error = xfarray_create(descr, 0,
1585 : sizeof(struct xrep_xattr_pptr),
1586 : &rx->pptr_recs);
1587 119408 : kfree(descr);
1588 119408 : if (error)
1589 0 : goto out_values;
1590 :
1591 119408 : descr = xchk_xfile_ino_descr(sc,
1592 : "xattr retained parent pointer names");
1593 119408 : error = xfblob_create(descr, &rx->pptr_names);
1594 119408 : kfree(descr);
1595 119408 : if (error)
1596 0 : goto out_pprecs;
1597 :
1598 119408 : xfs_hook_setup(&rx->hooks.dirent_hook,
1599 : xrep_xattr_live_dirent_update);
1600 119408 : error = xfs_dir_hook_add(sc->mp, &rx->hooks);
1601 119408 : if (error)
1602 0 : goto out_ppnames;
1603 : }
1604 :
1605 119408 : *rxp = rx;
1606 119408 : return 0;
1607 : out_ppnames:
1608 0 : xfblob_destroy(rx->pptr_names);
1609 0 : out_pprecs:
1610 0 : xfarray_destroy(rx->pptr_recs);
1611 0 : out_values:
1612 0 : xfblob_destroy(rx->xattr_blobs);
1613 0 : out_keys:
1614 0 : xfarray_destroy(rx->xattr_records);
1615 0 : out_rx:
1616 0 : mutex_destroy(&rx->lock);
1617 0 : kfree(rx);
1618 0 : return error;
1619 : }
1620 :
1621 : /*
1622 : * Repair the extended attribute metadata.
1623 : *
1624 : * XXX: Remote attribute value buffers encompass the entire (up to 64k) buffer.
1625 : * The buffer cache in XFS can't handle aliased multiblock buffers, so this
1626 : * might misbehave if the attr fork is crosslinked with other filesystem
1627 : * metadata.
1628 : */
1629 : int
1630 3369068 : xrep_xattr(
1631 : struct xfs_scrub *sc)
1632 : {
1633 3369068 : struct xrep_xattr *rx = NULL;
1634 3369068 : int error;
1635 :
1636 3369068 : if (!xfs_inode_hasattr(sc->ip))
1637 : return -ENOENT;
1638 :
1639 : /* The rmapbt is required to reap the old attr fork. */
1640 3369088 : if (!xfs_has_rmapbt(sc->mp))
1641 : return -EOPNOTSUPP;
1642 :
1643 119408 : error = xrep_xattr_setup_scan(sc, &rx);
1644 119408 : if (error)
1645 : return error;
1646 :
1647 119408 : ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL);
1648 :
1649 119408 : error = xrep_xattr_salvage_attributes(rx);
1650 119408 : if (error)
1651 0 : goto out_scan;
1652 :
1653 119408 : if (rx->live_update_aborted) {
1654 0 : error = -EIO;
1655 0 : goto out_scan;
1656 : }
1657 :
1658 : /* Last chance to abort before we start committing fixes. */
1659 119408 : if (xchk_should_terminate(sc, &error))
1660 0 : goto out_scan;
1661 :
1662 119408 : error = xrep_xattr_rebuild_tree(rx);
1663 119407 : if (error)
1664 : goto out_scan;
1665 :
1666 119407 : out_scan:
1667 119407 : xrep_xattr_teardown(rx);
1668 119406 : return error;
1669 : }
|