Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2022-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_btree.h"
13 : #include "xfs_log_format.h"
14 : #include "xfs_trans.h"
15 : #include "xfs_sb.h"
16 : #include "xfs_inode.h"
17 : #include "xfs_alloc.h"
18 : #include "xfs_alloc_btree.h"
19 : #include "xfs_ialloc.h"
20 : #include "xfs_ialloc_btree.h"
21 : #include "xfs_rmap.h"
22 : #include "xfs_rmap_btree.h"
23 : #include "xfs_refcount.h"
24 : #include "xfs_refcount_btree.h"
25 : #include "xfs_extent_busy.h"
26 : #include "xfs_ag.h"
27 : #include "xfs_ag_resv.h"
28 : #include "xfs_quota.h"
29 : #include "xfs_qm.h"
30 : #include "xfs_bmap.h"
31 : #include "xfs_da_format.h"
32 : #include "xfs_da_btree.h"
33 : #include "xfs_attr.h"
34 : #include "xfs_attr_remote.h"
35 : #include "xfs_defer.h"
36 : #include "scrub/scrub.h"
37 : #include "scrub/common.h"
38 : #include "scrub/trace.h"
39 : #include "scrub/repair.h"
40 : #include "scrub/bitmap.h"
41 : #include "scrub/reap.h"
42 :
43 : /*
44 : * Disposal of Blocks from Old Metadata
45 : *
46 : * Now that we've constructed a new btree to replace the damaged one, we want
47 : * to dispose of the blocks that (we think) the old btree was using.
48 : * Previously, we used the rmapbt to collect the extents (bitmap) with the
49 : * rmap owner corresponding to the tree we rebuilt, collected extents for any
50 : * blocks with the same rmap owner that are owned by another data structure
51 : * (sublist), and subtracted sublist from bitmap. In theory the extents
52 : * remaining in bitmap are the old btree's blocks.
53 : *
54 : * Unfortunately, it's possible that the btree was crosslinked with other
55 : * blocks on disk. The rmap data can tell us if there are multiple owners, so
56 : * if the rmapbt says there is an owner of this block other than @oinfo, then
57 : * the block is crosslinked. Remove the reverse mapping and continue.
58 : *
59 : * If there is one rmap record, we can free the block, which removes the
60 : * reverse mapping but doesn't add the block to the free space. Our repair
61 : * strategy is to hope the other metadata objects crosslinked on this block
62 : * will be rebuilt (atop different blocks), thereby removing all the cross
63 : * links.
64 : *
65 : * If there are no rmap records at all, we also free the block. If the btree
66 : * being rebuilt lives in the free space (bnobt/cntbt/rmapbt) then there isn't
67 : * supposed to be a rmap record and everything is ok. For other btrees there
68 : * had to have been an rmap entry for the block to have ended up on @bitmap,
69 : * so if it's gone now there's something wrong and the fs will shut down.
70 : *
71 : * Note: If the only rmap records covering a block bear the same rmap owner
72 : * as the btree we're trying to rebuild, there are two cases. If the block
73 : * really is owned by another data structure with that same owner, it will
74 : * be in sublist and therefore doesn't need disposal. If the block is not
75 : * actually owned by anything else with that owner, then the block will be
76 : * freed.
77 : *
78 : * The caller is responsible for locking the AG headers/inode for the entire
79 : * rebuild operation so that nothing else can sneak in and change the incore
80 : * state while we're not looking. We must also invalidate any buffers
81 : * associated with @bitmap.
82 : */
83 :
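/*
 * Illustrative sketch (not part of this file): a minimal caller of the reap
 * code, assuming the xagb_bitmap helpers from scrub/bitmap.h and a made-up
 * extent.  A real repair function would populate the bitmap from the rmapbt
 * and subtract extents owned by other structures first, as described above.
 */
#if 0	/* example only */
STATIC int
xrep_example_reap_old_blocks(
	struct xfs_scrub	*sc,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	struct xagb_bitmap	old_blocks;
	int			error;

	/* Collect the extents that (we think) the old structure was using. */
	xagb_bitmap_init(&old_blocks);
	error = xagb_bitmap_set(&old_blocks, agbno, len);
	if (error)
		goto out;

	/* Dispose of every block, handling crosslinks as described above. */
	error = xrep_reap_agblocks(sc, &old_blocks, &XFS_RMAP_OINFO_AG,
			XFS_AG_RESV_NONE);
out:
	xagb_bitmap_destroy(&old_blocks);
	return error;
}
#endif
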
84 : /* Information about reaping extents after a repair. */
85 : struct xreap_state {
86 : struct xfs_scrub *sc;
87 :
88 : /* Reverse mapping owner and metadata reservation type. */
89 : const struct xfs_owner_info *oinfo;
90 : enum xfs_ag_resv_type resv;
91 :
92 : /* If true, roll the transaction before reaping the next extent. */
93 : bool force_roll;
94 :
95 : /* Number of deferred reaps attached to the current transaction. */
96 : unsigned int deferred;
97 :
98 : /* Number of invalidated buffers logged to the current transaction. */
99 : unsigned int invalidated;
100 :
101 : /* Number of deferred reaps queued during the whole reap sequence. */
102 : unsigned long long total_deferred;
103 : };
104 :
105 : /* Put a block back on the AGFL. */
106 : STATIC int
107 0 : xreap_put_freelist(
108 : struct xfs_scrub *sc,
109 : xfs_agblock_t agbno)
110 : {
111 0 : struct xfs_buf *agfl_bp;
112 0 : int error;
113 :
114 : /* Make sure there's space on the freelist. */
115 0 : error = xrep_fix_freelist(sc, 0);
116 0 : if (error)
117 : return error;
118 :
119 : /*
120 : * Since we're "freeing" a lost block onto the AGFL, we have to
121 : * create an rmap for the block prior to merging it or else other
122 : * parts will break.
123 : */
124 0 : error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno, 1,
125 : &XFS_RMAP_OINFO_AG);
126 0 : if (error)
127 : return error;
128 :
129 : /* Put the block on the AGFL. */
130 0 : error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
131 0 : if (error)
132 : return error;
133 :
134 0 : error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp,
135 : agfl_bp, agbno, 0);
136 0 : if (error)
137 : return error;
138 0 : xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1,
139 : XFS_EXTENT_BUSY_SKIP_DISCARD);
140 :
141 0 : return 0;
142 : }
143 :
144 : /* Are there any uncommitted reap operations? */
145 2884222 : static inline bool xreap_dirty(const struct xreap_state *rs)
146 : {
147 2884222 : if (rs->force_roll)
148 : return true;
149 2884222 : if (rs->deferred)
150 : return true;
151 2701452 : if (rs->invalidated)
152 : return true;
153 2701452 : if (rs->total_deferred)
154 1 : return true;
155 : return false;
156 : }
157 :
158 : #define XREAP_MAX_DEFERRED (128)
159 : #define XREAP_MAX_BINVAL (2048)
160 :
161 : /*
162 : * Decide if we want to roll the transaction after reaping an extent. We don't
163 : * want to overrun the transaction reservation, so we prohibit more than
164 : * 128 EFIs per transaction. For the same reason, we limit the number
165 : * of buffer invalidations to 2048.
166 : */
167 330214 : static inline bool xreap_want_roll(const struct xreap_state *rs)
168 : {
169 330214 : if (rs->force_roll)
170 : return true;
171 330214 : if (rs->deferred > XREAP_MAX_DEFERRED)
172 : return true;
173 329548 : if (rs->invalidated > XREAP_MAX_BINVAL)
174 6 : return true;
175 : return false;
176 : }
177 :
178 : static inline void xreap_reset(struct xreap_state *rs)
179 : {
180 672 : rs->total_deferred += rs->deferred;
181 672 : rs->deferred = 0;
182 672 : rs->invalidated = 0;
183 672 : rs->force_roll = false;
184 672 : }
185 :
186 : #define XREAP_MAX_DEFER_CHAIN (2048)
187 :
188 : /*
189 : * Decide if we want to finish the deferred ops that are attached to the scrub
190 : * transaction. We don't want to queue huge chains of deferred ops because
191 : * that can consume a lot of log space and kernel memory. Hence we trigger an
192 : * xfs_defer_finish if there are more than 2048 deferred reap operations or the
193 : * caller did some real work.
194 : */
195 : static inline bool
196 351066 : xreap_want_defer_finish(const struct xreap_state *rs)
197 : {
198 351066 : if (rs->force_roll)
199 : return true;
200 330242 : if (rs->total_deferred > XREAP_MAX_DEFER_CHAIN)
201 35 : return true;
202 : return false;
203 : }
204 :
205 : static inline void xreap_defer_finish_reset(struct xreap_state *rs)
206 : {
207 20859 : rs->total_deferred = 0;
208 20859 : rs->deferred = 0;
209 20859 : rs->invalidated = 0;
210 20859 : rs->force_roll = false;
211 20859 : }
212 :
213 : /*
214 : * Compute the maximum length of a buffer cache scan (in units of sectors),
215 : * given a quantity of fs blocks.
216 : */
217 : xfs_daddr_t
218 1417676 : xrep_bufscan_max_sectors(
219 : struct xfs_mount *mp,
220 : xfs_extlen_t fsblocks)
221 : {
222 1417676 : int max_fsbs;
223 :
224 : /* Remote xattr values are the largest buffers that we support. */
225 1417676 : max_fsbs = xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
226 :
227 1417340 : return XFS_FSB_TO_BB(mp, min_t(xfs_extlen_t, fsblocks, max_fsbs));
228 : }
229 :
230 : /*
231 : * Return an incore buffer from a sector scan, or NULL if there are no buffers
232 : * left to return.
233 : */
234 : struct xfs_buf *
235 2500624 : xrep_bufscan_advance(
236 : struct xfs_mount *mp,
237 : struct xrep_bufscan *scan)
238 : {
239 2500624 : scan->__sector_count += scan->daddr_step;
240 12795292 : while (scan->__sector_count <= scan->max_sectors) {
241 11434367 : struct xfs_buf *bp = NULL;
242 11434367 : int error;
243 :
244 11434367 : error = xfs_buf_incore(mp->m_ddev_targp, scan->daddr,
245 : scan->__sector_count, XBF_LIVESCAN, &bp);
246 11435083 : if (!error)
247 1140415 : return bp;
248 :
249 10294668 : scan->__sector_count += scan->daddr_step;
250 : }
251 :
252 : return NULL;
253 : }
254 :
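/*
 * Illustrative sketch (not part of this file): the intended calling pattern
 * for the bufscan helpers, mirroring the real loops below.  The tp, mp,
 * agno, bno, and len variables are assumed context, not real code here.
 */
#if 0	/* example only */
	struct xrep_bufscan	scan = {
		.daddr		= XFS_AGB_TO_DADDR(mp, agno, bno),
		.max_sectors	= xrep_bufscan_max_sectors(mp, len),
		.daddr_step	= XFS_FSB_TO_BB(mp, 1),
	};
	struct xfs_buf		*bp;

	/* Invalidate every incore buffer of any plausible size at bno. */
	while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
		xfs_trans_bjoin(tp, bp);
		xfs_trans_binval(tp, bp);
	}
#endif
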
255 : /* Try to invalidate the incore buffers for an extent that we're freeing. */
256 : STATIC void
257 351330 : xreap_agextent_binval(
258 : struct xreap_state *rs,
259 : xfs_agblock_t agbno,
260 : xfs_extlen_t *aglenp)
261 : {
262 351330 : struct xfs_scrub *sc = rs->sc;
263 351330 : struct xfs_perag *pag = sc->sa.pag;
264 351330 : struct xfs_mount *mp = sc->mp;
265 351330 : xfs_agnumber_t agno = sc->sa.pag->pag_agno;
266 351330 : xfs_agblock_t agbno_next = agbno + *aglenp;
267 351330 : xfs_agblock_t bno = agbno;
268 :
269 : /*
270 : * Avoid invalidating AG headers and post-EOFS blocks because we never
271 : * own those.
272 : */
273 351330 : if (!xfs_verify_agbno(pag, agbno) ||
274 351330 : !xfs_verify_agbno(pag, agbno_next - 1))
275 : return;
276 :
277 : /*
278 : * If there are incore buffers for these blocks, invalidate them. We
279 : * assume that the lack of any other known owners means that the buffer
280 : * can be locked without risk of deadlocking. The buffer cache cannot
281 : * detect aliasing, so employ nested loops to scan for incore buffers
282 : * of any plausible size.
283 : */
284 1607607 : while (bno < agbno_next) {
285 5023248 : struct xrep_bufscan scan = {
286 1255938 : .daddr = XFS_AGB_TO_DADDR(mp, agno, bno),
287 1255938 : .max_sectors = xrep_bufscan_max_sectors(mp,
288 : agbno_next - bno),
289 1255686 : .daddr_step = XFS_FSB_TO_BB(mp, 1),
290 : };
291 1255686 : struct xfs_buf *bp;
292 :
293 2249119 : while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
294 992992 : xfs_trans_bjoin(sc->tp, bp);
295 993247 : xfs_trans_binval(sc->tp, bp);
296 993439 : rs->invalidated++;
297 :
298 : /*
299 : * Stop invalidating if we've hit the limit; we should
300 : * still have enough reservation left to free however
301 : * far we've gotten.
302 : */
303 993439 : if (rs->invalidated > XREAP_MAX_BINVAL) {
304 6 : *aglenp -= agbno_next - bno;
305 6 : goto out;
306 : }
307 : }
308 :
309 1256277 : bno++;
310 : }
311 :
312 351669 : out:
313 351675 : trace_xreap_agextent_binval(sc->sa.pag, agbno, *aglenp);
314 : }
315 :
316 : /*
317 : * Figure out the longest run of blocks that we can dispose of with a single
318 : * call. Cross-linked blocks should have their reverse mappings removed, but
319 : * single-owner extents can be freed. AGFL blocks can only be put back one at
320 : * a time.
321 : */
322 : STATIC int
323 351198 : xreap_agextent_select(
324 : struct xreap_state *rs,
325 : xfs_agblock_t agbno,
326 : xfs_agblock_t agbno_next,
327 : bool *crosslinked,
328 : xfs_extlen_t *aglenp)
329 : {
330 351198 : struct xfs_scrub *sc = rs->sc;
331 351198 : struct xfs_btree_cur *cur;
332 351198 : xfs_agblock_t bno = agbno + 1;
333 351198 : xfs_extlen_t len = 1;
334 351198 : int error;
335 :
336 : /*
337 : * Determine if there are any other rmap records covering the first
338 : * block of this extent. If so, the block is crosslinked.
339 : */
340 351198 : cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
341 : sc->sa.pag);
342 351681 : error = xfs_rmap_has_other_keys(cur, agbno, 1, rs->oinfo,
343 : crosslinked);
344 351254 : if (error)
345 0 : goto out_cur;
346 :
347 : /* AGFL blocks can only be dealt with one at a time. */
348 351254 : if (rs->resv == XFS_AG_RESV_AGFL)
349 0 : goto out_found;
350 :
351 : /*
352 : * Figure out how many of the subsequent blocks have the same crosslink
353 : * status.
354 : */
355 1268441 : while (bno < agbno_next) {
356 917090 : bool also_crosslinked;
357 :
358 917090 : error = xfs_rmap_has_other_keys(cur, bno, 1, rs->oinfo,
359 : &also_crosslinked);
360 917187 : if (error)
361 0 : goto out_cur;
362 :
363 917187 : if (*crosslinked != also_crosslinked)
364 : break;
365 :
366 917187 : len++;
367 917187 : bno++;
368 : }
369 :
370 351351 : out_found:
371 351351 : *aglenp = len;
372 351351 : trace_xreap_agextent_select(sc->sa.pag, agbno, len, *crosslinked);
373 351104 : out_cur:
374 351104 : xfs_btree_del_cursor(cur, error);
375 351666 : return error;
376 : }
377 :
378 : /*
379 : * Dispose of as much of the beginning of this AG extent as possible. The
380 : * number of blocks disposed of will be returned in @aglenp.
381 : */
382 : STATIC int
383 351365 : xreap_agextent_iter(
384 : struct xreap_state *rs,
385 : xfs_agblock_t agbno,
386 : xfs_extlen_t *aglenp,
387 : bool crosslinked)
388 : {
389 351365 : struct xfs_scrub *sc = rs->sc;
390 351365 : xfs_fsblock_t fsbno;
391 351365 : int error = 0;
392 :
393 351365 : fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, agbno);
394 :
395 : /*
396 : * If there are other rmappings, this block is cross linked and must
397 : * not be freed. Remove the reverse mapping and move on. Otherwise,
398 : * we were the only owner of the block, so free the extent, which will
399 : * also remove the rmap.
400 : *
401 : * XXX: XFS doesn't support detecting the case where a single block
402 : * metadata structure is crosslinked with a multi-block structure
403 : * because the buffer cache doesn't detect aliasing problems, so we
404 : * can't fix 100% of crosslinking problems (yet). The verifiers will
405 : * blow on writeout, the filesystem will shut down, and the admin gets
406 : * to run xfs_repair.
407 : */
408 351365 : if (crosslinked) {
409 0 : trace_xreap_dispose_unmap_extent(sc->sa.pag, agbno, *aglenp);
410 :
411 0 : rs->force_roll = true;
412 :
413 0 : if (rs->oinfo == &XFS_RMAP_OINFO_COW) {
414 : /*
415 : * If we're unmapping CoW staging extents, remove the
416 : * records from the refcountbt, which will remove the
417 : * rmap record as well.
418 : */
419 0 : xfs_refcount_free_cow_extent(sc->tp, fsbno, *aglenp);
420 0 : return 0;
421 : }
422 :
423 0 : return xfs_rmap_free(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno,
424 : *aglenp, rs->oinfo);
425 : }
426 :
427 351365 : trace_xreap_dispose_free_extent(sc->sa.pag, agbno, *aglenp);
428 :
429 : /*
430 : * Invalidate as many buffers as we can, starting at agbno. If this
431 : * function sets *aglenp to zero, the transaction is full of logged
432 : * buffer invalidations, so we need to return early so that we can
433 : * roll and retry.
434 : */
435 351023 : xreap_agextent_binval(rs, agbno, aglenp);
436 351491 : if (*aglenp == 0) {
437 0 : ASSERT(xreap_want_roll(rs));
438 0 : return 0;
439 : }
440 :
441 : /*
442 : * If we're getting rid of CoW staging extents, use deferred work items
443 : * to remove the refcountbt records (which removes the rmap records)
444 : * and free the extent. We're not worried about the system going down
445 : * here because log recovery walks the refcount btree to clean out the
446 : * CoW staging extents.
447 : */
448 351491 : if (rs->oinfo == &XFS_RMAP_OINFO_COW) {
449 20824 : ASSERT(rs->resv == XFS_AG_RESV_NONE);
450 :
451 20824 : xfs_refcount_free_cow_extent(sc->tp, fsbno, *aglenp);
452 20824 : error = __xfs_free_extent_later(sc->tp, fsbno, *aglenp, NULL,
453 : rs->resv, true);
454 20824 : if (error)
455 : return error;
456 :
457 20824 : rs->force_roll = true;
458 20824 : return 0;
459 : }
460 :
461 : /* Put blocks back on the AGFL one at a time. */
462 330667 : if (rs->resv == XFS_AG_RESV_AGFL) {
463 0 : ASSERT(*aglenp == 1);
464 0 : error = xreap_put_freelist(sc, agbno);
465 0 : if (error)
466 : return error;
467 :
468 0 : rs->force_roll = true;
469 0 : return 0;
470 : }
471 :
472 : /*
473 : * Use deferred frees to get rid of the old btree blocks to try to
474 : * minimize the window in which we could crash and lose the old blocks.
475 : */
476 330667 : error = __xfs_free_extent_later(sc->tp, fsbno, *aglenp, rs->oinfo,
477 : rs->resv, true);
478 330625 : if (error)
479 : return error;
480 :
481 330625 : rs->deferred++;
482 330625 : return 0;
483 : }
484 :
485 : /*
486 : * Break an AG metadata extent into sub-extents by fate (crosslinked, not
487 : * crosslinked), and dispose of each sub-extent separately.
488 : */
489 : STATIC int
490 280948 : xreap_agmeta_extent(
491 : uint64_t fsbno,
492 : uint64_t len,
493 : void *priv)
494 : {
495 280948 : struct xreap_state *rs = priv;
496 280948 : struct xfs_scrub *sc = rs->sc;
497 280948 : xfs_agblock_t agbno = fsbno;
498 280948 : xfs_agblock_t agbno_next = agbno + len;
499 280948 : int error = 0;
500 :
501 280948 : ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
502 280948 : ASSERT(sc->ip == NULL);
503 :
504 561441 : while (agbno < agbno_next) {
505 280672 : xfs_extlen_t aglen;
506 280672 : bool crosslinked;
507 :
508 280672 : error = xreap_agextent_select(rs, agbno, agbno_next,
509 : &crosslinked, &aglen);
510 281414 : if (error)
511 0 : return error;
512 :
513 281414 : error = xreap_agextent_iter(rs, agbno, &aglen, crosslinked);
514 281061 : if (error)
515 0 : return error;
516 :
517 281061 : if (xreap_want_defer_finish(rs)) {
518 24 : error = xrep_defer_finish(sc);
519 24 : if (error)
520 0 : return error;
521 24 : xreap_defer_finish_reset(rs);
522 280683 : } else if (xreap_want_roll(rs)) {
523 491 : error = xrep_roll_ag_trans(sc);
524 491 : if (error)
525 0 : return error;
526 491 : xreap_reset(rs);
527 : }
528 :
529 280493 : agbno += aglen;
530 : }
531 :
532 : return 0;
533 : }
534 :
535 : /* Dispose of every block of every AG metadata extent in the bitmap. */
536 : int
537 254834 : xrep_reap_agblocks(
538 : struct xfs_scrub *sc,
539 : struct xagb_bitmap *bitmap,
540 : const struct xfs_owner_info *oinfo,
541 : enum xfs_ag_resv_type type)
542 : {
543 254834 : struct xreap_state rs = {
544 : .sc = sc,
545 : .oinfo = oinfo,
546 : .resv = type,
547 : };
548 254834 : int error;
549 :
550 254834 : ASSERT(xfs_has_rmapbt(sc->mp));
551 254834 : ASSERT(sc->ip == NULL);
552 :
553 254834 : error = xagb_bitmap_walk(bitmap, xreap_agmeta_extent, &rs);
554 254763 : if (error)
555 : return error;
556 :
557 254627 : if (xreap_dirty(&rs))
558 158755 : return xrep_defer_finish(sc);
559 :
560 : return 0;
561 : }
562 :
563 : /*
564 : * Break a file metadata extent into sub-extents by fate (crosslinked, not
565 : * crosslinked), and dispose of each sub-extent separately. The extent must
566 : * not cross an AG boundary.
567 : */
568 : STATIC int
569 70279 : xreap_fsmeta_extent(
570 : uint64_t fsbno,
571 : uint64_t len,
572 : void *priv)
573 : {
574 70279 : struct xreap_state *rs = priv;
575 70279 : struct xfs_scrub *sc = rs->sc;
576 70279 : xfs_agnumber_t agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
577 70279 : xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
578 70279 : xfs_agblock_t agbno_next = agbno + len;
579 70279 : int error = 0;
580 :
581 70279 : ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
582 70279 : ASSERT(sc->ip != NULL);
583 70279 : ASSERT(!sc->sa.pag);
584 :
585 : /*
586 : * We're reaping blocks after repairing file metadata, which means that
587 : * we have to init the xchk_ag structure ourselves.
588 : */
589 70279 : sc->sa.pag = xfs_perag_get(sc->mp, agno);
590 70279 : if (!sc->sa.pag)
591 : return -EFSCORRUPTED;
592 :
593 70279 : error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &sc->sa.agf_bp);
594 70279 : if (error)
595 0 : goto out_pag;
596 :
597 140558 : while (agbno < agbno_next) {
598 70279 : xfs_extlen_t aglen;
599 70279 : bool crosslinked;
600 :
601 70279 : error = xreap_agextent_select(rs, agbno, agbno_next,
602 : &crosslinked, &aglen);
603 70279 : if (error)
604 0 : goto out_agf;
605 :
606 70279 : error = xreap_agextent_iter(rs, agbno, &aglen, crosslinked);
607 70279 : if (error)
608 0 : goto out_agf;
609 :
610 70279 : if (xreap_want_defer_finish(rs)) {
611 : /*
612 : * Hold the AGF buffer across the deferred chain
613 : * processing.
614 : */
615 20835 : error = xrep_defer_finish(sc);
616 20835 : if (error)
617 0 : goto out_agf;
618 20835 : xreap_defer_finish_reset(rs);
619 49444 : } else if (xreap_want_roll(rs)) {
620 : /*
621 : * Hold the AGF buffer across the transaction roll so
622 : * that we don't have to reattach it to the scrub
623 : * context.
624 : */
625 181 : xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
626 181 : error = xfs_trans_roll_inode(&sc->tp, sc->ip);
627 181 : xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
628 181 : if (error)
629 0 : goto out_agf;
630 181 : xreap_reset(rs);
631 : }
632 :
633 70279 : agbno += aglen;
634 : }
635 :
636 70279 : out_agf:
637 70279 : xfs_trans_brelse(sc->tp, sc->sa.agf_bp);
638 70279 : sc->sa.agf_bp = NULL;
639 70279 : out_pag:
640 70279 : xfs_perag_put(sc->sa.pag);
641 70279 : sc->sa.pag = NULL;
642 70279 : return error;
643 : }
644 :
645 : /*
646 : * Dispose of every block of every fs metadata extent in the bitmap.
647 : * Do not use this to dispose of the mappings in an ondisk inode fork.
648 : */
649 : int
650 2629676 : xrep_reap_fsblocks(
651 : struct xfs_scrub *sc,
652 : struct xfsb_bitmap *bitmap,
653 : const struct xfs_owner_info *oinfo)
654 : {
655 2629676 : struct xreap_state rs = {
656 : .sc = sc,
657 : .oinfo = oinfo,
658 : .resv = XFS_AG_RESV_NONE,
659 : };
660 2629676 : int error;
661 :
662 2629676 : ASSERT(xfs_has_rmapbt(sc->mp));
663 2629676 : ASSERT(sc->ip != NULL);
664 :
665 2629676 : error = xfsb_bitmap_walk(bitmap, xreap_fsmeta_extent, &rs);
666 2629675 : if (error)
667 : return error;
668 :
669 2629674 : if (xreap_dirty(&rs))
670 23757 : return xrep_defer_finish(sc);
671 :
672 : return 0;
673 : }
674 :
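/*
 * Illustrative sketch (not part of this file): reaping old bmbt blocks after
 * rebuilding a file fork, assuming the xfsb_bitmap helpers from
 * scrub/bitmap.h.  The bitmap contents here are hypothetical.
 */
#if 0	/* example only */
	struct xfsb_bitmap	old_bmbt_blocks;
	struct xfs_owner_info	oinfo;
	int			error;

	xfsb_bitmap_init(&old_bmbt_blocks);
	/* ...record the old bmbt blocks here while rebuilding the fork... */

	/* The old blocks were owned by the data fork's bmbt. */
	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, XFS_DATA_FORK);
	error = xrep_reap_fsblocks(sc, &old_bmbt_blocks, &oinfo);
	xfsb_bitmap_destroy(&old_bmbt_blocks);
#endif
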
675 : /*
676 : * Metadata files are not supposed to share blocks with anything else.
677 : * If blocks are shared, we remove the reverse mapping (thus reducing the
678 : * crosslink factor); if blocks are not shared, we also need to free them.
679 : *
680 : * This first step determines the longest subset of the passed-in imap
681 : * (starting at its beginning) that is either crosslinked or not crosslinked.
682 : * The blockcount will be adjusted down as needed.
683 : */
684 : STATIC int
685 77208 : xreap_bmapi_select(
686 : struct xfs_scrub *sc,
687 : struct xfs_inode *ip,
688 : int whichfork,
689 : struct xfs_bmbt_irec *imap,
690 : bool *crosslinked)
691 : {
692 77208 : struct xfs_owner_info oinfo;
693 77208 : struct xfs_btree_cur *cur;
694 77208 : xfs_filblks_t len = 1;
695 77208 : xfs_agblock_t bno;
696 77208 : xfs_agblock_t agbno;
697 77208 : xfs_agblock_t agbno_next;
698 77208 : int error;
699 :
700 77208 : agbno = XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock);
701 77209 : agbno_next = agbno + imap->br_blockcount;
702 :
703 77209 : cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
704 : sc->sa.pag);
705 :
706 77209 : xfs_rmap_ino_owner(&oinfo, ip->i_ino, whichfork, imap->br_startoff);
707 77209 : error = xfs_rmap_has_other_keys(cur, agbno, 1, &oinfo, crosslinked);
708 77209 : if (error)
709 0 : goto out_cur;
710 :
711 77209 : bno = agbno + 1;
712 104342 : while (bno < agbno_next) {
713 27133 : bool also_crosslinked;
714 :
715 27133 : oinfo.oi_offset++;
716 27133 : error = xfs_rmap_has_other_keys(cur, bno, 1, &oinfo,
717 : &also_crosslinked);
718 27133 : if (error)
719 0 : goto out_cur;
720 :
721 27133 : if (also_crosslinked != *crosslinked)
722 : break;
723 :
724 27133 : len++;
725 27133 : bno++;
726 : }
727 :
728 77209 : imap->br_blockcount = len;
729 77209 : trace_xreap_bmapi_select(sc->sa.pag, agbno, len, *crosslinked);
730 77209 : out_cur:
731 77209 : xfs_btree_del_cursor(cur, error);
732 77209 : return error;
733 : }
734 :
735 : /*
736 : * Decide if this buffer can be joined to a transaction. This is true for most
737 : * buffers, but there are two cases that we want to catch: large remote xattr
738 : * value buffers are not logged and can overflow the buffer log item dirty
739 : * bitmap size; and oversized cached buffers if things have really gone
740 : * haywire.
741 : */
742 : static inline bool
743 89975 : xreap_buf_loggable(
744 : const struct xfs_buf *bp)
745 : {
746 89975 : int i;
747 :
748 179950 : for (i = 0; i < bp->b_map_count; i++) {
749 89975 : int chunks;
750 89975 : int map_size;
751 :
752 89975 : chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len),
753 : XFS_BLF_CHUNK);
754 89975 : map_size = DIV_ROUND_UP(chunks, NBWORD);
755 89975 : if (map_size > XFS_BLF_DATAMAP_SIZE)
756 : return false;
757 : }
758 :
759 : return true;
760 : }
761 :
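/*
 * Worked example of the check above (assuming XFS_MAX_BLOCKSIZE of 64k,
 * XFS_BLF_CHUNK of 128 bytes, 32-bit map words, and therefore a 16-word
 * XFS_BLF_DATAMAP_SIZE): a 64k buffer needs 512 dirty chunks, or 16 bitmap
 * words, which just fits in the log item.  A remote xattr buffer spanning
 * more than that overflows the map and must be staled without logging.
 */
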
762 : /*
763 : * Invalidate any buffers for this file mapping. The @imap blockcount may be
764 : * adjusted downward if we need to roll the transaction.
765 : */
766 : STATIC int
767 77209 : xreap_bmapi_binval(
768 : struct xfs_scrub *sc,
769 : struct xfs_inode *ip,
770 : int whichfork,
771 : struct xfs_bmbt_irec *imap)
772 : {
773 77209 : struct xfs_mount *mp = sc->mp;
774 77209 : struct xfs_perag *pag = sc->sa.pag;
775 77209 : int bmap_flags = xfs_bmapi_aflag(whichfork);
776 77209 : xfs_fileoff_t off;
777 77209 : xfs_fileoff_t max_off;
778 77209 : xfs_extlen_t scan_blocks;
779 77209 : xfs_agnumber_t agno = sc->sa.pag->pag_agno;
780 77209 : xfs_agblock_t bno;
781 77209 : xfs_agblock_t agbno;
782 77209 : xfs_agblock_t agbno_next;
783 77209 : unsigned int invalidated = 0;
784 77209 : int error;
785 :
786 : /*
787 : * Avoid invalidating AG headers and post-EOFS blocks because we never
788 : * own those.
789 : */
790 77209 : agbno = bno = XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock);
791 77209 : agbno_next = agbno + imap->br_blockcount;
792 77209 : if (!xfs_verify_agbno(pag, agbno) ||
793 77209 : !xfs_verify_agbno(pag, agbno_next - 1))
794 : return 0;
795 :
796 : /*
797 : * Buffers for file blocks can span multiple contiguous mappings. This
798 : * means that for each block in the mapping, there could exist an
799 : * xfs_buf indexed by that block with any length up to the maximum
800 : * buffer size (remote xattr values) or to the next hole in the fork.
801 : * To set up our binval scan, first we need to figure out the location
802 : * of the next hole.
803 : */
804 77209 : off = imap->br_startoff + imap->br_blockcount;
805 77209 : max_off = off + xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
806 137283 : while (off < max_off) {
807 133321 : struct xfs_bmbt_irec hmap;
808 133321 : int nhmaps = 1;
809 :
810 133321 : error = xfs_bmapi_read(ip, off, max_off - off, &hmap,
811 : &nhmaps, bmap_flags);
812 133321 : if (error)
813 0 : return error;
814 133321 : if (nhmaps != 1 || hmap.br_startblock == DELAYSTARTBLOCK) {
815 0 : ASSERT(0);
816 0 : return -EFSCORRUPTED;
817 : }
818 :
819 193395 : if (!xfs_bmap_is_real_extent(&hmap))
820 : break;
821 :
822 60074 : off = hmap.br_startoff + hmap.br_blockcount;
823 : }
824 77209 : scan_blocks = off - imap->br_startoff;
825 :
826 77209 : trace_xreap_bmapi_binval_scan(sc, imap, scan_blocks);
827 :
828 : /*
829 : * If there are incore buffers for these blocks, invalidate them. If
830 : * we can't (try)lock the buffer we assume it's owned by someone else
831 : * and leave it alone. The buffer cache cannot detect aliasing, so
832 : * employ nested loops to detect incore buffers of any plausible size.
833 : */
834 181542 : while (bno < agbno_next) {
835 417336 : struct xrep_bufscan scan = {
836 104334 : .daddr = XFS_AGB_TO_DADDR(mp, agno, bno),
837 104334 : .max_sectors = xrep_bufscan_max_sectors(mp,
838 : scan_blocks),
839 104334 : .daddr_step = XFS_FSB_TO_BB(mp, 1),
840 : };
841 104334 : struct xfs_buf *bp;
842 :
843 194308 : while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
844 89975 : if (xreap_buf_loggable(bp)) {
845 89975 : xfs_trans_bjoin(sc->tp, bp);
846 89975 : xfs_trans_binval(sc->tp, bp);
847 : } else {
848 0 : xfs_buf_stale(bp);
849 0 : xfs_buf_relse(bp);
850 : }
851 89975 : invalidated++;
852 :
853 : /*
854 : * Stop invalidating if we've hit the limit; we should
855 : * still have enough reservation left to free however
856 : * much of the mapping we've seen so far.
857 : */
858 89975 : if (invalidated > XREAP_MAX_BINVAL) {
859 1 : imap->br_blockcount = agbno_next - bno;
860 1 : goto out;
861 : }
862 : }
863 :
864 104333 : bno++;
865 104333 : scan_blocks--;
866 : }
867 :
868 77208 : out:
869 77209 : trace_xreap_bmapi_binval(sc->sa.pag, agbno, imap->br_blockcount);
870 77209 : return 0;
871 : }
872 :
873 : /*
874 : * Dispose of as much of the beginning of this file fork mapping as possible.
875 : * The number of blocks disposed of is returned in @imap->br_blockcount.
876 : */
877 : STATIC int
878 77209 : xrep_reap_bmapi_iter(
879 : struct xfs_scrub *sc,
880 : struct xfs_inode *ip,
881 : int whichfork,
882 : struct xfs_bmbt_irec *imap,
883 : bool crosslinked)
884 : {
885 77209 : int error;
886 :
887 77209 : if (crosslinked) {
888 : /*
889 : * If there are other rmappings, this block is crosslinked and
890 : * must not be freed. Remove the reverse mapping, leave the
891 : * buffer cache in its possibly confused state, and move on.
892 : * We don't want to risk discarding valid data buffers from
893 : * anybody else who thinks they own the block, even though that
894 : * runs the risk of stale buffer warnings in the future.
895 : */
896 0 : trace_xreap_dispose_unmap_extent(sc->sa.pag,
897 0 : XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock),
898 0 : imap->br_blockcount);
899 :
900 : /*
901 : * Schedule removal of the mapping from the fork. We use
902 : * deferred log intents in this function to control the exact
903 : * sequence of metadata updates.
904 : */
905 0 : xfs_bmap_unmap_extent(sc->tp, ip, whichfork, imap);
906 0 : xfs_trans_mod_dquot_byino(sc->tp, ip, XFS_TRANS_DQ_BCOUNT,
907 0 : -(int64_t)imap->br_blockcount);
908 0 : xfs_rmap_unmap_extent(sc->tp, ip, whichfork, imap);
909 0 : return 0;
910 : }
911 :
912 : /*
913 : * If the block is not crosslinked, we can invalidate all the incore
914 : * buffers for the extent, and then free the extent. This is a bit of
915 : * a mess since we don't detect discontiguous buffers that are indexed
916 : * by a block starting before the first block of the extent but overlap
917 : * anyway.
918 : */
919 77209 : trace_xreap_dispose_free_extent(sc->sa.pag,
920 77209 : XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock),
921 77209 : imap->br_blockcount);
922 :
923 : /*
924 : * Invalidate as many buffers as we can, starting at the beginning of
925 : * this mapping. If this function sets blockcount to zero, the
926 : * transaction is full of logged buffer invalidations, so we need to
927 : * return early so that we can roll and retry.
928 : */
929 77209 : error = xreap_bmapi_binval(sc, ip, whichfork, imap);
930 77209 : if (error || imap->br_blockcount == 0)
931 : return error;
932 :
933 : /*
934 : * Schedule removal of the mapping from the fork. We use deferred log
935 : * intents in this function to control the exact sequence of metadata
936 : * updates.
937 : */
938 77209 : xfs_bmap_unmap_extent(sc->tp, ip, whichfork, imap);
939 77209 : xfs_trans_mod_dquot_byino(sc->tp, ip, XFS_TRANS_DQ_BCOUNT,
940 77209 : -(int64_t)imap->br_blockcount);
941 77209 : return __xfs_free_extent_later(sc->tp, imap->br_startblock,
942 : imap->br_blockcount, NULL, XFS_AG_RESV_NONE, true);
943 : }
944 :
945 : /*
946 : * Dispose of as much of this file extent as we can. Upon successful return,
947 : * the imap will reflect the mapping that was removed from the fork.
948 : */
949 : STATIC int
950 77206 : xreap_ifork_extent(
951 : struct xfs_scrub *sc,
952 : struct xfs_inode *ip,
953 : int whichfork,
954 : struct xfs_bmbt_irec *imap)
955 : {
956 77206 : xfs_agnumber_t agno;
957 77206 : bool crosslinked;
958 77206 : int error;
959 :
960 77206 : ASSERT(sc->sa.pag == NULL);
961 :
962 77206 : trace_xreap_ifork_extent(sc, ip, whichfork, imap);
963 :
964 77207 : agno = XFS_FSB_TO_AGNO(sc->mp, imap->br_startblock);
965 77207 : sc->sa.pag = xfs_perag_get(sc->mp, agno);
966 77209 : if (!sc->sa.pag)
967 : return -EFSCORRUPTED;
968 :
969 77209 : error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &sc->sa.agf_bp);
970 77209 : if (error)
971 0 : goto out_pag;
972 :
973 : /*
974 : * Decide the fate of the blocks at the beginning of the mapping, then
975 : * update the mapping to use it with the unmap calls.
976 : */
977 77209 : error = xreap_bmapi_select(sc, ip, whichfork, imap, &crosslinked);
978 77209 : if (error)
979 0 : goto out_agf;
980 :
981 77209 : error = xrep_reap_bmapi_iter(sc, ip, whichfork, imap, crosslinked);
982 77209 : if (error)
983 0 : goto out_agf;
984 :
985 77209 : out_agf:
986 77209 : xfs_trans_brelse(sc->tp, sc->sa.agf_bp);
987 77209 : sc->sa.agf_bp = NULL;
988 77209 : out_pag:
989 77209 : xfs_perag_put(sc->sa.pag);
990 77209 : sc->sa.pag = NULL;
991 77209 : return error;
992 : }
993 :
994 : /*
995 : * Dispose of each block mapped to the given fork of the given file. Callers
996 : * must hold ILOCK_EXCL, and ip can only be sc->ip or sc->tempip. The fork
997 : * must not have any delalloc reservations.
998 : */
999 : int
1000 66729 : xrep_reap_ifork(
1001 : struct xfs_scrub *sc,
1002 : struct xfs_inode *ip,
1003 : int whichfork)
1004 : {
1005 66729 : xfs_fileoff_t off = 0;
1006 66729 : int bmap_flags = xfs_bmapi_aflag(whichfork);
1007 66729 : int error;
1008 :
1009 66729 : ASSERT(xfs_has_rmapbt(sc->mp));
1010 66729 : ASSERT(ip == sc->ip || ip == sc->tempip);
1011 66729 : ASSERT(whichfork == XFS_ATTR_FORK || !XFS_IS_REALTIME_INODE(ip));
1012 :
1013 212569 : while (off < XFS_MAX_FILEOFF) {
1014 145838 : struct xfs_bmbt_irec imap;
1015 145838 : int nimaps = 1;
1016 :
1017 : /* Read the next extent, skip past holes and delalloc. */
1018 145838 : error = xfs_bmapi_read(ip, off, XFS_MAX_FILEOFF - off, &imap,
1019 : &nimaps, bmap_flags);
1020 145839 : if (error)
1021 0 : return error;
1022 145839 : if (nimaps != 1 || imap.br_startblock == DELAYSTARTBLOCK) {
1023 0 : ASSERT(0);
1024 0 : return -EFSCORRUPTED;
1025 : }
1026 :
1027 : /*
1028 : * If this is a real space mapping, reap as much of it as we
1029 : * can in a single transaction.
1030 : */
1031 223047 : if (xfs_bmap_is_real_extent(&imap)) {
1032 77208 : error = xreap_ifork_extent(sc, ip, whichfork, &imap);
1033 77209 : if (error)
1034 0 : return error;
1035 :
1036 77209 : error = xfs_defer_finish(&sc->tp);
1037 77209 : if (error)
1038 0 : return error;
1039 : }
1040 :
1041 145840 : off = imap.br_startoff + imap.br_blockcount;
1042 : }
1043 :
1044 : return 0;
1045 : }
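
/*
 * Illustrative sketch (not part of this file): a repair that staged new
 * metadata in a temporary file might discard the temp file's leftover
 * mappings like this, holding ILOCK_EXCL as required above.  The choice of
 * the attr fork here is hypothetical.
 */
#if 0	/* example only */
	error = xrep_reap_ifork(sc, sc->tempip, XFS_ATTR_FORK);
	if (error)
		return error;
#endif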
|