Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2022-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_btree.h"
13 : #include "xfs_log_format.h"
14 : #include "xfs_trans.h"
15 : #include "xfs_sb.h"
16 : #include "xfs_inode.h"
17 : #include "xfs_alloc.h"
18 : #include "xfs_alloc_btree.h"
19 : #include "xfs_ialloc.h"
20 : #include "xfs_ialloc_btree.h"
21 : #include "xfs_rmap.h"
22 : #include "xfs_rmap_btree.h"
23 : #include "xfs_refcount.h"
24 : #include "xfs_refcount_btree.h"
25 : #include "xfs_extent_busy.h"
26 : #include "xfs_ag.h"
27 : #include "xfs_ag_resv.h"
28 : #include "xfs_quota.h"
29 : #include "xfs_qm.h"
30 : #include "xfs_bmap.h"
31 : #include "xfs_da_format.h"
32 : #include "xfs_da_btree.h"
33 : #include "xfs_attr.h"
34 : #include "xfs_attr_remote.h"
35 : #include "xfs_defer.h"
36 : #include "xfs_imeta.h"
37 : #include "xfs_rtgroup.h"
38 : #include "xfs_rtrmap_btree.h"
39 : #include "scrub/scrub.h"
40 : #include "scrub/common.h"
41 : #include "scrub/trace.h"
42 : #include "scrub/repair.h"
43 : #include "scrub/bitmap.h"
44 : #include "scrub/reap.h"
45 :
46 : /*
47 : * Disposal of Blocks from Old Metadata
48 : *
49 : * Now that we've constructed a new btree to replace the damaged one, we want
50 : * to dispose of the blocks that (we think) the old btree was using.
51 : * Previously, we used the rmapbt to collect the extents (bitmap) with the
52 : * rmap owner corresponding to the tree we rebuilt, collected extents for any
53 : * blocks with the same rmap owner that are owned by another data structure
54 : * (sublist), and subtracted sublist from bitmap. In theory the extents
55 : * remaining in bitmap are the old btree's blocks.
56 : *
57 : * Unfortunately, it's possible that the btree was crosslinked with other
58 : * blocks on disk. The rmap data can tell us if there are multiple owners, so
59 : * if the rmapbt says there is an owner of this block other than @oinfo, then
60 : * the block is crosslinked. Remove the reverse mapping and continue.
61 : *
62 : * If there is one rmap record, we can free the block, which removes the
63 : * reverse mapping but doesn't add the block to the free space. Our repair
64 : * strategy is to hope the other metadata objects crosslinked on this block
65 : * will be rebuilt (atop different blocks), thereby removing all the cross
66 : * links.
67 : *
68 : * If there are no rmap records at all, we also free the block. If the btree
69 : * being rebuilt lives in the free space (bnobt/cntbt/rmapbt) then there isn't
70 : * supposed to be a rmap record and everything is ok. For other btrees there
71 : * had to have been an rmap entry for the block to have ended up on @bitmap,
72 : * so if it's gone now there's something wrong and the fs will shut down.
73 : *
74 : * Note: If there are multiple rmap records with only the same rmap owner as
75 : * the btree we're trying to rebuild and the block is indeed owned by another
76 : * data structure with the same rmap owner, then the block will be in sublist
77 : * and therefore doesn't need disposal. If there are multiple rmap records
78 : * with only the same rmap owner but the block is not owned by something with
79 : * the same rmap owner, the block will be freed.
80 : *
81 : * The caller is responsible for locking the AG headers/inode for the entire
82 : * rebuild operation so that nothing else can sneak in and change the incore
83 : * state while we're not looking. We must also invalidate any buffers
84 : * associated with @bitmap.
85 : */
86 :
88 : /* Information about reaping extents after a repair. */
 : /*
 : * One of these is carried through an entire reap pass; the counters
 : * below drive the transaction-roll and defer-finish throttles
 : * (XREAP_MAX_DEFERRED, XREAP_MAX_BINVAL, XREAP_MAX_DEFER_CHAIN).
 : */
89 : struct xreap_state {
90 : struct xfs_scrub *sc;
91 :
92 : /* Reverse mapping owner and metadata reservation type. */
93 : const struct xfs_owner_info *oinfo;
94 : enum xfs_ag_resv_type resv;
95 :
96 : /* If true, roll the transaction before reaping the next extent. */
97 : bool force_roll;
98 :
99 : /* Number of deferred reaps attached to the current transaction. */
100 : unsigned int deferred;
101 :
102 : /* Number of invalidated buffers logged to the current transaction. */
103 : unsigned int invalidated;
104 :
105 : /* Number of deferred reaps queued during the whole reap sequence. */
106 : unsigned long long total_deferred;
107 : };
107 :
108 : /* Put a block back on the AGFL. */
 : /*
 : * Fix the freelist, add an AG-owner rmap for @agbno, push the block
 : * onto the AGFL, and record it in the busy extent list (skipping
 : * discard). Returns 0 or a negative errno.
 : */
109 : STATIC int
110 0 : xreap_put_freelist(
111 : struct xfs_scrub *sc,
112 : xfs_agblock_t agbno)
113 : {
114 0 : struct xfs_buf *agfl_bp;
115 0 : int error;
116 :
117 : /* Make sure there's space on the freelist. */
118 0 : error = xrep_fix_freelist(sc, 0);
119 0 : if (error)
120 : return error;
121 :
122 : /*
123 : * Since we're "freeing" a lost block onto the AGFL, we have to
124 : * create an rmap for the block prior to merging it or else other
125 : * parts will break.
126 : */
127 0 : error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno, 1,
128 : &XFS_RMAP_OINFO_AG);
129 0 : if (error)
130 : return error;
131 :
132 : /* Put the block on the AGFL. */
133 0 : error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
134 0 : if (error)
135 : return error;
136 :
137 0 : error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp,
138 : agfl_bp, agbno, 0);
139 0 : if (error)
140 : return error;
 : /* Track the block as busy so it isn't handed out again prematurely. */
141 0 : xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1,
142 : XFS_EXTENT_BUSY_SKIP_DISCARD);
143 :
144 0 : return 0;
145 : }
146 :
147 : /* Are there any uncommitted reap operations? */
148 4127722 : static inline bool xreap_dirty(const struct xreap_state *rs)
149 : {
150 4127722 : if (rs->force_roll)
151 : return true;
152 4127722 : if (rs->deferred)
153 : return true;
154 3600933 : if (rs->invalidated)
155 : return true;
 : /* Catches work deferred earlier in the pass, even after a reset. */
156 3600933 : if (rs->total_deferred)
157 3 : return true;
158 : return false;
159 : }
160 :
161 : #define XREAP_MAX_DEFERRED (128)
162 : #define XREAP_MAX_BINVAL (2048)
163 :
164 : /*
165 : * Decide if we want to roll the transaction after reaping an extent. We don't
166 : * want to overrun the transaction reservation, so we prohibit more than
167 : * 128 EFIs per transaction. For the same reason, we limit the number
168 : * of buffer invalidations to 2048.
 : * (The limits are XREAP_MAX_DEFERRED and XREAP_MAX_BINVAL above.)
169 : */
170 790840 : static inline bool xreap_want_roll(const struct xreap_state *rs)
171 : {
172 790840 : if (rs->force_roll)
173 : return true;
174 790840 : if (rs->deferred > XREAP_MAX_DEFERRED)
175 : return true;
176 789770 : if (rs->invalidated > XREAP_MAX_BINVAL)
177 36 : return true;
178 : return false;
179 : }
180 :
 : /*
 : * Reset the per-transaction counters after rolling the scrub
 : * transaction; work already deferred is folded into total_deferred so
 : * xreap_want_defer_finish/xreap_dirty still see it.
 : */
181 : static inline void xreap_reset(struct xreap_state *rs)
182 : {
183 1106 : rs->total_deferred += rs->deferred;
184 1106 : rs->deferred = 0;
185 1106 : rs->invalidated = 0;
186 1106 : rs->force_roll = false;
187 1106 : }
188 :
189 : #define XREAP_MAX_DEFER_CHAIN (2048)
190 :
191 : /*
192 : * Decide if we want to finish the deferred ops that are attached to the scrub
193 : * transaction. We don't want to queue huge chains of deferred ops because
194 : * that can consume a lot of log space and kernel memory. Hence we trigger a
195 : * xfs_defer_finish if there are more than 2048 deferred reap operations or the
196 : * caller did some real work.
 : * (The chain limit is XREAP_MAX_DEFER_CHAIN above.)
197 : */
198 : static inline bool
199 806680 : xreap_want_defer_finish(const struct xreap_state *rs)
200 : {
201 806680 : if (rs->force_roll)
202 : return true;
203 791125 : if (rs->total_deferred > XREAP_MAX_DEFER_CHAIN)
204 54 : return true;
205 : return false;
206 : }
207 :
 : /*
 : * Clear all reap state after the deferred op chain has been finished;
 : * unlike xreap_reset, nothing is carried forward.
 : */
208 : static inline void xreap_defer_finish_reset(struct xreap_state *rs)
209 : {
210 15609 : rs->total_deferred = 0;
211 15609 : rs->deferred = 0;
212 15609 : rs->invalidated = 0;
213 15609 : rs->force_roll = false;
214 15609 : }
215 :
216 : /*
217 : * Compute the maximum length of a buffer cache scan (in units of sectors),
218 : * given a quantity of fs blocks.
 : * The result is capped at the size of the largest supported buffer
 : * (a max-size remote xattr value), converted to basic blocks.
219 : */
220 : xfs_daddr_t
221 5530543 : xrep_bufscan_max_sectors(
222 : struct xfs_mount *mp,
223 : xfs_extlen_t fsblocks)
224 : {
225 5530543 : int max_fsbs;
226 :
227 : /* Remote xattr values are the largest buffers that we support. */
228 5530543 : max_fsbs = xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
229 :
230 5530062 : return XFS_FSB_TO_BB(mp, min_t(xfs_extlen_t, fsblocks, max_fsbs));
231 : }
232 :
233 : /*
234 : * Return an incore buffer from a sector scan, or NULL if there are no buffers
235 : * left to return.
 : * Each call probes the cache at scan->daddr for successively longer
 : * buffer lengths (stepping by scan->daddr_step sectors) until a cached
 : * buffer is found or scan->max_sectors is exceeded.
236 : */
237 : struct xfs_buf *
238 10752299 : xrep_bufscan_advance(
239 : struct xfs_mount *mp,
240 : struct xrep_bufscan *scan)
241 : {
242 10752299 : scan->__sector_count += scan->daddr_step;
243 64735365 : while (scan->__sector_count <= scan->max_sectors) {
244 59289368 : struct xfs_buf *bp = NULL;
245 59289368 : int error;
246 :
247 59289368 : error = xfs_buf_incore(mp->m_ddev_targp, scan->daddr,
248 : scan->__sector_count, XBF_LIVESCAN, &bp);
249 59290686 : if (!error)
250 5307620 : return bp;
251 :
252 53983066 : scan->__sector_count += scan->daddr_step;
253 : }
254 :
255 : return NULL;
256 : }
257 :
258 : /* Try to invalidate the incore buffers for an extent that we're freeing. */
 : /*
 : * *aglenp may be shortened if the per-transaction invalidation limit
 : * is hit; the caller is expected to roll and retry the remainder.
 : */
259 : STATIC void
260 801626 : xreap_agextent_binval(
261 : struct xreap_state *rs,
262 : xfs_agblock_t agbno,
263 : xfs_extlen_t *aglenp)
264 : {
265 801626 : struct xfs_scrub *sc = rs->sc;
266 801626 : struct xfs_perag *pag = sc->sa.pag;
267 801626 : struct xfs_mount *mp = sc->mp;
268 801626 : xfs_agnumber_t agno = sc->sa.pag->pag_agno;
269 801626 : xfs_agblock_t agbno_next = agbno + *aglenp;
270 801626 : xfs_agblock_t bno = agbno;
271 :
272 : /*
273 : * Avoid invalidating AG headers and post-EOFS blocks because we never
274 : * own those.
275 : */
276 801626 : if (!xfs_verify_agbno(pag, agbno) ||
277 801626 : !xfs_verify_agbno(pag, agbno_next - 1))
278 : return;
279 :
280 : /*
281 : * If there are incore buffers for these blocks, invalidate them. We
282 : * assume that the lack of any other known owners means that the buffer
283 : * can be locked without risk of deadlocking. The buffer cache cannot
284 : * detect aliasing, so employ nested loops to scan for incore buffers
285 : * of any plausible size.
286 : */
287 3501560 : while (bno < agbno_next) {
288 10795728 : struct xrep_bufscan scan = {
289 2699079 : .daddr = XFS_AGB_TO_DADDR(mp, agno, bno),
290 2699079 : .max_sectors = xrep_bufscan_max_sectors(mp,
291 : agbno_next - bno),
292 2698785 : .daddr_step = XFS_FSB_TO_BB(mp, 1),
293 : };
294 2698785 : struct xfs_buf *bp;
295 :
296 5192526 : while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
297 2492722 : xfs_trans_bjoin(sc->tp, bp);
298 2493404 : xfs_trans_binval(sc->tp, bp);
299 2493777 : rs->invalidated++;
300 :
301 : /*
302 : * Stop invalidating if we've hit the limit; we should
303 : * still have enough reservation left to free however
304 : * far we've gotten.
305 : */
306 2493777 : if (rs->invalidated > XREAP_MAX_BINVAL) {
307 36 : *aglenp -= agbno_next - bno;
308 36 : goto out;
309 : }
310 : }
311 :
312 2699934 : bno++;
313 : }
314 :
315 802481 : out:
 : /* Record the (possibly shortened) extent in the tracepoint. */
316 802517 : trace_xreap_agextent_binval(sc->sa.pag, agbno, *aglenp);
317 : }
318 :
319 : /*
320 : * Figure out the longest run of blocks that we can dispose of with a single
321 : * call. Cross-linked blocks should have their reverse mappings removed, but
322 : * single-owner extents can be freed. AGFL blocks can only be put back one at
323 : * a time.
 : * On success, *crosslinked reports the fate of the first block and
 : * *aglenp the length of the run sharing that fate.
324 : */
325 : STATIC int
326 801871 : xreap_agextent_select(
327 : struct xreap_state *rs,
328 : xfs_agblock_t agbno,
329 : xfs_agblock_t agbno_next,
330 : bool *crosslinked,
331 : xfs_extlen_t *aglenp)
332 : {
333 801871 : struct xfs_scrub *sc = rs->sc;
334 801871 : struct xfs_btree_cur *cur;
335 801871 : xfs_agblock_t bno = agbno + 1;
336 801871 : xfs_extlen_t len = 1;
337 801871 : int error;
338 :
339 : /*
340 : * Determine if there are any other rmap records covering the first
341 : * block of this extent. If so, the block is crosslinked.
342 : */
343 801871 : cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
344 : sc->sa.pag);
345 802597 : error = xfs_rmap_has_other_keys(cur, agbno, 1, rs->oinfo,
346 : crosslinked);
347 802215 : if (error)
348 0 : goto out_cur;
349 :
350 : /* AGFL blocks can only be dealt with one at a time. */
351 802215 : if (rs->resv == XFS_AG_RESV_AGFL)
352 0 : goto out_found;
353 :
354 : /*
355 : * Figure out how many of the subsequent blocks have the same crosslink
356 : * status.
357 : */
358 2840397 : while (bno < agbno_next) {
359 2038004 : bool also_crosslinked;
360 :
361 2038004 : error = xfs_rmap_has_other_keys(cur, bno, 1, rs->oinfo,
362 : &also_crosslinked);
363 2038182 : if (error)
364 0 : goto out_cur;
365 :
366 2038182 : if (*crosslinked != also_crosslinked)
367 : break;
368 :
369 2038182 : len++;
370 2038182 : bno++;
371 : }
372 :
373 802393 : out_found:
374 802393 : *aglenp = len;
375 802393 : trace_xreap_agextent_select(sc->sa.pag, agbno, len, *crosslinked);
376 801589 : out_cur:
377 801589 : xfs_btree_del_cursor(cur, error);
378 802574 : return error;
379 : }
380 :
381 : /*
382 : * Dispose of as much of the beginning of this AG extent as possible. The
383 : * number of blocks disposed of will be returned in @aglenp.
 : * @crosslinked is the verdict from xreap_agextent_select for this run.
384 : */
385 : STATIC int
386 801868 : xreap_agextent_iter(
387 : struct xreap_state *rs,
388 : xfs_agblock_t agbno,
389 : xfs_extlen_t *aglenp,
390 : bool crosslinked)
391 : {
392 801868 : struct xfs_scrub *sc = rs->sc;
393 801868 : xfs_fsblock_t fsbno;
394 801868 : int error = 0;
395 :
396 801868 : ASSERT(rs->resv != XFS_AG_RESV_IMETA);
397 :
398 801868 : fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, agbno);
399 :
400 : /*
401 : * If there are other rmappings, this block is cross linked and must
402 : * not be freed. Remove the reverse mapping and move on. Otherwise,
403 : * we were the only owner of the block, so free the extent, which will
404 : * also remove the rmap.
405 : *
406 : * XXX: XFS doesn't support detecting the case where a single block
407 : * metadata structure is crosslinked with a multi-block structure
408 : * because the buffer cache doesn't detect aliasing problems, so we
409 : * can't fix 100% of crosslinking problems (yet). The verifiers will
410 : * blow on writeout, the filesystem will shut down, and the admin gets
411 : * to run xfs_repair.
412 : */
413 801868 : if (crosslinked) {
414 0 : trace_xreap_dispose_unmap_extent(sc->sa.pag, agbno, *aglenp);
415 :
416 0 : rs->force_roll = true;
417 :
418 0 : if (rs->oinfo == &XFS_RMAP_OINFO_COW) {
419 : /*
420 : * If we're unmapping CoW staging extents, remove the
421 : * records from the refcountbt, which will remove the
422 : * rmap record as well.
423 : */
424 0 : xfs_refcount_free_cow_extent(sc->tp, false, fsbno,
425 : *aglenp);
426 0 : return 0;
427 : }
428 :
429 0 : return xfs_rmap_free(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno,
430 : *aglenp, rs->oinfo);
431 : }
432 :
433 801868 : trace_xreap_dispose_free_extent(sc->sa.pag, agbno, *aglenp);
434 :
435 : /*
436 : * Invalidate as many buffers as we can, starting at agbno. If this
437 : * function sets *aglenp to zero, the transaction is full of logged
438 : * buffer invalidations, so we need to return early so that we can
439 : * roll and retry.
440 : */
441 801119 : xreap_agextent_binval(rs, agbno, aglenp);
442 802352 : if (*aglenp == 0) {
443 0 : ASSERT(xreap_want_roll(rs));
444 0 : return 0;
445 : }
446 :
447 : /*
448 : * If we're getting rid of CoW staging extents, use deferred work items
449 : * to remove the refcountbt records (which removes the rmap records)
450 : * and free the extent. We're not worried about the system going down
451 : * here because log recovery walks the refcount btree to clean out the
452 : * CoW staging extents.
453 : */
454 802352 : if (rs->oinfo == &XFS_RMAP_OINFO_COW) {
455 15555 : ASSERT(rs->resv == XFS_AG_RESV_NONE);
456 :
457 15555 : xfs_refcount_free_cow_extent(sc->tp, false, fsbno, *aglenp);
458 15555 : error = xfs_free_extent_later(sc->tp, fsbno, *aglenp, NULL,
459 : rs->resv, XFS_FREE_EXTENT_SKIP_DISCARD);
460 15555 : if (error)
461 : return error;
462 :
463 15555 : rs->force_roll = true;
464 15555 : return 0;
465 : }
466 :
467 : /* Put blocks back on the AGFL one at a time. */
468 786797 : if (rs->resv == XFS_AG_RESV_AGFL) {
469 0 : ASSERT(*aglenp == 1);
470 0 : error = xreap_put_freelist(sc, agbno);
471 0 : if (error)
472 : return error;
473 :
474 0 : rs->force_roll = true;
475 0 : return 0;
476 : }
477 :
478 : /*
479 : * Use deferred frees to get rid of the old btree blocks to try to
480 : * minimize the window in which we could crash and lose the old blocks.
481 : */
482 786797 : error = xfs_free_extent_later(sc->tp, fsbno, *aglenp, rs->oinfo,
483 : rs->resv, XFS_FREE_EXTENT_SKIP_DISCARD);
484 786389 : if (error)
485 : return error;
486 :
 : /* Count the deferred free so the xreap_want_* throttles can see it. */
487 786389 : rs->deferred++;
488 786389 : return 0;
489 : }
490 :
491 : /*
492 : * Break an AG metadata extent into sub-extents by fate (crosslinked, not
493 : * crosslinked), and dispose of each sub-extent separately.
 : * xagb_bitmap_walk callback; despite the name, @fsbno is assigned
 : * directly to an xfs_agblock_t here, so the bitmap stores AG block
 : * numbers in this context.
494 : */
495 : STATIC int
496 557056 : xreap_agmeta_extent(
497 : uint64_t fsbno,
498 : uint64_t len,
499 : void *priv)
500 : {
501 557056 : struct xreap_state *rs = priv;
502 557056 : struct xfs_scrub *sc = rs->sc;
503 557056 : xfs_agblock_t agbno = fsbno;
504 557056 : xfs_agblock_t agbno_next = agbno + len;
505 557056 : int error = 0;
506 :
507 557056 : ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
508 557056 : ASSERT(sc->ip == NULL);
509 :
510 1113029 : while (agbno < agbno_next) {
511 556612 : xfs_extlen_t aglen;
512 556612 : bool crosslinked;
513 :
514 556612 : error = xreap_agextent_select(rs, agbno, agbno_next,
515 : &crosslinked, &aglen);
516 557912 : if (error)
517 0 : return error;
518 :
519 557912 : error = xreap_agextent_iter(rs, agbno, &aglen, crosslinked);
520 557193 : if (error)
521 0 : return error;
522 :
523 557193 : if (xreap_want_defer_finish(rs)) {
524 24 : error = xrep_defer_finish(sc);
525 24 : if (error)
526 0 : return error;
527 24 : xreap_defer_finish_reset(rs);
528 556387 : } else if (xreap_want_roll(rs)) {
529 509 : error = xrep_roll_ag_trans(sc);
530 509 : if (error)
531 0 : return error;
532 509 : xreap_reset(rs);
533 : }
534 :
535 555973 : agbno += aglen;
536 : }
537 :
538 : return 0;
539 : }
540 :
541 : /* Dispose of every block of every AG metadata extent in the bitmap. */
 : /*
 : * @oinfo is the rmap owner of the old metadata; @type selects the AG
 : * space reservation to credit. Any reap work still pending after the
 : * walk is committed via xrep_defer_finish. AG-only: requires that no
 : * inode is attached to the scrub context.
 : */
542 : int
543 634143 : xrep_reap_agblocks(
544 : struct xfs_scrub *sc,
545 : struct xagb_bitmap *bitmap,
546 : const struct xfs_owner_info *oinfo,
547 : enum xfs_ag_resv_type type)
548 : {
549 634143 : struct xreap_state rs = {
550 : .sc = sc,
551 : .oinfo = oinfo,
552 : .resv = type,
553 : };
554 634143 : int error;
555 :
556 634143 : ASSERT(xfs_has_rmapbt(sc->mp));
557 634143 : ASSERT(sc->ip == NULL);
558 :
559 634143 : error = xagb_bitmap_walk(bitmap, xreap_agmeta_extent, &rs);
560 633747 : if (error)
561 : return error;
562 :
563 633510 : if (xreap_dirty(&rs))
564 409251 : return xrep_defer_finish(sc);
565 :
566 : return 0;
567 : }
568 :
569 : /*
570 : * Break a file metadata extent into sub-extents by fate (crosslinked, not
571 : * crosslinked), and dispose of each sub-extent separately. The extent must
572 : * not cross an AG boundary.
 : * xfsb_bitmap_walk callback; temporarily installs sc->sa.pag and
 : * sc->sa.agf_bp for the extent's AG and always clears them on exit.
573 : */
574 : STATIC int
575 244669 : xreap_fsmeta_extent(
576 : uint64_t fsbno,
577 : uint64_t len,
578 : void *priv)
579 : {
580 244669 : struct xreap_state *rs = priv;
581 244669 : struct xfs_scrub *sc = rs->sc;
582 244669 : xfs_agnumber_t agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
583 244669 : xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
584 244663 : xfs_agblock_t agbno_next = agbno + len;
585 244663 : int error = 0;
586 :
587 244663 : ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
588 244663 : ASSERT(sc->ip != NULL);
589 244663 : ASSERT(!sc->sa.pag);
590 :
591 : /*
592 : * We're reaping blocks after repairing file metadata, which means that
593 : * we have to init the xchk_ag structure ourselves.
594 : */
595 244663 : sc->sa.pag = xfs_perag_get(sc->mp, agno);
596 244667 : if (!sc->sa.pag)
597 : return -EFSCORRUPTED;
598 :
599 244667 : error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &sc->sa.agf_bp);
600 244669 : if (error)
601 0 : goto out_pag;
602 :
603 489366 : while (agbno < agbno_next) {
604 244698 : xfs_extlen_t aglen;
605 244698 : bool crosslinked;
606 :
607 244698 : error = xreap_agextent_select(rs, agbno, agbno_next,
608 : &crosslinked, &aglen);
609 244700 : if (error)
610 0 : goto out_agf;
611 :
612 244700 : error = xreap_agextent_iter(rs, agbno, &aglen, crosslinked);
613 244698 : if (error)
614 0 : goto out_agf;
615 :
616 244698 : if (xreap_want_defer_finish(rs)) {
617 : /*
618 : * Holds the AGF buffer across the deferred chain
619 : * processing.
620 : */
621 15585 : error = xrep_defer_finish(sc);
622 15585 : if (error)
623 0 : goto out_agf;
624 15585 : xreap_defer_finish_reset(rs);
625 229113 : } else if (xreap_want_roll(rs)) {
626 : /*
627 : * Hold the AGF buffer across the transaction roll so
628 : * that we don't have to reattach it to the scrub
629 : * context.
630 : */
631 597 : xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
632 597 : error = xfs_trans_roll_inode(&sc->tp, sc->ip);
 : /* Rejoin before the error check so cleanup can release it. */
633 597 : xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
634 597 : if (error)
635 0 : goto out_agf;
636 597 : xreap_reset(rs);
637 : }
638 :
639 244697 : agbno += aglen;
640 : }
641 :
642 244668 : out_agf:
643 244668 : xfs_trans_brelse(sc->tp, sc->sa.agf_bp);
644 244670 : sc->sa.agf_bp = NULL;
645 244670 : out_pag:
646 244670 : xfs_perag_put(sc->sa.pag);
647 244670 : sc->sa.pag = NULL;
648 244670 : return error;
649 : }
650 :
651 : /*
652 : * Dispose of every block of every fs metadata extent in the bitmap.
653 : * Do not use this to dispose of the mappings in an ondisk inode fork.
 : * File-based variant of xrep_reap_agblocks: requires an inode attached
 : * to the scrub context and always reaps with XFS_AG_RESV_NONE.
654 : */
655 : int
656 3405775 : xrep_reap_fsblocks(
657 : struct xfs_scrub *sc,
658 : struct xfsb_bitmap *bitmap,
659 : const struct xfs_owner_info *oinfo)
660 : {
661 3405775 : struct xreap_state rs = {
662 : .sc = sc,
663 : .oinfo = oinfo,
664 : .resv = XFS_AG_RESV_NONE,
665 : };
666 3405775 : int error;
667 :
668 3405775 : ASSERT(xfs_has_rmapbt(sc->mp));
669 3405775 : ASSERT(sc->ip != NULL);
670 :
671 3405775 : error = xfsb_bitmap_walk(bitmap, xreap_fsmeta_extent, &rs);
672 3405769 : if (error)
673 : return error;
674 :
675 3405766 : if (xreap_dirty(&rs))
676 99040 : return xrep_defer_finish(sc);
677 :
678 : return 0;
679 : }
680 :
681 : #ifdef CONFIG_XFS_RT
682 : /*
683 : * Figure out the longest run of blocks that we can dispose of with a single
684 : * call. Cross-linked blocks should have their reverse mappings removed, but
685 : * single-owner extents can be freed. Units are rt blocks, not rt extents.
 : * Realtime analogue of xreap_agextent_select, using the rtgroup rmap
 : * btree; *crosslinked and *rglenp report the first run's fate/length.
686 : */
687 : STATIC int
688 5323 : xreap_rgextent_select(
689 : struct xreap_state *rs,
690 : xfs_rgblock_t rgbno,
691 : xfs_rgblock_t rgbno_next,
692 : bool *crosslinked,
693 : xfs_extlen_t *rglenp)
694 : {
695 5323 : struct xfs_scrub *sc = rs->sc;
696 5323 : struct xfs_btree_cur *cur;
697 5323 : xfs_rgblock_t bno = rgbno + 1;
698 5323 : xfs_extlen_t len = 1;
699 5323 : int error;
700 :
701 : /*
702 : * Determine if there are any other rmap records covering the first
703 : * block of this extent. If so, the block is crosslinked.
704 : */
705 5323 : cur = xfs_rtrmapbt_init_cursor(sc->mp, sc->tp, sc->sr.rtg,
706 5323 : sc->sr.rtg->rtg_rmapip);
707 5323 : error = xfs_rmap_has_other_keys(cur, rgbno, 1, rs->oinfo,
708 : crosslinked);
709 5323 : if (error)
710 0 : goto out_cur;
711 :
712 : /*
713 : * Figure out how many of the subsequent blocks have the same crosslink
714 : * status.
715 : */
716 69235 : while (bno < rgbno_next) {
717 63912 : bool also_crosslinked;
718 :
719 63912 : error = xfs_rmap_has_other_keys(cur, bno, 1, rs->oinfo,
720 : &also_crosslinked);
721 63912 : if (error)
722 0 : goto out_cur;
723 :
724 63912 : if (*crosslinked != also_crosslinked)
725 : break;
726 :
727 63912 : len++;
728 63912 : bno++;
729 : }
730 :
731 5323 : *rglenp = len;
732 5323 : trace_xreap_rgextent_select(sc->sr.rtg, rgbno, len, *crosslinked);
733 5323 : out_cur:
734 5323 : xfs_btree_del_cursor(cur, error);
735 5323 : return error;
736 : }
737 :
738 : /*
739 : * Dispose of as much of the beginning of this rtgroup extent as possible.
740 : * The number of blocks disposed of will be returned in @rglenp.
 : * Only CoW staging extents (XFS_RMAP_OINFO_COW) are supported here.
741 : */
742 : STATIC int
743 5323 : xreap_rgextent_iter(
744 : struct xreap_state *rs,
745 : xfs_rgblock_t rgbno,
746 : xfs_extlen_t *rglenp,
747 : bool crosslinked)
748 : {
749 5323 : struct xfs_scrub *sc = rs->sc;
750 5323 : xfs_rtblock_t rtbno;
751 5323 : int error;
752 :
753 : /*
754 : * The only caller so far is CoW fork repair, so we only know how to
755 : * unlink or free CoW staging extents. Here we don't have to worry
756 : * about invalidating buffers!
757 : */
758 5323 : if (rs->oinfo != &XFS_RMAP_OINFO_COW) {
759 0 : ASSERT(rs->oinfo == &XFS_RMAP_OINFO_COW);
760 0 : return -EFSCORRUPTED;
761 : }
762 5323 : ASSERT(rs->resv == XFS_AG_RESV_NONE);
763 :
 : /* Convert to a global rt block number for the deferred work items. */
764 5323 : rtbno = xfs_rgbno_to_rtb(sc->mp, sc->sr.rtg->rtg_rgno, rgbno);
765 :
766 : /*
767 : * If there are other rmappings, this block is cross linked and must
768 : * not be freed. Remove the forward and reverse mapping and move on.
769 : */
770 5323 : if (crosslinked) {
771 0 : trace_xreap_dispose_unmap_rtextent(sc->sr.rtg, rgbno, *rglenp);
772 :
773 0 : xfs_refcount_free_cow_extent(sc->tp, true, rtbno, *rglenp);
774 0 : rs->deferred++;
775 0 : return 0;
776 : }
777 :
778 5323 : trace_xreap_dispose_free_rtextent(sc->sr.rtg, rgbno, *rglenp);
779 :
780 : /*
781 : * The CoW staging extent is not crosslinked. Use deferred work items
782 : * to remove the refcountbt records (which removes the rmap records)
783 : * and free the extent. We're not worried about the system going down
784 : * here because log recovery walks the refcount btree to clean out the
785 : * CoW staging extents.
786 : */
787 5323 : xfs_refcount_free_cow_extent(sc->tp, true, rtbno, *rglenp);
788 5323 : error = xfs_free_extent_later(sc->tp, rtbno, *rglenp, NULL,
789 : rs->resv,
790 : XFS_FREE_EXTENT_REALTIME |
791 : XFS_FREE_EXTENT_SKIP_DISCARD);
792 5323 : if (error)
793 : return error;
794 :
795 5323 : rs->deferred++;
796 5323 : return 0;
797 : }
798 :
799 : #define XREAP_RTGLOCK_ALL (XFS_RTGLOCK_BITMAP | \
800 : XFS_RTGLOCK_RMAP | \
801 : XFS_RTGLOCK_REFCOUNT)
802 :
803 : /*
804 : * Break a rt file metadata extent into sub-extents by fate (crosslinked, not
805 : * crosslinked), and dispose of each sub-extent separately. The extent must
806 : * be aligned to a realtime extent.
 : * xrtb_bitmap_walk callback; temporarily installs sc->sr.rtg and holds
 : * the bitmap/rmap/refcount rtgroup locks for the duration.
807 : */
808 : STATIC int
809 5323 : xreap_rtmeta_extent(
810 : uint64_t rtbno,
811 : uint64_t len,
812 : void *priv)
813 : {
814 5323 : struct xreap_state *rs = priv;
815 5323 : struct xfs_scrub *sc = rs->sc;
816 5323 : xfs_rgnumber_t rgno;
817 5323 : xfs_rgblock_t rgbno = xfs_rtb_to_rgbno(sc->mp, rtbno, &rgno);
818 5323 : xfs_rgblock_t rgbno_next = rgbno + len;
819 5323 : int error = 0;
820 :
821 5323 : ASSERT(sc->ip != NULL);
822 5323 : ASSERT(!sc->sr.rtg);
823 :
824 : /*
825 : * We're reaping blocks after repairing file metadata, which means that
826 : * we have to init the xchk_ag structure ourselves.
827 : */
828 5323 : sc->sr.rtg = xfs_rtgroup_get(sc->mp, rgno);
829 5323 : if (!sc->sr.rtg)
830 : return -EFSCORRUPTED;
831 :
832 5323 : xfs_rtgroup_lock(NULL, sc->sr.rtg, XREAP_RTGLOCK_ALL);
833 :
834 10646 : while (rgbno < rgbno_next) {
835 5323 : xfs_extlen_t rglen;
836 5323 : bool crosslinked;
837 :
838 5323 : error = xreap_rgextent_select(rs, rgbno, rgbno_next,
839 : &crosslinked, &rglen);
840 5323 : if (error)
841 0 : goto out_unlock;
842 :
843 5323 : error = xreap_rgextent_iter(rs, rgbno, &rglen, crosslinked);
844 5323 : if (error)
845 0 : goto out_unlock;
846 :
847 5323 : if (xreap_want_defer_finish(rs)) {
848 0 : error = xfs_defer_finish(&sc->tp);
849 0 : if (error)
850 0 : goto out_unlock;
851 0 : xreap_defer_finish_reset(rs);
852 5323 : } else if (xreap_want_roll(rs)) {
853 0 : error = xfs_trans_roll_inode(&sc->tp, sc->ip);
854 0 : if (error)
855 0 : goto out_unlock;
856 0 : xreap_reset(rs);
857 : }
858 :
859 5323 : rgbno += rglen;
860 : }
861 :
862 5323 : out_unlock:
863 5323 : xfs_rtgroup_unlock(sc->sr.rtg, XREAP_RTGLOCK_ALL);
864 5323 : xfs_rtgroup_put(sc->sr.rtg);
865 5323 : sc->sr.rtg = NULL;
866 5323 : return error;
867 : }
868 :
869 : /*
870 : * Dispose of every block of every rt metadata extent in the bitmap.
871 : * Do not use this to dispose of the mappings in an ondisk inode fork.
 : * Realtime analogue of xrep_reap_fsblocks; reaps with
 : * XFS_AG_RESV_NONE and finishes any leftover deferred work.
872 : */
873 : int
874 49015 : xrep_reap_rtblocks(
875 : struct xfs_scrub *sc,
876 : struct xrtb_bitmap *bitmap,
877 : const struct xfs_owner_info *oinfo)
878 : {
879 49015 : struct xreap_state rs = {
880 : .sc = sc,
881 : .oinfo = oinfo,
882 : .resv = XFS_AG_RESV_NONE,
883 : };
884 49015 : int error;
885 :
886 49015 : ASSERT(xfs_has_rmapbt(sc->mp));
887 49015 : ASSERT(sc->ip != NULL);
888 :
889 49015 : error = xrtb_bitmap_walk(bitmap, xreap_rtmeta_extent, &rs);
890 48997 : if (error)
891 : return error;
892 :
893 48997 : if (xreap_dirty(&rs))
894 2848 : return xrep_defer_finish(sc);
895 :
896 : return 0;
897 : }
898 : #endif /* CONFIG_XFS_RT */
899 :
900 : /*
901 : * Dispose of every block of an old metadata btree that used to be rooted in a
902 : * metadata directory file.
 : * The rmap owner is derived from sc->ip's bmbt; @oinfo lives on the
 : * stack and is only referenced for the duration of this call.
903 : */
904 : int
905 39434 : xrep_reap_metadir_fsblocks(
906 : struct xfs_scrub *sc,
907 : struct xfsb_bitmap *bitmap)
908 : {
909 : /*
910 : * Reap old metadir btree blocks with XFS_AG_RESV_NONE because the old
911 : * blocks are no longer mapped by the inode, and inode metadata space
912 : * reservations can only account freed space to the i_nblocks.
913 : */
914 39434 : struct xfs_owner_info oinfo;
915 39434 : struct xreap_state rs = {
916 : .sc = sc,
917 : .oinfo = &oinfo,
918 : .resv = XFS_AG_RESV_NONE,
919 : };
920 39434 : int error;
921 :
922 39434 : ASSERT(xfs_has_rmapbt(sc->mp));
923 39434 : ASSERT(sc->ip != NULL);
924 39434 : ASSERT(xfs_is_metadir_inode(sc->ip));
925 :
926 39434 : xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, XFS_DATA_FORK);
927 :
928 39434 : error = xfsb_bitmap_walk(bitmap, xreap_fsmeta_extent, &rs);
929 39418 : if (error)
930 : return error;
931 :
932 39420 : if (xreap_dirty(&rs))
933 15176 : return xrep_defer_finish(sc);
934 :
935 : return 0;
936 : }
937 :
938 : /*
939 : * Metadata files are not supposed to share blocks with anything else.
940 : * If blocks are shared, we remove the reverse mapping (thus reducing the
941 : * crosslink factor); if blocks are not shared, we also need to free them.
942 : *
943 : * This first step determines the longest subset of the passed-in imap
944 : * (starting at its beginning) that is either crosslinked or not crosslinked.
945 : * The blockcount will be adjusted down as needed.
946 : */
 : /*
 : * Trim @imap to the longest leading run whose blocks all share the
 : * same crosslink status (reported in *crosslinked); the caller must
 : * have sc->sa.pag and sc->sa.agf_bp set up for the extent's AG.
 : */
947 : STATIC int
948 282691 : xreap_bmapi_select(
949 : struct xfs_scrub *sc,
950 : struct xfs_inode *ip,
951 : int whichfork,
952 : struct xfs_bmbt_irec *imap,
953 : bool *crosslinked)
954 : {
955 282691 : struct xfs_owner_info oinfo;
956 282691 : struct xfs_btree_cur *cur;
957 282691 : xfs_filblks_t len = 1;
958 282691 : xfs_agblock_t bno;
959 282691 : xfs_agblock_t agbno;
960 282691 : xfs_agblock_t agbno_next;
961 282691 : int error;
962 :
963 282691 : agbno = XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock);
964 282689 : agbno_next = agbno + imap->br_blockcount;
965 :
966 282689 : cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
967 : sc->sa.pag);
968 :
969 282692 : xfs_rmap_ino_owner(&oinfo, ip->i_ino, whichfork, imap->br_startoff);
970 282692 : error = xfs_rmap_has_other_keys(cur, agbno, 1, &oinfo, crosslinked);
971 282692 : if (error)
972 0 : goto out_cur;
973 :
974 282692 : bno = agbno + 1;
975 2745220 : while (bno < agbno_next) {
976 2462528 : bool also_crosslinked;
977 :
 : /* Keep the rmap owner's file offset in sync with the block. */
978 2462528 : oinfo.oi_offset++;
979 2462528 : error = xfs_rmap_has_other_keys(cur, bno, 1, &oinfo,
980 : &also_crosslinked);
981 2462528 : if (error)
982 0 : goto out_cur;
983 :
984 2462528 : if (also_crosslinked != *crosslinked)
985 : break;
986 :
987 2462528 : len++;
988 2462528 : bno++;
989 : }
990 :
991 282692 : imap->br_blockcount = len;
992 282692 : trace_xreap_bmapi_select(sc->sa.pag, agbno, len, *crosslinked);
993 282692 : out_cur:
994 282692 : xfs_btree_del_cursor(cur, error);
995 282692 : return error;
996 : }
997 :
998 : /*
999 : * Decide if this buffer can be joined to a transaction. This is true for most
1000 : * buffers, but there are two cases that we want to catch: large remote xattr
1001 : * value buffers are not logged and can overflow the buffer log item dirty
1002 : * bitmap size; and oversized cached buffers if things have really gone
1003 : * haywire.
1004 : */
1005 : static inline bool
1006 2728614 : xreap_buf_loggable(
1007 : const struct xfs_buf *bp)
1008 : {
1009 2728614 : int i;
1010 :
1011 5457227 : for (i = 0; i < bp->b_map_count; i++) {
1012 2728613 : int chunks;
1013 2728613 : int map_size;
1014 :
1015 2728613 : chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len),
1016 : XFS_BLF_CHUNK);
1017 2728613 : map_size = DIV_ROUND_UP(chunks, NBWORD);
1018 2728613 : if (map_size > XFS_BLF_DATAMAP_SIZE)
1019 : return false;
1020 : }
1021 :
1022 : return true;
1023 : }
1024 :
1025 : /*
1026 : * Invalidate any buffers for this file mapping. The @imap blockcount may be
1027 : * adjusted downward if we need to roll the transaction.
1028 : */
1029 : STATIC int
1030 282692 : xreap_bmapi_binval(
1031 : struct xfs_scrub *sc,
1032 : struct xfs_inode *ip,
1033 : int whichfork,
1034 : struct xfs_bmbt_irec *imap)
1035 : {
1036 282692 : struct xfs_mount *mp = sc->mp;
1037 282692 : struct xfs_perag *pag = sc->sa.pag;
1038 282692 : int bmap_flags = xfs_bmapi_aflag(whichfork);
1039 282692 : xfs_fileoff_t off;
1040 282692 : xfs_fileoff_t max_off;
1041 282692 : xfs_extlen_t scan_blocks;
1042 282692 : xfs_agnumber_t agno = sc->sa.pag->pag_agno;
1043 282692 : xfs_agblock_t bno;
1044 282692 : xfs_agblock_t agbno;
1045 282692 : xfs_agblock_t agbno_next;
1046 282692 : unsigned int invalidated = 0;
1047 282692 : int error;
1048 :
1049 : /*
1050 : * Avoid invalidating AG headers and post-EOFS blocks because we never
1051 : * own those.
1052 : */
1053 282692 : agbno = bno = XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock);
1054 282691 : agbno_next = agbno + imap->br_blockcount;
1055 282691 : if (!xfs_verify_agbno(pag, agbno) ||
1056 282691 : !xfs_verify_agbno(pag, agbno_next - 1))
1057 : return 0;
1058 :
1059 : /*
1060 : * Buffers for file blocks can span multiple contiguous mappings. This
1061 : * means that for each block in the mapping, there could exist an
1062 : * xfs_buf indexed by that block with any length up to the maximum
1063 : * buffer size (remote xattr values) or to the next hole in the fork.
1064 : * To set up our binval scan, first we need to figure out the location
1065 : * of the next hole.
1066 : */
1067 282691 : off = imap->br_startoff + imap->br_blockcount;
1068 282691 : max_off = off + xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
1069 505580 : while (off < max_off) {
1070 472129 : struct xfs_bmbt_irec hmap;
1071 472129 : int nhmaps = 1;
1072 :
1073 472129 : error = xfs_bmapi_read(ip, off, max_off - off, &hmap,
1074 : &nhmaps, bmap_flags);
1075 472130 : if (error)
1076 0 : return error;
1077 472130 : if (nhmaps != 1 || hmap.br_startblock == DELAYSTARTBLOCK) {
1078 0 : ASSERT(0);
1079 0 : return -EFSCORRUPTED;
1080 : }
1081 :
1082 695020 : if (!xfs_bmap_is_real_extent(&hmap))
1083 : break;
1084 :
1085 222890 : off = hmap.br_startoff + hmap.br_blockcount;
1086 : }
1087 282691 : scan_blocks = off - imap->br_startoff;
1088 :
1089 282691 : trace_xreap_bmapi_binval_scan(sc, imap, scan_blocks);
1090 :
1091 : /*
1092 : * If there are incore buffers for these blocks, invalidate them. If
1093 : * we can't (try)lock the buffer we assume it's owned by someone else
1094 : * and leave it alone. The buffer cache cannot detect aliasing, so
1095 : * employ nested loops to detect incore buffers of any plausible size.
1096 : */
1097 3027892 : while (bno < agbno_next) {
1098 10980810 : struct xrep_bufscan scan = {
1099 2745202 : .daddr = XFS_AGB_TO_DADDR(mp, agno, bno),
1100 2745202 : .max_sectors = xrep_bufscan_max_sectors(mp,
1101 : scan_blocks),
1102 2745203 : .daddr_step = XFS_FSB_TO_BB(mp, 1),
1103 : };
1104 2745203 : struct xfs_buf *bp;
1105 :
1106 5473814 : while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
1107 2728614 : if (xreap_buf_loggable(bp)) {
1108 2728613 : xfs_trans_bjoin(sc->tp, bp);
1109 2728614 : xfs_trans_binval(sc->tp, bp);
1110 : } else {
1111 0 : xfs_buf_stale(bp);
1112 0 : xfs_buf_relse(bp);
1113 : }
1114 2728613 : invalidated++;
1115 :
1116 : /*
1117 : * Stop invalidating if we've hit the limit; we should
1118 : * still have enough reservation left to free however
1119 : * much of the mapping we've seen so far.
1120 : */
1121 2728613 : if (invalidated > XREAP_MAX_BINVAL) {
1122 2 : imap->br_blockcount = agbno_next - bno;
1123 2 : goto out;
1124 : }
1125 : }
1126 :
1127 2745201 : bno++;
1128 2745201 : scan_blocks--;
1129 : }
1130 :
1131 282689 : out:
1132 282691 : trace_xreap_bmapi_binval(sc->sa.pag, agbno, imap->br_blockcount);
1133 282691 : return 0;
1134 : }
1135 :
1136 : /*
1137 : * Dispose of as much of the beginning of this file fork mapping as possible.
1138 : * The number of blocks disposed of is returned in @imap->br_blockcount.
1139 : */
STATIC int
xrep_reap_bmapi_iter(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*imap,
	bool			crosslinked)
{
	int			error;

	if (crosslinked) {
		/*
		 * If there are other rmappings, this block is cross linked and
		 * must not be freed. Remove the reverse mapping, leave the
		 * buffer cache in its possibly confused state, and move on.
		 * We don't want to risk discarding valid data buffers from
		 * anybody else who thinks they own the block, even though that
		 * runs the risk of stale buffer warnings in the future.
		 */
		trace_xreap_dispose_unmap_extent(sc->sa.pag,
				XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock),
				imap->br_blockcount);

		/*
		 * Schedule removal of the mapping from the fork. We use
		 * deferred log intents in this function to control the exact
		 * sequence of metadata updates.
		 */
		xfs_bmap_unmap_extent(sc->tp, ip, whichfork, imap);
		xfs_trans_mod_dquot_byino(sc->tp, ip, XFS_TRANS_DQ_BCOUNT,
				-(int64_t)imap->br_blockcount);
		xfs_rmap_unmap_extent(sc->tp, ip, whichfork, imap);
		/* Note: no extent free is scheduled for crosslinked blocks. */
		return 0;
	}

	/*
	 * If the block is not crosslinked, we can invalidate all the incore
	 * buffers for the extent, and then free the extent. This is a bit of
	 * a mess since we don't detect discontiguous buffers that are indexed
	 * by a block starting before the first block of the extent but overlap
	 * anyway.
	 */
	trace_xreap_dispose_free_extent(sc->sa.pag,
			XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock),
			imap->br_blockcount);

	/*
	 * Invalidate as many buffers as we can, starting at the beginning of
	 * this mapping. If this function sets blockcount to zero, the
	 * transaction is full of logged buffer invalidations, so we need to
	 * return early so that we can roll and retry.
	 */
	error = xreap_bmapi_binval(sc, ip, whichfork, imap);
	if (error || imap->br_blockcount == 0)
		return error;

	/*
	 * Schedule removal of the mapping from the fork. We use deferred log
	 * intents in this function to control the exact sequence of metadata
	 * updates.
	 */
	xfs_bmap_unmap_extent(sc->tp, ip, whichfork, imap);
	xfs_trans_mod_dquot_byino(sc->tp, ip, XFS_TRANS_DQ_BCOUNT,
			-(int64_t)imap->br_blockcount);
	/* Defer the free; skip discard since these were metadata blocks. */
	return xfs_free_extent_later(sc->tp, imap->br_startblock,
			imap->br_blockcount, NULL, XFS_AG_RESV_NONE,
			XFS_FREE_EXTENT_SKIP_DISCARD);
}
1208 :
1209 : /*
1210 : * Dispose of as much of this file extent as we can. Upon successful return,
1211 : * the imap will reflect the mapping that was removed from the fork.
1212 : */
1213 : STATIC int
1214 282688 : xreap_ifork_extent(
1215 : struct xfs_scrub *sc,
1216 : struct xfs_inode *ip,
1217 : int whichfork,
1218 : struct xfs_bmbt_irec *imap)
1219 : {
1220 282688 : xfs_agnumber_t agno;
1221 282688 : bool crosslinked;
1222 282688 : int error;
1223 :
1224 282688 : ASSERT(sc->sa.pag == NULL);
1225 :
1226 282688 : trace_xreap_ifork_extent(sc, ip, whichfork, imap);
1227 :
1228 282687 : agno = XFS_FSB_TO_AGNO(sc->mp, imap->br_startblock);
1229 282687 : sc->sa.pag = xfs_perag_get(sc->mp, agno);
1230 282692 : if (!sc->sa.pag)
1231 : return -EFSCORRUPTED;
1232 :
1233 282692 : error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &sc->sa.agf_bp);
1234 282692 : if (error)
1235 0 : goto out_pag;
1236 :
1237 : /*
1238 : * Decide the fate of the blocks at the beginning of the mapping, then
1239 : * update the mapping to use it with the unmap calls.
1240 : */
1241 282692 : error = xreap_bmapi_select(sc, ip, whichfork, imap, &crosslinked);
1242 282692 : if (error)
1243 0 : goto out_agf;
1244 :
1245 282692 : error = xrep_reap_bmapi_iter(sc, ip, whichfork, imap, crosslinked);
1246 282692 : if (error)
1247 0 : goto out_agf;
1248 :
1249 282692 : out_agf:
1250 282692 : xfs_trans_brelse(sc->tp, sc->sa.agf_bp);
1251 282692 : sc->sa.agf_bp = NULL;
1252 282692 : out_pag:
1253 282692 : xfs_perag_put(sc->sa.pag);
1254 282692 : sc->sa.pag = NULL;
1255 282692 : return error;
1256 : }
1257 :
1258 : /*
1259 : * Dispose of each block mapped to the given fork of the given file. Callers
1260 : * must hold ILOCK_EXCL, and ip can only be sc->ip or sc->tempip. The fork
1261 : * must not have any delalloc reservations.
1262 : */
int
xrep_reap_ifork(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip,
	int			whichfork)
{
	xfs_fileoff_t		off = 0;
	int			bmap_flags = xfs_bmapi_aflag(whichfork);
	int			error;

	/* Reaping requires rmapbt to distinguish crosslinked blocks. */
	ASSERT(xfs_has_rmapbt(sc->mp));
	/* Only the file being scrubbed or the repair tempfile may be reaped. */
	ASSERT(ip == sc->ip || ip == sc->tempip);
	ASSERT(whichfork == XFS_ATTR_FORK || !XFS_IS_REALTIME_INODE(ip));

	while (off < XFS_MAX_FILEOFF) {
		struct xfs_bmbt_irec	imap;
		int			nimaps = 1;

		/* Read the next extent, skip past holes and delalloc. */
		error = xfs_bmapi_read(ip, off, XFS_MAX_FILEOFF - off, &imap,
				&nimaps, bmap_flags);
		if (error)
			return error;
		if (nimaps != 1 || imap.br_startblock == DELAYSTARTBLOCK) {
			ASSERT(0);
			return -EFSCORRUPTED;
		}

		/*
		 * If this is a real space mapping, reap as much of it as we
		 * can in a single transaction.
		 */
		if (xfs_bmap_is_real_extent(&imap)) {
			error = xreap_ifork_extent(sc, ip, whichfork, &imap);
			if (error)
				return error;

			/* Commit the deferred unmap/free work before moving on. */
			error = xfs_defer_finish(&sc->tp);
			if (error)
				return error;
		}

		/*
		 * The callee may have shortened imap.br_blockcount to the
		 * portion it actually disposed of, so this advance revisits
		 * any unprocessed remainder on the next iteration.
		 */
		off = imap.br_startoff + imap.br_blockcount;
	}

	return 0;
}
|