Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2022-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_btree.h"
13 : #include "xfs_log_format.h"
14 : #include "xfs_trans.h"
15 : #include "xfs_sb.h"
16 : #include "xfs_inode.h"
17 : #include "xfs_alloc.h"
18 : #include "xfs_alloc_btree.h"
19 : #include "xfs_ialloc.h"
20 : #include "xfs_ialloc_btree.h"
21 : #include "xfs_rmap.h"
22 : #include "xfs_rmap_btree.h"
23 : #include "xfs_refcount.h"
24 : #include "xfs_refcount_btree.h"
25 : #include "xfs_extent_busy.h"
26 : #include "xfs_ag.h"
27 : #include "xfs_ag_resv.h"
28 : #include "xfs_quota.h"
29 : #include "xfs_qm.h"
30 : #include "xfs_bmap.h"
31 : #include "xfs_da_format.h"
32 : #include "xfs_da_btree.h"
33 : #include "xfs_attr.h"
34 : #include "xfs_attr_remote.h"
35 : #include "xfs_defer.h"
36 : #include "xfs_imeta.h"
37 : #include "xfs_rtgroup.h"
38 : #include "xfs_rtrmap_btree.h"
39 : #include "scrub/scrub.h"
40 : #include "scrub/common.h"
41 : #include "scrub/trace.h"
42 : #include "scrub/repair.h"
43 : #include "scrub/bitmap.h"
44 : #include "scrub/reap.h"
45 :
46 : /*
47 : * Disposal of Blocks from Old Metadata
48 : *
49 : * Now that we've constructed a new btree to replace the damaged one, we want
50 : * to dispose of the blocks that (we think) the old btree was using.
51 : * Previously, we used the rmapbt to collect the extents (bitmap) with the
52 : * rmap owner corresponding to the tree we rebuilt, collected extents for any
53 : * blocks with the same rmap owner that are owned by another data structure
54 : * (sublist), and subtracted sublist from bitmap. In theory the extents
55 : * remaining in bitmap are the old btree's blocks.
56 : *
57 : * Unfortunately, it's possible that the btree was crosslinked with other
58 : * blocks on disk. The rmap data can tell us if there are multiple owners, so
59 : * if the rmapbt says there is an owner of this block other than @oinfo, then
60 : * the block is crosslinked. Remove the reverse mapping and continue.
61 : *
62 : * If there is one rmap record, we can free the block, which removes the
63 : * reverse mapping but doesn't add the block to the free space. Our repair
64 : * strategy is to hope the other metadata objects crosslinked on this block
65 : * will be rebuilt (atop different blocks), thereby removing all the cross
66 : * links.
67 : *
68 : * If there are no rmap records at all, we also free the block. If the btree
69 : * being rebuilt lives in the free space (bnobt/cntbt/rmapbt) then there isn't
70 : * supposed to be a rmap record and everything is ok. For other btrees there
71 : * had to have been an rmap entry for the block to have ended up on @bitmap,
72 : * so if it's gone now there's something wrong and the fs will shut down.
73 : *
74 : * Note: If there are multiple rmap records with only the same rmap owner as
75 : * the btree we're trying to rebuild and the block is indeed owned by another
76 : * data structure with the same rmap owner, then the block will be in sublist
77 : * and therefore doesn't need disposal. If there are multiple rmap records
78 : * with only the same rmap owner but the block is not owned by something with
79 : * the same rmap owner, the block will be freed.
80 : *
81 : * The caller is responsible for locking the AG headers/inode for the entire
82 : * rebuild operation so that nothing else can sneak in and change the incore
83 : * state while we're not looking. We must also invalidate any buffers
84 : * associated with @bitmap.
85 : */
86 :
/*
 * Information about reaping extents after a repair.  The counters below feed
 * the xreap_want_roll/xreap_want_defer_finish heuristics that decide when to
 * roll the scrub transaction or finish the deferred-op chain mid-reap.
 */
struct xreap_state {
	struct xfs_scrub		*sc;

	/* Reverse mapping owner and metadata reservation type. */
	const struct xfs_owner_info	*oinfo;
	enum xfs_ag_resv_type		resv;

	/* If true, roll the transaction before reaping the next extent. */
	bool				force_roll;

	/* Number of deferred reaps attached to the current transaction. */
	unsigned int			deferred;

	/* Number of invalidated buffers logged to the current transaction. */
	unsigned int			invalidated;

	/* Number of deferred reaps queued during the whole reap sequence. */
	unsigned long long		total_deferred;
};
107 :
/*
 * Put a single block back on the AGFL.  Requires sc->sa.agf_bp to be joined
 * to the scrub transaction.  Returns 0 or a negative errno.
 */
STATIC int
xreap_put_freelist(
	struct xfs_scrub	*sc,
	xfs_agblock_t		agbno)
{
	struct xfs_buf		*agfl_bp;
	int			error;

	/* Make sure there's space on the freelist. */
	error = xrep_fix_freelist(sc, 0);
	if (error)
		return error;

	/*
	 * Since we're "freeing" a lost block onto the AGFL, we have to
	 * create an rmap for the block prior to merging it or else other
	 * parts will break.
	 */
	error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno, 1,
			&XFS_RMAP_OINFO_AG);
	if (error)
		return error;

	/* Put the block on the AGFL. */
	error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
	if (error)
		return error;

	error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp,
			agfl_bp, agbno, 0);
	if (error)
		return error;
	/*
	 * Mark the block busy so it cannot be reallocated before this
	 * transaction commits; don't bother issuing a discard for one block.
	 */
	xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1,
			XFS_EXTENT_BUSY_SKIP_DISCARD);

	return 0;
}
146 :
147 : /* Are there any uncommitted reap operations? */
148 523030 : static inline bool xreap_dirty(const struct xreap_state *rs)
149 : {
150 523030 : if (rs->force_roll)
151 : return true;
152 523030 : if (rs->deferred)
153 : return true;
154 273180 : if (rs->invalidated)
155 : return true;
156 273180 : if (rs->total_deferred)
157 0 : return true;
158 : return false;
159 : }
160 :
161 : #define XREAP_MAX_DEFERRED (128)
162 : #define XREAP_MAX_BINVAL (2048)
163 :
164 : /*
165 : * Decide if we want to roll the transaction after reaping an extent. We don't
166 : * want to overrun the transaction reservation, so we prohibit more than
167 : * 128 EFIs per transaction. For the same reason, we limit the number
168 : * of buffer invalidations to 2048.
169 : */
170 : static inline bool xreap_want_roll(const struct xreap_state *rs)
171 : {
172 300806 : if (rs->force_roll)
173 : return true;
174 0 : if (rs->deferred > XREAP_MAX_DEFERRED)
175 : return true;
176 300804 : if (rs->invalidated > XREAP_MAX_BINVAL)
177 0 : return true;
178 : return false;
179 : }
180 :
181 : static inline void xreap_reset(struct xreap_state *rs)
182 : {
183 2 : rs->total_deferred += rs->deferred;
184 2 : rs->deferred = 0;
185 2 : rs->invalidated = 0;
186 2 : rs->force_roll = false;
187 2 : }
188 :
189 : #define XREAP_MAX_DEFER_CHAIN (2048)
190 :
191 : /*
192 : * Decide if we want to finish the deferred ops that are attached to the scrub
193 : * transaction. We don't want to queue huge chains of deferred ops because
194 : * that can consume a lot of log space and kernel memory. Hence we trigger a
195 : * xfs_defer_finish if there are more than 2048 deferred reap operations or the
196 : * caller did some real work.
197 : */
198 : static inline bool
199 : xreap_want_defer_finish(const struct xreap_state *rs)
200 : {
201 301454 : if (rs->force_roll)
202 : return true;
203 300806 : if (rs->total_deferred > XREAP_MAX_DEFER_CHAIN)
204 : return true;
205 : return false;
206 : }
207 :
208 : static inline void xreap_defer_finish_reset(struct xreap_state *rs)
209 : {
210 648 : rs->total_deferred = 0;
211 648 : rs->deferred = 0;
212 648 : rs->invalidated = 0;
213 648 : rs->force_roll = false;
214 648 : }
215 :
216 : /*
217 : * Compute the maximum length of a buffer cache scan (in units of sectors),
218 : * given a quantity of fs blocks.
219 : */
220 : xfs_daddr_t
221 42558 : xrep_bufscan_max_sectors(
222 : struct xfs_mount *mp,
223 : xfs_extlen_t fsblocks)
224 : {
225 3240130 : int max_fsbs;
226 :
227 : /* Remote xattr values are the largest buffers that we support. */
228 42558 : max_fsbs = xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
229 :
230 3240120 : return XFS_FSB_TO_BB(mp, min_t(xfs_extlen_t, fsblocks, max_fsbs));
231 : }
232 :
233 : /*
234 : * Return an incore buffer from a sector scan, or NULL if there are no buffers
235 : * left to return.
236 : */
237 : struct xfs_buf *
238 6425402 : xrep_bufscan_advance(
239 : struct xfs_mount *mp,
240 : struct xrep_bufscan *scan)
241 : {
242 6425402 : scan->__sector_count += scan->daddr_step;
243 40365981 : while (scan->__sector_count <= scan->max_sectors) {
244 37168411 : struct xfs_buf *bp = NULL;
245 37168411 : int error;
246 :
247 37168411 : error = xfs_buf_incore(mp->m_ddev_targp, scan->daddr,
248 : scan->__sector_count, XBF_LIVESCAN, &bp);
249 37168421 : if (!error)
250 3227842 : return bp;
251 :
252 33940579 : scan->__sector_count += scan->daddr_step;
253 : }
254 :
255 : return NULL;
256 : }
257 :
/*
 * Try to invalidate the incore buffers for an extent that we're freeing.
 * If we hit the invalidation limit, *aglenp is trimmed to the number of
 * blocks actually invalidated so the caller frees only that much and rolls.
 */
STATIC void
xreap_agextent_binval(
	struct xreap_state	*rs,
	xfs_agblock_t		agbno,
	xfs_extlen_t		*aglenp)
{
	struct xfs_scrub	*sc = rs->sc;
	struct xfs_perag	*pag = sc->sa.pag;
	struct xfs_mount	*mp = sc->mp;
	xfs_agnumber_t		agno = sc->sa.pag->pag_agno;
	xfs_agblock_t		agbno_next = agbno + *aglenp;
	xfs_agblock_t		bno = agbno;

	/*
	 * Avoid invalidating AG headers and post-EOFS blocks because we never
	 * own those.
	 */
	if (!xfs_verify_agbno(pag, agbno) ||
	    !xfs_verify_agbno(pag, agbno_next - 1))
		return;

	/*
	 * If there are incore buffers for these blocks, invalidate them.  We
	 * assume that the lack of any other known owners means that the buffer
	 * can be locked without risk of deadlocking.  The buffer cache cannot
	 * detect aliasing, so employ nested loops to scan for incore buffers
	 * of any plausible size.
	 */
	while (bno < agbno_next) {
		/* Scan every buffer length that could start at bno. */
		struct xrep_bufscan	scan = {
			.daddr		= XFS_AGB_TO_DADDR(mp, agno, bno),
			.max_sectors	= xrep_bufscan_max_sectors(mp,
							agbno_next - bno),
			.daddr_step	= XFS_FSB_TO_BB(mp, 1),
		};
		struct xfs_buf	*bp;

		while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
			xfs_trans_bjoin(sc->tp, bp);
			xfs_trans_binval(sc->tp, bp);
			rs->invalidated++;

			/*
			 * Stop invalidating if we've hit the limit; we should
			 * still have enough reservation left to free however
			 * far we've gotten.
			 */
			if (rs->invalidated > XREAP_MAX_BINVAL) {
				*aglenp -= agbno_next - bno;
				goto out;
			}
		}

		bno++;
	}

out:
	trace_xreap_agextent_binval(sc->sa.pag, agbno, *aglenp);
}
318 :
/*
 * Figure out the longest run of blocks that we can dispose of with a single
 * call.  Cross-linked blocks should have their reverse mappings removed, but
 * single-owner extents can be freed.  AGFL blocks can only be put back one at
 * a time.  On success, *aglenp is the run length and *crosslinked says
 * whether the whole run is cross-linked.
 */
STATIC int
xreap_agextent_select(
	struct xreap_state	*rs,
	xfs_agblock_t		agbno,
	xfs_agblock_t		agbno_next,
	bool			*crosslinked,
	xfs_extlen_t		*aglenp)
{
	struct xfs_scrub	*sc = rs->sc;
	struct xfs_btree_cur	*cur;
	xfs_agblock_t		bno = agbno + 1;
	xfs_extlen_t		len = 1;
	int			error;

	/*
	 * Determine if there are any other rmap records covering the first
	 * block of this extent.  If so, the block is crosslinked.
	 */
	cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
			sc->sa.pag);
	error = xfs_rmap_has_other_keys(cur, agbno, 1, rs->oinfo,
			crosslinked);
	if (error)
		goto out_cur;

	/* AGFL blocks can only be dealt with one at a time. */
	if (rs->resv == XFS_AG_RESV_AGFL)
		goto out_found;

	/*
	 * Figure out how many of the subsequent blocks have the same crosslink
	 * status.
	 */
	while (bno < agbno_next) {
		bool		also_crosslinked;

		error = xfs_rmap_has_other_keys(cur, bno, 1, rs->oinfo,
				&also_crosslinked);
		if (error)
			goto out_cur;

		if (*crosslinked != also_crosslinked)
			break;

		len++;
		bno++;
	}

out_found:
	*aglenp = len;
	trace_xreap_agextent_select(sc->sa.pag, agbno, len, *crosslinked);
out_cur:
	xfs_btree_del_cursor(cur, error);
	return error;
}
380 :
/*
 * Dispose of as much of the beginning of this AG extent as possible.  The
 * number of blocks disposed of will be returned in @aglenp; this may be less
 * than the caller asked for if buffer invalidation filled the transaction.
 */
STATIC int
xreap_agextent_iter(
	struct xreap_state	*rs,
	xfs_agblock_t		agbno,
	xfs_extlen_t		*aglenp,
	bool			crosslinked)
{
	struct xfs_scrub	*sc = rs->sc;
	xfs_fsblock_t		fsbno;
	int			error = 0;

	/* Metadata inode space reservations are handled elsewhere. */
	ASSERT(rs->resv != XFS_AG_RESV_IMETA);

	fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, agbno);

	/*
	 * If there are other rmappings, this block is cross linked and must
	 * not be freed.  Remove the reverse mapping and move on.  Otherwise,
	 * we were the only owner of the block, so free the extent, which will
	 * also remove the rmap.
	 *
	 * XXX: XFS doesn't support detecting the case where a single block
	 * metadata structure is crosslinked with a multi-block structure
	 * because the buffer cache doesn't detect aliasing problems, so we
	 * can't fix 100% of crosslinking problems (yet).  The verifiers will
	 * blow on writeout, the filesystem will shut down, and the admin gets
	 * to run xfs_repair.
	 */
	if (crosslinked) {
		trace_xreap_dispose_unmap_extent(sc->sa.pag, agbno, *aglenp);

		rs->force_roll = true;

		if (rs->oinfo == &XFS_RMAP_OINFO_COW) {
			/*
			 * If we're unmapping CoW staging extents, remove the
			 * records from the refcountbt, which will remove the
			 * rmap record as well.
			 */
			xfs_refcount_free_cow_extent(sc->tp, false, fsbno,
					*aglenp);
			return 0;
		}

		return xfs_rmap_free(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno,
				*aglenp, rs->oinfo);
	}

	trace_xreap_dispose_free_extent(sc->sa.pag, agbno, *aglenp);

	/*
	 * Invalidate as many buffers as we can, starting at agbno.  If this
	 * function sets *aglenp to zero, the transaction is full of logged
	 * buffer invalidations, so we need to return early so that we can
	 * roll and retry.
	 */
	xreap_agextent_binval(rs, agbno, aglenp);
	if (*aglenp == 0) {
		ASSERT(xreap_want_roll(rs));
		return 0;
	}

	/*
	 * If we're getting rid of CoW staging extents, use deferred work items
	 * to remove the refcountbt records (which removes the rmap records)
	 * and free the extent.  We're not worried about the system going down
	 * here because log recovery walks the refcount btree to clean out the
	 * CoW staging extents.
	 */
	if (rs->oinfo == &XFS_RMAP_OINFO_COW) {
		ASSERT(rs->resv == XFS_AG_RESV_NONE);

		xfs_refcount_free_cow_extent(sc->tp, false, fsbno, *aglenp);
		error = xfs_free_extent_later(sc->tp, fsbno, *aglenp, NULL,
				rs->resv, XFS_FREE_EXTENT_SKIP_DISCARD);
		if (error)
			return error;

		rs->force_roll = true;
		return 0;
	}

	/* Put blocks back on the AGFL one at a time. */
	if (rs->resv == XFS_AG_RESV_AGFL) {
		ASSERT(*aglenp == 1);
		error = xreap_put_freelist(sc, agbno);
		if (error)
			return error;

		rs->force_roll = true;
		return 0;
	}

	/*
	 * Use deferred frees to get rid of the old btree blocks to try to
	 * minimize the window in which we could crash and lose the old blocks.
	 */
	error = xfs_free_extent_later(sc->tp, fsbno, *aglenp, rs->oinfo,
			rs->resv, XFS_FREE_EXTENT_SKIP_DISCARD);
	if (error)
		return error;

	rs->deferred++;
	return 0;
}
490 :
/*
 * Break an AG metadata extent into sub-extents by fate (crosslinked, not
 * crosslinked), and dispose of each sub-extent separately.  This is the
 * xagb_bitmap_walk callback for xrep_reap_agblocks; @fsbno is actually an
 * AG block number here.
 */
STATIC int
xreap_agmeta_extent(
	uint64_t		fsbno,
	uint64_t		len,
	void			*priv)
{
	struct xreap_state	*rs = priv;
	struct xfs_scrub	*sc = rs->sc;
	xfs_agblock_t		agbno = fsbno;
	xfs_agblock_t		agbno_next = agbno + len;
	int			error = 0;

	ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
	ASSERT(sc->ip == NULL);

	while (agbno < agbno_next) {
		xfs_extlen_t	aglen;
		bool		crosslinked;

		error = xreap_agextent_select(rs, agbno, agbno_next,
				&crosslinked, &aglen);
		if (error)
			return error;

		error = xreap_agextent_iter(rs, agbno, &aglen, crosslinked);
		if (error)
			return error;

		/* Drain deferred work or roll before the reservation runs out. */
		if (xreap_want_defer_finish(rs)) {
			error = xrep_defer_finish(sc);
			if (error)
				return error;
			xreap_defer_finish_reset(rs);
		} else if (xreap_want_roll(rs)) {
			error = xrep_roll_ag_trans(sc);
			if (error)
				return error;
			xreap_reset(rs);
		}

		agbno += aglen;
	}

	return 0;
}
540 :
/*
 * Dispose of every block of every AG metadata extent in the bitmap.
 * Requires the rmapbt (to detect crosslinked blocks) and must be called
 * without a scrub inode attached.
 */
int
xrep_reap_agblocks(
	struct xfs_scrub	*sc,
	struct xagb_bitmap	*bitmap,
	const struct xfs_owner_info	*oinfo,
	enum xfs_ag_resv_type	type)
{
	struct xreap_state	rs = {
		.sc			= sc,
		.oinfo			= oinfo,
		.resv			= type,
	};
	int			error;

	ASSERT(xfs_has_rmapbt(sc->mp));
	ASSERT(sc->ip == NULL);

	error = xagb_bitmap_walk(bitmap, xreap_agmeta_extent, &rs);
	if (error)
		return error;

	/* Commit whatever deferred work is still attached. */
	if (xreap_dirty(&rs))
		return xrep_defer_finish(sc);

	return 0;
}
568 :
/*
 * Break a file metadata extent into sub-extents by fate (crosslinked, not
 * crosslinked), and dispose of each sub-extent separately.  The extent must
 * not cross an AG boundary.  This is the bitmap-walk callback used by
 * xrep_reap_fsblocks and xrep_reap_metadir_fsblocks.
 */
STATIC int
xreap_fsmeta_extent(
	uint64_t		fsbno,
	uint64_t		len,
	void			*priv)
{
	struct xreap_state	*rs = priv;
	struct xfs_scrub	*sc = rs->sc;
	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
	xfs_agblock_t		agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
	xfs_agblock_t		agbno_next = agbno + len;
	int			error = 0;

	ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
	ASSERT(sc->ip != NULL);
	ASSERT(!sc->sa.pag);

	/*
	 * We're reaping blocks after repairing file metadata, which means that
	 * we have to init the xchk_ag structure ourselves.
	 */
	sc->sa.pag = xfs_perag_get(sc->mp, agno);
	if (!sc->sa.pag)
		return -EFSCORRUPTED;

	error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &sc->sa.agf_bp);
	if (error)
		goto out_pag;

	while (agbno < agbno_next) {
		xfs_extlen_t	aglen;
		bool		crosslinked;

		error = xreap_agextent_select(rs, agbno, agbno_next,
				&crosslinked, &aglen);
		if (error)
			goto out_agf;

		error = xreap_agextent_iter(rs, agbno, &aglen, crosslinked);
		if (error)
			goto out_agf;

		if (xreap_want_defer_finish(rs)) {
			/*
			 * Holds the AGF buffer across the deferred chain
			 * processing.
			 */
			error = xrep_defer_finish(sc);
			if (error)
				goto out_agf;
			xreap_defer_finish_reset(rs);
		} else if (xreap_want_roll(rs)) {
			/*
			 * Hold the AGF buffer across the transaction roll so
			 * that we don't have to reattach it to the scrub
			 * context.
			 */
			xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
			error = xfs_trans_roll_inode(&sc->tp, sc->ip);
			xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
			if (error)
				goto out_agf;
			xreap_reset(rs);
		}

		agbno += aglen;
	}

out_agf:
	xfs_trans_brelse(sc->tp, sc->sa.agf_bp);
	sc->sa.agf_bp = NULL;
out_pag:
	xfs_perag_put(sc->sa.pag);
	sc->sa.pag = NULL;
	return error;
}
650 :
/*
 * Dispose of every block of every fs metadata extent in the bitmap.
 * Do not use this to dispose of the mappings in an ondisk inode fork.
 * The scrub context must have an inode attached.
 */
int
xrep_reap_fsblocks(
	struct xfs_scrub	*sc,
	struct xfsb_bitmap	*bitmap,
	const struct xfs_owner_info	*oinfo)
{
	struct xreap_state	rs = {
		.sc			= sc,
		.oinfo			= oinfo,
		.resv			= XFS_AG_RESV_NONE,
	};
	int			error;

	ASSERT(xfs_has_rmapbt(sc->mp));
	ASSERT(sc->ip != NULL);

	error = xfsb_bitmap_walk(bitmap, xreap_fsmeta_extent, &rs);
	if (error)
		return error;

	/* Commit whatever deferred work is still attached. */
	if (xreap_dirty(&rs))
		return xrep_defer_finish(sc);

	return 0;
}
680 :
681 : #ifdef CONFIG_XFS_RT
/*
 * Figure out the longest run of blocks that we can dispose of with a single
 * call.  Cross-linked blocks should have their reverse mappings removed, but
 * single-owner extents can be freed.  Units are rt blocks, not rt extents.
 * On success, *rglenp is the run length and *crosslinked its crosslink state.
 */
STATIC int
xreap_rgextent_select(
	struct xreap_state	*rs,
	xfs_rgblock_t		rgbno,
	xfs_rgblock_t		rgbno_next,
	bool			*crosslinked,
	xfs_extlen_t		*rglenp)
{
	struct xfs_scrub	*sc = rs->sc;
	struct xfs_btree_cur	*cur;
	xfs_rgblock_t		bno = rgbno + 1;
	xfs_extlen_t		len = 1;
	int			error;

	/*
	 * Determine if there are any other rmap records covering the first
	 * block of this extent.  If so, the block is crosslinked.
	 */
	cur = xfs_rtrmapbt_init_cursor(sc->mp, sc->tp, sc->sr.rtg,
			sc->sr.rtg->rtg_rmapip);
	error = xfs_rmap_has_other_keys(cur, rgbno, 1, rs->oinfo,
			crosslinked);
	if (error)
		goto out_cur;

	/*
	 * Figure out how many of the subsequent blocks have the same crosslink
	 * status.
	 */
	while (bno < rgbno_next) {
		bool		also_crosslinked;

		error = xfs_rmap_has_other_keys(cur, bno, 1, rs->oinfo,
				&also_crosslinked);
		if (error)
			goto out_cur;

		if (*crosslinked != also_crosslinked)
			break;

		len++;
		bno++;
	}

	*rglenp = len;
	trace_xreap_rgextent_select(sc->sr.rtg, rgbno, len, *crosslinked);
out_cur:
	xfs_btree_del_cursor(cur, error);
	return error;
}
737 :
/*
 * Dispose of as much of the beginning of this rtgroup extent as possible.
 * The number of blocks disposed of will be returned in @rglenp.  Only CoW
 * staging extents (XFS_RMAP_OINFO_COW) are supported here.
 */
STATIC int
xreap_rgextent_iter(
	struct xreap_state	*rs,
	xfs_rgblock_t		rgbno,
	xfs_extlen_t		*rglenp,
	bool			crosslinked)
{
	struct xfs_scrub	*sc = rs->sc;
	xfs_rtblock_t		rtbno;
	int			error;

	/*
	 * The only caller so far is CoW fork repair, so we only know how to
	 * unlink or free CoW staging extents.  Here we don't have to worry
	 * about invalidating buffers!
	 */
	if (rs->oinfo != &XFS_RMAP_OINFO_COW) {
		ASSERT(rs->oinfo == &XFS_RMAP_OINFO_COW);
		return -EFSCORRUPTED;
	}
	ASSERT(rs->resv == XFS_AG_RESV_NONE);

	rtbno = xfs_rgbno_to_rtb(sc->mp, sc->sr.rtg->rtg_rgno, rgbno);

	/*
	 * If there are other rmappings, this block is cross linked and must
	 * not be freed.  Remove the forward and reverse mapping and move on.
	 */
	if (crosslinked) {
		trace_xreap_dispose_unmap_rtextent(sc->sr.rtg, rgbno, *rglenp);

		xfs_refcount_free_cow_extent(sc->tp, true, rtbno, *rglenp);
		rs->deferred++;
		return 0;
	}

	trace_xreap_dispose_free_rtextent(sc->sr.rtg, rgbno, *rglenp);

	/*
	 * The CoW staging extent is not crosslinked.  Use deferred work items
	 * to remove the refcountbt records (which removes the rmap records)
	 * and free the extent.  We're not worried about the system going down
	 * here because log recovery walks the refcount btree to clean out the
	 * CoW staging extents.
	 */
	xfs_refcount_free_cow_extent(sc->tp, true, rtbno, *rglenp);
	error = xfs_free_extent_later(sc->tp, rtbno, *rglenp, NULL,
			rs->resv,
			XFS_FREE_EXTENT_REALTIME |
			XFS_FREE_EXTENT_SKIP_DISCARD);
	if (error)
		return error;

	rs->deferred++;
	return 0;
}
798 :
799 : #define XREAP_RTGLOCK_ALL (XFS_RTGLOCK_BITMAP | \
800 : XFS_RTGLOCK_RMAP | \
801 : XFS_RTGLOCK_REFCOUNT)
802 :
/*
 * Break a rt file metadata extent into sub-extents by fate (crosslinked, not
 * crosslinked), and dispose of each sub-extent separately.  The extent must
 * be aligned to a realtime extent.  This is the xrtb_bitmap_walk callback
 * for xrep_reap_rtblocks.
 */
STATIC int
xreap_rtmeta_extent(
	uint64_t		rtbno,
	uint64_t		len,
	void			*priv)
{
	struct xreap_state	*rs = priv;
	struct xfs_scrub	*sc = rs->sc;
	xfs_rgnumber_t		rgno;
	xfs_rgblock_t		rgbno = xfs_rtb_to_rgbno(sc->mp, rtbno, &rgno);
	xfs_rgblock_t		rgbno_next = rgbno + len;
	int			error = 0;

	ASSERT(sc->ip != NULL);
	ASSERT(!sc->sr.rtg);

	/*
	 * We're reaping blocks after repairing file metadata, which means that
	 * we have to init the xchk_ag structure ourselves.
	 */
	sc->sr.rtg = xfs_rtgroup_get(sc->mp, rgno);
	if (!sc->sr.rtg)
		return -EFSCORRUPTED;

	/* Lock the bitmap, rmap, and refcount metadata for this rtgroup. */
	xfs_rtgroup_lock(NULL, sc->sr.rtg, XREAP_RTGLOCK_ALL);

	while (rgbno < rgbno_next) {
		xfs_extlen_t	rglen;
		bool		crosslinked;

		error = xreap_rgextent_select(rs, rgbno, rgbno_next,
				&crosslinked, &rglen);
		if (error)
			goto out_unlock;

		error = xreap_rgextent_iter(rs, rgbno, &rglen, crosslinked);
		if (error)
			goto out_unlock;

		if (xreap_want_defer_finish(rs)) {
			error = xfs_defer_finish(&sc->tp);
			if (error)
				goto out_unlock;
			xreap_defer_finish_reset(rs);
		} else if (xreap_want_roll(rs)) {
			error = xfs_trans_roll_inode(&sc->tp, sc->ip);
			if (error)
				goto out_unlock;
			xreap_reset(rs);
		}

		rgbno += rglen;
	}

out_unlock:
	xfs_rtgroup_unlock(sc->sr.rtg, XREAP_RTGLOCK_ALL);
	xfs_rtgroup_put(sc->sr.rtg);
	sc->sr.rtg = NULL;
	return error;
}
868 :
/*
 * Dispose of every block of every rt metadata extent in the bitmap.
 * Do not use this to dispose of the mappings in an ondisk inode fork.
 * The scrub context must have an inode attached.
 */
int
xrep_reap_rtblocks(
	struct xfs_scrub	*sc,
	struct xrtb_bitmap	*bitmap,
	const struct xfs_owner_info	*oinfo)
{
	struct xreap_state	rs = {
		.sc			= sc,
		.oinfo			= oinfo,
		.resv			= XFS_AG_RESV_NONE,
	};
	int			error;

	ASSERT(xfs_has_rmapbt(sc->mp));
	ASSERT(sc->ip != NULL);

	error = xrtb_bitmap_walk(bitmap, xreap_rtmeta_extent, &rs);
	if (error)
		return error;

	/* Commit whatever deferred work is still attached. */
	if (xreap_dirty(&rs))
		return xrep_defer_finish(sc);

	return 0;
}
898 : #endif /* CONFIG_XFS_RT */
899 :
/*
 * Dispose of every block of an old metadata btree that used to be rooted in a
 * metadata directory file.  The rmap owner is the data fork bmbt owner of the
 * scrub inode, which must be a metadata directory inode.
 */
int
xrep_reap_metadir_fsblocks(
	struct xfs_scrub	*sc,
	struct xfsb_bitmap	*bitmap)
{
	/*
	 * Reap old metadir btree blocks with XFS_AG_RESV_NONE because the old
	 * blocks are no longer mapped by the inode, and inode metadata space
	 * reservations can only account freed space to the i_nblocks.
	 */
	struct xfs_owner_info	oinfo;
	struct xreap_state	rs = {
		.sc			= sc,
		.oinfo			= &oinfo,
		.resv			= XFS_AG_RESV_NONE,
	};
	int			error;

	ASSERT(xfs_has_rmapbt(sc->mp));
	ASSERT(sc->ip != NULL);
	ASSERT(xfs_is_metadir_inode(sc->ip));

	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, XFS_DATA_FORK);

	error = xfsb_bitmap_walk(bitmap, xreap_fsmeta_extent, &rs);
	if (error)
		return error;

	/* Commit whatever deferred work is still attached. */
	if (xreap_dirty(&rs))
		return xrep_defer_finish(sc);

	return 0;
}
937 :
/*
 * Metadata files are not supposed to share blocks with anything else.
 * If blocks are shared, we remove the reverse mapping (thus reducing the
 * crosslink factor); if blocks are not shared, we also need to free them.
 *
 * This first step determines the longest subset of the passed-in imap
 * (starting at its beginning) that is either crosslinked or not crosslinked.
 * The blockcount will be adjusted down as needed.
 */
STATIC int
xreap_bmapi_select(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*imap,
	bool			*crosslinked)
{
	struct xfs_owner_info	oinfo;
	struct xfs_btree_cur	*cur;
	xfs_filblks_t		len = 1;
	xfs_agblock_t		bno;
	xfs_agblock_t		agbno;
	xfs_agblock_t		agbno_next;
	int			error;

	agbno = XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock);
	agbno_next = agbno + imap->br_blockcount;

	cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
			sc->sa.pag);

	/* Check the first block against this fork mapping's rmap owner. */
	xfs_rmap_ino_owner(&oinfo, ip->i_ino, whichfork, imap->br_startoff);
	error = xfs_rmap_has_other_keys(cur, agbno, 1, &oinfo, crosslinked);
	if (error)
		goto out_cur;

	bno = agbno + 1;
	while (bno < agbno_next) {
		bool		also_crosslinked;

		/* Advance the file offset along with the block number. */
		oinfo.oi_offset++;
		error = xfs_rmap_has_other_keys(cur, bno, 1, &oinfo,
				&also_crosslinked);
		if (error)
			goto out_cur;

		if (also_crosslinked != *crosslinked)
			break;

		len++;
		bno++;
	}

	imap->br_blockcount = len;
	trace_xreap_bmapi_select(sc->sa.pag, agbno, len, *crosslinked);
out_cur:
	xfs_btree_del_cursor(cur, error);
	return error;
}
997 :
998 : /*
999 : * Decide if this buffer can be joined to a transaction. This is true for most
1000 : * buffers, but there are two cases that we want to catch: large remote xattr
1001 : * value buffers are not logged and can overflow the buffer log item dirty
1002 : * bitmap size; and oversized cached buffers if things have really gone
1003 : * haywire.
1004 : */
1005 : static inline bool
1006 2007609 : xreap_buf_loggable(
1007 : const struct xfs_buf *bp)
1008 : {
1009 2007609 : int i;
1010 :
1011 4015184 : for (i = 0; i < bp->b_map_count; i++) {
1012 2007609 : int chunks;
1013 2007609 : int map_size;
1014 :
1015 2007609 : chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len),
1016 : XFS_BLF_CHUNK);
1017 2007609 : map_size = DIV_ROUND_UP(chunks, NBWORD);
1018 2007609 : if (map_size > XFS_BLF_DATAMAP_SIZE)
1019 : return false;
1020 : }
1021 :
1022 : return true;
1023 : }
1024 :
/*
 * Invalidate any buffers for this file mapping.  The @imap blockcount may be
 * adjusted downward if we need to roll the transaction.
 */
STATIC int
xreap_bmapi_binval(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*imap)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_perag	*pag = sc->sa.pag;
	int			bmap_flags = xfs_bmapi_aflag(whichfork);
	xfs_fileoff_t		off;
	xfs_fileoff_t		max_off;
	xfs_extlen_t		scan_blocks;
	xfs_agnumber_t		agno = sc->sa.pag->pag_agno;
	xfs_agblock_t		bno;
	xfs_agblock_t		agbno;
	xfs_agblock_t		agbno_next;
	unsigned int		invalidated = 0;
	int			error;

	/*
	 * Avoid invalidating AG headers and post-EOFS blocks because we never
	 * own those.
	 */
	agbno = bno = XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock);
	agbno_next = agbno + imap->br_blockcount;
	if (!xfs_verify_agbno(pag, agbno) ||
	    !xfs_verify_agbno(pag, agbno_next - 1))
		return 0;

	/*
	 * Buffers for file blocks can span multiple contiguous mappings.  This
	 * means that for each block in the mapping, there could exist an
	 * xfs_buf indexed by that block with any length up to the maximum
	 * buffer size (remote xattr values) or to the next hole in the fork.
	 * To set up our binval scan, first we need to figure out the location
	 * of the next hole.
	 */
	off = imap->br_startoff + imap->br_blockcount;
	max_off = off + xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
	while (off < max_off) {
		struct xfs_bmbt_irec	hmap;
		int			nhmaps = 1;

		error = xfs_bmapi_read(ip, off, max_off - off, &hmap,
				&nhmaps, bmap_flags);
		if (error)
			return error;
		if (nhmaps != 1 || hmap.br_startblock == DELAYSTARTBLOCK) {
			/* Delalloc should be impossible here; fork is bad. */
			ASSERT(0);
			return -EFSCORRUPTED;
		}

		/* A hole or unwritten/delayed mapping ends the scan window. */
		if (!xfs_bmap_is_real_extent(&hmap))
			break;

		off = hmap.br_startoff + hmap.br_blockcount;
	}
	/* Number of fork blocks a stale buffer could plausibly cover. */
	scan_blocks = off - imap->br_startoff;

	trace_xreap_bmapi_binval_scan(sc, imap, scan_blocks);

	/*
	 * If there are incore buffers for these blocks, invalidate them.  If
	 * we can't (try)lock the buffer we assume it's owned by someone else
	 * and leave it alone.  The buffer cache cannot detect aliasing, so
	 * employ nested loops to detect incore buffers of any plausible size.
	 */
	while (bno < agbno_next) {
		struct xrep_bufscan	scan = {
			.daddr		= XFS_AGB_TO_DADDR(mp, agno, bno),
			.max_sectors	= xrep_bufscan_max_sectors(mp,
								scan_blocks),
			.daddr_step	= XFS_FSB_TO_BB(mp, 1),
		};
		struct xfs_buf	*bp;

		while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
			/*
			 * Loggable buffers are invalidated through the
			 * transaction; oversized ones are staled directly
			 * since their log item bitmap would overflow.
			 */
			if (xreap_buf_loggable(bp)) {
				xfs_trans_bjoin(sc->tp, bp);
				xfs_trans_binval(sc->tp, bp);
			} else {
				xfs_buf_stale(bp);
				xfs_buf_relse(bp);
			}
			invalidated++;

			/*
			 * Stop invalidating if we've hit the limit; we should
			 * still have enough reservation left to free however
			 * much of the mapping we've seen so far.
			 */
			if (invalidated > XREAP_MAX_BINVAL) {
				/* Trim @imap so the caller rolls and retries. */
				imap->br_blockcount = agbno_next - bno;
				goto out;
			}
		}

		bno++;
		scan_blocks--;
	}

out:
	trace_xreap_bmapi_binval(sc->sa.pag, agbno, imap->br_blockcount);
	return 0;
}
1135 :
/*
 * Dispose of as much of the beginning of this file fork mapping as possible.
 * The number of blocks disposed of is returned in @imap->br_blockcount.
 *
 * @crosslinked must reflect the rmapbt state of the entire mapping, as
 * determined by xreap_bmapi_select.
 */
STATIC int
xrep_reap_bmapi_iter(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*imap,
	bool			crosslinked)
{
	int			error;

	if (crosslinked) {
		/*
		 * If there are other rmappings, this block is cross linked and
		 * must not be freed.  Remove the reverse mapping, leave the
		 * buffer cache in its possibly confused state, and move on.
		 * We don't want to risk discarding valid data buffers from
		 * anybody else who thinks they own the block, even though that
		 * runs the risk of stale buffer warnings in the future.
		 */
		trace_xreap_dispose_unmap_extent(sc->sa.pag,
				XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock),
				imap->br_blockcount);

		/*
		 * Schedule removal of the mapping from the fork.  We use
		 * deferred log intents in this function to control the exact
		 * sequence of metadata updates.
		 */
		xfs_bmap_unmap_extent(sc->tp, ip, whichfork, imap);
		xfs_trans_mod_dquot_byino(sc->tp, ip, XFS_TRANS_DQ_BCOUNT,
				-(int64_t)imap->br_blockcount);
		xfs_rmap_unmap_extent(sc->tp, ip, whichfork, imap);
		return 0;
	}

	/*
	 * If the block is not crosslinked, we can invalidate all the incore
	 * buffers for the extent, and then free the extent.  This is a bit of
	 * a mess since we don't detect discontiguous buffers that are indexed
	 * by a block starting before the first block of the extent but overlap
	 * anyway.
	 */
	trace_xreap_dispose_free_extent(sc->sa.pag,
			XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock),
			imap->br_blockcount);

	/*
	 * Invalidate as many buffers as we can, starting at the beginning of
	 * this mapping.  If this function sets blockcount to zero, the
	 * transaction is full of logged buffer invalidations, so we need to
	 * return early so that we can roll and retry.
	 */
	error = xreap_bmapi_binval(sc, ip, whichfork, imap);
	if (error || imap->br_blockcount == 0)
		return error;

	/*
	 * Schedule removal of the mapping from the fork.  We use deferred log
	 * intents in this function to control the exact sequence of metadata
	 * updates.
	 */
	xfs_bmap_unmap_extent(sc->tp, ip, whichfork, imap);
	xfs_trans_mod_dquot_byino(sc->tp, ip, XFS_TRANS_DQ_BCOUNT,
			-(int64_t)imap->br_blockcount);
	/* Defer the free; skip discard since these were metadata blocks. */
	return xfs_free_extent_later(sc->tp, imap->br_startblock,
			imap->br_blockcount, NULL, XFS_AG_RESV_NONE,
			XFS_FREE_EXTENT_SKIP_DISCARD);
}
1208 :
1209 : /*
1210 : * Dispose of as much of this file extent as we can. Upon successful return,
1211 : * the imap will reflect the mapping that was removed from the fork.
1212 : */
1213 : STATIC int
1214 157060 : xreap_ifork_extent(
1215 : struct xfs_scrub *sc,
1216 : struct xfs_inode *ip,
1217 : int whichfork,
1218 : struct xfs_bmbt_irec *imap)
1219 : {
1220 157060 : xfs_agnumber_t agno;
1221 157060 : bool crosslinked;
1222 157060 : int error;
1223 :
1224 157060 : ASSERT(sc->sa.pag == NULL);
1225 :
1226 157060 : trace_xreap_ifork_extent(sc, ip, whichfork, imap);
1227 :
1228 157060 : agno = XFS_FSB_TO_AGNO(sc->mp, imap->br_startblock);
1229 157060 : sc->sa.pag = xfs_perag_get(sc->mp, agno);
1230 157060 : if (!sc->sa.pag)
1231 : return -EFSCORRUPTED;
1232 :
1233 157060 : error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &sc->sa.agf_bp);
1234 157060 : if (error)
1235 0 : goto out_pag;
1236 :
1237 : /*
1238 : * Decide the fate of the blocks at the beginning of the mapping, then
1239 : * update the mapping to use it with the unmap calls.
1240 : */
1241 157060 : error = xreap_bmapi_select(sc, ip, whichfork, imap, &crosslinked);
1242 157060 : if (error)
1243 0 : goto out_agf;
1244 :
1245 157060 : error = xrep_reap_bmapi_iter(sc, ip, whichfork, imap, crosslinked);
1246 157060 : if (error)
1247 0 : goto out_agf;
1248 :
1249 157060 : out_agf:
1250 157060 : xfs_trans_brelse(sc->tp, sc->sa.agf_bp);
1251 157060 : sc->sa.agf_bp = NULL;
1252 157060 : out_pag:
1253 157060 : xfs_perag_put(sc->sa.pag);
1254 157060 : sc->sa.pag = NULL;
1255 157060 : return error;
1256 : }
1257 :
1258 : /*
1259 : * Dispose of each block mapped to the given fork of the given file. Callers
1260 : * must hold ILOCK_EXCL, and ip can only be sc->ip or sc->tempip. The fork
1261 : * must not have any delalloc reservations.
1262 : */
1263 : int
1264 101614 : xrep_reap_ifork(
1265 : struct xfs_scrub *sc,
1266 : struct xfs_inode *ip,
1267 : int whichfork)
1268 : {
1269 101614 : xfs_fileoff_t off = 0;
1270 101614 : int bmap_flags = xfs_bmapi_aflag(whichfork);
1271 101614 : int error;
1272 :
1273 101614 : ASSERT(xfs_has_rmapbt(sc->mp));
1274 101614 : ASSERT(ip == sc->ip || ip == sc->tempip);
1275 101614 : ASSERT(whichfork == XFS_ATTR_FORK || !XFS_IS_REALTIME_INODE(ip));
1276 :
1277 386724 : while (off < XFS_MAX_FILEOFF) {
1278 285110 : struct xfs_bmbt_irec imap;
1279 285110 : int nimaps = 1;
1280 :
1281 : /* Read the next extent, skip past holes and delalloc. */
1282 285110 : error = xfs_bmapi_read(ip, off, XFS_MAX_FILEOFF - off, &imap,
1283 : &nimaps, bmap_flags);
1284 285110 : if (error)
1285 0 : return error;
1286 285110 : if (nimaps != 1 || imap.br_startblock == DELAYSTARTBLOCK) {
1287 0 : ASSERT(0);
1288 0 : return -EFSCORRUPTED;
1289 : }
1290 :
1291 : /*
1292 : * If this is a real space mapping, reap as much of it as we
1293 : * can in a single transaction.
1294 : */
1295 442170 : if (xfs_bmap_is_real_extent(&imap)) {
1296 157060 : error = xreap_ifork_extent(sc, ip, whichfork, &imap);
1297 157060 : if (error)
1298 0 : return error;
1299 :
1300 157060 : error = xfs_defer_finish(&sc->tp);
1301 157060 : if (error)
1302 0 : return error;
1303 : }
1304 :
1305 285110 : off = imap.br_startoff + imap.br_blockcount;
1306 : }
1307 :
1308 : return 0;
1309 : }
|