Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2022-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_defer.h"
13 : #include "xfs_btree.h"
14 : #include "xfs_log_format.h"
15 : #include "xfs_trans.h"
16 : #include "xfs_inode.h"
17 : #include "xfs_inode_fork.h"
18 : #include "xfs_alloc.h"
19 : #include "xfs_bmap.h"
20 : #include "xfs_rmap.h"
21 : #include "xfs_refcount.h"
22 : #include "xfs_quota.h"
23 : #include "xfs_ialloc.h"
24 : #include "xfs_ag.h"
25 : #include "xfs_error.h"
26 : #include "xfs_errortag.h"
27 : #include "xfs_icache.h"
28 : #include "xfs_refcount_btree.h"
29 : #include "xfs_rtalloc.h"
30 : #include "xfs_rtbitmap.h"
31 : #include "xfs_rtgroup.h"
32 : #include "scrub/xfs_scrub.h"
33 : #include "scrub/scrub.h"
34 : #include "scrub/common.h"
35 : #include "scrub/trace.h"
36 : #include "scrub/repair.h"
37 : #include "scrub/bitmap.h"
38 : #include "scrub/reap.h"
39 :
40 : /*
41 : * CoW Fork Mapping Repair
42 : * =======================
43 : *
44 : * Although CoW staging extents are owned by incore CoW inode forks, on disk
45 : * they are owned by the refcount btree. The ondisk metadata does not record
46 : * any ownership information, which limits what we can do to repair the
47 : * mappings in the CoW fork. At most, we can replace ifork mappings that lack
48 : * an entry in the refcount btree or are described by a reverse mapping record
49 : * whose owner is not OWN_COW.
50 : *
51 : * Replacing extents is also tricky -- we can't touch written CoW fork extents
52 : * since they are undergoing writeback, and delalloc extents do not require
53 : * repair since they only exist incore. Hence the most we can do is find the
54 : * bad parts of unwritten mappings, allocate a replacement set of blocks, and
55 : * replace the incore mapping. We use the regular reaping process to unmap
56 : * or free the discarded blocks, as appropriate.
57 : */
58 : struct xrep_cow {
59 : struct xfs_scrub *sc;
60 :
61 : /* Bitmap of file offset ranges that need replacing. */
62 : struct xbitmap bad_fileoffs;
63 :
64 : /* Bitmap of fsblocks that were removed from the CoW fork. */
65 : union {
66 : struct xfsb_bitmap old_cowfork_fsblocks;
67 : struct xrtb_bitmap old_cowfork_rtblocks;
68 : };
69 :
70 : /* CoW fork mappings used to scan for bad CoW staging extents. */
71 : struct xfs_bmbt_irec irec;
72 :
73 : /* refcount btree block number of irec.br_startblock */
74 : unsigned int irec_startbno;
75 :
76 : /* refcount btree block number of the next refcount record we expect */
77 : unsigned int next_bno;
78 : };
79 :
80 : /* CoW staging extent. */
81 : struct xrep_cow_extent {
82 : xfs_fsblock_t fsbno;
83 : xfs_extlen_t len;
84 : };
85 :
86 : /*
87 : * Mark the part of the file range that corresponds to the given physical
88 : * space. Caller must ensure that the physical range is within xc->irec.
89 : */
90 : STATIC int
91 3786 : xrep_cow_mark_file_range(
92 : struct xrep_cow *xc,
93 : xfs_fsblock_t startblock,
94 : xfs_filblks_t blockcount)
95 : {
96 3786 : xfs_fileoff_t startoff;
97 :
98 3786 : startoff = xc->irec.br_startoff +
99 3786 : (startblock - xc->irec.br_startblock);
100 :
101 3786 : trace_xrep_cow_mark_file_range(xc->sc->ip, startblock, startoff,
102 : blockcount);
103 :
104 3786 : return xbitmap_set(&xc->bad_fileoffs, startoff, blockcount);
105 : }
106 :
107 : /*
108 : * Trim @src to fit within the CoW fork mapping being examined, and put the
109 : * result in @dst.
110 : */
111 : static inline void
112 3786 : xrep_cow_trim_refcount(
113 : struct xrep_cow *xc,
114 : struct xfs_refcount_irec *dst,
115 : const struct xfs_refcount_irec *src)
116 : {
117 3786 : unsigned int adj;
118 :
119 7572 : memcpy(dst, src, sizeof(*dst));
120 :
121 3786 : if (dst->rc_startblock < xc->irec_startbno) {
122 529 : adj = xc->irec_startbno - dst->rc_startblock;
123 529 : dst->rc_blockcount -= adj;
124 529 : dst->rc_startblock += adj;
125 : }
126 :
127 3786 : if (dst->rc_startblock + dst->rc_blockcount >
128 3786 : xc->irec_startbno + xc->irec.br_blockcount) {
129 545 : adj = (dst->rc_startblock + dst->rc_blockcount) -
130 545 : (xc->irec_startbno + xc->irec.br_blockcount);
131 545 : dst->rc_blockcount -= adj;
132 : }
133 3786 : }
134 :
135 : /* Mark any shared CoW staging extents. */
136 : STATIC int
137 0 : xrep_cow_mark_shared_staging(
138 : struct xfs_btree_cur *cur,
139 : const struct xfs_refcount_irec *rec,
140 : void *priv)
141 : {
142 0 : struct xrep_cow *xc = priv;
143 0 : struct xfs_refcount_irec rrec;
144 0 : xfs_fsblock_t fsbno;
145 :
146 0 : if (!xfs_refcount_check_domain(rec) ||
147 0 : rec->rc_domain != XFS_REFC_DOMAIN_SHARED)
148 : return -EFSCORRUPTED;
149 :
150 0 : xrep_cow_trim_refcount(xc, &rrec, rec);
151 :
152 0 : if (XFS_IS_REALTIME_INODE(xc->sc->ip))
153 0 : fsbno = xfs_rgbno_to_rtb(xc->sc->mp, cur->bc_ino.rtg->rtg_rgno,
154 : rrec.rc_startblock);
155 : else
156 0 : fsbno = XFS_AGB_TO_FSB(xc->sc->mp, cur->bc_ag.pag->pag_agno,
157 : rrec.rc_startblock);
158 0 : return xrep_cow_mark_file_range(xc, fsbno, rrec.rc_blockcount);
159 : }
160 :
161 : /*
162 : * Mark any portion of the CoW fork file offset range where there is not a CoW
163 : * staging extent record in the refcountbt, and keep a record of where we did
164 : * find correct refcountbt records. Staging records are always cleaned out at
165 : * mount time, so any two inodes trying to map the same staging area would have
166 : * already taken the fs down due to refcount btree verifier errors. Hence this
167 : * inode should be the sole creator of the staging extent records ondisk.
168 : */
169 : STATIC int
170 3786 : xrep_cow_mark_missing_staging(
171 : struct xfs_btree_cur *cur,
172 : const struct xfs_refcount_irec *rec,
173 : void *priv)
174 : {
175 3786 : struct xrep_cow *xc = priv;
176 3786 : struct xfs_refcount_irec rrec;
177 3786 : xfs_fsblock_t fsbno;
178 3786 : int error;
179 :
180 3786 : if (!xfs_refcount_check_domain(rec) ||
181 3786 : rec->rc_domain != XFS_REFC_DOMAIN_COW)
182 : return -EFSCORRUPTED;
183 :
184 3786 : xrep_cow_trim_refcount(xc, &rrec, rec);
185 :
186 3786 : if (xc->next_bno >= rrec.rc_startblock)
187 3786 : goto next;
188 :
189 0 : if (XFS_IS_REALTIME_INODE(xc->sc->ip))
190 0 : fsbno = xfs_rgbno_to_rtb(xc->sc->mp, cur->bc_ino.rtg->rtg_rgno,
191 : xc->next_bno);
192 : else
193 0 : fsbno = XFS_AGB_TO_FSB(xc->sc->mp, cur->bc_ag.pag->pag_agno,
194 : xc->next_bno);
195 0 : error = xrep_cow_mark_file_range(xc, fsbno,
196 0 : rrec.rc_startblock - xc->next_bno);
197 0 : if (error)
198 : return error;
199 :
200 0 : next:
201 3786 : xc->next_bno = rrec.rc_startblock + rrec.rc_blockcount;
202 3786 : return 0;
203 : }
204 :
205 : /*
206 : * Mark any area that does not correspond to a CoW staging rmap. These are
207 : * cross-linked areas that must be avoided.
208 : */
209 : STATIC int
210 3786 : xrep_cow_mark_missing_staging_rmap(
211 : struct xfs_btree_cur *cur,
212 : const struct xfs_rmap_irec *rec,
213 : void *priv)
214 : {
215 3786 : struct xrep_cow *xc = priv;
216 3786 : xfs_fsblock_t fsbno;
217 3786 : xfs_agblock_t rec_bno;
218 3786 : xfs_extlen_t rec_len;
219 3786 : unsigned int adj;
220 :
221 3786 : if (rec->rm_owner == XFS_RMAP_OWN_COW)
222 : return 0;
223 :
224 0 : rec_bno = rec->rm_startblock;
225 0 : rec_len = rec->rm_blockcount;
226 0 : if (rec_bno < xc->irec_startbno) {
227 0 : adj = xc->irec_startbno - rec_bno;
228 0 : rec_len -= adj;
229 0 : rec_bno += adj;
230 : }
231 :
232 0 : if (rec_bno + rec_len > xc->irec_startbno + xc->irec.br_blockcount) {
233 0 : adj = (rec_bno + rec_len) -
234 0 : (xc->irec_startbno + xc->irec.br_blockcount);
235 0 : rec_len -= adj;
236 : }
237 :
238 0 : if (XFS_IS_REALTIME_INODE(xc->sc->ip))
239 0 : fsbno = xfs_rgbno_to_rtb(xc->sc->mp, cur->bc_ino.rtg->rtg_rgno,
240 : rec_bno);
241 : else
242 0 : fsbno = XFS_AGB_TO_FSB(xc->sc->mp, cur->bc_ag.pag->pag_agno,
243 : rec_bno);
244 0 : return xrep_cow_mark_file_range(xc, fsbno, rec_len);
245 : }
246 :
247 : /*
248 : * Find any part of the CoW fork mapping that isn't a single-owner CoW staging
249 : * extent and mark the corresponding part of the file range in the bitmap.
250 : */
251 : STATIC int
252 1054 : xrep_cow_find_bad(
253 : struct xrep_cow *xc)
254 : {
255 1054 : struct xfs_refcount_irec rc_low = { 0 };
256 1054 : struct xfs_refcount_irec rc_high = { 0 };
257 1054 : struct xfs_rmap_irec rm_low = { 0 };
258 1054 : struct xfs_rmap_irec rm_high = { 0 };
259 1054 : struct xfs_perag *pag;
260 1054 : struct xfs_scrub *sc = xc->sc;
261 1054 : xfs_agnumber_t agno;
262 1054 : int error;
263 :
264 1054 : agno = XFS_FSB_TO_AGNO(sc->mp, xc->irec.br_startblock);
265 1054 : xc->irec_startbno = XFS_FSB_TO_AGBNO(sc->mp, xc->irec.br_startblock);
266 :
267 1054 : pag = xfs_perag_get(sc->mp, agno);
268 1054 : if (!pag)
269 : return -EFSCORRUPTED;
270 :
271 1054 : error = xrep_ag_init(sc, pag, &sc->sa);
272 1054 : if (error)
273 0 : goto out_pag;
274 :
275 : /* Mark any CoW fork extents that are shared. */
276 1054 : rc_low.rc_startblock = xc->irec_startbno;
277 1054 : rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
278 1054 : rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_SHARED;
279 1054 : error = xfs_refcount_query_range(sc->sa.refc_cur, &rc_low, &rc_high,
280 : xrep_cow_mark_shared_staging, xc);
281 1054 : if (error)
282 0 : goto out_sa;
283 :
284 : /* Make sure there are CoW staging extents for the whole mapping. */
285 1054 : rc_low.rc_startblock = xc->irec_startbno;
286 1054 : rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
287 1054 : rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_COW;
288 1054 : xc->next_bno = xc->irec_startbno;
289 1054 : error = xfs_refcount_query_range(sc->sa.refc_cur, &rc_low, &rc_high,
290 : xrep_cow_mark_missing_staging, xc);
291 1054 : if (error)
292 0 : goto out_sa;
293 :
294 1054 : if (xc->next_bno < xc->irec_startbno + xc->irec.br_blockcount) {
295 0 : error = xrep_cow_mark_file_range(xc,
296 0 : XFS_AGB_TO_FSB(sc->mp, pag->pag_agno,
297 : xc->next_bno),
298 : xc->irec_startbno + xc->irec.br_blockcount -
299 : xc->next_bno);
300 0 : if (error)
301 0 : goto out_sa;
302 : }
303 :
304 : /* Mark any area has an rmap that isn't a COW staging extent. */
305 1054 : rm_low.rm_startblock = xc->irec_startbno;
306 1054 : memset(&rm_high, 0xFF, sizeof(rm_high));
307 1054 : rm_high.rm_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
308 1054 : error = xfs_rmap_query_range(sc->sa.rmap_cur, &rm_low, &rm_high,
309 : xrep_cow_mark_missing_staging_rmap, xc);
310 1054 : if (error)
311 0 : goto out_sa;
312 :
313 : /*
314 : * If userspace is forcing us to rebuild the CoW fork or someone turned
315 : * on the debugging knob, replace everything in the CoW fork.
316 : */
317 1054 : if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
318 0 : XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
319 1054 : error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
320 : xc->irec.br_blockcount);
321 1054 : if (error)
322 : return error;
323 : }
324 :
325 1054 : out_sa:
326 1054 : xchk_ag_free(sc, &sc->sa);
327 1054 : out_pag:
328 1054 : xfs_perag_put(pag);
329 1054 : return 0;
330 : }
331 :
332 : /*
333 : * Find any part of the CoW fork mapping that isn't a single-owner CoW staging
334 : * extent and mark the corresponding part of the file range in the bitmap.
335 : */
336 : STATIC int
337 2732 : xrep_cow_find_bad_rt(
338 : struct xrep_cow *xc)
339 : {
340 2732 : struct xfs_refcount_irec rc_low = { 0 };
341 2732 : struct xfs_refcount_irec rc_high = { 0 };
342 2732 : struct xfs_rmap_irec rm_low = { 0 };
343 2732 : struct xfs_rmap_irec rm_high = { 0 };
344 2732 : struct xfs_scrub *sc = xc->sc;
345 2732 : struct xfs_rtgroup *rtg;
346 2732 : xfs_rgnumber_t rgno;
347 2732 : int error = 0;
348 :
349 2732 : xc->irec_startbno = xfs_rtb_to_rgbno(sc->mp, xc->irec.br_startblock,
350 : &rgno);
351 :
352 2732 : rtg = xfs_rtgroup_get(sc->mp, rgno);
353 2732 : if (!rtg)
354 : return -EFSCORRUPTED;
355 :
356 2732 : if (xrep_is_rtmeta_ino(sc, rtg, sc->ip->i_ino))
357 0 : goto out_rtg;
358 :
359 2732 : error = xrep_rtgroup_init(sc, rtg, &sc->sr,
360 : XFS_RTGLOCK_RMAP | XFS_RTGLOCK_REFCOUNT);
361 2732 : if (error)
362 0 : goto out_rtg;
363 :
364 : /* Mark any CoW fork extents that are shared. */
365 2732 : rc_low.rc_startblock = xc->irec_startbno;
366 2732 : rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
367 2732 : rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_SHARED;
368 2732 : error = xfs_refcount_query_range(sc->sr.refc_cur, &rc_low, &rc_high,
369 : xrep_cow_mark_shared_staging, xc);
370 2732 : if (error)
371 0 : goto out_sr;
372 :
373 : /* Make sure there are CoW staging extents for the whole mapping. */
374 2732 : rc_low.rc_startblock = xc->irec_startbno;
375 2732 : rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
376 2732 : rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_COW;
377 2732 : xc->next_bno = xc->irec_startbno;
378 2732 : error = xfs_refcount_query_range(sc->sr.refc_cur, &rc_low, &rc_high,
379 : xrep_cow_mark_missing_staging, xc);
380 2732 : if (error)
381 0 : goto out_sr;
382 :
383 2732 : if (xc->next_bno < xc->irec_startbno + xc->irec.br_blockcount) {
384 0 : error = xrep_cow_mark_file_range(xc,
385 : xfs_rgbno_to_rtb(sc->mp, rtg->rtg_rgno,
386 : xc->next_bno),
387 0 : xc->irec_startbno + xc->irec.br_blockcount -
388 0 : xc->next_bno);
389 0 : if (error)
390 0 : goto out_sr;
391 : }
392 :
393 : /* Mark any area has an rmap that isn't a COW staging extent. */
394 2732 : rm_low.rm_startblock = xc->irec_startbno;
395 2732 : memset(&rm_high, 0xFF, sizeof(rm_high));
396 2732 : rm_high.rm_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
397 2732 : error = xfs_rmap_query_range(sc->sr.rmap_cur, &rm_low, &rm_high,
398 : xrep_cow_mark_missing_staging_rmap, xc);
399 2732 : if (error)
400 0 : goto out_sr;
401 :
402 : /*
403 : * If userspace is forcing us to rebuild the CoW fork or someone
404 : * turned on the debugging knob, replace everything in the
405 : * CoW fork and then scan for staging extents in the refcountbt.
406 : */
407 2732 : if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
408 0 : XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
409 2732 : error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
410 : xc->irec.br_blockcount);
411 2732 : if (error)
412 0 : goto out_rtg;
413 : }
414 :
415 2732 : out_sr:
416 2732 : xchk_rtgroup_btcur_free(&sc->sr);
417 2732 : xchk_rtgroup_free(sc, &sc->sr);
418 2732 : out_rtg:
419 2732 : xfs_rtgroup_put(rtg);
420 2732 : return error;
421 : }
422 :
423 : /*
424 : * Allocate a replacement CoW staging extent of up to the given number of
425 : * blocks, and fill out the mapping.
426 : */
427 : STATIC int
428 1315 : xrep_cow_alloc(
429 : struct xfs_scrub *sc,
430 : xfs_filblks_t maxlen,
431 : struct xrep_cow_extent *repl)
432 : {
433 1315 : struct xfs_alloc_arg args = {
434 1315 : .tp = sc->tp,
435 1315 : .mp = sc->mp,
436 : .oinfo = XFS_RMAP_OINFO_SKIP_UPDATE,
437 : .minlen = 1,
438 : .maxlen = maxlen,
439 : .prod = 1,
440 : .resv = XFS_AG_RESV_NONE,
441 : .datatype = XFS_ALLOC_USERDATA,
442 : };
443 1315 : int error;
444 :
445 1315 : error = xfs_trans_reserve_more(sc->tp, maxlen, 0);
446 1315 : if (error)
447 : return error;
448 :
449 1315 : error = xfs_alloc_vextent_start_ag(&args,
450 1315 : XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino));
451 1315 : if (error)
452 : return error;
453 1315 : if (args.fsbno == NULLFSBLOCK)
454 : return -ENOSPC;
455 :
456 1315 : xfs_refcount_alloc_cow_extent(sc->tp, false, args.fsbno, args.len);
457 :
458 1315 : repl->fsbno = args.fsbno;
459 1315 : repl->len = args.len;
460 1315 : return 0;
461 : }
462 :
463 : /*
464 : * Allocate a replacement rt CoW staging extent of up to the given number of
465 : * blocks, and fill out the mapping.
466 : */
467 : STATIC int
468 2732 : xrep_cow_alloc_rt(
469 : struct xfs_scrub *sc,
470 : xfs_filblks_t maxlen,
471 : struct xrep_cow_extent *repl)
472 : {
473 2732 : xfs_rtxnum_t rtx = NULLRTEXTNO;
474 2732 : xfs_rtxlen_t maxrtx = maxlen;
475 2732 : xfs_rtxlen_t rtxlen = 0;
476 2732 : xfs_rtblock_t rtbno;
477 2732 : xfs_extlen_t len;
478 2732 : uint32_t mod;
479 2732 : int error;
480 :
481 2732 : maxrtx = xfs_rtb_to_rtx(sc->mp, maxlen, &mod);
482 :
483 2732 : error = xfs_trans_reserve_more(sc->tp, 0, maxrtx);
484 2732 : if (error)
485 : return error;
486 :
487 2732 : xfs_rtbitmap_lock(sc->tp, sc->mp);
488 :
489 2732 : error = xfs_rtallocate_extent(sc->tp, 0, 1, maxrtx, &rtxlen, 0, 1,
490 : &rtx);
491 2732 : if (error)
492 : return error;
493 2732 : if (rtx == NULLRTEXTNO)
494 : return -ENOSPC;
495 :
496 2732 : rtbno = xfs_rtx_to_rtb(sc->mp, rtx);
497 2732 : len = xfs_rtxlen_to_extlen(sc->mp, rtxlen);
498 2732 : xfs_refcount_alloc_cow_extent(sc->tp, true, rtbno, len);
499 :
500 2732 : repl->fsbno = rtbno;
501 2732 : repl->len = len;
502 2732 : return 0;
503 : }
504 :
505 : /*
506 : * Look up the current CoW fork mapping so that we only allocate enough to
507 : * replace a single mapping. If we don't find a mapping that covers the start
508 : * of the file range, or we find a delalloc or written extent, something is
509 : * seriously wrong, since we didn't drop the ILOCK.
510 : */
511 : static inline int
512 4047 : xrep_cow_find_mapping(
513 : struct xrep_cow *xc,
514 : struct xfs_iext_cursor *icur,
515 : xfs_fileoff_t startoff,
516 : struct xfs_bmbt_irec *got)
517 : {
518 4047 : struct xfs_inode *ip = xc->sc->ip;
519 4047 : struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);
520 :
521 4047 : if (!xfs_iext_lookup_extent(ip, ifp, startoff, icur, got))
522 0 : goto bad;
523 :
524 4047 : if (got->br_startoff > startoff)
525 0 : goto bad;
526 :
527 4047 : if (got->br_blockcount == 0)
528 0 : goto bad;
529 :
530 4047 : if (isnullstartblock(got->br_startblock))
531 0 : goto bad;
532 :
533 4047 : if (xfs_bmap_is_written_extent(got))
534 0 : goto bad;
535 :
536 : return 0;
537 0 : bad:
538 0 : ASSERT(0);
539 0 : return -EFSCORRUPTED;
540 : }
541 :
542 : #define REPLACE_LEFT_SIDE (1U << 0)
543 : #define REPLACE_RIGHT_SIDE (1U << 1)
544 :
545 : /*
546 : * Given a CoW fork mapping @got and a replacement mapping @repl, remap the
547 : * beginning of @got with the space described by @rep.
548 : */
549 : static inline void
550 4047 : xrep_cow_replace_mapping(
551 : struct xfs_inode *ip,
552 : struct xfs_iext_cursor *icur,
553 : const struct xfs_bmbt_irec *got,
554 : const struct xrep_cow_extent *repl)
555 : {
556 4047 : struct xfs_bmbt_irec new = *got; /* struct copy */
557 :
558 4047 : ASSERT(repl->len > 0);
559 4047 : ASSERT(!isnullstartblock(got->br_startblock));
560 :
561 4047 : trace_xrep_cow_replace_mapping(ip, got, repl->fsbno, repl->len);
562 :
563 4047 : if (got->br_blockcount == repl->len) {
564 : /*
565 : * The new extent is a complete replacement for the existing
566 : * extent. Update the COW fork record.
567 : */
568 3786 : new.br_startblock = repl->fsbno;
569 3786 : xfs_iext_update_extent(ip, BMAP_COWFORK, icur, &new);
570 3786 : return;
571 : }
572 :
573 : /*
574 : * The new extent can replace the beginning of the COW fork record.
575 : * Move the left side of @got upwards, then insert the new record.
576 : */
577 261 : new.br_startoff += repl->len;
578 261 : new.br_startblock += repl->len;
579 261 : new.br_blockcount -= repl->len;
580 261 : xfs_iext_update_extent(ip, BMAP_COWFORK, icur, &new);
581 :
582 261 : new.br_startoff = got->br_startoff;
583 261 : new.br_startblock = repl->fsbno;
584 261 : new.br_blockcount = repl->len;
585 261 : xfs_iext_insert(ip, icur, &new, BMAP_COWFORK);
586 : }
587 :
588 : /*
589 : * Replace the unwritten CoW staging extent backing the given file range with a
590 : * new space extent that isn't as problematic.
591 : */
592 : STATIC int
593 4047 : xrep_cow_replace_range(
594 : struct xrep_cow *xc,
595 : xfs_fileoff_t startoff,
596 : xfs_extlen_t *blockcount)
597 : {
598 4047 : struct xfs_iext_cursor icur;
599 4047 : struct xrep_cow_extent repl;
600 4047 : struct xfs_bmbt_irec got;
601 4047 : struct xfs_scrub *sc = xc->sc;
602 4047 : xfs_fileoff_t nextoff;
603 4047 : int error;
604 :
605 : /*
606 : * Put the existing CoW fork mapping in @got. If @got ends before
607 : * @rep, truncate @rep so we only replace one extent mapping at a time.
608 : */
609 4047 : error = xrep_cow_find_mapping(xc, &icur, startoff, &got);
610 4047 : if (error)
611 : return error;
612 4047 : nextoff = min(startoff + *blockcount,
613 : got.br_startoff + got.br_blockcount);
614 :
615 : /*
616 : * Allocate a replacement extent. If we don't fill all the blocks,
617 : * shorten the quantity that will be deleted in this step.
618 : */
619 4047 : if (XFS_IS_REALTIME_INODE(sc->ip))
620 2732 : error = xrep_cow_alloc_rt(sc, nextoff - startoff, &repl);
621 : else
622 1315 : error = xrep_cow_alloc(sc, nextoff - startoff, &repl);
623 4047 : if (error)
624 : return error;
625 :
626 : /*
627 : * Replace the old mapping with the new one, and commit the metadata
628 : * changes made so far.
629 : */
630 4047 : xrep_cow_replace_mapping(sc->ip, &icur, &got, &repl);
631 :
632 4047 : xfs_inode_set_cowblocks_tag(sc->ip);
633 4047 : error = xfs_defer_finish(&sc->tp);
634 4047 : if (error)
635 : return error;
636 :
637 : /* Note the old CoW staging extents; we'll reap them all later. */
638 4047 : if (XFS_IS_REALTIME_INODE(sc->ip))
639 2732 : error = xrtb_bitmap_set(&xc->old_cowfork_rtblocks,
640 2732 : got.br_startblock, repl.len);
641 : else
642 1315 : error = xfsb_bitmap_set(&xc->old_cowfork_fsblocks,
643 1315 : got.br_startblock, repl.len);
644 4047 : if (error)
645 : return error;
646 :
647 4047 : *blockcount = repl.len;
648 4047 : return 0;
649 : }
650 :
651 : /*
652 : * Replace a bad part of an unwritten CoW staging extent with a fresh delalloc
653 : * reservation.
654 : */
655 : STATIC int
656 3013 : xrep_cow_replace(
657 : uint64_t startoff,
658 : uint64_t blockcount,
659 : void *priv)
660 : {
661 3013 : struct xrep_cow *xc = priv;
662 3013 : int error = 0;
663 :
664 7060 : while (blockcount > 0) {
665 4047 : xfs_extlen_t len = min_t(xfs_filblks_t, blockcount,
666 : XFS_MAX_BMBT_EXTLEN);
667 :
668 4047 : error = xrep_cow_replace_range(xc, startoff, &len);
669 4047 : if (error)
670 : break;
671 :
672 4047 : blockcount -= len;
673 4047 : startoff += len;
674 : }
675 :
676 3013 : return error;
677 : }
678 :
679 : /*
680 : * Repair an inode's CoW fork. The CoW fork is an in-core structure, so
681 : * there's no btree to rebuid. Instead, we replace any mappings that are
682 : * cross-linked or lack ondisk CoW fork records in the refcount btree.
683 : */
684 : int
685 36641 : xrep_bmap_cow(
686 : struct xfs_scrub *sc)
687 : {
688 36641 : struct xrep_cow *xc;
689 36641 : struct xfs_iext_cursor icur;
690 36641 : struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, XFS_COW_FORK);
691 36641 : int error;
692 :
693 36641 : if (!xfs_has_rmapbt(sc->mp) || !xfs_has_reflink(sc->mp))
694 : return -EOPNOTSUPP;
695 :
696 36641 : if (!ifp)
697 : return 0;
698 :
699 : /*
700 : * Realtime files with large extent sizes are not supported because
701 : * we could encounter an CoW mapping that has been partially written
702 : * out *and* requires replacement, and there's no solution to that.
703 : */
704 36641 : if (XFS_IS_REALTIME_INODE(sc->ip) && sc->mp->m_sb.sb_rextsize != 1)
705 : return -EOPNOTSUPP;
706 :
707 : /*
708 : * If we're somehow not in extents format, then reinitialize it to
709 : * an empty extent mapping fork and exit.
710 : */
711 36641 : if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
712 0 : ifp->if_format = XFS_DINODE_FMT_EXTENTS;
713 0 : ifp->if_nextents = 0;
714 0 : return 0;
715 : }
716 :
717 36641 : xc = kzalloc(sizeof(struct xrep_cow), XCHK_GFP_FLAGS);
718 36641 : if (!xc)
719 : return -ENOMEM;
720 :
721 36641 : xfs_trans_ijoin(sc->tp, sc->ip, 0);
722 :
723 36641 : xc->sc = sc;
724 36641 : xbitmap_init(&xc->bad_fileoffs);
725 36641 : if (XFS_IS_REALTIME_INODE(sc->ip))
726 13210 : xrtb_bitmap_init(&xc->old_cowfork_rtblocks);
727 : else
728 23431 : xfsb_bitmap_init(&xc->old_cowfork_fsblocks);
729 :
730 40427 : for_each_xfs_iext(ifp, &icur, &xc->irec) {
731 3786 : if (xchk_should_terminate(sc, &error))
732 0 : goto out_bitmap;
733 :
734 : /*
735 : * delalloc reservations only exist incore, so there is no
736 : * ondisk metadata that we can examine. Hence we leave them
737 : * alone.
738 : */
739 3786 : if (isnullstartblock(xc->irec.br_startblock))
740 0 : continue;
741 :
742 : /*
743 : * COW fork extents are only in the written state if writeback
744 : * is actively writing to disk. We cannot restart the write
745 : * at a different disk address since we've already issued the
746 : * IO, so we leave these alone and hope for the best.
747 : */
748 3786 : if (xfs_bmap_is_written_extent(&xc->irec))
749 0 : continue;
750 :
751 3786 : if (XFS_IS_REALTIME_INODE(sc->ip))
752 2732 : error = xrep_cow_find_bad_rt(xc);
753 : else
754 1054 : error = xrep_cow_find_bad(xc);
755 3786 : if (error)
756 0 : goto out_bitmap;
757 : }
758 :
759 : /* Replace any bad unwritten mappings with fresh reservations. */
760 36641 : error = xbitmap_walk(&xc->bad_fileoffs, xrep_cow_replace, xc);
761 36641 : if (error)
762 0 : goto out_bitmap;
763 :
764 : /*
765 : * Reap as many of the old CoW blocks as we can. They are owned ondisk
766 : * by the refcount btree, not the inode, so it is correct to treat them
767 : * like inode metadata.
768 : */
769 36641 : if (XFS_IS_REALTIME_INODE(sc->ip))
770 13210 : error = xrep_reap_rtblocks(sc, &xc->old_cowfork_rtblocks,
771 : &XFS_RMAP_OINFO_COW);
772 : else
773 23431 : error = xrep_reap_fsblocks(sc, &xc->old_cowfork_fsblocks,
774 : &XFS_RMAP_OINFO_COW);
775 36641 : if (error)
776 : goto out_bitmap;
777 :
778 36641 : out_bitmap:
779 36641 : if (XFS_IS_REALTIME_INODE(sc->ip))
780 13210 : xrtb_bitmap_destroy(&xc->old_cowfork_rtblocks);
781 : else
782 23431 : xfsb_bitmap_destroy(&xc->old_cowfork_fsblocks);
783 36641 : xbitmap_destroy(&xc->bad_fileoffs);
784 36641 : kmem_free(xc);
785 36641 : return error;
786 : }
|