Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_defer.h"
13 : #include "xfs_btree.h"
14 : #include "xfs_btree_staging.h"
15 : #include "xfs_bit.h"
16 : #include "xfs_log_format.h"
17 : #include "xfs_trans.h"
18 : #include "xfs_sb.h"
19 : #include "xfs_inode.h"
20 : #include "xfs_alloc.h"
21 : #include "xfs_ialloc.h"
22 : #include "xfs_ialloc_btree.h"
23 : #include "xfs_icache.h"
24 : #include "xfs_rmap.h"
25 : #include "xfs_rmap_btree.h"
26 : #include "xfs_log.h"
27 : #include "xfs_trans_priv.h"
28 : #include "xfs_error.h"
29 : #include "xfs_health.h"
30 : #include "xfs_ag.h"
31 : #include "scrub/xfs_scrub.h"
32 : #include "scrub/scrub.h"
33 : #include "scrub/common.h"
34 : #include "scrub/btree.h"
35 : #include "scrub/trace.h"
36 : #include "scrub/repair.h"
37 : #include "scrub/bitmap.h"
38 : #include "scrub/xfile.h"
39 : #include "scrub/xfarray.h"
40 : #include "scrub/newbt.h"
41 : #include "scrub/reap.h"
42 :
43 : /*
44 : * Inode Btree Repair
45 : * ==================
46 : *
47 : * A quick refresher of inode btrees on a v5 filesystem:
48 : *
49 : * - Inode records are read into memory in units of 'inode clusters'. The
50 : * number of inodes that fit in one cluster buffer is the smallest number
51 : * of inodes that can be allocated or freed at a time. Clusters are never
52 : * smaller than one fs block, though they can span multiple blocks. The
53 : * size (in fs blocks) is computed with xfs_icluster_size_fsb(). The fs
54 : * block alignment of a cluster is computed with xfs_ialloc_cluster_alignment().
55 : *
56 : * - Each inode btree record can describe a single 'inode chunk'. The chunk
57 : * size is defined to be 64 inodes. If sparse inodes are enabled, every
58 : * inobt record must be aligned to the chunk size; if not, every record must
59 : * be aligned to the start of a cluster. It is possible to construct an XFS
60 : * geometry where one inobt record maps to multiple inode clusters; it is
61 : * also possible to construct a geometry where multiple inobt records map to
62 : * different parts of one inode cluster.
63 : *
64 : * - If sparse inodes are not enabled, the smallest unit of allocation for
65 : * inode records is enough to contain one inode chunk's worth of inodes.
66 : *
67 : * - If sparse inodes are enabled, the holemask field will be active. Each
68 : * bit of the holemask represents 4 potential inodes; if set, the
69 : * corresponding space does *not* contain inodes and must be left alone.
70 : * Clusters cannot be smaller than 4 inodes. The smallest unit of allocation
71 : * of inode records is one inode cluster.
72 : *
73 : * So what's the rebuild algorithm?
74 : *
75 : * Iterate the reverse mapping records looking for OWN_INODES and OWN_INOBT
76 : * records. The OWN_INOBT records are the old inode btree blocks and will be
77 : * cleared out after we've rebuilt the tree. Each possible inode cluster
78 : * within an OWN_INODES record will be read in; for each possible inobt record
79 : * associated with that cluster, compute the freemask from the i_mode data
80 : * in the inode chunk. For sparse inodes the holemask will be
81 : * calculated by creating the properly aligned inobt record and punching out
82 : * any chunk that's missing. Inode allocations and frees grab the AGI first,
83 : * so repair protects itself from concurrent access by locking the AGI.
84 : *
85 : * Once we've reconstructed all the inode records, we can create new inode
86 : * btree roots and reload the btrees. We rebuild both inode trees at the same
87 : * time because they have the same rmap owner, and it would be more complex
88 : * to figure out whether the other tree actually needs a rebuild and which
89 : * OWN_INOBT blocks it owns. We have all the data we need to build both, so
90 : * dump everything and start over.
91 : *
92 : * We use the prefix 'xrep_ibt' because we rebuild both inode btrees at once.
93 : */
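
/*
 * To make the mask layout from the refresher above concrete, here is a
 * minimal standalone sketch (not kernel code; the demo_* names are
 * hypothetical stand-ins).  Each inobt record covers 64 inodes: ir_free
 * keeps one bit per inode, and each ir_holemask bit stands for 4 inodes
 * that may be absent from a sparse chunk.  A set holemask bit means
 * "hole"; repair starts from all-free, all-hole and clears bits as real
 * inodes are found.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_INODES_PER_HOLEMASK_BIT	4

/* Clear the holemask bits covering inodes [start, start + len). */
static uint16_t demo_mark_present(uint16_t holemask, unsigned int start,
		unsigned int len)
{
	unsigned int first = start / DEMO_INODES_PER_HOLEMASK_BIT;
	unsigned int nr = len / DEMO_INODES_PER_HOLEMASK_BIT;

	return holemask & ~(uint16_t)(((1U << nr) - 1) << first);
}

int main(void)
{
	uint64_t ir_free = ~0ULL;	/* all 64 inodes free */
	uint16_t ir_holemask = 0xFFFF;	/* all 64 inodes are holes */

	/* A 32-inode sparse cluster at chunk offset 0; inode 5 is in use. */
	ir_holemask = demo_mark_present(ir_holemask, 0, 32);
	ir_free &= ~(1ULL << 5);

	/* Prints: holemask 0xff00 free 0xffffffffffffffdf */
	printf("holemask 0x%04x free 0x%016llx\n",
			(unsigned int)ir_holemask,
			(unsigned long long)ir_free);
	return 0;
}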
94 :
95 : struct xrep_ibt {
96 : /* Record under construction. */
97 : struct xfs_inobt_rec_incore rie;
98 :
99 : /* new inobt information */
100 : struct xrep_newbt new_inobt;
101 :
102 : /* new finobt information */
103 : struct xrep_newbt new_finobt;
104 :
105 : /* Old inode btree blocks we found in the rmap. */
106 : struct xagb_bitmap old_iallocbt_blocks;
107 :
108 : /* Reconstructed inode records. */
109 : struct xfarray *inode_records;
110 :
111 : struct xfs_scrub *sc;
112 :
113 : /* Number of inodes assigned disk space. */
114 : unsigned int icount;
115 :
116 : /* Number of inodes in use. */
117 : unsigned int iused;
118 :
119 : /* Number of finobt records needed. */
120 : unsigned int finobt_recs;
121 :
122 : /* get_records()'s position in the inode record array. */
123 : xfarray_idx_t array_cur;
124 : };
125 :
126 : /*
127 : * Is this inode in use? If the inode is in memory we can tell from i_mode,
128 : * otherwise we have to check di_mode in the on-disk buffer. We only care
129 : * that the high (i.e. non-permission) bits of _mode are zero. This should be
130 : * safe because repair keeps all AG headers locked until the end, and any
131 : * process trying to perform an inode allocation/free must lock the AGI.
132 : *
133 : * @cluster_ag_base is the inode offset of the cluster within the AG.
134 : * @cluster_bp is the cluster buffer.
135 : * @cluster_index is the inode offset within the inode cluster.
136 : */
137 : STATIC int
138 196323991 : xrep_ibt_check_ifree(
139 : struct xrep_ibt *ri,
140 : xfs_agino_t cluster_ag_base,
141 : struct xfs_buf *cluster_bp,
142 : unsigned int cluster_index,
143 : bool *inuse)
144 : {
145 196323991 : struct xfs_scrub *sc = ri->sc;
146 196323991 : struct xfs_mount *mp = sc->mp;
147 196323991 : struct xfs_dinode *dip;
148 196323991 : xfs_ino_t fsino;
149 196323991 : xfs_agino_t agino;
150 196323991 : xfs_agnumber_t agno = ri->sc->sa.pag->pag_agno;
151 196323991 : unsigned int cluster_buf_base;
152 196323991 : unsigned int offset;
153 196323991 : int error;
154 :
155 196323991 : agino = cluster_ag_base + cluster_index;
156 196323991 : fsino = XFS_AGINO_TO_INO(mp, agno, agino);
157 :
158 : /* Find this inode within the cluster buffer and sanity-check it. */
159 196323991 : cluster_buf_base = XFS_INO_TO_OFFSET(mp, cluster_ag_base);
160 196323991 : offset = (cluster_buf_base + cluster_index) * mp->m_sb.sb_inodesize;
161 196323991 : if (offset >= BBTOB(cluster_bp->b_length))
162 : return -EFSCORRUPTED;
163 196323991 : dip = xfs_buf_offset(cluster_bp, offset);
164 196323757 : if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)
165 : return -EFSCORRUPTED;
166 :
167 196323757 : if (dip->di_version >= 3 && be64_to_cpu(dip->di_ino) != fsino)
168 : return -EFSCORRUPTED;
169 :
170 : /* Will the in-core inode tell us if it's in use? */
171 196323757 : error = xchk_inode_is_allocated(sc, agino, inuse);
172 196324245 : if (!error)
173 : return 0;
174 :
175 1934242 : *inuse = dip->di_mode != 0;
176 1934242 : return 0;
177 : }
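
/*
 * A sketch of the two checks above, outside the kernel (demo_* names
 * are hypothetical; 512-byte inodes are assumed for the example).  The
 * inode's byte offset within the cluster buffer is its in-buffer index
 * scaled by the on-disk inode size, and a fully zeroed di_mode marks a
 * freed inode.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static size_t demo_inode_buf_offset(unsigned int cluster_buf_base,
		unsigned int cluster_index, unsigned int inodesize)
{
	return (size_t)(cluster_buf_base + cluster_index) * inodesize;
}

static bool demo_dinode_in_use(uint16_t di_mode)
{
	/* Freed inodes are written back with di_mode == 0. */
	return di_mode != 0;
}

/* e.g. demo_inode_buf_offset(0, 3, 512) == 1536 */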
178 :
179 : /* Stash the accumulated inobt record for rebuilding. */
180 : STATIC int
181 4092677 : xrep_ibt_stash(
182 : struct xrep_ibt *ri)
183 : {
184 4092677 : int error = 0;
185 :
186 4092677 : if (xchk_should_terminate(ri->sc, &error))
187 0 : return error;
188 :
189 4092677 : ri->rie.ir_freecount = xfs_inobt_rec_freecount(&ri->rie);
190 4092677 : if (xfs_inobt_check_perag_irec(ri->sc->sa.pag, &ri->rie) != NULL)
191 : return -EFSCORRUPTED;
192 :
193 4092677 : if (ri->rie.ir_freecount > 0)
194 24971 : ri->finobt_recs++;
195 :
196 4092677 : trace_xrep_ibt_found(ri->sc->mp, ri->sc->sa.pag->pag_agno, &ri->rie);
197 :
198 4092677 : error = xfarray_append(ri->inode_records, &ri->rie);
199 4092677 : if (error)
200 : return error;
201 :
202 4092677 : ri->rie.ir_startino = NULLAGINO;
203 4092677 : return 0;
204 : }
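
/*
 * How a freecount can be derived from the two masks (an illustrative
 * stand-in for xfs_inobt_rec_freecount(); demo_* names hypothetical,
 * and __builtin_popcountll is the GCC/Clang builtin): expand the
 * holemask so each of its 16 bits covers 4 inode bits, then count the
 * free bits that fall inside allocated (non-hole) space.
 */
#include <stdint.h>

/* Expand a holemask (set bit == 4-inode hole) into a 64-bit allocmask. */
static uint64_t demo_alloc_mask(uint16_t holemask)
{
	uint64_t allocmask = 0;
	unsigned int i;

	for (i = 0; i < 16; i++)
		if (!(holemask & (1U << i)))
			allocmask |= 0xFULL << (i * 4);
	return allocmask;
}

static unsigned int demo_freecount(uint64_t ir_free, uint16_t holemask)
{
	return __builtin_popcountll(ir_free & demo_alloc_mask(holemask));
}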
205 :
206 : /*
207 : * Given an extent of inodes and an inode cluster buffer, calculate the
208 : * location of the corresponding inobt record (creating it if necessary),
209 : * then update the parts of the holemask and freemask of that record that
210 : * correspond to the inode extent we were given.
211 : *
212 : * @cluster_ir_startino is the AG inode number of an inobt record that we're
213 : * proposing to create for this inode cluster. If sparse inodes are enabled,
214 : * we must round down to a chunk boundary to find the actual sparse record.
215 : * @cluster_bp is the buffer of the inode cluster.
216 : * @nr_inodes is the number of inodes to check from the cluster.
217 : */
218 : STATIC int
219 6135163 : xrep_ibt_cluster_record(
220 : struct xrep_ibt *ri,
221 : xfs_agino_t cluster_ir_startino,
222 : struct xfs_buf *cluster_bp,
223 : unsigned int nr_inodes)
224 : {
225 6135163 : struct xfs_scrub *sc = ri->sc;
226 6135163 : struct xfs_mount *mp = sc->mp;
227 6135163 : xfs_agino_t ir_startino;
228 6135163 : unsigned int cluster_base;
229 6135163 : unsigned int cluster_index;
230 6135163 : int error = 0;
231 :
232 6135163 : ir_startino = cluster_ir_startino;
233 6135163 : if (xfs_has_sparseinodes(mp))
234 6135163 : ir_startino = rounddown(ir_startino, XFS_INODES_PER_CHUNK);
235 6135163 : cluster_base = cluster_ir_startino - ir_startino;
236 :
237 : /*
238 : * If the accumulated inobt record doesn't map this cluster, add it to
239 : * the list and reset it.
240 : */
241 6135163 : if (ri->rie.ir_startino != NULLAGINO &&
242 6115073 : ri->rie.ir_startino + XFS_INODES_PER_CHUNK <= ir_startino) {
243 4072587 : error = xrep_ibt_stash(ri);
244 4072586 : if (error)
245 : return error;
246 : }
247 :
248 6135162 : if (ri->rie.ir_startino == NULLAGINO) {
249 4092676 : ri->rie.ir_startino = ir_startino;
250 4092676 : ri->rie.ir_free = XFS_INOBT_ALL_FREE;
251 4092676 : ri->rie.ir_holemask = 0xFFFF;
252 4092676 : ri->rie.ir_count = 0;
253 : }
254 :
255 : /* Record the whole cluster. */
256 6135162 : ri->icount += nr_inodes;
257 6135162 : ri->rie.ir_count += nr_inodes;
258 6135162 : ri->rie.ir_holemask &= ~xfs_inobt_maskn(
259 : cluster_base / XFS_INODES_PER_HOLEMASK_BIT,
260 : nr_inodes / XFS_INODES_PER_HOLEMASK_BIT);
261 :
262 : /* Which inodes within this cluster are free? */
263 202459265 : for (cluster_index = 0; cluster_index < nr_inodes; cluster_index++) {
264 196324103 : bool inuse = false;
265 :
266 196324103 : error = xrep_ibt_check_ifree(ri, cluster_ir_startino,
267 : cluster_bp, cluster_index, &inuse);
268 196324103 : if (error)
269 0 : return error;
270 196324103 : if (!inuse)
271 613312 : continue;
272 195710791 : ri->iused++;
273 195710791 : ri->rie.ir_free &= ~XFS_INOBT_MASK(cluster_base +
274 : cluster_index);
275 : }
276 : return 0;
277 : }
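
/*
 * The record-accumulation rules above, restated as standalone
 * predicates (a sketch; demo_* names hypothetical).  A record under
 * construction is flushed once the next cluster's chunk-aligned
 * startino lands at or past the end of the 64 inodes the record can
 * describe.
 */
#include <stdbool.h>
#include <stdint.h>

#define DEMO_INODES_PER_CHUNK	64
#define DEMO_NULLAGINO		((uint32_t)-1)

/* Chunk-align an AG inode number, as the rounddown() above does. */
static uint32_t demo_chunk_startino(uint32_t agino)
{
	return agino - (agino % DEMO_INODES_PER_CHUNK);
}

static bool demo_must_flush(uint32_t accum_startino, uint32_t ir_startino)
{
	return accum_startino != DEMO_NULLAGINO &&
	       accum_startino + DEMO_INODES_PER_CHUNK <= ir_startino;
}

/* e.g. demo_chunk_startino(96) == 64; demo_must_flush(0, 64) == true */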
278 :
279 : /*
280 : * For each inode cluster covering the physical extent recorded by the rmapbt,
281 : * we must calculate the properly aligned startino of that cluster, then
282 : * iterate each cluster to fill in used and filled masks appropriately. We
283 : * then use the (startino, used, filled) information to construct the
284 : * appropriate inode records.
285 : */
286 : STATIC int
287 6135163 : xrep_ibt_process_cluster(
288 : struct xrep_ibt *ri,
289 : xfs_agblock_t cluster_bno)
290 : {
291 6135163 : struct xfs_imap imap;
292 6135163 : struct xfs_buf *cluster_bp;
293 6135163 : struct xfs_scrub *sc = ri->sc;
294 6135163 : struct xfs_mount *mp = sc->mp;
295 6135163 : struct xfs_ino_geometry *igeo = M_IGEO(mp);
296 6135163 : xfs_agino_t cluster_ag_base;
297 6135163 : xfs_agino_t irec_index;
298 6135163 : unsigned int nr_inodes;
299 6135163 : int error;
300 :
301 6135163 : nr_inodes = min_t(unsigned int, igeo->inodes_per_cluster,
302 : XFS_INODES_PER_CHUNK);
303 :
304 : /*
305 : * Grab the inode cluster buffer. This is safe to do with a broken
306 : * inobt because imap_to_bp directly maps the buffer without touching
307 : * either inode btree.
308 : */
309 6135163 : imap.im_blkno = XFS_AGB_TO_DADDR(mp, sc->sa.pag->pag_agno, cluster_bno);
310 6135163 : imap.im_len = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
311 6135163 : imap.im_boffset = 0;
312 6135163 : error = xfs_imap_to_bp(mp, sc->tp, &imap, &cluster_bp);
313 6135163 : if (error)
314 : return error;
315 :
316 : /*
317 : * Record the contents of each possible inobt record mapping this
318 : * cluster.
319 : */
320 6135163 : cluster_ag_base = XFS_AGB_TO_AGINO(mp, cluster_bno);
321 6135163 : for (irec_index = 0;
322 12270325 : irec_index < igeo->inodes_per_cluster;
323 6135162 : irec_index += XFS_INODES_PER_CHUNK) {
324 6135163 : error = xrep_ibt_cluster_record(ri,
325 : cluster_ag_base + irec_index, cluster_bp,
326 : nr_inodes);
327 6135162 : if (error)
328 : break;
330 : }
331 :
332 6135162 : xfs_trans_brelse(sc->tp, cluster_bp);
333 6135162 : return error;
334 : }
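
/*
 * A worked geometry example for the loop above (hypothetical mkfs
 * parameters, not taken from any particular filesystem): with 512-byte
 * inodes and an 8192-byte cluster buffer, a cluster holds 16 inodes, so
 * nr_inodes is 16 and the irec loop body runs once per cluster; four
 * clusters make up one 64-inode record.  A 65536-byte cluster would
 * hold 128 inodes and the loop would step through two records.
 */
#include <stdio.h>

int main(void)
{
	unsigned int inodesize = 512, clustersize = 8192;
	unsigned int inodes_per_cluster = clustersize / inodesize;
	unsigned int chunk = 64;
	unsigned int nr_inodes = inodes_per_cluster < chunk ?
				 inodes_per_cluster : chunk;
	unsigned int irecs_per_cluster = (inodes_per_cluster + chunk - 1) /
					 chunk;

	/* Prints: 16 inodes/cluster, 16 per record, 1 record(s)/cluster */
	printf("%u inodes/cluster, %u per record, %u record(s)/cluster\n",
			inodes_per_cluster, nr_inodes, irecs_per_cluster);
	return 0;
}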
335 :
336 : /* Check for any obvious conflicts in the inode chunk extent. */
337 : STATIC int
338 4035849 : xrep_ibt_check_inode_ext(
339 : struct xfs_scrub *sc,
340 : xfs_agblock_t agbno,
341 : xfs_extlen_t len)
342 : {
343 4035849 : struct xfs_mount *mp = sc->mp;
344 4035849 : struct xfs_ino_geometry *igeo = M_IGEO(mp);
345 4035849 : xfs_agino_t agino;
346 4035849 : enum xbtree_recpacking outcome;
347 4035849 : int error;
348 :
349 : /* Inode records must be within the AG. */
350 4035849 : if (!xfs_verify_agbext(sc->sa.pag, agbno, len))
351 : return -EFSCORRUPTED;
352 :
353 : /* The entire record must align to the inode cluster size. */
354 4035849 : if (!IS_ALIGNED(agbno, igeo->blocks_per_cluster) ||
355 4035849 : !IS_ALIGNED(agbno + len, igeo->blocks_per_cluster))
356 : return -EFSCORRUPTED;
357 :
358 : /*
359 : * The entire record must also adhere to the inode cluster alignment
360 : * size if sparse inodes are not enabled.
361 : */
362 4035849 : if (!xfs_has_sparseinodes(mp) &&
363 0 : (!IS_ALIGNED(agbno, igeo->cluster_align) ||
364 0 : !IS_ALIGNED(agbno + len, igeo->cluster_align)))
365 : return -EFSCORRUPTED;
366 :
367 : /*
368 : * On a sparse inode fs, this cluster could be part of a sparse chunk, so
369 : * both ends of the extent must be aligned to the sparse chunk alignment.
370 : */
371 4035849 : if (xfs_has_sparseinodes(mp) &&
372 4035849 : (!IS_ALIGNED(agbno, mp->m_sb.sb_spino_align) ||
373 4035849 : !IS_ALIGNED(agbno + len, mp->m_sb.sb_spino_align)))
374 : return -EFSCORRUPTED;
375 :
376 : /* Make sure the entire range of blocks are valid AG inodes. */
377 4035849 : agino = XFS_AGB_TO_AGINO(mp, agbno);
378 4035849 : if (!xfs_verify_agino(sc->sa.pag, agino))
379 : return -EFSCORRUPTED;
380 :
381 4035849 : agino = XFS_AGB_TO_AGINO(mp, agbno + len) - 1;
382 4035849 : if (!xfs_verify_agino(sc->sa.pag, agino))
383 : return -EFSCORRUPTED;
384 :
385 : /* Make sure this isn't free space. */
386 4035849 : error = xfs_alloc_has_records(sc->sa.bno_cur, agbno, len, &outcome);
387 4035849 : if (error)
388 : return error;
389 4035849 : if (outcome != XBTREE_RECPACKING_EMPTY)
390 0 : return -EFSCORRUPTED;
391 :
392 : return 0;
393 : }
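
/*
 * The alignment tests above in miniature (a sketch; assumes
 * power-of-two alignments, which is what the kernel's IS_ALIGNED()
 * requires, and demo_* names are hypothetical).  Both ends of the
 * extent must sit on cluster boundaries.
 */
#include <stdbool.h>
#include <stdint.h>

static bool demo_is_aligned(uint32_t x, uint32_t align)
{
	return (x & (align - 1)) == 0;
}

static bool demo_extent_cluster_aligned(uint32_t agbno, uint32_t len,
		uint32_t blocks_per_cluster)
{
	return demo_is_aligned(agbno, blocks_per_cluster) &&
	       demo_is_aligned(agbno + len, blocks_per_cluster);
}

/* e.g. demo_extent_cluster_aligned(8, 4, 4) == true; (9, 4, 4) == false */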
394 :
395 : /* Found a fragment of the old inode btrees; dispose of them later. */
396 : STATIC int
397 45452 : xrep_ibt_record_old_btree_blocks(
398 : struct xrep_ibt *ri,
399 : const struct xfs_rmap_irec *rec)
400 : {
401 45452 : if (!xfs_verify_agbext(ri->sc->sa.pag, rec->rm_startblock,
402 45452 : rec->rm_blockcount))
403 : return -EFSCORRUPTED;
404 :
405 45452 : return xagb_bitmap_set(&ri->old_iallocbt_blocks, rec->rm_startblock,
406 : rec->rm_blockcount);
407 : }
408 :
409 : /* Record extents that belong to inode btrees. */
410 : STATIC int
411 501277821 : xrep_ibt_walk_rmap(
412 : struct xfs_btree_cur *cur,
413 : const struct xfs_rmap_irec *rec,
414 : void *priv)
415 : {
416 501277821 : struct xrep_ibt *ri = priv;
417 501277821 : struct xfs_mount *mp = cur->bc_mp;
418 501277821 : struct xfs_ino_geometry *igeo = M_IGEO(mp);
419 501277821 : xfs_agblock_t cluster_base;
420 501277821 : int error = 0;
421 :
422 501277821 : if (xchk_should_terminate(ri->sc, &error))
423 0 : return error;
424 :
425 501278504 : if (rec->rm_owner == XFS_RMAP_OWN_INOBT)
426 45453 : return xrep_ibt_record_old_btree_blocks(ri, rec);
427 :
428 : /* Skip extents that are not owned by inodes. */
429 501233051 : if (rec->rm_owner != XFS_RMAP_OWN_INODES)
430 : return 0;
431 :
432 8071698 : error = xrep_ibt_check_inode_ext(ri->sc, rec->rm_startblock,
433 4035849 : rec->rm_blockcount);
434 4035849 : if (error)
435 : return error;
436 :
437 4035849 : trace_xrep_ibt_walk_rmap(mp, ri->sc->sa.pag->pag_agno,
438 4035849 : rec->rm_startblock, rec->rm_blockcount, rec->rm_owner,
439 4035849 : rec->rm_offset, rec->rm_flags);
440 :
441 : /*
442 : * Record the free/hole masks for each inode cluster that could be
443 : * mapped by this rmap record.
444 : */
445 4035849 : for (cluster_base = 0;
446 10171012 : cluster_base < rec->rm_blockcount;
447 6135163 : cluster_base += igeo->blocks_per_cluster) {
448 12270326 : error = xrep_ibt_process_cluster(ri,
449 6135163 : rec->rm_startblock + cluster_base);
450 6135163 : if (error)
451 0 : return error;
452 : }
453 :
454 : return 0;
455 : }
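
/*
 * The owner-based dispatch above, in outline (illustrative only; the
 * demo_* enums are stand-ins, not the kernel's rmap owner codes).
 * Only two rmap owners matter to this repair: OWN_INOBT extents are
 * the old btree blocks queued for reaping, and OWN_INODES extents are
 * scanned cluster by cluster.
 */
enum demo_owner { DEMO_OWN_INOBT, DEMO_OWN_INODES, DEMO_OWN_OTHER };
enum demo_action { DEMO_REAP_LATER, DEMO_SCAN_CLUSTERS, DEMO_SKIP };

static enum demo_action demo_classify_rmap(enum demo_owner owner)
{
	switch (owner) {
	case DEMO_OWN_INOBT:
		return DEMO_REAP_LATER;
	case DEMO_OWN_INODES:
		return DEMO_SCAN_CLUSTERS;
	default:
		return DEMO_SKIP;
	}
}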
456 :
457 : /*
458 : * Iterate all reverse mappings to find the inodes (OWN_INODES) and the inode
459 : * btrees (OWN_INOBT). Figure out if we have enough free space to reconstruct
460 : * the inode btrees. The caller must clean up the lists if anything goes
461 : * wrong.
462 : */
463 : STATIC int
464 29212 : xrep_ibt_find_inodes(
465 : struct xrep_ibt *ri)
466 : {
467 29212 : struct xfs_scrub *sc = ri->sc;
468 29212 : int error;
469 :
470 29212 : ri->rie.ir_startino = NULLAGINO;
471 :
472 : /* Collect all reverse mappings for inode blocks. */
473 29212 : xrep_ag_btcur_init(sc, &sc->sa);
474 29217 : error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_ibt_walk_rmap, ri);
475 29218 : xchk_ag_btcur_free(&sc->sa);
476 29219 : if (error)
477 : return error;
478 :
479 : /* If we have a record ready to go, add it to the array. */
480 29219 : if (ri->rie.ir_startino == NULLAGINO)
481 : return 0;
482 :
483 20090 : return xrep_ibt_stash(ri);
484 : }
485 :
486 : /* Update the AGI counters. */
487 : STATIC int
488 29218 : xrep_ibt_reset_counters(
489 : struct xrep_ibt *ri)
490 : {
491 29218 : struct xfs_scrub *sc = ri->sc;
492 29218 : struct xfs_agi *agi = sc->sa.agi_bp->b_addr;
493 29218 : unsigned int freecount = ri->icount - ri->iused;
494 :
495 : /* Trigger inode count recalculation */
496 29218 : xfs_force_summary_recalc(sc->mp);
497 :
498 : /*
499 : * The AGI header contains extra information related to the inode
500 : * btrees, so we must update those fields here.
501 : */
502 29219 : agi->agi_count = cpu_to_be32(ri->icount);
503 29219 : agi->agi_freecount = cpu_to_be32(freecount);
504 29219 : xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp,
505 : XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
506 :
507 : /* Reinitialize with the values we just logged. */
508 29219 : return xrep_reinit_pagi(sc);
509 : }
510 :
511 : /* Retrieve finobt data for bulk load. */
512 : STATIC int
513 19787 : xrep_fibt_get_records(
514 : struct xfs_btree_cur *cur,
515 : unsigned int idx,
516 : struct xfs_btree_block *block,
517 : unsigned int nr_wanted,
518 : void *priv)
519 : {
520 19787 : struct xfs_inobt_rec_incore *irec = &cur->bc_rec.i;
521 19787 : struct xrep_ibt *ri = priv;
522 19787 : union xfs_btree_rec *block_rec;
523 19787 : unsigned int loaded;
524 19787 : int error;
525 :
526 44758 : for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
527 3098706 : do {
528 3098706 : error = xfarray_load(ri->inode_records,
529 3098706 : ri->array_cur++, irec);
530 3098706 : } while (error == 0 && xfs_inobt_rec_freecount(irec) == 0);
531 24971 : if (error)
532 0 : return error;
533 :
534 24971 : block_rec = xfs_btree_rec_addr(cur, idx, block);
535 24971 : cur->bc_ops->init_rec_from_cur(cur, block_rec);
536 : }
537 :
538 19787 : return loaded;
539 : }
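
/*
 * The skip-ahead above, recast over a plain array (a sketch; demo_*
 * names hypothetical).  Both loaders walk the same flat record array,
 * but the finobt loader advances past records with no free inodes, so
 * it emits only the subset of records that belong in the free-inode
 * btree.
 */
#include <stddef.h>

struct demo_rec {
	unsigned int startino;
	unsigned int freecount;
};

/* Return the index of the next record with free inodes, or nr if none. */
static size_t demo_next_finobt_rec(const struct demo_rec *recs, size_t nr,
		size_t cur)
{
	while (cur < nr && recs[cur].freecount == 0)
		cur++;
	return cur;
}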
540 :
541 : /* Retrieve inobt data for bulk load. */
542 : STATIC int
543 29613 : xrep_ibt_get_records(
544 : struct xfs_btree_cur *cur,
545 : unsigned int idx,
546 : struct xfs_btree_block *block,
547 : unsigned int nr_wanted,
548 : void *priv)
549 : {
550 29613 : struct xfs_inobt_rec_incore *irec = &cur->bc_rec.i;
551 29613 : struct xrep_ibt *ri = priv;
552 29613 : union xfs_btree_rec *block_rec;
553 29613 : unsigned int loaded;
554 29613 : int error;
555 :
556 4122290 : for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
557 4092677 : error = xfarray_load(ri->inode_records, ri->array_cur++, irec);
558 4092677 : if (error)
559 0 : return error;
560 :
561 4092677 : block_rec = xfs_btree_rec_addr(cur, idx, block);
562 4092677 : cur->bc_ops->init_rec_from_cur(cur, block_rec);
563 : }
564 :
565 29613 : return loaded;
566 : }
567 :
568 : /* Feed one of the new inobt blocks to the bulk loader. */
569 : STATIC int
570 46739 : xrep_ibt_claim_block(
571 : struct xfs_btree_cur *cur,
572 : union xfs_btree_ptr *ptr,
573 : void *priv)
574 : {
575 46739 : struct xrep_ibt *ri = priv;
576 46739 : int error;
577 :
578 46739 : error = xrep_newbt_relog_autoreap(&ri->new_inobt);
579 46739 : if (error)
580 : return error;
581 :
582 46739 : return xrep_newbt_claim_block(cur, &ri->new_inobt, ptr);
583 : }
584 :
585 : /* Feed one of the new finobt blocks to the bulk loader. */
586 : STATIC int
587 29218 : xrep_fibt_claim_block(
588 : struct xfs_btree_cur *cur,
589 : union xfs_btree_ptr *ptr,
590 : void *priv)
591 : {
592 29218 : struct xrep_ibt *ri = priv;
593 29218 : int error;
594 :
595 29218 : error = xrep_newbt_relog_autoreap(&ri->new_finobt);
596 29219 : if (error)
597 : return error;
598 :
599 29219 : return xrep_newbt_claim_block(cur, &ri->new_finobt, ptr);
600 : }
601 :
602 : /* Make sure the records do not overlap in inumber address space. */
603 : STATIC int
604 29218 : xrep_ibt_check_startino(
605 : struct xrep_ibt *ri)
606 : {
607 29218 : struct xfs_inobt_rec_incore irec;
608 29218 : xfarray_idx_t cur;
609 29218 : xfs_agino_t next_agino = 0;
610 29218 : int error = 0;
611 :
612 4121895 : foreach_xfarray_idx(ri->inode_records, cur) {
613 4092677 : if (xchk_should_terminate(ri->sc, &error))
614 0 : return error;
615 :
616 4092677 : error = xfarray_load(ri->inode_records, cur, &irec);
617 4092677 : if (error)
618 0 : return error;
619 :
620 4092677 : if (irec.ir_startino < next_agino)
621 : return -EFSCORRUPTED;
622 :
623 4092677 : next_agino = irec.ir_startino + XFS_INODES_PER_CHUNK;
624 : }
625 :
626 29216 : return error;
627 : }
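
/*
 * The same overlap check over a plain sorted array (illustrative;
 * demo_* names hypothetical).  Because the records were collected in
 * increasing agino order, disjointness only requires that each record
 * start at or after the end of the previous 64-inode chunk.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static bool demo_records_disjoint(const uint32_t *startinos, size_t nr)
{
	uint32_t next_agino = 0;
	size_t i;

	for (i = 0; i < nr; i++) {
		if (startinos[i] < next_agino)
			return false;	/* overlaps the previous record */
		next_agino = startinos[i] + 64;
	}
	return true;
}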
628 :
629 : /* Build new inode btrees and dispose of the old one. */
630 : STATIC int
631 29218 : xrep_ibt_build_new_trees(
632 : struct xrep_ibt *ri)
633 : {
634 29218 : struct xfs_scrub *sc = ri->sc;
635 29218 : struct xfs_btree_cur *ino_cur;
636 29218 : struct xfs_btree_cur *fino_cur = NULL;
637 29218 : xfs_fsblock_t fsbno;
638 29218 : bool need_finobt;
639 29218 : int error;
640 :
641 29218 : need_finobt = xfs_has_finobt(sc->mp);
642 :
643 : /*
644 : * Create new btrees for staging all the inobt records we collected
645 : * earlier. The records were collected in order of increasing agino,
646 : * so we do not have to sort them. Ensure there are no overlapping
647 : * records.
648 : */
649 29218 : error = xrep_ibt_check_startino(ri);
650 29216 : if (error)
651 : return error;
652 :
653 : /*
654 : * The new inode btrees will not be rooted in the AGI until we've
655 : * successfully rebuilt the tree.
656 : *
657 : * Start by setting up the inobt staging cursor.
658 : */
659 29216 : fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
660 : XFS_IBT_BLOCK(sc->mp));
661 29216 : xrep_newbt_init_ag(&ri->new_inobt, sc, &XFS_RMAP_OINFO_INOBT, fsbno,
662 : XFS_AG_RESV_NONE);
663 29215 : ri->new_inobt.bload.claim_block = xrep_ibt_claim_block;
664 29215 : ri->new_inobt.bload.get_records = xrep_ibt_get_records;
665 :
666 29215 : ino_cur = xfs_inobt_stage_cursor(sc->sa.pag, &ri->new_inobt.afake,
667 : XFS_BTNUM_INO);
668 29216 : error = xfs_btree_bload_compute_geometry(ino_cur, &ri->new_inobt.bload,
669 : xfarray_length(ri->inode_records));
670 29203 : if (error)
671 0 : goto err_inocur;
672 :
673 : /* Set up finobt staging cursor. */
674 29203 : if (need_finobt) {
675 29203 : enum xfs_ag_resv_type resv = XFS_AG_RESV_METADATA;
676 :
677 29203 : if (sc->mp->m_finobt_nores)
678 0 : resv = XFS_AG_RESV_NONE;
679 :
680 29203 : fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
681 : XFS_FIBT_BLOCK(sc->mp));
682 29203 : xrep_newbt_init_ag(&ri->new_finobt, sc, &XFS_RMAP_OINFO_INOBT,
683 : fsbno, resv);
684 29205 : ri->new_finobt.bload.claim_block = xrep_fibt_claim_block;
685 29205 : ri->new_finobt.bload.get_records = xrep_fibt_get_records;
686 :
687 29205 : fino_cur = xfs_inobt_stage_cursor(sc->sa.pag,
688 : &ri->new_finobt.afake, XFS_BTNUM_FINO);
689 58429 : error = xfs_btree_bload_compute_geometry(fino_cur,
690 29212 : &ri->new_finobt.bload, ri->finobt_recs);
691 29217 : if (error)
692 0 : goto err_finocur;
693 : }
694 :
695 : /* Last chance to abort before we start committing fixes. */
696 29217 : if (xchk_should_terminate(sc, &error))
697 0 : goto err_finocur;
698 :
699 : /* Reserve all the space we need to build the new btrees. */
700 29216 : error = xrep_newbt_alloc_blocks(&ri->new_inobt,
701 : ri->new_inobt.bload.nr_blocks);
702 29216 : if (error)
703 0 : goto err_finocur;
704 :
705 29216 : if (need_finobt) {
706 29214 : error = xrep_newbt_alloc_blocks(&ri->new_finobt,
707 : ri->new_finobt.bload.nr_blocks);
708 29219 : if (error)
709 0 : goto err_finocur;
710 : }
711 :
712 : /* Add all inobt records. */
713 29221 : ri->array_cur = XFARRAY_CURSOR_INIT;
714 29221 : error = xfs_btree_bload(ino_cur, &ri->new_inobt.bload, ri);
715 29219 : if (error)
716 0 : goto err_finocur;
717 :
718 : /* Add all finobt records. */
719 29219 : if (need_finobt) {
720 29219 : ri->array_cur = XFARRAY_CURSOR_INIT;
721 29219 : error = xfs_btree_bload(fino_cur, &ri->new_finobt.bload, ri);
722 29218 : if (error)
723 0 : goto err_finocur;
724 : }
725 :
726 : /*
727 : * Install the new btrees in the AG header. After this point the old
728 : * btrees are no longer accessible and the new trees are live.
729 : */
730 29218 : xfs_inobt_commit_staged_btree(ino_cur, sc->tp, sc->sa.agi_bp);
731 29218 : xfs_btree_del_cursor(ino_cur, 0);
732 :
733 29217 : if (fino_cur) {
734 29217 : xfs_inobt_commit_staged_btree(fino_cur, sc->tp, sc->sa.agi_bp);
735 29219 : xfs_btree_del_cursor(fino_cur, 0);
736 : }
737 :
738 : /* Reset the AGI counters now that we've changed the inode roots. */
739 29218 : error = xrep_ibt_reset_counters(ri);
740 29216 : if (error)
741 0 : goto err_finobt;
742 :
743 : /* Free unused blocks and bitmap. */
744 29216 : if (need_finobt) {
745 29216 : error = xrep_newbt_commit(&ri->new_finobt);
746 29218 : if (error)
747 0 : goto err_inobt;
748 : }
749 29218 : error = xrep_newbt_commit(&ri->new_inobt);
750 29219 : if (error)
751 : return error;
752 :
753 29219 : return xrep_roll_ag_trans(sc);
754 :
755 0 : err_finocur:
756 0 : if (need_finobt)
757 0 : xfs_btree_del_cursor(fino_cur, error);
758 0 : err_inocur:
759 0 : xfs_btree_del_cursor(ino_cur, error);
760 0 : err_finobt:
761 0 : if (need_finobt)
762 0 : xrep_newbt_cancel(&ri->new_finobt);
763 0 : err_inobt:
764 0 : xrep_newbt_cancel(&ri->new_inobt);
765 0 : return error;
766 : }
767 :
768 : /*
769 : * Now that we've logged the roots of the new btrees, invalidate all of the
770 : * old blocks and free them.
771 : */
772 : STATIC int
773 29211 : xrep_ibt_remove_old_trees(
774 : struct xrep_ibt *ri)
775 : {
776 29211 : struct xfs_scrub *sc = ri->sc;
777 29211 : int error;
778 :
779 : /*
780 : * Free the old inode btree blocks if they're not in use. It's ok to
781 : * reap with XFS_AG_RESV_NONE even if the finobt had a per-AG
782 : * reservation because we reset the reservation before releasing the
783 : * AGI and AGF header buffer locks.
784 : */
785 29211 : error = xrep_reap_agblocks(sc, &ri->old_iallocbt_blocks,
786 : &XFS_RMAP_OINFO_INOBT, XFS_AG_RESV_NONE);
787 29218 : if (error)
788 : return error;
789 :
790 : /*
791 : * If the finobt is enabled and has a per-AG reservation, make sure we
792 : * reinitialize the per-AG reservations.
793 : */
794 29218 : if (xfs_has_finobt(sc->mp) && !sc->mp->m_finobt_nores)
795 29218 : sc->flags |= XREP_RESET_PERAG_RESV;
796 :
797 : return 0;
798 : }
799 :
800 : /* Repair both inode btrees. */
801 : int
802 44620 : xrep_iallocbt(
803 : struct xfs_scrub *sc)
804 : {
805 44620 : struct xrep_ibt *ri;
806 44620 : struct xfs_mount *mp = sc->mp;
807 44620 : char *descr;
808 44620 : xfs_agino_t first_agino, last_agino;
809 44620 : int error = 0;
810 :
811 : /* We require the rmapbt to rebuild anything. */
812 44620 : if (!xfs_has_rmapbt(mp))
813 : return -EOPNOTSUPP;
814 :
815 29219 : ri = kzalloc(sizeof(struct xrep_ibt), XCHK_GFP_FLAGS);
816 29218 : if (!ri)
817 : return -ENOMEM;
818 29218 : ri->sc = sc;
819 :
820 : /* We rebuild both inode btrees. */
821 29218 : sc->sick_mask = XFS_SICK_AG_INOBT | XFS_SICK_AG_FINOBT;
822 :
823 : /* Set up enough storage to handle an AG with nothing but inodes. */
824 29218 : xfs_agino_range(mp, sc->sa.pag->pag_agno, &first_agino, &last_agino);
825 29219 : last_agino /= XFS_INODES_PER_CHUNK;
826 29219 : descr = xchk_xfile_ag_descr(sc, "inode index records");
827 29218 : error = xfarray_create(descr, last_agino,
828 : sizeof(struct xfs_inobt_rec_incore),
829 : &ri->inode_records);
830 29213 : kfree(descr);
831 29212 : if (error)
832 0 : goto out_ri;
833 :
834 : /* Collect the inode data and find the old btree blocks. */
835 29212 : xagb_bitmap_init(&ri->old_iallocbt_blocks);
836 29211 : error = xrep_ibt_find_inodes(ri);
837 29219 : if (error)
838 0 : goto out_bitmap;
839 :
840 : /* Rebuild the inode indexes. */
841 29219 : error = xrep_ibt_build_new_trees(ri);
842 29212 : if (error)
843 0 : goto out_bitmap;
844 :
845 : /* Kill the old tree. */
846 29212 : error = xrep_ibt_remove_old_trees(ri);
847 :
848 29208 : out_bitmap:
849 29208 : xagb_bitmap_destroy(&ri->old_iallocbt_blocks);
850 29198 : xfarray_destroy(ri->inode_records);
851 29200 : out_ri:
852 29200 : kfree(ri);
853 29200 : return error;
854 : }
855 :
856 : /* Make sure both btrees are ok after we've rebuilt them. */
857 : int
858 29212 : xrep_revalidate_iallocbt(
859 : struct xfs_scrub *sc)
860 : {
861 29212 : __u32 old_type = sc->sm->sm_type;
862 29212 : int error;
863 :
864 : /*
865 : * We must update sm_type temporarily so that the tree-to-tree cross
866 : * reference checks will work in the correct direction, and also so
867 : * that tracing will report correctly if there are more errors.
868 : */
869 29212 : sc->sm->sm_type = XFS_SCRUB_TYPE_INOBT;
870 29212 : error = xchk_inobt(sc);
871 29219 : if (error)
872 0 : goto out;
873 :
874 29219 : if (xfs_has_finobt(sc->mp)) {
875 29219 : sc->sm->sm_type = XFS_SCRUB_TYPE_FINOBT;
876 29219 : error = xchk_finobt(sc);
877 : }
878 :
879 0 : out:
880 29219 : sc->sm->sm_type = old_type;
881 29219 : return error;
882 : }