Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_defer.h"
13 : #include "xfs_btree.h"
14 : #include "xfs_btree_staging.h"
15 : #include "xfs_bit.h"
16 : #include "xfs_log_format.h"
17 : #include "xfs_trans.h"
18 : #include "xfs_sb.h"
19 : #include "xfs_inode.h"
20 : #include "xfs_alloc.h"
21 : #include "xfs_ialloc.h"
22 : #include "xfs_ialloc_btree.h"
23 : #include "xfs_icache.h"
24 : #include "xfs_rmap.h"
25 : #include "xfs_rmap_btree.h"
26 : #include "xfs_log.h"
27 : #include "xfs_trans_priv.h"
28 : #include "xfs_error.h"
29 : #include "xfs_health.h"
30 : #include "xfs_ag.h"
31 : #include "scrub/xfs_scrub.h"
32 : #include "scrub/scrub.h"
33 : #include "scrub/common.h"
34 : #include "scrub/btree.h"
35 : #include "scrub/trace.h"
36 : #include "scrub/repair.h"
37 : #include "scrub/bitmap.h"
38 : #include "scrub/xfile.h"
39 : #include "scrub/xfarray.h"
40 : #include "scrub/newbt.h"
41 : #include "scrub/reap.h"
42 :
43 : /*
44 : * Inode Btree Repair
45 : * ==================
46 : *
47 : * A quick refresher of inode btrees on a v5 filesystem:
48 : *
49 : * - Inode records are read into memory in units of 'inode clusters'. The
50 : * number of inodes that fit in a cluster buffer is the smallest number of
51 : * inodes that can be allocated or freed at once. Clusters are never
52 : * smaller than one fs block, though they can span multiple blocks. The
53 : * size (in fs blocks) is computed with xfs_icluster_size_fsb(); the fs
54 : * block alignment of a cluster is computed with xfs_ialloc_cluster_alignment().
55 : *
56 : * - Each inode btree record can describe a single 'inode chunk'. The chunk
57 : * size is defined to be 64 inodes. If sparse inodes are enabled, every
58 : * inobt record must be aligned to the chunk size; if not, every record must
59 : * be aligned to the start of a cluster. It is possible to construct an XFS
60 : * geometry where one inobt record maps to multiple inode clusters; it is
61 : * also possible to construct a geometry where multiple inobt records map to
62 : * different parts of one inode cluster.
63 : *
64 : * - If sparse inodes are not enabled, the smallest unit of allocation for
65 : * inode records is enough to contain one inode chunk's worth of inodes.
66 : *
67 : * - If sparse inodes are enabled, the holemask field will be active. Each
68 : * bit of the holemask represents 4 potential inodes; if set, the
69 : * corresponding space does *not* contain inodes and must be left alone.
70 : * Clusters cannot be smaller than 4 inodes. The smallest unit of allocation
71 : * of inode records is one inode cluster.
72 : *
73 : * So what's the rebuild algorithm?
74 : *
75 : * Iterate the reverse mapping records looking for OWN_INODES and OWN_INOBT
76 : * records. The OWN_INOBT records are the old inode btree blocks and will be
77 : * cleared out after we've rebuilt the tree. Each possible inode cluster
78 : * within an OWN_INODES record will be read in; for each possible inobt record
79 : * associated with that cluster, compute the freemask from the i_mode
80 : * data in the inode chunk. For sparse inodes the holemask will be
81 : * calculated by creating the properly aligned inobt record and punching out
82 : * any chunk that's missing. Inode allocations and frees grab the AGI first,
83 : * so repair protects itself from concurrent access by locking the AGI.
84 : *
85 : * Once we've reconstructed all the inode records, we can create new inode
86 : * btree roots and reload the btrees. We rebuild both inode trees at the same
87 : * time because they share the same rmap owner, and it would be more complex
88 : * to figure out whether the other tree also needs a rebuild and which
89 : * OWN_INOBT blocks belong to which tree. We have all the data we need to
90 : * build both, so dump everything and start over.
91 : *
92 : * We use the prefix 'xrep_ibt' because we rebuild both inode btrees at once.
93 : */
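/*
 * A standalone sketch (not kernel code) of the geometry relationships
 * described above, worked through with hypothetical example values:
 * 512-byte inodes, 4096-byte fs blocks, a 16384-byte cluster buffer.
 */
#include <assert.h>

#define INODES_PER_CHUNK	64	/* fixed by the on-disk format */
#define HOLEMASK_BITS		16	/* bits in ir_holemask */

int main(void)
{
	unsigned int inodesize = 512;		/* assumed example value */
	unsigned int blocksize = 4096;		/* assumed example value */
	unsigned int clustersize = 16384;	/* assumed example value */

	/* analogous to igeo->inodes_per_cluster */
	unsigned int inodes_per_cluster = clustersize / inodesize;
	/* analogous to xfs_icluster_size_fsb() */
	unsigned int blocks_per_cluster = clustersize / blocksize;

	/* One 64-inode chunk spans two of these 32-inode clusters. */
	assert(inodes_per_cluster == 32);
	assert(blocks_per_cluster == 4);
	/* Each holemask bit covers 64 / 16 = 4 inodes, as noted above. */
	assert(INODES_PER_CHUNK / HOLEMASK_BITS == 4);
	return 0;
}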
94 :
95 : struct xrep_ibt {
96 : /* Record under construction. */
97 : struct xfs_inobt_rec_incore rie;
98 :
99 : /* new inobt information */
100 : struct xrep_newbt new_inobt;
101 :
102 : /* new finobt information */
103 : struct xrep_newbt new_finobt;
104 :
105 : /* Old inode btree blocks we found in the rmap. */
106 : struct xagb_bitmap old_iallocbt_blocks;
107 :
108 : /* Reconstructed inode records. */
109 : struct xfarray *inode_records;
110 :
111 : struct xfs_scrub *sc;
112 :
113 : /* Number of inodes assigned disk space. */
114 : unsigned int icount;
115 :
116 : /* Number of inodes in use. */
117 : unsigned int iused;
118 :
119 : /* Number of finobt records needed. */
120 : unsigned int finobt_recs;
121 :
122 : /* get_records()'s position in the inode record array. */
123 : xfarray_idx_t array_cur;
124 : };
125 :
126 : /*
127 : * Is this inode in use? If the inode is in memory we can tell from i_mode,
128 : * otherwise we have to check di_mode in the on-disk buffer. We only care
129 : * that the high (i.e. non-permission) bits of the mode are zero. This
130 : * should be safe because repair keeps all AG headers locked until the end,
131 : * and any process trying to perform an inode allocation/free must lock the AGI.
132 : *
133 : * @cluster_ag_base is the inode offset of the cluster within the AG.
134 : * @cluster_bp is the cluster buffer.
135 : * @cluster_index is the inode offset within the inode cluster.
136 : */
137 : STATIC int
138 419877247 : xrep_ibt_check_ifree(
139 : struct xrep_ibt *ri,
140 : xfs_agino_t cluster_ag_base,
141 : struct xfs_buf *cluster_bp,
142 : unsigned int cluster_index,
143 : bool *inuse)
144 : {
145 419877247 : struct xfs_scrub *sc = ri->sc;
146 419877247 : struct xfs_mount *mp = sc->mp;
147 419877247 : struct xfs_dinode *dip;
148 419877247 : xfs_ino_t fsino;
149 419877247 : xfs_agino_t agino;
150 419877247 : xfs_agnumber_t agno = ri->sc->sa.pag->pag_agno;
151 419877247 : unsigned int cluster_buf_base;
152 419877247 : unsigned int offset;
153 419877247 : int error;
154 :
155 419877247 : agino = cluster_ag_base + cluster_index;
156 419877247 : fsino = XFS_AGINO_TO_INO(mp, agno, agino);
157 :
158 : /* Map this inode into the cluster buffer and sanity-check it. */
159 419877247 : cluster_buf_base = XFS_INO_TO_OFFSET(mp, cluster_ag_base);
160 419877247 : offset = (cluster_buf_base + cluster_index) * mp->m_sb.sb_inodesize;
161 419877247 : if (offset >= BBTOB(cluster_bp->b_length))
162 : return -EFSCORRUPTED;
163 419877247 : dip = xfs_buf_offset(cluster_bp, offset);
164 419877174 : if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)
165 : return -EFSCORRUPTED;
166 :
167 419877174 : if (dip->di_version >= 3 && be64_to_cpu(dip->di_ino) != fsino)
168 : return -EFSCORRUPTED;
169 :
170 : /* Will the in-core inode tell us if it's in use? */
171 419877174 : error = xchk_inode_is_allocated(sc, agino, inuse);
172 419878035 : if (!error)
173 : return 0;
174 :
175 4213616 : *inuse = dip->di_mode != 0;
176 4213616 : return 0;
177 : }
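/*
 * A standalone sketch of the offset math above (assumed names, not the
 * kernel helper): locate a dinode within the cluster buffer. With
 * 512-byte inodes, cluster_buf_base 0, and cluster_index 5, the dinode
 * starts at byte (0 + 5) * 512 = 2560, which must lie inside the buffer.
 */
static unsigned int sketch_dinode_offset(unsigned int cluster_buf_base,
					 unsigned int cluster_index,
					 unsigned int inodesize)
{
	return (cluster_buf_base + cluster_index) * inodesize;
}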
178 :
179 : /* Stash the accumulated inobt record for rebuilding. */
180 : STATIC int
181 8691630 : xrep_ibt_stash(
182 : struct xrep_ibt *ri)
183 : {
184 8691630 : int error = 0;
185 :
186 8691630 : if (xchk_should_terminate(ri->sc, &error))
187 0 : return error;
188 :
189 8691630 : ri->rie.ir_freecount = xfs_inobt_rec_freecount(&ri->rie);
190 8691630 : if (xfs_inobt_check_perag_irec(ri->sc->sa.pag, &ri->rie) != NULL)
191 : return -EFSCORRUPTED;
192 :
193 8691630 : if (ri->rie.ir_freecount > 0)
194 48843 : ri->finobt_recs++;
195 :
196 8691630 : trace_xrep_ibt_found(ri->sc->mp, ri->sc->sa.pag->pag_agno, &ri->rie);
197 :
198 8691630 : error = xfarray_append(ri->inode_records, &ri->rie);
199 8691630 : if (error)
200 : return error;
201 :
202 8691630 : ri->rie.ir_startino = NULLAGINO;
203 8691630 : return 0;
204 : }
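/*
 * A sketch of what xfs_inobt_rec_freecount() is counting above (an
 * illustration under assumed semantics, not the kernel implementation):
 * free bits only matter for inodes that physically exist, so regions
 * punched out by the holemask are excluded from the count.
 */
static unsigned int sketch_rec_freecount(unsigned long long ir_free,
					 unsigned long long allocmask)
{
	/* allocmask: one bit per inode that is physically present */
	return __builtin_popcountll(ir_free & allocmask);
}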
205 :
206 : /*
207 : * Given an extent of inodes and an inode cluster buffer, calculate the
208 : * location of the corresponding inobt record (creating it if necessary),
209 : * then update the parts of the holemask and freemask of that record that
210 : * correspond to the inode extent we were given.
211 : *
212 : * @cluster_ir_startino is the AG inode number of an inobt record that we're
213 : * proposing to create for this inode cluster. If sparse inodes are enabled,
214 : * we must round down to a chunk boundary to find the actual sparse record.
215 : * @cluster_bp is the buffer of the inode cluster.
216 : * @nr_inodes is the number of inodes to check from the cluster.
217 : */
218 : STATIC int
219 13121201 : xrep_ibt_cluster_record(
220 : struct xrep_ibt *ri,
221 : xfs_agino_t cluster_ir_startino,
222 : struct xfs_buf *cluster_bp,
223 : unsigned int nr_inodes)
224 : {
225 13121201 : struct xfs_scrub *sc = ri->sc;
226 13121201 : struct xfs_mount *mp = sc->mp;
227 13121201 : xfs_agino_t ir_startino;
228 13121201 : unsigned int cluster_base;
229 13121201 : unsigned int cluster_index;
230 13121201 : int error = 0;
231 :
232 13121201 : ir_startino = cluster_ir_startino;
233 13121201 : if (xfs_has_sparseinodes(mp))
234 13121201 : ir_startino = rounddown(ir_startino, XFS_INODES_PER_CHUNK);
235 13121201 : cluster_base = cluster_ir_startino - ir_startino;
236 :
237 : /*
238 : * If the accumulated inobt record doesn't map this cluster, add it to
239 : * the list and reset it.
240 : */
241 13121201 : if (ri->rie.ir_startino != NULLAGINO &&
242 13080520 : ri->rie.ir_startino + XFS_INODES_PER_CHUNK <= ir_startino) {
243 8650949 : error = xrep_ibt_stash(ri);
244 8650949 : if (error)
245 : return error;
246 : }
247 :
248 13121201 : if (ri->rie.ir_startino == NULLAGINO) {
249 8691630 : ri->rie.ir_startino = ir_startino;
250 8691630 : ri->rie.ir_free = XFS_INOBT_ALL_FREE;
251 8691630 : ri->rie.ir_holemask = 0xFFFF;
252 8691630 : ri->rie.ir_count = 0;
253 : }
254 :
255 : /* Record the whole cluster. */
256 13121201 : ri->icount += nr_inodes;
257 13121201 : ri->rie.ir_count += nr_inodes;
258 13121201 : ri->rie.ir_holemask &= ~xfs_inobt_maskn(
259 : cluster_base / XFS_INODES_PER_HOLEMASK_BIT,
260 : nr_inodes / XFS_INODES_PER_HOLEMASK_BIT);
261 :
262 : /* Which inodes within this cluster are free? */
263 432999016 : for (cluster_index = 0; cluster_index < nr_inodes; cluster_index++) {
264 419877815 : bool inuse = false;
265 :
266 419877815 : error = xrep_ibt_check_ifree(ri, cluster_ir_startino,
267 : cluster_bp, cluster_index, &inuse);
268 419877815 : if (error)
269 0 : return error;
270 419877815 : if (!inuse)
271 1183011 : continue;
272 418694804 : ri->iused++;
273 418694804 : ri->rie.ir_free &= ~XFS_INOBT_MASK(cluster_base +
274 : cluster_index);
275 : }
276 : return 0;
277 : }
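/*
 * A worked example of the holemask update above, under assumed geometry
 * (32-inode clusters, 64-inode chunks, 4 inodes per holemask bit): a
 * cluster at cluster_base 32 covers holemask bits 32/4 = 8 through
 * 8 + 32/4 - 1 = 15, and xfs_inobt_maskn(8, 8) == 0xFF00, so the
 * &= ~mask records that the second half of the chunk is present.
 */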
278 :
279 : /*
280 : * For each inode cluster covering the physical extent recorded by the rmapbt,
281 : * we must calculate the properly aligned startino of that cluster, then
282 : * iterate each cluster to fill in used and filled masks appropriately. We
283 : * then use the (startino, used, filled) information to construct the
284 : * appropriate inode records.
285 : */
286 : STATIC int
287 13121202 : xrep_ibt_process_cluster(
288 : struct xrep_ibt *ri,
289 : xfs_agblock_t cluster_bno)
290 : {
291 13121202 : struct xfs_imap imap;
292 13121202 : struct xfs_buf *cluster_bp;
293 13121202 : struct xfs_scrub *sc = ri->sc;
294 13121202 : struct xfs_mount *mp = sc->mp;
295 13121202 : struct xfs_ino_geometry *igeo = M_IGEO(mp);
296 13121202 : xfs_agino_t cluster_ag_base;
297 13121202 : xfs_agino_t irec_index;
298 13121202 : unsigned int nr_inodes;
299 13121202 : int error;
300 :
301 13121202 : nr_inodes = min_t(unsigned int, igeo->inodes_per_cluster,
302 : XFS_INODES_PER_CHUNK);
303 :
304 : /*
305 : * Grab the inode cluster buffer. This is safe to do with a broken
306 : * inobt because imap_to_bp directly maps the buffer without touching
307 : * either inode btree.
308 : */
309 13121202 : imap.im_blkno = XFS_AGB_TO_DADDR(mp, sc->sa.pag->pag_agno, cluster_bno);
310 13121202 : imap.im_len = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
311 13121202 : imap.im_boffset = 0;
312 13121202 : error = xfs_imap_to_bp(mp, sc->tp, &imap, &cluster_bp);
313 13121201 : if (error)
314 : return error;
315 :
316 : /*
317 : * Record the contents of each possible inobt record mapping this
318 : * cluster.
319 : */
320 13121201 : cluster_ag_base = XFS_AGB_TO_AGINO(mp, cluster_bno);
321 13121201 : for (irec_index = 0;
322 26242402 : irec_index < igeo->inodes_per_cluster;
323 13121201 : irec_index += XFS_INODES_PER_CHUNK) {
324 13121201 : error = xrep_ibt_cluster_record(ri,
325 : cluster_ag_base + irec_index, cluster_bp,
326 : nr_inodes);
327 13121201 : if (error)
328 : break;
329 :
330 : }
331 :
332 13121201 : xfs_trans_brelse(sc->tp, cluster_bp);
333 13121201 : return error;
334 : }
335 :
336 : /* Check for any obvious conflicts in the inode chunk extent. */
337 : STATIC int
338 8578304 : xrep_ibt_check_inode_ext(
339 : struct xfs_scrub *sc,
340 : xfs_agblock_t agbno,
341 : xfs_extlen_t len)
342 : {
343 8578304 : struct xfs_mount *mp = sc->mp;
344 8578304 : struct xfs_ino_geometry *igeo = M_IGEO(mp);
345 8578304 : xfs_agino_t agino;
346 8578304 : enum xbtree_recpacking outcome;
347 8578304 : int error;
348 :
349 : /* Inode records must be within the AG. */
350 8578304 : if (!xfs_verify_agbext(sc->sa.pag, agbno, len))
351 : return -EFSCORRUPTED;
352 :
353 : /* The entire record must align to the inode cluster size. */
354 8578304 : if (!IS_ALIGNED(agbno, igeo->blocks_per_cluster) ||
355 8578304 : !IS_ALIGNED(agbno + len, igeo->blocks_per_cluster))
356 : return -EFSCORRUPTED;
357 :
358 : /*
359 : * The entire record must also adhere to the inode cluster alignment
360 : * size if sparse inodes are not enabled.
361 : */
362 8578304 : if (!xfs_has_sparseinodes(mp) &&
363 0 : (!IS_ALIGNED(agbno, igeo->cluster_align) ||
364 0 : !IS_ALIGNED(agbno + len, igeo->cluster_align)))
365 : return -EFSCORRUPTED;
366 :
367 : /*
368 : * On a sparse inode fs, this cluster could be part of a sparse chunk.
369 : * Sparse clusters must be aligned to sparse chunk alignment.
370 : */
371 8578304 : if (xfs_has_sparseinodes(mp) &&
372 8578304 : (!IS_ALIGNED(agbno, mp->m_sb.sb_spino_align) ||
373 8578304 : !IS_ALIGNED(agbno + len, mp->m_sb.sb_spino_align)))
374 : return -EFSCORRUPTED;
375 :
376 : /* Make sure the entire range of blocks are valid AG inodes. */
377 8578304 : agino = XFS_AGB_TO_AGINO(mp, agbno);
378 8578304 : if (!xfs_verify_agino(sc->sa.pag, agino))
379 : return -EFSCORRUPTED;
380 :
381 8578304 : agino = XFS_AGB_TO_AGINO(mp, agbno + len) - 1;
382 8578304 : if (!xfs_verify_agino(sc->sa.pag, agino))
383 : return -EFSCORRUPTED;
384 :
385 : /* Make sure this isn't free space. */
386 8578304 : error = xfs_alloc_has_records(sc->sa.bno_cur, agbno, len, &outcome);
387 8578304 : if (error)
388 : return error;
389 8578304 : if (outcome != XBTREE_RECPACKING_EMPTY)
390 0 : return -EFSCORRUPTED;
391 :
392 : return 0;
393 : }
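/*
 * A worked example of the alignment rules above, with an assumed
 * geometry of 4 fs blocks per cluster and sb_spino_align == 4: an
 * OWN_INODES extent at agbno 102 fails (102 % 4 != 0), while an extent
 * at agbno 96 with length 8 passes both the start and end checks.
 */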
394 :
395 : /* Found a fragment of the old inode btrees; dispose of them later. */
396 : STATIC int
397 88193 : xrep_ibt_record_old_btree_blocks(
398 : struct xrep_ibt *ri,
399 : const struct xfs_rmap_irec *rec)
400 : {
401 88193 : if (!xfs_verify_agbext(ri->sc->sa.pag, rec->rm_startblock,
402 88193 : rec->rm_blockcount))
403 : return -EFSCORRUPTED;
404 :
405 88193 : return xagb_bitmap_set(&ri->old_iallocbt_blocks, rec->rm_startblock,
406 : rec->rm_blockcount);
407 : }
408 :
409 : /* Record extents that belong to inode btrees. */
410 : STATIC int
411 637783686 : xrep_ibt_walk_rmap(
412 : struct xfs_btree_cur *cur,
413 : const struct xfs_rmap_irec *rec,
414 : void *priv)
415 : {
416 637783686 : struct xrep_ibt *ri = priv;
417 637783686 : struct xfs_mount *mp = cur->bc_mp;
418 637783686 : struct xfs_ino_geometry *igeo = M_IGEO(mp);
419 637783686 : xfs_agblock_t cluster_base;
420 637783686 : int error = 0;
421 :
422 637783686 : if (xchk_should_terminate(ri->sc, &error))
423 0 : return error;
424 :
425 637783851 : if (rec->rm_owner == XFS_RMAP_OWN_INOBT)
426 88199 : return xrep_ibt_record_old_btree_blocks(ri, rec);
427 :
428 : /* Skip extents that do not contain inodes. */
429 637695652 : if (rec->rm_owner != XFS_RMAP_OWN_INODES)
430 : return 0;
431 :
432 17156608 : error = xrep_ibt_check_inode_ext(ri->sc, rec->rm_startblock,
433 8578304 : rec->rm_blockcount);
434 8578304 : if (error)
435 : return error;
436 :
437 8578304 : trace_xrep_ibt_walk_rmap(mp, ri->sc->sa.pag->pag_agno,
438 8578304 : rec->rm_startblock, rec->rm_blockcount, rec->rm_owner,
439 8578304 : rec->rm_offset, rec->rm_flags);
440 :
441 : /*
442 : * Record the free/hole masks for each inode cluster that could be
443 : * mapped by this rmap record.
444 : */
445 8578304 : for (cluster_base = 0;
446 21699506 : cluster_base < rec->rm_blockcount;
447 13121202 : cluster_base += igeo->blocks_per_cluster) {
448 26242404 : error = xrep_ibt_process_cluster(ri,
449 13121202 : rec->rm_startblock + cluster_base);
450 13121202 : if (error)
451 0 : return error;
452 : }
453 :
454 : return 0;
455 : }
456 :
457 : /*
458 : * Iterate all reverse mappings to find the inodes (OWN_INODES) and the inode
459 : * btrees (OWN_INOBT). Figure out if we have enough free space to reconstruct
460 : * the inode btrees. The caller must clean up the lists if anything goes
461 : * wrong.
462 : */
463 : STATIC int
464 56625 : xrep_ibt_find_inodes(
465 : struct xrep_ibt *ri)
466 : {
467 56625 : struct xfs_scrub *sc = ri->sc;
468 56625 : int error;
469 :
470 56625 : ri->rie.ir_startino = NULLAGINO;
471 :
472 : /* Collect all reverse mappings for inode blocks. */
473 56625 : xrep_ag_btcur_init(sc, &sc->sa);
474 56632 : error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_ibt_walk_rmap, ri);
475 56648 : xchk_ag_btcur_free(&sc->sa);
476 56648 : if (error)
477 : return error;
478 :
479 : /* If we have a record ready to go, add it to the array. */
480 56648 : if (ri->rie.ir_startino == NULLAGINO)
481 : return 0;
482 :
483 40681 : return xrep_ibt_stash(ri);
484 : }
485 :
486 : /* Update the AGI counters. */
487 : STATIC int
488 56644 : xrep_ibt_reset_counters(
489 : struct xrep_ibt *ri)
490 : {
491 56644 : struct xfs_scrub *sc = ri->sc;
492 56644 : struct xfs_agi *agi = sc->sa.agi_bp->b_addr;
493 56644 : unsigned int freecount = ri->icount - ri->iused;
494 :
495 : /* Trigger inode count recalculation */
496 56644 : xfs_force_summary_recalc(sc->mp);
497 :
498 : /*
499 : * The AGI header contains extra information related to the inode
500 : * btrees, so we must update those fields here.
501 : */
502 56648 : agi->agi_count = cpu_to_be32(ri->icount);
503 56648 : agi->agi_freecount = cpu_to_be32(freecount);
504 56648 : xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp,
505 : XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
506 :
507 : /* Reinitialize with the values we just logged. */
508 56645 : return xrep_reinit_pagi(sc);
509 : }
510 :
511 : /* Retrieve finobt data for bulk load. */
512 : STATIC int
513 40037 : xrep_fibt_get_records(
514 : struct xfs_btree_cur *cur,
515 : unsigned int idx,
516 : struct xfs_btree_block *block,
517 : unsigned int nr_wanted,
518 : void *priv)
519 : {
520 40037 : struct xfs_inobt_rec_incore *irec = &cur->bc_rec.i;
521 40037 : struct xrep_ibt *ri = priv;
522 40037 : union xfs_btree_rec *block_rec;
523 40037 : unsigned int loaded;
524 40037 : int error;
525 :
526 88880 : for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
527 7536067 : do {
528 7536067 : error = xfarray_load(ri->inode_records,
529 7536067 : ri->array_cur++, irec);
530 7536067 : } while (error == 0 && xfs_inobt_rec_freecount(irec) == 0);
531 48843 : if (error)
532 0 : return error;
533 :
534 48843 : block_rec = xfs_btree_rec_addr(cur, idx, block);
535 48843 : cur->bc_ops->init_rec_from_cur(cur, block_rec);
536 : }
537 :
538 40037 : return loaded;
539 : }
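/*
 * A standalone sketch (assumed types, not the kernel API) of the
 * load-and-filter pattern in the do/while above: the finobt indexes only
 * records with free inodes, so zero-freecount records are skipped while
 * streaming records into the bulk loader.
 */
struct sketch_rec { unsigned int freecount; };

static int sketch_next_free_rec(const struct sketch_rec *recs,
				unsigned long nr_recs, unsigned long *cur,
				struct sketch_rec *out)
{
	do {
		if (*cur >= nr_recs)
			return -1;	/* ran out of records */
		*out = recs[(*cur)++];
	} while (out->freecount == 0);
	return 0;
}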
540 :
541 : /* Retrieve inobt data for bulk load. */
542 : STATIC int
543 60650 : xrep_ibt_get_records(
544 : struct xfs_btree_cur *cur,
545 : unsigned int idx,
546 : struct xfs_btree_block *block,
547 : unsigned int nr_wanted,
548 : void *priv)
549 : {
550 60650 : struct xfs_inobt_rec_incore *irec = &cur->bc_rec.i;
551 60650 : struct xrep_ibt *ri = priv;
552 60650 : union xfs_btree_rec *block_rec;
553 60650 : unsigned int loaded;
554 60650 : int error;
555 :
556 8752280 : for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
557 8691630 : error = xfarray_load(ri->inode_records, ri->array_cur++, irec);
558 8691630 : if (error)
559 0 : return error;
560 :
561 8691630 : block_rec = xfs_btree_rec_addr(cur, idx, block);
562 8691630 : cur->bc_ops->init_rec_from_cur(cur, block_rec);
563 : }
564 :
565 60650 : return loaded;
566 : }
567 :
568 : /* Feed one of the new inobt blocks to the bulk loader. */
569 : STATIC int
570 93511 : xrep_ibt_claim_block(
571 : struct xfs_btree_cur *cur,
572 : union xfs_btree_ptr *ptr,
573 : void *priv)
574 : {
575 93511 : struct xrep_ibt *ri = priv;
576 93511 : int error;
577 :
578 93511 : error = xrep_newbt_relog_autoreap(&ri->new_inobt);
579 93512 : if (error)
580 : return error;
581 :
582 93512 : return xrep_newbt_claim_block(cur, &ri->new_inobt, ptr);
583 : }
584 :
585 : /* Feed one of the new finobt blocks to the bulk loader. */
586 : STATIC int
587 56646 : xrep_fibt_claim_block(
588 : struct xfs_btree_cur *cur,
589 : union xfs_btree_ptr *ptr,
590 : void *priv)
591 : {
592 56646 : struct xrep_ibt *ri = priv;
593 56646 : int error;
594 :
595 56646 : error = xrep_newbt_relog_autoreap(&ri->new_finobt);
596 56647 : if (error)
597 : return error;
598 :
599 56647 : return xrep_newbt_claim_block(cur, &ri->new_finobt, ptr);
600 : }
601 :
602 : /* Make sure the records do not overlap in inumber address space. */
603 : STATIC int
604 56648 : xrep_ibt_check_startino(
605 : struct xrep_ibt *ri)
606 : {
607 56648 : struct xfs_inobt_rec_incore irec;
608 56648 : xfarray_idx_t cur;
609 56648 : xfs_agino_t next_agino = 0;
610 56648 : int error = 0;
611 :
612 8748278 : foreach_xfarray_idx(ri->inode_records, cur) {
613 8691630 : if (xchk_should_terminate(ri->sc, &error))
614 0 : return error;
615 :
616 8691630 : error = xfarray_load(ri->inode_records, cur, &irec);
617 8691630 : if (error)
618 0 : return error;
619 :
620 8691630 : if (irec.ir_startino < next_agino)
621 : return -EFSCORRUPTED;
622 :
623 8691630 : next_agino = irec.ir_startino + XFS_INODES_PER_CHUNK;
624 : }
625 :
626 56647 : return error;
627 : }
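/*
 * A sketch of the invariant enforced above: records arrive sorted by
 * ir_startino, and each one owns a full XFS_INODES_PER_CHUNK (64)
 * inodes of inumber address space, so consecutive records must start
 * at least 64 inumbers apart.
 */
static int sketch_chunks_disjoint(unsigned int prev_startino,
				  unsigned int next_startino)
{
	return next_startino >= prev_startino + 64;
}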
628 :
629 : /* Build new inode btrees and dispose of the old ones. */
630 : STATIC int
631 56647 : xrep_ibt_build_new_trees(
632 : struct xrep_ibt *ri)
633 : {
634 56647 : struct xfs_scrub *sc = ri->sc;
635 56647 : struct xfs_btree_cur *ino_cur;
636 56647 : struct xfs_btree_cur *fino_cur = NULL;
637 56647 : xfs_fsblock_t fsbno;
638 56647 : bool need_finobt;
639 56647 : int error;
640 :
641 56647 : need_finobt = xfs_has_finobt(sc->mp);
642 :
643 : /*
644 : * Create new btrees for staging all the inobt records we collected
645 : * earlier. The records were collected in order of increasing agino,
646 : * so we do not have to sort them. Ensure there are no overlapping
647 : * records.
648 : */
649 56647 : error = xrep_ibt_check_startino(ri);
650 56645 : if (error)
651 : return error;
652 :
653 : /*
654 : * The new inode btrees will not be rooted in the AGI until we've
655 : * successfully rebuilt the tree.
656 : *
657 : * Start by setting up the inobt staging cursor.
658 : */
659 56645 : fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
660 : XFS_IBT_BLOCK(sc->mp));
661 56645 : xrep_newbt_init_ag(&ri->new_inobt, sc, &XFS_RMAP_OINFO_INOBT, fsbno,
662 : XFS_AG_RESV_NONE);
663 56640 : ri->new_inobt.bload.claim_block = xrep_ibt_claim_block;
664 56640 : ri->new_inobt.bload.get_records = xrep_ibt_get_records;
665 :
666 56640 : ino_cur = xfs_inobt_stage_cursor(sc->sa.pag, &ri->new_inobt.afake,
667 : XFS_BTNUM_INO);
668 56639 : error = xfs_btree_bload_compute_geometry(ino_cur, &ri->new_inobt.bload,
669 : xfarray_length(ri->inode_records));
670 56631 : if (error)
671 0 : goto err_inocur;
672 :
673 : /* Set up finobt staging cursor. */
674 56631 : if (need_finobt) {
675 56631 : enum xfs_ag_resv_type resv = XFS_AG_RESV_METADATA;
676 :
677 56631 : if (sc->mp->m_finobt_nores)
678 0 : resv = XFS_AG_RESV_NONE;
679 :
680 56631 : fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
681 : XFS_FIBT_BLOCK(sc->mp));
682 56631 : xrep_newbt_init_ag(&ri->new_finobt, sc, &XFS_RMAP_OINFO_INOBT,
683 : fsbno, resv);
684 56641 : ri->new_finobt.bload.claim_block = xrep_fibt_claim_block;
685 56641 : ri->new_finobt.bload.get_records = xrep_fibt_get_records;
686 :
687 56641 : fino_cur = xfs_inobt_stage_cursor(sc->sa.pag,
688 : &ri->new_finobt.afake, XFS_BTNUM_FINO);
689 113273 : error = xfs_btree_bload_compute_geometry(fino_cur,
690 56634 : &ri->new_finobt.bload, ri->finobt_recs);
691 56639 : if (error)
692 0 : goto err_finocur;
693 : }
694 :
695 : /* Last chance to abort before we start committing fixes. */
696 56639 : if (xchk_should_terminate(sc, &error))
697 0 : goto err_finocur;
698 :
699 : /* Reserve all the space we need to build the new btrees. */
700 56641 : error = xrep_newbt_alloc_blocks(&ri->new_inobt,
701 : ri->new_inobt.bload.nr_blocks);
702 56648 : if (error)
703 0 : goto err_finocur;
704 :
705 56648 : if (need_finobt) {
706 56648 : error = xrep_newbt_alloc_blocks(&ri->new_finobt,
707 : ri->new_finobt.bload.nr_blocks);
708 56648 : if (error)
709 0 : goto err_finocur;
710 : }
711 :
712 : /* Add all inobt records. */
713 56648 : ri->array_cur = XFARRAY_CURSOR_INIT;
714 56648 : error = xfs_btree_bload(ino_cur, &ri->new_inobt.bload, ri);
715 56648 : if (error)
716 0 : goto err_finocur;
717 :
718 : /* Add all finobt records. */
719 56648 : if (need_finobt) {
720 56648 : ri->array_cur = XFARRAY_CURSOR_INIT;
721 56648 : error = xfs_btree_bload(fino_cur, &ri->new_finobt.bload, ri);
722 56644 : if (error)
723 0 : goto err_finocur;
724 : }
725 :
726 : /*
727 : * Install the new btrees in the AG header. After this point the old
728 : * btrees are no longer accessible and the new trees are live.
729 : */
730 56644 : xfs_inobt_commit_staged_btree(ino_cur, sc->tp, sc->sa.agi_bp);
731 56646 : xfs_btree_del_cursor(ino_cur, 0);
732 :
733 56646 : if (fino_cur) {
734 56646 : xfs_inobt_commit_staged_btree(fino_cur, sc->tp, sc->sa.agi_bp);
735 56646 : xfs_btree_del_cursor(fino_cur, 0);
736 : }
737 :
738 : /* Reset the AGI counters now that we've changed the inode roots. */
739 56646 : error = xrep_ibt_reset_counters(ri);
740 56646 : if (error)
741 0 : goto err_finobt;
742 :
743 : /* Free unused blocks and bitmap. */
744 56646 : if (need_finobt) {
745 56646 : error = xrep_newbt_commit(&ri->new_finobt);
746 56647 : if (error)
747 0 : goto err_inobt;
748 : }
749 56647 : error = xrep_newbt_commit(&ri->new_inobt);
750 56648 : if (error)
751 : return error;
752 :
753 56648 : return xrep_roll_ag_trans(sc);
754 :
755 0 : err_finocur:
756 0 : if (need_finobt)
757 0 : xfs_btree_del_cursor(fino_cur, error);
758 0 : err_inocur:
759 0 : xfs_btree_del_cursor(ino_cur, error);
760 0 : err_finobt:
761 0 : if (need_finobt)
762 0 : xrep_newbt_cancel(&ri->new_finobt);
763 0 : err_inobt:
764 0 : xrep_newbt_cancel(&ri->new_inobt);
765 0 : return error;
766 : }
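/*
 * A condensed outline of the staging sequence implemented above
 * (arguments and error handling elided; every call appears in the
 * function itself):
 *
 *   xrep_newbt_init_ag()                 set up new-btree bookkeeping
 *   xfs_inobt_stage_cursor()             cursor on a fake, unrooted tree
 *   xfs_btree_bload_compute_geometry()   size the new tree
 *   xrep_newbt_alloc_blocks()            reserve space for it
 *   xfs_btree_bload()                    write all the records and nodes
 *   xfs_inobt_commit_staged_btree()      atomically install the new root
 *   xrep_ibt_reset_counters()            update AGI icount/freecount
 *   xrep_newbt_commit()                  release unused reserved blocks
 *
 * Until the commit step, the old btrees remain the live trees on disk.
 */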
767 :
768 : /*
769 : * Now that we've logged the roots of the new btrees, invalidate all of the
770 : * old blocks and free them.
771 : */
772 : STATIC int
773 56641 : xrep_ibt_remove_old_trees(
774 : struct xrep_ibt *ri)
775 : {
776 56641 : struct xfs_scrub *sc = ri->sc;
777 56641 : int error;
778 :
779 : /*
780 : * Free the old inode btree blocks if they're not in use. It's ok to
781 : * reap with XFS_AG_RESV_NONE even if the finobt had a per-AG
782 : * reservation because we reset the reservation before releasing the
783 : * AGI and AGF header buffer locks.
784 : */
785 56641 : error = xrep_reap_agblocks(sc, &ri->old_iallocbt_blocks,
786 : &XFS_RMAP_OINFO_INOBT, XFS_AG_RESV_NONE);
787 56642 : if (error)
788 : return error;
789 :
790 : /*
791 : * If the finobt is enabled and has a per-AG reservation, make sure we
792 : * reinitialize the per-AG reservations.
793 : */
794 56642 : if (xfs_has_finobt(sc->mp) && !sc->mp->m_finobt_nores)
795 56643 : sc->flags |= XREP_RESET_PERAG_RESV;
796 :
797 : return 0;
798 : }
799 :
800 : /* Repair both inode btrees. */
801 : int
802 56646 : xrep_iallocbt(
803 : struct xfs_scrub *sc)
804 : {
805 56646 : struct xrep_ibt *ri;
806 56646 : struct xfs_mount *mp = sc->mp;
807 56646 : char *descr;
808 56646 : xfs_agino_t first_agino, last_agino;
809 56646 : int error = 0;
810 :
811 : /* We require the rmapbt to rebuild anything. */
812 56646 : if (!xfs_has_rmapbt(mp))
813 : return -EOPNOTSUPP;
814 :
815 56646 : ri = kzalloc(sizeof(struct xrep_ibt), XCHK_GFP_FLAGS);
816 56646 : if (!ri)
817 : return -ENOMEM;
818 56646 : ri->sc = sc;
819 :
820 : /* We rebuild both inode btrees. */
821 56646 : sc->sick_mask = XFS_SICK_AG_INOBT | XFS_SICK_AG_FINOBT;
822 :
823 : /* Set up enough storage to handle an AG with nothing but inodes. */
824 56646 : xfs_agino_range(mp, sc->sa.pag->pag_agno, &first_agino, &last_agino);
825 56648 : last_agino /= XFS_INODES_PER_CHUNK;
826 56648 : descr = xchk_xfile_ag_descr(sc, "inode index records");
827 56633 : error = xfarray_create(descr, last_agino,
828 : sizeof(struct xfs_inobt_rec_incore),
829 : &ri->inode_records);
830 56630 : kfree(descr);
831 56630 : if (error)
832 0 : goto out_ri;
833 :
834 : /* Collect the inode data and find the old btree blocks. */
835 56630 : xagb_bitmap_init(&ri->old_iallocbt_blocks);
836 56621 : error = xrep_ibt_find_inodes(ri);
837 56648 : if (error)
838 0 : goto out_bitmap;
839 :
840 : /* Rebuild the inode indexes. */
841 56648 : error = xrep_ibt_build_new_trees(ri);
842 56640 : if (error)
843 0 : goto out_bitmap;
844 :
845 : /* Kill the old tree. */
846 56640 : error = xrep_ibt_remove_old_trees(ri);
847 :
848 56641 : out_bitmap:
849 56641 : xagb_bitmap_destroy(&ri->old_iallocbt_blocks);
850 56636 : xfarray_destroy(ri->inode_records);
851 56641 : out_ri:
852 56641 : kfree(ri);
853 56641 : return error;
854 : }
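/*
 * A worked example of the xfarray sizing above (assumed AG geometry):
 * with last_agino = 1048575, at most 1048575 / 64 = 16383 inobt records
 * can exist, since each record covers one 64-inode chunk; that quotient
 * bounds the size of the in-memory record array.
 */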
855 :
856 : /* Make sure both btrees are ok after we've rebuilt them. */
857 : int
858 56645 : xrep_revalidate_iallocbt(
859 : struct xfs_scrub *sc)
860 : {
861 56645 : __u32 old_type = sc->sm->sm_type;
862 56645 : int error;
863 :
864 : /*
865 : * We must update sm_type temporarily so that the tree-to-tree cross
866 : * reference checks will work in the correct direction, and also so
867 : * that tracing will report correctly if there are more errors.
868 : */
869 56645 : sc->sm->sm_type = XFS_SCRUB_TYPE_INOBT;
870 56645 : error = xchk_inobt(sc);
871 56648 : if (error)
872 0 : goto out;
873 :
874 56648 : if (xfs_has_finobt(sc->mp)) {
875 56648 : sc->sm->sm_type = XFS_SCRUB_TYPE_FINOBT;
876 56648 : error = xchk_finobt(sc);
877 : }
878 :
879 0 : out:
880 56648 : sc->sm->sm_type = old_type;
881 56648 : return error;
882 : }