// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022-2023 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_log.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "xfs_defer.h"
#include "xfs_extfree_item.h"
#include "xfs_imeta.h"
#include "xfs_quota.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/newbt.h"

/*
 * Estimate proper slack values for a btree that's being reloaded.
 *
 * Under most circumstances, we'll take whatever default loading value the
 * btree bulk loading code calculates for us. However, there are some
 * exceptions to this rule:
 *
 * (1) If someone turned one of the debug knobs.
 * (2) If this is a per-AG btree and the AG has less than ~9% space free.
 * (3) If this is an inode-rooted btree and the filesystem has less than
 *     ~9% space free.
 *
 * Note that we actually use 3/32 for the comparison to avoid division.
 */
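/*
 * For example (illustrative numbers only): with sz = 65536 blocks, the
 * cutoff below is (65536 * 3) >> 5 = 6144 free blocks, i.e. 9.375%, which
 * is where the ~9% figure above comes from.
 */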
static void
xrep_newbt_estimate_slack(
	struct xrep_newbt	*xnr)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xfs_btree_bload	*bload = &xnr->bload;
	uint64_t		free;
	uint64_t		sz;

	/*
	 * The xfs_globals values are set to -1 (i.e. take the bload defaults)
	 * unless someone has set them otherwise, so we just pull the values
	 * here.
	 */
	bload->leaf_slack = xfs_globals.bload_leaf_slack;
	bload->node_slack = xfs_globals.bload_node_slack;

	if (sc->ops->type == ST_PERAG) {
		free = sc->sa.pag->pagf_freeblks;
		sz = xfs_ag_block_count(sc->mp, sc->sa.pag->pag_agno);
	} else {
		free = percpu_counter_sum(&sc->mp->m_fdblocks);
		sz = sc->mp->m_sb.sb_dblocks;
	}

	/* No further changes if more than 3/32 of the space is free. */
	if (free >= ((sz * 3) >> 5))
		return;

	/* We're low on space; load the btrees as tightly as possible. */
	if (bload->leaf_slack < 0)
		bload->leaf_slack = 0;
	if (bload->node_slack < 0)
		bload->node_slack = 0;
}

/* Initialize accounting resources for staging a new AG btree. */
void
xrep_newbt_init_ag(
	struct xrep_newbt		*xnr,
	struct xfs_scrub		*sc,
	const struct xfs_owner_info	*oinfo,
	xfs_fsblock_t			alloc_hint,
	enum xfs_ag_resv_type		resv)
{
	memset(xnr, 0, sizeof(struct xrep_newbt));
	xnr->sc = sc;
	xnr->oinfo = *oinfo; /* structure copy */
	xnr->alloc_hint = alloc_hint;
	xnr->resv = resv;
	INIT_LIST_HEAD(&xnr->resv_list);
	xnr->bload.max_dirty = XFS_B_TO_FSBT(sc->mp, 256U << 10); /* 256K */
	xrep_newbt_estimate_slack(xnr);
}
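
/*
 * Typical calling sequence (a sketch only; real callers vary, and "hint"
 * here is a placeholder): initialize the staging context, allocate space,
 * bulk load the new btree while claiming blocks via
 * xrep_newbt_claim_block(), then commit:
 *
 *	xrep_newbt_init_ag(&xnr, sc, &XFS_RMAP_OINFO_AG, hint,
 *			XFS_AG_RESV_NONE);
 *	error = xrep_newbt_alloc_blocks(&xnr, nr_blocks);
 *	...bulk load the staged btree...
 *	error = xrep_newbt_commit(&xnr);
 */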

/* Initialize accounting resources for staging a new inode fork btree. */
int
xrep_newbt_init_inode(
	struct xrep_newbt	*xnr,
	struct xfs_scrub	*sc,
	int			whichfork,
	const struct xfs_owner_info *oinfo)
{
	struct xfs_ifork	*ifp;

	ifp = kmem_cache_zalloc(xfs_ifork_cache, XCHK_GFP_FLAGS);
	if (!ifp)
		return -ENOMEM;

	xrep_newbt_init_ag(xnr, sc, oinfo,
			XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino),
			XFS_AG_RESV_NONE);
	xnr->ifake.if_fork = ifp;
	xnr->ifake.if_fork_size = xfs_inode_fork_size(sc->ip, whichfork);
	xnr->ifake.if_whichfork = whichfork;
	return 0;
}

/*
 * Initialize accounting resources for staging a new metadata inode btree.
 * If the inode has an imeta space reservation, the caller must adjust the
 * imeta reservation at btree commit.
 */
int
xrep_newbt_init_metadir_inode(
	struct xrep_newbt	*xnr,
	struct xfs_scrub	*sc)
{
	struct xfs_owner_info	oinfo;
	struct xfs_ifork	*ifp;

	ASSERT(xfs_is_metadir_inode(sc->ip));
	ASSERT(XFS_IS_DQDETACHED(sc->mp, sc->ip));

	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, XFS_DATA_FORK);

	ifp = kmem_cache_zalloc(xfs_ifork_cache, XCHK_GFP_FLAGS);
	if (!ifp)
		return -ENOMEM;

	/*
	 * Allocate new metadir btree blocks with XFS_AG_RESV_NONE because
	 * the inode metadata space reservations can only account allocated
	 * space to i_nblocks. We do not want to change the inode core fields
	 * until we're ready to commit the new tree, so we allocate the
	 * blocks as if they were regular file blocks. This exposes us to a
	 * higher risk of the repair being cancelled due to ENOSPC.
	 */
	xrep_newbt_init_ag(xnr, sc, &oinfo,
			XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino),
			XFS_AG_RESV_NONE);
	xnr->ifake.if_fork = ifp;
	xnr->ifake.if_fork_size = xfs_inode_fork_size(sc->ip, XFS_DATA_FORK);
	xnr->ifake.if_whichfork = XFS_DATA_FORK;
	return 0;
}

/*
 * Initialize accounting resources for staging a new btree. Callers are
 * expected to add their own reservations (and clean them up) manually.
 */
void
xrep_newbt_init_bare(
	struct xrep_newbt	*xnr,
	struct xfs_scrub	*sc)
{
	xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
			XFS_AG_RESV_NONE);
}

/*
 * Set up automatic reaping of the blocks reserved for btree reconstruction
 * by logging a deferred free (EFI) item for each extent we allocate, so
 * that log recovery can return all of the space to the filesystem if we
 * crash before committing the new btree. This function returns a token
 * that can be used to cancel automatic reaping if repair is successful.
 */
static int
xrep_newbt_schedule_autoreap(
	struct xrep_newbt		*xnr,
	struct xrep_newbt_resv		*resv)
{
	struct xfs_extent_free_item	efi_item = {
		.xefi_blockcount	= resv->len,
		.xefi_owner		= xnr->oinfo.oi_owner,
		.xefi_flags		= XFS_EFI_SKIP_DISCARD,
		.xefi_pag		= resv->pag,
	};
	struct xfs_scrub		*sc = xnr->sc;
	struct xfs_log_item		*lip;
	LIST_HEAD(items);

	ASSERT(xnr->oinfo.oi_offset == 0);

	efi_item.xefi_startblock = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno,
			resv->agbno);
	if (xnr->oinfo.oi_flags & XFS_OWNER_INFO_ATTR_FORK)
		efi_item.xefi_flags |= XFS_EFI_ATTR_FORK;
	if (xnr->oinfo.oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
		efi_item.xefi_flags |= XFS_EFI_BMBT_BLOCK;

	INIT_LIST_HEAD(&efi_item.xefi_list);
	list_add(&efi_item.xefi_list, &items);

	xfs_perag_intent_hold(resv->pag);
	lip = xfs_extent_free_defer_type.create_intent(sc->tp, &items, 1,
			false);
	ASSERT(lip != NULL && !IS_ERR(lip));

	resv->efi = lip;
	return 0;
}

/*
 * Earlier, we logged EFIs for the extents that we allocated to hold the new
 * btree so that we could automatically roll back those allocations if the
 * system crashed. Now we log an EFD to cancel the EFI, either because the
 * repair succeeded and the new blocks are in use; or because the repair was
 * cancelled and we're about to free the extents directly.
 */
static inline void
xrep_newbt_finish_autoreap(
	struct xfs_scrub	*sc,
	struct xrep_newbt_resv	*resv)
{
	struct xfs_efd_log_item	*efdp;
	struct xfs_extent	*extp;
	struct xfs_log_item	*efd_lip;

	efd_lip = xfs_extent_free_defer_type.create_done(sc->tp, resv->efi, 1);
	efdp = container_of(efd_lip, struct xfs_efd_log_item, efd_item);
	extp = efdp->efd_format.efd_extents;
	extp->ext_start = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno,
			resv->agbno);
	extp->ext_len = resv->len;
	efdp->efd_next_extent++;
	set_bit(XFS_LI_DIRTY, &efd_lip->li_flags);
	xfs_perag_intent_rele(resv->pag);
}

/* Abort an EFI logged for a new btree block reservation. */
static inline void
xrep_newbt_cancel_autoreap(
	struct xrep_newbt_resv	*resv)
{
	xfs_extent_free_defer_type.abort_intent(resv->efi);
	xfs_perag_intent_rele(resv->pag);
}

/*
 * Relog the EFIs attached to a staging btree so that we don't pin the log
 * tail. Same logic as xfs_defer_relog.
 */
int
xrep_newbt_relog_autoreap(
	struct xrep_newbt	*xnr)
{
	struct xrep_newbt_resv	*resv;
	unsigned int		efi_bytes = 0;

	list_for_each_entry(resv, &xnr->resv_list, list) {
		/*
		 * If the log intent item for this deferred op is in a
		 * different checkpoint, relog it to keep the log tail moving
		 * forward. We're ok with this being racy because an incorrect
		 * decision means we'll be a little slower at pushing the tail.
		 */
		if (!resv->efi || xfs_log_item_in_current_chkpt(resv->efi))
			continue;

		resv->efi = xfs_trans_item_relog(resv->efi, xnr->sc->tp);

		/*
		 * If free space is very fragmented, it's possible that the new
		 * btree will be allocated a large number of small extents.
		 * On an active system, it's possible that so many of those
		 * EFIs will need relogging here that doing them all in one
		 * transaction will overflow the reservation.
		 *
		 * Each allocation for the new btree (xrep_newbt_resv) points
		 * to a unique single-mapping EFI, so each relog operation logs
		 * a single-mapping EFD followed by a new EFI. Each single
		 * mapping EF[ID] item consumes about 128 bytes, so we'll
		 * assume 256 bytes per relog. Roll if we consume more than
		 * half of the transaction reservation.
		 */
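		/*
		 * Example (illustrative numbers only): with a 512 KiB
		 * transaction reservation, we'd roll after (512 KiB / 2) /
		 * 256 bytes = 1024 relogged EFIs.
		 */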
		efi_bytes += 256;
		if (efi_bytes > xnr->sc->tp->t_log_res / 2) {
			int	error;

			error = xrep_roll_trans(xnr->sc);
			if (error)
				return error;

			efi_bytes = 0;
		}
	}

	if (xnr->sc->tp->t_flags & XFS_TRANS_DIRTY)
		return xrep_roll_trans(xnr->sc);
	return 0;
}

/*
 * Designate specific blocks to be used to build our new btree. @pag must be
 * a passive reference.
 */
STATIC int
xrep_newbt_add_blocks(
	struct xrep_newbt	*xnr,
	struct xfs_perag	*pag,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len,
	bool			autoreap)
{
	struct xrep_newbt_resv	*resv;
	int			error;

	resv = kmalloc(sizeof(struct xrep_newbt_resv), XCHK_GFP_FLAGS);
	if (!resv)
		return -ENOMEM;

	INIT_LIST_HEAD(&resv->list);
	resv->agbno = agbno;
	resv->len = len;
	resv->used = 0;
	resv->pag = xfs_perag_hold(pag);

	if (autoreap) {
		error = xrep_newbt_schedule_autoreap(xnr, resv);
		if (error)
			goto out_pag;
	}

	list_add_tail(&resv->list, &xnr->resv_list);
	return 0;
out_pag:
	xfs_perag_put(resv->pag);
	kfree(resv);
	return error;
}

/*
 * Add an extent to the new btree reservation pool. Callers are required to
 * handle any automatic reaping if the repair is cancelled. @pag must be a
 * passive reference.
 */
int
xrep_newbt_add_extent(
	struct xrep_newbt	*xnr,
	struct xfs_perag	*pag,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	return xrep_newbt_add_blocks(xnr, pag, agbno, len, false);
}

/* Don't let our allocation hint take us beyond this AG */
static inline void
xrep_newbt_validate_ag_alloc_hint(
	struct xrep_newbt	*xnr)
{
	struct xfs_scrub	*sc = xnr->sc;
	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(sc->mp, xnr->alloc_hint);

	if (agno == sc->sa.pag->pag_agno &&
	    xfs_verify_fsbno(sc->mp, xnr->alloc_hint))
		return;

	xnr->alloc_hint = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
			XFS_AGFL_BLOCK(sc->mp) + 1);
}

/* Allocate disk space for a new per-AG btree. */
STATIC int
xrep_newbt_alloc_ag_blocks(
	struct xrep_newbt	*xnr,
	uint64_t		nr_blocks)
{
	struct xfs_scrub	*sc = xnr->sc;
	int			error = 0;

	ASSERT(sc->sa.pag != NULL);
	ASSERT(xnr->resv != XFS_AG_RESV_IMETA);

	while (nr_blocks > 0) {
		struct xfs_alloc_arg	args = {
			.tp		= sc->tp,
			.mp		= sc->mp,
			.oinfo		= xnr->oinfo,
			.minlen		= 1,
			.maxlen		= nr_blocks,
			.prod		= 1,
			.resv		= xnr->resv,
		};

		xrep_newbt_validate_ag_alloc_hint(xnr);

		if (xnr->alloc_vextent)
			error = xnr->alloc_vextent(sc, &args, xnr->alloc_hint);
		else
			error = xfs_alloc_vextent_near_bno(&args,
					xnr->alloc_hint);
		if (error)
			return error;
		if (args.fsbno == NULLFSBLOCK)
			return -ENOSPC;

		trace_xrep_newbt_alloc_ag_blocks(sc->mp, args.agno, args.agbno,
				args.len, xnr->oinfo.oi_owner);

		error = xrep_newbt_add_blocks(xnr, sc->sa.pag, args.agbno,
				args.len, true);
		if (error)
			return error;

		nr_blocks -= args.len;
		xnr->alloc_hint = args.fsbno + args.len;

		error = xrep_defer_finish(sc);
		if (error)
			return error;
	}

	return 0;
}

/* Don't let our allocation hint take us beyond EOFS */
static inline void
xrep_newbt_validate_file_alloc_hint(
	struct xrep_newbt	*xnr)
{
	struct xfs_scrub	*sc = xnr->sc;

	if (xfs_verify_fsbno(sc->mp, xnr->alloc_hint))
		return;

	xnr->alloc_hint = XFS_AGB_TO_FSB(sc->mp, 0, XFS_AGFL_BLOCK(sc->mp) + 1);
}

/* Allocate disk space for our new file-based btree. */
STATIC int
xrep_newbt_alloc_file_blocks(
	struct xrep_newbt	*xnr,
	uint64_t		nr_blocks)
{
	struct xfs_scrub	*sc = xnr->sc;
	int			error = 0;

	ASSERT(xnr->resv != XFS_AG_RESV_IMETA);

	while (nr_blocks > 0) {
		struct xfs_alloc_arg	args = {
			.tp		= sc->tp,
			.mp		= sc->mp,
			.oinfo		= xnr->oinfo,
			.minlen		= 1,
			.maxlen		= nr_blocks,
			.prod		= 1,
			.resv		= xnr->resv,
		};
		struct xfs_perag	*pag;

		xrep_newbt_validate_file_alloc_hint(xnr);

		if (xnr->alloc_vextent)
			error = xnr->alloc_vextent(sc, &args, xnr->alloc_hint);
		else
			error = xfs_alloc_vextent_start_ag(&args,
					xnr->alloc_hint);
		if (error)
			return error;
		if (args.fsbno == NULLFSBLOCK)
			return -ENOSPC;

		trace_xrep_newbt_alloc_file_blocks(sc->mp, args.agno,
				args.agbno, args.len, xnr->oinfo.oi_owner);

		pag = xfs_perag_get(sc->mp, args.agno);
		if (!pag) {
			ASSERT(0);
			return -EFSCORRUPTED;
		}

		error = xrep_newbt_add_blocks(xnr, pag, args.agbno, args.len,
				true);
		xfs_perag_put(pag);
		if (error)
			return error;

		nr_blocks -= args.len;
		xnr->alloc_hint = args.fsbno + args.len;

		error = xrep_defer_finish(sc);
		if (error)
			return error;
	}

	return 0;
}

/* Allocate disk space for our new btree. */
int
xrep_newbt_alloc_blocks(
	struct xrep_newbt	*xnr,
	uint64_t		nr_blocks)
{
	if (xnr->sc->ip)
		return xrep_newbt_alloc_file_blocks(xnr, nr_blocks);
	return xrep_newbt_alloc_ag_blocks(xnr, nr_blocks);
}

/*
 * How many extent freeing items can we attach to a transaction before we
 * want to finish the chain so that unreserving new btree blocks doesn't
 * overrun the transaction reservation?
 */
#define XREP_REAP_MAX_NEWBT_EFIS	(128)
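
/*
 * Using the roughly 128 bytes per single-mapping EFI estimated in
 * xrep_newbt_relog_autoreap() above, 128 items amount to about 16 KiB of
 * log reservation per transaction (an illustrative figure, not a hard
 * limit).
 */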

/*
 * Free the unused part of an extent. Returns the number of EFIs logged or
 * a negative errno.
 */
STATIC int
xrep_newbt_free_extent(
	struct xrep_newbt	*xnr,
	struct xrep_newbt_resv	*resv,
	bool			btree_committed)
{
	struct xfs_scrub	*sc = xnr->sc;
	xfs_agblock_t		free_agbno = resv->agbno;
	xfs_extlen_t		free_aglen = resv->len;
	xfs_fsblock_t		fsbno;
	int			error;

	/*
	 * If we used space and committed the btree, remove those blocks from
	 * the extent before we act on it.
	 */
	if (btree_committed) {
		free_agbno += resv->used;
		free_aglen -= resv->used;
	}

	xrep_newbt_finish_autoreap(sc, resv);

	if (free_aglen == 0)
		return 0;

	trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno, free_agbno,
			free_aglen, xnr->oinfo.oi_owner);

	ASSERT(xnr->resv != XFS_AG_RESV_AGFL);
	ASSERT(xnr->resv != XFS_AG_RESV_IGNORE);

	/*
	 * Use EFIs to free the reservations. This reduces the chance
	 * that we leak blocks if the system goes down.
	 */
	fsbno = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno, free_agbno);
	error = xfs_free_extent_later(sc->tp, fsbno, free_aglen, &xnr->oinfo,
			xnr->resv, XFS_FREE_EXTENT_SKIP_DISCARD);
	if (error)
		return error;

	return 1;
}

/* Free all the accounting info and disk space we reserved for a new btree. */
STATIC int
xrep_newbt_free(
	struct xrep_newbt	*xnr,
	bool			btree_committed)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xrep_newbt_resv	*resv, *n;
	unsigned int		freed = 0;
	int			error = 0;

	/*
	 * If the filesystem already went down, we can't free the blocks. Skip
	 * ahead to freeing the incore metadata because we can't fix anything.
	 */
	if (xfs_is_shutdown(sc->mp))
		goto junkit;

	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
		int	ret;

		ret = xrep_newbt_free_extent(xnr, resv, btree_committed);
		list_del(&resv->list);
		xfs_perag_put(resv->pag);
		kfree(resv);
		if (ret < 0) {
			error = ret;
			goto junkit;
		}

		freed += ret;
		if (freed >= XREP_REAP_MAX_NEWBT_EFIS) {
			error = xrep_defer_finish(sc);
			if (error)
				goto junkit;
			freed = 0;
		}
	}

	if (freed)
		error = xrep_defer_finish(sc);

junkit:
	/*
	 * If we still have reservations attached to @xnr, cleanup must have
	 * failed and the filesystem is about to go down. Clean up the incore
	 * reservations.
	 */
	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
		xrep_newbt_cancel_autoreap(resv);
		list_del(&resv->list);
		xfs_perag_put(resv->pag);
		kfree(resv);
	}

	if (sc->ip) {
		kmem_cache_free(xfs_ifork_cache, xnr->ifake.if_fork);
		xnr->ifake.if_fork = NULL;
	}

	return error;
}

/*
 * Free all the accounting info and unused disk space allocations after
 * committing a new btree.
 */
int
xrep_newbt_commit(
	struct xrep_newbt	*xnr)
{
	return xrep_newbt_free(xnr, true);
}

/*
 * Free all the accounting info and all of the disk space we reserved for a
 * new btree that we're not going to commit. We want to try to roll things
 * back cleanly for things like ENOSPC midway through allocation.
 */
void
xrep_newbt_cancel(
	struct xrep_newbt	*xnr)
{
	xrep_newbt_free(xnr, false);
}

/* Feed one of the reserved btree blocks to the bulk loader. */
int
xrep_newbt_claim_block(
	struct xfs_btree_cur	*cur,
	struct xrep_newbt	*xnr,
	union xfs_btree_ptr	*ptr)
{
	struct xrep_newbt_resv	*resv;
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_agblock_t		agbno;

	/*
	 * The first item in the list should always have a free block unless
	 * we're completely out.
	 */
	resv = list_first_entry(&xnr->resv_list, struct xrep_newbt_resv, list);
	if (resv->used == resv->len)
		return -ENOSPC;

	/*
	 * Peel off a block from the start of the reservation. We allocate
	 * blocks in order to place blocks on disk in increasing record or key
	 * order. The block reservations tend to end up on the list in
	 * decreasing order, which hopefully results in leaf blocks ending up
	 * together.
	 */
	agbno = resv->agbno + resv->used;
	resv->used++;

	/* If we used all the blocks in this reservation, move it to the end. */
	if (resv->used == resv->len)
		list_move_tail(&resv->list, &xnr->resv_list);

	trace_xrep_newbt_claim_block(mp, resv->pag->pag_agno, agbno, 1,
			xnr->oinfo.oi_owner);

	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		ptr->l = cpu_to_be64(XFS_AGB_TO_FSB(mp, resv->pag->pag_agno,
				agbno));
	else
		ptr->s = cpu_to_be32(agbno);
	return 0;
}
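
/*
 * Bulk loader callbacks typically wrap xrep_newbt_claim_block() like this
 * (a sketch; the wrapper name and priv type are repair-specific and
 * hypothetical here):
 *
 *	static int
 *	xrep_foo_claim_block(
 *		struct xfs_btree_cur	*cur,
 *		union xfs_btree_ptr	*ptr,
 *		void			*priv)
 *	{
 *		struct xrep_newbt	*xnr = priv;
 *
 *		return xrep_newbt_claim_block(cur, xnr, ptr);
 *	}
 *
 * The wrapper is installed in xnr->bload.claim_block before calling
 * xfs_btree_bload() to write the staged btree blocks to disk.
 */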

/* How many reserved blocks are unused? */
unsigned int
xrep_newbt_unused_blocks(
	struct xrep_newbt	*xnr)
{
	struct xrep_newbt_resv	*resv;
	unsigned int		unused = 0;

	list_for_each_entry(resv, &xnr->resv_list, list)
		unused += resv->len - resv->used;
	return unused;
}