Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2014 Red Hat, Inc.
4 : * All Rights Reserved.
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_log_format.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_mount.h"
13 : #include "xfs_trans.h"
14 : #include "xfs_alloc.h"
15 : #include "xfs_btree.h"
16 : #include "xfs_btree_staging.h"
17 : #include "xfs_rmap.h"
18 : #include "xfs_rmap_btree.h"
19 : #include "xfs_trace.h"
20 : #include "xfs_error.h"
21 : #include "xfs_extent_busy.h"
22 : #include "xfs_ag.h"
23 : #include "xfs_ag_resv.h"
24 : #include "scrub/xfile.h"
25 : #include "scrub/xfbtree.h"
26 : #include "xfs_btree_mem.h"
27 :
28 : static struct kmem_cache *xfs_rmapbt_cur_cache;
29 :
30 : /*
31 : * Reverse map btree.
32 : *
33 : * This is a per-ag tree used to track the owner(s) of a given extent. With
34 : * reflink it is possible for there to be multiple owners, which is a departure
35 : * from classic XFS. Owner records for data extents are inserted when the
36 : * extent is mapped and removed when an extent is unmapped. Owner records for
37 : * all other block types (i.e. metadata) are inserted when an extent is
38 : * allocated and removed when an extent is freed. There can only be one owner
39 : * of a metadata extent, usually an inode or some other metadata structure like
40 : * an AG btree.
41 : *
42 : * The rmap btree is part of the free space management, so blocks for the tree
43 : * are sourced from the agfl. Hence we need transaction reservation support for
44 : * this tree so that the freelist is always large enough. This also impacts on
45 : * the minimum space we need to leave free in the AG.
46 : *
47 : * The tree is ordered by [ag block, owner, offset]. This is a large key size,
48 : * but it is the only way to enforce unique keys when a block can be owned by
49 : * multiple files at any offset. There's no need to order/search by extent
50 : * size for online updating/management of the tree. It is intended that most
51 : * reverse lookups will be to find the owner(s) of a particular block, or to
52 : * try to recover tree and file data from corrupt primary metadata.
53 : */
54 :
/* Duplicate this cursor against the same transaction, AGF buffer, and perag. */
static struct xfs_btree_cur *
xfs_rmapbt_dup_cursor(
	struct xfs_btree_cur	*cur)
{
	return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp,
			cur->bc_ag.agbp, cur->bc_ag.pag);
}
62 :
/*
 * Point the ondisk AGF at a new btree root block and adjust the recorded
 * tree height by @inc (+1 on root split, -1 on root collapse).
 */
STATIC void
xfs_rmapbt_set_root(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*ptr,
	int				inc)
{
	struct xfs_buf		*agbp = cur->bc_ag.agbp;
	struct xfs_agf		*agf = agbp->b_addr;
	int			btnum = cur->bc_btnum;

	/* The root can never be AG block zero (that's the superblock). */
	ASSERT(ptr->s != 0);

	agf->agf_roots[btnum] = ptr->s;
	be32_add_cpu(&agf->agf_levels[btnum], inc);
	/* Keep the incore perag's copy of the tree height in sync. */
	cur->bc_ag.pag->pagf_levels[btnum] += inc;

	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
}
81 :
/*
 * Allocate a new rmap btree block from the AG free list.  Sets *stat to 1
 * and fills @new on success; *stat is 0 if the freelist is empty.
 */
STATIC int
xfs_rmapbt_alloc_block(
	struct xfs_btree_cur	*cur,
	const union xfs_btree_ptr *start,
	union xfs_btree_ptr	*new,
	int			*stat)
{
	struct xfs_buf		*agbp = cur->bc_ag.agbp;
	struct xfs_agf		*agf = agbp->b_addr;
	struct xfs_perag	*pag = cur->bc_ag.pag;
	int			error;
	xfs_agblock_t		bno;

	/* Allocate the new block from the freelist. If we can't, give up. */
	error = xfs_alloc_get_freelist(pag, cur->bc_tp, cur->bc_ag.agbp,
			&bno, 1);
	if (error)
		return error;
	if (bno == NULLAGBLOCK) {
		*stat = 0;
		return 0;
	}

	/* Wait out anyone still using the block before we reuse it. */
	xfs_extent_busy_reuse(cur->bc_mp, pag, bno, 1, false);

	new->s = cpu_to_be32(bno);
	/* Account the new block in the AGF and log the change. */
	be32_add_cpu(&agf->agf_rmap_blocks, 1);
	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);

	xfs_ag_resv_rmapbt_alloc(cur->bc_mp, pag->pag_agno);

	*stat = 1;
	return 0;
}
116 :
/* Return a freed rmap btree block to the AG free list. */
STATIC int
xfs_rmapbt_free_block(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp)
{
	struct xfs_buf		*agbp = cur->bc_ag.agbp;
	struct xfs_agf		*agf = agbp->b_addr;
	struct xfs_perag	*pag = cur->bc_ag.pag;
	xfs_agblock_t		bno;
	int			error;

	bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp));
	/* Drop the AGF's block count before putting it back on the AGFL. */
	be32_add_cpu(&agf->agf_rmap_blocks, -1);
	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
	error = xfs_alloc_put_freelist(pag, cur->bc_tp, agbp, NULL, bno, 1);
	if (error)
		return error;

	/*
	 * Mark the block busy so it can't be reused until the transaction
	 * commits; skip discard since AGFL blocks are reused quickly.
	 */
	xfs_extent_busy_insert(cur->bc_tp, pag, bno, 1,
			XFS_EXTENT_BUSY_SKIP_DISCARD);

	xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1);
	return 0;
}
141 :
142 : STATIC int
143 127644227 : xfs_rmapbt_get_minrecs(
144 : struct xfs_btree_cur *cur,
145 : int level)
146 : {
147 127644227 : return cur->bc_mp->m_rmap_mnr[level != 0];
148 : }
149 :
150 : STATIC int
151 >16698*10^7 : xfs_rmapbt_get_maxrecs(
152 : struct xfs_btree_cur *cur,
153 : int level)
154 : {
155 >16698*10^7 : return cur->bc_mp->m_rmap_mxr[level != 0];
156 : }
157 :
158 : /*
159 : * Convert the ondisk record's offset field into the ondisk key's offset field.
160 : * Fork and bmbt are significant parts of the rmap record key, but written
161 : * status is merely a record attribute.
162 : */
static inline __be64 ondisk_rec_offset_to_key(const union xfs_btree_rec *rec)
{
	/* Strip the unwritten flag; it is an attribute, not part of the key. */
	return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
}
167 :
/* Derive the low key from an ondisk rmap record. */
STATIC void
xfs_rmapbt_init_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
}
177 :
178 : /*
179 : * The high key for a reverse mapping record can be computed by shifting
180 : * the startblock and offset to the highest value that would still map
181 : * to that record. In practice this means that we add blockcount-1 to
182 : * the startblock for all records, and if the record is for a data/attr
183 : * fork mapping, we add blockcount-1 to the offset too.
184 : */
STATIC void
xfs_rmapbt_init_high_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	uint64_t			off;
	int				adj;

	/* Distance from the first to the last block of the mapping. */
	adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;

	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	be32_add_cpu(&key->rmap.rm_startblock, adj);
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
	/* Only data/attr fork mappings carry a meaningful file offset. */
	if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
	    XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
		return;
	/* Advance the offset portion while preserving the flag bits. */
	off = be64_to_cpu(key->rmap.rm_offset);
	off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK);
	key->rmap.rm_offset = cpu_to_be64(off);
}
206 :
/* Convert the cursor's incore record into ondisk (big-endian) format. */
STATIC void
xfs_rmapbt_init_rec_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec)
{
	rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
	rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
	rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
	/* Pack fork/bmbt/unwritten flags into the ondisk offset field. */
	rec->rmap.rm_offset = cpu_to_be64(
			xfs_rmap_irec_offset_pack(&cur->bc_rec.r));
}
218 :
/* Point @ptr at the root block recorded in the AGF. */
STATIC void
xfs_rmapbt_init_ptr_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	struct xfs_agf		*agf = cur->bc_ag.agbp->b_addr;

	/* The AGF buffer must belong to the cursor's AG. */
	ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno));

	ptr->s = agf->agf_roots[cur->bc_btnum];
}
230 :
231 : /*
232 : * Mask the appropriate parts of the ondisk key field for a key comparison.
233 : * Fork and bmbt are significant parts of the rmap record key, but written
234 : * status is merely a record attribute.
235 : */
static inline uint64_t offset_keymask(uint64_t offset)
{
	/* The unwritten bit is a record attribute, not part of the key. */
	return offset & ~XFS_RMAP_OFF_UNWRITTEN;
}
240 :
/*
 * Compare an ondisk key against the cursor's lookup record.  Returns a
 * negative, zero, or positive value per the [startblock, owner, offset]
 * ordering; the unwritten flag is masked out of the offset comparison.
 */
STATIC int64_t
xfs_rmapbt_key_diff(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key)
{
	struct xfs_rmap_irec		*rec = &cur->bc_rec.r;
	const struct xfs_rmap_key	*kp = &key->rmap;
	__u64				x, y;
	int64_t				d;

	d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
	if (d)
		return d;

	x = be64_to_cpu(kp->rm_owner);
	y = rec->rm_owner;
	if (x > y)
		return 1;
	else if (y > x)
		return -1;

	x = offset_keymask(be64_to_cpu(kp->rm_offset));
	y = offset_keymask(xfs_rmap_irec_offset_pack(rec));
	if (x > y)
		return 1;
	else if (y > x)
		return -1;
	return 0;
}
270 :
/*
 * Compare two ondisk keys.  @mask, if non-NULL, selects which key fields
 * participate; the physical startblock must always be part of the check.
 */
STATIC int64_t
xfs_rmapbt_diff_two_keys(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2,
	const union xfs_btree_key	*mask)
{
	const struct xfs_rmap_key	*kp1 = &k1->rmap;
	const struct xfs_rmap_key	*kp2 = &k2->rmap;
	int64_t				d;
	__u64				x, y;

	/* Doesn't make sense to mask off the physical space part */
	ASSERT(!mask || mask->rmap.rm_startblock);

	d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
		     be32_to_cpu(kp2->rm_startblock);
	if (d)
		return d;

	if (!mask || mask->rmap.rm_owner) {
		x = be64_to_cpu(kp1->rm_owner);
		y = be64_to_cpu(kp2->rm_owner);
		if (x > y)
			return 1;
		else if (y > x)
			return -1;
	}

	if (!mask || mask->rmap.rm_offset) {
		/* Doesn't make sense to allow offset but not owner */
		ASSERT(!mask || mask->rmap.rm_owner);

		x = offset_keymask(be64_to_cpu(kp1->rm_offset));
		y = offset_keymask(be64_to_cpu(kp2->rm_offset));
		if (x > y)
			return 1;
		else if (y > x)
			return -1;
	}

	return 0;
}
314 :
/* Structural sanity checks for an rmap btree block. */
static xfs_failaddr_t
xfs_rmapbt_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_perag	*pag = bp->b_pag;
	xfs_failaddr_t		fa;
	unsigned int		level;

	/*
	 * magic number and level verification
	 *
	 * During growfs operations, we can't verify the exact level or owner as
	 * the perag is not fully initialised and hence not attached to the
	 * buffer. In this case, check against the maximum tree depth.
	 *
	 * Similarly, during log recovery we will have a perag structure
	 * attached, but the agf information will not yet have been initialised
	 * from the on disk AGF. Again, we can only check against maximum limits
	 * in this case.
	 */
	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	/* rmapbt blocks should only exist on filesystems with the feature. */
	if (!xfs_has_rmapbt(mp))
		return __this_address;
	fa = xfs_btree_sblock_v5hdr_verify(bp);
	if (fa)
		return fa;

	level = be16_to_cpu(block->bb_level);
	if (pag && xfs_perag_initialised_agf(pag)) {
		unsigned int	maxlevel = pag->pagf_levels[XFS_BTNUM_RMAPi];

#ifdef CONFIG_XFS_ONLINE_REPAIR
		/*
		 * Online repair could be rewriting the free space btrees, so
		 * we'll validate against the larger of either tree while this
		 * is going on.
		 */
		maxlevel = max_t(unsigned int, maxlevel,
				pag->pagf_alt_levels[XFS_BTNUM_RMAPi]);
#endif
		if (level >= maxlevel)
			return __this_address;
	} else if (level >= mp->m_rmap_maxlevels)
		return __this_address;

	return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]);
}
366 :
/* Read verifier: check the CRC first, then the block structure. */
static void
xfs_rmapbt_read_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	if (!xfs_btree_sblock_verify_crc(bp))
		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
	else {
		fa = xfs_rmapbt_verify(bp);
		if (fa)
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
	}

	/* xfs_verifier_error set b_error; emit the corruption tracepoint. */
	if (bp->b_error)
		trace_xfs_btree_corrupt(bp, _RET_IP_);
}
384 :
385 : static void
386 12651597 : xfs_rmapbt_write_verify(
387 : struct xfs_buf *bp)
388 : {
389 12651597 : xfs_failaddr_t fa;
390 :
391 12651597 : fa = xfs_rmapbt_verify(bp);
392 12651595 : if (fa) {
393 0 : trace_xfs_btree_corrupt(bp, _RET_IP_);
394 0 : xfs_verifier_error(bp, -EFSCORRUPTED, fa);
395 0 : return;
396 : }
397 12651595 : xfs_btree_sblock_calc_crc(bp);
398 :
399 : }
400 :
/* Buffer ops for ondisk rmap btree blocks. */
const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
	.name			= "xfs_rmapbt",
	.magic			= { 0, cpu_to_be32(XFS_RMAP_CRC_MAGIC) },
	.verify_read		= xfs_rmapbt_read_verify,
	.verify_write		= xfs_rmapbt_write_verify,
	.verify_struct		= xfs_rmapbt_verify,
};
408 :
409 : STATIC int
410 6952867 : xfs_rmapbt_keys_inorder(
411 : struct xfs_btree_cur *cur,
412 : const union xfs_btree_key *k1,
413 : const union xfs_btree_key *k2)
414 : {
415 6952867 : uint32_t x;
416 6952867 : uint32_t y;
417 6952867 : uint64_t a;
418 6952867 : uint64_t b;
419 :
420 6952867 : x = be32_to_cpu(k1->rmap.rm_startblock);
421 6952867 : y = be32_to_cpu(k2->rmap.rm_startblock);
422 6952867 : if (x < y)
423 : return 1;
424 55832 : else if (x > y)
425 : return 0;
426 55832 : a = be64_to_cpu(k1->rmap.rm_owner);
427 55832 : b = be64_to_cpu(k2->rmap.rm_owner);
428 55832 : if (a < b)
429 : return 1;
430 52947 : else if (a > b)
431 : return 0;
432 52947 : a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset));
433 52947 : b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset));
434 52947 : if (a <= b)
435 52947 : return 1;
436 : return 0;
437 : }
438 :
439 : STATIC int
440 1235295386 : xfs_rmapbt_recs_inorder(
441 : struct xfs_btree_cur *cur,
442 : const union xfs_btree_rec *r1,
443 : const union xfs_btree_rec *r2)
444 : {
445 1235295386 : uint32_t x;
446 1235295386 : uint32_t y;
447 1235295386 : uint64_t a;
448 1235295386 : uint64_t b;
449 :
450 1235295386 : x = be32_to_cpu(r1->rmap.rm_startblock);
451 1235295386 : y = be32_to_cpu(r2->rmap.rm_startblock);
452 1235295386 : if (x < y)
453 : return 1;
454 41579004 : else if (x > y)
455 : return 0;
456 41579004 : a = be64_to_cpu(r1->rmap.rm_owner);
457 41579004 : b = be64_to_cpu(r2->rmap.rm_owner);
458 41579004 : if (a < b)
459 : return 1;
460 11988672 : else if (a > b)
461 : return 0;
462 11988672 : a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset));
463 11988672 : b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset));
464 11988672 : if (a <= b)
465 11988672 : return 1;
466 : return 0;
467 : }
468 :
/* Decide if two keys describe physically adjacent extents. */
STATIC enum xbtree_key_contig
xfs_rmapbt_keys_contiguous(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key1,
	const union xfs_btree_key	*key2,
	const union xfs_btree_key	*mask)
{
	ASSERT(!mask || mask->rmap.rm_startblock);

	/*
	 * We only support checking contiguity of the physical space component.
	 * If any callers ever need more specificity than that, they'll have to
	 * implement it here.
	 */
	ASSERT(!mask || (!mask->rmap.rm_owner && !mask->rmap.rm_offset));

	return xbtree_key_contig(be32_to_cpu(key1->rmap.rm_startblock),
				 be32_to_cpu(key2->rmap.rm_startblock));
}
488 :
/* Btree operations for the ondisk (per-AG) reverse mapping btree. */
const struct xfs_btree_ops xfs_rmapbt_ops = {
	.rec_len		= sizeof(struct xfs_rmap_rec),
	/* Overlapping btree: each pointer carries a low and a high key. */
	.key_len		= 2 * sizeof(struct xfs_rmap_key),
	.lru_refs		= XFS_RMAP_BTREE_REF,
	.geom_flags		= XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING,

	.dup_cursor		= xfs_rmapbt_dup_cursor,
	.set_root		= xfs_rmapbt_set_root,
	.alloc_block		= xfs_rmapbt_alloc_block,
	.free_block		= xfs_rmapbt_free_block,
	.get_minrecs		= xfs_rmapbt_get_minrecs,
	.get_maxrecs		= xfs_rmapbt_get_maxrecs,
	.init_key_from_rec	= xfs_rmapbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_rmapbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_rmapbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfs_rmapbt_init_ptr_from_cur,
	.key_diff		= xfs_rmapbt_key_diff,
	.buf_ops		= &xfs_rmapbt_buf_ops,
	.diff_two_keys		= xfs_rmapbt_diff_two_keys,
	.keys_inorder		= xfs_rmapbt_keys_inorder,
	.recs_inorder		= xfs_rmapbt_recs_inorder,
	.keys_contiguous	= xfs_rmapbt_keys_contiguous,
};
512 :
/* Set up the parts of an rmapbt cursor shared by regular and staged trees. */
static struct xfs_btree_cur *
xfs_rmapbt_init_common(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_perag	*pag)
{
	struct xfs_btree_cur	*cur;

	/* Overlapping btree; 2 keys per pointer. */
	cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_RMAP, &xfs_rmapbt_ops,
			mp->m_rmap_maxlevels, xfs_rmapbt_cur_cache);
	cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2);

	/* Hold a reference so the perag can't go away under the cursor. */
	cur->bc_ag.pag = xfs_perag_hold(pag);
	return cur;
}
529 :
530 : /* Create a new reverse mapping btree cursor. */
/* Create a cursor for the ondisk rmap btree rooted in the given AGF. */
struct xfs_btree_cur *
xfs_rmapbt_init_cursor(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	struct xfs_perag	*pag)
{
	struct xfs_agf		*agf = agbp->b_addr;
	struct xfs_btree_cur	*cur;

	cur = xfs_rmapbt_init_common(mp, tp, pag);
	/* The current tree height comes from the ondisk AGF. */
	cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
	cur->bc_ag.agbp = agbp;
	return cur;
}
546 :
547 : /* Create a new reverse mapping btree cursor with a fake root for staging. */
struct xfs_btree_cur *
xfs_rmapbt_stage_cursor(
	struct xfs_mount	*mp,
	struct xbtree_afakeroot	*afake,
	struct xfs_perag	*pag)
{
	struct xfs_btree_cur	*cur;

	/* No transaction: staged trees are built outside the ondisk tree. */
	cur = xfs_rmapbt_init_common(mp, NULL, pag);
	xfs_btree_stage_afakeroot(cur, afake);
	return cur;
}
560 :
561 : #ifdef CONFIG_XFS_BTREE_IN_XFILE
562 : /*
563 : * Validate an in-memory rmap btree block. Callers are allowed to generate an
564 : * in-memory btree even if the ondisk feature is not enabled.
565 : */
static xfs_failaddr_t
xfs_rmapbt_mem_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	xfs_failaddr_t		fa;
	unsigned int		level;

	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	fa = xfs_btree_sblock_v5hdr_verify(bp);
	if (fa)
		return fa;

	/*
	 * Bound the level check by whichever maximum applies: the incore
	 * limit if rmapbt is enabled, or the ondisk maximum if not.
	 */
	level = be16_to_cpu(block->bb_level);
	if (xfs_has_rmapbt(mp)) {
		if (level >= mp->m_rmap_maxlevels)
			return __this_address;
	} else {
		if (level >= xfs_rmapbt_maxlevels_ondisk())
			return __this_address;
	}

	return xfbtree_sblock_verify(bp,
			xfs_rmapbt_maxrecs(mp, xfo_to_b(1), level == 0));
}
594 :
/* Reads and writes of in-memory blocks share the same structural check. */
static void
xfs_rmapbt_mem_rw_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa = xfs_rmapbt_mem_verify(bp);

	if (fa)
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
}
604 :
605 : /* skip crc checks on in-memory btrees to save time */
/* No verify_crc hooks: CRC checks are skipped for in-memory btree blocks. */
static const struct xfs_buf_ops xfs_rmapbt_mem_buf_ops = {
	.name			= "xfs_rmapbt_mem",
	.magic			= { 0, cpu_to_be32(XFS_RMAP_CRC_MAGIC) },
	.verify_read		= xfs_rmapbt_mem_rw_verify,
	.verify_write		= xfs_rmapbt_mem_rw_verify,
	.verify_struct		= xfs_rmapbt_mem_verify,
};
613 :
/* Btree operations for the in-memory (xfile-backed) rmap btree. */
static const struct xfs_btree_ops xfs_rmapbt_mem_ops = {
	.rec_len		= sizeof(struct xfs_rmap_rec),
	.key_len		= 2 * sizeof(struct xfs_rmap_key),
	.lru_refs		= XFS_RMAP_BTREE_REF,
	.geom_flags		= XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING |
				  XFS_BTREE_IN_XFILE,

	/* Block management is delegated to the generic xfbtree helpers. */
	.dup_cursor		= xfbtree_dup_cursor,
	.set_root		= xfbtree_set_root,
	.alloc_block		= xfbtree_alloc_block,
	.free_block		= xfbtree_free_block,
	.get_minrecs		= xfbtree_get_minrecs,
	.get_maxrecs		= xfbtree_get_maxrecs,
	/* Key/record handling is identical to the ondisk tree. */
	.init_key_from_rec	= xfs_rmapbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_rmapbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_rmapbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfbtree_init_ptr_from_cur,
	.key_diff		= xfs_rmapbt_key_diff,
	.buf_ops		= &xfs_rmapbt_mem_buf_ops,
	.diff_two_keys		= xfs_rmapbt_diff_two_keys,
	.keys_inorder		= xfs_rmapbt_keys_inorder,
	.recs_inorder		= xfs_rmapbt_recs_inorder,
	.keys_contiguous	= xfs_rmapbt_keys_contiguous,
};
638 :
639 : /* Create a cursor for an in-memory btree. */
struct xfs_btree_cur *
xfs_rmapbt_mem_cursor(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	struct xfs_buf		*head_bp,
	struct xfbtree		*xfbtree)
{
	struct xfs_btree_cur	*cur;
	struct xfs_mount	*mp = pag->pag_mount;

	/* Overlapping btree; 2 keys per pointer. */
	cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_RMAP,
			&xfs_rmapbt_mem_ops, mp->m_rmap_maxlevels,
			xfs_rmapbt_cur_cache);
	cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2);
	cur->bc_mem.xfbtree = xfbtree;
	cur->bc_mem.head_bp = head_bp;
	/* Tree height lives in the in-memory btree head block. */
	cur->bc_nlevels = xfs_btree_mem_head_nlevels(head_bp);

	cur->bc_mem.pag = xfs_perag_hold(pag);
	return cur;
}
662 :
663 : /* Create an in-memory rmap btree. */
int
xfs_rmapbt_mem_create(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	struct xfs_buftarg	*target,
	struct xfbtree		**xfbtreep)
{
	/* The AG number is recorded as the owner of the in-memory tree. */
	struct xfbtree_config	cfg = {
		.btree_ops	= &xfs_rmapbt_mem_ops,
		.target		= target,
		.owner		= agno,
		.flags		= XFBTREE_DIRECT_MAP,
	};

	return xfbtree_create(mp, &cfg, xfbtreep);
}
680 : #endif /* CONFIG_XFS_BTREE_IN_XFILE */
681 :
682 : /*
683 : * Install a new reverse mapping btree root. Caller is responsible for
684 : * invalidating and freeing the old btree blocks.
685 : */
void
xfs_rmapbt_commit_staged_btree(
	struct xfs_btree_cur	*cur,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp)
{
	struct xfs_agf		*agf = agbp->b_addr;
	struct xbtree_afakeroot	*afake = cur->bc_ag.afake;

	/* Only a staging cursor carries a fake root to commit. */
	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);

	/* Copy the fake root's geometry into the ondisk AGF and log it. */
	agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root);
	agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels);
	agf->agf_rmap_blocks = cpu_to_be32(afake->af_blocks);
	xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS |
				    XFS_AGF_RMAP_BLOCKS);
	/* Convert the staging cursor into a regular ondisk-tree cursor. */
	xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_rmapbt_ops);
}
704 :
705 : /* Calculate number of records in a reverse mapping btree block. */
static inline unsigned int
xfs_rmapbt_block_maxrecs(
	unsigned int		blocklen,
	bool			leaf)
{
	if (leaf)
		return blocklen / sizeof(struct xfs_rmap_rec);
	/* Node blocks store a low and a high key for every child pointer. */
	return blocklen /
		(2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
}
716 :
717 : /*
718 : * Calculate number of records in an rmap btree block.
719 : */
unsigned int
xfs_rmapbt_maxrecs(
	struct xfs_mount	*mp,
	unsigned int		blocklen,
	bool			leaf)
{
	/* Exclude the block header before computing record capacity. */
	blocklen -= XFS_RMAP_BLOCK_LEN;
	return xfs_rmapbt_block_maxrecs(blocklen, leaf);
}
729 :
730 : /* Compute the max possible height for reverse mapping btrees. */
unsigned int
xfs_rmapbt_maxlevels_ondisk(void)
{
	unsigned int	minrecs[2];
	unsigned int	blocklen;

	/* Worst case: the smallest CRC-enabled block size. */
	blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN;

	/* Minimum occupancy is half of the maximum. */
	minrecs[0] = xfs_rmapbt_block_maxrecs(blocklen, true) / 2;
	minrecs[1] = xfs_rmapbt_block_maxrecs(blocklen, false) / 2;

	/*
	 * Compute the asymptotic maxlevels for an rmapbt on any reflink fs.
	 *
	 * On a reflink filesystem, each AG block can have up to 2^32 (per the
	 * refcount record format) owners, which means that theoretically we
	 * could face up to 2^64 rmap records.  However, we're likely to run
	 * out of blocks in the AG long before that happens, which means that
	 * we must compute the max height based on what the btree will look
	 * like if it consumes almost all the blocks in the AG due to maximal
	 * sharing factor.
	 */
	return xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS);
}
755 :
756 : /* Compute the maximum height of an rmap btree. */
void
xfs_rmapbt_compute_maxlevels(
	struct xfs_mount	*mp)
{
	/* No rmap btree, no height. */
	if (!xfs_has_rmapbt(mp)) {
		mp->m_rmap_maxlevels = 0;
		return;
	}

	if (xfs_has_reflink(mp)) {
		/*
		 * Compute the asymptotic maxlevels for an rmap btree on a
		 * filesystem that supports reflink.
		 *
		 * On a reflink filesystem, each AG block can have up to 2^32
		 * (per the refcount record format) owners, which means that
		 * theoretically we could face up to 2^64 rmap records.
		 * However, we're likely to run out of blocks in the AG long
		 * before that happens, which means that we must compute the
		 * max height based on what the btree will look like if it
		 * consumes almost all the blocks in the AG due to maximal
		 * sharing factor.
		 */
		mp->m_rmap_maxlevels = xfs_btree_space_to_height(mp->m_rmap_mnr,
				mp->m_sb.sb_agblocks);
	} else {
		/*
		 * If there's no block sharing, compute the maximum rmapbt
		 * height assuming one rmap record per AG block.
		 */
		mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(
				mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
	}
	ASSERT(mp->m_rmap_maxlevels <= xfs_rmapbt_maxlevels_ondisk());
}
792 :
/* Calculate the rmap btree size for some records. */
xfs_extlen_t
xfs_rmapbt_calc_size(
	struct xfs_mount	*mp,
	unsigned long long	len)
{
	return xfs_btree_calc_size(mp->m_rmap_mnr, len);
}
801 :
/*
 * Calculate the maximum rmap btree size.
 */
xfs_extlen_t
xfs_rmapbt_max_size(
	struct xfs_mount	*mp,
	xfs_agblock_t		agblocks)
{
	/* Bail out if we're uninitialized, which can happen in mkfs. */
	if (mp->m_rmap_mxr[0] == 0)
		return 0;

	/* Worst case: one record per AG block. */
	return xfs_rmapbt_calc_size(mp, agblocks);
}
816 :
/*
 * Figure out how many blocks to reserve and how many are used by this btree.
 * On success, *ask is increased by the reservation target and *used by the
 * blocks currently consumed by the tree.
 */
int
xfs_rmapbt_calc_reserves(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_perag	*pag,
	xfs_extlen_t		*ask,
	xfs_extlen_t		*used)
{
	struct xfs_buf		*agbp;
	struct xfs_agf		*agf;
	xfs_agblock_t		agblocks;
	xfs_extlen_t		tree_len;
	int			error;

	/* Nothing to reserve without the rmapbt feature. */
	if (!xfs_has_rmapbt(mp))
		return 0;

	error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
	if (error)
		return error;

	agf = agbp->b_addr;
	agblocks = be32_to_cpu(agf->agf_length);
	tree_len = be32_to_cpu(agf->agf_rmap_blocks);
	xfs_trans_brelse(tp, agbp);

	/*
	 * The log is permanently allocated, so the space it occupies will
	 * never be available for the kinds of things that would require btree
	 * expansion.  We therefore can pretend the space isn't there.
	 */
	if (xfs_ag_contains_log(mp, pag->pag_agno))
		agblocks -= mp->m_sb.sb_logblocks;

	/* Reserve 1% of the AG or enough for 1 block per record. */
	*ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks));
	*used += tree_len;

	return error;
}
860 :
/* Create the slab cache for rmapbt cursors, sized for the deepest tree. */
int __init
xfs_rmapbt_init_cur_cache(void)
{
	xfs_rmapbt_cur_cache = kmem_cache_create("xfs_rmapbt_cur",
			xfs_btree_cur_sizeof(xfs_rmapbt_maxlevels_ondisk()),
			0, 0, NULL);

	if (!xfs_rmapbt_cur_cache)
		return -ENOMEM;
	return 0;
}
872 :
/* Tear down the cursor cache at module unload. */
void
xfs_rmapbt_destroy_cur_cache(void)
{
	kmem_cache_destroy(xfs_rmapbt_cur_cache);
	xfs_rmapbt_cur_cache = NULL;
}
|