Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2014 Red Hat, Inc.
4 : * All Rights Reserved.
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_log_format.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_mount.h"
13 : #include "xfs_trans.h"
14 : #include "xfs_alloc.h"
15 : #include "xfs_btree.h"
16 : #include "xfs_btree_staging.h"
17 : #include "xfs_rmap.h"
18 : #include "xfs_rmap_btree.h"
19 : #include "xfs_trace.h"
20 : #include "xfs_error.h"
21 : #include "xfs_extent_busy.h"
22 : #include "xfs_ag.h"
23 : #include "xfs_ag_resv.h"
24 : #include "scrub/xfile.h"
25 : #include "scrub/xfbtree.h"
26 : #include "xfs_btree_mem.h"
27 :
28 : static struct kmem_cache *xfs_rmapbt_cur_cache;
29 :
30 : /*
31 : * Reverse map btree.
32 : *
33 : * This is a per-ag tree used to track the owner(s) of a given extent. With
34 : * reflink it is possible for there to be multiple owners, which is a departure
35 : * from classic XFS. Owner records for data extents are inserted when the
36 : * extent is mapped and removed when an extent is unmapped. Owner records for
37 : * all other block types (i.e. metadata) are inserted when an extent is
38 : * allocated and removed when an extent is freed. There can only be one owner
39 : * of a metadata extent, usually an inode or some other metadata structure like
40 : * an AG btree.
41 : *
42 : * The rmap btree is part of the free space management, so blocks for the tree
43 : * are sourced from the agfl. Hence we need transaction reservation support for
44 : * this tree so that the freelist is always large enough. This also impacts on
45 : * the minimum space we need to leave free in the AG.
46 : *
47 : * The tree is ordered by [ag block, owner, offset]. This is a large key size,
48 : * but it is the only way to enforce unique keys when a block can be owned by
49 : * multiple files at any offset. There's no need to order/search by extent
50 : * size for online updating/management of the tree. It is intended that most
51 : * reverse lookups will be to find the owner(s) of a particular block, or to
52 : * try to recover tree and file data from corrupt primary metadata.
53 : */
54 :
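/*
 * A minimal standalone sketch of the [ag block, owner, offset] key
 * ordering described above. The struct and function are hypothetical
 * simplifications of xfs_rmap_irec and the key_diff callback below,
 * using host-endian fields for clarity.
 */
struct rmap_key_sketch {
	uint32_t	startblock;	/* AG block of the mapping */
	uint64_t	owner;		/* inode number or metadata owner */
	uint64_t	offset;		/* file offset of the mapping */
};

static int
rmap_key_sketch_cmp(
	const struct rmap_key_sketch	*a,
	const struct rmap_key_sketch	*b)
{
	/* startblock is most significant, then owner, then offset */
	if (a->startblock != b->startblock)
		return a->startblock < b->startblock ? -1 : 1;
	if (a->owner != b->owner)
		return a->owner < b->owner ? -1 : 1;
	if (a->offset != b->offset)
		return a->offset < b->offset ? -1 : 1;
	return 0;
}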
55 : static struct xfs_btree_cur *
56 47062351 : xfs_rmapbt_dup_cursor(
57 : struct xfs_btree_cur *cur)
58 : {
59 47062351 : return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp,
60 : cur->bc_ag.agbp, cur->bc_ag.pag);
61 : }
62 :
63 : STATIC void
64 11285 : xfs_rmapbt_set_root(
65 : struct xfs_btree_cur *cur,
66 : const union xfs_btree_ptr *ptr,
67 : int inc)
68 : {
69 11285 : struct xfs_buf *agbp = cur->bc_ag.agbp;
70 11285 : struct xfs_agf *agf = agbp->b_addr;
71 11285 : int btnum = cur->bc_btnum;
72 :
73 11285 : ASSERT(ptr->s != 0);
74 :
75 11285 : agf->agf_roots[btnum] = ptr->s;
76 11285 : be32_add_cpu(&agf->agf_levels[btnum], inc);
77 11285 : cur->bc_ag.pag->pagf_levels[btnum] += inc;
78 :
79 11285 : xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
80 11285 : }
81 :
82 : STATIC int
83 337805 : xfs_rmapbt_alloc_block(
84 : struct xfs_btree_cur *cur,
85 : const union xfs_btree_ptr *start,
86 : union xfs_btree_ptr *new,
87 : int *stat)
88 : {
89 337805 : struct xfs_buf *agbp = cur->bc_ag.agbp;
90 337805 : struct xfs_agf *agf = agbp->b_addr;
91 337805 : struct xfs_perag *pag = cur->bc_ag.pag;
92 337805 : int error;
93 337805 : xfs_agblock_t bno;
94 :
95 : /* Allocate the new block from the freelist. If we can't, give up. */
96 337805 : error = xfs_alloc_get_freelist(pag, cur->bc_tp, cur->bc_ag.agbp,
97 : &bno, 1);
98 337805 : if (error)
99 : return error;
100 337805 : if (bno == NULLAGBLOCK) {
101 0 : *stat = 0;
102 0 : return 0;
103 : }
104 :
105 337805 : xfs_extent_busy_reuse(cur->bc_mp, pag, bno, 1, false);
106 :
107 337805 : new->s = cpu_to_be32(bno);
108 337805 : be32_add_cpu(&agf->agf_rmap_blocks, 1);
109 337805 : xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
110 :
111 337805 : xfs_ag_resv_rmapbt_alloc(cur->bc_mp, pag->pag_agno);
112 :
113 337805 : *stat = 1;
114 337805 : return 0;
115 : }
116 :
117 : STATIC int
118 141319 : xfs_rmapbt_free_block(
119 : struct xfs_btree_cur *cur,
120 : struct xfs_buf *bp)
121 : {
122 141319 : struct xfs_buf *agbp = cur->bc_ag.agbp;
123 141319 : struct xfs_agf *agf = agbp->b_addr;
124 141319 : struct xfs_perag *pag = cur->bc_ag.pag;
125 141319 : xfs_agblock_t bno;
126 141319 : int error;
127 :
128 141319 : bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp));
129 141319 : be32_add_cpu(&agf->agf_rmap_blocks, -1);
130 141319 : xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
131 141319 : error = xfs_alloc_put_freelist(pag, cur->bc_tp, agbp, NULL, bno, 1);
132 141319 : if (error)
133 : return error;
134 :
135 141319 : xfs_extent_busy_insert(cur->bc_tp, pag, bno, 1,
136 : XFS_EXTENT_BUSY_SKIP_DISCARD);
137 :
138 141319 : xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1);
139 141319 : return 0;
140 : }
141 :
142 : STATIC int
143 108137659 : xfs_rmapbt_get_minrecs(
144 : struct xfs_btree_cur *cur,
145 : int level)
146 : {
147 108137659 : return cur->bc_mp->m_rmap_mnr[level != 0];
148 : }
149 :
150 : STATIC int
151 >13018*10^7 : xfs_rmapbt_get_maxrecs(
152 : struct xfs_btree_cur *cur,
153 : int level)
154 : {
155 >13018*10^7 : return cur->bc_mp->m_rmap_mxr[level != 0];
156 : }
157 :
158 : /*
159 : * Convert the ondisk record's offset field into the ondisk key's offset field.
160 : * Fork and bmbt are significant parts of the rmap record key, but written
161 : * status is merely a record attribute.
162 : */
163 : static inline __be64 ondisk_rec_offset_to_key(const union xfs_btree_rec *rec)
164 : {
165 >28839*10^7 : return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
166 : }
167 :
168 : STATIC void
169 >12989*10^7 : xfs_rmapbt_init_key_from_rec(
170 : union xfs_btree_key *key,
171 : const union xfs_btree_rec *rec)
172 : {
173 >12989*10^7 : key->rmap.rm_startblock = rec->rmap.rm_startblock;
174 >12989*10^7 : key->rmap.rm_owner = rec->rmap.rm_owner;
175 >12989*10^7 : key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
176 >12989*10^7 : }
177 :
178 : /*
179 : * The high key for a reverse mapping record can be computed by shifting
180 : * the startblock and offset to the highest value that would still map
181 : * to that record. In practice this means that we add blockcount-1 to
182 : * the startblock for all records, and if the record is for a data/attr
183 : * fork mapping, we add blockcount-1 to the offset too.
184 : */
185 : STATIC void
186 >15844*10^7 : xfs_rmapbt_init_high_key_from_rec(
187 : union xfs_btree_key *key,
188 : const union xfs_btree_rec *rec)
189 : {
190 >15844*10^7 : uint64_t off;
191 >15844*10^7 : int adj;
192 :
193 >15844*10^7 : adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;
194 :
195 >15844*10^7 : key->rmap.rm_startblock = rec->rmap.rm_startblock;
196 >15844*10^7 : be32_add_cpu(&key->rmap.rm_startblock, adj);
197 >15850*10^7 : key->rmap.rm_owner = rec->rmap.rm_owner;
198 >15850*10^7 : key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
199 >15850*10^7 : if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
200 >15255*10^7 : XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
201 : return;
202 >15106*10^7 : off = be64_to_cpu(key->rmap.rm_offset);
203 >15106*10^7 : off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK);
204 >15106*10^7 : key->rmap.rm_offset = cpu_to_be64(off);
205 : }
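/*
 * A worked example of the high-key rule above, as a hypothetical helper
 * over host-endian fields: a data fork mapping with startblock 100,
 * blockcount 8, offset 20 covers AG blocks [100, 107] and file offsets
 * [20, 27], so both components advance by blockcount - 1.
 */
struct rmap_rec_sketch {
	uint32_t	startblock;
	uint32_t	blockcount;
	uint64_t	owner;
	uint64_t	offset;
	bool		bmbt_or_metadata;	/* not a file mapping? */
};

static void
rmap_high_key_sketch(
	const struct rmap_rec_sketch	*rec,
	struct rmap_rec_sketch		*high)
{
	uint32_t	adj = rec->blockcount - 1;

	*high = *rec;
	high->startblock += adj;		/* 100 -> 107 */
	/* only data/attr fork mappings extend the offset component */
	if (!rec->bmbt_or_metadata)
		high->offset += adj;		/* 20 -> 27 */
}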
206 :
207 : STATIC void
208 2690888203 : xfs_rmapbt_init_rec_from_cur(
209 : struct xfs_btree_cur *cur,
210 : union xfs_btree_rec *rec)
211 : {
212 2690888203 : rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
213 2690888203 : rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
214 2690888203 : rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
215 2690888203 : rec->rmap.rm_offset = cpu_to_be64(
216 : xfs_rmap_irec_offset_pack(&cur->bc_rec.r));
217 2690888203 : }
218 :
219 : STATIC void
220 1533573520 : xfs_rmapbt_init_ptr_from_cur(
221 : struct xfs_btree_cur *cur,
222 : union xfs_btree_ptr *ptr)
223 : {
224 1533573520 : struct xfs_agf *agf = cur->bc_ag.agbp->b_addr;
225 :
226 3067147040 : ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno));
227 :
228 1533573520 : ptr->s = agf->agf_roots[cur->bc_btnum];
229 1533573520 : }
230 :
231 : /*
232 : * Mask the appropriate parts of the ondisk key field for a key comparison.
233 : * Fork and bmbt are significant parts of the rmap record key, but written
234 : * status is merely a record attribute.
235 : */
236 : static inline uint64_t offset_keymask(uint64_t offset)
237 : {
238 11008518363 : return offset & ~XFS_RMAP_OFF_UNWRITTEN;
239 : }
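/*
 * For example: a written and an unwritten mapping at the same file
 * offset produce identical masked offsets, so flipping the unwritten
 * bit on an extent never moves its record within the tree.
 */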
240 :
241 : STATIC int64_t
242 9834584126 : xfs_rmapbt_key_diff(
243 : struct xfs_btree_cur *cur,
244 : const union xfs_btree_key *key)
245 : {
246 9834584126 : struct xfs_rmap_irec *rec = &cur->bc_rec.r;
247 9834584126 : const struct xfs_rmap_key *kp = &key->rmap;
248 9834584126 : __u64 x, y;
249 9834584126 : int64_t d;
250 :
251 9834584126 : d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
252 9834584126 : if (d)
253 : return d;
254 :
255 1000392526 : x = be64_to_cpu(kp->rm_owner);
256 1000392526 : y = rec->rm_owner;
257 1000392526 : if (x > y)
258 : return 1;
259 964465564 : else if (y > x)
260 : return -1;
261 :
262 777664617 : x = offset_keymask(be64_to_cpu(kp->rm_offset));
263 777664617 : y = offset_keymask(xfs_rmap_irec_offset_pack(rec));
264 777664617 : if (x > y)
265 : return 1;
266 706105328 : else if (y > x)
267 341810295 : return -1;
268 : return 0;
269 : }
270 :
271 : STATIC int64_t
272 >35035*10^7 : xfs_rmapbt_diff_two_keys(
273 : struct xfs_btree_cur *cur,
274 : const union xfs_btree_key *k1,
275 : const union xfs_btree_key *k2,
276 : const union xfs_btree_key *mask)
277 : {
278 >35035*10^7 : const struct xfs_rmap_key *kp1 = &k1->rmap;
279 >35035*10^7 : const struct xfs_rmap_key *kp2 = &k2->rmap;
280 >35035*10^7 : int64_t d;
281 >35035*10^7 : __u64 x, y;
282 :
283 : /* Doesn't make sense to mask off the physical space part */
284 >35035*10^7 : ASSERT(!mask || mask->rmap.rm_startblock);
285 :
286 >35035*10^7 : d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
287 >35035*10^7 : be32_to_cpu(kp2->rm_startblock);
288 >35035*10^7 : if (d)
289 : return d;
290 :
291 12176415272 : if (!mask || mask->rmap.rm_owner) {
292 12176391933 : x = be64_to_cpu(kp1->rm_owner);
293 12176391933 : y = be64_to_cpu(kp2->rm_owner);
294 12176391933 : if (x > y)
295 : return 1;
296 5627300956 : else if (y > x)
297 : return -1;
298 : }
299 :
300 5103398229 : if (!mask || mask->rmap.rm_offset) {
301 : /* Doesn't make sense to allow offset but not owner */
302 5103397186 : ASSERT(!mask || mask->rmap.rm_owner);
303 :
304 5103397186 : x = offset_keymask(be64_to_cpu(kp1->rm_offset));
305 5103397186 : y = offset_keymask(be64_to_cpu(kp2->rm_offset));
306 5103397186 : if (x > y)
307 : return 1;
308 762278504 : else if (y > x)
309 23152806 : return -1;
310 : }
311 :
312 : return 0;
313 : }
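/*
 * Sketch of how a caller can use the mask handled above: any nonzero
 * value in a key field enables comparison of that field, so a mask
 * with only rm_startblock set compares just the physical space
 * component. This helper is hypothetical, not part of the measured
 * source.
 */
static bool
rmap_same_space_sketch(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2)
{
	union xfs_btree_key		mask = { };

	mask.rmap.rm_startblock = cpu_to_be32(~0U);
	return xfs_rmapbt_diff_two_keys(cur, k1, k2, &mask) == 0;
}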
314 :
315 : static xfs_failaddr_t
316 27690881 : xfs_rmapbt_verify(
317 : struct xfs_buf *bp)
318 : {
319 27690881 : struct xfs_mount *mp = bp->b_mount;
320 27690881 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
321 27690881 : struct xfs_perag *pag = bp->b_pag;
322 27690881 : xfs_failaddr_t fa;
323 27690881 : unsigned int level;
324 :
325 : /*
326 : * magic number and level verification
327 : *
328 : * During growfs operations, we can't verify the exact level or owner as
329 : * the perag is not fully initialised and hence not attached to the
330 : * buffer. In this case, check against the maximum tree depth.
331 : *
332 : * Similarly, during log recovery we will have a perag structure
333 : * attached, but the agf information will not yet have been initialised
334 : * from the on disk AGF. Again, we can only check against maximum limits
335 : * in this case.
336 : */
337 27690881 : if (!xfs_verify_magic(bp, block->bb_magic))
338 0 : return __this_address;
339 :
340 27690995 : if (!xfs_has_rmapbt(mp))
341 0 : return __this_address;
342 27690995 : fa = xfs_btree_sblock_v5hdr_verify(bp);
343 27691001 : if (fa)
344 : return fa;
345 :
346 27691006 : level = be16_to_cpu(block->bb_level);
347 55376606 : if (pag && xfs_perag_initialised_agf(pag)) {
348 21944147 : unsigned int maxlevel = pag->pagf_levels[XFS_BTNUM_RMAPi];
349 :
350 : #ifdef CONFIG_XFS_ONLINE_REPAIR
351 : /*
352 : * Online repair could be rewriting the rmap btree, so we'll
353 : * validate against the larger of the old and new levels while
354 : * this is going on.
355 : */
356 21944147 : maxlevel = max_t(unsigned int, maxlevel,
357 : pag->pagf_alt_levels[XFS_BTNUM_RMAPi]);
358 : #endif
359 21944147 : if (level >= maxlevel)
360 0 : return __this_address;
361 5746859 : } else if (level >= mp->m_rmap_maxlevels)
362 0 : return __this_address;
363 :
364 27691006 : return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]);
365 : }
366 :
367 : static void
368 1711585 : xfs_rmapbt_read_verify(
369 : struct xfs_buf *bp)
370 : {
371 1711585 : xfs_failaddr_t fa;
372 :
373 1711585 : if (!xfs_btree_sblock_verify_crc(bp))
374 4 : xfs_verifier_error(bp, -EFSBADCRC, __this_address);
375 : else {
376 1711581 : fa = xfs_rmapbt_verify(bp);
377 1711581 : if (fa)
378 0 : xfs_verifier_error(bp, -EFSCORRUPTED, fa);
379 : }
380 :
381 1711585 : if (bp->b_error)
382 4 : trace_xfs_btree_corrupt(bp, _RET_IP_);
383 1711585 : }
384 :
385 : static void
386 11220524 : xfs_rmapbt_write_verify(
387 : struct xfs_buf *bp)
388 : {
389 11220524 : xfs_failaddr_t fa;
390 :
391 11220524 : fa = xfs_rmapbt_verify(bp);
392 11220518 : if (fa) {
393 0 : trace_xfs_btree_corrupt(bp, _RET_IP_);
394 0 : xfs_verifier_error(bp, -EFSCORRUPTED, fa);
395 0 : return;
396 : }
397 11220518 : xfs_btree_sblock_calc_crc(bp);
398 :
399 : }
400 :
401 : const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
402 : .name = "xfs_rmapbt",
403 : .magic = { 0, cpu_to_be32(XFS_RMAP_CRC_MAGIC) },
404 : .verify_read = xfs_rmapbt_read_verify,
405 : .verify_write = xfs_rmapbt_write_verify,
406 : .verify_struct = xfs_rmapbt_verify,
407 : };
408 :
409 : STATIC int
410 5448558 : xfs_rmapbt_keys_inorder(
411 : struct xfs_btree_cur *cur,
412 : const union xfs_btree_key *k1,
413 : const union xfs_btree_key *k2)
414 : {
415 5448558 : uint32_t x;
416 5448558 : uint32_t y;
417 5448558 : uint64_t a;
418 5448558 : uint64_t b;
419 :
420 5448558 : x = be32_to_cpu(k1->rmap.rm_startblock);
421 5448558 : y = be32_to_cpu(k2->rmap.rm_startblock);
422 5448558 : if (x < y)
423 : return 1;
424 55949 : else if (x > y)
425 : return 0;
426 55949 : a = be64_to_cpu(k1->rmap.rm_owner);
427 55949 : b = be64_to_cpu(k2->rmap.rm_owner);
428 55949 : if (a < b)
429 : return 1;
430 52947 : else if (a > b)
431 : return 0;
432 52947 : a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset));
433 52947 : b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset));
434 52947 : if (a <= b)
435 52947 : return 1;
436 : return 0;
437 : }
438 :
439 : STATIC int
440 949794453 : xfs_rmapbt_recs_inorder(
441 : struct xfs_btree_cur *cur,
442 : const union xfs_btree_rec *r1,
443 : const union xfs_btree_rec *r2)
444 : {
445 949794453 : uint32_t x;
446 949794453 : uint32_t y;
447 949794453 : uint64_t a;
448 949794453 : uint64_t b;
449 :
450 949794453 : x = be32_to_cpu(r1->rmap.rm_startblock);
451 949794453 : y = be32_to_cpu(r2->rmap.rm_startblock);
452 949794453 : if (x < y)
453 : return 1;
454 42375874 : else if (x > y)
455 : return 0;
456 42375874 : a = be64_to_cpu(r1->rmap.rm_owner);
457 42375874 : b = be64_to_cpu(r2->rmap.rm_owner);
458 42375874 : if (a < b)
459 : return 1;
460 11976740 : else if (a > b)
461 : return 0;
462 11976740 : a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset));
463 11976740 : b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset));
464 11976740 : if (a <= b)
465 11976740 : return 1;
466 : return 0;
467 : }
468 :
469 : STATIC enum xbtree_key_contig
470 0 : xfs_rmapbt_keys_contiguous(
471 : struct xfs_btree_cur *cur,
472 : const union xfs_btree_key *key1,
473 : const union xfs_btree_key *key2,
474 : const union xfs_btree_key *mask)
475 : {
476 0 : ASSERT(!mask || mask->rmap.rm_startblock);
477 :
478 : /*
479 : * We only support checking contiguity of the physical space component.
480 : * If any callers ever need more specificity than that, they'll have to
481 : * implement it here.
482 : */
483 0 : ASSERT(!mask || (!mask->rmap.rm_owner && !mask->rmap.rm_offset));
484 :
485 0 : return xbtree_key_contig(be32_to_cpu(key1->rmap.rm_startblock),
486 0 : be32_to_cpu(key2->rmap.rm_startblock));
487 : }
488 :
489 : const struct xfs_btree_ops xfs_rmapbt_ops = {
490 : .rec_len = sizeof(struct xfs_rmap_rec),
491 : .key_len = 2 * sizeof(struct xfs_rmap_key),
492 : .lru_refs = XFS_RMAP_BTREE_REF,
493 : .geom_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING,
494 :
495 : .dup_cursor = xfs_rmapbt_dup_cursor,
496 : .set_root = xfs_rmapbt_set_root,
497 : .alloc_block = xfs_rmapbt_alloc_block,
498 : .free_block = xfs_rmapbt_free_block,
499 : .get_minrecs = xfs_rmapbt_get_minrecs,
500 : .get_maxrecs = xfs_rmapbt_get_maxrecs,
501 : .init_key_from_rec = xfs_rmapbt_init_key_from_rec,
502 : .init_high_key_from_rec = xfs_rmapbt_init_high_key_from_rec,
503 : .init_rec_from_cur = xfs_rmapbt_init_rec_from_cur,
504 : .init_ptr_from_cur = xfs_rmapbt_init_ptr_from_cur,
505 : .key_diff = xfs_rmapbt_key_diff,
506 : .buf_ops = &xfs_rmapbt_buf_ops,
507 : .diff_two_keys = xfs_rmapbt_diff_two_keys,
508 : .keys_inorder = xfs_rmapbt_keys_inorder,
509 : .recs_inorder = xfs_rmapbt_recs_inorder,
510 : .keys_contiguous = xfs_rmapbt_keys_contiguous,
511 : };
512 :
513 : static struct xfs_btree_cur *
514 480693021 : xfs_rmapbt_init_common(
515 : struct xfs_mount *mp,
516 : struct xfs_trans *tp,
517 : struct xfs_perag *pag)
518 : {
519 480693021 : struct xfs_btree_cur *cur;
520 :
521 : /* Overlapping btree; 2 keys per pointer. */
522 480693021 : cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_RMAP, &xfs_rmapbt_ops,
523 480693021 : mp->m_rmap_maxlevels, xfs_rmapbt_cur_cache);
524 480696028 : cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2);
525 :
526 480696028 : cur->bc_ag.pag = xfs_perag_hold(pag);
527 480694025 : return cur;
528 : }
529 :
530 : /* Create a new reverse mapping btree cursor. */
531 : struct xfs_btree_cur *
532 480685749 : xfs_rmapbt_init_cursor(
533 : struct xfs_mount *mp,
534 : struct xfs_trans *tp,
535 : struct xfs_buf *agbp,
536 : struct xfs_perag *pag)
537 : {
538 480685749 : struct xfs_agf *agf = agbp->b_addr;
539 480685749 : struct xfs_btree_cur *cur;
540 :
541 480685749 : cur = xfs_rmapbt_init_common(mp, tp, pag);
542 480688823 : cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
543 480688823 : cur->bc_ag.agbp = agbp;
544 480688823 : return cur;
545 : }
546 :
547 : /* Create a new reverse mapping btree cursor with a fake root for staging. */
548 : struct xfs_btree_cur *
549 4449 : xfs_rmapbt_stage_cursor(
550 : struct xfs_mount *mp,
551 : struct xbtree_afakeroot *afake,
552 : struct xfs_perag *pag)
553 : {
554 4449 : struct xfs_btree_cur *cur;
555 :
556 4449 : cur = xfs_rmapbt_init_common(mp, NULL, pag);
557 4449 : xfs_btree_stage_afakeroot(cur, afake);
558 4448 : return cur;
559 : }
560 :
561 : #ifdef CONFIG_XFS_BTREE_IN_XFILE
562 : /*
563 : * Validate an in-memory rmap btree block. Callers are allowed to generate an
564 : * in-memory btree even if the ondisk feature is not enabled.
565 : */
566 : static xfs_failaddr_t
567 17801584 : xfs_rmapbt_mem_verify(
568 : struct xfs_buf *bp)
569 : {
570 17801584 : struct xfs_mount *mp = bp->b_mount;
571 17801584 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
572 17801584 : xfs_failaddr_t fa;
573 17801584 : unsigned int level;
574 :
575 17801584 : if (!xfs_verify_magic(bp, block->bb_magic))
576 0 : return __this_address;
577 :
578 17801580 : fa = xfs_btree_sblock_v5hdr_verify(bp);
579 17801581 : if (fa)
580 : return fa;
581 :
582 17801565 : level = be16_to_cpu(block->bb_level);
583 17801565 : if (xfs_has_rmapbt(mp)) {
584 17801565 : if (level >= mp->m_rmap_maxlevels)
585 0 : return __this_address;
586 : } else {
587 0 : if (level >= xfs_rmapbt_maxlevels_ondisk())
588 0 : return __this_address;
589 : }
590 :
591 17801565 : return xfbtree_sblock_verify(bp,
592 : xfs_rmapbt_maxrecs(xfo_to_b(1), level == 0));
593 : }
594 :
595 : static void
596 4475 : xfs_rmapbt_mem_rw_verify(
597 : struct xfs_buf *bp)
598 : {
599 4475 : xfs_failaddr_t fa = xfs_rmapbt_mem_verify(bp);
600 :
601 4469 : if (fa)
602 0 : xfs_verifier_error(bp, -EFSCORRUPTED, fa);
603 4469 : }
604 :
605 : /* skip crc checks on in-memory btrees to save time */
606 : static const struct xfs_buf_ops xfs_rmapbt_mem_buf_ops = {
607 : .name = "xfs_rmapbt_mem",
608 : .magic = { 0, cpu_to_be32(XFS_RMAP_CRC_MAGIC) },
609 : .verify_read = xfs_rmapbt_mem_rw_verify,
610 : .verify_write = xfs_rmapbt_mem_rw_verify,
611 : .verify_struct = xfs_rmapbt_mem_verify,
612 : };
613 :
614 : static const struct xfs_btree_ops xfs_rmapbt_mem_ops = {
615 : .rec_len = sizeof(struct xfs_rmap_rec),
616 : .key_len = 2 * sizeof(struct xfs_rmap_key),
617 : .lru_refs = XFS_RMAP_BTREE_REF,
618 : .geom_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING |
619 : XFS_BTREE_IN_XFILE,
620 :
621 : .dup_cursor = xfbtree_dup_cursor,
622 : .set_root = xfbtree_set_root,
623 : .alloc_block = xfbtree_alloc_block,
624 : .free_block = xfbtree_free_block,
625 : .get_minrecs = xfbtree_get_minrecs,
626 : .get_maxrecs = xfbtree_get_maxrecs,
627 : .init_key_from_rec = xfs_rmapbt_init_key_from_rec,
628 : .init_high_key_from_rec = xfs_rmapbt_init_high_key_from_rec,
629 : .init_rec_from_cur = xfs_rmapbt_init_rec_from_cur,
630 : .init_ptr_from_cur = xfbtree_init_ptr_from_cur,
631 : .key_diff = xfs_rmapbt_key_diff,
632 : .buf_ops = &xfs_rmapbt_mem_buf_ops,
633 : .diff_two_keys = xfs_rmapbt_diff_two_keys,
634 : .keys_inorder = xfs_rmapbt_keys_inorder,
635 : .recs_inorder = xfs_rmapbt_recs_inorder,
636 : .keys_contiguous = xfs_rmapbt_keys_contiguous,
637 : };
638 :
639 : /* Create a cursor for an in-memory btree. */
640 : struct xfs_btree_cur *
641 11105082 : xfs_rmapbt_mem_cursor(
642 : struct xfs_perag *pag,
643 : struct xfs_trans *tp,
644 : struct xfs_buf *head_bp,
645 : struct xfbtree *xfbtree)
646 : {
647 11105082 : struct xfs_btree_cur *cur;
648 11105082 : struct xfs_mount *mp = pag->pag_mount;
649 :
650 : /* Overlapping btree; 2 keys per pointer. */
651 11105082 : cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_RMAP,
652 11105082 : &xfs_rmapbt_mem_ops, mp->m_rmap_maxlevels,
653 : xfs_rmapbt_cur_cache);
654 11105074 : cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2);
655 11105074 : cur->bc_mem.xfbtree = xfbtree;
656 11105074 : cur->bc_mem.head_bp = head_bp;
657 11105074 : cur->bc_nlevels = xfs_btree_mem_head_nlevels(head_bp);
658 :
659 11105074 : cur->bc_mem.pag = xfs_perag_hold(pag);
660 11105072 : return cur;
661 : }
662 :
663 : /* Create an in-memory rmap btree. */
664 : int
665 4471 : xfs_rmapbt_mem_create(
666 : struct xfs_mount *mp,
667 : xfs_agnumber_t agno,
668 : struct xfs_buftarg *target,
669 : struct xfbtree **xfbtreep)
670 : {
671 4471 : struct xfbtree_config cfg = {
672 : .btree_ops = &xfs_rmapbt_mem_ops,
673 : .target = target,
674 : .owner = agno,
675 : .flags = XFBTREE_DIRECT_MAP,
676 : };
677 :
678 4471 : return xfbtree_create(mp, &cfg, xfbtreep);
679 : }
680 : #endif /* CONFIG_XFS_BTREE_IN_XFILE */
681 :
682 : /*
683 : * Install a new reverse mapping btree root. Caller is responsible for
684 : * invalidating and freeing the old btree blocks.
685 : */
686 : void
687 4449 : xfs_rmapbt_commit_staged_btree(
688 : struct xfs_btree_cur *cur,
689 : struct xfs_trans *tp,
690 : struct xfs_buf *agbp)
691 : {
692 4449 : struct xfs_agf *agf = agbp->b_addr;
693 4449 : struct xbtree_afakeroot *afake = cur->bc_ag.afake;
694 :
695 4449 : ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
696 :
697 4449 : agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root);
698 4449 : agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels);
699 4449 : agf->agf_rmap_blocks = cpu_to_be32(afake->af_blocks);
700 4449 : xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS |
701 : XFS_AGF_RMAP_BLOCKS);
702 4449 : xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_rmapbt_ops);
703 4449 : }
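/*
 * Hypothetical fragment showing the staging lifecycle that
 * xfs_rmapbt_stage_cursor() and xfs_rmapbt_commit_staged_btree()
 * implement: online repair builds a replacement tree against a fake
 * root, then atomically installs it in the AGF.
 */
	cur = xfs_rmapbt_stage_cursor(mp, &afake, pag);
	/* ... load the new records, e.g. with xfs_btree_bload() ... */
	xfs_rmapbt_commit_staged_btree(cur, tp, agbp);
	xfs_btree_del_cursor(cur, 0);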
704 :
705 : /* Calculate number of records in a reverse mapping btree block. */
706 : static inline unsigned int
707 : xfs_rmapbt_block_maxrecs(
708 : unsigned int blocklen,
709 : bool leaf)
710 : {
711 17849815 : if (leaf)
712 24125 : return blocklen / sizeof(struct xfs_rmap_rec);
713 3380733 : return blocklen /
714 : (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
715 : }
716 :
717 : /*
718 : * Calculate number of records in an rmap btree block.
719 : */
720 : int
721 48250 : xfs_rmapbt_maxrecs(
722 : int blocklen,
723 : int leaf)
724 : {
725 17849815 : blocklen -= XFS_RMAP_BLOCK_LEN;
726 17849815 : return xfs_rmapbt_block_maxrecs(blocklen, leaf);
727 : }
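/*
 * Worked example of the two divisions above, assuming 4096-byte blocks
 * and the usual ondisk sizes (56-byte CRC short-form block header,
 * 24-byte xfs_rmap_rec, 20-byte packed xfs_rmap_key, 4-byte
 * xfs_rmap_ptr_t):
 *
 *   leaf: (4096 - 56) / 24           = 168 records
 *   node: (4096 - 56) / (2 * 20 + 4) =  91 key pairs + pointers
 *
 * Node blocks divide by two keys per pointer because this overlapping
 * btree stores both a low and a high key for each child block.
 */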
728 :
729 : /* Compute the max possible height for reverse mapping btrees. */
730 : unsigned int
731 23878 : xfs_rmapbt_maxlevels_ondisk(void)
732 : {
733 23878 : unsigned int minrecs[2];
734 23878 : unsigned int blocklen;
735 :
736 23878 : blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN;
737 :
738 23878 : minrecs[0] = xfs_rmapbt_block_maxrecs(blocklen, true) / 2;
739 23878 : minrecs[1] = xfs_rmapbt_block_maxrecs(blocklen, false) / 2;
740 :
741 : /*
742 : * Compute the asymptotic maxlevels for an rmapbt on any reflink fs.
743 : *
744 : * On a reflink filesystem, each AG block can have up to 2^32 (per the
745 : * refcount record format) owners, which means that theoretically we
746 : * could face up to 2^64 rmap records. However, we're likely to run
747 : * out of blocks in the AG long before that happens, which means that
748 : * we must compute the max height based on what the btree will look
749 : * like if it consumes almost all the blocks in the AG due to maximal
750 : * sharing factor.
751 : */
752 23878 : return xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS);
753 : }
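/*
 * A minimal sketch of the space-to-height idea used above, modeled on
 * xfs_btree_space_to_height but hypothetical and simplified: grow the
 * tree one level at a time, assuming every block holds only minrecs
 * entries, until the accumulated node blocks can index all of the
 * space the tree could consume.
 */
static unsigned int
space_to_height_sketch(
	const unsigned int	minrecs[2],
	unsigned long long	blocks)
{
	unsigned long long	node_blocks = minrecs[1];
	unsigned long long	blocks_left = blocks - 1;
	unsigned int		height = 1;

	if (blocks < 1)
		return 0;

	while (node_blocks < blocks_left) {
		/* this level of half-full nodes indexes node_blocks children */
		blocks_left -= node_blocks;
		node_blocks *= minrecs[1];
		height++;
	}
	return height;
}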
754 :
755 : /* Compute the maximum height of an rmap btree. */
756 : void
757 24119 : xfs_rmapbt_compute_maxlevels(
758 : struct xfs_mount *mp)
759 : {
760 24119 : if (!xfs_has_rmapbt(mp)) {
761 253 : mp->m_rmap_maxlevels = 0;
762 253 : return;
763 : }
764 :
765 23866 : if (xfs_has_reflink(mp)) {
766 : /*
767 : * Compute the asymptotic maxlevels for an rmap btree on a
768 : * filesystem that supports reflink.
769 : *
770 : * On a reflink filesystem, each AG block can have up to 2^32
771 : * (per the refcount record format) owners, which means that
772 : * theoretically we could face up to 2^64 rmap records.
773 : * However, we're likely to run out of blocks in the AG long
774 : * before that happens, which means that we must compute the
775 : * max height based on what the btree will look like if it
776 : * consumes almost all the blocks in the AG due to maximal
777 : * sharing factor.
778 : */
779 23861 : mp->m_rmap_maxlevels = xfs_btree_space_to_height(mp->m_rmap_mnr,
780 23861 : mp->m_sb.sb_agblocks);
781 : } else {
782 : /*
783 : * If there's no block sharing, compute the maximum rmapbt
784 : * height assuming one rmap record per AG block.
785 : */
786 5 : mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(
787 5 : mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
788 : }
789 23866 : ASSERT(mp->m_rmap_maxlevels <= xfs_rmapbt_maxlevels_ondisk());
790 : }
791 :
792 : /* Calculate the rmap btree size for some records. */
793 : xfs_extlen_t
794 712132 : xfs_rmapbt_calc_size(
795 : struct xfs_mount *mp,
796 : unsigned long long len)
797 : {
798 1009052 : return xfs_btree_calc_size(mp->m_rmap_mnr, len);
799 : }
800 :
801 : /*
802 : * Calculate the maximum rmap btree size.
803 : */
804 : xfs_extlen_t
805 0 : xfs_rmapbt_max_size(
806 : struct xfs_mount *mp,
807 : xfs_agblock_t agblocks)
808 : {
809 : /* Bail out if we're uninitialized, which can happen in mkfs. */
810 0 : if (mp->m_rmap_mxr[0] == 0)
811 : return 0;
812 :
813 296921 : return xfs_rmapbt_calc_size(mp, agblocks);
814 : }
815 :
816 : /*
817 : * Figure out how many blocks to reserve and how many are used by this btree.
818 : */
819 : int
820 301995 : xfs_rmapbt_calc_reserves(
821 : struct xfs_mount *mp,
822 : struct xfs_trans *tp,
823 : struct xfs_perag *pag,
824 : xfs_extlen_t *ask,
825 : xfs_extlen_t *used)
826 : {
827 301995 : struct xfs_buf *agbp;
828 301995 : struct xfs_agf *agf;
829 301995 : xfs_agblock_t agblocks;
830 301995 : xfs_extlen_t tree_len;
831 301995 : int error;
832 :
833 301995 : if (!xfs_has_rmapbt(mp))
834 : return 0;
835 :
836 296919 : error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
837 296893 : if (error)
838 : return error;
839 :
840 296893 : agf = agbp->b_addr;
841 296893 : agblocks = be32_to_cpu(agf->agf_length);
842 296893 : tree_len = be32_to_cpu(agf->agf_rmap_blocks);
843 296893 : xfs_trans_brelse(tp, agbp);
844 :
845 : /*
846 : * The log is permanently allocated, so the space it occupies will
847 : * never be available for the kinds of things that would require btree
848 : * expansion. We therefore can pretend the space isn't there.
849 : */
850 593848 : if (xfs_ag_contains_log(mp, pag->pag_agno))
851 58293 : agblocks -= mp->m_sb.sb_logblocks;
852 :
853 : /* Reserve 1% of the AG or enough for 1 block per record. */
854 296924 : *ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks));
855 296923 : *used += tree_len;
856 :
857 296923 : return error;
858 : }
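/*
 * Worked example of the reservation rule above, with made-up numbers:
 * a 1 TiB AG of 4096-byte blocks has agblocks = 268435456, so the 1%
 * floor is 2684354 blocks; the reservation is whichever is larger,
 * that floor or the worst-case tree size from xfs_rmapbt_max_size().
 */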
859 :
860 : int __init
861 12 : xfs_rmapbt_init_cur_cache(void)
862 : {
863 12 : xfs_rmapbt_cur_cache = kmem_cache_create("xfs_rmapbt_cur",
864 12 : xfs_btree_cur_sizeof(xfs_rmapbt_maxlevels_ondisk()),
865 : 0, 0, NULL);
866 :
867 12 : if (!xfs_rmapbt_cur_cache)
868 0 : return -ENOMEM;
869 : return 0;
870 : }
871 :
872 : void
873 12 : xfs_rmapbt_destroy_cur_cache(void)
874 : {
875 12 : kmem_cache_destroy(xfs_rmapbt_cur_cache);
876 12 : xfs_rmapbt_cur_cache = NULL;
877 12 : }
|