Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
4 : * All Rights Reserved.
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_log_format.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_bit.h"
13 : #include "xfs_mount.h"
14 : #include "xfs_inode.h"
15 : #include "xfs_trans.h"
16 : #include "xfs_buf_item.h"
17 : #include "xfs_btree.h"
18 : #include "xfs_errortag.h"
19 : #include "xfs_error.h"
20 : #include "xfs_trace.h"
21 : #include "xfs_alloc.h"
22 : #include "xfs_log.h"
23 : #include "xfs_btree_staging.h"
24 : #include "xfs_ag.h"
25 : #include "xfs_alloc_btree.h"
26 : #include "xfs_ialloc_btree.h"
27 : #include "xfs_bmap_btree.h"
28 : #include "xfs_rmap_btree.h"
29 : #include "xfs_refcount_btree.h"
30 : #include "xfs_health.h"
31 : #include "scrub/xfile.h"
32 : #include "scrub/xfbtree.h"
33 : #include "xfs_btree_mem.h"
34 :
35 : /*
36 : * Btree magic numbers.
37 : */
38 : uint32_t
39 >15547*10^7 : xfs_btree_magic(
40 : struct xfs_mount *mp,
41 : const struct xfs_btree_ops *ops)
42 : {
43 >15547*10^7 : int idx = xfs_has_crc(mp) ? 1 : 0;
44 >15547*10^7 : __be32 magic = ops->buf_ops->magic[idx];
45 :
46 : /* Ensure we asked for crc for crc-only magics. */
47 >15547*10^7 : ASSERT(magic != 0);
48 >15547*10^7 : return be32_to_cpu(magic);
49 : }
50 :
51 : /*
52 : * These sibling pointer checks are optimised for null sibling pointers. This
53 : * happens a lot, and we don't need to byte swap at runtime if the sibling
54 : * pointer is NULL.
55 : *
56 : * These are explicitly marked at inline because the cost of calling them as
57 : * functions instead of inlining them is about 36 bytes extra code per call site
58 : * on x86-64. Yes, gcc-11 fails to inline them, and explicit inlining of these
59 : * two sibling check functions reduces the compiled code size by over 300
60 : * bytes.
61 : */
62 : static inline xfs_failaddr_t
63 3654529052 : xfs_btree_check_lblock_siblings(
64 : struct xfs_mount *mp,
65 : struct xfs_btree_cur *cur,
66 : int level,
67 : xfs_fsblock_t fsb,
68 : __be64 dsibling)
69 : {
70 3654529052 : xfs_fsblock_t sibling;
71 :
72 3654529052 : if (dsibling == cpu_to_be64(NULLFSBLOCK))
73 : return NULL;
74 :
75 521879926 : sibling = be64_to_cpu(dsibling);
76 521879926 : if (sibling == fsb)
77 0 : return __this_address;
78 521879926 : if (level >= 0) {
79 508857333 : if (!xfs_btree_check_lptr(cur, sibling, level + 1))
80 0 : return __this_address;
81 13022593 : } else if (cur && (cur->bc_flags & XFS_BTREE_IN_XFILE)) {
82 0 : if (!xfbtree_verify_xfileoff(cur, sibling))
83 0 : return __this_address;
84 : } else {
85 13022593 : if (!xfs_verify_fsbno(mp, sibling))
86 0 : return __this_address;
87 : }
88 :
89 : return NULL;
90 : }
91 :
92 : static inline xfs_failaddr_t
93 >30764*10^7 : xfs_btree_check_sblock_siblings(
94 : struct xfs_perag *pag,
95 : struct xfs_btree_cur *cur,
96 : int level,
97 : xfs_agblock_t agbno,
98 : __be32 dsibling)
99 : {
100 >30764*10^7 : xfs_agblock_t sibling;
101 :
102 >30764*10^7 : if (dsibling == cpu_to_be32(NULLAGBLOCK))
103 : return NULL;
104 :
105 >20917*10^7 : sibling = be32_to_cpu(dsibling);
106 >20917*10^7 : if (sibling == agbno)
107 0 : return __this_address;
108 >20917*10^7 : if (level >= 0) {
109 >20911*10^7 : if (!xfs_btree_check_sptr(cur, sibling, level + 1))
110 0 : return __this_address;
111 57498716 : } else if (cur && (cur->bc_flags & XFS_BTREE_IN_XFILE)) {
112 0 : if (!xfbtree_verify_xfileoff(cur, sibling))
113 0 : return __this_address;
114 : } else {
115 57498716 : if (!xfs_verify_agbno(pag, sibling))
116 0 : return __this_address;
117 : }
118 : return NULL;
119 : }
120 :
121 : /*
122 : * Check a long btree block header. Return the address of the failing check,
123 : * or NULL if everything is ok.
124 : */
125 : xfs_failaddr_t
126 1812444744 : __xfs_btree_check_lblock(
127 : struct xfs_btree_cur *cur,
128 : struct xfs_btree_block *block,
129 : int level,
130 : struct xfs_buf *bp)
131 : {
132 1812444744 : struct xfs_mount *mp = cur->bc_mp;
133 1812444744 : int crc = xfs_has_crc(mp);
134 1812444744 : xfs_failaddr_t fa;
135 1812444744 : xfs_fsblock_t fsb = NULLFSBLOCK;
136 :
137 1812444744 : if (crc) {
138 1812440612 : if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
139 0 : return __this_address;
140 3624873414 : if (block->bb_u.l.bb_blkno !=
141 1812436707 : cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
142 0 : return __this_address;
143 1812436707 : if (block->bb_u.l.bb_pad != cpu_to_be32(0))
144 0 : return __this_address;
145 : }
146 :
147 3624881678 : if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops))
148 0 : return __this_address;
149 1812441142 : if (be16_to_cpu(block->bb_level) != level)
150 38 : return __this_address;
151 3624887164 : if (be16_to_cpu(block->bb_numrecs) >
152 1812441343 : cur->bc_ops->get_maxrecs(cur, level))
153 0 : return __this_address;
154 :
155 1812445821 : if ((cur->bc_flags & XFS_BTREE_IN_XFILE) && bp)
156 1328473686 : fsb = xfbtree_buf_to_xfoff(cur, bp);
157 483972135 : else if (bp)
158 469375185 : fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
159 :
160 1812445811 : fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
161 : block->bb_u.l.bb_leftsib);
162 1812441170 : if (!fa)
163 1812441362 : fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
164 : block->bb_u.l.bb_rightsib);
165 : return fa;
166 : }
167 :
168 : /* Check a long btree block header. */
169 : static int
170 1808800568 : xfs_btree_check_lblock(
171 : struct xfs_btree_cur *cur,
172 : struct xfs_btree_block *block,
173 : int level,
174 : struct xfs_buf *bp)
175 : {
176 1808800568 : struct xfs_mount *mp = cur->bc_mp;
177 1808800568 : xfs_failaddr_t fa;
178 :
179 1808800568 : fa = __xfs_btree_check_lblock(cur, block, level, bp);
180 3617601860 : if (XFS_IS_CORRUPT(mp, fa != NULL) ||
181 1808799393 : XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK)) {
182 0 : if (bp)
183 0 : trace_xfs_btree_corrupt(bp, _RET_IP_);
184 0 : xfs_btree_mark_sick(cur);
185 0 : return -EFSCORRUPTED;
186 : }
187 : return 0;
188 : }
189 :
190 : /*
191 : * Check a short btree block header. Return the address of the failing check,
192 : * or NULL if everything is ok.
193 : */
194 : xfs_failaddr_t
195 >15353*10^7 : __xfs_btree_check_sblock(
196 : struct xfs_btree_cur *cur,
197 : struct xfs_btree_block *block,
198 : int level,
199 : struct xfs_buf *bp)
200 : {
201 >15353*10^7 : struct xfs_mount *mp = cur->bc_mp;
202 >15353*10^7 : struct xfs_perag *pag = cur->bc_ag.pag;
203 >15353*10^7 : int crc = xfs_has_crc(mp);
204 >15353*10^7 : xfs_failaddr_t fa;
205 >15353*10^7 : xfs_agblock_t agbno = NULLAGBLOCK;
206 :
207 >15353*10^7 : if (crc) {
208 >15352*10^7 : if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
209 0 : return __this_address;
210 >30735*10^7 : if (block->bb_u.s.bb_blkno !=
211 >15367*10^7 : cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
212 0 : return __this_address;
213 : }
214 :
215 >30739*10^7 : if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops))
216 0 : return __this_address;
217 >15377*10^7 : if (be16_to_cpu(block->bb_level) != level)
218 0 : return __this_address;
219 >30762*10^7 : if (be16_to_cpu(block->bb_numrecs) >
220 >15388*10^7 : cur->bc_ops->get_maxrecs(cur, level))
221 0 : return __this_address;
222 :
223 >15373*10^7 : if ((cur->bc_flags & XFS_BTREE_IN_XFILE) && bp) {
224 157822390 : pag = NULL;
225 157822390 : agbno = xfbtree_buf_to_xfoff(cur, bp);
226 >15357*10^7 : } else if (bp) {
227 >15357*10^7 : agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
228 : }
229 :
230 >15373*10^7 : fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno,
231 : block->bb_u.s.bb_leftsib);
232 >15385*10^7 : if (!fa)
233 >15388*10^7 : fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno,
234 : block->bb_u.s.bb_rightsib);
235 : return fa;
236 : }
237 :
238 : /* Check a short btree block header. */
239 : STATIC int
240 >15354*10^7 : xfs_btree_check_sblock(
241 : struct xfs_btree_cur *cur,
242 : struct xfs_btree_block *block,
243 : int level,
244 : struct xfs_buf *bp)
245 : {
246 >15354*10^7 : struct xfs_mount *mp = cur->bc_mp;
247 >15354*10^7 : xfs_failaddr_t fa;
248 :
249 >15354*10^7 : fa = __xfs_btree_check_sblock(cur, block, level, bp);
250 >30879*10^7 : if (XFS_IS_CORRUPT(mp, fa != NULL) ||
251 >15377*10^7 : XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_SBLOCK)) {
252 0 : if (bp)
253 0 : trace_xfs_btree_corrupt(bp, _RET_IP_);
254 0 : xfs_btree_mark_sick(cur);
255 0 : return -EFSCORRUPTED;
256 : }
257 : return 0;
258 : }
259 :
260 : /*
261 : * Debug routine: check that block header is ok.
262 : */
263 : int
264 >15540*10^7 : xfs_btree_check_block(
265 : struct xfs_btree_cur *cur, /* btree cursor */
266 : struct xfs_btree_block *block, /* generic btree block pointer */
267 : int level, /* level of the btree block */
268 : struct xfs_buf *bp) /* buffer containing block, if any */
269 : {
270 >15540*10^7 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
271 1808798850 : return xfs_btree_check_lblock(cur, block, level, bp);
272 : else
273 >15359*10^7 : return xfs_btree_check_sblock(cur, block, level, bp);
274 : }
275 :
276 : /* Check that this long pointer is valid and points within the fs. */
277 : bool
278 1284892818 : xfs_btree_check_lptr(
279 : struct xfs_btree_cur *cur,
280 : xfs_fsblock_t fsbno,
281 : int level)
282 : {
283 1284892818 : if (level <= 0)
284 : return false;
285 1284892818 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
286 0 : return xfbtree_verify_xfileoff(cur, fsbno);
287 1284892818 : return xfs_verify_fsbno(cur->bc_mp, fsbno);
288 : }
289 :
290 : /* Check that this short pointer is valid and points within the AG. */
291 : bool
292 >24242*10^7 : xfs_btree_check_sptr(
293 : struct xfs_btree_cur *cur,
294 : xfs_agblock_t agbno,
295 : int level)
296 : {
297 >24242*10^7 : if (level <= 0)
298 : return false;
299 >24242*10^7 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
300 208897228 : return xfbtree_verify_xfileoff(cur, agbno);
301 >24221*10^7 : return xfs_verify_agbno(cur->bc_ag.pag, agbno);
302 : }
303 :
304 : /*
305 : * Check that a given (indexed) btree pointer at a certain level of a
306 : * btree is valid and doesn't point past where it should.
307 : */
308 : static int
309 35333832563 : xfs_btree_check_ptr(
310 : struct xfs_btree_cur *cur,
311 : const union xfs_btree_ptr *ptr,
312 : int index,
313 : int level)
314 : {
315 35333832563 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
316 1107152226 : return xfbtree_check_ptr(cur, ptr, index, level);
317 :
318 34226680337 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
319 772323881 : if (xfs_btree_check_lptr(cur, be64_to_cpu((&ptr->l)[index]),
320 : level))
321 : return 0;
322 0 : xfs_err(cur->bc_mp,
323 : "Inode %llu fork %d: Corrupt btree %d pointer at level %d index %d.",
324 : cur->bc_ino.ip->i_ino,
325 : cur->bc_ino.whichfork, cur->bc_btnum,
326 : level, index);
327 : } else {
328 66908712912 : if (xfs_btree_check_sptr(cur, be32_to_cpu((&ptr->s)[index]),
329 : level))
330 : return 0;
331 0 : xfs_err(cur->bc_mp,
332 : "AG %u: Corrupt btree %d pointer at level %d index %d.",
333 : cur->bc_ag.pag->pag_agno, cur->bc_btnum,
334 : level, index);
335 : }
336 :
337 0 : xfs_btree_mark_sick(cur);
338 0 : return -EFSCORRUPTED;
339 : }
340 :
341 : #ifdef DEBUG
342 : # define xfs_btree_debug_check_ptr xfs_btree_check_ptr
343 : #else
344 : # define xfs_btree_debug_check_ptr(...) (0)
345 : #endif
346 :
347 : /*
348 : * Calculate CRC on the whole btree block and stuff it into the
349 : * long-form btree header.
350 : *
351 : * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
352 : * it into the buffer so recovery knows what the last modification was that made
353 : * it to disk.
354 : */
355 : void
356 5033163 : xfs_btree_lblock_calc_crc(
357 : struct xfs_buf *bp)
358 : {
359 5033163 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
360 5033163 : struct xfs_buf_log_item *bip = bp->b_log_item;
361 :
362 5033163 : if (!xfs_has_crc(bp->b_mount))
363 : return;
364 5033163 : if (bip)
365 5027823 : block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
366 5033163 : xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
367 : }
368 :
369 : bool
370 6631688 : xfs_btree_lblock_verify_crc(
371 : struct xfs_buf *bp)
372 : {
373 6631688 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
374 6631688 : struct xfs_mount *mp = bp->b_mount;
375 :
376 6631688 : if (xfs_has_crc(mp)) {
377 6631688 : if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
378 : return false;
379 6631688 : return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
380 : }
381 :
382 : return true;
383 : }
384 :
385 : /*
386 : * Calculate CRC on the whole btree block and stuff it into the
387 : * short-form btree header.
388 : *
389 : * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
390 : * it into the buffer so recovery knows what the last modification was that made
391 : * it to disk.
392 : */
393 : void
394 17123713 : xfs_btree_sblock_calc_crc(
395 : struct xfs_buf *bp)
396 : {
397 17123713 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
398 17123713 : struct xfs_buf_log_item *bip = bp->b_log_item;
399 :
400 17123713 : if (!xfs_has_crc(bp->b_mount))
401 : return;
402 17116829 : if (bip)
403 16654205 : block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
404 17116829 : xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
405 : }
406 :
407 : bool
408 2393030 : xfs_btree_sblock_verify_crc(
409 : struct xfs_buf *bp)
410 : {
411 2393030 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
412 2393030 : struct xfs_mount *mp = bp->b_mount;
413 :
414 2393030 : if (xfs_has_crc(mp)) {
415 2392878 : if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
416 : return false;
417 2392878 : return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
418 : }
419 :
420 : return true;
421 : }
422 :
423 : static int
424 406346 : xfs_btree_free_block(
425 : struct xfs_btree_cur *cur,
426 : struct xfs_buf *bp)
427 : {
428 406346 : int error;
429 :
430 406346 : trace_xfs_btree_free_block(cur, bp);
431 :
432 406346 : error = cur->bc_ops->free_block(cur, bp);
433 406346 : if (!error) {
434 406346 : xfs_trans_binval(cur->bc_tp, bp);
435 406346 : XFS_BTREE_STATS_INC(cur, free);
436 : }
437 406346 : return error;
438 : }
439 :
440 : /*
441 : * Delete the btree cursor.
442 : */
443 : void
444 4537031743 : xfs_btree_del_cursor(
445 : struct xfs_btree_cur *cur, /* btree cursor */
446 : int error) /* del because of error */
447 : {
448 4537031743 : int i; /* btree level */
449 :
450 : /*
451 : * Clear the buffer pointers and release the buffers. If we're doing
452 : * this because of an error, inspect all of the entries in the bc_bufs
453 : * array for buffers to be unlocked. This is because some of the btree
454 : * code works from level n down to 0, and if we get an error along the
455 : * way we won't have initialized all the entries down to 0.
456 : */
457 11548233907 : for (i = 0; i < cur->bc_nlevels; i++) {
458 7192292527 : if (cur->bc_levels[i].bp)
459 6403425036 : xfs_trans_brelse(cur->bc_tp, cur->bc_levels[i].bp);
460 788867491 : else if (!error)
461 : break;
462 : }
463 :
464 : /*
465 : * If we are doing a BMBT update, the number of unaccounted blocks
466 : * allocated during this cursor life time should be zero. If it's not
467 : * zero, then we should be shut down or on our way to shutdown due to
468 : * cancelling a dirty transaction on error.
469 : */
470 4537729305 : ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
471 : xfs_is_shutdown(cur->bc_mp) || error != 0);
472 4537729305 : if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
473 0 : kmem_free(cur->bc_ops);
474 4537729305 : if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
475 3861475857 : !(cur->bc_flags & XFS_BTREE_IN_XFILE) && cur->bc_ag.pag)
476 3861475857 : xfs_perag_put(cur->bc_ag.pag);
477 4537901984 : if (cur->bc_flags & XFS_BTREE_IN_XFILE) {
478 512441385 : if (cur->bc_mem.pag)
479 14450845 : xfs_perag_put(cur->bc_mem.pag);
480 : }
481 4537901993 : kmem_cache_free(cur->bc_cache, cur);
482 4537925537 : }
483 :
484 : /* Return the buffer target for this btree's buffer. */
485 : static inline struct xfs_buftarg *
486 7450555782 : xfs_btree_buftarg(
487 : struct xfs_btree_cur *cur)
488 : {
489 7450555782 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
490 531870425 : return xfbtree_target(cur->bc_mem.xfbtree);
491 6918685357 : return cur->bc_mp->m_ddev_targp;
492 : }
493 :
494 : /* Return the block size (in units of 512b sectors) for this btree. */
495 : static inline unsigned int
496 7450990955 : xfs_btree_bbsize(
497 : struct xfs_btree_cur *cur)
498 : {
499 7450990955 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
500 531870425 : return xfbtree_bbsize();
501 6919120530 : return cur->bc_mp->m_bsize;
502 : }
503 :
504 : /*
505 : * Duplicate the btree cursor.
506 : * Allocate a new one, copy the record, re-get the buffers.
507 : */
508 : int /* error */
509 175922617 : xfs_btree_dup_cursor(
510 : struct xfs_btree_cur *cur, /* input cursor */
511 : struct xfs_btree_cur **ncur) /* output cursor */
512 : {
513 175922617 : struct xfs_buf *bp; /* btree block's buffer pointer */
514 175922617 : int error; /* error return value */
515 175922617 : int i; /* level number of btree block */
516 175922617 : xfs_mount_t *mp; /* mount structure for filesystem */
517 175922617 : struct xfs_btree_cur *new; /* new cursor value */
518 175922617 : xfs_trans_t *tp; /* transaction pointer, can be NULL */
519 :
520 175922617 : tp = cur->bc_tp;
521 175922617 : mp = cur->bc_mp;
522 :
523 : /*
524 : * Allocate a new cursor like the old one.
525 : */
526 175922617 : new = cur->bc_ops->dup_cursor(cur);
527 :
528 : /*
529 : * Copy the record currently in the cursor.
530 : */
531 175924137 : new->bc_rec = cur->bc_rec;
532 :
533 : /*
534 : * For each level current, re-get the buffer and copy the ptr value.
535 : */
536 541278083 : for (i = 0; i < new->bc_nlevels; i++) {
537 365351906 : new->bc_levels[i].ptr = cur->bc_levels[i].ptr;
538 365351906 : new->bc_levels[i].ra = cur->bc_levels[i].ra;
539 365351906 : bp = cur->bc_levels[i].bp;
540 365351906 : if (bp) {
541 321615872 : error = xfs_trans_read_buf(mp, tp,
542 : xfs_btree_buftarg(cur),
543 : xfs_buf_daddr(bp),
544 321615930 : xfs_btree_bbsize(cur), 0, &bp,
545 321617892 : cur->bc_ops->buf_ops);
546 321617919 : if (xfs_metadata_is_sick(error))
547 0 : xfs_btree_mark_sick(new);
548 321617919 : if (error) {
549 7 : xfs_btree_del_cursor(new, error);
550 7 : *ncur = NULL;
551 7 : return error;
552 : }
553 : }
554 365353946 : new->bc_levels[i].bp = bp;
555 : }
556 175926177 : *ncur = new;
557 175926177 : return 0;
558 : }
559 :
560 : /*
561 : * XFS btree block layout and addressing:
562 : *
563 : * There are two types of blocks in the btree: leaf and non-leaf blocks.
564 : *
565 : * The leaf record start with a header then followed by records containing
566 : * the values. A non-leaf block also starts with the same header, and
567 : * then first contains lookup keys followed by an equal number of pointers
568 : * to the btree blocks at the previous level.
569 : *
570 : * +--------+-------+-------+-------+-------+-------+-------+
571 : * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
572 : * +--------+-------+-------+-------+-------+-------+-------+
573 : *
574 : * +--------+-------+-------+-------+-------+-------+-------+
575 : * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
576 : * +--------+-------+-------+-------+-------+-------+-------+
577 : *
578 : * The header is called struct xfs_btree_block for reasons better left unknown
579 : * and comes in different versions for short (32bit) and long (64bit) block
580 : * pointers. The record and key structures are defined by the btree instances
581 : * and opaque to the btree core. The block pointers are simple disk endian
582 : * integers, available in a short (32bit) and long (64bit) variant.
583 : *
584 : * The helpers below calculate the offset of a given record, key or pointer
585 : * into a btree block (xfs_btree_*_offset) or return a pointer to the given
586 : * record, key or pointer (xfs_btree_*_addr). Note that all addressing
587 : * inside the btree block is done using indices starting at one, not zero!
588 : *
589 : * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
590 : * overlapping intervals. In such a tree, records are still sorted lowest to
591 : * highest and indexed by the smallest key value that refers to the record.
592 : * However, nodes are different: each pointer has two associated keys -- one
593 : * indexing the lowest key available in the block(s) below (the same behavior
594 : * as the key in a regular btree) and another indexing the highest key
595 : * available in the block(s) below. Because records are /not/ sorted by the
596 : * highest key, all leaf block updates require us to compute the highest key
597 : * that matches any record in the leaf and to recursively update the high keys
598 : * in the nodes going further up in the tree, if necessary. Nodes look like
599 : * this:
600 : *
601 : * +--------+-----+-----+-----+-----+-----+-------+-------+-----+
602 : * Non-Leaf: | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... |
603 : * +--------+-----+-----+-----+-----+-----+-------+-------+-----+
604 : *
605 : * To perform an interval query on an overlapped tree, perform the usual
606 : * depth-first search and use the low and high keys to decide if we can skip
607 : * that particular node. If a leaf node is reached, return the records that
608 : * intersect the interval. Note that an interval query may return numerous
609 : * entries. For a non-overlapped tree, simply search for the record associated
610 : * with the lowest key and iterate forward until a non-matching record is
611 : * found. Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by
612 : * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discuss this in
613 : * more detail.
614 : *
615 : * Why do we care about overlapping intervals? Let's say you have a bunch of
616 : * reverse mapping records on a reflink filesystem:
617 : *
618 : * 1: +- file A startblock B offset C length D -----------+
619 : * 2: +- file E startblock F offset G length H --------------+
620 : * 3: +- file I startblock F offset J length K --+
621 : * 4: +- file L... --+
622 : *
623 : * Now say we want to map block (B+D) into file A at offset (C+D). Ideally,
624 : * we'd simply increment the length of record 1. But how do we find the record
625 : * that ends at (B+D-1) (i.e. record 1)? A LE lookup of (B+D-1) would return
626 : * record 3 because the keys are ordered first by startblock. An interval
627 : * query would return records 1 and 2 because they both overlap (B+D-1), and
628 : * from that we can pick out record 1 as the appropriate left neighbor.
629 : *
630 : * In the non-overlapped case you can do a LE lookup and decrement the cursor
631 : * because a record's interval must end before the next record.
632 : */
633 :
634 : /*
635 : * Return size of the btree block header for this btree instance.
636 : */
637 >44839*10^7 : static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
638 : {
639 >44839*10^7 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
640 4715206330 : if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
641 : return XFS_BTREE_LBLOCK_CRC_LEN;
642 0 : return XFS_BTREE_LBLOCK_LEN;
643 : }
644 >44367*10^7 : if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
645 >44372*10^7 : return XFS_BTREE_SBLOCK_CRC_LEN;
646 : return XFS_BTREE_SBLOCK_LEN;
647 : }
648 :
649 : /*
650 : * Return size of btree block pointers for this btree instance.
651 : */
652 : static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
653 : {
654 39391868669 : return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
655 39391868669 : sizeof(__be64) : sizeof(__be32);
656 : }
657 :
658 : /*
659 : * Calculate offset of the n-th record in a btree block.
660 : */
661 : STATIC size_t
662 >31449*10^7 : xfs_btree_rec_offset(
663 : struct xfs_btree_cur *cur,
664 : int n)
665 : {
666 >31449*10^7 : return xfs_btree_block_len(cur) +
667 >31449*10^7 : (n - 1) * cur->bc_ops->rec_len;
668 : }
669 :
670 : /*
671 : * Calculate offset of the n-th key in a btree block.
672 : */
673 : STATIC size_t
674 55610048775 : xfs_btree_key_offset(
675 : struct xfs_btree_cur *cur,
676 : int n)
677 : {
678 55610048775 : return xfs_btree_block_len(cur) +
679 55610048775 : (n - 1) * cur->bc_ops->key_len;
680 : }
681 :
682 : /*
683 : * Calculate offset of the n-th high key in a btree block.
684 : */
685 : STATIC size_t
686 40428255662 : xfs_btree_high_key_offset(
687 : struct xfs_btree_cur *cur,
688 : int n)
689 : {
690 40428255662 : return xfs_btree_block_len(cur) +
691 40428255662 : (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2);
692 : }
693 :
694 : /*
695 : * Calculate offset of the n-th block pointer in a btree block.
696 : */
697 : STATIC size_t
698 39384152755 : xfs_btree_ptr_offset(
699 : struct xfs_btree_cur *cur,
700 : int n,
701 : int level)
702 : {
703 39384152755 : return xfs_btree_block_len(cur) +
704 78768611045 : cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
705 78491362062 : (n - 1) * xfs_btree_ptr_len(cur);
706 : }
707 :
708 : /*
709 : * Return a pointer to the n-th record in the btree block.
710 : */
711 : union xfs_btree_rec *
712 2708405854 : xfs_btree_rec_addr(
713 : struct xfs_btree_cur *cur,
714 : int n,
715 : struct xfs_btree_block *block)
716 : {
717 >31346*10^7 : return (union xfs_btree_rec *)
718 >31346*10^7 : ((char *)block + xfs_btree_rec_offset(cur, n));
719 : }
720 :
721 : /*
722 : * Return a pointer to the n-th key in the btree block.
723 : */
724 : union xfs_btree_key *
725 2548892420 : xfs_btree_key_addr(
726 : struct xfs_btree_cur *cur,
727 : int n,
728 : struct xfs_btree_block *block)
729 : {
730 55352287743 : return (union xfs_btree_key *)
731 55352287743 : ((char *)block + xfs_btree_key_offset(cur, n));
732 : }
733 :
734 : /*
735 : * Return a pointer to the n-th high key in the btree block.
736 : */
737 : union xfs_btree_key *
738 1933729782 : xfs_btree_high_key_addr(
739 : struct xfs_btree_cur *cur,
740 : int n,
741 : struct xfs_btree_block *block)
742 : {
743 40437391843 : return (union xfs_btree_key *)
744 40437391843 : ((char *)block + xfs_btree_high_key_offset(cur, n));
745 : }
746 :
747 : /*
748 : * Return a pointer to the n-th block pointer in the btree block.
749 : */
750 : union xfs_btree_ptr *
751 39381441046 : xfs_btree_ptr_addr(
752 : struct xfs_btree_cur *cur,
753 : int n,
754 : struct xfs_btree_block *block)
755 : {
756 39381441046 : int level = xfs_btree_get_level(block);
757 :
758 39381441046 : ASSERT(block->bb_level != 0);
759 :
760 78764289291 : return (union xfs_btree_ptr *)
761 39381441046 : ((char *)block + xfs_btree_ptr_offset(cur, n, level));
762 : }
763 :
764 : struct xfs_ifork *
765 388288568 : xfs_btree_ifork_ptr(
766 : struct xfs_btree_cur *cur)
767 : {
768 388288568 : ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
769 :
770 388288568 : if (cur->bc_flags & XFS_BTREE_STAGING)
771 10371 : return cur->bc_ino.ifake->if_fork;
772 388278197 : return xfs_ifork_ptr(cur->bc_ino.ip, cur->bc_ino.whichfork);
773 : }
774 :
775 : /*
776 : * Get the root block which is stored in the inode.
777 : *
778 : * For now this btree implementation assumes the btree root is always
779 : * stored in the if_broot field of an inode fork.
780 : */
781 : STATIC struct xfs_btree_block *
782 225220689 : xfs_btree_get_iroot(
783 : struct xfs_btree_cur *cur)
784 : {
785 225220689 : struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur);
786 :
787 225233756 : return (struct xfs_btree_block *)ifp->if_broot;
788 : }
789 :
790 : /*
791 : * Retrieve the block pointer from the cursor at the given level.
792 : * This may be an inode btree root or from a buffer.
793 : */
794 : struct xfs_btree_block * /* generic btree block pointer */
795 >26847*10^7 : xfs_btree_get_block(
796 : struct xfs_btree_cur *cur, /* btree cursor */
797 : int level, /* level in btree */
798 : struct xfs_buf **bpp) /* buffer containing the block */
799 : {
800 >26847*10^7 : if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
801 1105622688 : (level == cur->bc_nlevels - 1)) {
802 85377008 : *bpp = NULL;
803 85377008 : return xfs_btree_get_iroot(cur);
804 : }
805 :
806 >26839*10^7 : *bpp = cur->bc_levels[level].bp;
807 >26839*10^7 : return XFS_BUF_TO_BLOCK(*bpp);
808 : }
809 :
810 : /*
811 : * Change the cursor to point to the first record at the given level.
812 : * Other levels are unaffected.
813 : */
814 : STATIC int /* success=1, failure=0 */
815 69123188 : xfs_btree_firstrec(
816 : struct xfs_btree_cur *cur, /* btree cursor */
817 : int level) /* level to change */
818 : {
819 69123188 : struct xfs_btree_block *block; /* generic btree block pointer */
820 69123188 : struct xfs_buf *bp; /* buffer containing block */
821 :
822 : /*
823 : * Get the block pointer for this level.
824 : */
825 69123188 : block = xfs_btree_get_block(cur, level, &bp);
826 69123183 : if (xfs_btree_check_block(cur, block, level, bp))
827 : return 0;
828 : /*
829 : * It's empty, there is no such record.
830 : */
831 69123212 : if (!block->bb_numrecs)
832 : return 0;
833 : /*
834 : * Set the ptr value to 1, that's the first record/key.
835 : */
836 69123212 : cur->bc_levels[level].ptr = 1;
837 69123212 : return 1;
838 : }
839 :
840 : /*
841 : * Change the cursor to point to the last record in the current block
842 : * at the given level. Other levels are unaffected.
843 : */
844 : STATIC int /* success=1, failure=0 */
845 71398852 : xfs_btree_lastrec(
846 : struct xfs_btree_cur *cur, /* btree cursor */
847 : int level) /* level to change */
848 : {
849 71398852 : struct xfs_btree_block *block; /* generic btree block pointer */
850 71398852 : struct xfs_buf *bp; /* buffer containing block */
851 :
852 : /*
853 : * Get the block pointer for this level.
854 : */
855 71398852 : block = xfs_btree_get_block(cur, level, &bp);
856 71399041 : if (xfs_btree_check_block(cur, block, level, bp))
857 : return 0;
858 : /*
859 : * It's empty, there is no such record.
860 : */
861 71399836 : if (!block->bb_numrecs)
862 : return 0;
863 : /*
864 : * Set the ptr value to numrecs, that's the last record/key.
865 : */
866 71399836 : cur->bc_levels[level].ptr = be16_to_cpu(block->bb_numrecs);
867 71399836 : return 1;
868 : }
869 :
870 : /*
871 : * Compute first and last byte offsets for the fields given.
872 : * Interprets the offsets table, which contains struct field offsets.
873 : */
874 : void
875 1495629157 : xfs_btree_offsets(
876 : uint32_t fields, /* bitmask of fields */
877 : const short *offsets, /* table of field offsets */
878 : int nbits, /* number of bits to inspect */
879 : int *first, /* output: first byte offset */
880 : int *last) /* output: last byte offset */
881 : {
882 1495629157 : int i; /* current bit number */
883 1495629157 : uint32_t imask; /* mask for current bit number */
884 :
885 1495629157 : ASSERT(fields != 0);
886 : /*
887 : * Find the lowest bit, so the first byte offset.
888 : */
889 6054540460 : for (i = 0, imask = 1u; ; i++, imask <<= 1) {
890 6054540460 : if (imask & fields) {
891 1495629157 : *first = offsets[i];
892 1495629157 : break;
893 : }
894 : }
895 : /*
896 : * Find the highest bit, so the last byte offset.
897 : */
898 1495629157 : for (i = nbits - 1, imask = 1u << i; ; i--, imask >>= 1) {
899 10355034251 : if (imask & fields) {
900 1495629157 : *last = offsets[i + 1] - 1;
901 1495629157 : break;
902 : }
903 : }
904 1495629157 : }
905 :
906 : /*
907 : * Get a buffer for the block, return it read in.
908 : * Long-form addressing.
909 : */
910 : int
911 391020451 : xfs_btree_read_bufl(
912 : struct xfs_mount *mp, /* file system mount point */
913 : struct xfs_trans *tp, /* transaction pointer */
914 : xfs_fsblock_t fsbno, /* file system block number */
915 : struct xfs_buf **bpp, /* buffer for fsbno */
916 : int refval, /* ref count value for buffer */
917 : const struct xfs_buf_ops *ops)
918 : {
919 391020451 : struct xfs_buf *bp; /* return value */
920 391020451 : xfs_daddr_t d; /* real disk block address */
921 391020451 : int error;
922 :
923 391020451 : if (!xfs_verify_fsbno(mp, fsbno))
924 : return -EFSCORRUPTED;
925 391023006 : d = XFS_FSB_TO_DADDR(mp, fsbno);
926 391023006 : error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
927 : mp->m_bsize, 0, &bp, ops);
928 391023578 : if (error)
929 : return error;
930 391023531 : if (bp)
931 391023531 : xfs_buf_set_ref(bp, refval);
932 391023576 : *bpp = bp;
933 391023576 : return 0;
934 : }
935 :
936 : /*
937 : * Read-ahead the block, don't wait for it, don't return a buffer.
938 : * Long-form addressing.
939 : */
940 : /* ARGSUSED */
941 : void
942 45259659 : xfs_btree_reada_bufl(
943 : struct xfs_mount *mp, /* file system mount point */
944 : xfs_fsblock_t fsbno, /* file system block number */
945 : xfs_extlen_t count, /* count of filesystem blocks */
946 : const struct xfs_buf_ops *ops)
947 : {
948 45259659 : xfs_daddr_t d;
949 :
950 45259659 : ASSERT(fsbno != NULLFSBLOCK);
951 45259659 : d = XFS_FSB_TO_DADDR(mp, fsbno);
952 45259659 : xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
953 45259660 : }
954 :
955 : /*
956 : * Read-ahead the block, don't wait for it, don't return a buffer.
957 : * Short-form addressing.
958 : */
959 : /* ARGSUSED */
960 : void
961 4202240281 : xfs_btree_reada_bufs(
962 : struct xfs_mount *mp, /* file system mount point */
963 : xfs_agnumber_t agno, /* allocation group number */
964 : xfs_agblock_t agbno, /* allocation group block number */
965 : xfs_extlen_t count, /* count of filesystem blocks */
966 : const struct xfs_buf_ops *ops)
967 : {
968 4202240281 : xfs_daddr_t d;
969 :
970 4202240281 : ASSERT(agno != NULLAGNUMBER);
971 4202240281 : ASSERT(agbno != NULLAGBLOCK);
972 4202240281 : d = XFS_AGB_TO_DADDR(mp, agno, agbno);
973 4202240281 : xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
974 4203900566 : }
975 :
976 : STATIC int
977 45259719 : xfs_btree_readahead_lblock(
978 : struct xfs_btree_cur *cur,
979 : int lr,
980 : struct xfs_btree_block *block)
981 : {
982 45259719 : int rval = 0;
983 45259719 : xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
984 45259719 : xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
985 :
986 45259719 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
987 : return 0;
988 :
989 45259719 : if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
990 19612268 : xfs_btree_reada_bufl(cur->bc_mp, left, 1,
991 19612268 : cur->bc_ops->buf_ops);
992 19612268 : rval++;
993 : }
994 :
995 45259719 : if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
996 25647391 : xfs_btree_reada_bufl(cur->bc_mp, right, 1,
997 25647391 : cur->bc_ops->buf_ops);
998 25647391 : rval++;
999 : }
1000 :
1001 : return rval;
1002 : }
1003 :
1004 : STATIC int
1005 1169326707 : xfs_btree_readahead_sblock(
1006 : struct xfs_btree_cur *cur,
1007 : int lr,
1008 : struct xfs_btree_block *block)
1009 : {
1010 1169326707 : int rval = 0;
1011 1169326707 : xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib);
1012 1169326707 : xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib);
1013 :
1014 1169326707 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
1015 : return 0;
1016 :
1017 1164851855 : if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
1018 73868786 : xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1019 73868786 : left, 1, cur->bc_ops->buf_ops);
1020 73868786 : rval++;
1021 : }
1022 :
1023 1164852208 : if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
1024 1090989346 : xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1025 1090989346 : right, 1, cur->bc_ops->buf_ops);
1026 1091066971 : rval++;
1027 : }
1028 :
1029 : return rval;
1030 : }
1031 :
1032 : /*
1033 : * Read-ahead btree blocks, at the given level.
1034 : * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
1035 : */
1036 : STATIC int
1037 74183174249 : xfs_btree_readahead(
1038 : struct xfs_btree_cur *cur, /* btree cursor */
1039 : int lev, /* level in btree */
1040 : int lr) /* left/right bits */
1041 : {
1042 74183174249 : struct xfs_btree_block *block;
1043 :
1044 : /*
1045 : * No readahead needed if we are at the root level and the
1046 : * btree root is stored in the inode.
1047 : */
1048 74183174249 : if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1049 62605400 : (lev == cur->bc_nlevels - 1))
1050 : return 0;
1051 :
1052 74177608326 : if ((cur->bc_levels[lev].ra | lr) == cur->bc_levels[lev].ra)
1053 : return 0;
1054 :
1055 1214611462 : cur->bc_levels[lev].ra |= lr;
1056 1214611462 : block = XFS_BUF_TO_BLOCK(cur->bc_levels[lev].bp);
1057 :
1058 1214611462 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1059 45259718 : return xfs_btree_readahead_lblock(cur, lr, block);
1060 1169351744 : return xfs_btree_readahead_sblock(cur, lr, block);
1061 : }
1062 :
1063 : STATIC int
1064 29495716203 : xfs_btree_ptr_to_daddr(
1065 : struct xfs_btree_cur *cur,
1066 : const union xfs_btree_ptr *ptr,
1067 : xfs_daddr_t *daddr)
1068 : {
1069 29495716203 : xfs_fsblock_t fsbno;
1070 29495716203 : xfs_agblock_t agbno;
1071 29495716203 : int error;
1072 :
1073 29495716203 : error = xfs_btree_check_ptr(cur, ptr, 0, 1);
1074 29505369212 : if (error)
1075 : return error;
1076 :
1077 29505369212 : if (cur->bc_flags & XFS_BTREE_IN_XFILE) {
1078 1085569913 : *daddr = xfbtree_ptr_to_daddr(cur, ptr);
1079 1085569791 : return 0;
1080 : }
1081 :
1082 28419799299 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1083 535702725 : fsbno = be64_to_cpu(ptr->l);
1084 535702725 : *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno);
1085 : } else {
1086 27884096574 : agbno = be32_to_cpu(ptr->s);
1087 27884096574 : *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1088 : agbno);
1089 : }
1090 :
1091 : return 0;
1092 : }
1093 :
1094 : /*
1095 : * Readahead @count btree blocks at the given @ptr location.
1096 : *
1097 : * We don't need to care about long or short form btrees here as we have a
1098 : * method of converting the ptr directly to a daddr available to us.
1099 : */
1100 : STATIC void
1101 5025771 : xfs_btree_readahead_ptr(
1102 : struct xfs_btree_cur *cur,
1103 : union xfs_btree_ptr *ptr,
1104 : xfs_extlen_t count)
1105 : {
1106 5025771 : xfs_daddr_t daddr;
1107 :
1108 5025771 : if (xfs_btree_ptr_to_daddr(cur, ptr, &daddr))
1109 0 : return;
1110 5025764 : xfs_buf_readahead(xfs_btree_buftarg(cur), daddr,
1111 5025765 : xfs_btree_bbsize(cur) * count,
1112 5025760 : cur->bc_ops->buf_ops);
1113 : }
1114 :
1115 : /*
1116 : * Set the buffer for level "lev" in the cursor to bp, releasing
1117 : * any previous buffer.
1118 : */
1119 : STATIC void
1120 7010957157 : xfs_btree_setbuf(
1121 : struct xfs_btree_cur *cur, /* btree cursor */
1122 : int lev, /* level in btree */
1123 : struct xfs_buf *bp) /* new buffer to set */
1124 : {
1125 7010957157 : struct xfs_btree_block *b; /* btree block */
1126 :
1127 7010957157 : if (cur->bc_levels[lev].bp)
1128 927330787 : xfs_trans_brelse(cur->bc_tp, cur->bc_levels[lev].bp);
1129 7009167989 : cur->bc_levels[lev].bp = bp;
1130 7009167989 : cur->bc_levels[lev].ra = 0;
1131 :
1132 7009167989 : b = XFS_BUF_TO_BLOCK(bp);
1133 7009167989 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1134 741406483 : if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
1135 588094253 : cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
1136 741406483 : if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
1137 693050522 : cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
1138 : } else {
1139 6267761506 : if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
1140 4006630286 : cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
1141 6267761506 : if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
1142 4091201361 : cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
1143 : }
1144 7009167989 : }
1145 :
1146 : bool
1147 22271900 : xfs_btree_ptr_is_null(
1148 : struct xfs_btree_cur *cur,
1149 : const union xfs_btree_ptr *ptr)
1150 : {
1151 268553409 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1152 1213626490 : return ptr->l == cpu_to_be64(NULLFSBLOCK);
1153 : else
1154 3399516725 : return ptr->s == cpu_to_be32(NULLAGBLOCK);
1155 : }
1156 :
1157 : void
1158 732234 : xfs_btree_set_ptr_null(
1159 : struct xfs_btree_cur *cur,
1160 : union xfs_btree_ptr *ptr)
1161 : {
1162 732234 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1163 459296181 : ptr->l = cpu_to_be64(NULLFSBLOCK);
1164 : else
1165 648189869 : ptr->s = cpu_to_be32(NULLAGBLOCK);
1166 732234 : }
1167 :
1168 : /*
1169 : * Get/set/init sibling pointers
1170 : */
1171 : void
1172 3844844077 : xfs_btree_get_sibling(
1173 : struct xfs_btree_cur *cur,
1174 : struct xfs_btree_block *block,
1175 : union xfs_btree_ptr *ptr,
1176 : int lr)
1177 : {
1178 3844844077 : ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
1179 :
1180 3844844077 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1181 751640778 : if (lr == XFS_BB_RIGHTSIB)
1182 511589186 : ptr->l = block->bb_u.l.bb_rightsib;
1183 : else
1184 240051592 : ptr->l = block->bb_u.l.bb_leftsib;
1185 : } else {
1186 3093203299 : if (lr == XFS_BB_RIGHTSIB)
1187 2991452008 : ptr->s = block->bb_u.s.bb_rightsib;
1188 : else
1189 101751291 : ptr->s = block->bb_u.s.bb_leftsib;
1190 : }
1191 3844844077 : }
1192 :
1193 : void
1194 3651981 : xfs_btree_set_sibling(
1195 : struct xfs_btree_cur *cur,
1196 : struct xfs_btree_block *block,
1197 : const union xfs_btree_ptr *ptr,
1198 : int lr)
1199 : {
1200 3651981 : ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
1201 :
1202 3651981 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1203 820966 : if (lr == XFS_BB_RIGHTSIB)
1204 585511 : block->bb_u.l.bb_rightsib = ptr->l;
1205 : else
1206 235455 : block->bb_u.l.bb_leftsib = ptr->l;
1207 : } else {
1208 2831015 : if (lr == XFS_BB_RIGHTSIB)
1209 1428241 : block->bb_u.s.bb_rightsib = ptr->s;
1210 : else
1211 1402774 : block->bb_u.s.bb_leftsib = ptr->s;
1212 : }
1213 3651981 : }
1214 :
1215 : static void
1216 13924210 : __xfs_btree_init_block(
1217 : struct xfs_mount *mp,
1218 : struct xfs_btree_block *buf,
1219 : const struct xfs_btree_ops *ops,
1220 : xfs_daddr_t blkno,
1221 : __u16 level,
1222 : __u16 numrecs,
1223 : __u64 owner)
1224 : {
1225 13924210 : int crc = xfs_has_crc(mp);
1226 13924210 : __u32 magic = xfs_btree_magic(mp, ops);
1227 :
1228 13924204 : buf->bb_magic = cpu_to_be32(magic);
1229 13924204 : buf->bb_level = cpu_to_be16(level);
1230 13924204 : buf->bb_numrecs = cpu_to_be16(numrecs);
1231 :
1232 13924204 : if (ops->geom_flags & XFS_BTREE_LONG_PTRS) {
1233 12965683 : buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
1234 12965683 : buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
1235 12965683 : if (crc) {
1236 12965683 : buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
1237 12965683 : buf->bb_u.l.bb_owner = cpu_to_be64(owner);
1238 12965683 : uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
1239 12965680 : buf->bb_u.l.bb_pad = 0;
1240 12965680 : buf->bb_u.l.bb_lsn = 0;
1241 : }
1242 : } else {
1243 : /* owner is a 32 bit value on short blocks */
1244 958521 : __u32 __owner = (__u32)owner;
1245 :
1246 958521 : buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1247 958521 : buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1248 958521 : if (crc) {
1249 951645 : buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
1250 951645 : buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
1251 951645 : uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
1252 951634 : buf->bb_u.s.bb_lsn = 0;
1253 : }
1254 : }
1255 13924190 : }
1256 :
1257 : void
1258 11869454 : xfs_btree_init_block(
1259 : struct xfs_mount *mp,
1260 : struct xfs_btree_block *block,
1261 : const struct xfs_btree_ops *ops,
1262 : __u16 level,
1263 : __u16 numrecs,
1264 : __u64 owner)
1265 : {
1266 11869454 : __xfs_btree_init_block(mp, block, ops, XFS_BUF_DADDR_NULL, level,
1267 : numrecs, owner);
1268 11869447 : }
1269 :
1270 : void
1271 2054724 : xfs_btree_init_buf(
1272 : struct xfs_mount *mp,
1273 : struct xfs_buf *bp,
1274 : const struct xfs_btree_ops *ops,
1275 : __u16 level,
1276 : __u16 numrecs,
1277 : __u64 owner)
1278 : {
1279 2054724 : __xfs_btree_init_block(mp, XFS_BUF_TO_BLOCK(bp), ops,
1280 : xfs_buf_daddr(bp), level, numrecs, owner);
1281 2054687 : bp->b_ops = ops->buf_ops;
1282 2054687 : }
1283 :
1284 : void
1285 1136014 : xfs_btree_init_block_cur(
1286 : struct xfs_btree_cur *cur,
1287 : struct xfs_buf *bp,
1288 : int level,
1289 : int numrecs)
1290 : {
1291 1136014 : __u64 owner;
1292 :
1293 : /*
1294 : * we can pull the owner from the cursor right now as the different
1295 : * owners align directly with the pointer size of the btree. This may
1296 : * change in future, but is safe for current users of the generic btree
1297 : * code.
1298 : */
1299 1136014 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
1300 4743 : owner = xfbtree_owner(cur);
1301 1131271 : else if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1302 221297 : owner = cur->bc_ino.ip->i_ino;
1303 : else
1304 909974 : owner = cur->bc_ag.pag->pag_agno;
1305 :
1306 1136014 : xfs_btree_init_buf(cur->bc_mp, bp, cur->bc_ops, level, numrecs, owner);
1307 1135988 : }
1308 :
1309 : /*
1310 : * Return true if ptr is the last record in the btree and
1311 : * we need to track updates to this record. The decision
1312 : * will be further refined in the update_lastrec method.
1313 : */
1314 : STATIC int
1315 1437416873 : xfs_btree_is_lastrec(
1316 : struct xfs_btree_cur *cur,
1317 : struct xfs_btree_block *block,
1318 : int level)
1319 : {
1320 1437416873 : union xfs_btree_ptr ptr;
1321 :
1322 1437416873 : if (level > 0)
1323 : return 0;
1324 1436379958 : if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
1325 : return 0;
1326 :
1327 187171524 : xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1328 374341930 : if (!xfs_btree_ptr_is_null(cur, &ptr))
1329 24945663 : return 0;
1330 : return 1;
1331 : }
1332 :
1333 : STATIC void
1334 83945028 : xfs_btree_buf_to_ptr(
1335 : struct xfs_btree_cur *cur,
1336 : struct xfs_buf *bp,
1337 : union xfs_btree_ptr *ptr)
1338 : {
1339 83945028 : if (cur->bc_flags & XFS_BTREE_IN_XFILE) {
1340 4743 : xfbtree_buf_to_ptr(cur, bp, ptr);
1341 4743 : return;
1342 : }
1343 :
1344 83940285 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1345 5342995 : ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
1346 : xfs_buf_daddr(bp)));
1347 : else {
1348 78597290 : ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
1349 : xfs_buf_daddr(bp)));
1350 : }
1351 : }
1352 :
1353 : static inline void
1354 : xfs_btree_set_refs(
1355 : struct xfs_btree_cur *cur,
1356 : struct xfs_buf *bp)
1357 : {
1358 7124782691 : xfs_buf_set_ref(bp, cur->bc_ops->lru_refs);
1359 : }
1360 :
1361 : int
1362 1138527 : xfs_btree_get_buf_block(
1363 : struct xfs_btree_cur *cur,
1364 : const union xfs_btree_ptr *ptr,
1365 : struct xfs_btree_block **block,
1366 : struct xfs_buf **bpp)
1367 : {
1368 1138527 : xfs_daddr_t d;
1369 1138527 : int error;
1370 :
1371 1138527 : error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
1372 1138532 : if (error)
1373 : return error;
1374 2277036 : error = xfs_trans_get_buf(cur->bc_tp, xfs_btree_buftarg(cur), d,
1375 1138511 : xfs_btree_bbsize(cur), 0, bpp);
1376 1138541 : if (error)
1377 : return error;
1378 :
1379 1138541 : (*bpp)->b_ops = cur->bc_ops->buf_ops;
1380 1138541 : *block = XFS_BUF_TO_BLOCK(*bpp);
1381 1138541 : return 0;
1382 : }
1383 :
1384 : /*
1385 : * Read in the buffer at the given ptr and return the buffer and
1386 : * the block pointer within the buffer.
1387 : */
1388 : int
1389 7122804882 : xfs_btree_read_buf_block(
1390 : struct xfs_btree_cur *cur,
1391 : const union xfs_btree_ptr *ptr,
1392 : int flags,
1393 : struct xfs_btree_block **block,
1394 : struct xfs_buf **bpp)
1395 : {
1396 7122804882 : struct xfs_mount *mp = cur->bc_mp;
1397 7122804882 : xfs_daddr_t d;
1398 7122804882 : int error;
1399 :
1400 : /* need to sort out how callers deal with failures first */
1401 7122804882 : ASSERT(!(flags & XBF_TRYLOCK));
1402 :
1403 7122804882 : error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
1404 7123866353 : if (error)
1405 : return error;
1406 7123745139 : error = xfs_trans_read_buf(mp, cur->bc_tp, xfs_btree_buftarg(cur), d,
1407 7123669396 : xfs_btree_bbsize(cur), flags, bpp,
1408 7123314834 : cur->bc_ops->buf_ops);
1409 7124765471 : if (xfs_metadata_is_sick(error))
1410 270 : xfs_btree_mark_sick(cur);
1411 7124765471 : if (error)
1412 : return error;
1413 :
1414 7124782691 : xfs_btree_set_refs(cur, *bpp);
1415 7125127016 : *block = XFS_BUF_TO_BLOCK(*bpp);
1416 7125127016 : return 0;
1417 : }
1418 :
1419 : /*
1420 : * Copy keys from one btree block to another.
1421 : */
1422 : void
1423 169238681 : xfs_btree_copy_keys(
1424 : struct xfs_btree_cur *cur,
1425 : union xfs_btree_key *dst_key,
1426 : const union xfs_btree_key *src_key,
1427 : int numkeys)
1428 : {
1429 169238681 : ASSERT(numkeys >= 0);
1430 338477362 : memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1431 169238681 : }
1432 :
1433 : /*
1434 : * Copy records from one btree block to another.
1435 : */
1436 : STATIC void
1437 1074490103 : xfs_btree_copy_recs(
1438 : struct xfs_btree_cur *cur,
1439 : union xfs_btree_rec *dst_rec,
1440 : union xfs_btree_rec *src_rec,
1441 : int numrecs)
1442 : {
1443 1074490103 : ASSERT(numrecs >= 0);
1444 2148980206 : memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1445 1074490103 : }
1446 :
1447 : /*
1448 : * Copy block pointers from one btree block to another.
1449 : */
1450 : void
1451 6783133 : xfs_btree_copy_ptrs(
1452 : struct xfs_btree_cur *cur,
1453 : union xfs_btree_ptr *dst_ptr,
1454 : const union xfs_btree_ptr *src_ptr,
1455 : int numptrs)
1456 : {
1457 6783133 : ASSERT(numptrs >= 0);
1458 16425810 : memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1459 6783133 : }
1460 :
1461 : /*
1462 : * Shift keys one index left/right inside a single btree block.
1463 : */
1464 : STATIC void
1465 932783 : xfs_btree_shift_keys(
1466 : struct xfs_btree_cur *cur,
1467 : union xfs_btree_key *key,
1468 : int dir,
1469 : int numkeys)
1470 : {
1471 932783 : char *dst_key;
1472 :
1473 932783 : ASSERT(numkeys >= 0);
1474 932783 : ASSERT(dir == 1 || dir == -1);
1475 :
1476 932783 : dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1477 1865566 : memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1478 932783 : }
1479 :
1480 : /*
1481 : * Shift records one index left/right inside a single btree block.
1482 : */
1483 : STATIC void
1484 908245953 : xfs_btree_shift_recs(
1485 : struct xfs_btree_cur *cur,
1486 : union xfs_btree_rec *rec,
1487 : int dir,
1488 : int numrecs)
1489 : {
1490 908245953 : char *dst_rec;
1491 :
1492 908245953 : ASSERT(numrecs >= 0);
1493 908245953 : ASSERT(dir == 1 || dir == -1);
1494 :
1495 908245953 : dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1496 1816491906 : memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1497 908245953 : }
1498 :
1499 : /*
1500 : * Shift block pointers one index left/right inside a single btree block.
1501 : */
1502 : STATIC void
1503 932781 : xfs_btree_shift_ptrs(
1504 : struct xfs_btree_cur *cur,
1505 : union xfs_btree_ptr *ptr,
1506 : int dir,
1507 : int numptrs)
1508 : {
1509 932781 : char *dst_ptr;
1510 :
1511 932781 : ASSERT(numptrs >= 0);
1512 932781 : ASSERT(dir == 1 || dir == -1);
1513 :
1514 932781 : dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1515 1865562 : memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1516 932781 : }
1517 :
1518 : /*
1519 : * Log key values from the btree block.
1520 : */
1521 : STATIC void
1522 168534039 : xfs_btree_log_keys(
1523 : struct xfs_btree_cur *cur,
1524 : struct xfs_buf *bp,
1525 : int first,
1526 : int last)
1527 : {
1528 :
1529 168534039 : if (bp) {
1530 161766204 : xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1531 161766254 : xfs_trans_log_buf(cur->bc_tp, bp,
1532 161766254 : xfs_btree_key_offset(cur, first),
1533 161766254 : xfs_btree_key_offset(cur, last + 1) - 1);
1534 : } else {
1535 6767835 : xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
1536 6767835 : xfs_ilog_fbroot(cur->bc_ino.whichfork));
1537 : }
1538 168534219 : }
1539 :
1540 : /*
1541 : * Log record values from the btree block.
1542 : */
1543 : void
1544 1385330980 : xfs_btree_log_recs(
1545 : struct xfs_btree_cur *cur,
1546 : struct xfs_buf *bp,
1547 : int first,
1548 : int last)
1549 : {
1550 :
1551 1385330980 : xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1552 1385367201 : xfs_trans_log_buf(cur->bc_tp, bp,
1553 1385367201 : xfs_btree_rec_offset(cur, first),
1554 1385367201 : xfs_btree_rec_offset(cur, last + 1) - 1);
1555 :
1556 1385451267 : }
1557 :
1558 : /*
1559 : * Log block pointer fields from a btree block (nonleaf).
1560 : */
1561 : STATIC void
1562 1041395 : xfs_btree_log_ptrs(
1563 : struct xfs_btree_cur *cur, /* btree cursor */
1564 : struct xfs_buf *bp, /* buffer containing btree block */
1565 : int first, /* index of first pointer to log */
1566 : int last) /* index of last pointer to log */
1567 : {
1568 :
1569 1041395 : if (bp) {
1570 1012022 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
1571 1012022 : int level = xfs_btree_get_level(block);
1572 :
1573 1012022 : xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1574 1012023 : xfs_trans_log_buf(cur->bc_tp, bp,
1575 1012022 : xfs_btree_ptr_offset(cur, first, level),
1576 1012023 : xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1577 : } else {
1578 29373 : xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
1579 29373 : xfs_ilog_fbroot(cur->bc_ino.whichfork));
1580 : }
1581 :
1582 1041395 : }
1583 :
1584 : /*
1585 : * Log fields from a btree block header.
1586 : */
1587 : void
1588 1240185880 : xfs_btree_log_block(
1589 : struct xfs_btree_cur *cur, /* btree cursor */
1590 : struct xfs_buf *bp, /* buffer containing btree block */
1591 : uint32_t fields) /* mask of fields: XFS_BB_... */
1592 : {
1593 1240185880 : int first; /* first byte offset logged */
1594 1240185880 : int last; /* last byte offset logged */
1595 1240185880 : static const short soffsets[] = { /* table of offsets (short) */
1596 : offsetof(struct xfs_btree_block, bb_magic),
1597 : offsetof(struct xfs_btree_block, bb_level),
1598 : offsetof(struct xfs_btree_block, bb_numrecs),
1599 : offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1600 : offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
1601 : offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
1602 : offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
1603 : offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
1604 : offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
1605 : offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
1606 : XFS_BTREE_SBLOCK_CRC_LEN
1607 : };
1608 1240185880 : static const short loffsets[] = { /* table of offsets (long) */
1609 : offsetof(struct xfs_btree_block, bb_magic),
1610 : offsetof(struct xfs_btree_block, bb_level),
1611 : offsetof(struct xfs_btree_block, bb_numrecs),
1612 : offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1613 : offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
1614 : offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
1615 : offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
1616 : offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
1617 : offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
1618 : offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
1619 : offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
1620 : XFS_BTREE_LBLOCK_CRC_LEN
1621 : };
1622 :
1623 1240185880 : if (bp) {
1624 1240132870 : int nbits;
1625 :
1626 1240132870 : if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
1627 : /*
1628 : * We don't log the CRC when updating a btree
1629 : * block but instead recreate it during log
1630 : * recovery. As the log buffers have checksums
1631 : * of their own this is safe and avoids logging a crc
1632 : * update in a lot of places.
1633 : */
1634 1240130978 : if (fields == XFS_BB_ALL_BITS)
1635 1561268 : fields = XFS_BB_ALL_BITS_CRC;
1636 : nbits = XFS_BB_NUM_BITS_CRC;
1637 : } else {
1638 : nbits = XFS_BB_NUM_BITS;
1639 : }
1640 1240132870 : xfs_btree_offsets(fields,
1641 1240132870 : (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1642 : loffsets : soffsets,
1643 : nbits, &first, &last);
1644 1240158018 : xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1645 1240175174 : xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1646 : } else {
1647 53010 : xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
1648 53010 : xfs_ilog_fbroot(cur->bc_ino.whichfork));
1649 : }
1650 1240141697 : }
1651 :
1652 : /*
1653 : * Increment cursor by one record at the level.
1654 : * For nonzero levels the leaf-ward information is untouched.
1655 : */
1656 : int /* error */
1657 72560051954 : xfs_btree_increment(
1658 : struct xfs_btree_cur *cur,
1659 : int level,
1660 : int *stat) /* success/failure */
1661 : {
1662 72560051954 : struct xfs_btree_block *block;
1663 72560051954 : union xfs_btree_ptr ptr;
1664 72560051954 : struct xfs_buf *bp;
1665 72560051954 : int error; /* error return value */
1666 72560051954 : int lev;
1667 :
1668 72560051954 : ASSERT(level < cur->bc_nlevels);
1669 :
1670 : /* Read-ahead to the right at this level. */
1671 72560051954 : xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1672 :
1673 : /* Get a pointer to the btree block. */
1674 72703199415 : block = xfs_btree_get_block(cur, level, &bp);
1675 :
1676 : #ifdef DEBUG
1677 72773950888 : error = xfs_btree_check_block(cur, block, level, bp);
1678 72275484087 : if (error)
1679 0 : goto error0;
1680 : #endif
1681 :
1682 : /* We're done if we remain in the block after the increment. */
1683 >14455*10^7 : if (++cur->bc_levels[level].ptr <= xfs_btree_get_numrecs(block))
1684 69816870596 : goto out1;
1685 :
1686 : /* Fail if we just went off the right edge of the tree. */
1687 2458613491 : xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1688 4917470776 : if (xfs_btree_ptr_is_null(cur, &ptr))
1689 2043744903 : goto out0;
1690 :
1691 414990485 : XFS_BTREE_STATS_INC(cur, increment);
1692 :
1693 : /*
1694 : * March up the tree incrementing pointers.
1695 : * Stop when we don't go off the right edge of a block.
1696 : */
1697 418916115 : for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1698 418914120 : block = xfs_btree_get_block(cur, lev, &bp);
1699 :
1700 : #ifdef DEBUG
1701 418938919 : error = xfs_btree_check_block(cur, block, lev, bp);
1702 418921404 : if (error)
1703 0 : goto error0;
1704 : #endif
1705 :
1706 837842808 : if (++cur->bc_levels[lev].ptr <= xfs_btree_get_numrecs(block))
1707 : break;
1708 :
1709 : /* Read-ahead the right block for the next loop. */
1710 3917725 : xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1711 : }
1712 :
1713 : /*
1714 : * If we went off the root then we are either seriously
1715 : * confused or have the tree root in an inode.
1716 : */
1717 415005674 : if (lev == cur->bc_nlevels) {
1718 0 : if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1719 0 : goto out0;
1720 0 : ASSERT(0);
1721 0 : xfs_btree_mark_sick(cur);
1722 0 : error = -EFSCORRUPTED;
1723 0 : goto error0;
1724 : }
1725 415005674 : ASSERT(lev < cur->bc_nlevels);
1726 :
1727 : /*
1728 : * Now walk back down the tree, fixing up the cursor's buffer
1729 : * pointers and key numbers.
1730 : */
1731 833906368 : for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1732 418926789 : union xfs_btree_ptr *ptrp;
1733 :
1734 418926789 : ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
1735 418937897 : --lev;
1736 418937897 : error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
1737 418944485 : if (error)
1738 9 : goto error0;
1739 :
1740 418944476 : xfs_btree_setbuf(cur, lev, bp);
1741 418900694 : cur->bc_levels[lev].ptr = 1;
1742 : }
1743 414976597 : out1:
1744 70231847193 : *stat = 1;
1745 70231847193 : return 0;
1746 :
1747 2043744903 : out0:
1748 2043744903 : *stat = 0;
1749 2043744903 : return 0;
1750 :
1751 : error0:
1752 : return error;
1753 : }
1754 :
1755 : /*
1756 : * Decrement cursor by one record at the level.
1757 : * For nonzero levels the leaf-ward information is untouched.
1758 : */
1759 : int /* error */
1760 1343974554 : xfs_btree_decrement(
1761 : struct xfs_btree_cur *cur,
1762 : int level,
1763 : int *stat) /* success/failure */
1764 : {
1765 1343974554 : struct xfs_btree_block *block;
1766 1343974554 : struct xfs_buf *bp;
1767 1343974554 : int error; /* error return value */
1768 1343974554 : int lev;
1769 1343974554 : union xfs_btree_ptr ptr;
1770 :
1771 1343974554 : ASSERT(level < cur->bc_nlevels);
1772 :
1773 : /* Read-ahead to the left at this level. */
1774 1343974554 : xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1775 :
1776 : /* We're done if we remain in the block after the decrement. */
1777 1343933279 : if (--cur->bc_levels[level].ptr > 0)
1778 1125082839 : goto out1;
1779 :
1780 : /* Get a pointer to the btree block. */
1781 218850440 : block = xfs_btree_get_block(cur, level, &bp);
1782 :
1783 : #ifdef DEBUG
1784 218850507 : error = xfs_btree_check_block(cur, block, level, bp);
1785 218850446 : if (error)
1786 0 : goto error0;
1787 : #endif
1788 :
1789 : /* Fail if we just went off the left edge of the tree. */
1790 218850446 : xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1791 437701042 : if (xfs_btree_ptr_is_null(cur, &ptr))
1792 170662904 : goto out0;
1793 :
1794 48187617 : XFS_BTREE_STATS_INC(cur, decrement);
1795 :
1796 : /*
1797 : * March up the tree decrementing pointers.
1798 : * Stop when we don't go off the left edge of a block.
1799 : */
1800 48296201 : for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1801 48296190 : if (--cur->bc_levels[lev].ptr > 0)
1802 : break;
1803 : /* Read-ahead the left block for the next loop. */
1804 108493 : xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1805 : }
1806 :
1807 : /*
1808 : * If we went off the root then we are seriously confused.
1809 : * or the root of the tree is in an inode.
1810 : */
1811 48187708 : if (lev == cur->bc_nlevels) {
1812 0 : if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1813 0 : goto out0;
1814 0 : ASSERT(0);
1815 0 : xfs_btree_mark_sick(cur);
1816 0 : error = -EFSCORRUPTED;
1817 0 : goto error0;
1818 : }
1819 48187708 : ASSERT(lev < cur->bc_nlevels);
1820 :
1821 : /*
1822 : * Now walk back down the tree, fixing up the cursor's buffer
1823 : * pointers and key numbers.
1824 : */
1825 96483816 : for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1826 48296175 : union xfs_btree_ptr *ptrp;
1827 :
1828 48296175 : ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
1829 48296188 : --lev;
1830 48296188 : error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
1831 48296147 : if (error)
1832 7 : goto error0;
1833 48296140 : xfs_btree_setbuf(cur, lev, bp);
1834 96592216 : cur->bc_levels[lev].ptr = xfs_btree_get_numrecs(block);
1835 : }
1836 48187610 : out1:
1837 1173270449 : *stat = 1;
1838 1173270449 : return 0;
1839 :
1840 170662904 : out0:
1841 170662904 : *stat = 0;
1842 170662904 : return 0;
1843 :
1844 : error0:
1845 : return error;
1846 : }
1847 :
1848 : /*
1849 : * Check the btree block owner now that we have the context to know who the
1850 : * real owner is.
1851 : */
1852 : static inline xfs_failaddr_t
1853 6541789975 : xfs_btree_check_block_owner(
1854 : struct xfs_btree_cur *cur,
1855 : struct xfs_btree_block *block)
1856 : {
1857 6541789975 : if (!xfs_has_crc(cur->bc_mp))
1858 : return NULL;
1859 :
1860 6541789316 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
1861 517932659 : return xfbtree_check_block_owner(cur, block);
1862 :
1863 6023856657 : if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS)) {
1864 5820154599 : if (be32_to_cpu(block->bb_u.s.bb_owner) !=
1865 5820154599 : cur->bc_ag.pag->pag_agno)
1866 0 : return __this_address;
1867 : return NULL;
1868 : }
1869 :
1870 203702058 : if (cur->bc_ino.flags & XFS_BTCUR_BMBT_INVALID_OWNER)
1871 : return NULL;
1872 :
1873 203702058 : if (be64_to_cpu(block->bb_u.l.bb_owner) != cur->bc_ino.ip->i_ino)
1874 0 : return __this_address;
1875 :
1876 : return NULL;
1877 : }
1878 :
1879 : int
1880 22517278017 : xfs_btree_lookup_get_block(
1881 : struct xfs_btree_cur *cur, /* btree cursor */
1882 : int level, /* level in the btree */
1883 : const union xfs_btree_ptr *pp, /* ptr to btree block */
1884 : struct xfs_btree_block **blkp) /* return btree block */
1885 : {
1886 22517278017 : struct xfs_buf *bp; /* buffer pointer for btree block */
1887 22517278017 : xfs_daddr_t daddr;
1888 22517278017 : int error = 0;
1889 :
1890 : /* special case the root block if in an inode */
1891 22517278017 : if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1892 380462668 : (level == cur->bc_nlevels - 1)) {
1893 139707981 : *blkp = xfs_btree_get_iroot(cur);
1894 139707958 : return 0;
1895 : }
1896 :
1897 : /*
1898 : * If the old buffer at this level for the disk address we are
1899 : * looking for re-use it.
1900 : *
1901 : * Otherwise throw it away and get a new one.
1902 : */
1903 22377570036 : bp = cur->bc_levels[level].bp;
1904 22377570036 : error = xfs_btree_ptr_to_daddr(cur, pp, &daddr);
1905 22387359174 : if (error)
1906 : return error;
1907 22387359174 : if (bp && xfs_buf_daddr(bp) == daddr) {
1908 15847217110 : *blkp = XFS_BUF_TO_BLOCK(bp);
1909 15847217110 : return 0;
1910 : }
1911 :
1912 6540142064 : error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
1913 6543475522 : if (error)
1914 : return error;
1915 :
1916 : /* Check the inode owner since the verifiers don't. */
1917 6543478836 : if (xfs_btree_check_block_owner(cur, *blkp) != NULL)
1918 0 : goto out_bad;
1919 :
1920 : /* Did we get the level we were looking for? */
1921 6542987060 : if (be16_to_cpu((*blkp)->bb_level) != level)
1922 0 : goto out_bad;
1923 :
1924 : /* Check that internal nodes have at least one record. */
1925 6542987060 : if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0)
1926 0 : goto out_bad;
1927 :
1928 6542987060 : xfs_btree_setbuf(cur, level, bp);
1929 6542987060 : return 0;
1930 :
1931 0 : out_bad:
1932 0 : *blkp = NULL;
1933 0 : xfs_buf_mark_corrupt(bp);
1934 0 : xfs_trans_brelse(cur->bc_tp, bp);
1935 0 : xfs_btree_mark_sick(cur);
1936 0 : return -EFSCORRUPTED;
1937 : }
1938 :
1939 : /*
1940 : * Get current search key. For level 0 we don't actually have a key
1941 : * structure so we make one up from the record. For all other levels
1942 : * we just return the right key.
1943 : */
1944 : STATIC union xfs_btree_key *
1945 88676765276 : xfs_lookup_get_search_key(
1946 : struct xfs_btree_cur *cur,
1947 : int level,
1948 : int keyno,
1949 : struct xfs_btree_block *block,
1950 : union xfs_btree_key *kp)
1951 : {
1952 88676765276 : if (level == 0) {
1953 69493324707 : cur->bc_ops->init_key_from_rec(kp,
1954 : xfs_btree_rec_addr(cur, keyno, block));
1955 69493324707 : return kp;
1956 : }
1957 :
1958 19183440569 : return xfs_btree_key_addr(cur, keyno, block);
1959 : }
1960 :
1961 : /*
1962 : * Lookup the record. The cursor is made to point to it, based on dir.
1963 : * stat is set to 0 if can't find any such record, 1 for success.
1964 : */
1965 : int /* error */
1966 14224203938 : xfs_btree_lookup(
1967 : struct xfs_btree_cur *cur, /* btree cursor */
1968 : xfs_lookup_t dir, /* <=, ==, or >= */
1969 : int *stat) /* success/failure */
1970 : {
1971 14224203938 : struct xfs_btree_block *block; /* current btree block */
1972 14224203938 : int64_t diff; /* difference for the current key */
1973 14224203938 : int error; /* error return value */
1974 14224203938 : int keyno; /* current key number */
1975 14224203938 : int level; /* level in the btree */
1976 14224203938 : union xfs_btree_ptr *pp; /* ptr to btree block */
1977 14224203938 : union xfs_btree_ptr ptr; /* ptr to btree block */
1978 :
1979 14224203938 : XFS_BTREE_STATS_INC(cur, lookup);
1980 :
1981 : /* No such thing as a zero-level tree. */
1982 14224203938 : if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0)) {
1983 0 : xfs_btree_mark_sick(cur);
1984 0 : return -EFSCORRUPTED;
1985 : }
1986 :
1987 14224203938 : block = NULL;
1988 14224203938 : keyno = 0;
1989 :
1990 : /* initialise start pointer from cursor */
1991 14224203938 : cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1992 14225374003 : pp = &ptr;
1993 :
1994 : /*
1995 : * Iterate over each level in the btree, starting at the root.
1996 : * For each level above the leaves, find the key we need, based
1997 : * on the lookup record, then follow the corresponding block
1998 : * pointer down to the next level.
1999 : */
2000 33652276764 : for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
2001 : /* Get the block we need to do the lookup on. */
2002 20030947462 : error = xfs_btree_lookup_get_block(cur, level, pp, &block);
2003 20032337920 : if (error)
2004 4546 : goto error0;
2005 :
2006 20032333374 : if (diff == 0) {
2007 : /*
2008 : * If we already had a key match at a higher level, we
2009 : * know we need to use the first entry in this block.
2010 : */
2011 : keyno = 1;
2012 : } else {
2013 : /* Otherwise search this block. Do a binary search. */
2014 :
2015 20017637441 : int high; /* high entry number */
2016 20017637441 : int low; /* low entry number */
2017 :
2018 : /* Set low and high entry numbers, 1-based. */
2019 20017637441 : low = 1;
2020 20017637441 : high = xfs_btree_get_numrecs(block);
2021 20017637441 : if (!high) {
2022 : /* Block is empty, must be an empty leaf. */
2023 608026904 : if (level != 0 || cur->bc_nlevels != 1) {
2024 0 : XFS_CORRUPTION_ERROR(__func__,
2025 : XFS_ERRLEVEL_LOW,
2026 : cur->bc_mp, block,
2027 : sizeof(*block));
2028 0 : xfs_btree_mark_sick(cur);
2029 0 : return -EFSCORRUPTED;
2030 : }
2031 :
2032 608026904 : cur->bc_levels[0].ptr = dir != XFS_LOOKUP_LE;
2033 608026904 : *stat = 0;
2034 608026904 : return 0;
2035 : }
2036 :
2037 : /* Binary search the block. */
2038 >10676*10^7 : while (low <= high) {
2039 88694912445 : union xfs_btree_key key;
2040 88694912445 : union xfs_btree_key *kp;
2041 :
2042 88694912445 : XFS_BTREE_STATS_INC(cur, compare);
2043 :
2044 : /* keyno is average of low and high. */
2045 88694912445 : keyno = (low + high) >> 1;
2046 :
2047 : /* Get current search key */
2048 88694912445 : kp = xfs_lookup_get_search_key(cur, level,
2049 : keyno, block, &key);
2050 :
2051 : /*
2052 : * Compute difference to get next direction:
2053 : * - less than, move right
2054 : * - greater than, move left
2055 : * - equal, we're done
2056 : */
2057 88633231141 : diff = cur->bc_ops->key_diff(cur, kp);
2058 88697566633 : if (diff < 0)
2059 49045215372 : low = keyno + 1;
2060 39652351261 : else if (diff > 0)
2061 38312353382 : high = keyno - 1;
2062 : else
2063 : break;
2064 : }
2065 : }
2066 :
2067 : /*
2068 : * If there are more levels, set up for the next level
2069 : * by getting the block number and filling in the cursor.
2070 : */
2071 19426960658 : if (level > 0) {
2072 : /*
2073 : * If we moved left, need the previous key number,
2074 : * unless there isn't one.
2075 : */
2076 5806596213 : if (diff > 0 && --keyno < 1)
2077 : keyno = 1;
2078 5806596213 : pp = xfs_btree_ptr_addr(cur, keyno, block);
2079 :
2080 5806375598 : error = xfs_btree_debug_check_ptr(cur, pp, 0, level);
2081 5806538316 : if (error)
2082 0 : goto error0;
2083 :
2084 5806538316 : cur->bc_levels[level].ptr = keyno;
2085 : }
2086 : }
2087 :
2088 : /* Done with the search. See if we need to adjust the results. */
2089 13621329302 : if (dir != XFS_LOOKUP_LE && diff < 0) {
2090 592810276 : keyno++;
2091 : /*
2092 : * If ge search and we went off the end of the block, but it's
2093 : * not the last block, we're in the wrong block.
2094 : */
2095 592810276 : xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
2096 1005927297 : if (dir == XFS_LOOKUP_GE &&
2097 202806326 : keyno > xfs_btree_get_numrecs(block) &&
2098 : !xfs_btree_ptr_is_null(cur, &ptr)) {
2099 1044505 : int i;
2100 :
2101 1044505 : cur->bc_levels[0].ptr = keyno;
2102 1044505 : error = xfs_btree_increment(cur, 0, &i);
2103 1044505 : if (error)
2104 0 : goto error0;
2105 1044505 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
2106 0 : xfs_btree_mark_sick(cur);
2107 1044505 : return -EFSCORRUPTED;
2108 : }
2109 1044505 : *stat = 1;
2110 1044505 : return 0;
2111 : }
2112 13028519026 : } else if (dir == XFS_LOOKUP_LE && diff > 0)
2113 6985527368 : keyno--;
2114 13620273824 : cur->bc_levels[0].ptr = keyno;
2115 :
2116 : /* Return if we succeeded or not. */
2117 23857436181 : if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
2118 3664680689 : *stat = 0;
2119 9955593135 : else if (dir != XFS_LOOKUP_EQ || diff == 0)
2120 9690966722 : *stat = 1;
2121 : else
2122 264626413 : *stat = 0;
2123 : return 0;
2124 :
2125 : error0:
2126 : return error;
2127 : }
2128 :
2129 : /* Find the high key storage area from a regular key. */
2130 : union xfs_btree_key *
2131 711728139 : xfs_btree_high_key_from_key(
2132 : struct xfs_btree_cur *cur,
2133 : union xfs_btree_key *key)
2134 : {
2135 711728139 : ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
2136 711728139 : return (union xfs_btree_key *)((char *)key +
2137 711728139 : (cur->bc_ops->key_len / 2));
2138 : }
2139 :
2140 : /* Determine the low (and high if overlapped) keys of a leaf block */
2141 : STATIC void
2142 670279221 : xfs_btree_get_leaf_keys(
2143 : struct xfs_btree_cur *cur,
2144 : struct xfs_btree_block *block,
2145 : union xfs_btree_key *key)
2146 : {
2147 670279221 : union xfs_btree_key max_hkey;
2148 670279221 : union xfs_btree_key hkey;
2149 670279221 : union xfs_btree_rec *rec;
2150 670279221 : union xfs_btree_key *high;
2151 670279221 : int n;
2152 :
2153 670279221 : rec = xfs_btree_rec_addr(cur, 1, block);
2154 670279221 : cur->bc_ops->init_key_from_rec(key, rec);
2155 :
2156 670272013 : if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2157 :
2158 325123187 : cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
2159 >16517*10^7 : for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
2160 82101500785 : rec = xfs_btree_rec_addr(cur, n, block);
2161 82101500785 : cur->bc_ops->init_high_key_from_rec(&hkey, rec);
2162 82107338000 : if (xfs_btree_keycmp_gt(cur, &hkey, &max_hkey))
2163 81806087668 : max_hkey = hkey;
2164 : }
2165 :
2166 325125162 : high = xfs_btree_high_key_from_key(cur, key);
2167 650247928 : memcpy(high, &max_hkey, cur->bc_ops->key_len / 2);
2168 : }
2169 670272790 : }
2170 :
2171 : /* Determine the low (and high if overlapped) keys of a node block */
2172 : STATIC void
2173 61560644 : xfs_btree_get_node_keys(
2174 : struct xfs_btree_cur *cur,
2175 : struct xfs_btree_block *block,
2176 : union xfs_btree_key *key)
2177 : {
2178 61560644 : union xfs_btree_key *hkey;
2179 61560644 : union xfs_btree_key *max_hkey;
2180 61560644 : union xfs_btree_key *high;
2181 61560644 : int n;
2182 :
2183 61560644 : if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2184 123025690 : memcpy(key, xfs_btree_key_addr(cur, 1, block),
2185 : cur->bc_ops->key_len / 2);
2186 :
2187 61512845 : max_hkey = xfs_btree_high_key_addr(cur, 1, block);
2188 10055273724 : for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
2189 4966124070 : hkey = xfs_btree_high_key_addr(cur, n, block);
2190 4966124070 : if (xfs_btree_keycmp_gt(cur, hkey, max_hkey))
2191 4954088921 : max_hkey = hkey;
2192 : }
2193 :
2194 61512792 : high = xfs_btree_high_key_from_key(cur, key);
2195 123025612 : memcpy(high, max_hkey, cur->bc_ops->key_len / 2);
2196 : } else {
2197 95598 : memcpy(key, xfs_btree_key_addr(cur, 1, block),
2198 : cur->bc_ops->key_len);
2199 : }
2200 61560605 : }
2201 :
2202 : /* Derive the keys for any btree block. */
2203 : void
2204 670261368 : xfs_btree_get_keys(
2205 : struct xfs_btree_cur *cur,
2206 : struct xfs_btree_block *block,
2207 : union xfs_btree_key *key)
2208 : {
2209 670261368 : if (be16_to_cpu(block->bb_level) == 0)
2210 669569630 : xfs_btree_get_leaf_keys(cur, block, key);
2211 : else
2212 692456 : xfs_btree_get_node_keys(cur, block, key);
2213 670257053 : }
2214 :
2215 : /*
2216 : * Decide if we need to update the parent keys of a btree block. For
2217 : * a standard btree this is only necessary if we're updating the first
2218 : * record/key. For an overlapping btree, we must always update the
2219 : * keys because the highest key can be in any of the records or keys
2220 : * in the block.
2221 : */
2222 : static inline bool
2223 : xfs_btree_needs_key_update(
2224 : struct xfs_btree_cur *cur,
2225 : int ptr)
2226 : {
2227 919889974 : return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
2228 : }
2229 :
2230 : /*
2231 : * Update the low and high parent keys of the given level, progressing
2232 : * towards the root. If force_all is false, stop if the keys for a given
2233 : * level do not need updating.
2234 : */
2235 : STATIC int
2236 328669538 : __xfs_btree_updkeys(
2237 : struct xfs_btree_cur *cur,
2238 : int level,
2239 : struct xfs_btree_block *block,
2240 : struct xfs_buf *bp0,
2241 : bool force_all)
2242 : {
2243 328669538 : union xfs_btree_key key; /* keys from current level */
2244 328669538 : union xfs_btree_key *lkey; /* keys from the next level up */
2245 328669538 : union xfs_btree_key *hkey;
2246 328669538 : union xfs_btree_key *nlkey; /* keys from the next level up */
2247 328669538 : union xfs_btree_key *nhkey;
2248 328669538 : struct xfs_buf *bp;
2249 328669538 : int ptr;
2250 :
2251 328669538 : ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
2252 :
2253 : /* Exit if there aren't any parent levels to update. */
2254 328669538 : if (level + 1 >= cur->bc_nlevels)
2255 : return 0;
2256 :
2257 315248673 : trace_xfs_btree_updkeys(cur, level, bp0);
2258 :
2259 315249612 : lkey = &key;
2260 315249612 : hkey = xfs_btree_high_key_from_key(cur, lkey);
2261 315250247 : xfs_btree_get_keys(cur, block, lkey);
2262 691360721 : for (level++; level < cur->bc_nlevels; level++) {
2263 : #ifdef DEBUG
2264 376110474 : int error;
2265 : #endif
2266 376110474 : block = xfs_btree_get_block(cur, level, &bp);
2267 376110516 : trace_xfs_btree_updkeys(cur, level, bp);
2268 : #ifdef DEBUG
2269 376110696 : error = xfs_btree_check_block(cur, block, level, bp);
2270 376108701 : if (error)
2271 0 : return error;
2272 : #endif
2273 376108701 : ptr = cur->bc_levels[level].ptr;
2274 376108701 : nlkey = xfs_btree_key_addr(cur, ptr, block);
2275 376108701 : nhkey = xfs_btree_high_key_addr(cur, ptr, block);
2276 751975197 : if (!force_all &&
2277 340748146 : xfs_btree_keycmp_eq(cur, nlkey, lkey) &&
2278 : xfs_btree_keycmp_eq(cur, nhkey, hkey))
2279 : break;
2280 87159008 : xfs_btree_copy_keys(cur, nlkey, lkey, 1);
2281 87159522 : xfs_btree_log_keys(cur, bp, ptr, ptr);
2282 87159537 : if (level + 1 >= cur->bc_nlevels)
2283 : break;
2284 60860046 : xfs_btree_get_node_keys(cur, block, lkey);
2285 : }
2286 :
2287 : return 0;
2288 : }
2289 :
2290 : /* Update all the keys from some level in cursor back to the root. */
2291 : STATIC int
2292 135991 : xfs_btree_updkeys_force(
2293 : struct xfs_btree_cur *cur,
2294 : int level)
2295 : {
2296 135991 : struct xfs_buf *bp;
2297 135991 : struct xfs_btree_block *block;
2298 :
2299 135991 : block = xfs_btree_get_block(cur, level, &bp);
2300 135991 : return __xfs_btree_updkeys(cur, level, block, bp, true);
2301 : }
2302 :
2303 : /*
2304 : * Update the parent keys of the given level, progressing towards the root.
2305 : */
2306 : STATIC int
2307 666062429 : xfs_btree_update_keys(
2308 : struct xfs_btree_cur *cur,
2309 : int level)
2310 : {
2311 666062429 : struct xfs_btree_block *block;
2312 666062429 : struct xfs_buf *bp;
2313 666062429 : union xfs_btree_key *kp;
2314 666062429 : union xfs_btree_key key;
2315 666062429 : int ptr;
2316 :
2317 666062429 : ASSERT(level >= 0);
2318 :
2319 666062429 : block = xfs_btree_get_block(cur, level, &bp);
2320 666080756 : if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
2321 328535588 : return __xfs_btree_updkeys(cur, level, block, bp, false);
2322 :
2323 : /*
2324 : * Go up the tree from this level toward the root.
2325 : * At each level, update the key value to the value input.
2326 : * Stop when we reach a level where the cursor isn't pointing
2327 : * at the first entry in the block.
2328 : */
2329 337545168 : xfs_btree_get_keys(cur, block, &key);
2330 417868090 : for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
2331 : #ifdef DEBUG
2332 80324412 : int error;
2333 : #endif
2334 80324412 : block = xfs_btree_get_block(cur, level, &bp);
2335 : #ifdef DEBUG
2336 80332989 : error = xfs_btree_check_block(cur, block, level, bp);
2337 80333316 : if (error)
2338 0 : return error;
2339 : #endif
2340 80333316 : ptr = cur->bc_levels[level].ptr;
2341 80333316 : kp = xfs_btree_key_addr(cur, ptr, block);
2342 80333316 : xfs_btree_copy_keys(cur, kp, &key, 1);
2343 80333404 : xfs_btree_log_keys(cur, bp, ptr, ptr);
2344 : }
2345 :
2346 : return 0;
2347 : }
2348 :
2349 : /*
2350 : * Update the record referred to by cur to the value in the
2351 : * given record. This either works (return 0) or gets an
2352 : * EFSCORRUPTED error.
2353 : */
2354 : int
2355 413987069 : xfs_btree_update(
2356 : struct xfs_btree_cur *cur,
2357 : union xfs_btree_rec *rec)
2358 : {
2359 413987069 : struct xfs_btree_block *block;
2360 413987069 : struct xfs_buf *bp;
2361 413987069 : int error;
2362 413987069 : int ptr;
2363 413987069 : union xfs_btree_rec *rp;
2364 :
2365 : /* Pick up the current block. */
2366 413987069 : block = xfs_btree_get_block(cur, 0, &bp);
2367 :
2368 : #ifdef DEBUG
2369 414004373 : error = xfs_btree_check_block(cur, block, 0, bp);
2370 413973449 : if (error)
2371 0 : goto error0;
2372 : #endif
2373 : /* Get the address of the rec to be updated. */
2374 413973449 : ptr = cur->bc_levels[0].ptr;
2375 413973449 : rp = xfs_btree_rec_addr(cur, ptr, block);
2376 :
2377 : /* Fill in the new contents and log them. */
2378 413973449 : xfs_btree_copy_recs(cur, rp, rec, 1);
2379 414002765 : xfs_btree_log_recs(cur, bp, ptr, ptr);
2380 :
2381 : /*
2382 : * If we are tracking the last record in the tree and
2383 : * we are at the far right edge of the tree, update it.
2384 : */
2385 414003467 : if (xfs_btree_is_lastrec(cur, block, 0)) {
2386 0 : cur->bc_ops->update_lastrec(cur, block, rec,
2387 : ptr, LASTREC_UPDATE);
2388 : }
2389 :
2390 : /* Pass new key value up to our parent. */
2391 795882358 : if (xfs_btree_needs_key_update(cur, ptr)) {
2392 114214142 : error = xfs_btree_update_keys(cur, 0);
2393 114207079 : if (error)
2394 0 : goto error0;
2395 : }
2396 :
2397 : return 0;
2398 :
2399 : error0:
2400 : return error;
2401 : }
2402 :
2403 : /*
2404 : * Move 1 record left from cur/level if possible.
2405 : * Update cur to reflect the new path.
2406 : */
2407 : STATIC int /* error */
2408 62220545 : xfs_btree_lshift(
2409 : struct xfs_btree_cur *cur,
2410 : int level,
2411 : int *stat) /* success/failure */
2412 : {
2413 62220545 : struct xfs_buf *lbp; /* left buffer pointer */
2414 62220545 : struct xfs_btree_block *left; /* left btree block */
2415 62220545 : int lrecs; /* left record count */
2416 62220545 : struct xfs_buf *rbp; /* right buffer pointer */
2417 62220545 : struct xfs_btree_block *right; /* right btree block */
2418 62220545 : struct xfs_btree_cur *tcur; /* temporary btree cursor */
2419 62220545 : int rrecs; /* right record count */
2420 62220545 : union xfs_btree_ptr lptr; /* left btree pointer */
2421 62220545 : union xfs_btree_key *rkp = NULL; /* right btree key */
2422 62220545 : union xfs_btree_ptr *rpp = NULL; /* right address pointer */
2423 62220545 : union xfs_btree_rec *rrp = NULL; /* right record pointer */
2424 62220545 : int error; /* error return value */
2425 62220545 : int i;
2426 :
2427 62220545 : if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2428 27459798 : level == cur->bc_nlevels - 1)
2429 0 : goto out0;
2430 :
2431 : /* Set up variables for this block as "right". */
2432 62220545 : right = xfs_btree_get_block(cur, level, &rbp);
2433 :
2434 : #ifdef DEBUG
2435 62220532 : error = xfs_btree_check_block(cur, right, level, rbp);
2436 62220506 : if (error)
2437 0 : goto error0;
2438 : #endif
2439 :
2440 : /* If we've got no left sibling then we can't shift an entry left. */
2441 62220506 : xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2442 124441066 : if (xfs_btree_ptr_is_null(cur, &lptr))
2443 34699 : goto out0;
2444 :
2445 : /*
2446 : * If the cursor entry is the one that would be moved, don't
2447 : * do it... it's too complicated.
2448 : */
2449 62185834 : if (cur->bc_levels[level].ptr <= 1)
2450 9582 : goto out0;
2451 :
2452 : /* Set up the left neighbor as "left". */
2453 62176252 : error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
2454 62176161 : if (error)
2455 45 : goto error0;
2456 :
2457 : /* If it's full, it can't take another entry. */
2458 62176116 : lrecs = xfs_btree_get_numrecs(left);
2459 62176116 : if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
2460 632622 : goto out0;
2461 :
2462 61543579 : rrecs = xfs_btree_get_numrecs(right);
2463 :
2464 : /*
2465 : * We add one entry to the left side and remove one for the right side.
2466 : * Account for it here, the changes will be updated on disk and logged
2467 : * later.
2468 : */
2469 61543579 : lrecs++;
2470 61543579 : rrecs--;
2471 :
2472 61543579 : XFS_BTREE_STATS_INC(cur, lshift);
2473 61543579 : XFS_BTREE_STATS_ADD(cur, moves, 1);
2474 :
2475 : /*
2476 : * If non-leaf, copy a key and a ptr to the left block.
2477 : * Log the changes to the left block.
2478 : */
2479 61543579 : if (level > 0) {
2480 : /* It's a non-leaf. Move keys and pointers. */
2481 79602 : union xfs_btree_key *lkp; /* left btree key */
2482 79602 : union xfs_btree_ptr *lpp; /* left address pointer */
2483 :
2484 79602 : lkp = xfs_btree_key_addr(cur, lrecs, left);
2485 79602 : rkp = xfs_btree_key_addr(cur, 1, right);
2486 :
2487 79602 : lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2488 79592 : rpp = xfs_btree_ptr_addr(cur, 1, right);
2489 :
2490 79592 : error = xfs_btree_debug_check_ptr(cur, rpp, 0, level);
2491 79592 : if (error)
2492 0 : goto error0;
2493 :
2494 79592 : xfs_btree_copy_keys(cur, lkp, rkp, 1);
2495 79592 : xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
2496 :
2497 79592 : xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
2498 79592 : xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
2499 :
2500 79592 : ASSERT(cur->bc_ops->keys_inorder(cur,
2501 : xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
2502 : } else {
2503 : /* It's a leaf. Move records. */
2504 61463977 : union xfs_btree_rec *lrp; /* left record pointer */
2505 :
2506 61463977 : lrp = xfs_btree_rec_addr(cur, lrecs, left);
2507 61463977 : rrp = xfs_btree_rec_addr(cur, 1, right);
2508 :
2509 61463977 : xfs_btree_copy_recs(cur, lrp, rrp, 1);
2510 61463991 : xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
2511 :
2512 61463989 : ASSERT(cur->bc_ops->recs_inorder(cur,
2513 : xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
2514 : }
2515 :
2516 61543591 : xfs_btree_set_numrecs(left, lrecs);
2517 61543591 : xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2518 :
2519 61543585 : xfs_btree_set_numrecs(right, rrecs);
2520 61543585 : xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2521 :
2522 : /*
2523 : * Slide the contents of right down one entry.
2524 : */
2525 61543553 : XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
2526 61543553 : if (level > 0) {
2527 : /* It's a nonleaf. operate on keys and ptrs */
2528 14070212 : for (i = 0; i < rrecs; i++) {
2529 13990620 : error = xfs_btree_debug_check_ptr(cur, rpp, i + 1, level);
2530 13990620 : if (error)
2531 0 : goto error0;
2532 : }
2533 :
2534 79592 : xfs_btree_shift_keys(cur,
2535 : xfs_btree_key_addr(cur, 2, right),
2536 : -1, rrecs);
2537 79592 : xfs_btree_shift_ptrs(cur,
2538 : xfs_btree_ptr_addr(cur, 2, right),
2539 : -1, rrecs);
2540 :
2541 79592 : xfs_btree_log_keys(cur, rbp, 1, rrecs);
2542 79592 : xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2543 : } else {
2544 : /* It's a leaf. operate on records */
2545 61463961 : xfs_btree_shift_recs(cur,
2546 : xfs_btree_rec_addr(cur, 2, right),
2547 : -1, rrecs);
2548 61463988 : xfs_btree_log_recs(cur, rbp, 1, rrecs);
2549 : }
2550 :
2551 : /*
2552 : * Using a temporary cursor, update the parent key values of the
2553 : * block on the left.
2554 : */
2555 61543597 : if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2556 18585551 : error = xfs_btree_dup_cursor(cur, &tcur);
2557 18585560 : if (error)
2558 0 : goto error0;
2559 18585560 : i = xfs_btree_firstrec(tcur, level);
2560 18585556 : if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
2561 0 : xfs_btree_mark_sick(cur);
2562 0 : error = -EFSCORRUPTED;
2563 0 : goto error0;
2564 : }
2565 :
2566 18585556 : error = xfs_btree_decrement(tcur, level, &i);
2567 18585544 : if (error)
2568 0 : goto error1;
2569 :
2570 : /* Update the parent high keys of the left block, if needed. */
2571 18585544 : error = xfs_btree_update_keys(tcur, level);
2572 18585557 : if (error)
2573 0 : goto error1;
2574 :
2575 18585557 : xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2576 : }
2577 :
2578 : /* Update the parent keys of the right block. */
2579 61543597 : error = xfs_btree_update_keys(cur, level);
2580 61543586 : if (error)
2581 0 : goto error0;
2582 :
2583 : /* Slide the cursor value left one. */
2584 61543586 : cur->bc_levels[level].ptr--;
2585 :
2586 61543586 : *stat = 1;
2587 61543586 : return 0;
2588 :
2589 676903 : out0:
2590 676903 : *stat = 0;
2591 676903 : return 0;
2592 :
2593 : error0:
2594 : return error;
2595 :
2596 0 : error1:
2597 0 : xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2598 0 : return error;
2599 : }
2600 :
2601 : /*
2602 : * Move 1 record right from cur/level if possible.
2603 : * Update cur to reflect the new path.
2604 : */
2605 : STATIC int /* error */
2606 98138190 : xfs_btree_rshift(
2607 : struct xfs_btree_cur *cur,
2608 : int level,
2609 : int *stat) /* success/failure */
2610 : {
2611 98138190 : struct xfs_buf *lbp; /* left buffer pointer */
2612 98138190 : struct xfs_btree_block *left; /* left btree block */
2613 98138190 : struct xfs_buf *rbp; /* right buffer pointer */
2614 98138190 : struct xfs_btree_block *right; /* right btree block */
2615 98138190 : struct xfs_btree_cur *tcur; /* temporary btree cursor */
2616 98138190 : union xfs_btree_ptr rptr; /* right block pointer */
2617 98138190 : union xfs_btree_key *rkp; /* right btree key */
2618 98138190 : int rrecs; /* right record count */
2619 98138190 : int lrecs; /* left record count */
2620 98138190 : int error; /* error return value */
2621 98138190 : int i; /* loop counter */
2622 :
2623 98138190 : if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2624 47195752 : (level == cur->bc_nlevels - 1))
2625 0 : goto out0;
2626 :
2627 : /* Set up variables for this block as "left". */
2628 98138190 : left = xfs_btree_get_block(cur, level, &lbp);
2629 :
2630 : #ifdef DEBUG
2631 98137296 : error = xfs_btree_check_block(cur, left, level, lbp);
2632 98137159 : if (error)
2633 0 : goto error0;
2634 : #endif
2635 :
2636 : /* If we've got no right sibling then we can't shift an entry right. */
2637 98137159 : xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2638 196274782 : if (xfs_btree_ptr_is_null(cur, &rptr))
2639 39807571 : goto out0;
2640 :
2641 : /*
2642 : * If the cursor entry is the one that would be moved, don't
2643 : * do it... it's too complicated.
2644 : */
2645 58329820 : lrecs = xfs_btree_get_numrecs(left);
2646 58329820 : if (cur->bc_levels[level].ptr >= lrecs)
2647 6936569 : goto out0;
2648 :
2649 : /* Set up the right neighbor as "right". */
2650 51393251 : error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
2651 51386671 : if (error)
2652 116 : goto error0;
2653 :
2654 : /* If it's full, it can't take another entry. */
2655 51386555 : rrecs = xfs_btree_get_numrecs(right);
2656 51386555 : if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2657 6162272 : goto out0;
2658 :
2659 45227101 : XFS_BTREE_STATS_INC(cur, rshift);
2660 45227101 : XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2661 :
2662 : /*
2663 : * Make a hole at the start of the right neighbor block, then
2664 : * copy the last left block entry to the hole.
2665 : */
2666 45227101 : if (level > 0) {
2667 : /* It's a nonleaf. make a hole in the keys and ptrs */
2668 15759 : union xfs_btree_key *lkp;
2669 15759 : union xfs_btree_ptr *lpp;
2670 15759 : union xfs_btree_ptr *rpp;
2671 :
2672 15759 : lkp = xfs_btree_key_addr(cur, lrecs, left);
2673 15759 : lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2674 15759 : rkp = xfs_btree_key_addr(cur, 1, right);
2675 15759 : rpp = xfs_btree_ptr_addr(cur, 1, right);
2676 :
2677 1037803 : for (i = rrecs - 1; i >= 0; i--) {
2678 1022044 : error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
2679 1022044 : if (error)
2680 0 : goto error0;
2681 : }
2682 :
2683 15759 : xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2684 15759 : xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2685 :
2686 15759 : error = xfs_btree_debug_check_ptr(cur, lpp, 0, level);
2687 15759 : if (error)
2688 0 : goto error0;
2689 :
2690 : /* Now put the new data in, and log it. */
2691 15759 : xfs_btree_copy_keys(cur, rkp, lkp, 1);
2692 15759 : xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2693 :
2694 15759 : xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2695 15759 : xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2696 :
2697 15759 : ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
2698 : xfs_btree_key_addr(cur, 2, right)));
2699 : } else {
2700 : /* It's a leaf. make a hole in the records */
2701 45211342 : union xfs_btree_rec *lrp;
2702 45211342 : union xfs_btree_rec *rrp;
2703 :
2704 45211342 : lrp = xfs_btree_rec_addr(cur, lrecs, left);
2705 45211342 : rrp = xfs_btree_rec_addr(cur, 1, right);
2706 :
2707 45211342 : xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2708 :
2709 : /* Now put the new data in, and log it. */
2710 45212233 : xfs_btree_copy_recs(cur, rrp, lrp, 1);
2711 45212219 : xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2712 : }
2713 :
2714 : /*
2715 : * Decrement and log left's numrecs, bump and log right's numrecs.
2716 : */
2717 45229192 : xfs_btree_set_numrecs(left, --lrecs);
2718 45229192 : xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2719 :
2720 45228548 : xfs_btree_set_numrecs(right, ++rrecs);
2721 45228548 : xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2722 :
2723 : /*
2724 : * Using a temporary cursor, update the parent key values of the
2725 : * block on the right.
2726 : */
2727 45228368 : error = xfs_btree_dup_cursor(cur, &tcur);
2728 45230435 : if (error)
2729 0 : goto error0;
2730 45230435 : i = xfs_btree_lastrec(tcur, level);
2731 45230688 : if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
2732 0 : xfs_btree_mark_sick(cur);
2733 0 : error = -EFSCORRUPTED;
2734 0 : goto error0;
2735 : }
2736 :
2737 45230688 : error = xfs_btree_increment(tcur, level, &i);
2738 45231666 : if (error)
2739 0 : goto error1;
2740 :
2741 : /* Update the parent high keys of the left block, if needed. */
2742 45231666 : if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2743 15539348 : error = xfs_btree_update_keys(cur, level);
2744 15539347 : if (error)
2745 0 : goto error1;
2746 : }
2747 :
2748 : /* Update the parent keys of the right block. */
2749 45231665 : error = xfs_btree_update_keys(tcur, level);
2750 45231644 : if (error)
2751 0 : goto error1;
2752 :
2753 45231644 : xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2754 :
2755 45231657 : *stat = 1;
2756 45231657 : return 0;
2757 :
2758 52906412 : out0:
2759 52906412 : *stat = 0;
2760 52906412 : return 0;
2761 :
2762 : error0:
2763 : return error;
2764 :
2765 0 : error1:
2766 0 : xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2767 0 : return error;
2768 : }
2769 :
2770 : static inline int
2771 703023 : xfs_btree_alloc_block(
2772 : struct xfs_btree_cur *cur,
2773 : const union xfs_btree_ptr *hint_block,
2774 : union xfs_btree_ptr *new_block,
2775 : int *stat)
2776 : {
2777 703023 : int error;
2778 :
2779 703023 : error = cur->bc_ops->alloc_block(cur, hint_block, new_block, stat);
2780 703023 : trace_xfs_btree_alloc_block(cur, new_block, *stat, error);
2781 703023 : return error;
2782 : }
2783 :
2784 : /*
2785 : * Split cur/level block in half.
2786 : * Return new block number and the key to its first
2787 : * record (to be inserted into parent).
2788 : */
2789 : STATIC int /* error */
2790 676903 : __xfs_btree_split(
2791 : struct xfs_btree_cur *cur,
2792 : int level,
2793 : union xfs_btree_ptr *ptrp,
2794 : union xfs_btree_key *key,
2795 : struct xfs_btree_cur **curp,
2796 : int *stat) /* success/failure */
2797 : {
2798 676903 : union xfs_btree_ptr lptr; /* left sibling block ptr */
2799 676903 : struct xfs_buf *lbp; /* left buffer pointer */
2800 676903 : struct xfs_btree_block *left; /* left btree block */
2801 676903 : union xfs_btree_ptr rptr; /* right sibling block ptr */
2802 676903 : struct xfs_buf *rbp; /* right buffer pointer */
2803 676903 : struct xfs_btree_block *right; /* right btree block */
2804 676903 : union xfs_btree_ptr rrptr; /* right-right sibling ptr */
2805 676903 : struct xfs_buf *rrbp; /* right-right buffer pointer */
2806 676903 : struct xfs_btree_block *rrblock; /* right-right btree block */
2807 676903 : int lrecs;
2808 676903 : int rrecs;
2809 676903 : int src_index;
2810 676903 : int error; /* error return value */
2811 676903 : int i;
2812 :
2813 676903 : XFS_BTREE_STATS_INC(cur, split);
2814 :
2815 : /* Set up left block (current one). */
2816 676903 : left = xfs_btree_get_block(cur, level, &lbp);
2817 :
2818 : #ifdef DEBUG
2819 676903 : error = xfs_btree_check_block(cur, left, level, lbp);
2820 676903 : if (error)
2821 0 : goto error0;
2822 : #endif
2823 :
2824 676903 : xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2825 :
2826 : /* Allocate the new block. If we can't do it, we're toast. Give up. */
2827 676903 : error = xfs_btree_alloc_block(cur, &lptr, &rptr, stat);
2828 676903 : if (error)
2829 1 : goto error0;
2830 676902 : if (*stat == 0)
2831 0 : goto out0;
2832 676902 : XFS_BTREE_STATS_INC(cur, alloc);
2833 :
2834 : /* Set up the new block as "right". */
2835 676902 : error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp);
2836 676902 : if (error)
2837 0 : goto error0;
2838 :
2839 : /* Fill in the btree header for the new right block. */
2840 1353804 : xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
2841 :
2842 : /*
2843 : * Split the entries between the old and the new block evenly.
2844 : * Make sure that if there's an odd number of entries now, that
2845 : * each new block will have the same number of entries.
2846 : */
2847 676902 : lrecs = xfs_btree_get_numrecs(left);
2848 676902 : rrecs = lrecs / 2;
2849 676902 : if ((lrecs & 1) && cur->bc_levels[level].ptr <= rrecs + 1)
2850 28767 : rrecs++;
2851 676902 : src_index = (lrecs - rrecs + 1);
2852 :
2853 676902 : XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2854 :
2855 : /* Adjust numrecs for the later get_*_keys() calls. */
2856 676902 : lrecs -= rrecs;
2857 676902 : xfs_btree_set_numrecs(left, lrecs);
2858 1353804 : xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2859 :
2860 : /*
2861 : * Copy btree block entries from the left block over to the
2862 : * new block, the right. Update the right block and log the
2863 : * changes.
2864 : */
2865 676902 : if (level > 0) {
2866 : /* It's a non-leaf. Move keys and pointers. */
2867 2481 : union xfs_btree_key *lkp; /* left btree key */
2868 2481 : union xfs_btree_ptr *lpp; /* left address pointer */
2869 2481 : union xfs_btree_key *rkp; /* right btree key */
2870 2481 : union xfs_btree_ptr *rpp; /* right address pointer */
2871 :
2872 2481 : lkp = xfs_btree_key_addr(cur, src_index, left);
2873 2481 : lpp = xfs_btree_ptr_addr(cur, src_index, left);
2874 2481 : rkp = xfs_btree_key_addr(cur, 1, right);
2875 2481 : rpp = xfs_btree_ptr_addr(cur, 1, right);
2876 :
2877 4962 : for (i = src_index; i < rrecs; i++) {
2878 0 : error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
2879 0 : if (error)
2880 0 : goto error0;
2881 : }
2882 :
2883 : /* Copy the keys & pointers to the new block. */
2884 2481 : xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2885 2481 : xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2886 :
2887 2481 : xfs_btree_log_keys(cur, rbp, 1, rrecs);
2888 2481 : xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2889 :
2890 : /* Stash the keys of the new block for later insertion. */
2891 2481 : xfs_btree_get_node_keys(cur, right, key);
2892 : } else {
2893 : /* It's a leaf. Move records. */
2894 674421 : union xfs_btree_rec *lrp; /* left record pointer */
2895 674421 : union xfs_btree_rec *rrp; /* right record pointer */
2896 :
2897 674421 : lrp = xfs_btree_rec_addr(cur, src_index, left);
2898 674421 : rrp = xfs_btree_rec_addr(cur, 1, right);
2899 :
2900 : /* Copy records to the new block. */
2901 674421 : xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2902 674421 : xfs_btree_log_recs(cur, rbp, 1, rrecs);
2903 :
2904 : /* Stash the keys of the new block for later insertion. */
2905 674421 : xfs_btree_get_leaf_keys(cur, right, key);
2906 : }
2907 :
2908 : /*
2909 : * Find the left block number by looking in the buffer.
2910 : * Adjust sibling pointers.
2911 : */
2912 676902 : xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2913 676902 : xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2914 676902 : xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2915 676902 : xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2916 :
2917 676902 : xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2918 676902 : xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2919 :
2920 : /*
2921 : * If there's a block to the new block's right, make that block
2922 : * point back to right instead of to left.
2923 : */
2924 1353804 : if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
2925 340146 : error = xfs_btree_read_buf_block(cur, &rrptr,
2926 : 0, &rrblock, &rrbp);
2927 340146 : if (error)
2928 233 : goto error0;
2929 339913 : xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2930 339913 : xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2931 : }
2932 :
2933 : /* Update the parent high keys of the left block, if needed. */
2934 676669 : if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2935 335514 : error = xfs_btree_update_keys(cur, level);
2936 335514 : if (error)
2937 0 : goto error0;
2938 : }
2939 :
2940 : /*
2941 : * If the cursor is really in the right block, move it there.
2942 : * If it's just pointing past the last entry in left, then we'll
2943 : * insert there, so don't change anything in that case.
2944 : */
2945 676669 : if (cur->bc_levels[level].ptr > lrecs + 1) {
2946 518290 : xfs_btree_setbuf(cur, level, rbp);
2947 518290 : cur->bc_levels[level].ptr -= lrecs;
2948 : }
2949 : /*
2950 : * If there are more levels, we'll need another cursor which refers
2951 : * the right block, no matter where this cursor was.
2952 : */
2953 676669 : if (level + 1 < cur->bc_nlevels) {
2954 653003 : error = xfs_btree_dup_cursor(cur, curp);
2955 653003 : if (error)
2956 7 : goto error0;
2957 652996 : (*curp)->bc_levels[level + 1].ptr++;
2958 : }
2959 676662 : *ptrp = rptr;
2960 676662 : *stat = 1;
2961 676662 : return 0;
2962 : out0:
2963 0 : *stat = 0;
2964 0 : return 0;
2965 :
2966 : error0:
2967 : return error;
2968 : }
2969 :
2970 : #ifdef __KERNEL__
2971 : struct xfs_btree_split_args {
2972 : struct xfs_btree_cur *cur;
2973 : int level;
2974 : union xfs_btree_ptr *ptrp;
2975 : union xfs_btree_key *key;
2976 : struct xfs_btree_cur **curp;
2977 : int *stat; /* success/failure */
2978 : int result;
2979 : bool kswapd; /* allocation in kswapd context */
2980 : struct completion *done;
2981 : struct work_struct work;
2982 : };
2983 :
2984 : /*
2985 : * Stack switching interfaces for allocation
2986 : */
2987 : static void
2988 12428 : xfs_btree_split_worker(
2989 : struct work_struct *work)
2990 : {
2991 12428 : struct xfs_btree_split_args *args = container_of(work,
2992 : struct xfs_btree_split_args, work);
2993 12428 : unsigned long pflags;
2994 12428 : unsigned long new_pflags = 0;
2995 :
2996 : /*
2997 : * we are in a transaction context here, but may also be doing work
2998 : * in kswapd context, and hence we may need to inherit that state
2999 : * temporarily to ensure that we don't block waiting for memory reclaim
3000 : * in any way.
3001 : */
3002 12428 : if (args->kswapd)
3003 0 : new_pflags |= PF_MEMALLOC | PF_KSWAPD;
3004 :
3005 12428 : current_set_flags_nested(&pflags, new_pflags);
3006 12428 : xfs_trans_set_context(args->cur->bc_tp);
3007 :
3008 12428 : args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
3009 : args->key, args->curp, args->stat);
3010 :
3011 12428 : xfs_trans_clear_context(args->cur->bc_tp);
3012 12428 : current_restore_flags_nested(&pflags, new_pflags);
3013 :
3014 : /*
3015 : * Do not access args after complete() has run here. We don't own args
3016 : * and the owner may run and free args before we return here.
3017 : */
3018 12428 : complete(args->done);
3019 :
3020 12428 : }
3021 :
3022 : /*
3023 : * BMBT split requests often come in with little stack to work on so we push
3024 : * them off to a worker thread so there is lots of stack to use. For the other
3025 : * btree types, just call directly to avoid the context switch overhead here.
3026 : *
3027 : * Care must be taken here - the work queue rescuer thread introduces potential
3028 : * AGF <> worker queue deadlocks if the BMBT block allocation has to lock new
3029 : * AGFs to allocate blocks. A task being run by the rescuer could attempt to
3030 : * lock an AGF that is already locked by a task queued to run by the rescuer,
3031 : * resulting in an ABBA deadlock as the rescuer cannot run the lock holder to
3032 : * release it until the current thread it is running gains the lock.
3033 : *
3034 : * To avoid this issue, we only ever queue BMBT splits that don't have an AGF
3035 : * already locked to allocate from. The only place that doesn't hold an AGF
3036 : * locked is unwritten extent conversion at IO completion, but that has already
3037 : * been offloaded to a worker thread and hence has no stack consumption issues
3038 : * we have to worry about.
3039 : */
3040 : STATIC int /* error */
3041 676903 : xfs_btree_split(
3042 : struct xfs_btree_cur *cur,
3043 : int level,
3044 : union xfs_btree_ptr *ptrp,
3045 : union xfs_btree_key *key,
3046 : struct xfs_btree_cur **curp,
3047 : int *stat) /* success/failure */
3048 : {
3049 676903 : struct xfs_btree_split_args args;
3050 676903 : DECLARE_COMPLETION_ONSTACK(done);
3051 :
3052 676903 : if (cur->bc_btnum != XFS_BTNUM_BMAP ||
3053 215957 : cur->bc_tp->t_highest_agno == NULLAGNUMBER)
3054 664475 : return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
3055 :
3056 12428 : args.cur = cur;
3057 12428 : args.level = level;
3058 12428 : args.ptrp = ptrp;
3059 12428 : args.key = key;
3060 12428 : args.curp = curp;
3061 12428 : args.stat = stat;
3062 12428 : args.done = &done;
3063 12428 : args.kswapd = current_is_kswapd();
3064 12428 : INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
3065 12428 : queue_work(xfs_alloc_wq, &args.work);
3066 12428 : wait_for_completion(&done);
3067 12428 : destroy_work_on_stack(&args.work);
3068 12428 : return args.result;
3069 : }
3070 : #else
3071 : #define xfs_btree_split __xfs_btree_split
3072 : #endif /* __KERNEL__ */
3073 :
3074 :
3075 : /*
3076 : * Copy the old inode root contents into a real block and make the
3077 : * broot point to it.
3078 : */
3079 : int /* error */
3080 2454 : xfs_btree_new_iroot(
3081 : struct xfs_btree_cur *cur, /* btree cursor */
3082 : int *logflags, /* logging flags for inode */
3083 : int *stat) /* return status - 0 fail */
3084 : {
3085 2454 : struct xfs_buf *cbp; /* buffer for cblock */
3086 2454 : struct xfs_btree_block *block; /* btree block */
3087 2454 : struct xfs_btree_block *cblock; /* child btree block */
3088 2454 : union xfs_btree_key *ckp; /* child key pointer */
3089 2454 : union xfs_btree_ptr *cpp; /* child ptr pointer */
3090 2454 : union xfs_btree_key *kp; /* pointer to btree key */
3091 2454 : union xfs_btree_ptr *pp; /* pointer to block addr */
3092 2454 : union xfs_btree_ptr nptr; /* new block addr */
3093 2454 : int level; /* btree level */
3094 2454 : int error; /* error return code */
3095 2454 : int i; /* loop counter */
3096 :
3097 2454 : XFS_BTREE_STATS_INC(cur, newroot);
3098 :
3099 2454 : ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
3100 :
3101 2454 : level = cur->bc_nlevels - 1;
3102 :
3103 2454 : block = xfs_btree_get_iroot(cur);
3104 2454 : pp = xfs_btree_ptr_addr(cur, 1, block);
3105 :
3106 : /* Allocate the new block. If we can't do it, we're toast. Give up. */
3107 2454 : error = xfs_btree_alloc_block(cur, pp, &nptr, stat);
3108 2454 : if (error)
3109 0 : goto error0;
3110 2454 : if (*stat == 0)
3111 : return 0;
3112 :
3113 2454 : XFS_BTREE_STATS_INC(cur, alloc);
3114 :
3115 : /* Copy the root into a real block. */
3116 2454 : error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp);
3117 2454 : if (error)
3118 0 : goto error0;
3119 :
3120 : /*
3121 : * we can't just memcpy() the root in for CRC enabled btree blocks.
3122 : * In that case have to also ensure the blkno remains correct
3123 : */
3124 4908 : memcpy(cblock, block, xfs_btree_block_len(cur));
3125 2454 : if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
3126 2454 : __be64 bno = cpu_to_be64(xfs_buf_daddr(cbp));
3127 2454 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
3128 2454 : cblock->bb_u.l.bb_blkno = bno;
3129 : else
3130 0 : cblock->bb_u.s.bb_blkno = bno;
3131 : }
3132 :
3133 2454 : be16_add_cpu(&block->bb_level, 1);
3134 2454 : xfs_btree_set_numrecs(block, 1);
3135 2454 : cur->bc_nlevels++;
3136 2454 : ASSERT(cur->bc_nlevels <= cur->bc_maxlevels);
3137 2454 : cur->bc_levels[level + 1].ptr = 1;
3138 :
3139 2454 : kp = xfs_btree_key_addr(cur, 1, block);
3140 2454 : ckp = xfs_btree_key_addr(cur, 1, cblock);
3141 4908 : xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
3142 :
3143 2454 : cpp = xfs_btree_ptr_addr(cur, 1, cblock);
3144 32685 : for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
3145 27777 : error = xfs_btree_debug_check_ptr(cur, pp, i, level);
3146 27777 : if (error)
3147 0 : goto error0;
3148 : }
3149 :
3150 2454 : xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
3151 :
3152 2454 : error = xfs_btree_debug_check_ptr(cur, &nptr, 0, level);
3153 2454 : if (error)
3154 0 : goto error0;
3155 :
3156 2454 : xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
3157 :
3158 2454 : xfs_iroot_realloc(cur->bc_ino.ip,
3159 : 1 - xfs_btree_get_numrecs(cblock),
3160 2454 : cur->bc_ino.whichfork);
3161 :
3162 2454 : xfs_btree_setbuf(cur, level, cbp);
3163 :
3164 : /*
3165 : * Do all this logging at the end so that
3166 : * the root is at the right level.
3167 : */
3168 2454 : xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
3169 2454 : xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
3170 2454 : xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
3171 :
3172 4908 : *logflags |=
3173 2454 : XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork);
3174 2454 : *stat = 1;
3175 2454 : return 0;
3176 : error0:
3177 : return error;
3178 : }
3179 :
3180 : /*
3181 : * Allocate a new root block, fill it in.
3182 : */
3183 : STATIC int /* error */
3184 23665 : xfs_btree_new_root(
3185 : struct xfs_btree_cur *cur, /* btree cursor */
3186 : int *stat) /* success/failure */
3187 : {
3188 23665 : struct xfs_btree_block *block; /* one half of the old root block */
3189 23665 : struct xfs_buf *bp; /* buffer containing block */
3190 23665 : int error; /* error return value */
3191 23665 : struct xfs_buf *lbp; /* left buffer pointer */
3192 23665 : struct xfs_btree_block *left; /* left btree block */
3193 23665 : struct xfs_buf *nbp; /* new (root) buffer */
3194 23665 : struct xfs_btree_block *new; /* new (root) btree block */
3195 23665 : int nptr; /* new value for key index, 1 or 2 */
3196 23665 : struct xfs_buf *rbp; /* right buffer pointer */
3197 23665 : struct xfs_btree_block *right; /* right btree block */
3198 23665 : union xfs_btree_ptr rptr;
3199 23665 : union xfs_btree_ptr lptr;
3200 :
3201 23665 : XFS_BTREE_STATS_INC(cur, newroot);
3202 :
3203 : /* initialise our start point from the cursor */
3204 23665 : cur->bc_ops->init_ptr_from_cur(cur, &rptr);
3205 :
3206 : /* Allocate the new block. If we can't do it, we're toast. Give up. */
3207 23666 : error = xfs_btree_alloc_block(cur, &rptr, &lptr, stat);
3208 23666 : if (error)
3209 0 : goto error0;
3210 23666 : if (*stat == 0)
3211 0 : goto out0;
3212 23666 : XFS_BTREE_STATS_INC(cur, alloc);
3213 :
3214 : /* Set up the new block. */
3215 23666 : error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp);
3216 23666 : if (error)
3217 0 : goto error0;
3218 :
3219 : /* Set the root in the holding structure increasing the level by 1. */
3220 23666 : cur->bc_ops->set_root(cur, &lptr, 1);
3221 :
3222 : /*
3223 : * At the previous root level there are now two blocks: the old root,
3224 : * and the new block generated when it was split. We don't know which
3225 : * one the cursor is pointing at, so we set up variables "left" and
3226 : * "right" for each case.
3227 : */
3228 23666 : block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
3229 :
3230 : #ifdef DEBUG
3231 23666 : error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
3232 23666 : if (error)
3233 0 : goto error0;
3234 : #endif
3235 :
3236 23666 : xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3237 47332 : if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3238 : /* Our block is left, pick up the right block. */
3239 14847 : lbp = bp;
3240 14847 : xfs_btree_buf_to_ptr(cur, lbp, &lptr);
3241 14847 : left = block;
3242 14847 : error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
3243 14847 : if (error)
3244 0 : goto error0;
3245 14847 : bp = rbp;
3246 14847 : nptr = 1;
3247 : } else {
3248 : /* Our block is right, pick up the left block. */
3249 8819 : rbp = bp;
3250 8819 : xfs_btree_buf_to_ptr(cur, rbp, &rptr);
3251 8819 : right = block;
3252 8819 : xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
3253 8819 : error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
3254 8819 : if (error)
3255 0 : goto error0;
3256 8819 : bp = lbp;
3257 8819 : nptr = 2;
3258 : }
3259 :
3260 : /* Fill in the new block's btree header and log it. */
3261 23666 : xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
3262 23666 : xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
3263 70998 : ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
3264 : !xfs_btree_ptr_is_null(cur, &rptr));
3265 :
3266 : /* Fill in the key data in the new root. */
3267 47332 : if (xfs_btree_get_level(left) > 0) {
3268 : /*
3269 : * Get the keys for the left block's keys and put them directly
3270 : * in the parent block. Do the same for the right block.
3271 : */
3272 305 : xfs_btree_get_node_keys(cur, left,
3273 : xfs_btree_key_addr(cur, 1, new));
3274 305 : xfs_btree_get_node_keys(cur, right,
3275 : xfs_btree_key_addr(cur, 2, new));
3276 : } else {
3277 : /*
3278 : * Get the keys for the left block's records and put them
3279 : * directly in the parent block. Do the same for the right
3280 : * block.
3281 : */
3282 23361 : xfs_btree_get_leaf_keys(cur, left,
3283 : xfs_btree_key_addr(cur, 1, new));
3284 23361 : xfs_btree_get_leaf_keys(cur, right,
3285 : xfs_btree_key_addr(cur, 2, new));
3286 : }
3287 23666 : xfs_btree_log_keys(cur, nbp, 1, 2);
3288 :
3289 : /* Fill in the pointer data in the new root. */
3290 23666 : xfs_btree_copy_ptrs(cur,
3291 : xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
3292 23666 : xfs_btree_copy_ptrs(cur,
3293 : xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
3294 23666 : xfs_btree_log_ptrs(cur, nbp, 1, 2);
3295 :
3296 : /* Fix up the cursor. */
3297 23666 : xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
3298 23666 : cur->bc_levels[cur->bc_nlevels].ptr = nptr;
3299 23666 : cur->bc_nlevels++;
3300 23666 : ASSERT(cur->bc_nlevels <= cur->bc_maxlevels);
3301 23666 : *stat = 1;
3302 23666 : return 0;
3303 : error0:
3304 : return error;
3305 : out0:
3306 0 : *stat = 0;
3307 0 : return 0;
3308 : }
3309 :
3310 : STATIC int
3311 75134081 : xfs_btree_make_block_unfull(
3312 : struct xfs_btree_cur *cur, /* btree cursor */
3313 : int level, /* btree level */
3314 : int numrecs,/* # of recs in block */
3315 : int *oindex,/* old tree index */
3316 : int *index, /* new tree index */
3317 : union xfs_btree_ptr *nptr, /* new btree ptr */
3318 : struct xfs_btree_cur **ncur, /* new btree cursor */
3319 : union xfs_btree_key *key, /* key of new block */
3320 : int *stat)
3321 : {
3322 75134081 : int error = 0;
3323 :
3324 75134081 : if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
3325 28264198 : level == cur->bc_nlevels - 1) {
3326 31667 : struct xfs_inode *ip = cur->bc_ino.ip;
3327 :
3328 31667 : if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
3329 : /* A root block that can be made bigger. */
3330 29213 : xfs_iroot_realloc(ip, 1, cur->bc_ino.whichfork);
3331 29213 : *stat = 1;
3332 : } else {
3333 : /* A root block that needs replacing */
3334 2454 : int logflags = 0;
3335 :
3336 2454 : error = xfs_btree_new_iroot(cur, &logflags, stat);
3337 2454 : if (error || *stat == 0)
3338 0 : return error;
3339 :
3340 2454 : xfs_trans_log_inode(cur->bc_tp, ip, logflags);
3341 : }
3342 :
3343 31667 : return 0;
3344 : }
3345 :
3346 : /* First, try shifting an entry to the right neighbor. */
3347 75102414 : error = xfs_btree_rshift(cur, level, stat);
3348 75102409 : if (error || *stat)
3349 : return error;
3350 :
3351 : /* Next, try shifting an entry to the left neighbor. */
3352 52906459 : error = xfs_btree_lshift(cur, level, stat);
3353 52906477 : if (error)
3354 : return error;
3355 :
3356 52906432 : if (*stat) {
3357 52229529 : *oindex = *index = cur->bc_levels[level].ptr;
3358 52229529 : return 0;
3359 : }
3360 :
3361 : /*
3362 : * Next, try splitting the current block in half.
3363 : *
3364 : * If this works we have to re-set our variables because we
3365 : * could be in a different block now.
3366 : */
3367 676903 : error = xfs_btree_split(cur, level, nptr, key, ncur, stat);
3368 676903 : if (error || *stat == 0)
3369 : return error;
3370 :
3371 :
3372 676662 : *index = cur->bc_levels[level].ptr;
3373 676662 : return 0;
3374 : }
3375 :
3376 : /*
3377 : * Insert one record/level. Return information to the caller
3378 : * allowing the next level up to proceed if necessary.
3379 : */
3380 : STATIC int
3381 553710573 : xfs_btree_insrec(
3382 : struct xfs_btree_cur *cur, /* btree cursor */
3383 : int level, /* level to insert record at */
3384 : union xfs_btree_ptr *ptrp, /* i/o: block number inserted */
3385 : union xfs_btree_rec *rec, /* record to insert */
3386 : union xfs_btree_key *key, /* i/o: block key for ptrp */
3387 : struct xfs_btree_cur **curp, /* output: new cursor replacing cur */
3388 : int *stat) /* success/failure */
3389 : {
3390 553710573 : struct xfs_btree_block *block; /* btree block */
3391 553710573 : struct xfs_buf *bp; /* buffer for block */
3392 553710573 : union xfs_btree_ptr nptr; /* new block ptr */
3393 553710573 : struct xfs_btree_cur *ncur = NULL; /* new btree cursor */
3394 553710573 : union xfs_btree_key nkey; /* new block key */
3395 553710573 : union xfs_btree_key *lkey;
3396 553710573 : int optr; /* old key/record index */
3397 553710573 : int ptr; /* key/record index */
3398 553710573 : int numrecs;/* number of records */
3399 553710573 : int error; /* error return value */
3400 553710573 : int i;
3401 553710573 : xfs_daddr_t old_bn;
3402 :
3403 553710573 : ncur = NULL;
3404 553710573 : lkey = &nkey;
3405 :
3406 : /*
3407 : * If we have an external root pointer, and we've made it to the
3408 : * root level, allocate a new root block and we're done.
3409 : */
3410 553710573 : if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
3411 488170775 : (level >= cur->bc_nlevels)) {
3412 23666 : error = xfs_btree_new_root(cur, stat);
3413 23666 : xfs_btree_set_ptr_null(cur, ptrp);
3414 :
3415 23666 : return error;
3416 : }
3417 :
3418 : /* If we're off the left edge, return failure. */
3419 553686907 : ptr = cur->bc_levels[level].ptr;
3420 553686907 : if (ptr == 0) {
3421 0 : *stat = 0;
3422 0 : return 0;
3423 : }
3424 :
3425 553686907 : optr = ptr;
3426 :
3427 553686907 : XFS_BTREE_STATS_INC(cur, insrec);
3428 :
3429 : /* Get pointers to the btree buffer and block. */
3430 553686907 : block = xfs_btree_get_block(cur, level, &bp);
3431 553686056 : old_bn = bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL;
3432 553686056 : numrecs = xfs_btree_get_numrecs(block);
3433 :
3434 : #ifdef DEBUG
3435 553686056 : error = xfs_btree_check_block(cur, block, level, bp);
3436 553688293 : if (error)
3437 0 : goto error0;
3438 :
3439 : /* Check that the new entry is being inserted in the right place. */
3440 553688293 : if (ptr <= numrecs) {
3441 308606535 : if (level == 0) {
3442 308279105 : ASSERT(cur->bc_ops->recs_inorder(cur, rec,
3443 : xfs_btree_rec_addr(cur, ptr, block)));
3444 : } else {
3445 327430 : ASSERT(cur->bc_ops->keys_inorder(cur, key,
3446 : xfs_btree_key_addr(cur, ptr, block)));
3447 : }
3448 : }
3449 : #endif
3450 :
3451 : /*
3452 : * If the block is full, we can't insert the new entry until we
3453 : * make the block un-full.
3454 : */
3455 553692487 : xfs_btree_set_ptr_null(cur, &nptr);
3456 553692487 : if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
3457 75134019 : error = xfs_btree_make_block_unfull(cur, level, numrecs,
3458 : &optr, &ptr, &nptr, &ncur, lkey, stat);
3459 75134166 : if (error || *stat == 0)
3460 402 : goto error0;
3461 : }
3462 :
3463 : /*
3464 : * The current block may have changed if the block was
3465 : * previously full and we have just made space in it.
3466 : */
3467 553685353 : block = xfs_btree_get_block(cur, level, &bp);
3468 553673490 : numrecs = xfs_btree_get_numrecs(block);
3469 :
3470 : #ifdef DEBUG
3471 553673490 : error = xfs_btree_check_block(cur, block, level, bp);
3472 553677746 : if (error)
3473 0 : goto error0;
3474 : #endif
3475 :
3476 : /*
3477 : * At this point we know there's room for our new entry in the block
3478 : * we're pointing at.
3479 : */
3480 553677746 : XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
3481 :
3482 553677746 : if (level > 0) {
3483 : /* It's a nonleaf. make a hole in the keys and ptrs */
3484 652986 : union xfs_btree_key *kp;
3485 652986 : union xfs_btree_ptr *pp;
3486 :
3487 652986 : kp = xfs_btree_key_addr(cur, ptr, block);
3488 652986 : pp = xfs_btree_ptr_addr(cur, ptr, block);
3489 :
3490 9721306 : for (i = numrecs - ptr; i >= 0; i--) {
3491 8415334 : error = xfs_btree_debug_check_ptr(cur, pp, i, level);
3492 8415334 : if (error)
3493 0 : goto error0;
3494 : }
3495 :
3496 652986 : xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
3497 652985 : xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
3498 :
3499 652985 : error = xfs_btree_debug_check_ptr(cur, ptrp, 0, level);
3500 652985 : if (error)
3501 0 : goto error0;
3502 :
3503 : /* Now put the new data in, bump numrecs and log it. */
3504 652985 : xfs_btree_copy_keys(cur, kp, key, 1);
3505 652985 : xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
3506 652986 : numrecs++;
3507 652986 : xfs_btree_set_numrecs(block, numrecs);
3508 652986 : xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
3509 652985 : xfs_btree_log_keys(cur, bp, ptr, numrecs);
3510 : #ifdef DEBUG
3511 652985 : if (ptr < numrecs) {
3512 327399 : ASSERT(cur->bc_ops->keys_inorder(cur, kp,
3513 : xfs_btree_key_addr(cur, ptr + 1, block)));
3514 : }
3515 : #endif
3516 : } else {
3517 : /* It's a leaf. make a hole in the records */
3518 553024760 : union xfs_btree_rec *rp;
3519 :
3520 553024760 : rp = xfs_btree_rec_addr(cur, ptr, block);
3521 :
3522 553024760 : xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
3523 :
3524 : /* Now put the new data in, bump numrecs and log it. */
3525 553027826 : xfs_btree_copy_recs(cur, rp, rec, 1);
3526 553031342 : xfs_btree_set_numrecs(block, ++numrecs);
3527 553031342 : xfs_btree_log_recs(cur, bp, ptr, numrecs);
3528 : #ifdef DEBUG
3529 553030896 : if (ptr < numrecs) {
3530 308283020 : ASSERT(cur->bc_ops->recs_inorder(cur, rp,
3531 : xfs_btree_rec_addr(cur, ptr + 1, block)));
3532 : }
3533 : #endif
3534 : }
3535 :
3536 : /* Log the new number of records in the btree header. */
3537 553687520 : xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3538 :
3539 : /*
3540 : * If we just inserted into a new tree block, we have to
3541 : * recalculate nkey here because nkey is out of date.
3542 : *
3543 : * Otherwise we're just updating an existing block (having shoved
3544 : * some records into the new tree block), so use the regular key
3545 : * update mechanism.
3546 : */
3547 553692342 : if (bp && xfs_buf_daddr(bp) != old_bn) {
3548 520741 : xfs_btree_get_keys(cur, block, lkey);
3549 967719058 : } else if (xfs_btree_needs_key_update(cur, optr)) {
3550 317266222 : error = xfs_btree_update_keys(cur, level);
3551 317260113 : if (error)
3552 0 : goto error0;
3553 : }
3554 :
3555 : /*
3556 : * If we are tracking the last record in the tree and
3557 : * we are at the far right edge of the tree, update it.
3558 : */
3559 553686232 : if (xfs_btree_is_lastrec(cur, block, level)) {
3560 83512362 : cur->bc_ops->update_lastrec(cur, block, rec,
3561 : ptr, LASTREC_INSREC);
3562 : }
3563 :
3564 : /*
3565 : * Return the new block number, if any.
3566 : * If there is one, give back a record value and a cursor too.
3567 : */
3568 553688813 : *ptrp = nptr;
3569 1107377626 : if (!xfs_btree_ptr_is_null(cur, &nptr)) {
3570 676661 : xfs_btree_copy_keys(cur, key, lkey, 1);
3571 676661 : *curp = ncur;
3572 : }
3573 :
3574 553688813 : *stat = 1;
3575 553688813 : return 0;
3576 :
3577 402 : error0:
3578 402 : if (ncur)
3579 0 : xfs_btree_del_cursor(ncur, error);
3580 : return error;
3581 : }
3582 :
3583 : /*
3584 : * Insert the record at the point referenced by cur.
3585 : *
3586 : * A multi-level split of the tree on insert will invalidate the original
3587 : * cursor. All callers of this function should assume that the cursor is
3588 : * no longer valid and revalidate it.
3589 : */
3590 : int
3591 553037663 : xfs_btree_insert(
3592 : struct xfs_btree_cur *cur,
3593 : int *stat)
3594 : {
3595 553037663 : int error; /* error return value */
3596 553037663 : int i; /* result value, 0 for failure */
3597 553037663 : int level; /* current level number in btree */
3598 553037663 : union xfs_btree_ptr nptr; /* new block number (split result) */
3599 553037663 : struct xfs_btree_cur *ncur; /* new cursor (split result) */
3600 553037663 : struct xfs_btree_cur *pcur; /* previous level's cursor */
3601 553037663 : union xfs_btree_key bkey; /* key of block to insert */
3602 553037663 : union xfs_btree_key *key;
3603 553037663 : union xfs_btree_rec rec; /* record to insert */
3604 :
3605 553037663 : level = 0;
3606 553037663 : ncur = NULL;
3607 553037663 : pcur = cur;
3608 553037663 : key = &bkey;
3609 :
3610 553037663 : xfs_btree_set_ptr_null(cur, &nptr);
3611 :
3612 : /* Make a key out of the record data to be inserted, and save it. */
3613 553037663 : cur->bc_ops->init_rec_from_cur(cur, &rec);
3614 553031936 : cur->bc_ops->init_key_from_rec(key, &rec);
3615 :
3616 : /*
3617 : * Loop going up the tree, starting at the leaf level.
3618 : * Stop when we don't get a split block, that must mean that
3619 : * the insert is finished with this level.
3620 : */
3621 553717973 : do {
3622 : /*
3623 : * Insert nrec/nptr into this level of the tree.
3624 : * Note if we fail, nptr will be null.
3625 : */
3626 553717973 : error = xfs_btree_insrec(pcur, level, &nptr, &rec, key,
3627 : &ncur, &i);
3628 553709372 : if (error) {
3629 402 : if (pcur != cur)
3630 10 : xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
3631 402 : goto error0;
3632 : }
3633 :
3634 553708970 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
3635 0 : xfs_btree_mark_sick(cur);
3636 0 : error = -EFSCORRUPTED;
3637 0 : goto error0;
3638 : }
3639 553708970 : level++;
3640 :
3641 : /*
3642 : * See if the cursor we just used is trash.
3643 : * Can't trash the caller's cursor, but otherwise we should
3644 : * if ncur is a new cursor or we're about to be done.
3645 : */
3646 553708970 : if (pcur != cur &&
3647 1304406 : (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
3648 : /* Save the state from the cursor before we trash it */
3649 652986 : if (cur->bc_ops->update_cursor)
3650 215957 : cur->bc_ops->update_cursor(pcur, cur);
3651 652986 : cur->bc_nlevels = pcur->bc_nlevels;
3652 652986 : xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
3653 : }
3654 : /* If we got a new cursor, switch to it. */
3655 553715920 : if (ncur) {
3656 660779 : pcur = ncur;
3657 660779 : ncur = NULL;
3658 : }
3659 1107431840 : } while (!xfs_btree_ptr_is_null(cur, &nptr));
3660 :
3661 553028152 : *stat = i;
3662 553028152 : return 0;
3663 : error0:
3664 : return error;
3665 : }
3666 :
3667 : /*
3668 : * Try to merge a non-leaf block back into the inode root.
3669 : *
3670 : * Note: the killroot names comes from the fact that we're effectively
3671 : * killing the old root block. But because we can't just delete the
3672 : * inode we have to copy the single block it was pointing to into the
3673 : * inode.
3674 : */
3675 : STATIC int
3676 4167975 : xfs_btree_kill_iroot(
3677 : struct xfs_btree_cur *cur)
3678 : {
3679 4167975 : int whichfork = cur->bc_ino.whichfork;
3680 4167975 : struct xfs_inode *ip = cur->bc_ino.ip;
3681 4167975 : struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
3682 4167978 : struct xfs_btree_block *block;
3683 4167978 : struct xfs_btree_block *cblock;
3684 4167978 : union xfs_btree_key *kp;
3685 4167978 : union xfs_btree_key *ckp;
3686 4167978 : union xfs_btree_ptr *pp;
3687 4167978 : union xfs_btree_ptr *cpp;
3688 4167978 : struct xfs_buf *cbp;
3689 4167978 : int level;
3690 4167978 : int index;
3691 4167978 : int numrecs;
3692 4167978 : int error;
3693 : #ifdef DEBUG
3694 4167978 : union xfs_btree_ptr ptr;
3695 : #endif
3696 4167978 : int i;
3697 :
3698 4167978 : ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
3699 4167978 : ASSERT(cur->bc_nlevels > 1);
3700 :
3701 : /*
3702 : * Don't deal with the root block needs to be a leaf case.
3703 : * We're just going to turn the thing back into extents anyway.
3704 : */
3705 4167978 : level = cur->bc_nlevels - 1;
3706 4167978 : if (level == 1)
3707 4039730 : goto out0;
3708 :
3709 : /*
3710 : * Give up if the root has multiple children.
3711 : */
3712 128248 : block = xfs_btree_get_iroot(cur);
3713 256496 : if (xfs_btree_get_numrecs(block) != 1)
3714 5 : goto out0;
3715 :
3716 128243 : cblock = xfs_btree_get_block(cur, level - 1, &cbp);
3717 128243 : numrecs = xfs_btree_get_numrecs(cblock);
3718 :
3719 : /*
3720 : * Only do this if the next level will fit.
3721 : * Then the data must be copied up to the inode,
3722 : * instead of freeing the root you free the next level.
3723 : */
3724 128243 : if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
3725 125990 : goto out0;
3726 :
3727 2253 : XFS_BTREE_STATS_INC(cur, killroot);
3728 :
3729 : #ifdef DEBUG
3730 2253 : xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
3731 4506 : ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3732 2253 : xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
3733 4506 : ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3734 : #endif
3735 :
3736 2253 : index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
3737 2253 : if (index) {
3738 2253 : xfs_iroot_realloc(cur->bc_ino.ip, index,
3739 2253 : cur->bc_ino.whichfork);
3740 2253 : block = ifp->if_broot;
3741 : }
3742 :
3743 2253 : be16_add_cpu(&block->bb_numrecs, index);
3744 2253 : ASSERT(block->bb_numrecs == cblock->bb_numrecs);
3745 :
3746 2253 : kp = xfs_btree_key_addr(cur, 1, block);
3747 2253 : ckp = xfs_btree_key_addr(cur, 1, cblock);
3748 2253 : xfs_btree_copy_keys(cur, kp, ckp, numrecs);
3749 :
3750 2253 : pp = xfs_btree_ptr_addr(cur, 1, block);
3751 2253 : cpp = xfs_btree_ptr_addr(cur, 1, cblock);
3752 :
3753 29289 : for (i = 0; i < numrecs; i++) {
3754 24783 : error = xfs_btree_debug_check_ptr(cur, cpp, i, level - 1);
3755 24783 : if (error)
3756 0 : return error;
3757 : }
3758 :
3759 2253 : xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
3760 :
3761 2253 : error = xfs_btree_free_block(cur, cbp);
3762 2253 : if (error)
3763 : return error;
3764 :
3765 2253 : cur->bc_levels[level - 1].bp = NULL;
3766 2253 : be16_add_cpu(&block->bb_level, -1);
3767 4506 : xfs_trans_log_inode(cur->bc_tp, ip,
3768 2253 : XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork));
3769 2253 : cur->bc_nlevels--;
3770 : out0:
3771 : return 0;
3772 : }
3773 :
3774 : /*
3775 : * Kill the current root node, and replace it with it's only child node.
3776 : */
3777 : STATIC int
3778 20164 : xfs_btree_kill_root(
3779 : struct xfs_btree_cur *cur,
3780 : struct xfs_buf *bp,
3781 : int level,
3782 : union xfs_btree_ptr *newroot)
3783 : {
3784 20164 : int error;
3785 :
3786 20164 : XFS_BTREE_STATS_INC(cur, killroot);
3787 :
3788 : /*
3789 : * Update the root pointer, decreasing the level by 1 and then
3790 : * free the old root.
3791 : */
3792 20164 : cur->bc_ops->set_root(cur, newroot, -1);
3793 :
3794 20164 : error = xfs_btree_free_block(cur, bp);
3795 20164 : if (error)
3796 : return error;
3797 :
3798 20164 : cur->bc_levels[level].bp = NULL;
3799 20164 : cur->bc_levels[level].ra = 0;
3800 20164 : cur->bc_nlevels--;
3801 :
3802 20164 : return 0;
3803 : }
3804 :
3805 : STATIC int
3806 189339667 : xfs_btree_dec_cursor(
3807 : struct xfs_btree_cur *cur,
3808 : int level,
3809 : int *stat)
3810 : {
3811 189339667 : int error;
3812 189339667 : int i;
3813 :
3814 189339667 : if (level > 0) {
3815 361348 : error = xfs_btree_decrement(cur, level, &i);
3816 361348 : if (error)
3817 : return error;
3818 : }
3819 :
3820 189339667 : *stat = 1;
3821 189339667 : return 0;
3822 : }
3823 :
3824 : /*
3825 : * Single level of the btree record deletion routine.
3826 : * Delete record pointed to by cur/level.
3827 : * Remove the record from its block then rebalance the tree.
3828 : * Return 0 for error, 1 for done, 2 to go on to the next level.
3829 : */
3830 : STATIC int /* error */
3831 469919127 : xfs_btree_delrec(
3832 : struct xfs_btree_cur *cur, /* btree cursor */
3833 : int level, /* level removing record from */
3834 : int *stat) /* fail/done/go-on */
3835 : {
3836 469919127 : struct xfs_btree_block *block; /* btree block */
3837 469919127 : union xfs_btree_ptr cptr; /* current block ptr */
3838 469919127 : struct xfs_buf *bp; /* buffer for block */
3839 469919127 : int error; /* error return value */
3840 469919127 : int i; /* loop counter */
3841 469919127 : union xfs_btree_ptr lptr; /* left sibling block ptr */
3842 469919127 : struct xfs_buf *lbp; /* left buffer pointer */
3843 469919127 : struct xfs_btree_block *left; /* left btree block */
3844 469919127 : int lrecs = 0; /* left record count */
3845 469919127 : int ptr; /* key/record index */
3846 469919127 : union xfs_btree_ptr rptr; /* right sibling block ptr */
3847 469919127 : struct xfs_buf *rbp; /* right buffer pointer */
3848 469919127 : struct xfs_btree_block *right; /* right btree block */
3849 469919127 : struct xfs_btree_block *rrblock; /* right-right btree block */
3850 469919127 : struct xfs_buf *rrbp; /* right-right buffer pointer */
3851 469919127 : int rrecs = 0; /* right record count */
3852 469919127 : struct xfs_btree_cur *tcur; /* temporary btree cursor */
3853 469919127 : int numrecs; /* temporary numrec count */
3854 :
3855 469919127 : tcur = NULL;
3856 :
3857 : /* Get the index of the entry being deleted, check for nothing there. */
3858 469919127 : ptr = cur->bc_levels[level].ptr;
3859 469919127 : if (ptr == 0) {
3860 0 : *stat = 0;
3861 0 : return 0;
3862 : }
3863 :
3864 : /* Get the buffer & block containing the record or key/ptr. */
3865 469919127 : block = xfs_btree_get_block(cur, level, &bp);
3866 469921354 : numrecs = xfs_btree_get_numrecs(block);
3867 :
3868 : #ifdef DEBUG
3869 469921354 : error = xfs_btree_check_block(cur, block, level, bp);
3870 469919206 : if (error)
3871 0 : goto error0;
3872 : #endif
3873 :
3874 : /* Fail if we're off the end of the block. */
3875 469919206 : if (ptr > numrecs) {
3876 0 : *stat = 0;
3877 0 : return 0;
3878 : }
3879 :
3880 469919206 : XFS_BTREE_STATS_INC(cur, delrec);
3881 469919206 : XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3882 :
3883 : /* Excise the entries being deleted. */
3884 469919206 : if (level > 0) {
3885 : /* It's a nonleaf. operate on keys and ptrs */
3886 383929 : union xfs_btree_key *lkp;
3887 383929 : union xfs_btree_ptr *lpp;
3888 :
3889 383929 : lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3890 383929 : lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3891 :
3892 6983506 : for (i = 0; i < numrecs - ptr; i++) {
3893 6599577 : error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
3894 6599577 : if (error)
3895 0 : goto error0;
3896 : }
3897 :
3898 383929 : if (ptr < numrecs) {
3899 184446 : xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3900 184446 : xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3901 184446 : xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3902 184446 : xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3903 : }
3904 : } else {
3905 : /* It's a leaf. operate on records */
3906 469535277 : if (ptr < numrecs) {
3907 248648208 : xfs_btree_shift_recs(cur,
3908 : xfs_btree_rec_addr(cur, ptr + 1, block),
3909 : -1, numrecs - ptr);
3910 248648150 : xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3911 : }
3912 : }
3913 :
3914 : /*
3915 : * Decrement and log the number of entries in the block.
3916 : */
3917 469922534 : xfs_btree_set_numrecs(block, --numrecs);
3918 469922534 : xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3919 :
3920 : /*
3921 : * If we are tracking the last record in the tree and
3922 : * we are at the far right edge of the tree, update it.
3923 : */
3924 469920400 : if (xfs_btree_is_lastrec(cur, block, level)) {
3925 78717298 : cur->bc_ops->update_lastrec(cur, block, NULL,
3926 : ptr, LASTREC_DELREC);
3927 : }
3928 :
3929 : /*
3930 : * We're at the root level. First, shrink the root block in-memory.
3931 : * Try to get rid of the next level down. If we can't then there's
3932 : * nothing left to do.
3933 : */
3934 469926797 : if (level == cur->bc_nlevels - 1) {
3935 257285592 : if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3936 23797 : xfs_iroot_realloc(cur->bc_ino.ip, -1,
3937 23797 : cur->bc_ino.whichfork);
3938 :
3939 23797 : error = xfs_btree_kill_iroot(cur);
3940 23797 : if (error)
3941 0 : goto error0;
3942 :
3943 23797 : error = xfs_btree_dec_cursor(cur, level, stat);
3944 23797 : if (error)
3945 0 : goto error0;
3946 23797 : *stat = 1;
3947 23797 : return 0;
3948 : }
3949 :
3950 : /*
3951 : * If this is the root level, and there's only one entry left,
3952 : * and it's NOT the leaf level, then we can get rid of this
3953 : * level.
3954 : */
3955 257261795 : if (numrecs == 1 && level > 0) {
3956 20164 : union xfs_btree_ptr *pp;
3957 : /*
3958 : * pp is still set to the first pointer in the block.
3959 : * Make it the new root of the btree.
3960 : */
3961 20164 : pp = xfs_btree_ptr_addr(cur, 1, block);
3962 20164 : error = xfs_btree_kill_root(cur, bp, level, pp);
3963 20164 : if (error)
3964 0 : goto error0;
3965 257241631 : } else if (level > 0) {
3966 93306 : error = xfs_btree_dec_cursor(cur, level, stat);
3967 93306 : if (error)
3968 0 : goto error0;
3969 : }
3970 257261795 : *stat = 1;
3971 257261795 : return 0;
3972 : }
3973 :
3974 : /*
3975 : * If we deleted the leftmost entry in the block, update the
3976 : * key values above us in the tree.
3977 : */
3978 336105047 : if (xfs_btree_needs_key_update(cur, ptr)) {
3979 93372364 : error = xfs_btree_update_keys(cur, level);
3980 93372572 : if (error)
3981 0 : goto error0;
3982 : }
3983 :
3984 : /*
3985 : * If the number of records remaining in the block is at least
3986 : * the minimum, we're done.
3987 : */
3988 212641413 : if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3989 175764779 : error = xfs_btree_dec_cursor(cur, level, stat);
3990 175764740 : if (error)
3991 0 : goto error0;
3992 : return 0;
3993 : }
3994 :
3995 : /*
3996 : * Otherwise, we have to move some records around to keep the
3997 : * tree balanced. Look at the left and right sibling blocks to
3998 : * see if we can re-balance by moving only one record.
3999 : */
4000 36877871 : xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
4001 36877896 : xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
4002 :
4003 36877924 : if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
4004 : /*
4005 : * One child of root, need to get a chance to copy its contents
4006 : * into the root and delete it. Can't go up to next level,
4007 : * there's nothing to delete there.
4008 : */
4009 70088947 : if (xfs_btree_ptr_is_null(cur, &rptr) &&
4010 4144183 : xfs_btree_ptr_is_null(cur, &lptr) &&
4011 4144183 : level == cur->bc_nlevels - 2) {
4012 4144181 : error = xfs_btree_kill_iroot(cur);
4013 4144171 : if (!error)
4014 4144173 : error = xfs_btree_dec_cursor(cur, level, stat);
4015 4144171 : if (error)
4016 0 : goto error0;
4017 : return 0;
4018 : }
4019 : }
4020 :
4021 85116873 : ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
4022 : !xfs_btree_ptr_is_null(cur, &lptr));
4023 :
4024 : /*
4025 : * Duplicate the cursor so our btree manipulations here won't
4026 : * disrupt the next level up.
4027 : */
4028 32733743 : error = xfs_btree_dup_cursor(cur, &tcur);
4029 32733751 : if (error)
4030 0 : goto error0;
4031 :
4032 : /*
4033 : * If there's a right sibling, see if it's ok to shift an entry
4034 : * out of it.
4035 : */
4036 65467502 : if (!xfs_btree_ptr_is_null(cur, &rptr)) {
4037 : /*
4038 : * Move the temp cursor to the last entry in the next block.
4039 : * Actually any entry but the first would suffice.
4040 : */
4041 13084363 : i = xfs_btree_lastrec(tcur, level);
4042 13084361 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4043 0 : xfs_btree_mark_sick(cur);
4044 0 : error = -EFSCORRUPTED;
4045 0 : goto error0;
4046 : }
4047 :
4048 13084361 : error = xfs_btree_increment(tcur, level, &i);
4049 13084356 : if (error)
4050 2 : goto error0;
4051 13084354 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4052 0 : xfs_btree_mark_sick(cur);
4053 0 : error = -EFSCORRUPTED;
4054 0 : goto error0;
4055 : }
4056 :
4057 13084354 : i = xfs_btree_lastrec(tcur, level);
4058 13084354 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4059 0 : xfs_btree_mark_sick(cur);
4060 0 : error = -EFSCORRUPTED;
4061 0 : goto error0;
4062 : }
4063 :
4064 : /* Grab a pointer to the block. */
4065 13084354 : right = xfs_btree_get_block(tcur, level, &rbp);
4066 : #ifdef DEBUG
4067 13084364 : error = xfs_btree_check_block(tcur, right, level, rbp);
4068 13084349 : if (error)
4069 0 : goto error0;
4070 : #endif
4071 : /* Grab the current block number, for future use. */
4072 13084349 : xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
4073 :
4074 : /*
4075 : * If right block is full enough so that removing one entry
4076 : * won't make it too empty, and left-shifting an entry out
4077 : * of right to us works, we're done.
4078 : */
4079 26168724 : if (xfs_btree_get_numrecs(right) - 1 >=
4080 13084360 : cur->bc_ops->get_minrecs(tcur, level)) {
4081 9314081 : error = xfs_btree_lshift(tcur, level, &i);
4082 9314081 : if (error)
4083 0 : goto error0;
4084 9314081 : if (i) {
4085 18628166 : ASSERT(xfs_btree_get_numrecs(block) >=
4086 : cur->bc_ops->get_minrecs(tcur, level));
4087 :
4088 9314082 : xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
4089 9314082 : tcur = NULL;
4090 :
4091 9314082 : error = xfs_btree_dec_cursor(cur, level, stat);
4092 9314081 : if (error)
4093 0 : goto error0;
4094 : return 0;
4095 : }
4096 : }
4097 :
4098 : /*
4099 : * Otherwise, grab the number of records in right for
4100 : * future reference, and fix up the temp cursor to point
4101 : * to our block again (last record).
4102 : */
4103 3770281 : rrecs = xfs_btree_get_numrecs(right);
4104 7540562 : if (!xfs_btree_ptr_is_null(cur, &lptr)) {
4105 3746396 : i = xfs_btree_firstrec(tcur, level);
4106 3746395 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4107 0 : xfs_btree_mark_sick(cur);
4108 0 : error = -EFSCORRUPTED;
4109 0 : goto error0;
4110 : }
4111 :
4112 3746395 : error = xfs_btree_decrement(tcur, level, &i);
4113 3746397 : if (error)
4114 0 : goto error0;
4115 3746397 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4116 0 : xfs_btree_mark_sick(cur);
4117 0 : error = -EFSCORRUPTED;
4118 0 : goto error0;
4119 : }
4120 : }
4121 : }
4122 :
4123 : /*
4124 : * If there's a left sibling, see if it's ok to shift an entry
4125 : * out of it.
4126 : */
4127 46839340 : if (!xfs_btree_ptr_is_null(cur, &lptr)) {
4128 : /*
4129 : * Move the temp cursor to the first entry in the
4130 : * previous block.
4131 : */
4132 23395730 : i = xfs_btree_firstrec(tcur, level);
4133 23395664 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4134 0 : xfs_btree_mark_sick(cur);
4135 0 : error = -EFSCORRUPTED;
4136 0 : goto error0;
4137 : }
4138 :
4139 23395664 : error = xfs_btree_decrement(tcur, level, &i);
4140 23395743 : if (error)
4141 0 : goto error0;
4142 23395743 : i = xfs_btree_firstrec(tcur, level);
4143 23395771 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4144 0 : xfs_btree_mark_sick(cur);
4145 0 : error = -EFSCORRUPTED;
4146 0 : goto error0;
4147 : }
4148 :
4149 : /* Grab a pointer to the block. */
4150 23395771 : left = xfs_btree_get_block(tcur, level, &lbp);
4151 : #ifdef DEBUG
4152 23395776 : error = xfs_btree_check_block(cur, left, level, lbp);
4153 23395776 : if (error)
4154 0 : goto error0;
4155 : #endif
4156 : /* Grab the current block number, for future use. */
4157 23395776 : xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
4158 :
4159 : /*
4160 : * If left block is full enough so that removing one entry
4161 : * won't make it too empty, and right-shifting an entry out
4162 : * of left to us works, we're done.
4163 : */
4164 46791558 : if (xfs_btree_get_numrecs(left) - 1 >=
4165 23395778 : cur->bc_ops->get_minrecs(tcur, level)) {
4166 23035737 : error = xfs_btree_rshift(tcur, level, &i);
4167 23035741 : if (error)
4168 0 : goto error0;
4169 23035741 : if (i) {
4170 46071482 : ASSERT(xfs_btree_get_numrecs(block) >=
4171 : cur->bc_ops->get_minrecs(tcur, level));
4172 23035741 : xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
4173 23035741 : tcur = NULL;
4174 23035741 : if (level == 0)
4175 23033744 : cur->bc_levels[0].ptr++;
4176 :
4177 23035741 : *stat = 1;
4178 23035741 : return 0;
4179 : }
4180 : }
4181 :
4182 : /*
4183 : * Otherwise, grab the number of records in right for
4184 : * future reference.
4185 : */
4186 360043 : lrecs = xfs_btree_get_numrecs(left);
4187 : }
4188 :
4189 : /* Delete the temp cursor, we're done with it. */
4190 383983 : xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
4191 383929 : tcur = NULL;
4192 :
4193 : /* If here, we need to do a join to keep the tree balanced. */
4194 767858 : ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
4195 :
4196 1127901 : if (!xfs_btree_ptr_is_null(cur, &lptr) &&
4197 360043 : lrecs + xfs_btree_get_numrecs(block) <=
4198 360043 : cur->bc_ops->get_maxrecs(cur, level)) {
4199 : /*
4200 : * Set "right" to be the starting block,
4201 : * "left" to be the left neighbor.
4202 : */
4203 360043 : rptr = cptr;
4204 360043 : right = block;
4205 360043 : rbp = bp;
4206 360043 : error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
4207 360043 : if (error)
4208 0 : goto error0;
4209 :
4210 : /*
4211 : * If that won't work, see if we can join with the right neighbor block.
4212 : */
4213 71658 : } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
4214 23886 : rrecs + xfs_btree_get_numrecs(block) <=
4215 23886 : cur->bc_ops->get_maxrecs(cur, level)) {
4216 : /*
4217 : * Set "left" to be the starting block,
4218 : * "right" to be the right neighbor.
4219 : */
4220 23886 : lptr = cptr;
4221 23886 : left = block;
4222 23886 : lbp = bp;
4223 23886 : error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
4224 23886 : if (error)
4225 0 : goto error0;
4226 :
4227 : /*
4228 : * Otherwise, we can't fix the imbalance.
4229 : * Just return. This is probably a logic error, but it's not fatal.
4230 : */
4231 : } else {
4232 0 : error = xfs_btree_dec_cursor(cur, level, stat);
4233 0 : if (error)
4234 0 : goto error0;
4235 : return 0;
4236 : }
4237 :
4238 383929 : rrecs = xfs_btree_get_numrecs(right);
4239 383929 : lrecs = xfs_btree_get_numrecs(left);
4240 :
4241 : /*
4242 : * We're now going to join "left" and "right" by moving all the stuff
4243 : * in "right" to "left" and deleting "right".
4244 : */
4245 383929 : XFS_BTREE_STATS_ADD(cur, moves, rrecs);
4246 383929 : if (level > 0) {
4247 : /* It's a non-leaf. Move keys and pointers. */
4248 420 : union xfs_btree_key *lkp; /* left btree key */
4249 420 : union xfs_btree_ptr *lpp; /* left address pointer */
4250 420 : union xfs_btree_key *rkp; /* right btree key */
4251 420 : union xfs_btree_ptr *rpp; /* right address pointer */
4252 :
4253 420 : lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
4254 420 : lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
4255 420 : rkp = xfs_btree_key_addr(cur, 1, right);
4256 420 : rpp = xfs_btree_ptr_addr(cur, 1, right);
4257 :
4258 20888 : for (i = 1; i < rrecs; i++) {
4259 20468 : error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
4260 20468 : if (error)
4261 0 : goto error0;
4262 : }
4263 :
4264 420 : xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
4265 420 : xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
4266 :
4267 420 : xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
4268 420 : xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
4269 : } else {
4270 : /* It's a leaf. Move records. */
4271 383509 : union xfs_btree_rec *lrp; /* left record pointer */
4272 383509 : union xfs_btree_rec *rrp; /* right record pointer */
4273 :
4274 383509 : lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
4275 383509 : rrp = xfs_btree_rec_addr(cur, 1, right);
4276 :
4277 383509 : xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
4278 383509 : xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
4279 : }
4280 :
4281 383929 : XFS_BTREE_STATS_INC(cur, join);
4282 :
4283 : /*
4284 : * Fix up the number of records and right block pointer in the
4285 : * surviving block, and log it.
4286 : */
4287 383929 : xfs_btree_set_numrecs(left, lrecs + rrecs);
4288 383929 : xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB);
4289 383929 : xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
4290 383929 : xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
4291 :
4292 : /* If there is a right sibling, point it to the remaining block. */
4293 383929 : xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
4294 767858 : if (!xfs_btree_ptr_is_null(cur, &cptr)) {
4295 185941 : error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
4296 185941 : if (error)
4297 0 : goto error0;
4298 185941 : xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
4299 185941 : xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
4300 : }
4301 :
4302 : /* Free the deleted block. */
4303 383929 : error = xfs_btree_free_block(cur, rbp);
4304 383929 : if (error)
4305 0 : goto error0;
4306 :
4307 : /*
4308 : * If we joined with the left neighbor, set the buffer in the
4309 : * cursor to the left block, and fix up the index.
4310 : */
4311 383929 : if (bp != lbp) {
4312 360043 : cur->bc_levels[level].bp = lbp;
4313 360043 : cur->bc_levels[level].ptr += lrecs;
4314 360043 : cur->bc_levels[level].ra = 0;
4315 : }
4316 : /*
4317 : * If we joined with the right neighbor and there's a level above
4318 : * us, increment the cursor at that level.
4319 : */
4320 23886 : else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
4321 23413 : (level + 1 < cur->bc_nlevels)) {
4322 23886 : error = xfs_btree_increment(cur, level + 1, &i);
4323 23886 : if (error)
4324 0 : goto error0;
4325 : }
4326 :
4327 : /*
4328 : * Readjust the ptr at this level if it's not a leaf, since it's
4329 : * still pointing at the deletion point, which makes the cursor
4330 : * inconsistent. If this makes the ptr 0, the caller fixes it up.
4331 : * We can't use decrement because it would change the next level up.
4332 : */
4333 383929 : if (level > 0)
4334 420 : cur->bc_levels[level].ptr--;
4335 :
4336 : /*
4337 : * We combined blocks, so we have to update the parent keys if the
4338 : * btree supports overlapped intervals. However,
4339 : * bc_levels[level + 1].ptr points to the old block so that the caller
4340 : * knows which record to delete. Therefore, the caller must be savvy
4341 : * enough to call updkeys for us if we return stat == 2. The other
4342 : * exit points from this function don't require deletions further up
4343 : * the tree, so they can call updkeys directly.
4344 : */
4345 :
4346 : /* Return value means the next level up has something to do. */
4347 383929 : *stat = 2;
4348 383929 : return 0;
4349 :
4350 2 : error0:
4351 2 : if (tcur)
4352 2 : xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
4353 : return error;
4354 : }
4355 :
4356 : /*
4357 : * Delete the record pointed to by cur.
4358 : * The cursor refers to the place where the record was (could be inserted)
4359 : * when the operation returns.
4360 : */
4361 : int /* error */
4362 469530072 : xfs_btree_delete(
4363 : struct xfs_btree_cur *cur,
4364 : int *stat) /* success/failure */
4365 : {
4366 469530072 : int error; /* error return value */
4367 469530072 : int level;
4368 469530072 : int i;
4369 469530072 : bool joined = false;
4370 :
4371 : /*
4372 : * Go up the tree, starting at leaf level.
4373 : *
4374 : * If 2 is returned then a join was done; go to the next level.
4375 : * Otherwise we are done.
4376 : */
4377 939442844 : for (level = 0, i = 2; i == 2; level++) {
4378 469913246 : error = xfs_btree_delrec(cur, level, &i);
4379 469912774 : if (error)
4380 2 : goto error0;
4381 469912772 : if (i == 2)
4382 383929 : joined = true;
4383 : }
4384 :
4385 : /*
4386 : * If we combined blocks as part of deleting the record, delrec won't
4387 : * have updated the parent high keys so we have to do that here.
4388 : */
4389 469529598 : if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) {
4390 135991 : error = xfs_btree_updkeys_force(cur, 0);
4391 135991 : if (error)
4392 0 : goto error0;
4393 : }
4394 :
4395 469529598 : if (i == 0) {
4396 0 : for (level = 1; level < cur->bc_nlevels; level++) {
4397 0 : if (cur->bc_levels[level].ptr == 0) {
4398 0 : error = xfs_btree_decrement(cur, level, &i);
4399 0 : if (error)
4400 0 : goto error0;
4401 : break;
4402 : }
4403 : }
4404 : }
4405 :
4406 469529598 : *stat = i;
4407 469529598 : return 0;
4408 : error0:
4409 : return error;
4410 : }
4411 :
4412 : /*
4413 : * Get the data from the pointed-to record.
4414 : */
4415 : int /* error */
4416 80858716913 : xfs_btree_get_rec(
4417 : struct xfs_btree_cur *cur, /* btree cursor */
4418 : union xfs_btree_rec **recp, /* output: btree record */
4419 : int *stat) /* output: success/failure */
4420 : {
4421 80858716913 : struct xfs_btree_block *block; /* btree block */
4422 80858716913 : struct xfs_buf *bp; /* buffer pointer */
4423 80858716913 : int ptr; /* record number */
4424 : #ifdef DEBUG
4425 80858716913 : int error; /* error return value */
4426 : #endif
4427 :
4428 80858716913 : ptr = cur->bc_levels[0].ptr;
4429 80858716913 : block = xfs_btree_get_block(cur, 0, &bp);
4430 :
4431 : #ifdef DEBUG
4432 80987661947 : error = xfs_btree_check_block(cur, block, 0, bp);
4433 80880825033 : if (error)
4434 : return error;
4435 : #endif
4436 :
4437 : /*
4438 : * Off the right end or left end, return failure.
4439 : */
4440 >16176*10^7 : if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
4441 1268893 : *stat = 0;
4442 1268893 : return 0;
4443 : }
4444 :
4445 : /*
4446 : * Point to the record and extract its data.
4447 : */
4448 80913110833 : *recp = xfs_btree_rec_addr(cur, ptr, block);
4449 80913110833 : *stat = 1;
4450 80913110833 : return 0;
4451 : }
4452 :
4453 : /* Visit a block in a btree. */
4454 : STATIC int
4455 94039391 : xfs_btree_visit_block(
4456 : struct xfs_btree_cur *cur,
4457 : int level,
4458 : xfs_btree_visit_blocks_fn fn,
4459 : void *data)
4460 : {
4461 94039391 : struct xfs_btree_block *block;
4462 94039391 : struct xfs_buf *bp;
4463 94039391 : union xfs_btree_ptr rptr, bufptr;
4464 94039391 : int error;
4465 :
4466 : /* do right sibling readahead */
4467 94039391 : xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
4468 94039400 : block = xfs_btree_get_block(cur, level, &bp);
4469 :
4470 : /* process the block */
4471 94039334 : error = fn(cur, level, data);
4472 94039364 : if (error)
4473 : return error;
4474 :
4475 : /* now read rh sibling block for next iteration */
4476 94039327 : xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
4477 188078902 : if (xfs_btree_ptr_is_null(cur, &rptr))
4478 : return -ENOENT;
4479 :
4480 : /*
4481 : * We only visit blocks once in this walk, so we have to avoid the
4482 : * internal xfs_btree_lookup_get_block() optimisation where it will
4483 : * return the same block without checking if the right sibling points
4484 : * back to us and creates a cyclic reference in the btree.
4485 : */
4486 83244487 : xfs_btree_buf_to_ptr(cur, bp, &bufptr);
4487 83244472 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
4488 5127038 : if (rptr.l == bufptr.l) {
4489 0 : xfs_btree_mark_sick(cur);
4490 0 : return -EFSCORRUPTED;
4491 : }
4492 : } else {
4493 78117434 : if (rptr.s == bufptr.s) {
4494 0 : xfs_btree_mark_sick(cur);
4495 0 : return -EFSCORRUPTED;
4496 : }
4497 : }
4498 83244472 : return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
4499 : }
4500 :
4501 :
4502 : /* Visit every block in a btree. */
4503 : int
4504 7046377 : xfs_btree_visit_blocks(
4505 : struct xfs_btree_cur *cur,
4506 : xfs_btree_visit_blocks_fn fn,
4507 : unsigned int flags,
4508 : void *data)
4509 : {
4510 7046377 : union xfs_btree_ptr lptr;
4511 7046377 : int level;
4512 7046377 : struct xfs_btree_block *block = NULL;
4513 7046377 : int error = 0;
4514 :
4515 7046377 : cur->bc_ops->init_ptr_from_cur(cur, &lptr);
4516 :
4517 : /* for each level */
4518 19118470 : for (level = cur->bc_nlevels - 1; level >= 0; level--) {
4519 : /* grab the left hand block */
4520 12072208 : error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
4521 12072241 : if (error)
4522 196 : return error;
4523 :
4524 : /* readahead the left most block for the next level down */
4525 12072045 : if (level > 0) {
4526 5025769 : union xfs_btree_ptr *ptr;
4527 :
4528 5025769 : ptr = xfs_btree_ptr_addr(cur, 1, block);
4529 5025767 : xfs_btree_readahead_ptr(cur, ptr, 1);
4530 :
4531 : /* save for the next iteration of the loop */
4532 5025777 : xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
4533 :
4534 5025777 : if (!(flags & XFS_BTREE_VISIT_LEAVES))
4535 1277052 : continue;
4536 7046276 : } else if (!(flags & XFS_BTREE_VISIT_RECORDS)) {
4537 0 : continue;
4538 : }
4539 :
4540 : /* for each buffer in the level */
4541 94039387 : do {
4542 94039387 : error = xfs_btree_visit_block(cur, level, fn, data);
4543 94039373 : } while (!error);
4544 :
4545 10794987 : if (error != -ENOENT)
4546 2 : return error;
4547 : }
4548 :
4549 : return 0;
4550 : }
4551 :
4552 : /*
4553 : * Change the owner of a btree.
4554 : *
4555 : * The mechanism we use here is ordered buffer logging. Because we don't know
4556 : * how many buffers were are going to need to modify, we don't really want to
4557 : * have to make transaction reservations for the worst case of every buffer in a
4558 : * full size btree as that may be more space that we can fit in the log....
4559 : *
4560 : * We do the btree walk in the most optimal manner possible - we have sibling
4561 : * pointers so we can just walk all the blocks on each level from left to right
4562 : * in a single pass, and then move to the next level and do the same. We can
4563 : * also do readahead on the sibling pointers to get IO moving more quickly,
4564 : * though for slow disks this is unlikely to make much difference to performance
4565 : * as the amount of CPU work we have to do before moving to the next block is
4566 : * relatively small.
4567 : *
4568 : * For each btree block that we load, modify the owner appropriately, set the
4569 : * buffer as an ordered buffer and log it appropriately. We need to ensure that
4570 : * we mark the region we change dirty so that if the buffer is relogged in
4571 : * a subsequent transaction the changes we make here as an ordered buffer are
4572 : * correctly relogged in that transaction. If we are in recovery context, then
4573 : * just queue the modified buffer as delayed write buffer so the transaction
4574 : * recovery completion writes the changes to disk.
4575 : */
4576 : struct xfs_btree_block_change_owner_info {
4577 : uint64_t new_owner;
4578 : struct list_head *buffer_list;
4579 : };
4580 :
4581 : static int
4582 0 : xfs_btree_block_change_owner(
4583 : struct xfs_btree_cur *cur,
4584 : int level,
4585 : void *data)
4586 : {
4587 0 : struct xfs_btree_block_change_owner_info *bbcoi = data;
4588 0 : struct xfs_btree_block *block;
4589 0 : struct xfs_buf *bp;
4590 :
4591 : /* modify the owner */
4592 0 : block = xfs_btree_get_block(cur, level, &bp);
4593 0 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
4594 0 : if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
4595 : return 0;
4596 0 : block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
4597 : } else {
4598 0 : if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner))
4599 : return 0;
4600 0 : block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
4601 : }
4602 :
4603 : /*
4604 : * If the block is a root block hosted in an inode, we might not have a
4605 : * buffer pointer here and we shouldn't attempt to log the change as the
4606 : * information is already held in the inode and discarded when the root
4607 : * block is formatted into the on-disk inode fork. We still change it,
4608 : * though, so everything is consistent in memory.
4609 : */
4610 0 : if (!bp) {
4611 0 : ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
4612 0 : ASSERT(level == cur->bc_nlevels - 1);
4613 0 : return 0;
4614 : }
4615 :
4616 0 : if (cur->bc_tp) {
4617 0 : if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) {
4618 0 : xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
4619 0 : return -EAGAIN;
4620 : }
4621 : } else {
4622 0 : xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
4623 : }
4624 :
4625 : return 0;
4626 : }
4627 :
4628 : int
4629 0 : xfs_btree_change_owner(
4630 : struct xfs_btree_cur *cur,
4631 : uint64_t new_owner,
4632 : struct list_head *buffer_list)
4633 : {
4634 0 : struct xfs_btree_block_change_owner_info bbcoi;
4635 :
4636 0 : bbcoi.new_owner = new_owner;
4637 0 : bbcoi.buffer_list = buffer_list;
4638 :
4639 0 : return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
4640 : XFS_BTREE_VISIT_ALL, &bbcoi);
4641 : }
4642 :
4643 : /* Verify the v5 fields of a long-format btree block. */
4644 : xfs_failaddr_t
4645 347895655 : xfs_btree_lblock_v5hdr_verify(
4646 : struct xfs_buf *bp,
4647 : uint64_t owner)
4648 : {
4649 347895655 : struct xfs_mount *mp = bp->b_mount;
4650 347895655 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4651 :
4652 347895655 : if (!xfs_has_crc(mp))
4653 0 : return __this_address;
4654 347895655 : if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
4655 0 : return __this_address;
4656 347895830 : if (block->bb_u.l.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
4657 0 : return __this_address;
4658 347895830 : if (owner != XFS_RMAP_OWN_UNKNOWN &&
4659 0 : be64_to_cpu(block->bb_u.l.bb_owner) != owner)
4660 0 : return __this_address;
4661 : return NULL;
4662 : }
4663 :
4664 : /* Verify a long-format btree block. */
4665 : xfs_failaddr_t
4666 14823325 : xfs_btree_lblock_verify(
4667 : struct xfs_buf *bp,
4668 : unsigned int max_recs)
4669 : {
4670 14823325 : struct xfs_mount *mp = bp->b_mount;
4671 14823325 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4672 14823325 : xfs_fsblock_t fsb;
4673 14823325 : xfs_failaddr_t fa;
4674 :
4675 14823325 : ASSERT(!(bp->b_target->bt_flags & XFS_BUFTARG_XFILE));
4676 :
4677 : /* numrecs verification */
4678 14823325 : if (be16_to_cpu(block->bb_numrecs) > max_recs)
4679 0 : return __this_address;
4680 :
4681 : /* sibling pointer verification */
4682 14823328 : fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
4683 14823328 : fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
4684 : block->bb_u.l.bb_leftsib);
4685 14823321 : if (!fa)
4686 14823316 : fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
4687 : block->bb_u.l.bb_rightsib);
4688 : return fa;
4689 : }
4690 :
4691 : /**
4692 : * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
4693 : * btree block
4694 : *
4695 : * @bp: buffer containing the btree block
4696 : */
4697 : xfs_failaddr_t
4698 56432178 : xfs_btree_sblock_v5hdr_verify(
4699 : struct xfs_buf *bp)
4700 : {
4701 56432178 : struct xfs_mount *mp = bp->b_mount;
4702 56432178 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4703 56432178 : struct xfs_perag *pag = bp->b_pag;
4704 :
4705 56432178 : if (!xfs_has_crc(mp))
4706 0 : return __this_address;
4707 56432178 : if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
4708 0 : return __this_address;
4709 56432344 : if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
4710 0 : return __this_address;
4711 95030671 : if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
4712 13 : return __this_address;
4713 : return NULL;
4714 : }
4715 :
4716 : /**
4717 : * xfs_btree_sblock_verify() -- verify a short-format btree block
4718 : *
4719 : * @bp: buffer containing the btree block
4720 : * @max_recs: maximum records allowed in this btree node
4721 : */
4722 : xfs_failaddr_t
4723 38637848 : xfs_btree_sblock_verify(
4724 : struct xfs_buf *bp,
4725 : unsigned int max_recs)
4726 : {
4727 38637848 : struct xfs_mount *mp = bp->b_mount;
4728 38637848 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4729 38637848 : xfs_agblock_t agbno;
4730 38637848 : xfs_failaddr_t fa;
4731 :
4732 38637848 : ASSERT(!(bp->b_target->bt_flags & XFS_BUFTARG_XFILE));
4733 :
4734 : /* numrecs verification */
4735 38637848 : if (be16_to_cpu(block->bb_numrecs) > max_recs)
4736 16 : return __this_address;
4737 :
4738 : /* sibling pointer verification */
4739 38637854 : agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
4740 38637854 : fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno,
4741 : block->bb_u.s.bb_leftsib);
4742 38637853 : if (!fa)
4743 38637868 : fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno,
4744 : block->bb_u.s.bb_rightsib);
4745 : return fa;
4746 : }
4747 :
4748 : /*
4749 : * For the given limits on leaf and keyptr records per block, calculate the
4750 : * height of the tree needed to index the number of leaf records.
4751 : */
4752 : unsigned int
4753 216608 : xfs_btree_compute_maxlevels(
4754 : const unsigned int *limits,
4755 : unsigned long long records)
4756 : {
4757 216608 : unsigned long long level_blocks = howmany_64(records, limits[0]);
4758 216608 : unsigned int height = 1;
4759 :
4760 1417929 : while (level_blocks > 1) {
4761 1201321 : level_blocks = howmany_64(level_blocks, limits[1]);
4762 1201321 : height++;
4763 : }
4764 :
4765 216608 : return height;
4766 : }
4767 :
4768 : /*
4769 : * For the given limits on leaf and keyptr records per block, calculate the
4770 : * number of blocks needed to index the given number of leaf records.
4771 : */
4772 : unsigned long long
4773 5106531 : xfs_btree_calc_size(
4774 : const unsigned int *limits,
4775 : unsigned long long records)
4776 : {
4777 5106531 : unsigned long long level_blocks = howmany_64(records, limits[0]);
4778 5106531 : unsigned long long blocks = level_blocks;
4779 :
4780 12883668 : while (level_blocks > 1) {
4781 7777137 : level_blocks = howmany_64(level_blocks, limits[1]);
4782 7777137 : blocks += level_blocks;
4783 : }
4784 :
4785 5106531 : return blocks;
4786 : }
4787 :
4788 : /*
4789 : * Given a number of available blocks for the btree to consume with records and
4790 : * pointers, calculate the height of the tree needed to index all the records
4791 : * that space can hold based on the number of pointers each interior node
4792 : * holds.
4793 : *
4794 : * We start by assuming a single level tree consumes a single block, then track
4795 : * the number of blocks each node level consumes until we no longer have space
4796 : * to store the next node level. At this point, we are indexing all the leaf
4797 : * blocks in the space, and there's no more free space to split the tree any
4798 : * further. That's our maximum btree height.
4799 : */
4800 : unsigned int
4801 831111222 : xfs_btree_space_to_height(
4802 : const unsigned int *limits,
4803 : unsigned long long leaf_blocks)
4804 : {
4805 : /*
4806 : * The root btree block can have fewer than minrecs pointers in it
4807 : * because the tree might not be big enough to require that amount of
4808 : * fanout. Hence it has a minimum size of 2 pointers, not limits[1].
4809 : */
4810 831111222 : unsigned long long node_blocks = 2;
4811 831111222 : unsigned long long blocks_left = leaf_blocks - 1;
4812 831111222 : unsigned int height = 1;
4813 :
4814 831111222 : if (leaf_blocks < 1)
4815 : return 0;
4816 :
4817 6648860105 : while (node_blocks < blocks_left) {
4818 5817748883 : blocks_left -= node_blocks;
4819 5817748883 : node_blocks *= limits[1];
4820 5817748883 : height++;
4821 : }
4822 :
4823 : return height;
4824 : }
4825 :
4826 : /*
4827 : * Query a regular btree for all records overlapping a given interval.
4828 : * Start with a LE lookup of the key of low_rec and return all records
4829 : * until we find a record with a key greater than the key of high_rec.
4830 : */
4831 : STATIC int
4832 2800753611 : xfs_btree_simple_query_range(
4833 : struct xfs_btree_cur *cur,
4834 : const union xfs_btree_key *low_key,
4835 : const union xfs_btree_key *high_key,
4836 : xfs_btree_query_range_fn fn,
4837 : void *priv)
4838 : {
4839 2800753611 : union xfs_btree_rec *recp;
4840 2800753611 : union xfs_btree_key rec_key;
4841 2800753611 : int stat;
4842 2800753611 : bool firstrec = true;
4843 2800753611 : int error;
4844 :
4845 2800753611 : ASSERT(cur->bc_ops->init_high_key_from_rec);
4846 2800753611 : ASSERT(cur->bc_ops->diff_two_keys);
4847 :
4848 : /*
4849 : * Find the leftmost record. The btree cursor must be set
4850 : * to the low record used to generate low_key.
4851 : */
4852 2800753611 : stat = 0;
4853 2800753611 : error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
4854 2800750224 : if (error)
4855 4 : goto out;
4856 :
4857 : /* Nothing? See if there's anything to the right. */
4858 2800750220 : if (!stat) {
4859 584708918 : error = xfs_btree_increment(cur, 0, &stat);
4860 584712034 : if (error)
4861 0 : goto out;
4862 : }
4863 :
4864 52786885974 : while (stat) {
4865 : /* Find the record. */
4866 52307926700 : error = xfs_btree_get_rec(cur, &recp, &stat);
4867 52408762033 : if (error || !stat)
4868 : break;
4869 :
4870 : /* Skip if low_key > high_key(rec). */
4871 52408762033 : if (firstrec) {
4872 2483801939 : cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
4873 2483806762 : firstrec = false;
4874 2483806762 : if (xfs_btree_keycmp_gt(cur, low_key, &rec_key))
4875 2216138152 : goto advloop;
4876 : }
4877 :
4878 : /* Stop if low_key(rec) > high_key. */
4879 50192634164 : cur->bc_ops->init_key_from_rec(&rec_key, recp);
4880 49871424078 : if (xfs_btree_keycmp_gt(cur, &rec_key, high_key))
4881 : break;
4882 :
4883 : /* Callback */
4884 47735842311 : error = fn(cur, recp, priv);
4885 47892593975 : if (error)
4886 : break;
4887 :
4888 47892593890 : advloop:
4889 : /* Move on to the next record. */
4890 50108732042 : error = xfs_btree_increment(cur, 0, &stat);
4891 49986132638 : if (error)
4892 : break;
4893 : }
4894 :
4895 2800799742 : out:
4896 2800799746 : return error;
4897 : }
4898 :
4899 : /*
4900 : * Query an overlapped interval btree for all records overlapping a given
4901 : * interval. This function roughly follows the algorithm given in
4902 : * "Interval Trees" of _Introduction to Algorithms_, which is section
4903 : * 14.3 in the 2nd and 3rd editions.
4904 : *
4905 : * First, generate keys for the low and high records passed in.
4906 : *
4907 : * For any leaf node, generate the high and low keys for the record.
4908 : * If the record keys overlap with the query low/high keys, pass the
4909 : * record to the function iterator.
4910 : *
4911 : * For any internal node, compare the low and high keys of each
4912 : * pointer against the query low/high keys. If there's an overlap,
4913 : * follow the pointer.
4914 : *
4915 : * As an optimization, we stop scanning a block when we find a low key
4916 : * that is greater than the query's high key.
4917 : */
4918 : STATIC int
4919 888384041 : xfs_btree_overlapped_query_range(
4920 : struct xfs_btree_cur *cur,
4921 : const union xfs_btree_key *low_key,
4922 : const union xfs_btree_key *high_key,
4923 : xfs_btree_query_range_fn fn,
4924 : void *priv)
4925 : {
4926 888384041 : union xfs_btree_ptr ptr;
4927 888384041 : union xfs_btree_ptr *pp;
4928 888384041 : union xfs_btree_key rec_key;
4929 888384041 : union xfs_btree_key rec_hkey;
4930 888384041 : union xfs_btree_key *lkp;
4931 888384041 : union xfs_btree_key *hkp;
4932 888384041 : union xfs_btree_rec *recp;
4933 888384041 : struct xfs_btree_block *block;
4934 888384041 : int level;
4935 888384041 : struct xfs_buf *bp;
4936 888384041 : int i;
4937 888384041 : int error;
4938 :
4939 : /* Load the root of the btree. */
4940 888384041 : level = cur->bc_nlevels - 1;
4941 888384041 : cur->bc_ops->init_ptr_from_cur(cur, &ptr);
4942 888388347 : error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
4943 888386969 : if (error)
4944 : return error;
4945 888387781 : xfs_btree_get_block(cur, level, &bp);
4946 888383735 : trace_xfs_btree_overlapped_query_range(cur, level, bp);
4947 : #ifdef DEBUG
4948 888381506 : error = xfs_btree_check_block(cur, block, level, bp);
4949 888386490 : if (error)
4950 0 : goto out;
4951 : #endif
4952 888386490 : cur->bc_levels[level].ptr = 1;
4953 :
4954 >10974*10^7 : while (level < cur->bc_nlevels) {
4955 >10885*10^7 : block = xfs_btree_get_block(cur, level, &bp);
4956 :
4957 : /* End of node, pop back towards the root. */
4958 >10884*10^7 : if (cur->bc_levels[level].ptr >
4959 >10884*10^7 : be16_to_cpu(block->bb_numrecs)) {
4960 230659607 : pop_up:
4961 2390963658 : if (level < cur->bc_nlevels - 1)
4962 1503061875 : cur->bc_levels[level + 1].ptr++;
4963 2390963658 : level++;
4964 2390963658 : continue;
4965 : }
4966 :
4967 >10861*10^7 : if (level == 0) {
4968 : /* Handle a leaf node. */
4969 75518093108 : recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
4970 : block);
4971 :
4972 75518093108 : cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
4973 75514085416 : cur->bc_ops->init_key_from_rec(&rec_key, recp);
4974 :
4975 : /*
4976 : * If (query's high key < record's low key), then there
4977 : * are no more interesting records in this block. Pop
4978 : * up to the leaf level to find more record blocks.
4979 : *
4980 : * If (record's high key >= query's low key) and
4981 : * (query's high key >= record's low key), then
4982 : * this record overlaps the query range; callback.
4983 : */
4984 75509163893 : if (xfs_btree_keycmp_lt(cur, high_key, &rec_key))
4985 876728731 : goto pop_up;
4986 74638409940 : if (xfs_btree_keycmp_ge(cur, &rec_hkey, low_key)) {
4987 2662180108 : error = fn(cur, recp, priv);
4988 2662193711 : if (error)
4989 : break;
4990 : }
4991 74636740094 : cur->bc_levels[level].ptr++;
4992 74636740094 : continue;
4993 : }
4994 :
4995 : /* Handle an internal node. */
4996 33099916445 : lkp = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);
4997 33099916445 : hkp = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr,
4998 : block);
4999 33099916445 : pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
5000 :
5001 : /*
5002 : * If (query's high key < pointer's low key), then there are no
5003 : * more interesting keys in this block. Pop up one leaf level
5004 : * to continue looking for records.
5005 : *
5006 : * If (pointer's high key >= query's low key) and
5007 : * (query's high key >= pointer's low key), then
5008 : * this record overlaps the query range; follow pointer.
5009 : */
5010 33098717113 : if (xfs_btree_keycmp_lt(cur, high_key, lkp))
5011 1283575320 : goto pop_up;
5012 31821276427 : if (xfs_btree_keycmp_ge(cur, hkp, low_key)) {
5013 1503500189 : level--;
5014 1503500189 : error = xfs_btree_lookup_get_block(cur, level, pp,
5015 : &block);
5016 1503506720 : if (error)
5017 0 : goto out;
5018 1503506720 : xfs_btree_get_block(cur, level, &bp);
5019 1503493848 : trace_xfs_btree_overlapped_query_range(cur, level, bp);
5020 : #ifdef DEBUG
5021 1503486912 : error = xfs_btree_check_block(cur, block, level, bp);
5022 1503499834 : if (error)
5023 0 : goto out;
5024 : #endif
5025 1503499834 : cur->bc_levels[level].ptr = 1;
5026 1503499834 : continue;
5027 : }
5028 30323173361 : cur->bc_levels[level].ptr++;
5029 : }
5030 :
5031 888376891 : out:
5032 : /*
5033 : * If we don't end this function with the cursor pointing at a record
5034 : * block, a subsequent non-error cursor deletion will not release
5035 : * node-level buffers, causing a buffer leak. This is quite possible
5036 : * with a zero-results range query, so release the buffers if we
5037 : * failed to return any results.
5038 : */
5039 888376891 : if (cur->bc_levels[0].bp == NULL) {
5040 2813 : for (i = 0; i < cur->bc_nlevels; i++) {
5041 2035 : if (cur->bc_levels[i].bp) {
5042 1240 : xfs_trans_brelse(cur->bc_tp,
5043 : cur->bc_levels[i].bp);
5044 1240 : cur->bc_levels[i].bp = NULL;
5045 1240 : cur->bc_levels[i].ptr = 0;
5046 1240 : cur->bc_levels[i].ra = 0;
5047 : }
5048 : }
5049 : }
5050 :
5051 : return error;
5052 : }
5053 :
5054 : static inline void
5055 13732305404 : xfs_btree_key_from_irec(
5056 : struct xfs_btree_cur *cur,
5057 : union xfs_btree_key *key,
5058 : const union xfs_btree_irec *irec)
5059 : {
5060 13732305404 : union xfs_btree_rec rec;
5061 :
5062 13732305404 : cur->bc_rec = *irec;
5063 13732305404 : cur->bc_ops->init_rec_from_cur(cur, &rec);
5064 13732235621 : cur->bc_ops->init_key_from_rec(key, &rec);
5065 13732392745 : }
5066 :
5067 : /*
5068 : * Query a btree for all records overlapping a given interval of keys. The
5069 : * supplied function will be called with each record found; return one of the
5070 : * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
5071 : * code. This function returns -ECANCELED, zero, or a negative error code.
5072 : */
5073 : int
5074 3686355659 : xfs_btree_query_range(
5075 : struct xfs_btree_cur *cur,
5076 : const union xfs_btree_irec *low_rec,
5077 : const union xfs_btree_irec *high_rec,
5078 : xfs_btree_query_range_fn fn,
5079 : void *priv)
5080 : {
5081 3686355659 : union xfs_btree_key low_key;
5082 3686355659 : union xfs_btree_key high_key;
5083 :
5084 : /* Find the keys of both ends of the interval. */
5085 3686355659 : xfs_btree_key_from_irec(cur, &high_key, high_rec);
5086 3686418146 : xfs_btree_key_from_irec(cur, &low_key, low_rec);
5087 :
5088 : /* Enforce low key <= high key. */
5089 3686355503 : if (!xfs_btree_keycmp_le(cur, &low_key, &high_key))
5090 : return -EINVAL;
5091 :
5092 3686543803 : if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
5093 2798158605 : return xfs_btree_simple_query_range(cur, &low_key,
5094 : &high_key, fn, priv);
5095 888385198 : return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
5096 : fn, priv);
5097 : }
5098 :
5099 : /* Query a btree for all records. */
5100 : int
5101 2602678 : xfs_btree_query_all(
5102 : struct xfs_btree_cur *cur,
5103 : xfs_btree_query_range_fn fn,
5104 : void *priv)
5105 : {
5106 2602678 : union xfs_btree_key low_key;
5107 2602678 : union xfs_btree_key high_key;
5108 :
5109 2602678 : memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
5110 2602678 : memset(&low_key, 0, sizeof(low_key));
5111 2602678 : memset(&high_key, 0xFF, sizeof(high_key));
5112 :
5113 2602678 : return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv);
5114 : }
5115 :
5116 : static int
5117 79894536 : xfs_btree_count_blocks_helper(
5118 : struct xfs_btree_cur *cur,
5119 : int level,
5120 : void *data)
5121 : {
5122 79894536 : xfs_extlen_t *blocks = data;
5123 79894536 : (*blocks)++;
5124 :
5125 79894536 : return 0;
5126 : }
5127 :
5128 : /* Count the blocks in a btree and return the result in *blocks. */
5129 : int
5130 4476285 : xfs_btree_count_blocks(
5131 : struct xfs_btree_cur *cur,
5132 : xfs_extlen_t *blocks)
5133 : {
5134 4476285 : *blocks = 0;
5135 4476285 : return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
5136 : XFS_BTREE_VISIT_ALL, blocks);
5137 : }
5138 :
5139 : /* Compare two btree pointers. */
5140 : int64_t
5141 13167791 : xfs_btree_diff_two_ptrs(
5142 : struct xfs_btree_cur *cur,
5143 : const union xfs_btree_ptr *a,
5144 : const union xfs_btree_ptr *b)
5145 : {
5146 13167791 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
5147 1164344 : return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
5148 12003447 : return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
5149 : }
5150 :
5151 : struct xfs_btree_has_records {
5152 : /* Keys for the start and end of the range we want to know about. */
5153 : union xfs_btree_key start_key;
5154 : union xfs_btree_key end_key;
5155 :
5156 : /* Mask for key comparisons, if desired. */
5157 : const union xfs_btree_key *key_mask;
5158 :
5159 : /* Highest record key we've seen so far. */
5160 : union xfs_btree_key high_key;
5161 :
5162 : enum xbtree_recpacking outcome;
5163 : };
5164 :
5165 : STATIC int
5166 0 : xfs_btree_has_records_helper(
5167 : struct xfs_btree_cur *cur,
5168 : const union xfs_btree_rec *rec,
5169 : void *priv)
5170 : {
5171 0 : union xfs_btree_key rec_key;
5172 0 : union xfs_btree_key rec_high_key;
5173 0 : struct xfs_btree_has_records *info = priv;
5174 0 : enum xbtree_key_contig key_contig;
5175 :
5176 0 : cur->bc_ops->init_key_from_rec(&rec_key, rec);
5177 :
5178 0 : if (info->outcome == XBTREE_RECPACKING_EMPTY) {
5179 0 : info->outcome = XBTREE_RECPACKING_SPARSE;
5180 :
5181 : /*
5182 : * If the first record we find does not overlap the start key,
5183 : * then there is a hole at the start of the search range.
5184 : * Classify this as sparse and stop immediately.
5185 : */
5186 0 : if (xfs_btree_masked_keycmp_lt(cur, &info->start_key, &rec_key,
5187 : info->key_mask))
5188 : return -ECANCELED;
5189 : } else {
5190 : /*
5191 : * If a subsequent record does not overlap with the any record
5192 : * we've seen so far, there is a hole in the middle of the
5193 : * search range. Classify this as sparse and stop.
5194 : * If the keys overlap and this btree does not allow overlap,
5195 : * signal corruption.
5196 : */
5197 0 : key_contig = cur->bc_ops->keys_contiguous(cur, &info->high_key,
5198 : &rec_key, info->key_mask);
5199 0 : if (key_contig == XBTREE_KEY_OVERLAP &&
5200 0 : !(cur->bc_flags & XFS_BTREE_OVERLAPPING))
5201 : return -EFSCORRUPTED;
5202 0 : if (key_contig == XBTREE_KEY_GAP)
5203 : return -ECANCELED;
5204 : }
5205 :
5206 : /*
5207 : * If high_key(rec) is larger than any other high key we've seen,
5208 : * remember it for later.
5209 : */
5210 0 : cur->bc_ops->init_high_key_from_rec(&rec_high_key, rec);
5211 0 : if (xfs_btree_masked_keycmp_gt(cur, &rec_high_key, &info->high_key,
5212 : info->key_mask))
5213 0 : info->high_key = rec_high_key; /* struct copy */
5214 :
5215 : return 0;
5216 : }
5217 :
5218 : /*
5219 : * Scan part of the keyspace of a btree and tell us if that keyspace does not
5220 : * map to any records; is fully mapped to records; or is partially mapped to
5221 : * records. This is the btree record equivalent to determining if a file is
5222 : * sparse.
5223 : *
5224 : * For most btree types, the record scan should use all available btree key
5225 : * fields to compare the keys encountered. These callers should pass NULL for
5226 : * @mask. However, some callers (e.g. scanning physical space in the rmapbt)
5227 : * want to ignore some part of the btree record keyspace when performing the
5228 : * comparison. These callers should pass in a union xfs_btree_key object with
5229 : * the fields that *should* be a part of the comparison set to any nonzero
5230 : * value, and the rest zeroed.
5231 : */
5232 : int
5233 3180349671 : xfs_btree_has_records(
5234 : struct xfs_btree_cur *cur,
5235 : const union xfs_btree_irec *low,
5236 : const union xfs_btree_irec *high,
5237 : const union xfs_btree_key *mask,
5238 : enum xbtree_recpacking *outcome)
5239 : {
5240 3180349671 : struct xfs_btree_has_records info = {
5241 : .outcome = XBTREE_RECPACKING_EMPTY,
5242 : .key_mask = mask,
5243 : };
5244 3180349671 : int error;
5245 :
5246 : /* Not all btrees support this operation. */
5247 3180349671 : if (!cur->bc_ops->keys_contiguous) {
5248 0 : ASSERT(0);
5249 0 : return -EOPNOTSUPP;
5250 : }
5251 :
5252 3180349671 : xfs_btree_key_from_irec(cur, &info.start_key, low);
5253 3180356639 : xfs_btree_key_from_irec(cur, &info.end_key, high);
5254 :
5255 3180273544 : error = xfs_btree_query_range(cur, low, high,
5256 : xfs_btree_has_records_helper, &info);
5257 3180414167 : if (error == -ECANCELED)
5258 0 : goto out;
5259 3180414167 : if (error)
5260 : return error;
5261 :
5262 3180414167 : if (info.outcome == XBTREE_RECPACKING_EMPTY)
5263 3180414167 : goto out;
5264 :
5265 : /*
5266 : * If the largest high_key(rec) we saw during the walk is greater than
5267 : * the end of the search range, classify this as full. Otherwise,
5268 : * there is a hole at the end of the search range.
5269 : */
5270 0 : if (xfs_btree_masked_keycmp_ge(cur, &info.high_key, &info.end_key,
5271 : mask))
5272 0 : info.outcome = XBTREE_RECPACKING_FULL;
5273 :
5274 0 : out:
5275 3180414167 : *outcome = info.outcome;
5276 3180414167 : return 0;
5277 : }
5278 :
5279 : /* Are there more records in this btree? */
5280 : bool
5281 72739782 : xfs_btree_has_more_records(
5282 : struct xfs_btree_cur *cur)
5283 : {
5284 72739782 : struct xfs_btree_block *block;
5285 72739782 : struct xfs_buf *bp;
5286 :
5287 72739782 : block = xfs_btree_get_block(cur, 0, &bp);
5288 :
5289 : /* There are still records in this block. */
5290 145479562 : if (cur->bc_levels[0].ptr < xfs_btree_get_numrecs(block))
5291 : return true;
5292 :
5293 : /* There are more record blocks. */
5294 551068 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
5295 0 : return block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK);
5296 : else
5297 551068 : return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK);
5298 : }
5299 :
5300 : /* Set up all the btree cursor caches. */
5301 : int __init
5302 12 : xfs_btree_init_cur_caches(void)
5303 : {
5304 12 : int error;
5305 :
5306 12 : error = xfs_allocbt_init_cur_cache();
5307 12 : if (error)
5308 : return error;
5309 12 : error = xfs_inobt_init_cur_cache();
5310 12 : if (error)
5311 0 : goto err;
5312 12 : error = xfs_bmbt_init_cur_cache();
5313 12 : if (error)
5314 0 : goto err;
5315 12 : error = xfs_rmapbt_init_cur_cache();
5316 12 : if (error)
5317 0 : goto err;
5318 12 : error = xfs_refcountbt_init_cur_cache();
5319 12 : if (error)
5320 0 : goto err;
5321 :
5322 : return 0;
5323 0 : err:
5324 0 : xfs_btree_destroy_cur_caches();
5325 0 : return error;
5326 : }
5327 :
5328 : /* Destroy all the btree cursor caches, if they've been allocated. */
5329 : void
5330 12 : xfs_btree_destroy_cur_caches(void)
5331 : {
5332 12 : xfs_allocbt_destroy_cur_cache();
5333 12 : xfs_inobt_destroy_cur_cache();
5334 12 : xfs_bmbt_destroy_cur_cache();
5335 12 : xfs_rmapbt_destroy_cur_cache();
5336 12 : xfs_refcountbt_destroy_cur_cache();
5337 12 : }
5338 :
5339 : /* Move the btree cursor before the first record. */
5340 : int
5341 164415285 : xfs_btree_goto_left_edge(
5342 : struct xfs_btree_cur *cur)
5343 : {
5344 164415285 : int stat = 0;
5345 164415285 : int error;
5346 :
5347 164415285 : memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
5348 164415285 : error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
5349 164415286 : if (error)
5350 : return error;
5351 164415286 : if (!stat)
5352 : return 0;
5353 :
5354 0 : error = xfs_btree_decrement(cur, 0, &stat);
5355 0 : if (error)
5356 : return error;
5357 0 : if (stat != 0) {
5358 0 : ASSERT(0);
5359 0 : xfs_btree_mark_sick(cur);
5360 0 : return -EFSCORRUPTED;
5361 : }
5362 :
5363 : return 0;
5364 : }
|