Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
4 : * All Rights Reserved.
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_log_format.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_bit.h"
13 : #include "xfs_mount.h"
14 : #include "xfs_inode.h"
15 : #include "xfs_trans.h"
16 : #include "xfs_buf_item.h"
17 : #include "xfs_btree.h"
18 : #include "xfs_errortag.h"
19 : #include "xfs_error.h"
20 : #include "xfs_trace.h"
21 : #include "xfs_alloc.h"
22 : #include "xfs_log.h"
23 : #include "xfs_btree_staging.h"
24 : #include "xfs_ag.h"
25 : #include "xfs_alloc_btree.h"
26 : #include "xfs_ialloc_btree.h"
27 : #include "xfs_bmap_btree.h"
28 : #include "xfs_rmap_btree.h"
29 : #include "xfs_refcount_btree.h"
30 : #include "xfs_health.h"
31 : #include "scrub/xfile.h"
32 : #include "scrub/xfbtree.h"
33 : #include "xfs_btree_mem.h"
34 :
35 : /*
36 : * Btree magic numbers.
37 : */
38 : uint32_t
39 >26235*10^7 : xfs_btree_magic(
40 : struct xfs_mount *mp,
41 : const struct xfs_btree_ops *ops)
42 : {
43 >26235*10^7 : int idx = xfs_has_crc(mp) ? 1 : 0;
44 >26235*10^7 : __be32 magic = ops->buf_ops->magic[idx];
45 :
46 : /* Ensure we asked for crc for crc-only magics. */
47 >26232*10^7 : ASSERT(magic != 0);
48 >26232*10^7 : return be32_to_cpu(magic);
49 : }
50 :
51 : /*
52 : * These sibling pointer checks are optimised for null sibling pointers. This
53 : * happens a lot, and we don't need to byte swap at runtime if the sibling
54 : * pointer is NULL.
55 : *
56 : * These are explicitly marked at inline because the cost of calling them as
57 : * functions instead of inlining them is about 36 bytes extra code per call site
58 : * on x86-64. Yes, gcc-11 fails to inline them, and explicit inlining of these
59 : * two sibling check functions reduces the compiled code size by over 300
60 : * bytes.
61 : */
62 : static inline xfs_failaddr_t
63 4341369986 : xfs_btree_check_lblock_siblings(
64 : struct xfs_mount *mp,
65 : struct xfs_btree_cur *cur,
66 : int level,
67 : xfs_fsblock_t fsb,
68 : __be64 dsibling)
69 : {
70 4341369986 : xfs_fsblock_t sibling;
71 :
72 4341369986 : if (dsibling == cpu_to_be64(NULLFSBLOCK))
73 : return NULL;
74 :
75 1054587194 : sibling = be64_to_cpu(dsibling);
76 1054587194 : if (sibling == fsb)
77 0 : return __this_address;
78 1054587194 : if (level >= 0) {
79 1037679073 : if (!xfs_btree_check_lptr(cur, sibling, level + 1))
80 0 : return __this_address;
81 16908121 : } else if (cur && (cur->bc_flags & XFS_BTREE_IN_XFILE)) {
82 0 : if (!xfbtree_verify_xfileoff(cur, sibling))
83 0 : return __this_address;
84 : } else {
85 16908121 : if (!xfs_verify_fsbno(mp, sibling))
86 0 : return __this_address;
87 : }
88 :
89 : return NULL;
90 : }
91 :
92 : static inline xfs_failaddr_t
93 >51877*10^7 : xfs_btree_check_sblock_siblings(
94 : struct xfs_perag *pag,
95 : struct xfs_btree_cur *cur,
96 : int level,
97 : xfs_agblock_t agbno,
98 : __be32 dsibling)
99 : {
100 >51877*10^7 : xfs_agblock_t sibling;
101 :
102 >51877*10^7 : if (dsibling == cpu_to_be32(NULLAGBLOCK))
103 : return NULL;
104 :
105 >36723*10^7 : sibling = be32_to_cpu(dsibling);
106 >36723*10^7 : if (sibling == agbno)
107 0 : return __this_address;
108 >36723*10^7 : if (level >= 0) {
109 >36717*10^7 : if (!xfs_btree_check_sptr(cur, sibling, level + 1))
110 0 : return __this_address;
111 66214159 : } else if (cur && (cur->bc_flags & XFS_BTREE_IN_XFILE)) {
112 0 : if (!xfbtree_verify_xfileoff(cur, sibling))
113 0 : return __this_address;
114 : } else {
115 66214159 : if (!xfs_verify_agbno(pag, sibling))
116 0 : return __this_address;
117 : }
118 : return NULL;
119 : }
120 :
121 : /*
122 : * Check a long btree block header. Return the address of the failing check,
123 : * or NULL if everything is ok.
124 : */
125 : xfs_failaddr_t
126 2151191978 : __xfs_btree_check_lblock(
127 : struct xfs_btree_cur *cur,
128 : struct xfs_btree_block *block,
129 : int level,
130 : struct xfs_buf *bp)
131 : {
132 2151191978 : struct xfs_mount *mp = cur->bc_mp;
133 2151191978 : int crc = xfs_has_crc(mp);
134 2151191978 : xfs_failaddr_t fa;
135 2151191978 : xfs_fsblock_t fsb = NULLFSBLOCK;
136 :
137 2151191978 : if (crc) {
138 2151193882 : if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
139 0 : return __this_address;
140 4302409728 : if (block->bb_u.l.bb_blkno !=
141 2151204864 : cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
142 0 : return __this_address;
143 2151204864 : if (block->bb_u.l.bb_pad != cpu_to_be32(0))
144 0 : return __this_address;
145 : }
146 :
147 2151202960 : if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops))
148 0 : return __this_address;
149 2151208878 : if (be16_to_cpu(block->bb_level) != level)
150 0 : return __this_address;
151 2151209147 : if (be16_to_cpu(block->bb_numrecs) >
152 2151208878 : cur->bc_ops->get_maxrecs(cur, level))
153 0 : return __this_address;
154 :
155 2151209147 : if ((cur->bc_flags & XFS_BTREE_IN_XFILE) && bp)
156 1158113973 : fsb = xfbtree_buf_to_xfoff(cur, bp);
157 993095174 : else if (bp)
158 972316043 : fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
159 :
160 2151207454 : fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
161 : block->bb_u.l.bb_leftsib);
162 2151204547 : if (!fa)
163 2151203144 : fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
164 : block->bb_u.l.bb_rightsib);
165 : return fa;
166 : }
167 :
168 : /* Check a long btree block header. */
169 : static int
170 2145636038 : xfs_btree_check_lblock(
171 : struct xfs_btree_cur *cur,
172 : struct xfs_btree_block *block,
173 : int level,
174 : struct xfs_buf *bp)
175 : {
176 2145636038 : struct xfs_mount *mp = cur->bc_mp;
177 2145636038 : xfs_failaddr_t fa;
178 :
179 2145636038 : fa = __xfs_btree_check_lblock(cur, block, level, bp);
180 4291290549 : if (XFS_IS_CORRUPT(mp, fa != NULL) ||
181 2145647438 : XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK)) {
182 0 : if (bp)
183 0 : trace_xfs_btree_corrupt(bp, _RET_IP_);
184 0 : xfs_btree_mark_sick(cur);
185 0 : return -EFSCORRUPTED;
186 : }
187 : return 0;
188 : }
189 :
190 : /*
191 : * Check a short btree block header. Return the address of the failing check,
192 : * or NULL if everything is ok.
193 : */
194 : xfs_failaddr_t
195 >26020*10^7 : __xfs_btree_check_sblock(
196 : struct xfs_btree_cur *cur,
197 : struct xfs_btree_block *block,
198 : int level,
199 : struct xfs_buf *bp)
200 : {
201 >26020*10^7 : struct xfs_mount *mp = cur->bc_mp;
202 >26020*10^7 : struct xfs_perag *pag = cur->bc_ag.pag;
203 >26020*10^7 : int crc = xfs_has_crc(mp);
204 >26020*10^7 : xfs_failaddr_t fa;
205 >26020*10^7 : xfs_agblock_t agbno = NULLAGBLOCK;
206 :
207 >26020*10^7 : if (crc) {
208 >26014*10^7 : if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
209 0 : return __this_address;
210 >51872*10^7 : if (block->bb_u.s.bb_blkno !=
211 >25936*10^7 : cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
212 0 : return __this_address;
213 : }
214 :
215 >25942*10^7 : if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops))
216 0 : return __this_address;
217 >26025*10^7 : if (be16_to_cpu(block->bb_level) != level)
218 0 : return __this_address;
219 >25994*10^7 : if (be16_to_cpu(block->bb_numrecs) >
220 >26025*10^7 : cur->bc_ops->get_maxrecs(cur, level))
221 0 : return __this_address;
222 :
223 >25994*10^7 : if ((cur->bc_flags & XFS_BTREE_IN_XFILE) && bp) {
224 865349917 : pag = NULL;
225 865349917 : agbno = xfbtree_buf_to_xfoff(cur, bp);
226 >25907*10^7 : } else if (bp) {
227 >25907*10^7 : agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
228 : }
229 :
230 >25988*10^7 : fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno,
231 : block->bb_u.s.bb_leftsib);
232 >25944*10^7 : if (!fa)
233 >25952*10^7 : fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno,
234 : block->bb_u.s.bb_rightsib);
235 : return fa;
236 : }
237 :
238 : /* Check a short btree block header. */
239 : STATIC int
240 >25919*10^7 : xfs_btree_check_sblock(
241 : struct xfs_btree_cur *cur,
242 : struct xfs_btree_block *block,
243 : int level,
244 : struct xfs_buf *bp)
245 : {
246 >25919*10^7 : struct xfs_mount *mp = cur->bc_mp;
247 >25919*10^7 : xfs_failaddr_t fa;
248 :
249 >25919*10^7 : fa = __xfs_btree_check_sblock(cur, block, level, bp);
250 >51916*10^7 : if (XFS_IS_CORRUPT(mp, fa != NULL) ||
251 >25998*10^7 : XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_SBLOCK)) {
252 0 : if (bp)
253 0 : trace_xfs_btree_corrupt(bp, _RET_IP_);
254 0 : xfs_btree_mark_sick(cur);
255 0 : return -EFSCORRUPTED;
256 : }
257 : return 0;
258 : }
259 :
260 : /*
261 : * Debug routine: check that block header is ok.
262 : */
263 : int
264 >26140*10^7 : xfs_btree_check_block(
265 : struct xfs_btree_cur *cur, /* btree cursor */
266 : struct xfs_btree_block *block, /* generic btree block pointer */
267 : int level, /* level of the btree block */
268 : struct xfs_buf *bp) /* buffer containing block, if any */
269 : {
270 >26140*10^7 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
271 2145644804 : return xfs_btree_check_lblock(cur, block, level, bp);
272 : else
273 >25926*10^7 : return xfs_btree_check_sblock(cur, block, level, bp);
274 : }
275 :
276 : /* Check that this long pointer is valid and points within the fs. */
277 : bool
278 3105173622 : xfs_btree_check_lptr(
279 : struct xfs_btree_cur *cur,
280 : xfs_fsblock_t fsbno,
281 : int level)
282 : {
283 3105173622 : if (level <= 0)
284 : return false;
285 3105173622 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
286 2396706 : return xfbtree_verify_xfileoff(cur, fsbno);
287 3102776916 : return xfs_verify_fsbno(cur->bc_mp, fsbno);
288 : }
289 :
290 : /* Check that this short pointer is valid and points within the AG. */
291 : bool
292 >41501*10^7 : xfs_btree_check_sptr(
293 : struct xfs_btree_cur *cur,
294 : xfs_agblock_t agbno,
295 : int level)
296 : {
297 >41501*10^7 : if (level <= 0)
298 : return false;
299 >41501*10^7 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
300 1433185681 : return xfbtree_verify_xfileoff(cur, agbno);
301 >41358*10^7 : return xfs_verify_agbno(cur->bc_ag.pag, agbno);
302 : }
303 :
304 : /*
305 : * Check that a given (indexed) btree pointer at a certain level of a
306 : * btree is valid and doesn't point past where it should.
307 : */
308 : static int
309 51891001224 : xfs_btree_check_ptr(
310 : struct xfs_btree_cur *cur,
311 : const union xfs_btree_ptr *ptr,
312 : int index,
313 : int level)
314 : {
315 51891001224 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
316 1731222433 : return xfbtree_check_ptr(cur, ptr, index, level);
317 :
318 50159778791 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
319 2060754645 : if (xfs_btree_check_lptr(cur, be64_to_cpu((&ptr->l)[index]),
320 : level))
321 : return 0;
322 0 : xfs_err(cur->bc_mp,
323 : "Inode %llu fork %d: Corrupt btree %d pointer at level %d index %d.",
324 : cur->bc_ino.ip->i_ino,
325 : cur->bc_ino.whichfork, cur->bc_btnum,
326 : level, index);
327 : } else {
328 48099024146 : if (xfs_btree_check_sptr(cur, be32_to_cpu((&ptr->s)[index]),
329 : level))
330 : return 0;
331 0 : xfs_err(cur->bc_mp,
332 : "AG %u: Corrupt btree %d pointer at level %d index %d.",
333 : cur->bc_ag.pag->pag_agno, cur->bc_btnum,
334 : level, index);
335 : }
336 :
337 0 : xfs_btree_mark_sick(cur);
338 0 : return -EFSCORRUPTED;
339 : }
340 :
341 : #ifdef DEBUG
342 : # define xfs_btree_debug_check_ptr xfs_btree_check_ptr
343 : #else
344 : # define xfs_btree_debug_check_ptr(...) (0)
345 : #endif
346 :
347 : /*
348 : * Calculate CRC on the whole btree block and stuff it into the
349 : * long-form btree header.
350 : *
351 : * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
352 : * it into the buffer so recovery knows what the last modification was that made
353 : * it to disk.
354 : */
355 : void
356 9286042 : xfs_btree_lblock_calc_crc(
357 : struct xfs_buf *bp)
358 : {
359 9286042 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
360 9286042 : struct xfs_buf_log_item *bip = bp->b_log_item;
361 :
362 9286042 : if (!xfs_has_crc(bp->b_mount))
363 : return;
364 9286042 : if (bip)
365 9218948 : block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
366 9286042 : xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
367 : }
368 :
369 : bool
370 5467933 : xfs_btree_lblock_verify_crc(
371 : struct xfs_buf *bp)
372 : {
373 5467933 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
374 5467933 : struct xfs_mount *mp = bp->b_mount;
375 :
376 5467933 : if (xfs_has_crc(mp)) {
377 5467933 : if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
378 : return false;
379 5467933 : return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
380 : }
381 :
382 : return true;
383 : }
384 :
385 : /*
386 : * Calculate CRC on the whole btree block and stuff it into the
387 : * short-form btree header.
388 : *
389 : * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
390 : * it into the buffer so recovery knows what the last modification was that made
391 : * it to disk.
392 : */
393 : void
394 24116236 : xfs_btree_sblock_calc_crc(
395 : struct xfs_buf *bp)
396 : {
397 24116236 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
398 24116236 : struct xfs_buf_log_item *bip = bp->b_log_item;
399 :
400 24116236 : if (!xfs_has_crc(bp->b_mount))
401 : return;
402 24075302 : if (bip)
403 23029684 : block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
404 24075302 : xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
405 : }
406 :
407 : bool
408 2764934 : xfs_btree_sblock_verify_crc(
409 : struct xfs_buf *bp)
410 : {
411 2764934 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
412 2764934 : struct xfs_mount *mp = bp->b_mount;
413 :
414 2764934 : if (xfs_has_crc(mp)) {
415 2764321 : if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
416 : return false;
417 2764321 : return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
418 : }
419 :
420 : return true;
421 : }
422 :
423 : static int
424 539377 : xfs_btree_free_block(
425 : struct xfs_btree_cur *cur,
426 : struct xfs_buf *bp)
427 : {
428 539377 : int error;
429 :
430 539377 : trace_xfs_btree_free_block(cur, bp);
431 :
432 539376 : error = cur->bc_ops->free_block(cur, bp);
433 539376 : if (!error) {
434 539376 : xfs_trans_binval(cur->bc_tp, bp);
435 539377 : XFS_BTREE_STATS_INC(cur, free);
436 : }
437 539377 : return error;
438 : }
439 :
440 : /*
441 : * Delete the btree cursor.
442 : */
443 : void
444 7295323073 : xfs_btree_del_cursor(
445 : struct xfs_btree_cur *cur, /* btree cursor */
446 : int error) /* del because of error */
447 : {
448 7295323073 : int i; /* btree level */
449 :
450 : /*
451 : * Clear the buffer pointers and release the buffers. If we're doing
452 : * this because of an error, inspect all of the entries in the bc_bufs
453 : * array for buffers to be unlocked. This is because some of the btree
454 : * code works from level n down to 0, and if we get an error along the
455 : * way we won't have initialized all the entries down to 0.
456 : */
457 19073681543 : for (i = 0; i < cur->bc_nlevels; i++) {
458 12199803747 : if (cur->bc_levels[i].bp)
459 10745834552 : xfs_trans_brelse(cur->bc_tp, cur->bc_levels[i].bp);
460 1453969195 : else if (!error)
461 : break;
462 : }
463 :
464 : /*
465 : * If we are doing a BMBT update, the number of unaccounted blocks
466 : * allocated during this cursor life time should be zero. If it's not
467 : * zero, then we should be shut down or on our way to shutdown due to
468 : * cancelling a dirty transaction on error.
469 : */
470 7297853731 : ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
471 : xfs_is_shutdown(cur->bc_mp) || error != 0);
472 7297853731 : if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
473 156 : kmem_free(cur->bc_ops);
474 7297853731 : if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
475 6396235432 : !(cur->bc_flags & XFS_BTREE_IN_XFILE) && cur->bc_ag.pag)
476 6396235432 : xfs_perag_put(cur->bc_ag.pag);
477 7301676974 : if (cur->bc_flags & XFS_BTREE_IN_XFILE) {
478 500416678 : if (cur->bc_mem.pag)
479 77274079 : xfs_perag_put(cur->bc_mem.pag);
480 : }
481 7301678919 : kmem_cache_free(cur->bc_cache, cur);
482 7299774605 : }
483 :
484 : /* Return the buffer target for this btree's buffer. */
485 : static inline struct xfs_buftarg *
486 12926831726 : xfs_btree_buftarg(
487 : struct xfs_btree_cur *cur)
488 : {
489 12926831726 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
490 693455728 : return xfbtree_target(cur->bc_mem.xfbtree);
491 12233375998 : return cur->bc_mp->m_ddev_targp;
492 : }
493 :
494 : /* Return the block size (in units of 512b sectors) for this btree. */
495 : static inline unsigned int
496 12929090270 : xfs_btree_bbsize(
497 : struct xfs_btree_cur *cur)
498 : {
499 12929090270 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
500 693477066 : return xfbtree_bbsize();
501 12235613204 : return cur->bc_mp->m_bsize;
502 : }
503 :
504 : /*
505 : * Duplicate the btree cursor.
506 : * Allocate a new one, copy the record, re-get the buffers.
507 : */
508 : int /* error */
509 292887405 : xfs_btree_dup_cursor(
510 : struct xfs_btree_cur *cur, /* input cursor */
511 : struct xfs_btree_cur **ncur) /* output cursor */
512 : {
513 292887405 : struct xfs_buf *bp; /* btree block's buffer pointer */
514 292887405 : int error; /* error return value */
515 292887405 : int i; /* level number of btree block */
516 292887405 : xfs_mount_t *mp; /* mount structure for filesystem */
517 292887405 : struct xfs_btree_cur *new; /* new cursor value */
518 292887405 : xfs_trans_t *tp; /* transaction pointer, can be NULL */
519 :
520 292887405 : tp = cur->bc_tp;
521 292887405 : mp = cur->bc_mp;
522 :
523 : /*
524 : * Allocate a new cursor like the old one.
525 : */
526 292887405 : new = cur->bc_ops->dup_cursor(cur);
527 :
528 : /*
529 : * Copy the record currently in the cursor.
530 : */
531 293015044 : new->bc_rec = cur->bc_rec;
532 :
533 : /*
534 : * For each level current, re-get the buffer and copy the ptr value.
535 : */
536 896645051 : for (i = 0; i < new->bc_nlevels; i++) {
537 604293338 : new->bc_levels[i].ptr = cur->bc_levels[i].ptr;
538 604293338 : new->bc_levels[i].ra = cur->bc_levels[i].ra;
539 604293338 : bp = cur->bc_levels[i].bp;
540 604293338 : if (bp) {
541 548832963 : error = xfs_trans_read_buf(mp, tp,
542 : xfs_btree_buftarg(cur),
543 : xfs_buf_daddr(bp),
544 549017838 : xfs_btree_bbsize(cur), 0, &bp,
545 549017838 : cur->bc_ops->buf_ops);
546 548354513 : if (xfs_metadata_is_sick(error))
547 0 : xfs_btree_mark_sick(new);
548 548354513 : if (error) {
549 6 : xfs_btree_del_cursor(new, error);
550 6 : *ncur = NULL;
551 6 : return error;
552 : }
553 : }
554 603630007 : new->bc_levels[i].bp = bp;
555 : }
556 292351713 : *ncur = new;
557 292351713 : return 0;
558 : }
559 :
560 : /*
561 : * XFS btree block layout and addressing:
562 : *
563 : * There are two types of blocks in the btree: leaf and non-leaf blocks.
564 : *
565 : * The leaf record start with a header then followed by records containing
566 : * the values. A non-leaf block also starts with the same header, and
567 : * then first contains lookup keys followed by an equal number of pointers
568 : * to the btree blocks at the previous level.
569 : *
570 : * +--------+-------+-------+-------+-------+-------+-------+
571 : * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
572 : * +--------+-------+-------+-------+-------+-------+-------+
573 : *
574 : * +--------+-------+-------+-------+-------+-------+-------+
575 : * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
576 : * +--------+-------+-------+-------+-------+-------+-------+
577 : *
578 : * The header is called struct xfs_btree_block for reasons better left unknown
579 : * and comes in different versions for short (32bit) and long (64bit) block
580 : * pointers. The record and key structures are defined by the btree instances
581 : * and opaque to the btree core. The block pointers are simple disk endian
582 : * integers, available in a short (32bit) and long (64bit) variant.
583 : *
584 : * The helpers below calculate the offset of a given record, key or pointer
585 : * into a btree block (xfs_btree_*_offset) or return a pointer to the given
586 : * record, key or pointer (xfs_btree_*_addr). Note that all addressing
587 : * inside the btree block is done using indices starting at one, not zero!
588 : *
589 : * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
590 : * overlapping intervals. In such a tree, records are still sorted lowest to
591 : * highest and indexed by the smallest key value that refers to the record.
592 : * However, nodes are different: each pointer has two associated keys -- one
593 : * indexing the lowest key available in the block(s) below (the same behavior
594 : * as the key in a regular btree) and another indexing the highest key
595 : * available in the block(s) below. Because records are /not/ sorted by the
596 : * highest key, all leaf block updates require us to compute the highest key
597 : * that matches any record in the leaf and to recursively update the high keys
598 : * in the nodes going further up in the tree, if necessary. Nodes look like
599 : * this:
600 : *
601 : * +--------+-----+-----+-----+-----+-----+-------+-------+-----+
602 : * Non-Leaf: | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... |
603 : * +--------+-----+-----+-----+-----+-----+-------+-------+-----+
604 : *
605 : * To perform an interval query on an overlapped tree, perform the usual
606 : * depth-first search and use the low and high keys to decide if we can skip
607 : * that particular node. If a leaf node is reached, return the records that
608 : * intersect the interval. Note that an interval query may return numerous
609 : * entries. For a non-overlapped tree, simply search for the record associated
610 : * with the lowest key and iterate forward until a non-matching record is
611 : * found. Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by
612 : * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discuss this in
613 : * more detail.
614 : *
615 : * Why do we care about overlapping intervals? Let's say you have a bunch of
616 : * reverse mapping records on a reflink filesystem:
617 : *
618 : * 1: +- file A startblock B offset C length D -----------+
619 : * 2: +- file E startblock F offset G length H --------------+
620 : * 3: +- file I startblock F offset J length K --+
621 : * 4: +- file L... --+
622 : *
623 : * Now say we want to map block (B+D) into file A at offset (C+D). Ideally,
624 : * we'd simply increment the length of record 1. But how do we find the record
625 : * that ends at (B+D-1) (i.e. record 1)? A LE lookup of (B+D-1) would return
626 : * record 3 because the keys are ordered first by startblock. An interval
627 : * query would return records 1 and 2 because they both overlap (B+D-1), and
628 : * from that we can pick out record 1 as the appropriate left neighbor.
629 : *
630 : * In the non-overlapped case you can do a LE lookup and decrement the cursor
631 : * because a record's interval must end before the next record.
632 : */
633 :
634 : /*
635 : * Return size of the btree block header for this btree instance.
636 : */
637 >58776*10^7 : static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
638 : {
639 >58776*10^7 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
640 9347329906 : if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
641 : return XFS_BTREE_LBLOCK_CRC_LEN;
642 0 : return XFS_BTREE_LBLOCK_LEN;
643 : }
644 >57841*10^7 : if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
645 >57845*10^7 : return XFS_BTREE_SBLOCK_CRC_LEN;
646 : return XFS_BTREE_SBLOCK_LEN;
647 : }
648 :
649 : /*
650 : * Return size of btree block pointers for this btree instance.
651 : */
652 : static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
653 : {
654 51350042426 : return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
655 51350042426 : sizeof(__be64) : sizeof(__be32);
656 : }
657 :
658 : /*
659 : * Calculate offset of the n-th record in a btree block.
660 : */
661 : STATIC size_t
662 >40452*10^7 : xfs_btree_rec_offset(
663 : struct xfs_btree_cur *cur,
664 : int n)
665 : {
666 >40452*10^7 : return xfs_btree_block_len(cur) +
667 >40452*10^7 : (n - 1) * cur->bc_ops->rec_len;
668 : }
669 :
670 : /*
671 : * Calculate offset of the n-th key in a btree block.
672 : */
673 : STATIC size_t
674 79521521610 : xfs_btree_key_offset(
675 : struct xfs_btree_cur *cur,
676 : int n)
677 : {
678 79521521610 : return xfs_btree_block_len(cur) +
679 79521521610 : (n - 1) * cur->bc_ops->key_len;
680 : }
681 :
682 : /*
683 : * Calculate offset of the n-th high key in a btree block.
684 : */
685 : STATIC size_t
686 54969471006 : xfs_btree_high_key_offset(
687 : struct xfs_btree_cur *cur,
688 : int n)
689 : {
690 54969471006 : return xfs_btree_block_len(cur) +
691 54969471006 : (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2);
692 : }
693 :
694 : /*
695 : * Calculate offset of the n-th block pointer in a btree block.
696 : */
697 : STATIC size_t
698 51334703141 : xfs_btree_ptr_offset(
699 : struct xfs_btree_cur *cur,
700 : int n,
701 : int level)
702 : {
703 51334703141 : return xfs_btree_block_len(cur) +
704 >10267*10^7 : cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
705 >10197*10^7 : (n - 1) * xfs_btree_ptr_len(cur);
706 : }
707 :
708 : /*
709 : * Return a pointer to the n-th record in the btree block.
710 : */
711 : union xfs_btree_rec *
712 3061430958 : xfs_btree_rec_addr(
713 : struct xfs_btree_cur *cur,
714 : int n,
715 : struct xfs_btree_block *block)
716 : {
717 >40054*10^7 : return (union xfs_btree_rec *)
718 >40054*10^7 : ((char *)block + xfs_btree_rec_offset(cur, n));
719 : }
720 :
721 : /*
722 : * Return a pointer to the n-th key in the btree block.
723 : */
724 : union xfs_btree_key *
725 2628470463 : xfs_btree_key_addr(
726 : struct xfs_btree_cur *cur,
727 : int n,
728 : struct xfs_btree_block *block)
729 : {
730 78894257405 : return (union xfs_btree_key *)
731 78894257405 : ((char *)block + xfs_btree_key_offset(cur, n));
732 : }
733 :
734 : /*
735 : * Return a pointer to the n-th high key in the btree block.
736 : */
737 : union xfs_btree_key *
738 1831116549 : xfs_btree_high_key_addr(
739 : struct xfs_btree_cur *cur,
740 : int n,
741 : struct xfs_btree_block *block)
742 : {
743 54998042695 : return (union xfs_btree_key *)
744 54998042695 : ((char *)block + xfs_btree_high_key_offset(cur, n));
745 : }
746 :
747 : /*
748 : * Return a pointer to the n-th block pointer in the btree block.
749 : */
750 : union xfs_btree_ptr *
751 51348998043 : xfs_btree_ptr_addr(
752 : struct xfs_btree_cur *cur,
753 : int n,
754 : struct xfs_btree_block *block)
755 : {
756 51348998043 : int level = xfs_btree_get_level(block);
757 :
758 51348998043 : ASSERT(block->bb_level != 0);
759 :
760 >10268*10^7 : return (union xfs_btree_ptr *)
761 51348998043 : ((char *)block + xfs_btree_ptr_offset(cur, n, level));
762 : }
763 :
764 : struct xfs_ifork *
765 916991643 : xfs_btree_ifork_ptr(
766 : struct xfs_btree_cur *cur)
767 : {
768 916991643 : ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
769 :
770 916991643 : if (cur->bc_flags & XFS_BTREE_STAGING)
771 52223 : return cur->bc_ino.ifake->if_fork;
772 916939420 : return xfs_ifork_ptr(cur->bc_ino.ip, cur->bc_ino.whichfork);
773 : }
774 :
775 : /*
776 : * Get the root block which is stored in the inode.
777 : *
778 : * For now this btree implementation assumes the btree root is always
779 : * stored in the if_broot field of an inode fork.
780 : */
781 : STATIC struct xfs_btree_block *
782 512220285 : xfs_btree_get_iroot(
783 : struct xfs_btree_cur *cur)
784 : {
785 512220285 : struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur);
786 :
787 512219960 : return (struct xfs_btree_block *)ifp->if_broot;
788 : }
789 :
790 : /*
791 : * Retrieve the block pointer from the cursor at the given level.
792 : * This may be an inode btree root or from a buffer.
793 : */
794 : struct xfs_btree_block * /* generic btree block pointer */
795 >38974*10^7 : xfs_btree_get_block(
796 : struct xfs_btree_cur *cur, /* btree cursor */
797 : int level, /* level in btree */
798 : struct xfs_buf **bpp) /* buffer containing the block */
799 : {
800 >38974*10^7 : if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
801 1940161147 : (level == cur->bc_nlevels - 1)) {
802 140530203 : *bpp = NULL;
803 140530203 : return xfs_btree_get_iroot(cur);
804 : }
805 :
806 >38960*10^7 : *bpp = cur->bc_levels[level].bp;
807 >38960*10^7 : return XFS_BUF_TO_BLOCK(*bpp);
808 : }
809 :
810 : /*
811 : * Change the cursor to point to the first record at the given level.
812 : * Other levels are unaffected.
813 : */
814 : STATIC int /* success=1, failure=0 */
815 105957616 : xfs_btree_firstrec(
816 : struct xfs_btree_cur *cur, /* btree cursor */
817 : int level) /* level to change */
818 : {
819 105957616 : struct xfs_btree_block *block; /* generic btree block pointer */
820 105957616 : struct xfs_buf *bp; /* buffer containing block */
821 :
822 : /*
823 : * Get the block pointer for this level.
824 : */
825 105957616 : block = xfs_btree_get_block(cur, level, &bp);
826 105957564 : if (xfs_btree_check_block(cur, block, level, bp))
827 : return 0;
828 : /*
829 : * It's empty, there is no such record.
830 : */
831 105957536 : if (!block->bb_numrecs)
832 : return 0;
833 : /*
834 : * Set the ptr value to 1, that's the first record/key.
835 : */
836 105957536 : cur->bc_levels[level].ptr = 1;
837 105957536 : return 1;
838 : }
839 :
840 : /*
841 : * Change the cursor to point to the last record in the current block
842 : * at the given level. Other levels are unaffected.
843 : */
844 : STATIC int /* success=1, failure=0 */
845 116023022 : xfs_btree_lastrec(
846 : struct xfs_btree_cur *cur, /* btree cursor */
847 : int level) /* level to change */
848 : {
849 116023022 : struct xfs_btree_block *block; /* generic btree block pointer */
850 116023022 : struct xfs_buf *bp; /* buffer containing block */
851 :
852 : /*
853 : * Get the block pointer for this level.
854 : */
855 116023022 : block = xfs_btree_get_block(cur, level, &bp);
856 116022362 : if (xfs_btree_check_block(cur, block, level, bp))
857 : return 0;
858 : /*
859 : * It's empty, there is no such record.
860 : */
861 116025766 : if (!block->bb_numrecs)
862 : return 0;
863 : /*
864 : * Set the ptr value to numrecs, that's the last record/key.
865 : */
866 116025766 : cur->bc_levels[level].ptr = be16_to_cpu(block->bb_numrecs);
867 116025766 : return 1;
868 : }
869 :
870 : /*
871 : * Compute first and last byte offsets for the fields given.
872 : * Interprets the offsets table, which contains struct field offsets.
873 : */
874 : void
875 2475491417 : xfs_btree_offsets(
876 : uint32_t fields, /* bitmask of fields */
877 : const short *offsets, /* table of field offsets */
878 : int nbits, /* number of bits to inspect */
879 : int *first, /* output: first byte offset */
880 : int *last) /* output: last byte offset */
881 : {
882 2475491417 : int i; /* current bit number */
883 2475491417 : uint32_t imask; /* mask for current bit number */
884 :
885 2475491417 : ASSERT(fields != 0);
886 : /*
887 : * Find the lowest bit, so the first byte offset.
888 : */
889 10201584462 : for (i = 0, imask = 1u; ; i++, imask <<= 1) {
890 10201584462 : if (imask & fields) {
891 2475491417 : *first = offsets[i];
892 2475491417 : break;
893 : }
894 : }
895 : /*
896 : * Find the highest bit, so the last byte offset.
897 : */
898 2475491417 : for (i = nbits - 1, imask = 1u << i; ; i--, imask >>= 1) {
899 17083555863 : if (imask & fields) {
900 2475491417 : *last = offsets[i + 1] - 1;
901 2475491417 : break;
902 : }
903 : }
904 2475491417 : }
905 :
906 : /*
907 : * Get a buffer for the block, return it read in.
908 : * Long-form addressing.
909 : */
910 : int
911 444718120 : xfs_btree_read_bufl(
912 : struct xfs_mount *mp, /* file system mount point */
913 : struct xfs_trans *tp, /* transaction pointer */
914 : xfs_fsblock_t fsbno, /* file system block number */
915 : struct xfs_buf **bpp, /* buffer for fsbno */
916 : int refval, /* ref count value for buffer */
917 : const struct xfs_buf_ops *ops)
918 : {
919 444718120 : struct xfs_buf *bp; /* return value */
920 444718120 : xfs_daddr_t d; /* real disk block address */
921 444718120 : int error;
922 :
923 444718120 : if (!xfs_verify_fsbno(mp, fsbno))
924 : return -EFSCORRUPTED;
925 444716260 : d = XFS_FSB_TO_DADDR(mp, fsbno);
926 444716065 : error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
927 : mp->m_bsize, 0, &bp, ops);
928 444716966 : if (error)
929 : return error;
930 444716900 : if (bp)
931 444716900 : xfs_buf_set_ref(bp, refval);
932 444709606 : *bpp = bp;
933 444709606 : return 0;
934 : }
935 :
936 : /*
937 : * Read-ahead the block, don't wait for it, don't return a buffer.
938 : * Long-form addressing.
939 : */
940 : /* ARGSUSED */
941 : void
942 58203206 : xfs_btree_reada_bufl(
943 : struct xfs_mount *mp, /* file system mount point */
944 : xfs_fsblock_t fsbno, /* file system block number */
945 : xfs_extlen_t count, /* count of filesystem blocks */
946 : const struct xfs_buf_ops *ops)
947 : {
948 58203206 : xfs_daddr_t d;
949 :
950 58203206 : ASSERT(fsbno != NULLFSBLOCK);
951 58203206 : d = XFS_FSB_TO_DADDR(mp, fsbno);
952 58203157 : xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
953 58203505 : }
954 :
955 : /*
956 : * Read-ahead the block, don't wait for it, don't return a buffer.
957 : * Short-form addressing.
958 : */
959 : /* ARGSUSED */
960 : void
961 6025765672 : xfs_btree_reada_bufs(
962 : struct xfs_mount *mp, /* file system mount point */
963 : xfs_agnumber_t agno, /* allocation group number */
964 : xfs_agblock_t agbno, /* allocation group block number */
965 : xfs_extlen_t count, /* count of filesystem blocks */
966 : const struct xfs_buf_ops *ops)
967 : {
968 6025765672 : xfs_daddr_t d;
969 :
970 6025765672 : ASSERT(agno != NULLAGNUMBER);
971 6025765672 : ASSERT(agbno != NULLAGBLOCK);
972 6025765672 : d = XFS_AGB_TO_DADDR(mp, agno, agbno);
973 6025765672 : xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
974 6026778002 : }
975 :
976 : STATIC int
977 58209229 : xfs_btree_readahead_lblock(
978 : struct xfs_btree_cur *cur,
979 : int lr,
980 : struct xfs_btree_block *block)
981 : {
982 58209229 : int rval = 0;
983 58209229 : xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
984 58209229 : xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
985 :
986 58209229 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
987 : return 0;
988 :
989 58203638 : if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
990 23968055 : xfs_btree_reada_bufl(cur->bc_mp, left, 1,
991 23968055 : cur->bc_ops->buf_ops);
992 23968055 : rval++;
993 : }
994 :
995 58203644 : if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
996 34235167 : xfs_btree_reada_bufl(cur->bc_mp, right, 1,
997 34235167 : cur->bc_ops->buf_ops);
998 34235442 : rval++;
999 : }
1000 :
1001 : return rval;
1002 : }
1003 :
1004 : STATIC int
1005 2396241584 : xfs_btree_readahead_sblock(
1006 : struct xfs_btree_cur *cur,
1007 : int lr,
1008 : struct xfs_btree_block *block)
1009 : {
1010 2396241584 : int rval = 0;
1011 2396241584 : xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib);
1012 2396241584 : xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib);
1013 :
1014 2396241584 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
1015 : return 0;
1016 :
1017 2362829001 : if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
1018 126091101 : xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1019 126091101 : left, 1, cur->bc_ops->buf_ops);
1020 126091101 : rval++;
1021 : }
1022 :
1023 2362830711 : if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
1024 2236655635 : xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1025 2236655635 : right, 1, cur->bc_ops->buf_ops);
1026 2237338106 : rval++;
1027 : }
1028 :
1029 : return rval;
1030 : }
1031 :
1032 : /*
1033 : * Read-ahead btree blocks, at the given level.
1034 : * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
1035 : */
1036 : STATIC int
1037 >12404*10^7 : xfs_btree_readahead(
1038 : struct xfs_btree_cur *cur, /* btree cursor */
1039 : int lev, /* level in btree */
1040 : int lr) /* left/right bits */
1041 : {
1042 >12404*10^7 : struct xfs_btree_block *block;
1043 :
1044 : /*
1045 : * No readahead needed if we are at the root level and the
1046 : * btree root is stored in the inode.
1047 : */
1048 >12404*10^7 : if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1049 114227898 : (lev == cur->bc_nlevels - 1))
1050 : return 0;
1051 :
1052 >12403*10^7 : if ((cur->bc_levels[lev].ra | lr) == cur->bc_levels[lev].ra)
1053 : return 0;
1054 :
1055 2454656958 : cur->bc_levels[lev].ra |= lr;
1056 2454656958 : block = XFS_BUF_TO_BLOCK(cur->bc_levels[lev].bp);
1057 :
1058 2454656958 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1059 58209322 : return xfs_btree_readahead_lblock(cur, lr, block);
1060 2396447636 : return xfs_btree_readahead_sblock(cur, lr, block);
1061 : }
1062 :
1063 : STATIC int
1064 42265350697 : xfs_btree_ptr_to_daddr(
1065 : struct xfs_btree_cur *cur,
1066 : const union xfs_btree_ptr *ptr,
1067 : xfs_daddr_t *daddr)
1068 : {
1069 42265350697 : xfs_fsblock_t fsbno;
1070 42265350697 : xfs_agblock_t agbno;
1071 42265350697 : int error;
1072 :
1073 42265350697 : error = xfs_btree_check_ptr(cur, ptr, 0, 1);
1074 42259767470 : if (error)
1075 : return error;
1076 :
1077 42259767470 : if (cur->bc_flags & XFS_BTREE_IN_XFILE) {
1078 1491588093 : *daddr = xfbtree_ptr_to_daddr(cur, ptr);
1079 1491571569 : return 0;
1080 : }
1081 :
1082 40768179377 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1083 1390904653 : fsbno = be64_to_cpu(ptr->l);
1084 1390904653 : *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno);
1085 : } else {
1086 39377274724 : agbno = be32_to_cpu(ptr->s);
1087 39377274724 : *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1088 : agbno);
1089 : }
1090 :
1091 : return 0;
1092 : }
1093 :
1094 : /*
1095 : * Readahead @count btree blocks at the given @ptr location.
1096 : *
1097 : * We don't need to care about long or short form btrees here as we have a
1098 : * method of converting the ptr directly to a daddr available to us.
1099 : */
1100 : STATIC void
1101 8455362 : xfs_btree_readahead_ptr(
1102 : struct xfs_btree_cur *cur,
1103 : union xfs_btree_ptr *ptr,
1104 : xfs_extlen_t count)
1105 : {
1106 8455362 : xfs_daddr_t daddr;
1107 :
1108 8455362 : if (xfs_btree_ptr_to_daddr(cur, ptr, &daddr))
1109 0 : return;
1110 16911438 : xfs_buf_readahead(xfs_btree_buftarg(cur), daddr,
1111 8455753 : xfs_btree_bbsize(cur) * count,
1112 8455685 : cur->bc_ops->buf_ops);
1113 : }
1114 :
1115 : /*
1116 : * Set the buffer for level "lev" in the cursor to bp, releasing
1117 : * any previous buffer.
1118 : */
1119 : STATIC void
1120 12140519686 : xfs_btree_setbuf(
1121 : struct xfs_btree_cur *cur, /* btree cursor */
1122 : int lev, /* level in btree */
1123 : struct xfs_buf *bp) /* new buffer to set */
1124 : {
1125 12140519686 : struct xfs_btree_block *b; /* btree block */
1126 :
1127 12140519686 : if (cur->bc_levels[lev].bp)
1128 1941221336 : xfs_trans_brelse(cur->bc_tp, cur->bc_levels[lev].bp);
1129 12140585788 : cur->bc_levels[lev].bp = bp;
1130 12140585788 : cur->bc_levels[lev].ra = 0;
1131 :
1132 12140585788 : b = XFS_BUF_TO_BLOCK(bp);
1133 12140585788 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1134 1072576279 : if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
1135 700008112 : cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
1136 1072576279 : if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
1137 915155389 : cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
1138 : } else {
1139 11068009509 : if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
1140 6659876794 : cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
1141 11068009509 : if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
1142 7051175039 : cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
1143 : }
1144 12140585788 : }
1145 :
1146 : bool
1147 29116452 : xfs_btree_ptr_is_null(
1148 : struct xfs_btree_cur *cur,
1149 : const union xfs_btree_ptr *ptr)
1150 : {
1151 305456144 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1152 1359847894 : return ptr->l == cpu_to_be64(NULLFSBLOCK);
1153 : else
1154 5982091589 : return ptr->s == cpu_to_be32(NULLAGBLOCK);
1155 : }
1156 :
1157 : void
1158 1688407 : xfs_btree_set_ptr_null(
1159 : struct xfs_btree_cur *cur,
1160 : union xfs_btree_ptr *ptr)
1161 : {
1162 1688407 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1163 563601102 : ptr->l = cpu_to_be64(NULLFSBLOCK);
1164 : else
1165 1286175834 : ptr->s = cpu_to_be32(NULLAGBLOCK);
1166 1688407 : }
1167 :
1168 : /*
1169 : * Get/set/init sibling pointers
1170 : */
1171 : void
1172 6025948254 : xfs_btree_get_sibling(
1173 : struct xfs_btree_cur *cur,
1174 : struct xfs_btree_block *block,
1175 : union xfs_btree_ptr *ptr,
1176 : int lr)
1177 : {
1178 6025948254 : ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
1179 :
1180 6025948254 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1181 825597102 : if (lr == XFS_BB_RIGHTSIB)
1182 564346206 : ptr->l = block->bb_u.l.bb_rightsib;
1183 : else
1184 261250896 : ptr->l = block->bb_u.l.bb_leftsib;
1185 : } else {
1186 5200351152 : if (lr == XFS_BB_RIGHTSIB)
1187 4998087703 : ptr->s = block->bb_u.s.bb_rightsib;
1188 : else
1189 202263449 : ptr->s = block->bb_u.s.bb_leftsib;
1190 : }
1191 6025948254 : }
1192 :
1193 : void
1194 8938561 : xfs_btree_set_sibling(
1195 : struct xfs_btree_cur *cur,
1196 : struct xfs_btree_block *block,
1197 : const union xfs_btree_ptr *ptr,
1198 : int lr)
1199 : {
1200 8938561 : ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
1201 :
1202 8938561 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1203 1797493 : if (lr == XFS_BB_RIGHTSIB)
1204 1191836 : block->bb_u.l.bb_rightsib = ptr->l;
1205 : else
1206 605657 : block->bb_u.l.bb_leftsib = ptr->l;
1207 : } else {
1208 7141068 : if (lr == XFS_BB_RIGHTSIB)
1209 3599484 : block->bb_u.s.bb_rightsib = ptr->s;
1210 : else
1211 3541584 : block->bb_u.s.bb_leftsib = ptr->s;
1212 : }
1213 8938561 : }
1214 :
1215 : static void
1216 22277597 : __xfs_btree_init_block(
1217 : struct xfs_mount *mp,
1218 : struct xfs_btree_block *buf,
1219 : const struct xfs_btree_ops *ops,
1220 : xfs_daddr_t blkno,
1221 : __u16 level,
1222 : __u16 numrecs,
1223 : __u64 owner)
1224 : {
1225 22277597 : int crc = xfs_has_crc(mp);
1226 22277597 : __u32 magic = xfs_btree_magic(mp, ops);
1227 :
1228 22277745 : buf->bb_magic = cpu_to_be32(magic);
1229 22277745 : buf->bb_level = cpu_to_be16(level);
1230 22277745 : buf->bb_numrecs = cpu_to_be16(numrecs);
1231 :
1232 22277745 : if (ops->geom_flags & XFS_BTREE_LONG_PTRS) {
1233 19809989 : buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
1234 19809989 : buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
1235 19809989 : if (crc) {
1236 19809997 : buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
1237 19809997 : buf->bb_u.l.bb_owner = cpu_to_be64(owner);
1238 19809997 : uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
1239 19809976 : buf->bb_u.l.bb_pad = 0;
1240 19809976 : buf->bb_u.l.bb_lsn = 0;
1241 : }
1242 : } else {
1243 : /* owner is a 32 bit value on short blocks */
1244 2467756 : __u32 __owner = (__u32)owner;
1245 :
1246 2467756 : buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1247 2467756 : buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1248 2467756 : if (crc) {
1249 2427015 : buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
1250 2427015 : buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
1251 2427015 : uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
1252 2426750 : buf->bb_u.s.bb_lsn = 0;
1253 : }
1254 : }
1255 22277459 : }
1256 :
1257 : void
1258 16677926 : xfs_btree_init_block(
1259 : struct xfs_mount *mp,
1260 : struct xfs_btree_block *block,
1261 : const struct xfs_btree_ops *ops,
1262 : __u16 level,
1263 : __u16 numrecs,
1264 : __u64 owner)
1265 : {
1266 16677926 : __xfs_btree_init_block(mp, block, ops, XFS_BUF_DADDR_NULL, level,
1267 : numrecs, owner);
1268 16677919 : }
1269 :
1270 : void
1271 5599587 : xfs_btree_init_buf(
1272 : struct xfs_mount *mp,
1273 : struct xfs_buf *bp,
1274 : const struct xfs_btree_ops *ops,
1275 : __u16 level,
1276 : __u16 numrecs,
1277 : __u64 owner)
1278 : {
1279 5599587 : __xfs_btree_init_block(mp, XFS_BUF_TO_BLOCK(bp), ops,
1280 : xfs_buf_daddr(bp), level, numrecs, owner);
1281 5599774 : bp->b_ops = ops->buf_ops;
1282 5599774 : }
1283 :
1284 : void
1285 2851893 : xfs_btree_init_block_cur(
1286 : struct xfs_btree_cur *cur,
1287 : struct xfs_buf *bp,
1288 : int level,
1289 : int numrecs)
1290 : {
1291 2851893 : __u64 owner;
1292 :
1293 : /*
1294 : * we can pull the owner from the cursor right now as the different
1295 : * owners align directly with the pointer size of the btree. This may
1296 : * change in future, but is safe for current users of the generic btree
1297 : * code.
1298 : */
1299 2851893 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
1300 378885 : owner = xfbtree_owner(cur);
1301 2473008 : else if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1302 548187 : owner = cur->bc_ino.ip->i_ino;
1303 : else
1304 1924821 : owner = cur->bc_ag.pag->pag_agno;
1305 :
1306 2851893 : xfs_btree_init_buf(cur->bc_mp, bp, cur->bc_ops, level, numrecs, owner);
1307 2851918 : }
1308 :
1309 : /*
1310 : * Return true if ptr is the last record in the btree and
1311 : * we need to track updates to this record. The decision
1312 : * will be further refined in the update_lastrec method.
1313 : */
1314 : STATIC int
1315 2513866827 : xfs_btree_is_lastrec(
1316 : struct xfs_btree_cur *cur,
1317 : struct xfs_btree_block *block,
1318 : int level)
1319 : {
1320 2513866827 : union xfs_btree_ptr ptr;
1321 :
1322 2513866827 : if (level > 0)
1323 : return 0;
1324 2511589762 : if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
1325 : return 0;
1326 :
1327 309523758 : xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1328 619001738 : if (!xfs_btree_ptr_is_null(cur, &ptr))
1329 52402319 : return 0;
1330 : return 1;
1331 : }
1332 :
1333 : STATIC void
1334 132339901 : xfs_btree_buf_to_ptr(
1335 : struct xfs_btree_cur *cur,
1336 : struct xfs_buf *bp,
1337 : union xfs_btree_ptr *ptr)
1338 : {
1339 132339901 : if (cur->bc_flags & XFS_BTREE_IN_XFILE) {
1340 378888 : xfbtree_buf_to_ptr(cur, bp, ptr);
1341 378888 : return;
1342 : }
1343 :
1344 131961013 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1345 6878617 : ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
1346 : xfs_buf_daddr(bp)));
1347 : else {
1348 125082396 : ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
1349 : xfs_buf_daddr(bp)));
1350 : }
1351 : }
1352 :
1353 : static inline void
1354 : xfs_btree_set_refs(
1355 : struct xfs_btree_cur *cur,
1356 : struct xfs_buf *bp)
1357 : {
1358 12372442166 : xfs_buf_set_ref(bp, cur->bc_ops->lru_refs);
1359 : }
1360 :
1361 : int
1362 2855428 : xfs_btree_get_buf_block(
1363 : struct xfs_btree_cur *cur,
1364 : const union xfs_btree_ptr *ptr,
1365 : struct xfs_btree_block **block,
1366 : struct xfs_buf **bpp)
1367 : {
1368 2855428 : xfs_daddr_t d;
1369 2855428 : int error;
1370 :
1371 2855428 : error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
1372 2855451 : if (error)
1373 : return error;
1374 2855294 : error = xfs_trans_get_buf(cur->bc_tp, xfs_btree_buftarg(cur), d,
1375 2855466 : xfs_btree_bbsize(cur), 0, bpp);
1376 2855445 : if (error)
1377 : return error;
1378 :
1379 2855445 : (*bpp)->b_ops = cur->bc_ops->buf_ops;
1380 2855445 : *block = XFS_BUF_TO_BLOCK(*bpp);
1381 2855445 : return 0;
1382 : }
1383 :
1384 : /*
1385 : * Read in the buffer at the given ptr and return the buffer and
1386 : * the block pointer within the buffer.
1387 : */
1388 : int
1389 12370441143 : xfs_btree_read_buf_block(
1390 : struct xfs_btree_cur *cur,
1391 : const union xfs_btree_ptr *ptr,
1392 : int flags,
1393 : struct xfs_btree_block **block,
1394 : struct xfs_buf **bpp)
1395 : {
1396 12370441143 : struct xfs_mount *mp = cur->bc_mp;
1397 12370441143 : xfs_daddr_t d;
1398 12370441143 : int error;
1399 :
1400 : /* need to sort out how callers deal with failures first */
1401 12370441143 : ASSERT(!(flags & XBF_TRYLOCK));
1402 :
1403 12370441143 : error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
1404 12369968119 : if (error)
1405 : return error;
1406 12365396052 : error = xfs_trans_read_buf(mp, cur->bc_tp, xfs_btree_buftarg(cur), d,
1407 12369409740 : xfs_btree_bbsize(cur), flags, bpp,
1408 12369409740 : cur->bc_ops->buf_ops);
1409 12372141396 : if (xfs_metadata_is_sick(error))
1410 1349 : xfs_btree_mark_sick(cur);
1411 12372141396 : if (error)
1412 : return error;
1413 :
1414 12372442166 : xfs_btree_set_refs(cur, *bpp);
1415 12370437903 : *block = XFS_BUF_TO_BLOCK(*bpp);
1416 12370437903 : return 0;
1417 : }
1418 :
1419 : /*
1420 : * Copy keys from one btree block to another.
1421 : */
1422 : void
1423 365527715 : xfs_btree_copy_keys(
1424 : struct xfs_btree_cur *cur,
1425 : union xfs_btree_key *dst_key,
1426 : const union xfs_btree_key *src_key,
1427 : int numkeys)
1428 : {
1429 365527715 : ASSERT(numkeys >= 0);
1430 731055430 : memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1431 365527715 : }
1432 :
1433 : /*
1434 : * Copy records from one btree block to another.
1435 : */
1436 : STATIC void
1437 2064936288 : xfs_btree_copy_recs(
1438 : struct xfs_btree_cur *cur,
1439 : union xfs_btree_rec *dst_rec,
1440 : union xfs_btree_rec *src_rec,
1441 : int numrecs)
1442 : {
1443 2064936288 : ASSERT(numrecs >= 0);
1444 4129872576 : memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1445 2064936288 : }
1446 :
1447 : /*
1448 : * Copy block pointers from one btree block to another.
1449 : */
1450 : void
1451 12898049 : xfs_btree_copy_ptrs(
1452 : struct xfs_btree_cur *cur,
1453 : union xfs_btree_ptr *dst_ptr,
1454 : const union xfs_btree_ptr *src_ptr,
1455 : int numptrs)
1456 : {
1457 12898049 : ASSERT(numptrs >= 0);
1458 31441373 : memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1459 12898049 : }
1460 :
1461 : /*
1462 : * Shift keys one index left/right inside a single btree block.
1463 : */
1464 : STATIC void
1465 2441235 : xfs_btree_shift_keys(
1466 : struct xfs_btree_cur *cur,
1467 : union xfs_btree_key *key,
1468 : int dir,
1469 : int numkeys)
1470 : {
1471 2441235 : char *dst_key;
1472 :
1473 2441235 : ASSERT(numkeys >= 0);
1474 2441235 : ASSERT(dir == 1 || dir == -1);
1475 :
1476 2441235 : dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1477 4882470 : memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1478 2441235 : }
1479 :
1480 : /*
1481 : * Shift records one index left/right inside a single btree block.
1482 : */
1483 : STATIC void
1484 1576969880 : xfs_btree_shift_recs(
1485 : struct xfs_btree_cur *cur,
1486 : union xfs_btree_rec *rec,
1487 : int dir,
1488 : int numrecs)
1489 : {
1490 1576969880 : char *dst_rec;
1491 :
1492 1576969880 : ASSERT(numrecs >= 0);
1493 1576969880 : ASSERT(dir == 1 || dir == -1);
1494 :
1495 1576969880 : dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1496 3153939760 : memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1497 1576969880 : }
1498 :
1499 : /*
1500 : * Shift block pointers one index left/right inside a single btree block.
1501 : */
1502 : STATIC void
1503 2441236 : xfs_btree_shift_ptrs(
1504 : struct xfs_btree_cur *cur,
1505 : union xfs_btree_ptr *ptr,
1506 : int dir,
1507 : int numptrs)
1508 : {
1509 2441236 : char *dst_ptr;
1510 :
1511 2441236 : ASSERT(numptrs >= 0);
1512 2441236 : ASSERT(dir == 1 || dir == -1);
1513 :
1514 2441236 : dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1515 4882472 : memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1516 2441236 : }
1517 :
1518 : /*
1519 : * Log key values from the btree block.
1520 : */
1521 : STATIC void
1522 363616064 : xfs_btree_log_keys(
1523 : struct xfs_btree_cur *cur,
1524 : struct xfs_buf *bp,
1525 : int first,
1526 : int last)
1527 : {
1528 :
1529 363616064 : if (bp) {
1530 353794442 : xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1531 353795629 : xfs_trans_log_buf(cur->bc_tp, bp,
1532 353795629 : xfs_btree_key_offset(cur, first),
1533 353795629 : xfs_btree_key_offset(cur, last + 1) - 1);
1534 : } else {
1535 9821622 : xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
1536 9821622 : xfs_ilog_fbroot(cur->bc_ino.whichfork));
1537 : }
1538 363621623 : }
1539 :
1540 : /*
1541 : * Log record values from the btree block.
1542 : */
1543 : void
1544 2647586713 : xfs_btree_log_recs(
1545 : struct xfs_btree_cur *cur,
1546 : struct xfs_buf *bp,
1547 : int first,
1548 : int last)
1549 : {
1550 :
1551 2647586713 : xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1552 2647501646 : xfs_trans_log_buf(cur->bc_tp, bp,
1553 2647501646 : xfs_btree_rec_offset(cur, first),
1554 2647501646 : xfs_btree_rec_offset(cur, last + 1) - 1);
1555 :
1556 2647973040 : }
1557 :
1558 : /*
1559 : * Log block pointer fields from a btree block (nonleaf).
1560 : */
1561 : STATIC void
1562 2822489 : xfs_btree_log_ptrs(
1563 : struct xfs_btree_cur *cur, /* btree cursor */
1564 : struct xfs_buf *bp, /* buffer containing btree block */
1565 : int first, /* index of first pointer to log */
1566 : int last) /* index of last pointer to log */
1567 : {
1568 :
1569 2822489 : if (bp) {
1570 2769848 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
1571 2769848 : int level = xfs_btree_get_level(block);
1572 :
1573 2769848 : xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1574 2769849 : xfs_trans_log_buf(cur->bc_tp, bp,
1575 2769849 : xfs_btree_ptr_offset(cur, first, level),
1576 2769848 : xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1577 : } else {
1578 52641 : xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
1579 52641 : xfs_ilog_fbroot(cur->bc_ino.whichfork));
1580 : }
1581 :
1582 2822492 : }
1583 :
1584 : /*
1585 : * Log fields from a btree block header.
1586 : */
1587 : void
1588 2020950676 : xfs_btree_log_block(
1589 : struct xfs_btree_cur *cur, /* btree cursor */
1590 : struct xfs_buf *bp, /* buffer containing btree block */
1591 : uint32_t fields) /* mask of fields: XFS_BB_... */
1592 : {
1593 2020950676 : int first; /* first byte offset logged */
1594 2020950676 : int last; /* last byte offset logged */
1595 2020950676 : static const short soffsets[] = { /* table of offsets (short) */
1596 : offsetof(struct xfs_btree_block, bb_magic),
1597 : offsetof(struct xfs_btree_block, bb_level),
1598 : offsetof(struct xfs_btree_block, bb_numrecs),
1599 : offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1600 : offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
1601 : offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
1602 : offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
1603 : offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
1604 : offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
1605 : offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
1606 : XFS_BTREE_SBLOCK_CRC_LEN
1607 : };
1608 2020950676 : static const short loffsets[] = { /* table of offsets (long) */
1609 : offsetof(struct xfs_btree_block, bb_magic),
1610 : offsetof(struct xfs_btree_block, bb_level),
1611 : offsetof(struct xfs_btree_block, bb_numrecs),
1612 : offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1613 : offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
1614 : offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
1615 : offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
1616 : offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
1617 : offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
1618 : offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
1619 : offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
1620 : XFS_BTREE_LBLOCK_CRC_LEN
1621 : };
1622 :
1623 2020950676 : if (bp) {
1624 2020868489 : int nbits;
1625 :
1626 2020868489 : if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
1627 : /*
1628 : * We don't log the CRC when updating a btree
1629 : * block but instead recreate it during log
1630 : * recovery. As the log buffers have checksums
1631 : * of their own this is safe and avoids logging a crc
1632 : * update in a lot of places.
1633 : */
1634 2020863167 : if (fields == XFS_BB_ALL_BITS)
1635 4403321 : fields = XFS_BB_ALL_BITS_CRC;
1636 : nbits = XFS_BB_NUM_BITS_CRC;
1637 : } else {
1638 : nbits = XFS_BB_NUM_BITS;
1639 : }
1640 2020868489 : xfs_btree_offsets(fields,
1641 2020868489 : (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1642 : loffsets : soffsets,
1643 : nbits, &first, &last);
1644 2020786531 : xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1645 2020792248 : xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1646 : } else {
1647 82187 : xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
1648 82187 : xfs_ilog_fbroot(cur->bc_ino.whichfork));
1649 : }
1650 2021042586 : }
1651 :
1652 : /*
1653 : * Increment cursor by one record at the level.
1654 : * For nonzero levels the leaf-ward information is untouched.
1655 : */
1656 : int /* error */
1657 >12159*10^7 : xfs_btree_increment(
1658 : struct xfs_btree_cur *cur,
1659 : int level,
1660 : int *stat) /* success/failure */
1661 : {
1662 >12159*10^7 : struct xfs_btree_block *block;
1663 >12159*10^7 : union xfs_btree_ptr ptr;
1664 >12159*10^7 : struct xfs_buf *bp;
1665 >12159*10^7 : int error; /* error return value */
1666 >12159*10^7 : int lev;
1667 :
1668 >12159*10^7 : ASSERT(level < cur->bc_nlevels);
1669 :
1670 : /* Read-ahead to the right at this level. */
1671 >12159*10^7 : xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1672 :
1673 : /* Get a pointer to the btree block. */
1674 >12124*10^7 : block = xfs_btree_get_block(cur, level, &bp);
1675 :
1676 : #ifdef DEBUG
1677 >12129*10^7 : error = xfs_btree_check_block(cur, block, level, bp);
1678 >12160*10^7 : if (error)
1679 0 : goto error0;
1680 : #endif
1681 :
1682 : /* We're done if we remain in the block after the increment. */
1683 >12160*10^7 : if (++cur->bc_levels[level].ptr <= xfs_btree_get_numrecs(block))
1684 >11760*10^7 : goto out1;
1685 :
1686 : /* Fail if we just went off the right edge of the tree. */
1687 3995491344 : xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1688 7988956976 : if (xfs_btree_ptr_is_null(cur, &ptr))
1689 3269275850 : goto out0;
1690 :
1691 725202638 : XFS_BTREE_STATS_INC(cur, increment);
1692 :
1693 : /*
1694 : * March up the tree incrementing pointers.
1695 : * Stop when we don't go off the right edge of a block.
1696 : */
1697 730673857 : for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1698 730673731 : block = xfs_btree_get_block(cur, lev, &bp);
1699 :
1700 : #ifdef DEBUG
1701 730669163 : error = xfs_btree_check_block(cur, block, lev, bp);
1702 730680064 : if (error)
1703 0 : goto error0;
1704 : #endif
1705 :
1706 730680064 : if (++cur->bc_levels[lev].ptr <= xfs_btree_get_numrecs(block))
1707 : break;
1708 :
1709 : /* Read-ahead the right block for the next loop. */
1710 5745705 : xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1711 : }
1712 :
1713 : /*
1714 : * If we went off the root then we are either seriously
1715 : * confused or have the tree root in an inode.
1716 : */
1717 724934485 : if (lev == cur->bc_nlevels) {
1718 0 : if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1719 0 : goto out0;
1720 0 : ASSERT(0);
1721 0 : xfs_btree_mark_sick(cur);
1722 0 : error = -EFSCORRUPTED;
1723 0 : goto error0;
1724 : }
1725 724934485 : ASSERT(lev < cur->bc_nlevels);
1726 :
1727 : /*
1728 : * Now walk back down the tree, fixing up the cursor's buffer
1729 : * pointers and key numbers.
1730 : */
1731 1455571810 : for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1732 730674183 : union xfs_btree_ptr *ptrp;
1733 :
1734 730674183 : ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
1735 730663401 : --lev;
1736 730663401 : error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
1737 730661213 : if (error)
1738 38 : goto error0;
1739 :
1740 730661175 : xfs_btree_setbuf(cur, lev, bp);
1741 730637325 : cur->bc_levels[lev].ptr = 1;
1742 : }
1743 724895249 : out1:
1744 >11833*10^7 : *stat = 1;
1745 >11833*10^7 : return 0;
1746 :
1747 3269275850 : out0:
1748 3269275850 : *stat = 0;
1749 3269275850 : return 0;
1750 :
1751 : error0:
1752 : return error;
1753 : }
1754 :
1755 : /*
1756 : * Decrement cursor by one record at the level.
1757 : * For nonzero levels the leaf-ward information is untouched.
1758 : */
1759 : int /* error */
1760 2365990084 : xfs_btree_decrement(
1761 : struct xfs_btree_cur *cur,
1762 : int level,
1763 : int *stat) /* success/failure */
1764 : {
1765 2365990084 : struct xfs_btree_block *block;
1766 2365990084 : struct xfs_buf *bp;
1767 2365990084 : int error; /* error return value */
1768 2365990084 : int lev;
1769 2365990084 : union xfs_btree_ptr ptr;
1770 :
1771 2365990084 : ASSERT(level < cur->bc_nlevels);
1772 :
1773 : /* Read-ahead to the left at this level. */
1774 2365990084 : xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1775 :
1776 : /* We're done if we remain in the block after the decrement. */
1777 2365908850 : if (--cur->bc_levels[level].ptr > 0)
1778 2142647100 : goto out1;
1779 :
1780 : /* Get a pointer to the btree block. */
1781 223261750 : block = xfs_btree_get_block(cur, level, &bp);
1782 :
1783 : #ifdef DEBUG
1784 223261234 : error = xfs_btree_check_block(cur, block, level, bp);
1785 223262368 : if (error)
1786 0 : goto error0;
1787 : #endif
1788 :
1789 : /* Fail if we just went off the left edge of the tree. */
1790 223262368 : xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1791 446523974 : if (xfs_btree_ptr_is_null(cur, &ptr))
1792 140624173 : goto out0;
1793 :
1794 82637814 : XFS_BTREE_STATS_INC(cur, decrement);
1795 :
1796 : /*
1797 : * March up the tree decrementing pointers.
1798 : * Stop when we don't go off the left edge of a block.
1799 : */
1800 82860448 : for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1801 82860452 : if (--cur->bc_levels[lev].ptr > 0)
1802 : break;
1803 : /* Read-ahead the left block for the next loop. */
1804 222704 : xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1805 : }
1806 :
1807 : /*
1808 : * If we went off the root then we are seriously confused.
1809 : * or the root of the tree is in an inode.
1810 : */
1811 82637744 : if (lev == cur->bc_nlevels) {
1812 0 : if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1813 0 : goto out0;
1814 0 : ASSERT(0);
1815 0 : xfs_btree_mark_sick(cur);
1816 0 : error = -EFSCORRUPTED;
1817 0 : goto error0;
1818 : }
1819 82637744 : ASSERT(lev < cur->bc_nlevels);
1820 :
1821 : /*
1822 : * Now walk back down the tree, fixing up the cursor's buffer
1823 : * pointers and key numbers.
1824 : */
1825 165498033 : for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1826 82860323 : union xfs_btree_ptr *ptrp;
1827 :
1828 82860323 : ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
1829 82860319 : --lev;
1830 82860319 : error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
1831 82860505 : if (error)
1832 12 : goto error0;
1833 82860493 : xfs_btree_setbuf(cur, lev, bp);
1834 82860289 : cur->bc_levels[lev].ptr = xfs_btree_get_numrecs(block);
1835 : }
1836 82637603 : out1:
1837 2225284703 : *stat = 1;
1838 2225284703 : return 0;
1839 :
1840 140624173 : out0:
1841 140624173 : *stat = 0;
1842 140624173 : return 0;
1843 :
1844 : error0:
1845 : return error;
1846 : }
1847 :
1848 : /*
1849 : * Check the btree block owner now that we have the context to know who the
1850 : * real owner is.
1851 : */
1852 : static inline xfs_failaddr_t
1853 11325985691 : xfs_btree_check_block_owner(
1854 : struct xfs_btree_cur *cur,
1855 : struct xfs_btree_block *block)
1856 : {
1857 11325985691 : if (!xfs_has_crc(cur->bc_mp))
1858 : return NULL;
1859 :
1860 11325982462 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
1861 570693213 : return xfbtree_check_block_owner(cur, block);
1862 :
1863 10755289249 : if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS)) {
1864 10155492488 : if (be32_to_cpu(block->bb_u.s.bb_owner) !=
1865 10155492488 : cur->bc_ag.pag->pag_agno)
1866 0 : return __this_address;
1867 : return NULL;
1868 : }
1869 :
1870 599796761 : if (cur->bc_ino.flags & XFS_BTCUR_BMBT_INVALID_OWNER)
1871 : return NULL;
1872 :
1873 599621034 : if (be64_to_cpu(block->bb_u.l.bb_owner) != cur->bc_ino.ip->i_ino)
1874 0 : return __this_address;
1875 :
1876 : return NULL;
1877 : }
1878 :
1879 : int
1880 30268102298 : xfs_btree_lookup_get_block(
1881 : struct xfs_btree_cur *cur, /* btree cursor */
1882 : int level, /* level in the btree */
1883 : const union xfs_btree_ptr *pp, /* ptr to btree block */
1884 : struct xfs_btree_block **blkp) /* return btree block */
1885 : {
1886 30268102298 : struct xfs_buf *bp; /* buffer pointer for btree block */
1887 30268102298 : xfs_daddr_t daddr;
1888 30268102298 : int error = 0;
1889 :
1890 : /* special case the root block if in an inode */
1891 30268102298 : if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1892 1025009295 : (level == cur->bc_nlevels - 1)) {
1893 371548273 : *blkp = xfs_btree_get_iroot(cur);
1894 371543712 : return 0;
1895 : }
1896 :
1897 : /*
1898 : * If the old buffer at this level for the disk address we are
1899 : * looking for re-use it.
1900 : *
1901 : * Otherwise throw it away and get a new one.
1902 : */
1903 29896554025 : bp = cur->bc_levels[level].bp;
1904 29896554025 : error = xfs_btree_ptr_to_daddr(cur, pp, &daddr);
1905 29894194795 : if (error)
1906 : return error;
1907 29894194795 : if (bp && xfs_buf_daddr(bp) == daddr) {
1908 18572437711 : *blkp = XFS_BUF_TO_BLOCK(bp);
1909 18572437711 : return 0;
1910 : }
1911 :
1912 11321757084 : error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
1913 11325333983 : if (error)
1914 : return error;
1915 :
1916 : /* Check the inode owner since the verifiers don't. */
1917 11325462333 : if (xfs_btree_check_block_owner(cur, *blkp) != NULL)
1918 0 : goto out_bad;
1919 :
1920 : /* Did we get the level we were looking for? */
1921 11326781297 : if (be16_to_cpu((*blkp)->bb_level) != level)
1922 0 : goto out_bad;
1923 :
1924 : /* Check that internal nodes have at least one record. */
1925 11326781297 : if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0)
1926 0 : goto out_bad;
1927 :
1928 11326781297 : xfs_btree_setbuf(cur, level, bp);
1929 11326781297 : return 0;
1930 :
1931 0 : out_bad:
1932 0 : *blkp = NULL;
1933 0 : xfs_buf_mark_corrupt(bp);
1934 0 : xfs_trans_brelse(cur->bc_tp, bp);
1935 0 : xfs_btree_mark_sick(cur);
1936 0 : return -EFSCORRUPTED;
1937 : }
1938 :
1939 : /*
1940 : * Get current search key. For level 0 we don't actually have a key
1941 : * structure so we make one up from the record. For all other levels
1942 : * we just return the right key.
1943 : */
1944 : STATIC union xfs_btree_key *
1945 >12080*10^7 : xfs_lookup_get_search_key(
1946 : struct xfs_btree_cur *cur,
1947 : int level,
1948 : int keyno,
1949 : struct xfs_btree_block *block,
1950 : union xfs_btree_key *kp)
1951 : {
1952 >12080*10^7 : if (level == 0) {
1953 86698055738 : cur->bc_ops->init_key_from_rec(kp,
1954 : xfs_btree_rec_addr(cur, keyno, block));
1955 86698055738 : return kp;
1956 : }
1957 :
1958 34111457320 : return xfs_btree_key_addr(cur, keyno, block);
1959 : }
1960 :
1961 : /*
1962 : * Lookup the record. The cursor is made to point to it, based on dir.
1963 : * stat is set to 0 if can't find any such record, 1 for success.
1964 : */
1965 : int /* error */
1966 17599581786 : xfs_btree_lookup(
1967 : struct xfs_btree_cur *cur, /* btree cursor */
1968 : xfs_lookup_t dir, /* <=, ==, or >= */
1969 : int *stat) /* success/failure */
1970 : {
1971 17599581786 : struct xfs_btree_block *block; /* current btree block */
1972 17599581786 : int64_t diff; /* difference for the current key */
1973 17599581786 : int error; /* error return value */
1974 17599581786 : int keyno; /* current key number */
1975 17599581786 : int level; /* level in the btree */
1976 17599581786 : union xfs_btree_ptr *pp; /* ptr to btree block */
1977 17599581786 : union xfs_btree_ptr ptr; /* ptr to btree block */
1978 :
1979 17599581786 : XFS_BTREE_STATS_INC(cur, lookup);
1980 :
1981 : /* No such thing as a zero-level tree. */
1982 17593933328 : if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0)) {
1983 0 : xfs_btree_mark_sick(cur);
1984 0 : return -EFSCORRUPTED;
1985 : }
1986 :
1987 17593933328 : block = NULL;
1988 17593933328 : keyno = 0;
1989 :
1990 : /* initialise start pointer from cursor */
1991 17593933328 : cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1992 17593427023 : pp = &ptr;
1993 :
1994 : /*
1995 : * Iterate over each level in the btree, starting at the root.
1996 : * For each level above the leaves, find the key we need, based
1997 : * on the lookup record, then follow the corresponding block
1998 : * pointer down to the next level.
1999 : */
2000 43891309275 : for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
2001 : /* Get the block we need to do the lookup on. */
2002 27129572037 : error = xfs_btree_lookup_get_block(cur, level, pp, &block);
2003 27140914068 : if (error)
2004 6895 : goto error0;
2005 :
2006 27140907173 : if (diff == 0) {
2007 : /*
2008 : * If we already had a key match at a higher level, we
2009 : * know we need to use the first entry in this block.
2010 : */
2011 : keyno = 1;
2012 : } else {
2013 : /* Otherwise search this block. Do a binary search. */
2014 :
2015 27105108935 : int high; /* high entry number */
2016 27105108935 : int low; /* low entry number */
2017 :
2018 : /* Set low and high entry numbers, 1-based. */
2019 27105108935 : low = 1;
2020 27105108935 : high = xfs_btree_get_numrecs(block);
2021 27105108935 : if (!high) {
2022 : /* Block is empty, must be an empty leaf. */
2023 836109461 : if (level != 0 || cur->bc_nlevels != 1) {
2024 0 : XFS_CORRUPTION_ERROR(__func__,
2025 : XFS_ERRLEVEL_LOW,
2026 : cur->bc_mp, block,
2027 : sizeof(*block));
2028 0 : xfs_btree_mark_sick(cur);
2029 0 : return -EFSCORRUPTED;
2030 : }
2031 :
2032 836109461 : cur->bc_levels[0].ptr = dir != XFS_LOOKUP_LE;
2033 836109461 : *stat = 0;
2034 836109461 : return 0;
2035 : }
2036 :
2037 : /* Binary search the block. */
2038 >14482*10^7 : while (low <= high) {
2039 >12087*10^7 : union xfs_btree_key key;
2040 >12087*10^7 : union xfs_btree_key *kp;
2041 :
2042 >12087*10^7 : XFS_BTREE_STATS_INC(cur, compare);
2043 :
2044 : /* keyno is average of low and high. */
2045 >12065*10^7 : keyno = (low + high) >> 1;
2046 :
2047 : /* Get current search key */
2048 >12065*10^7 : kp = xfs_lookup_get_search_key(cur, level,
2049 : keyno, block, &key);
2050 :
2051 : /*
2052 : * Compute difference to get next direction:
2053 : * - less than, move right
2054 : * - greater than, move left
2055 : * - equal, we're done
2056 : */
2057 >12059*10^7 : diff = cur->bc_ops->key_diff(cur, kp);
2058 >12087*10^7 : if (diff < 0)
2059 68759196405 : low = keyno + 1;
2060 52116568769 : else if (diff > 0)
2061 49792233852 : high = keyno - 1;
2062 : else
2063 : break;
2064 : }
2065 : }
2066 :
2067 : /*
2068 : * If there are more levels, set up for the next level
2069 : * by getting the block number and filling in the cursor.
2070 : */
2071 26302186176 : if (level > 0) {
2072 : /*
2073 : * If we moved left, need the previous key number,
2074 : * unless there isn't one.
2075 : */
2076 9540226219 : if (diff > 0 && --keyno < 1)
2077 : keyno = 1;
2078 9540226219 : pp = xfs_btree_ptr_addr(cur, keyno, block);
2079 :
2080 9535875580 : error = xfs_btree_debug_check_ptr(cur, pp, 0, level);
2081 9535922295 : if (error)
2082 0 : goto error0;
2083 :
2084 9535922295 : cur->bc_levels[level].ptr = keyno;
2085 : }
2086 : }
2087 :
2088 : /* Done with the search. See if we need to adjust the results. */
2089 16761737238 : if (dir != XFS_LOOKUP_LE && diff < 0) {
2090 797969993 : keyno++;
2091 : /*
2092 : * If ge search and we went off the end of the block, but it's
2093 : * not the last block, we're in the wrong block.
2094 : */
2095 797969993 : xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
2096 797964192 : if (dir == XFS_LOOKUP_GE &&
2097 209812317 : keyno > xfs_btree_get_numrecs(block) &&
2098 : !xfs_btree_ptr_is_null(cur, &ptr)) {
2099 2363196 : int i;
2100 :
2101 2363196 : cur->bc_levels[0].ptr = keyno;
2102 2363196 : error = xfs_btree_increment(cur, 0, &i);
2103 2363196 : if (error)
2104 3 : goto error0;
2105 2363193 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
2106 0 : xfs_btree_mark_sick(cur);
2107 2363193 : return -EFSCORRUPTED;
2108 : }
2109 2363193 : *stat = 1;
2110 2363193 : return 0;
2111 : }
2112 15963767245 : } else if (dir == XFS_LOOKUP_LE && diff > 0)
2113 7223409681 : keyno--;
2114 16759368241 : cur->bc_levels[0].ptr = keyno;
2115 :
2116 : /* Return if we succeeded or not. */
2117 16759368241 : if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
2118 3430096083 : *stat = 0;
2119 13329272158 : else if (dir != XFS_LOOKUP_EQ || diff == 0)
2120 12808240115 : *stat = 1;
2121 : else
2122 521032043 : *stat = 0;
2123 : return 0;
2124 :
2125 : error0:
2126 : return error;
2127 : }
2128 :
2129 : /* Find the high key storage area from a regular key. */
2130 : union xfs_btree_key *
2131 1563454027 : xfs_btree_high_key_from_key(
2132 : struct xfs_btree_cur *cur,
2133 : union xfs_btree_key *key)
2134 : {
2135 1563454027 : ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
2136 1563454027 : return (union xfs_btree_key *)((char *)key +
2137 1563454027 : (cur->bc_ops->key_len / 2));
2138 : }
2139 :
2140 : /* Determine the low (and high if overlapped) keys of a leaf block */
2141 : STATIC void
2142 1197238139 : xfs_btree_get_leaf_keys(
2143 : struct xfs_btree_cur *cur,
2144 : struct xfs_btree_block *block,
2145 : union xfs_btree_key *key)
2146 : {
2147 1197238139 : union xfs_btree_key max_hkey;
2148 1197238139 : union xfs_btree_key hkey;
2149 1197238139 : union xfs_btree_rec *rec;
2150 1197238139 : union xfs_btree_key *high;
2151 1197238139 : int n;
2152 :
2153 1197238139 : rec = xfs_btree_rec_addr(cur, 1, block);
2154 1197238139 : cur->bc_ops->init_key_from_rec(key, rec);
2155 :
2156 1197323138 : if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2157 :
2158 706864492 : cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
2159 93761960542 : for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
2160 92348173972 : rec = xfs_btree_rec_addr(cur, n, block);
2161 92348173972 : cur->bc_ops->init_high_key_from_rec(&hkey, rec);
2162 92315459172 : if (xfs_btree_keycmp_gt(cur, &hkey, &max_hkey))
2163 90757382108 : max_hkey = hkey;
2164 : }
2165 :
2166 706922078 : high = xfs_btree_high_key_from_key(cur, key);
2167 1413831726 : memcpy(high, &max_hkey, cur->bc_ops->key_len / 2);
2168 : }
2169 1197374509 : }
2170 :
2171 : /* Determine the low (and high if overlapped) keys of a node block */
2172 : STATIC void
2173 150525074 : xfs_btree_get_node_keys(
2174 : struct xfs_btree_cur *cur,
2175 : struct xfs_btree_block *block,
2176 : union xfs_btree_key *key)
2177 : {
2178 150525074 : union xfs_btree_key *hkey;
2179 150525074 : union xfs_btree_key *max_hkey;
2180 150525074 : union xfs_btree_key *high;
2181 150525074 : int n;
2182 :
2183 150525074 : if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2184 300700840 : memcpy(key, xfs_btree_key_addr(cur, 1, block),
2185 : cur->bc_ops->key_len / 2);
2186 :
2187 150350420 : max_hkey = xfs_btree_high_key_addr(cur, 1, block);
2188 11326078947 : for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
2189 11175728723 : hkey = xfs_btree_high_key_addr(cur, n, block);
2190 11175728723 : if (xfs_btree_keycmp_gt(cur, hkey, max_hkey))
2191 11144203519 : max_hkey = hkey;
2192 : }
2193 :
2194 150350224 : high = xfs_btree_high_key_from_key(cur, key);
2195 300700570 : memcpy(high, max_hkey, cur->bc_ops->key_len / 2);
2196 : } else {
2197 349308 : memcpy(key, xfs_btree_key_addr(cur, 1, block),
2198 : cur->bc_ops->key_len);
2199 : }
2200 150524939 : }
2201 :
2202 : /* Derive the keys for any btree block. */
2203 : void
2204 1197442913 : xfs_btree_get_keys(
2205 : struct xfs_btree_cur *cur,
2206 : struct xfs_btree_block *block,
2207 : union xfs_btree_key *key)
2208 : {
2209 1197442913 : if (be16_to_cpu(block->bb_level) == 0)
2210 1195684608 : xfs_btree_get_leaf_keys(cur, block, key);
2211 : else
2212 1758305 : xfs_btree_get_node_keys(cur, block, key);
2213 1197270194 : }
2214 :
2215 : /*
2216 : * Decide if we need to update the parent keys of a btree block. For
2217 : * a standard btree this is only necessary if we're updating the first
2218 : * record/key. For an overlapping btree, we must always update the
2219 : * keys because the highest key can be in any of the records or keys
2220 : * in the block.
2221 : */
2222 : static inline bool
2223 : xfs_btree_needs_key_update(
2224 : struct xfs_btree_cur *cur,
2225 : int ptr)
2226 : {
2227 1650351955 : return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
2228 : }
2229 :
2230 : /*
2231 : * Update the low and high parent keys of the given level, progressing
2232 : * towards the root. If force_all is false, stop if the keys for a given
2233 : * level do not need updating.
2234 : */
2235 : STATIC int
2236 729036108 : __xfs_btree_updkeys(
2237 : struct xfs_btree_cur *cur,
2238 : int level,
2239 : struct xfs_btree_block *block,
2240 : struct xfs_buf *bp0,
2241 : bool force_all)
2242 : {
2243 729036108 : union xfs_btree_key key; /* keys from current level */
2244 729036108 : union xfs_btree_key *lkey; /* keys from the next level up */
2245 729036108 : union xfs_btree_key *hkey;
2246 729036108 : union xfs_btree_key *nlkey; /* keys from the next level up */
2247 729036108 : union xfs_btree_key *nhkey;
2248 729036108 : struct xfs_buf *bp;
2249 729036108 : int ptr;
2250 :
2251 729036108 : ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
2252 :
2253 : /* Exit if there aren't any parent levels to update. */
2254 729036108 : if (level + 1 >= cur->bc_nlevels)
2255 : return 0;
2256 :
2257 696693256 : trace_xfs_btree_updkeys(cur, level, bp0);
2258 :
2259 696680000 : lkey = &key;
2260 696680000 : hkey = xfs_btree_high_key_from_key(cur, lkey);
2261 696671438 : xfs_btree_get_keys(cur, block, lkey);
2262 1542144535 : for (level++; level < cur->bc_nlevels; level++) {
2263 : #ifdef DEBUG
2264 845473097 : int error;
2265 : #endif
2266 845473097 : block = xfs_btree_get_block(cur, level, &bp);
2267 845471508 : trace_xfs_btree_updkeys(cur, level, bp);
2268 : #ifdef DEBUG
2269 845472510 : error = xfs_btree_check_block(cur, block, level, bp);
2270 845465009 : if (error)
2271 0 : return error;
2272 : #endif
2273 845465009 : ptr = cur->bc_levels[level].ptr;
2274 845465009 : nlkey = xfs_btree_key_addr(cur, ptr, block);
2275 845465009 : nhkey = xfs_btree_high_key_addr(cur, ptr, block);
2276 1690610746 : if (!force_all &&
2277 764016540 : xfs_btree_keycmp_eq(cur, nlkey, lkey) &&
2278 : xfs_btree_keycmp_eq(cur, nhkey, hkey))
2279 : break;
2280 203464655 : xfs_btree_copy_keys(cur, nlkey, lkey, 1);
2281 203475425 : xfs_btree_log_keys(cur, bp, ptr, ptr);
2282 203476586 : if (level + 1 >= cur->bc_nlevels)
2283 : break;
2284 148753786 : xfs_btree_get_node_keys(cur, block, lkey);
2285 : }
2286 :
2287 : return 0;
2288 : }
2289 :
2290 : /* Update all the keys from some level in cursor back to the root. */
2291 : STATIC int
2292 183533 : xfs_btree_updkeys_force(
2293 : struct xfs_btree_cur *cur,
2294 : int level)
2295 : {
2296 183533 : struct xfs_buf *bp;
2297 183533 : struct xfs_btree_block *block;
2298 :
2299 183533 : block = xfs_btree_get_block(cur, level, &bp);
2300 183533 : return __xfs_btree_updkeys(cur, level, block, bp, true);
2301 : }
2302 :
2303 : /*
2304 : * Update the parent keys of the given level, progressing towards the root.
2305 : */
2306 : STATIC int
2307 1208653235 : xfs_btree_update_keys(
2308 : struct xfs_btree_cur *cur,
2309 : int level)
2310 : {
2311 1208653235 : struct xfs_btree_block *block;
2312 1208653235 : struct xfs_buf *bp;
2313 1208653235 : union xfs_btree_key *kp;
2314 1208653235 : union xfs_btree_key key;
2315 1208653235 : int ptr;
2316 :
2317 1208653235 : ASSERT(level >= 0);
2318 :
2319 1208653235 : block = xfs_btree_get_block(cur, level, &bp);
2320 1208213983 : if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
2321 728843023 : return __xfs_btree_updkeys(cur, level, block, bp, false);
2322 :
2323 : /*
2324 : * Go up the tree from this level toward the root.
2325 : * At each level, update the key value to the value input.
2326 : * Stop when we reach a level where the cursor isn't pointing
2327 : * at the first entry in the block.
2328 : */
2329 479370960 : xfs_btree_get_keys(cur, block, &key);
2330 636754755 : for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
2331 : #ifdef DEBUG
2332 157093454 : int error;
2333 : #endif
2334 157093454 : block = xfs_btree_get_block(cur, level, &bp);
2335 : #ifdef DEBUG
2336 157319922 : error = xfs_btree_check_block(cur, block, level, bp);
2337 157321322 : if (error)
2338 0 : return error;
2339 : #endif
2340 157321322 : ptr = cur->bc_levels[level].ptr;
2341 157321322 : kp = xfs_btree_key_addr(cur, ptr, block);
2342 157321322 : xfs_btree_copy_keys(cur, kp, &key, 1);
2343 157318587 : xfs_btree_log_keys(cur, bp, ptr, ptr);
2344 : }
2345 :
2346 : return 0;
2347 : }
2348 :
2349 : /*
2350 : * Update the record referred to by cur to the value in the
2351 : * given record. This either works (return 0) or gets an
2352 : * EFSCORRUPTED error.
2353 : */
2354 : int
2355 927351671 : xfs_btree_update(
2356 : struct xfs_btree_cur *cur,
2357 : union xfs_btree_rec *rec)
2358 : {
2359 927351671 : struct xfs_btree_block *block;
2360 927351671 : struct xfs_buf *bp;
2361 927351671 : int error;
2362 927351671 : int ptr;
2363 927351671 : union xfs_btree_rec *rp;
2364 :
2365 : /* Pick up the current block. */
2366 927351671 : block = xfs_btree_get_block(cur, 0, &bp);
2367 :
2368 : #ifdef DEBUG
2369 926292121 : error = xfs_btree_check_block(cur, block, 0, bp);
2370 927512706 : if (error)
2371 0 : goto error0;
2372 : #endif
2373 : /* Get the address of the rec to be updated. */
2374 927512706 : ptr = cur->bc_levels[0].ptr;
2375 927512706 : rp = xfs_btree_rec_addr(cur, ptr, block);
2376 :
2377 : /* Fill in the new contents and log them. */
2378 927512706 : xfs_btree_copy_recs(cur, rp, rec, 1);
2379 927034544 : xfs_btree_log_recs(cur, bp, ptr, ptr);
2380 :
2381 : /*
2382 : * If we are tracking the last record in the tree and
2383 : * we are at the far right edge of the tree, update it.
2384 : */
2385 926999552 : if (xfs_btree_is_lastrec(cur, block, 0)) {
2386 0 : cur->bc_ops->update_lastrec(cur, block, rec,
2387 : ptr, LASTREC_UPDATE);
2388 : }
2389 :
2390 : /* Pass new key value up to our parent. */
2391 1767795506 : if (xfs_btree_needs_key_update(cur, ptr)) {
2392 259854576 : error = xfs_btree_update_keys(cur, 0);
2393 259472876 : if (error)
2394 0 : goto error0;
2395 : }
2396 :
2397 : return 0;
2398 :
2399 : error0:
2400 : return error;
2401 : }
2402 :
2403 : /*
2404 : * Move 1 record left from cur/level if possible.
2405 : * Update cur to reflect the new path.
2406 : */
2407 : STATIC int /* error */
2408 141096585 : xfs_btree_lshift(
2409 : struct xfs_btree_cur *cur,
2410 : int level,
2411 : int *stat) /* success/failure */
2412 : {
2413 141096585 : struct xfs_buf *lbp; /* left buffer pointer */
2414 141096585 : struct xfs_btree_block *left; /* left btree block */
2415 141096585 : int lrecs; /* left record count */
2416 141096585 : struct xfs_buf *rbp; /* right buffer pointer */
2417 141096585 : struct xfs_btree_block *right; /* right btree block */
2418 141096585 : struct xfs_btree_cur *tcur; /* temporary btree cursor */
2419 141096585 : int rrecs; /* right record count */
2420 141096585 : union xfs_btree_ptr lptr; /* left btree pointer */
2421 141096585 : union xfs_btree_key *rkp = NULL; /* right btree key */
2422 141096585 : union xfs_btree_ptr *rpp = NULL; /* right address pointer */
2423 141096585 : union xfs_btree_rec *rrp = NULL; /* right record pointer */
2424 141096585 : int error; /* error return value */
2425 141096585 : int i;
2426 :
2427 141096585 : if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2428 54540757 : level == cur->bc_nlevels - 1)
2429 0 : goto out0;
2430 :
2431 : /* Set up variables for this block as "right". */
2432 141096585 : right = xfs_btree_get_block(cur, level, &rbp);
2433 :
2434 : #ifdef DEBUG
2435 141096066 : error = xfs_btree_check_block(cur, right, level, rbp);
2436 141097386 : if (error)
2437 0 : goto error0;
2438 : #endif
2439 :
2440 : /* If we've got no left sibling then we can't shift an entry left. */
2441 141097386 : xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2442 282194680 : if (xfs_btree_ptr_is_null(cur, &lptr))
2443 83182 : goto out0;
2444 :
2445 : /*
2446 : * If the cursor entry is the one that would be moved, don't
2447 : * do it... it's too complicated.
2448 : */
2449 141014158 : if (cur->bc_levels[level].ptr <= 1)
2450 27746 : goto out0;
2451 :
2452 : /* Set up the left neighbor as "left". */
2453 140986412 : error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
2454 140986636 : if (error)
2455 58 : goto error0;
2456 :
2457 : /* If it's full, it can't take another entry. */
2458 140986578 : lrecs = xfs_btree_get_numrecs(left);
2459 140986578 : if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
2460 1695757 : goto out0;
2461 :
2462 139290803 : rrecs = xfs_btree_get_numrecs(right);
2463 :
2464 : /*
2465 : * We add one entry to the left side and remove one for the right side.
2466 : * Account for it here, the changes will be updated on disk and logged
2467 : * later.
2468 : */
2469 139290803 : lrecs++;
2470 139290803 : rrecs--;
2471 :
2472 139290803 : XFS_BTREE_STATS_INC(cur, lshift);
2473 139290812 : XFS_BTREE_STATS_ADD(cur, moves, 1);
2474 :
2475 : /*
2476 : * If non-leaf, copy a key and a ptr to the left block.
2477 : * Log the changes to the left block.
2478 : */
2479 139291048 : if (level > 0) {
2480 : /* It's a non-leaf. Move keys and pointers. */
2481 326705 : union xfs_btree_key *lkp; /* left btree key */
2482 326705 : union xfs_btree_ptr *lpp; /* left address pointer */
2483 :
2484 326705 : lkp = xfs_btree_key_addr(cur, lrecs, left);
2485 326705 : rkp = xfs_btree_key_addr(cur, 1, right);
2486 :
2487 326705 : lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2488 326705 : rpp = xfs_btree_ptr_addr(cur, 1, right);
2489 :
2490 326705 : error = xfs_btree_debug_check_ptr(cur, rpp, 0, level);
2491 326705 : if (error)
2492 0 : goto error0;
2493 :
2494 326705 : xfs_btree_copy_keys(cur, lkp, rkp, 1);
2495 326705 : xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
2496 :
2497 326705 : xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
2498 326705 : xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
2499 :
2500 326705 : ASSERT(cur->bc_ops->keys_inorder(cur,
2501 : xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
2502 : } else {
2503 : /* It's a leaf. Move records. */
2504 138964343 : union xfs_btree_rec *lrp; /* left record pointer */
2505 :
2506 138964343 : lrp = xfs_btree_rec_addr(cur, lrecs, left);
2507 138964343 : rrp = xfs_btree_rec_addr(cur, 1, right);
2508 :
2509 138964343 : xfs_btree_copy_recs(cur, lrp, rrp, 1);
2510 138963046 : xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
2511 :
2512 138963999 : ASSERT(cur->bc_ops->recs_inorder(cur,
2513 : xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
2514 : }
2515 :
2516 139289811 : xfs_btree_set_numrecs(left, lrecs);
2517 139289811 : xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2518 :
2519 139291148 : xfs_btree_set_numrecs(right, rrecs);
2520 139291148 : xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2521 :
2522 : /*
2523 : * Slide the contents of right down one entry.
2524 : */
2525 139291338 : XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
2526 139291261 : if (level > 0) {
2527 : /* It's a nonleaf. operate on keys and ptrs */
2528 57806755 : for (i = 0; i < rrecs; i++) {
2529 57480050 : error = xfs_btree_debug_check_ptr(cur, rpp, i + 1, level);
2530 57480050 : if (error)
2531 0 : goto error0;
2532 : }
2533 :
2534 326705 : xfs_btree_shift_keys(cur,
2535 : xfs_btree_key_addr(cur, 2, right),
2536 : -1, rrecs);
2537 326705 : xfs_btree_shift_ptrs(cur,
2538 : xfs_btree_ptr_addr(cur, 2, right),
2539 : -1, rrecs);
2540 :
2541 326705 : xfs_btree_log_keys(cur, rbp, 1, rrecs);
2542 326705 : xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2543 : } else {
2544 : /* It's a leaf. operate on records */
2545 138964556 : xfs_btree_shift_recs(cur,
2546 : xfs_btree_rec_addr(cur, 2, right),
2547 : -1, rrecs);
2548 138964199 : xfs_btree_log_recs(cur, rbp, 1, rrecs);
2549 : }
2550 :
2551 : /*
2552 : * Using a temporary cursor, update the parent key values of the
2553 : * block on the left.
2554 : */
2555 139291141 : if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2556 44408095 : error = xfs_btree_dup_cursor(cur, &tcur);
2557 44407902 : if (error)
2558 0 : goto error0;
2559 44407902 : i = xfs_btree_firstrec(tcur, level);
2560 44407734 : if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
2561 0 : xfs_btree_mark_sick(cur);
2562 0 : error = -EFSCORRUPTED;
2563 0 : goto error0;
2564 : }
2565 :
2566 44407734 : error = xfs_btree_decrement(tcur, level, &i);
2567 44407563 : if (error)
2568 0 : goto error1;
2569 :
2570 : /* Update the parent high keys of the left block, if needed. */
2571 44407563 : error = xfs_btree_update_keys(tcur, level);
2572 44408106 : if (error)
2573 0 : goto error1;
2574 :
2575 44408106 : xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2576 : }
2577 :
2578 : /* Update the parent keys of the right block. */
2579 139291221 : error = xfs_btree_update_keys(cur, level);
2580 139291186 : if (error)
2581 0 : goto error0;
2582 :
2583 : /* Slide the cursor value left one. */
2584 139291186 : cur->bc_levels[level].ptr--;
2585 :
2586 139291186 : *stat = 1;
2587 139291186 : return 0;
2588 :
2589 1806685 : out0:
2590 1806685 : *stat = 0;
2591 1806685 : return 0;
2592 :
2593 : error0:
2594 : return error;
2595 :
2596 0 : error1:
2597 0 : xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2598 0 : return error;
2599 : }
2600 :
2601 : /*
2602 : * Move 1 record right from cur/level if possible.
2603 : * Update cur to reflect the new path.
2604 : */
2605 : STATIC int /* error */
2606 198782309 : xfs_btree_rshift(
2607 : struct xfs_btree_cur *cur,
2608 : int level,
2609 : int *stat) /* success/failure */
2610 : {
2611 198782309 : struct xfs_buf *lbp; /* left buffer pointer */
2612 198782309 : struct xfs_btree_block *left; /* left btree block */
2613 198782309 : struct xfs_buf *rbp; /* right buffer pointer */
2614 198782309 : struct xfs_btree_block *right; /* right btree block */
2615 198782309 : struct xfs_btree_cur *tcur; /* temporary btree cursor */
2616 198782309 : union xfs_btree_ptr rptr; /* right block pointer */
2617 198782309 : union xfs_btree_key *rkp; /* right btree key */
2618 198782309 : int rrecs; /* right record count */
2619 198782309 : int lrecs; /* left record count */
2620 198782309 : int error; /* error return value */
2621 198782309 : int i; /* loop counter */
2622 :
2623 198782309 : if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2624 79261465 : (level == cur->bc_nlevels - 1))
2625 0 : goto out0;
2626 :
2627 : /* Set up variables for this block as "left". */
2628 198782309 : left = xfs_btree_get_block(cur, level, &lbp);
2629 :
2630 : #ifdef DEBUG
2631 198780053 : error = xfs_btree_check_block(cur, left, level, lbp);
2632 198782654 : if (error)
2633 0 : goto error0;
2634 : #endif
2635 :
2636 : /* If we've got no right sibling then we can't shift an entry right. */
2637 198782654 : xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2638 397564914 : if (xfs_btree_ptr_is_null(cur, &rptr))
2639 83636748 : goto out0;
2640 :
2641 : /*
2642 : * If the cursor entry is the one that would be moved, don't
2643 : * do it... it's too complicated.
2644 : */
2645 115145709 : lrecs = xfs_btree_get_numrecs(left);
2646 115145709 : if (cur->bc_levels[level].ptr >= lrecs)
2647 25015599 : goto out0;
2648 :
2649 : /* Set up the right neighbor as "right". */
2650 90130110 : error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
2651 90130222 : if (error)
2652 198 : goto error0;
2653 :
2654 : /* If it's full, it can't take another entry. */
2655 90130024 : rrecs = xfs_btree_get_numrecs(right);
2656 90130024 : if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2657 16544037 : goto out0;
2658 :
2659 73586396 : XFS_BTREE_STATS_INC(cur, rshift);
2660 73586389 : XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2661 :
2662 : /*
2663 : * Make a hole at the start of the right neighbor block, then
2664 : * copy the last left block entry to the hole.
2665 : */
2666 73586528 : if (level > 0) {
2667 : /* It's a nonleaf. make a hole in the keys and ptrs */
2668 82920 : union xfs_btree_key *lkp;
2669 82920 : union xfs_btree_ptr *lpp;
2670 82920 : union xfs_btree_ptr *rpp;
2671 :
2672 82920 : lkp = xfs_btree_key_addr(cur, lrecs, left);
2673 82920 : lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2674 82920 : rkp = xfs_btree_key_addr(cur, 1, right);
2675 82920 : rpp = xfs_btree_ptr_addr(cur, 1, right);
2676 :
2677 6178987 : for (i = rrecs - 1; i >= 0; i--) {
2678 6096067 : error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
2679 6096067 : if (error)
2680 0 : goto error0;
2681 : }
2682 :
2683 82920 : xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2684 82920 : xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2685 :
2686 82920 : error = xfs_btree_debug_check_ptr(cur, lpp, 0, level);
2687 82920 : if (error)
2688 0 : goto error0;
2689 :
2690 : /* Now put the new data in, and log it. */
2691 82920 : xfs_btree_copy_keys(cur, rkp, lkp, 1);
2692 82920 : xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2693 :
2694 82920 : xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2695 82920 : xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2696 :
2697 82920 : ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
2698 : xfs_btree_key_addr(cur, 2, right)));
2699 : } else {
2700 : /* It's a leaf. make a hole in the records */
2701 73503608 : union xfs_btree_rec *lrp;
2702 73503608 : union xfs_btree_rec *rrp;
2703 :
2704 73503608 : lrp = xfs_btree_rec_addr(cur, lrecs, left);
2705 73503608 : rrp = xfs_btree_rec_addr(cur, 1, right);
2706 :
2707 73503608 : xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2708 :
2709 : /* Now put the new data in, and log it. */
2710 73502650 : xfs_btree_copy_recs(cur, rrp, lrp, 1);
2711 73502562 : xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2712 : }
2713 :
2714 : /*
2715 : * Decrement and log left's numrecs, bump and log right's numrecs.
2716 : */
2717 73586242 : xfs_btree_set_numrecs(left, --lrecs);
2718 73586242 : xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2719 :
2720 73586424 : xfs_btree_set_numrecs(right, ++rrecs);
2721 73586424 : xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2722 :
2723 : /*
2724 : * Using a temporary cursor, update the parent key values of the
2725 : * block on the right.
2726 : */
2727 73586474 : error = xfs_btree_dup_cursor(cur, &tcur);
2728 73586032 : if (error)
2729 1 : goto error0;
2730 73586031 : i = xfs_btree_lastrec(tcur, level);
2731 73586055 : if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
2732 0 : xfs_btree_mark_sick(cur);
2733 0 : error = -EFSCORRUPTED;
2734 0 : goto error0;
2735 : }
2736 :
2737 73586055 : error = xfs_btree_increment(tcur, level, &i);
2738 73585220 : if (error)
2739 0 : goto error1;
2740 :
2741 : /* Update the parent high keys of the left block, if needed. */
2742 73585220 : if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2743 34435165 : error = xfs_btree_update_keys(cur, level);
2744 34435576 : if (error)
2745 0 : goto error1;
2746 : }
2747 :
2748 : /* Update the parent keys of the right block. */
2749 73585631 : error = xfs_btree_update_keys(tcur, level);
2750 73585666 : if (error)
2751 0 : goto error1;
2752 :
2753 73585666 : xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2754 :
2755 73585150 : *stat = 1;
2756 73585150 : return 0;
2757 :
2758 125196384 : out0:
2759 125196384 : *stat = 0;
2760 125196384 : return 0;
2761 :
2762 : error0:
2763 : return error;
2764 :
2765 0 : error1:
2766 0 : xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2767 0 : return error;
2768 : }
2769 :
2770 : static inline int
2771 1849737 : xfs_btree_alloc_block(
2772 : struct xfs_btree_cur *cur,
2773 : const union xfs_btree_ptr *hint_block,
2774 : union xfs_btree_ptr *new_block,
2775 : int *stat)
2776 : {
2777 1849737 : int error;
2778 :
2779 1849737 : error = cur->bc_ops->alloc_block(cur, hint_block, new_block, stat);
2780 1849737 : trace_xfs_btree_alloc_block(cur, new_block, *stat, error);
2781 1849735 : return error;
2782 : }
2783 :
2784 : /*
2785 : * Split cur/level block in half.
2786 : * Return new block number and the key to its first
2787 : * record (to be inserted into parent).
2788 : */
2789 : STATIC int /* error */
2790 1806682 : __xfs_btree_split(
2791 : struct xfs_btree_cur *cur,
2792 : int level,
2793 : union xfs_btree_ptr *ptrp,
2794 : union xfs_btree_key *key,
2795 : struct xfs_btree_cur **curp,
2796 : int *stat) /* success/failure */
2797 : {
2798 1806682 : union xfs_btree_ptr lptr; /* left sibling block ptr */
2799 1806682 : struct xfs_buf *lbp; /* left buffer pointer */
2800 1806682 : struct xfs_btree_block *left; /* left btree block */
2801 1806682 : union xfs_btree_ptr rptr; /* right sibling block ptr */
2802 1806682 : struct xfs_buf *rbp; /* right buffer pointer */
2803 1806682 : struct xfs_btree_block *right; /* right btree block */
2804 1806682 : union xfs_btree_ptr rrptr; /* right-right sibling ptr */
2805 1806682 : struct xfs_buf *rrbp; /* right-right buffer pointer */
2806 1806682 : struct xfs_btree_block *rrblock; /* right-right btree block */
2807 1806682 : int lrecs;
2808 1806682 : int rrecs;
2809 1806682 : int src_index;
2810 1806682 : int error; /* error return value */
2811 1806682 : int i;
2812 :
2813 1806682 : XFS_BTREE_STATS_INC(cur, split);
2814 :
2815 : /* Set up left block (current one). */
2816 1806685 : left = xfs_btree_get_block(cur, level, &lbp);
2817 :
2818 : #ifdef DEBUG
2819 1806685 : error = xfs_btree_check_block(cur, left, level, lbp);
2820 1806685 : if (error)
2821 0 : goto error0;
2822 : #endif
2823 :
2824 1806685 : xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2825 :
2826 : /* Allocate the new block. If we can't do it, we're toast. Give up. */
2827 1806686 : error = xfs_btree_alloc_block(cur, &lptr, &rptr, stat);
2828 1806683 : if (error)
2829 4 : goto error0;
2830 1806679 : if (*stat == 0)
2831 0 : goto out0;
2832 1806679 : XFS_BTREE_STATS_INC(cur, alloc);
2833 :
2834 : /* Set up the new block as "right". */
2835 1806680 : error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp);
2836 1806679 : if (error)
2837 0 : goto error0;
2838 :
2839 : /* Fill in the btree header for the new right block. */
2840 1806679 : xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
2841 :
2842 : /*
2843 : * Split the entries between the old and the new block evenly.
2844 : * Make sure that if there's an odd number of entries now, that
2845 : * each new block will have the same number of entries.
2846 : */
2847 1806676 : lrecs = xfs_btree_get_numrecs(left);
2848 1806676 : rrecs = lrecs / 2;
2849 1806676 : if ((lrecs & 1) && cur->bc_levels[level].ptr <= rrecs + 1)
2850 53428 : rrecs++;
2851 1806676 : src_index = (lrecs - rrecs + 1);
2852 :
2853 1806676 : XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2854 :
2855 : /* Adjust numrecs for the later get_*_keys() calls. */
2856 1806680 : lrecs -= rrecs;
2857 1806680 : xfs_btree_set_numrecs(left, lrecs);
2858 1806680 : xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2859 :
2860 : /*
2861 : * Copy btree block entries from the left block over to the
2862 : * new block, the right. Update the right block and log the
2863 : * changes.
2864 : */
2865 1806680 : if (level > 0) {
2866 : /* It's a non-leaf. Move keys and pointers. */
2867 10666 : union xfs_btree_key *lkp; /* left btree key */
2868 10666 : union xfs_btree_ptr *lpp; /* left address pointer */
2869 10666 : union xfs_btree_key *rkp; /* right btree key */
2870 10666 : union xfs_btree_ptr *rpp; /* right address pointer */
2871 :
2872 10666 : lkp = xfs_btree_key_addr(cur, src_index, left);
2873 10666 : lpp = xfs_btree_ptr_addr(cur, src_index, left);
2874 10666 : rkp = xfs_btree_key_addr(cur, 1, right);
2875 10666 : rpp = xfs_btree_ptr_addr(cur, 1, right);
2876 :
2877 21332 : for (i = src_index; i < rrecs; i++) {
2878 0 : error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
2879 0 : if (error)
2880 0 : goto error0;
2881 : }
2882 :
2883 : /* Copy the keys & pointers to the new block. */
2884 10666 : xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2885 10666 : xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2886 :
2887 10666 : xfs_btree_log_keys(cur, rbp, 1, rrecs);
2888 10666 : xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2889 :
2890 : /* Stash the keys of the new block for later insertion. */
2891 10666 : xfs_btree_get_node_keys(cur, right, key);
2892 : } else {
2893 : /* It's a leaf. Move records. */
2894 1796014 : union xfs_btree_rec *lrp; /* left record pointer */
2895 1796014 : union xfs_btree_rec *rrp; /* right record pointer */
2896 :
2897 1796014 : lrp = xfs_btree_rec_addr(cur, src_index, left);
2898 1796014 : rrp = xfs_btree_rec_addr(cur, 1, right);
2899 :
2900 : /* Copy records to the new block. */
2901 1796014 : xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2902 1796016 : xfs_btree_log_recs(cur, rbp, 1, rrecs);
2903 :
2904 : /* Stash the keys of the new block for later insertion. */
2905 1796014 : xfs_btree_get_leaf_keys(cur, right, key);
2906 : }
2907 :
2908 : /*
2909 : * Find the left block number by looking in the buffer.
2910 : * Adjust sibling pointers.
2911 : */
2912 1806683 : xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2913 1806682 : xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2914 1806682 : xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2915 1806681 : xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2916 :
2917 1806681 : xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2918 1806683 : xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2919 :
2920 : /*
2921 : * If there's a block to the new block's right, make that block
2922 : * point back to right instead of to left.
2923 : */
2924 3613366 : if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
2925 1066705 : error = xfs_btree_read_buf_block(cur, &rrptr,
2926 : 0, &rrblock, &rrbp);
2927 1066705 : if (error)
2928 231 : goto error0;
2929 1066474 : xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2930 1066474 : xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2931 : }
2932 :
2933 : /* Update the parent high keys of the left block, if needed. */
2934 1806452 : if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2935 1074902 : error = xfs_btree_update_keys(cur, level);
2936 1074900 : if (error)
2937 0 : goto error0;
2938 : }
2939 :
2940 : /*
2941 : * If the cursor is really in the right block, move it there.
2942 : * If it's just pointing past the last entry in left, then we'll
2943 : * insert there, so don't change anything in that case.
2944 : */
2945 1806450 : if (cur->bc_levels[level].ptr > lrecs + 1) {
2946 1376942 : xfs_btree_setbuf(cur, level, rbp);
2947 1376941 : cur->bc_levels[level].ptr -= lrecs;
2948 : }
2949 : /*
2950 : * If there are more levels, we'll need another cursor which refers
2951 : * the right block, no matter where this cursor was.
2952 : */
2953 1806449 : if (level + 1 < cur->bc_nlevels) {
2954 1766277 : error = xfs_btree_dup_cursor(cur, curp);
2955 1766277 : if (error)
2956 4 : goto error0;
2957 1766273 : (*curp)->bc_levels[level + 1].ptr++;
2958 : }
2959 1806445 : *ptrp = rptr;
2960 1806445 : *stat = 1;
2961 1806445 : return 0;
2962 : out0:
2963 0 : *stat = 0;
2964 0 : return 0;
2965 :
2966 : error0:
2967 : return error;
2968 : }
2969 :
2970 : #ifdef __KERNEL__
2971 : struct xfs_btree_split_args {
2972 : struct xfs_btree_cur *cur;
2973 : int level;
2974 : union xfs_btree_ptr *ptrp;
2975 : union xfs_btree_key *key;
2976 : struct xfs_btree_cur **curp;
2977 : int *stat; /* success/failure */
2978 : int result;
2979 : bool kswapd; /* allocation in kswapd context */
2980 : struct completion *done;
2981 : struct work_struct work;
2982 : };
2983 :
2984 : /*
2985 : * Stack switching interfaces for allocation
2986 : */
2987 : static void
2988 35688 : xfs_btree_split_worker(
2989 : struct work_struct *work)
2990 : {
2991 35688 : struct xfs_btree_split_args *args = container_of(work,
2992 : struct xfs_btree_split_args, work);
2993 35688 : unsigned long pflags;
2994 35688 : unsigned long new_pflags = 0;
2995 :
2996 : /*
2997 : * we are in a transaction context here, but may also be doing work
2998 : * in kswapd context, and hence we may need to inherit that state
2999 : * temporarily to ensure that we don't block waiting for memory reclaim
3000 : * in any way.
3001 : */
3002 35688 : if (args->kswapd)
3003 0 : new_pflags |= PF_MEMALLOC | PF_KSWAPD;
3004 :
3005 35688 : current_set_flags_nested(&pflags, new_pflags);
3006 35688 : xfs_trans_set_context(args->cur->bc_tp);
3007 :
3008 35687 : args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
3009 : args->key, args->curp, args->stat);
3010 :
3011 35688 : xfs_trans_clear_context(args->cur->bc_tp);
3012 35688 : current_restore_flags_nested(&pflags, new_pflags);
3013 :
3014 : /*
3015 : * Do not access args after complete() has run here. We don't own args
3016 : * and the owner may run and free args before we return here.
3017 : */
3018 35688 : complete(args->done);
3019 :
3020 35688 : }
3021 :
3022 : /*
3023 : * BMBT split requests often come in with little stack to work on so we push
3024 : * them off to a worker thread so there is lots of stack to use. For the other
3025 : * btree types, just call directly to avoid the context switch overhead here.
3026 : *
3027 : * Care must be taken here - the work queue rescuer thread introduces potential
3028 : * AGF <> worker queue deadlocks if the BMBT block allocation has to lock new
3029 : * AGFs to allocate blocks. A task being run by the rescuer could attempt to
3030 : * lock an AGF that is already locked by a task queued to run by the rescuer,
3031 : * resulting in an ABBA deadlock as the rescuer cannot run the lock holder to
3032 : * release it until the current thread it is running gains the lock.
3033 : *
3034 : * To avoid this issue, we only ever queue BMBT splits that don't have an AGF
3035 : * already locked to allocate from. The only place that doesn't hold an AGF
3036 : * locked is unwritten extent conversion at IO completion, but that has already
3037 : * been offloaded to a worker thread and hence has no stack consumption issues
3038 : * we have to worry about.
3039 : */
3040 : STATIC int /* error */
3041 1806684 : xfs_btree_split(
3042 : struct xfs_btree_cur *cur,
3043 : int level,
3044 : union xfs_btree_ptr *ptrp,
3045 : union xfs_btree_key *key,
3046 : struct xfs_btree_cur **curp,
3047 : int *stat) /* success/failure */
3048 : {
3049 1806684 : struct xfs_btree_split_args args;
3050 1806684 : DECLARE_COMPLETION_ONSTACK(done);
3051 :
3052 1806684 : if (cur->bc_btnum != XFS_BTNUM_BMAP ||
3053 481096 : cur->bc_tp->t_highest_agno == NULLAGNUMBER)
3054 1770997 : return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
3055 :
3056 35687 : args.cur = cur;
3057 35687 : args.level = level;
3058 35687 : args.ptrp = ptrp;
3059 35687 : args.key = key;
3060 35687 : args.curp = curp;
3061 35687 : args.stat = stat;
3062 35687 : args.done = &done;
3063 35687 : args.kswapd = current_is_kswapd();
3064 35687 : INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
3065 35687 : queue_work(xfs_alloc_wq, &args.work);
3066 35687 : wait_for_completion(&done);
3067 35687 : destroy_work_on_stack(&args.work);
3068 35687 : return args.result;
3069 : }
3070 : #else
3071 : #define xfs_btree_split __xfs_btree_split
3072 : #endif /* __KERNEL__ */
3073 :
3074 :
3075 : /*
3076 : * Copy the old inode root contents into a real block and make the
3077 : * broot point to it.
3078 : */
3079 : int /* error */
3080 2878 : xfs_btree_new_iroot(
3081 : struct xfs_btree_cur *cur, /* btree cursor */
3082 : int *logflags, /* logging flags for inode */
3083 : int *stat) /* return status - 0 fail */
3084 : {
3085 2878 : struct xfs_buf *cbp; /* buffer for cblock */
3086 2878 : struct xfs_btree_block *block; /* btree block */
3087 2878 : struct xfs_btree_block *cblock; /* child btree block */
3088 2878 : union xfs_btree_key *ckp; /* child key pointer */
3089 2878 : union xfs_btree_ptr *cpp; /* child ptr pointer */
3090 2878 : union xfs_btree_key *kp; /* pointer to btree key */
3091 2878 : union xfs_btree_ptr *pp; /* pointer to block addr */
3092 2878 : union xfs_btree_ptr nptr; /* new block addr */
3093 2878 : int level; /* btree level */
3094 2878 : int error; /* error return code */
3095 2878 : int i; /* loop counter */
3096 :
3097 2878 : XFS_BTREE_STATS_INC(cur, newroot);
3098 :
3099 2878 : ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
3100 :
3101 2878 : level = cur->bc_nlevels - 1;
3102 :
3103 2878 : block = xfs_btree_get_iroot(cur);
3104 2878 : pp = xfs_btree_ptr_addr(cur, 1, block);
3105 :
3106 : /* Allocate the new block. If we can't do it, we're toast. Give up. */
3107 2878 : error = xfs_btree_alloc_block(cur, pp, &nptr, stat);
3108 2878 : if (error)
3109 0 : goto error0;
3110 2878 : if (*stat == 0)
3111 : return 0;
3112 :
3113 2878 : XFS_BTREE_STATS_INC(cur, alloc);
3114 :
3115 : /* Copy the root into a real block. */
3116 2878 : error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp);
3117 2878 : if (error)
3118 0 : goto error0;
3119 :
3120 : /*
3121 : * we can't just memcpy() the root in for CRC enabled btree blocks.
3122 : * In that case have to also ensure the blkno remains correct
3123 : */
3124 5756 : memcpy(cblock, block, xfs_btree_block_len(cur));
3125 2878 : if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
3126 2878 : __be64 bno = cpu_to_be64(xfs_buf_daddr(cbp));
3127 2878 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
3128 2878 : cblock->bb_u.l.bb_blkno = bno;
3129 : else
3130 0 : cblock->bb_u.s.bb_blkno = bno;
3131 : }
3132 :
3133 2878 : be16_add_cpu(&block->bb_level, 1);
3134 2878 : xfs_btree_set_numrecs(block, 1);
3135 2878 : cur->bc_nlevels++;
3136 2878 : ASSERT(cur->bc_nlevels <= cur->bc_maxlevels);
3137 2878 : cur->bc_levels[level + 1].ptr = 1;
3138 :
3139 2878 : kp = xfs_btree_key_addr(cur, 1, block);
3140 2878 : ckp = xfs_btree_key_addr(cur, 1, cblock);
3141 2878 : xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
3142 :
3143 2878 : cpp = xfs_btree_ptr_addr(cur, 1, cblock);
3144 38782 : for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
3145 33026 : error = xfs_btree_debug_check_ptr(cur, pp, i, level);
3146 33026 : if (error)
3147 0 : goto error0;
3148 : }
3149 :
3150 2878 : xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
3151 :
3152 2878 : error = xfs_btree_debug_check_ptr(cur, &nptr, 0, level);
3153 2878 : if (error)
3154 0 : goto error0;
3155 :
3156 2878 : xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
3157 :
3158 2878 : xfs_iroot_realloc(cur->bc_ino.ip,
3159 : 1 - xfs_btree_get_numrecs(cblock),
3160 2878 : cur->bc_ino.whichfork);
3161 :
3162 2878 : xfs_btree_setbuf(cur, level, cbp);
3163 :
3164 : /*
3165 : * Do all this logging at the end so that
3166 : * the root is at the right level.
3167 : */
3168 2878 : xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
3169 2878 : xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
3170 2878 : xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
3171 :
3172 5756 : *logflags |=
3173 2878 : XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork);
3174 2878 : *stat = 1;
3175 2878 : return 0;
3176 : error0:
3177 : return error;
3178 : }
3179 :
3180 : /*
3181 : * Allocate a new root block, fill it in.
3182 : */
3183 : STATIC int /* error */
3184 40174 : xfs_btree_new_root(
3185 : struct xfs_btree_cur *cur, /* btree cursor */
3186 : int *stat) /* success/failure */
3187 : {
3188 40174 : struct xfs_btree_block *block; /* one half of the old root block */
3189 40174 : struct xfs_buf *bp; /* buffer containing block */
3190 40174 : int error; /* error return value */
3191 40174 : struct xfs_buf *lbp; /* left buffer pointer */
3192 40174 : struct xfs_btree_block *left; /* left btree block */
3193 40174 : struct xfs_buf *nbp; /* new (root) buffer */
3194 40174 : struct xfs_btree_block *new; /* new (root) btree block */
3195 40174 : int nptr; /* new value for key index, 1 or 2 */
3196 40174 : struct xfs_buf *rbp; /* right buffer pointer */
3197 40174 : struct xfs_btree_block *right; /* right btree block */
3198 40174 : union xfs_btree_ptr rptr;
3199 40174 : union xfs_btree_ptr lptr;
3200 :
3201 40174 : XFS_BTREE_STATS_INC(cur, newroot);
3202 :
3203 : /* initialise our start point from the cursor */
3204 40174 : cur->bc_ops->init_ptr_from_cur(cur, &rptr);
3205 :
3206 : /* Allocate the new block. If we can't do it, we're toast. Give up. */
3207 40174 : error = xfs_btree_alloc_block(cur, &rptr, &lptr, stat);
3208 40174 : if (error)
3209 0 : goto error0;
3210 40174 : if (*stat == 0)
3211 0 : goto out0;
3212 40174 : XFS_BTREE_STATS_INC(cur, alloc);
3213 :
3214 : /* Set up the new block. */
3215 40174 : error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp);
3216 40174 : if (error)
3217 0 : goto error0;
3218 :
3219 : /* Set the root in the holding structure increasing the level by 1. */
3220 40174 : cur->bc_ops->set_root(cur, &lptr, 1);
3221 :
3222 : /*
3223 : * At the previous root level there are now two blocks: the old root,
3224 : * and the new block generated when it was split. We don't know which
3225 : * one the cursor is pointing at, so we set up variables "left" and
3226 : * "right" for each case.
3227 : */
3228 40174 : block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
3229 :
3230 : #ifdef DEBUG
3231 40174 : error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
3232 40174 : if (error)
3233 0 : goto error0;
3234 : #endif
3235 :
3236 40174 : xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3237 80348 : if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3238 : /* Our block is left, pick up the right block. */
3239 17317 : lbp = bp;
3240 17317 : xfs_btree_buf_to_ptr(cur, lbp, &lptr);
3241 17317 : left = block;
3242 17317 : error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
3243 17317 : if (error)
3244 1 : goto error0;
3245 17316 : bp = rbp;
3246 17316 : nptr = 1;
3247 : } else {
3248 : /* Our block is right, pick up the left block. */
3249 22857 : rbp = bp;
3250 22857 : xfs_btree_buf_to_ptr(cur, rbp, &rptr);
3251 22857 : right = block;
3252 22857 : xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
3253 22857 : error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
3254 22857 : if (error)
3255 0 : goto error0;
3256 22857 : bp = lbp;
3257 22857 : nptr = 2;
3258 : }
3259 :
3260 : /* Fill in the new block's btree header and log it. */
3261 40173 : xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
3262 40173 : xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
3263 120519 : ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
3264 : !xfs_btree_ptr_is_null(cur, &rptr));
3265 :
3266 : /* Fill in the key data in the new root. */
3267 40173 : if (xfs_btree_get_level(left) > 0) {
3268 : /*
3269 : * Get the keys for the left block's keys and put them directly
3270 : * in the parent block. Do the same for the right block.
3271 : */
3272 1316 : xfs_btree_get_node_keys(cur, left,
3273 : xfs_btree_key_addr(cur, 1, new));
3274 1316 : xfs_btree_get_node_keys(cur, right,
3275 : xfs_btree_key_addr(cur, 2, new));
3276 : } else {
3277 : /*
3278 : * Get the keys for the left block's records and put them
3279 : * directly in the parent block. Do the same for the right
3280 : * block.
3281 : */
3282 38857 : xfs_btree_get_leaf_keys(cur, left,
3283 : xfs_btree_key_addr(cur, 1, new));
3284 38857 : xfs_btree_get_leaf_keys(cur, right,
3285 : xfs_btree_key_addr(cur, 2, new));
3286 : }
3287 40173 : xfs_btree_log_keys(cur, nbp, 1, 2);
3288 :
3289 : /* Fill in the pointer data in the new root. */
3290 40173 : xfs_btree_copy_ptrs(cur,
3291 : xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
3292 40173 : xfs_btree_copy_ptrs(cur,
3293 : xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
3294 40173 : xfs_btree_log_ptrs(cur, nbp, 1, 2);
3295 :
3296 : /* Fix up the cursor. */
3297 40173 : xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
3298 40173 : cur->bc_levels[cur->bc_nlevels].ptr = nptr;
3299 40173 : cur->bc_nlevels++;
3300 40173 : ASSERT(cur->bc_nlevels <= cur->bc_maxlevels);
3301 40173 : *stat = 1;
3302 40173 : return 0;
3303 : error0:
3304 : return error;
3305 : out0:
3306 0 : *stat = 0;
3307 0 : return 0;
3308 : }
3309 :
3310 : STATIC int
3311 171146056 : xfs_btree_make_block_unfull(
3312 : struct xfs_btree_cur *cur, /* btree cursor */
3313 : int level, /* btree level */
3314 : int numrecs,/* # of recs in block */
3315 : int *oindex,/* old tree index */
3316 : int *index, /* new tree index */
3317 : union xfs_btree_ptr *nptr, /* new btree ptr */
3318 : struct xfs_btree_cur **ncur, /* new btree cursor */
3319 : union xfs_btree_key *key, /* key of new block */
3320 : int *stat)
3321 : {
3322 171146056 : int error = 0;
3323 :
3324 171146056 : if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
3325 57403965 : level == cur->bc_nlevels - 1) {
3326 54584 : struct xfs_inode *ip = cur->bc_ino.ip;
3327 :
3328 54584 : if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
3329 : /* A root block that can be made bigger. */
3330 51707 : xfs_iroot_realloc(ip, 1, cur->bc_ino.whichfork);
3331 51706 : *stat = 1;
3332 : } else {
3333 : /* A root block that needs replacing */
3334 2878 : int logflags = 0;
3335 :
3336 2878 : error = xfs_btree_new_iroot(cur, &logflags, stat);
3337 2878 : if (error || *stat == 0)
3338 0 : return error;
3339 :
3340 2878 : xfs_trans_log_inode(cur->bc_tp, ip, logflags);
3341 : }
3342 :
3343 54584 : return 0;
3344 : }
3345 :
3346 : /* First, try shifting an entry to the right neighbor. */
3347 171091472 : error = xfs_btree_rshift(cur, level, stat);
3348 171091975 : if (error || *stat)
3349 : return error;
3350 :
3351 : /* Next, try shifting an entry to the left neighbor. */
3352 125196229 : error = xfs_btree_lshift(cur, level, stat);
3353 125196536 : if (error)
3354 : return error;
3355 :
3356 125196478 : if (*stat) {
3357 123389793 : *oindex = *index = cur->bc_levels[level].ptr;
3358 123389793 : return 0;
3359 : }
3360 :
3361 : /*
3362 : * Next, try splitting the current block in half.
3363 : *
3364 : * If this works we have to re-set our variables because we
3365 : * could be in a different block now.
3366 : */
3367 1806685 : error = xfs_btree_split(cur, level, nptr, key, ncur, stat);
3368 1806685 : if (error || *stat == 0)
3369 : return error;
3370 :
3371 :
3372 1806446 : *index = cur->bc_levels[level].ptr;
3373 1806446 : return 0;
3374 : }
3375 :
3376 : /*
3377 : * Insert one record/level. Return information to the caller
3378 : * allowing the next level up to proceed if necessary.
3379 : */
3380 : STATIC int
3381 924942907 : xfs_btree_insrec(
3382 : struct xfs_btree_cur *cur, /* btree cursor */
3383 : int level, /* level to insert record at */
3384 : union xfs_btree_ptr *ptrp, /* i/o: block number inserted */
3385 : union xfs_btree_rec *rec, /* record to insert */
3386 : union xfs_btree_key *key, /* i/o: block key for ptrp */
3387 : struct xfs_btree_cur **curp, /* output: new cursor replacing cur */
3388 : int *stat) /* success/failure */
3389 : {
3390 924942907 : struct xfs_btree_block *block; /* btree block */
3391 924942907 : struct xfs_buf *bp; /* buffer for block */
3392 924942907 : union xfs_btree_ptr nptr; /* new block ptr */
3393 924942907 : struct xfs_btree_cur *ncur = NULL; /* new btree cursor */
3394 924942907 : union xfs_btree_key nkey; /* new block key */
3395 924942907 : union xfs_btree_key *lkey;
3396 924942907 : int optr; /* old key/record index */
3397 924942907 : int ptr; /* key/record index */
3398 924942907 : int numrecs;/* number of records */
3399 924942907 : int error; /* error return value */
3400 924942907 : int i;
3401 924942907 : xfs_daddr_t old_bn;
3402 :
3403 924942907 : ncur = NULL;
3404 924942907 : lkey = &nkey;
3405 :
3406 : /*
3407 : * If we have an external root pointer, and we've made it to the
3408 : * root level, allocate a new root block and we're done.
3409 : */
3410 924942907 : if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
3411 775693894 : (level >= cur->bc_nlevels)) {
3412 40174 : error = xfs_btree_new_root(cur, stat);
3413 40174 : xfs_btree_set_ptr_null(cur, ptrp);
3414 :
3415 40174 : return error;
3416 : }
3417 :
3418 : /* If we're off the left edge, return failure. */
3419 924902733 : ptr = cur->bc_levels[level].ptr;
3420 924902733 : if (ptr == 0) {
3421 0 : *stat = 0;
3422 0 : return 0;
3423 : }
3424 :
3425 924902733 : optr = ptr;
3426 :
3427 924902733 : XFS_BTREE_STATS_INC(cur, insrec);
3428 :
3429 : /* Get pointers to the btree buffer and block. */
3430 924905860 : block = xfs_btree_get_block(cur, level, &bp);
3431 924906187 : old_bn = bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL;
3432 924906187 : numrecs = xfs_btree_get_numrecs(block);
3433 :
3434 : #ifdef DEBUG
3435 924906187 : error = xfs_btree_check_block(cur, block, level, bp);
3436 924933792 : if (error)
3437 0 : goto error0;
3438 :
3439 : /* Check that the new entry is being inserted in the right place. */
3440 924933792 : if (ptr <= numrecs) {
3441 633832792 : if (level == 0) {
3442 632821853 : ASSERT(cur->bc_ops->recs_inorder(cur, rec,
3443 : xfs_btree_rec_addr(cur, ptr, block)));
3444 : } else {
3445 1010939 : ASSERT(cur->bc_ops->keys_inorder(cur, key,
3446 : xfs_btree_key_addr(cur, ptr, block)));
3447 : }
3448 : }
3449 : #endif
3450 :
3451 : /*
3452 : * If the block is full, we can't insert the new entry until we
3453 : * make the block un-full.
3454 : */
3455 924871309 : xfs_btree_set_ptr_null(cur, &nptr);
3456 924871309 : if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
3457 171145726 : error = xfs_btree_make_block_unfull(cur, level, numrecs,
3458 : &optr, &ptr, &nptr, &ncur, lkey, stat);
3459 171146750 : if (error || *stat == 0)
3460 496 : goto error0;
3461 : }
3462 :
3463 : /*
3464 : * The current block may have changed if the block was
3465 : * previously full and we have just made space in it.
3466 : */
3467 924860583 : block = xfs_btree_get_block(cur, level, &bp);
3468 924874439 : numrecs = xfs_btree_get_numrecs(block);
3469 :
3470 : #ifdef DEBUG
3471 924874439 : error = xfs_btree_check_block(cur, block, level, bp);
3472 924925174 : if (error)
3473 0 : goto error0;
3474 : #endif
3475 :
3476 : /*
3477 : * At this point we know there's room for our new entry in the block
3478 : * we're pointing at.
3479 : */
3480 924925174 : XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
3481 :
3482 924927833 : if (level > 0) {
3483 : /* It's a nonleaf. make a hole in the keys and ptrs */
3484 1766267 : union xfs_btree_key *kp;
3485 1766267 : union xfs_btree_ptr *pp;
3486 :
3487 1766267 : kp = xfs_btree_key_addr(cur, ptr, block);
3488 1766267 : pp = xfs_btree_ptr_addr(cur, ptr, block);
3489 :
3490 32489355 : for (i = numrecs - ptr; i >= 0; i--) {
3491 28956820 : error = xfs_btree_debug_check_ptr(cur, pp, i, level);
3492 28956821 : if (error)
3493 0 : goto error0;
3494 : }
3495 :
3496 1766268 : xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
3497 1766266 : xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
3498 :
3499 1766266 : error = xfs_btree_debug_check_ptr(cur, ptrp, 0, level);
3500 1766265 : if (error)
3501 0 : goto error0;
3502 :
3503 : /* Now put the new data in, bump numrecs and log it. */
3504 1766265 : xfs_btree_copy_keys(cur, kp, key, 1);
3505 1766264 : xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
3506 1766263 : numrecs++;
3507 1766263 : xfs_btree_set_numrecs(block, numrecs);
3508 1766263 : xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
3509 1766266 : xfs_btree_log_keys(cur, bp, ptr, numrecs);
3510 : #ifdef DEBUG
3511 1766268 : if (ptr < numrecs) {
3512 1010810 : ASSERT(cur->bc_ops->keys_inorder(cur, kp,
3513 : xfs_btree_key_addr(cur, ptr + 1, block)));
3514 : }
3515 : #endif
3516 : } else {
3517 : /* It's a leaf. make a hole in the records */
3518 923161566 : union xfs_btree_rec *rp;
3519 :
3520 923161566 : rp = xfs_btree_rec_addr(cur, ptr, block);
3521 :
3522 923161566 : xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
3523 :
3524 : /* Now put the new data in, bump numrecs and log it. */
3525 923139304 : xfs_btree_copy_recs(cur, rp, rec, 1);
3526 923104273 : xfs_btree_set_numrecs(block, ++numrecs);
3527 923104273 : xfs_btree_log_recs(cur, bp, ptr, numrecs);
3528 : #ifdef DEBUG
3529 923178400 : if (ptr < numrecs) {
3530 632826493 : ASSERT(cur->bc_ops->recs_inorder(cur, rp,
3531 : xfs_btree_rec_addr(cur, ptr + 1, block)));
3532 : }
3533 : #endif
3534 : }
3535 :
3536 : /* Log the new number of records in the btree header. */
3537 924936863 : xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3538 :
3539 : /*
3540 : * If we just inserted into a new tree block, we have to
3541 : * recalculate nkey here because nkey is out of date.
3542 : *
3543 : * Otherwise we're just updating an existing block (having shoved
3544 : * some records into the new tree block), so use the regular key
3545 : * update mechanism.
3546 : */
3547 924943237 : if (bp && xfs_buf_daddr(bp) != old_bn) {
3548 1379819 : xfs_btree_get_keys(cur, block, lkey);
3549 1533100527 : } else if (xfs_btree_needs_key_update(cur, optr)) {
3550 474284725 : error = xfs_btree_update_keys(cur, level);
3551 474271614 : if (error)
3552 0 : goto error0;
3553 : }
3554 :
3555 : /*
3556 : * If we are tracking the last record in the tree and
3557 : * we are at the far right edge of the tree, update it.
3558 : */
3559 924930125 : if (xfs_btree_is_lastrec(cur, block, level)) {
3560 136113962 : cur->bc_ops->update_lastrec(cur, block, rec,
3561 : ptr, LASTREC_INSREC);
3562 : }
3563 :
3564 : /*
3565 : * Return the new block number, if any.
3566 : * If there is one, give back a record value and a cursor too.
3567 : */
3568 924873131 : *ptrp = nptr;
3569 1849746262 : if (!xfs_btree_ptr_is_null(cur, &nptr)) {
3570 1806447 : xfs_btree_copy_keys(cur, key, lkey, 1);
3571 1806447 : *curp = ncur;
3572 : }
3573 :
3574 924873131 : *stat = 1;
3575 924873131 : return 0;
3576 :
3577 496 : error0:
3578 496 : if (ncur)
3579 0 : xfs_btree_del_cursor(ncur, error);
3580 : return error;
3581 : }
3582 :
3583 : /*
3584 : * Insert the record at the point referenced by cur.
3585 : *
3586 : * A multi-level split of the tree on insert will invalidate the original
3587 : * cursor. All callers of this function should assume that the cursor is
3588 : * no longer valid and revalidate it.
3589 : */
3590 : int
3591 923177046 : xfs_btree_insert(
3592 : struct xfs_btree_cur *cur,
3593 : int *stat)
3594 : {
3595 923177046 : int error; /* error return value */
3596 923177046 : int i; /* result value, 0 for failure */
3597 923177046 : int level; /* current level number in btree */
3598 923177046 : union xfs_btree_ptr nptr; /* new block number (split result) */
3599 923177046 : struct xfs_btree_cur *ncur; /* new cursor (split result) */
3600 923177046 : struct xfs_btree_cur *pcur; /* previous level's cursor */
3601 923177046 : union xfs_btree_key bkey; /* key of block to insert */
3602 923177046 : union xfs_btree_key *key;
3603 923177046 : union xfs_btree_rec rec; /* record to insert */
3604 :
3605 923177046 : level = 0;
3606 923177046 : ncur = NULL;
3607 923177046 : pcur = cur;
3608 923177046 : key = &bkey;
3609 :
3610 923177046 : xfs_btree_set_ptr_null(cur, &nptr);
3611 :
3612 : /* Make a key out of the record data to be inserted, and save it. */
3613 923177046 : cur->bc_ops->init_rec_from_cur(cur, &rec);
3614 923131000 : cur->bc_ops->init_key_from_rec(key, &rec);
3615 :
3616 : /*
3617 : * Loop going up the tree, starting at the leaf level.
3618 : * Stop when we don't get a split block, that must mean that
3619 : * the insert is finished with this level.
3620 : */
3621 924943913 : do {
3622 : /*
3623 : * Insert nrec/nptr into this level of the tree.
3624 : * Note if we fail, nptr will be null.
3625 : */
3626 924943913 : error = xfs_btree_insrec(pcur, level, &nptr, &rec, key,
3627 : &ncur, &i);
3628 924921635 : if (error) {
3629 497 : if (pcur != cur)
3630 6 : xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
3631 497 : goto error0;
3632 : }
3633 :
3634 924921138 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
3635 0 : xfs_btree_mark_sick(cur);
3636 0 : error = -EFSCORRUPTED;
3637 0 : goto error0;
3638 : }
3639 924921138 : level++;
3640 :
3641 : /*
3642 : * See if the cursor we just used is trash.
3643 : * Can't trash the caller's cursor, but otherwise we should
3644 : * if ncur is a new cursor or we're about to be done.
3645 : */
3646 924921138 : if (pcur != cur &&
3647 3525816 : (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
3648 : /* Save the state from the cursor before we trash it */
3649 1766267 : if (cur->bc_ops->update_cursor)
3650 481093 : cur->bc_ops->update_cursor(pcur, cur);
3651 1766266 : cur->bc_nlevels = pcur->bc_nlevels;
3652 1766266 : xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
3653 : }
3654 : /* If we got a new cursor, switch to it. */
3655 924892235 : if (ncur) {
3656 1779622 : pcur = ncur;
3657 1779622 : ncur = NULL;
3658 : }
3659 1849784470 : } while (!xfs_btree_ptr_is_null(cur, &nptr));
3660 :
3661 923084043 : *stat = i;
3662 923084043 : return 0;
3663 : error0:
3664 : return error;
3665 : }
3666 :
3667 : /*
3668 : * Try to merge a non-leaf block back into the inode root.
3669 : *
3670 : * Note: the killroot names comes from the fact that we're effectively
3671 : * killing the old root block. But because we can't just delete the
3672 : * inode we have to copy the single block it was pointing to into the
3673 : * inode.
3674 : */
3675 : STATIC int
3676 20161692 : xfs_btree_kill_iroot(
3677 : struct xfs_btree_cur *cur)
3678 : {
3679 20161692 : int whichfork = cur->bc_ino.whichfork;
3680 20161692 : struct xfs_inode *ip = cur->bc_ino.ip;
3681 20161692 : struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
3682 20161580 : struct xfs_btree_block *block;
3683 20161580 : struct xfs_btree_block *cblock;
3684 20161580 : union xfs_btree_key *kp;
3685 20161580 : union xfs_btree_key *ckp;
3686 20161580 : union xfs_btree_ptr *pp;
3687 20161580 : union xfs_btree_ptr *cpp;
3688 20161580 : struct xfs_buf *cbp;
3689 20161580 : int level;
3690 20161580 : int index;
3691 20161580 : int numrecs;
3692 20161580 : int error;
3693 : #ifdef DEBUG
3694 20161580 : union xfs_btree_ptr ptr;
3695 : #endif
3696 20161580 : int i;
3697 :
3698 20161580 : ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
3699 20161580 : ASSERT(cur->bc_nlevels > 1);
3700 :
3701 : /*
3702 : * Don't deal with the root block needs to be a leaf case.
3703 : * We're just going to turn the thing back into extents anyway.
3704 : */
3705 20161580 : level = cur->bc_nlevels - 1;
3706 20161580 : if (level == 1)
3707 20013842 : goto out0;
3708 :
3709 : /*
3710 : * Give up if the root has multiple children.
3711 : */
3712 147738 : block = xfs_btree_get_iroot(cur);
3713 147738 : if (xfs_btree_get_numrecs(block) != 1)
3714 22 : goto out0;
3715 :
3716 147716 : cblock = xfs_btree_get_block(cur, level - 1, &cbp);
3717 147716 : numrecs = xfs_btree_get_numrecs(cblock);
3718 :
3719 : /*
3720 : * Only do this if the next level will fit.
3721 : * Then the data must be copied up to the inode,
3722 : * instead of freeing the root you free the next level.
3723 : */
3724 147716 : if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
3725 145410 : goto out0;
3726 :
3727 2306 : XFS_BTREE_STATS_INC(cur, killroot);
3728 :
3729 : #ifdef DEBUG
3730 2306 : xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
3731 4612 : ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3732 2306 : xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
3733 4612 : ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3734 : #endif
3735 :
3736 2306 : index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
3737 2306 : if (index) {
3738 2306 : xfs_iroot_realloc(cur->bc_ino.ip, index,
3739 2306 : cur->bc_ino.whichfork);
3740 2306 : block = ifp->if_broot;
3741 : }
3742 :
3743 2306 : be16_add_cpu(&block->bb_numrecs, index);
3744 2306 : ASSERT(block->bb_numrecs == cblock->bb_numrecs);
3745 :
3746 2306 : kp = xfs_btree_key_addr(cur, 1, block);
3747 2306 : ckp = xfs_btree_key_addr(cur, 1, cblock);
3748 2306 : xfs_btree_copy_keys(cur, kp, ckp, numrecs);
3749 :
3750 2306 : pp = xfs_btree_ptr_addr(cur, 1, block);
3751 2306 : cpp = xfs_btree_ptr_addr(cur, 1, cblock);
3752 :
3753 30032 : for (i = 0; i < numrecs; i++) {
3754 25420 : error = xfs_btree_debug_check_ptr(cur, cpp, i, level - 1);
3755 25420 : if (error)
3756 0 : return error;
3757 : }
3758 :
3759 2306 : xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
3760 :
3761 2306 : error = xfs_btree_free_block(cur, cbp);
3762 2306 : if (error)
3763 : return error;
3764 :
3765 2306 : cur->bc_levels[level - 1].bp = NULL;
3766 2306 : be16_add_cpu(&block->bb_level, -1);
3767 4612 : xfs_trans_log_inode(cur->bc_tp, ip,
3768 2306 : XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork));
3769 2306 : cur->bc_nlevels--;
3770 : out0:
3771 : return 0;
3772 : }
3773 :
3774 : /*
3775 : * Kill the current root node, and replace it with it's only child node.
3776 : */
3777 : STATIC int
3778 26271 : xfs_btree_kill_root(
3779 : struct xfs_btree_cur *cur,
3780 : struct xfs_buf *bp,
3781 : int level,
3782 : union xfs_btree_ptr *newroot)
3783 : {
3784 26271 : int error;
3785 :
3786 26271 : XFS_BTREE_STATS_INC(cur, killroot);
3787 :
3788 : /*
3789 : * Update the root pointer, decreasing the level by 1 and then
3790 : * free the old root.
3791 : */
3792 26271 : cur->bc_ops->set_root(cur, newroot, -1);
3793 :
3794 26271 : error = xfs_btree_free_block(cur, bp);
3795 26271 : if (error)
3796 : return error;
3797 :
3798 26271 : cur->bc_levels[level].bp = NULL;
3799 26271 : cur->bc_levels[level].ra = 0;
3800 26271 : cur->bc_nlevels--;
3801 :
3802 26271 : return 0;
3803 : }
3804 :
3805 : STATIC int
3806 341875815 : xfs_btree_dec_cursor(
3807 : struct xfs_btree_cur *cur,
3808 : int level,
3809 : int *stat)
3810 : {
3811 341875815 : int error;
3812 341875815 : int i;
3813 :
3814 341875815 : if (level > 0) {
3815 478715 : error = xfs_btree_decrement(cur, level, &i);
3816 478715 : if (error)
3817 : return error;
3818 : }
3819 :
3820 341875815 : *stat = 1;
3821 341875815 : return 0;
3822 : }
3823 :
3824 : /*
3825 : * Single level of the btree record deletion routine.
3826 : * Delete record pointed to by cur/level.
3827 : * Remove the record from its block then rebalance the tree.
3828 : * Return 0 for error, 1 for done, 2 to go on to the next level.
3829 : */
3830 : STATIC int /* error */
3831 662298657 : xfs_btree_delrec(
3832 : struct xfs_btree_cur *cur, /* btree cursor */
3833 : int level, /* level removing record from */
3834 : int *stat) /* fail/done/go-on */
3835 : {
3836 662298657 : struct xfs_btree_block *block; /* btree block */
3837 662298657 : union xfs_btree_ptr cptr; /* current block ptr */
3838 662298657 : struct xfs_buf *bp; /* buffer for block */
3839 662298657 : int error; /* error return value */
3840 662298657 : int i; /* loop counter */
3841 662298657 : union xfs_btree_ptr lptr; /* left sibling block ptr */
3842 662298657 : struct xfs_buf *lbp; /* left buffer pointer */
3843 662298657 : struct xfs_btree_block *left; /* left btree block */
3844 662298657 : int lrecs = 0; /* left record count */
3845 662298657 : int ptr; /* key/record index */
3846 662298657 : union xfs_btree_ptr rptr; /* right sibling block ptr */
3847 662298657 : struct xfs_buf *rbp; /* right buffer pointer */
3848 662298657 : struct xfs_btree_block *right; /* right btree block */
3849 662298657 : struct xfs_btree_block *rrblock; /* right-right btree block */
3850 662298657 : struct xfs_buf *rrbp; /* right-right buffer pointer */
3851 662298657 : int rrecs = 0; /* right record count */
3852 662298657 : struct xfs_btree_cur *tcur; /* temporary btree cursor */
3853 662298657 : int numrecs; /* temporary numrec count */
3854 :
3855 662298657 : tcur = NULL;
3856 :
3857 : /* Get the index of the entry being deleted, check for nothing there. */
3858 662298657 : ptr = cur->bc_levels[level].ptr;
3859 662298657 : if (ptr == 0) {
3860 0 : *stat = 0;
3861 0 : return 0;
3862 : }
3863 :
3864 : /* Get the buffer & block containing the record or key/ptr. */
3865 662298657 : block = xfs_btree_get_block(cur, level, &bp);
3866 662268465 : numrecs = xfs_btree_get_numrecs(block);
3867 :
3868 : #ifdef DEBUG
3869 662268465 : error = xfs_btree_check_block(cur, block, level, bp);
3870 662296993 : if (error)
3871 0 : goto error0;
3872 : #endif
3873 :
3874 : /* Fail if we're off the end of the block. */
3875 662296993 : if (ptr > numrecs) {
3876 0 : *stat = 0;
3877 0 : return 0;
3878 : }
3879 :
3880 662296993 : XFS_BTREE_STATS_INC(cur, delrec);
3881 662297905 : XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3882 :
3883 : /* Excise the entries being deleted. */
3884 662305905 : if (level > 0) {
3885 : /* It's a nonleaf. operate on keys and ptrs */
3886 510800 : union xfs_btree_key *lkp;
3887 510800 : union xfs_btree_ptr *lpp;
3888 :
3889 510800 : lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3890 510800 : lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3891 :
3892 14447183 : for (i = 0; i < numrecs - ptr; i++) {
3893 13936383 : error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
3894 13936383 : if (error)
3895 0 : goto error0;
3896 : }
3897 :
3898 510800 : if (ptr < numrecs) {
3899 265344 : xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3900 265344 : xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3901 265344 : xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3902 265344 : xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3903 : }
3904 : } else {
3905 : /* It's a leaf. operate on records */
3906 661795105 : if (ptr < numrecs) {
3907 441394605 : xfs_btree_shift_recs(cur,
3908 : xfs_btree_rec_addr(cur, ptr + 1, block),
3909 : -1, numrecs - ptr);
3910 441407015 : xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3911 : }
3912 : }
3913 :
3914 : /*
3915 : * Decrement and log the number of entries in the block.
3916 : */
3917 662293378 : xfs_btree_set_numrecs(block, --numrecs);
3918 662293378 : xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3919 :
3920 : /*
3921 : * If we are tracking the last record in the tree and
3922 : * we are at the far right edge of the tree, update it.
3923 : */
3924 662313332 : if (xfs_btree_is_lastrec(cur, block, level)) {
3925 120990658 : cur->bc_ops->update_lastrec(cur, block, NULL,
3926 : ptr, LASTREC_DELREC);
3927 : }
3928 :
3929 : /*
3930 : * We're at the root level. First, shrink the root block in-memory.
3931 : * Try to get rid of the next level down. If we can't then there's
3932 : * nothing left to do.
3933 : */
3934 662255626 : if (level == cur->bc_nlevels - 1) {
3935 292351590 : if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3936 30481 : xfs_iroot_realloc(cur->bc_ino.ip, -1,
3937 30481 : cur->bc_ino.whichfork);
3938 :
3939 30481 : error = xfs_btree_kill_iroot(cur);
3940 30481 : if (error)
3941 0 : goto error0;
3942 :
3943 30481 : error = xfs_btree_dec_cursor(cur, level, stat);
3944 30481 : if (error)
3945 0 : goto error0;
3946 30481 : *stat = 1;
3947 30481 : return 0;
3948 : }
3949 :
3950 : /*
3951 : * If this is the root level, and there's only one entry left,
3952 : * and it's NOT the leaf level, then we can get rid of this
3953 : * level.
3954 : */
3955 292321109 : if (numrecs == 1 && level > 0) {
3956 26271 : union xfs_btree_ptr *pp;
3957 : /*
3958 : * pp is still set to the first pointer in the block.
3959 : * Make it the new root of the btree.
3960 : */
3961 26271 : pp = xfs_btree_ptr_addr(cur, 1, block);
3962 26271 : error = xfs_btree_kill_root(cur, bp, level, pp);
3963 26271 : if (error)
3964 0 : goto error0;
3965 292294838 : } else if (level > 0) {
3966 137390 : error = xfs_btree_dec_cursor(cur, level, stat);
3967 137390 : if (error)
3968 0 : goto error0;
3969 : }
3970 292321109 : *stat = 1;
3971 292321109 : return 0;
3972 : }
3973 :
3974 : /*
3975 : * If we deleted the leftmost entry in the block, update the
3976 : * key values above us in the tree.
3977 : */
3978 569932173 : if (xfs_btree_needs_key_update(cur, ptr)) {
3979 181641419 : error = xfs_btree_update_keys(cur, level);
3980 181655345 : if (error)
3981 0 : goto error0;
3982 : }
3983 :
3984 : /*
3985 : * If the number of records remaining in the block is at least
3986 : * the minimum, we're done.
3987 : */
3988 369917962 : if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3989 305678887 : error = xfs_btree_dec_cursor(cur, level, stat);
3990 305670231 : if (error)
3991 0 : goto error0;
3992 : return 0;
3993 : }
3994 :
3995 : /*
3996 : * Otherwise, we have to move some records around to keep the
3997 : * tree balanced. Look at the left and right sibling blocks to
3998 : * see if we can re-balance by moving only one record.
3999 : */
4000 64233527 : xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
4001 64233473 : xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
4002 :
4003 64233167 : if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
4004 : /*
4005 : * One child of root, need to get a chance to copy its contents
4006 : * into the root and delete it. Can't go up to next level,
4007 : * there's nothing to delete there.
4008 : */
4009 127820751 : if (xfs_btree_ptr_is_null(cur, &rptr) &&
4010 20131170 : xfs_btree_ptr_is_null(cur, &lptr) &&
4011 20131170 : level == cur->bc_nlevels - 2) {
4012 20131137 : error = xfs_btree_kill_iroot(cur);
4013 20131100 : if (!error)
4014 20131176 : error = xfs_btree_dec_cursor(cur, level, stat);
4015 20131018 : if (error)
4016 0 : goto error0;
4017 : return 0;
4018 : }
4019 : }
4020 :
4021 111086486 : ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
4022 : !xfs_btree_ptr_is_null(cur, &lptr));
4023 :
4024 : /*
4025 : * Duplicate the cursor so our btree manipulations here won't
4026 : * disrupt the next level up.
4027 : */
4028 44102030 : error = xfs_btree_dup_cursor(cur, &tcur);
4029 44102060 : if (error)
4030 0 : goto error0;
4031 :
4032 : /*
4033 : * If there's a right sibling, see if it's ok to shift an entry
4034 : * out of it.
4035 : */
4036 88204120 : if (!xfs_btree_ptr_is_null(cur, &rptr)) {
4037 : /*
4038 : * Move the temp cursor to the last entry in the next block.
4039 : * Actually any entry but the first would suffice.
4040 : */
4041 21219628 : i = xfs_btree_lastrec(tcur, level);
4042 21219956 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4043 0 : xfs_btree_mark_sick(cur);
4044 0 : error = -EFSCORRUPTED;
4045 0 : goto error0;
4046 : }
4047 :
4048 21219956 : error = xfs_btree_increment(tcur, level, &i);
4049 21219712 : if (error)
4050 10 : goto error0;
4051 21219702 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4052 0 : xfs_btree_mark_sick(cur);
4053 0 : error = -EFSCORRUPTED;
4054 0 : goto error0;
4055 : }
4056 :
4057 21219702 : i = xfs_btree_lastrec(tcur, level);
4058 21220157 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4059 0 : xfs_btree_mark_sick(cur);
4060 0 : error = -EFSCORRUPTED;
4061 0 : goto error0;
4062 : }
4063 :
4064 : /* Grab a pointer to the block. */
4065 21220157 : right = xfs_btree_get_block(tcur, level, &rbp);
4066 : #ifdef DEBUG
4067 21219833 : error = xfs_btree_check_block(tcur, right, level, rbp);
4068 21220030 : if (error)
4069 0 : goto error0;
4070 : #endif
4071 : /* Grab the current block number, for future use. */
4072 21220030 : xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
4073 :
4074 : /*
4075 : * If right block is full enough so that removing one entry
4076 : * won't make it too empty, and left-shifting an entry out
4077 : * of right to us works, we're done.
4078 : */
4079 21219433 : if (xfs_btree_get_numrecs(right) - 1 >=
4080 21219433 : cur->bc_ops->get_minrecs(tcur, level)) {
4081 15900369 : error = xfs_btree_lshift(tcur, level, &i);
4082 15901226 : if (error)
4083 0 : goto error0;
4084 15901226 : if (i) {
4085 15901242 : ASSERT(xfs_btree_get_numrecs(block) >=
4086 : cur->bc_ops->get_minrecs(tcur, level));
4087 :
4088 15901062 : xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
4089 15901323 : tcur = NULL;
4090 :
4091 15901323 : error = xfs_btree_dec_cursor(cur, level, stat);
4092 15900501 : if (error)
4093 0 : goto error0;
4094 : return 0;
4095 : }
4096 : }
4097 :
4098 : /*
4099 : * Otherwise, grab the number of records in right for
4100 : * future reference, and fix up the temp cursor to point
4101 : * to our block again (last record).
4102 : */
4103 5319355 : rrecs = xfs_btree_get_numrecs(right);
4104 10638710 : if (!xfs_btree_ptr_is_null(cur, &lptr)) {
4105 5261639 : i = xfs_btree_firstrec(tcur, level);
4106 5261662 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4107 0 : xfs_btree_mark_sick(cur);
4108 0 : error = -EFSCORRUPTED;
4109 0 : goto error0;
4110 : }
4111 :
4112 5261662 : error = xfs_btree_decrement(tcur, level, &i);
4113 5261662 : if (error)
4114 0 : goto error0;
4115 5261662 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4116 0 : xfs_btree_mark_sick(cur);
4117 0 : error = -EFSCORRUPTED;
4118 0 : goto error0;
4119 : }
4120 : }
4121 : }
4122 :
4123 : /*
4124 : * If there's a left sibling, see if it's ok to shift an entry
4125 : * out of it.
4126 : */
4127 56403620 : if (!xfs_btree_ptr_is_null(cur, &lptr)) {
4128 : /*
4129 : * Move the temp cursor to the first entry in the
4130 : * previous block.
4131 : */
4132 28144087 : i = xfs_btree_firstrec(tcur, level);
4133 28144097 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4134 0 : xfs_btree_mark_sick(cur);
4135 0 : error = -EFSCORRUPTED;
4136 0 : goto error0;
4137 : }
4138 :
4139 28144097 : error = xfs_btree_decrement(tcur, level, &i);
4140 28144135 : if (error)
4141 1 : goto error0;
4142 28144134 : i = xfs_btree_firstrec(tcur, level);
4143 28144134 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4144 0 : xfs_btree_mark_sick(cur);
4145 0 : error = -EFSCORRUPTED;
4146 0 : goto error0;
4147 : }
4148 :
4149 : /* Grab a pointer to the block. */
4150 28144134 : left = xfs_btree_get_block(tcur, level, &lbp);
4151 : #ifdef DEBUG
4152 28144132 : error = xfs_btree_check_block(cur, left, level, lbp);
4153 28144141 : if (error)
4154 0 : goto error0;
4155 : #endif
4156 : /* Grab the current block number, for future use. */
4157 28144141 : xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
4158 :
4159 : /*
4160 : * If left block is full enough so that removing one entry
4161 : * won't make it too empty, and right-shifting an entry out
4162 : * of left to us works, we're done.
4163 : */
4164 28144134 : if (xfs_btree_get_numrecs(left) - 1 >=
4165 28144134 : cur->bc_ops->get_minrecs(tcur, level)) {
4166 27691000 : error = xfs_btree_rshift(tcur, level, &i);
4167 27691060 : if (error)
4168 0 : goto error0;
4169 27691060 : if (i) {
4170 27691050 : ASSERT(xfs_btree_get_numrecs(block) >=
4171 : cur->bc_ops->get_minrecs(tcur, level));
4172 27691023 : xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
4173 27691065 : tcur = NULL;
4174 27691065 : if (level == 0)
4175 27686089 : cur->bc_levels[0].ptr++;
4176 :
4177 27691065 : *stat = 1;
4178 27691065 : return 0;
4179 : }
4180 : }
4181 :
4182 : /*
4183 : * Otherwise, grab the number of records in right for
4184 : * future reference.
4185 : */
4186 453081 : lrecs = xfs_btree_get_numrecs(left);
4187 : }
4188 :
4189 : /* Delete the temp cursor, we're done with it. */
4190 510804 : xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
4191 510799 : tcur = NULL;
4192 :
4193 : /* If here, we need to do a join to keep the tree balanced. */
4194 1021598 : ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
4195 :
4196 1474679 : if (!xfs_btree_ptr_is_null(cur, &lptr) &&
4197 453082 : lrecs + xfs_btree_get_numrecs(block) <=
4198 453082 : cur->bc_ops->get_maxrecs(cur, level)) {
4199 : /*
4200 : * Set "right" to be the starting block,
4201 : * "left" to be the left neighbor.
4202 : */
4203 453081 : rptr = cptr;
4204 453081 : right = block;
4205 453081 : rbp = bp;
4206 453081 : error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
4207 453083 : if (error)
4208 0 : goto error0;
4209 :
4210 : /*
4211 : * If that won't work, see if we can join with the right neighbor block.
4212 : */
4213 173151 : } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
4214 57717 : rrecs + xfs_btree_get_numrecs(block) <=
4215 57717 : cur->bc_ops->get_maxrecs(cur, level)) {
4216 : /*
4217 : * Set "left" to be the starting block,
4218 : * "right" to be the right neighbor.
4219 : */
4220 57717 : lptr = cptr;
4221 57717 : left = block;
4222 57717 : lbp = bp;
4223 57717 : error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
4224 57717 : if (error)
4225 0 : goto error0;
4226 :
4227 : /*
4228 : * Otherwise, we can't fix the imbalance.
4229 : * Just return. This is probably a logic error, but it's not fatal.
4230 : */
4231 : } else {
4232 0 : error = xfs_btree_dec_cursor(cur, level, stat);
4233 0 : if (error)
4234 0 : goto error0;
4235 : return 0;
4236 : }
4237 :
4238 510800 : rrecs = xfs_btree_get_numrecs(right);
4239 510800 : lrecs = xfs_btree_get_numrecs(left);
4240 :
4241 : /*
4242 : * We're now going to join "left" and "right" by moving all the stuff
4243 : * in "right" to "left" and deleting "right".
4244 : */
4245 510800 : XFS_BTREE_STATS_ADD(cur, moves, rrecs);
4246 510800 : if (level > 0) {
4247 : /* It's a non-leaf. Move keys and pointers. */
4248 835 : union xfs_btree_key *lkp; /* left btree key */
4249 835 : union xfs_btree_ptr *lpp; /* left address pointer */
4250 835 : union xfs_btree_key *rkp; /* right btree key */
4251 835 : union xfs_btree_ptr *rpp; /* right address pointer */
4252 :
4253 835 : lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
4254 835 : lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
4255 835 : rkp = xfs_btree_key_addr(cur, 1, right);
4256 835 : rpp = xfs_btree_ptr_addr(cur, 1, right);
4257 :
4258 47505 : for (i = 1; i < rrecs; i++) {
4259 46670 : error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
4260 46670 : if (error)
4261 0 : goto error0;
4262 : }
4263 :
4264 835 : xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
4265 835 : xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
4266 :
4267 835 : xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
4268 835 : xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
4269 : } else {
4270 : /* It's a leaf. Move records. */
4271 509965 : union xfs_btree_rec *lrp; /* left record pointer */
4272 509965 : union xfs_btree_rec *rrp; /* right record pointer */
4273 :
4274 509965 : lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
4275 509965 : rrp = xfs_btree_rec_addr(cur, 1, right);
4276 :
4277 509965 : xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
4278 509965 : xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
4279 : }
4280 :
4281 510800 : XFS_BTREE_STATS_INC(cur, join);
4282 :
4283 : /*
4284 : * Fix up the number of records and right block pointer in the
4285 : * surviving block, and log it.
4286 : */
4287 510800 : xfs_btree_set_numrecs(left, lrecs + rrecs);
4288 510800 : xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB);
4289 510799 : xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
4290 510799 : xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
4291 :
4292 : /* If there is a right sibling, point it to the remaining block. */
4293 510800 : xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
4294 1021600 : if (!xfs_btree_ptr_is_null(cur, &cptr)) {
4295 269030 : error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
4296 269030 : if (error)
4297 0 : goto error0;
4298 269030 : xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
4299 269030 : xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
4300 : }
4301 :
4302 : /* Free the deleted block. */
4303 510801 : error = xfs_btree_free_block(cur, rbp);
4304 510800 : if (error)
4305 0 : goto error0;
4306 :
4307 : /*
4308 : * If we joined with the left neighbor, set the buffer in the
4309 : * cursor to the left block, and fix up the index.
4310 : */
4311 510800 : if (bp != lbp) {
4312 453083 : cur->bc_levels[level].bp = lbp;
4313 453083 : cur->bc_levels[level].ptr += lrecs;
4314 453083 : cur->bc_levels[level].ra = 0;
4315 : }
4316 : /*
4317 : * If we joined with the right neighbor and there's a level above
4318 : * us, increment the cursor at that level.
4319 : */
4320 57717 : else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
4321 55334 : (level + 1 < cur->bc_nlevels)) {
4322 57717 : error = xfs_btree_increment(cur, level + 1, &i);
4323 57717 : if (error)
4324 0 : goto error0;
4325 : }
4326 :
4327 : /*
4328 : * Readjust the ptr at this level if it's not a leaf, since it's
4329 : * still pointing at the deletion point, which makes the cursor
4330 : * inconsistent. If this makes the ptr 0, the caller fixes it up.
4331 : * We can't use decrement because it would change the next level up.
4332 : */
4333 510800 : if (level > 0)
4334 835 : cur->bc_levels[level].ptr--;
4335 :
4336 : /*
4337 : * We combined blocks, so we have to update the parent keys if the
4338 : * btree supports overlapped intervals. However,
4339 : * bc_levels[level + 1].ptr points to the old block so that the caller
4340 : * knows which record to delete. Therefore, the caller must be savvy
4341 : * enough to call updkeys for us if we return stat == 2. The other
4342 : * exit points from this function don't require deletions further up
4343 : * the tree, so they can call updkeys directly.
4344 : */
4345 :
4346 : /* Return value means the next level up has something to do. */
4347 510800 : *stat = 2;
4348 510800 : return 0;
4349 :
4350 11 : error0:
4351 11 : if (tcur)
4352 11 : xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
4353 : return error;
4354 : }
4355 :
4356 : /*
4357 : * Delete the record pointed to by cur.
4358 : * The cursor refers to the place where the record was (could be inserted)
4359 : * when the operation returns.
4360 : */
4361 : int /* error */
4362 661782130 : xfs_btree_delete(
4363 : struct xfs_btree_cur *cur,
4364 : int *stat) /* success/failure */
4365 : {
4366 661782130 : int error; /* error return value */
4367 661782130 : int level;
4368 661782130 : int i;
4369 661782130 : bool joined = false;
4370 :
4371 : /*
4372 : * Go up the tree, starting at leaf level.
4373 : *
4374 : * If 2 is returned then a join was done; go to the next level.
4375 : * Otherwise we are done.
4376 : */
4377 1324035453 : for (level = 0, i = 2; i == 2; level++) {
4378 662302451 : error = xfs_btree_delrec(cur, level, &i);
4379 662253334 : if (error)
4380 11 : goto error0;
4381 662253323 : if (i == 2)
4382 510800 : joined = true;
4383 : }
4384 :
4385 : /*
4386 : * If we combined blocks as part of deleting the record, delrec won't
4387 : * have updated the parent high keys so we have to do that here.
4388 : */
4389 661733002 : if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) {
4390 183533 : error = xfs_btree_updkeys_force(cur, 0);
4391 183533 : if (error)
4392 0 : goto error0;
4393 : }
4394 :
4395 661733002 : if (i == 0) {
4396 0 : for (level = 1; level < cur->bc_nlevels; level++) {
4397 0 : if (cur->bc_levels[level].ptr == 0) {
4398 0 : error = xfs_btree_decrement(cur, level, &i);
4399 0 : if (error)
4400 0 : goto error0;
4401 : break;
4402 : }
4403 : }
4404 : }
4405 :
4406 661733002 : *stat = i;
4407 661733002 : return 0;
4408 : error0:
4409 : return error;
4410 : }
4411 :
4412 : /*
4413 : * Get the data from the pointed-to record.
4414 : */
4415 : int /* error */
4416 >13236*10^7 : xfs_btree_get_rec(
4417 : struct xfs_btree_cur *cur, /* btree cursor */
4418 : union xfs_btree_rec **recp, /* output: btree record */
4419 : int *stat) /* output: success/failure */
4420 : {
4421 >13236*10^7 : struct xfs_btree_block *block; /* btree block */
4422 >13236*10^7 : struct xfs_buf *bp; /* buffer pointer */
4423 >13236*10^7 : int ptr; /* record number */
4424 : #ifdef DEBUG
4425 >13236*10^7 : int error; /* error return value */
4426 : #endif
4427 :
4428 >13236*10^7 : ptr = cur->bc_levels[0].ptr;
4429 >13236*10^7 : block = xfs_btree_get_block(cur, 0, &bp);
4430 :
4431 : #ifdef DEBUG
4432 >13209*10^7 : error = xfs_btree_check_block(cur, block, 0, bp);
4433 >13218*10^7 : if (error)
4434 : return error;
4435 : #endif
4436 :
4437 : /*
4438 : * Off the right end or left end, return failure.
4439 : */
4440 >13218*10^7 : if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
4441 12720738 : *stat = 0;
4442 12720738 : return 0;
4443 : }
4444 :
4445 : /*
4446 : * Point to the record and extract its data.
4447 : */
4448 >13217*10^7 : *recp = xfs_btree_rec_addr(cur, ptr, block);
4449 >13217*10^7 : *stat = 1;
4450 >13217*10^7 : return 0;
4451 : }
4452 :
4453 : /* Visit a block in a btree. */
4454 : STATIC int
4455 151415361 : xfs_btree_visit_block(
4456 : struct xfs_btree_cur *cur,
4457 : int level,
4458 : xfs_btree_visit_blocks_fn fn,
4459 : void *data)
4460 : {
4461 151415361 : struct xfs_btree_block *block;
4462 151415361 : struct xfs_buf *bp;
4463 151415361 : union xfs_btree_ptr rptr, bufptr;
4464 151415361 : int error;
4465 :
4466 : /* do right sibling readahead */
4467 151415361 : xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
4468 151413506 : block = xfs_btree_get_block(cur, level, &bp);
4469 :
4470 : /* process the block */
4471 151413312 : error = fn(cur, level, data);
4472 151410953 : if (error)
4473 : return error;
4474 :
4475 : /* now read rh sibling block for next iteration */
4476 151403796 : xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
4477 302805432 : if (xfs_btree_ptr_is_null(cur, &rptr))
4478 : return -ENOENT;
4479 :
4480 : /*
4481 : * We only visit blocks once in this walk, so we have to avoid the
4482 : * internal xfs_btree_lookup_get_block() optimisation where it will
4483 : * return the same block without checking if the right sibling points
4484 : * back to us and creates a cyclic reference in the btree.
4485 : */
4486 130491809 : xfs_btree_buf_to_ptr(cur, bp, &bufptr);
4487 130492555 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
4488 6397516 : if (rptr.l == bufptr.l) {
4489 0 : xfs_btree_mark_sick(cur);
4490 0 : return -EFSCORRUPTED;
4491 : }
4492 : } else {
4493 124095039 : if (rptr.s == bufptr.s) {
4494 0 : xfs_btree_mark_sick(cur);
4495 0 : return -EFSCORRUPTED;
4496 : }
4497 : }
4498 130492555 : return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
4499 : }
4500 :
4501 :
4502 : /* Visit every block in a btree. */
4503 : int
4504 12666987 : xfs_btree_visit_blocks(
4505 : struct xfs_btree_cur *cur,
4506 : xfs_btree_visit_blocks_fn fn,
4507 : unsigned int flags,
4508 : void *data)
4509 : {
4510 12666987 : union xfs_btree_ptr lptr;
4511 12666987 : int level;
4512 12666987 : struct xfs_btree_block *block = NULL;
4513 12666987 : int error = 0;
4514 :
4515 12666987 : cur->bc_ops->init_ptr_from_cur(cur, &lptr);
4516 :
4517 : /* for each level */
4518 33786362 : for (level = cur->bc_nlevels - 1; level >= 0; level--) {
4519 : /* grab the left hand block */
4520 21124038 : error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
4521 21129545 : if (error)
4522 266 : return error;
4523 :
4524 : /* readahead the left most block for the next level down */
4525 21129279 : if (level > 0) {
4526 8455777 : union xfs_btree_ptr *ptr;
4527 :
4528 8455777 : ptr = xfs_btree_ptr_addr(cur, 1, block);
4529 8455663 : xfs_btree_readahead_ptr(cur, ptr, 1);
4530 :
4531 : /* save for the next iteration of the loop */
4532 8456421 : xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
4533 :
4534 8455825 : if (!(flags & XFS_BTREE_VISIT_LEAVES))
4535 206203 : continue;
4536 12673502 : } else if (!(flags & XFS_BTREE_VISIT_RECORDS)) {
4537 0 : continue;
4538 : }
4539 :
4540 : /* for each buffer in the level */
4541 151416438 : do {
4542 151416438 : error = xfs_btree_visit_block(cur, level, fn, data);
4543 151412474 : } while (!error);
4544 :
4545 20919160 : if (error != -ENOENT)
4546 7399 : return error;
4547 : }
4548 :
4549 : return 0;
4550 : }
4551 :
4552 : /*
4553 : * Change the owner of a btree.
4554 : *
4555 : * The mechanism we use here is ordered buffer logging. Because we don't know
4556 : * how many buffers were are going to need to modify, we don't really want to
4557 : * have to make transaction reservations for the worst case of every buffer in a
4558 : * full size btree as that may be more space that we can fit in the log....
4559 : *
4560 : * We do the btree walk in the most optimal manner possible - we have sibling
4561 : * pointers so we can just walk all the blocks on each level from left to right
4562 : * in a single pass, and then move to the next level and do the same. We can
4563 : * also do readahead on the sibling pointers to get IO moving more quickly,
4564 : * though for slow disks this is unlikely to make much difference to performance
4565 : * as the amount of CPU work we have to do before moving to the next block is
4566 : * relatively small.
4567 : *
4568 : * For each btree block that we load, modify the owner appropriately, set the
4569 : * buffer as an ordered buffer and log it appropriately. We need to ensure that
4570 : * we mark the region we change dirty so that if the buffer is relogged in
4571 : * a subsequent transaction the changes we make here as an ordered buffer are
4572 : * correctly relogged in that transaction. If we are in recovery context, then
4573 : * just queue the modified buffer as delayed write buffer so the transaction
4574 : * recovery completion writes the changes to disk.
4575 : */
4576 : struct xfs_btree_block_change_owner_info {
4577 : uint64_t new_owner;
4578 : struct list_head *buffer_list;
4579 : };
4580 :
4581 : static int
4582 189381 : xfs_btree_block_change_owner(
4583 : struct xfs_btree_cur *cur,
4584 : int level,
4585 : void *data)
4586 : {
4587 189381 : struct xfs_btree_block_change_owner_info *bbcoi = data;
4588 189381 : struct xfs_btree_block *block;
4589 189381 : struct xfs_buf *bp;
4590 :
4591 : /* modify the owner */
4592 189381 : block = xfs_btree_get_block(cur, level, &bp);
4593 189381 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
4594 189381 : if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
4595 : return 0;
4596 15022 : block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
4597 : } else {
4598 0 : if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner))
4599 : return 0;
4600 0 : block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
4601 : }
4602 :
4603 : /*
4604 : * If the block is a root block hosted in an inode, we might not have a
4605 : * buffer pointer here and we shouldn't attempt to log the change as the
4606 : * information is already held in the inode and discarded when the root
4607 : * block is formatted into the on-disk inode fork. We still change it,
4608 : * though, so everything is consistent in memory.
4609 : */
4610 15022 : if (!bp) {
4611 6255 : ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
4612 6255 : ASSERT(level == cur->bc_nlevels - 1);
4613 6255 : return 0;
4614 : }
4615 :
4616 8767 : if (cur->bc_tp) {
4617 8767 : if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) {
4618 7399 : xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
4619 7399 : return -EAGAIN;
4620 : }
4621 : } else {
4622 0 : xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
4623 : }
4624 :
4625 : return 0;
4626 : }
4627 :
4628 : int
4629 13654 : xfs_btree_change_owner(
4630 : struct xfs_btree_cur *cur,
4631 : uint64_t new_owner,
4632 : struct list_head *buffer_list)
4633 : {
4634 13654 : struct xfs_btree_block_change_owner_info bbcoi;
4635 :
4636 13654 : bbcoi.new_owner = new_owner;
4637 13654 : bbcoi.buffer_list = buffer_list;
4638 :
4639 13654 : return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
4640 : XFS_BTREE_VISIT_ALL, &bbcoi);
4641 : }
4642 :
4643 : /* Verify the v5 fields of a long-format btree block. */
4644 : xfs_failaddr_t
4645 308699000 : xfs_btree_lblock_v5hdr_verify(
4646 : struct xfs_buf *bp,
4647 : uint64_t owner)
4648 : {
4649 308699000 : struct xfs_mount *mp = bp->b_mount;
4650 308699000 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4651 :
4652 308699000 : if (!xfs_has_crc(mp))
4653 0 : return __this_address;
4654 308699000 : if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
4655 0 : return __this_address;
4656 308698678 : if (block->bb_u.l.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
4657 0 : return __this_address;
4658 308698678 : if (owner != XFS_RMAP_OWN_UNKNOWN &&
4659 0 : be64_to_cpu(block->bb_u.l.bb_owner) != owner)
4660 0 : return __this_address;
4661 : return NULL;
4662 : }
4663 :
4664 : /* Verify a long-format btree block. */
4665 : xfs_failaddr_t
4666 19492437 : xfs_btree_lblock_verify(
4667 : struct xfs_buf *bp,
4668 : unsigned int max_recs)
4669 : {
4670 19492437 : struct xfs_mount *mp = bp->b_mount;
4671 19492437 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4672 19492437 : xfs_fsblock_t fsb;
4673 19492437 : xfs_failaddr_t fa;
4674 :
4675 19492437 : ASSERT(!(bp->b_target->bt_flags & XFS_BUFTARG_XFILE));
4676 :
4677 : /* numrecs verification */
4678 19492437 : if (be16_to_cpu(block->bb_numrecs) > max_recs)
4679 0 : return __this_address;
4680 :
4681 : /* sibling pointer verification */
4682 19492437 : fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
4683 19492974 : fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
4684 : block->bb_u.l.bb_leftsib);
4685 19492517 : if (!fa)
4686 19492759 : fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
4687 : block->bb_u.l.bb_rightsib);
4688 : return fa;
4689 : }
4690 :
4691 : /**
4692 : * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
4693 : * btree block
4694 : *
4695 : * @bp: buffer containing the btree block
4696 : */
4697 : xfs_failaddr_t
4698 150787598 : xfs_btree_sblock_v5hdr_verify(
4699 : struct xfs_buf *bp)
4700 : {
4701 150787598 : struct xfs_mount *mp = bp->b_mount;
4702 150787598 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4703 150787598 : struct xfs_perag *pag = bp->b_pag;
4704 :
4705 150787598 : if (!xfs_has_crc(mp))
4706 0 : return __this_address;
4707 150787598 : if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
4708 0 : return __this_address;
4709 150787743 : if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
4710 0 : return __this_address;
4711 150787743 : if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
4712 0 : return __this_address;
4713 : return NULL;
4714 : }
4715 :
4716 : /**
4717 : * xfs_btree_sblock_verify() -- verify a short-format btree block
4718 : *
4719 : * @bp: buffer containing the btree block
4720 : * @max_recs: maximum records allowed in this btree node
4721 : */
4722 : xfs_failaddr_t
4723 47099308 : xfs_btree_sblock_verify(
4724 : struct xfs_buf *bp,
4725 : unsigned int max_recs)
4726 : {
4727 47099308 : struct xfs_mount *mp = bp->b_mount;
4728 47099308 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4729 47099308 : xfs_agblock_t agbno;
4730 47099308 : xfs_failaddr_t fa;
4731 :
4732 47099308 : ASSERT(!(bp->b_target->bt_flags & XFS_BUFTARG_XFILE));
4733 :
4734 : /* numrecs verification */
4735 47099308 : if (be16_to_cpu(block->bb_numrecs) > max_recs)
4736 0 : return __this_address;
4737 :
4738 : /* sibling pointer verification */
4739 47099308 : agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
4740 47099058 : fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno,
4741 : block->bb_u.s.bb_leftsib);
4742 47097187 : if (!fa)
4743 47098933 : fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno,
4744 : block->bb_u.s.bb_rightsib);
4745 : return fa;
4746 : }
4747 :
4748 : /*
4749 : * For the given limits on leaf and keyptr records per block, calculate the
4750 : * height of the tree needed to index the number of leaf records.
4751 : */
4752 : unsigned int
4753 514780 : xfs_btree_compute_maxlevels(
4754 : const unsigned int *limits,
4755 : unsigned long long records)
4756 : {
4757 514780 : unsigned long long level_blocks = howmany_64(records, limits[0]);
4758 514780 : unsigned int height = 1;
4759 :
4760 3436459 : while (level_blocks > 1) {
4761 2921679 : level_blocks = howmany_64(level_blocks, limits[1]);
4762 2921679 : height++;
4763 : }
4764 :
4765 514780 : return height;
4766 : }
4767 :
4768 : /*
4769 : * For the given limits on leaf and keyptr records per block, calculate the
4770 : * number of blocks needed to index the given number of leaf records.
4771 : */
4772 : unsigned long long
4773 9969173 : xfs_btree_calc_size(
4774 : const unsigned int *limits,
4775 : unsigned long long records)
4776 : {
4777 9969173 : unsigned long long level_blocks = howmany_64(records, limits[0]);
4778 9969173 : unsigned long long blocks = level_blocks;
4779 :
4780 25656655 : while (level_blocks > 1) {
4781 15686471 : level_blocks = howmany_64(level_blocks, limits[1]);
4782 15687482 : blocks += level_blocks;
4783 : }
4784 :
4785 9969711 : return blocks;
4786 : }
4787 :
4788 : /*
4789 : * Given a number of available blocks for the btree to consume with records and
4790 : * pointers, calculate the height of the tree needed to index all the records
4791 : * that space can hold based on the number of pointers each interior node
4792 : * holds.
4793 : *
4794 : * We start by assuming a single level tree consumes a single block, then track
4795 : * the number of blocks each node level consumes until we no longer have space
4796 : * to store the next node level. At this point, we are indexing all the leaf
4797 : * blocks in the space, and there's no more free space to split the tree any
4798 : * further. That's our maximum btree height.
4799 : */
4800 : unsigned int
4801 712441009 : xfs_btree_space_to_height(
4802 : const unsigned int *limits,
4803 : unsigned long long leaf_blocks)
4804 : {
4805 : /*
4806 : * The root btree block can have fewer than minrecs pointers in it
4807 : * because the tree might not be big enough to require that amount of
4808 : * fanout. Hence it has a minimum size of 2 pointers, not limits[1].
4809 : */
4810 712441009 : unsigned long long node_blocks = 2;
4811 712441009 : unsigned long long blocks_left = leaf_blocks - 1;
4812 712441009 : unsigned int height = 1;
4813 :
4814 712441009 : if (leaf_blocks < 1)
4815 : return 0;
4816 :
4817 7836510949 : while (node_blocks < blocks_left) {
4818 7124069940 : blocks_left -= node_blocks;
4819 7124069940 : node_blocks *= limits[1];
4820 7124069940 : height++;
4821 : }
4822 :
4823 : return height;
4824 : }
4825 :
4826 : /*
4827 : * Query a regular btree for all records overlapping a given interval.
4828 : * Start with a LE lookup of the key of low_rec and return all records
4829 : * until we find a record with a key greater than the key of high_rec.
4830 : */
4831 : STATIC int
4832 3248224742 : xfs_btree_simple_query_range(
4833 : struct xfs_btree_cur *cur,
4834 : const union xfs_btree_key *low_key,
4835 : const union xfs_btree_key *high_key,
4836 : xfs_btree_query_range_fn fn,
4837 : void *priv)
4838 : {
4839 3248224742 : union xfs_btree_rec *recp;
4840 3248224742 : union xfs_btree_key rec_key;
4841 3248224742 : int stat;
4842 3248224742 : bool firstrec = true;
4843 3248224742 : int error;
4844 :
4845 3248224742 : ASSERT(cur->bc_ops->init_high_key_from_rec);
4846 3248224742 : ASSERT(cur->bc_ops->diff_two_keys);
4847 :
4848 : /*
4849 : * Find the leftmost record. The btree cursor must be set
4850 : * to the low record used to generate low_key.
4851 : */
4852 3248224742 : stat = 0;
4853 3248224742 : error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
4854 3249032576 : if (error)
4855 12 : goto out;
4856 :
4857 : /* Nothing? See if there's anything to the right. */
4858 3249032564 : if (!stat) {
4859 753332073 : error = xfs_btree_increment(cur, 0, &stat);
4860 753010653 : if (error)
4861 0 : goto out;
4862 : }
4863 :
4864 89214052672 : while (stat) {
4865 : /* Find the record. */
4866 88594687694 : error = xfs_btree_get_rec(cur, &recp, &stat);
4867 88622713674 : if (error || !stat)
4868 : break;
4869 :
4870 : /* Skip if low_key > high_key(rec). */
4871 88622713674 : if (firstrec) {
4872 2811813599 : cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
4873 2811145270 : firstrec = false;
4874 2811145270 : if (xfs_btree_keycmp_gt(cur, low_key, &rec_key))
4875 2494968069 : goto advloop;
4876 : }
4877 :
4878 : /* Stop if low_key(rec) > high_key. */
4879 86127208559 : cur->bc_ops->init_key_from_rec(&rec_key, recp);
4880 86109342197 : if (xfs_btree_keycmp_gt(cur, &rec_key, high_key))
4881 : break;
4882 :
4883 : /* Callback */
4884 83403834094 : error = fn(cur, recp, priv);
4885 83266878881 : if (error)
4886 : break;
4887 :
4888 83266870237 : advloop:
4889 : /* Move on to the next record. */
4890 85761838306 : error = xfs_btree_increment(cur, 0, &stat);
4891 85965341528 : if (error)
4892 : break;
4893 : }
4894 :
4895 3248234184 : out:
4896 3248234196 : return error;
4897 : }
4898 :
4899 : /*
4900 : * Query an overlapped interval btree for all records overlapping a given
4901 : * interval. This function roughly follows the algorithm given in
4902 : * "Interval Trees" of _Introduction to Algorithms_, which is section
4903 : * 14.3 in the 2nd and 3rd editions.
4904 : *
4905 : * First, generate keys for the low and high records passed in.
4906 : *
4907 : * For any leaf node, generate the high and low keys for the record.
4908 : * If the record keys overlap with the query low/high keys, pass the
4909 : * record to the function iterator.
4910 : *
4911 : * For any internal node, compare the low and high keys of each
4912 : * pointer against the query low/high keys. If there's an overlap,
4913 : * follow the pointer.
4914 : *
4915 : * As an optimization, we stop scanning a block when we find a low key
4916 : * that is greater than the query's high key.
4917 : */
4918 : STATIC int
4919 1098486102 : xfs_btree_overlapped_query_range(
4920 : struct xfs_btree_cur *cur,
4921 : const union xfs_btree_key *low_key,
4922 : const union xfs_btree_key *high_key,
4923 : xfs_btree_query_range_fn fn,
4924 : void *priv)
4925 : {
4926 1098486102 : union xfs_btree_ptr ptr;
4927 1098486102 : union xfs_btree_ptr *pp;
4928 1098486102 : union xfs_btree_key rec_key;
4929 1098486102 : union xfs_btree_key rec_hkey;
4930 1098486102 : union xfs_btree_key *lkp;
4931 1098486102 : union xfs_btree_key *hkp;
4932 1098486102 : union xfs_btree_rec *recp;
4933 1098486102 : struct xfs_btree_block *block;
4934 1098486102 : int level;
4935 1098486102 : struct xfs_buf *bp;
4936 1098486102 : int i;
4937 1098486102 : int error;
4938 :
4939 : /* Load the root of the btree. */
4940 1098486102 : level = cur->bc_nlevels - 1;
4941 1098486102 : cur->bc_ops->init_ptr_from_cur(cur, &ptr);
4942 1098568516 : error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
4943 1099028504 : if (error)
4944 : return error;
4945 1099029701 : xfs_btree_get_block(cur, level, &bp);
4946 1098928561 : trace_xfs_btree_overlapped_query_range(cur, level, bp);
4947 : #ifdef DEBUG
4948 1098745427 : error = xfs_btree_check_block(cur, block, level, bp);
4949 1098697251 : if (error)
4950 0 : goto out;
4951 : #endif
4952 1098697251 : cur->bc_levels[level].ptr = 1;
4953 :
4954 >12337*10^7 : while (level < cur->bc_nlevels) {
4955 >12228*10^7 : block = xfs_btree_get_block(cur, level, &bp);
4956 :
4957 : /* End of node, pop back towards the root. */
4958 >12529*10^7 : if (cur->bc_levels[level].ptr >
4959 >12232*10^7 : be16_to_cpu(block->bb_numrecs)) {
4960 317097609 : pop_up:
4961 2968593480 : if (level < cur->bc_nlevels - 1)
4962 1872299990 : cur->bc_levels[level + 1].ptr++;
4963 2968593480 : level++;
4964 2968593480 : continue;
4965 : }
4966 :
4967 >12201*10^7 : if (level == 0) {
4968 : /* Handle a leaf node. */
4969 81015465436 : recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
4970 : block);
4971 :
4972 81015465436 : cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
4973 80964749606 : cur->bc_ops->init_key_from_rec(&rec_key, recp);
4974 :
4975 : /*
4976 : * If (query's high key < record's low key), then there
4977 : * are no more interesting records in this block. Pop
4978 : * up to the leaf level to find more record blocks.
4979 : *
4980 : * If (record's high key >= query's low key) and
4981 : * (query's high key >= record's low key), then
4982 : * this record overlaps the query range; callback.
4983 : */
4984 80920811001 : if (xfs_btree_keycmp_lt(cur, high_key, &rec_key))
4985 1080040930 : goto pop_up;
4986 79882512660 : if (xfs_btree_keycmp_ge(cur, &rec_hkey, low_key)) {
4987 3084611178 : error = fn(cur, recp, priv);
4988 3087793939 : if (error)
4989 : break;
4990 : }
4991 79875346765 : cur->bc_levels[level].ptr++;
4992 79875346765 : continue;
4993 : }
4994 :
4995 : /* Handle an internal node. */
4996 40995381994 : lkp = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);
4997 40995381994 : hkp = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr,
4998 : block);
4999 40995381994 : pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
5000 :
5001 : /*
5002 : * If (query's high key < pointer's low key), then there are no
5003 : * more interesting keys in this block. Pop up one leaf level
5004 : * to continue looking for records.
5005 : *
5006 : * If (pointer's high key >= query's low key) and
5007 : * (query's high key >= pointer's low key), then
5008 : * this record overlaps the query range; follow pointer.
5009 : */
5010 40979959606 : if (xfs_btree_keycmp_lt(cur, high_key, lkp))
5011 1571454941 : goto pop_up;
5012 39410755382 : if (xfs_btree_keycmp_ge(cur, hkp, low_key)) {
5013 1875160792 : level--;
5014 1875160792 : error = xfs_btree_lookup_get_block(cur, level, pp,
5015 : &block);
5016 1875473429 : if (error)
5017 0 : goto out;
5018 1875473429 : xfs_btree_get_block(cur, level, &bp);
5019 1875371290 : trace_xfs_btree_overlapped_query_range(cur, level, bp);
5020 : #ifdef DEBUG
5021 1875345041 : error = xfs_btree_check_block(cur, block, level, bp);
5022 1875352553 : if (error)
5023 0 : goto out;
5024 : #endif
5025 1875352553 : cur->bc_levels[level].ptr = 1;
5026 1875352553 : continue;
5027 : }
5028 37560979969 : cur->bc_levels[level].ptr++;
5029 : }
5030 :
5031 1098996515 : out:
5032 : /*
5033 : * If we don't end this function with the cursor pointing at a record
5034 : * block, a subsequent non-error cursor deletion will not release
5035 : * node-level buffers, causing a buffer leak. This is quite possible
5036 : * with a zero-results range query, so release the buffers if we
5037 : * failed to return any results.
5038 : */
5039 1098996515 : if (cur->bc_levels[0].bp == NULL) {
5040 12127 : for (i = 0; i < cur->bc_nlevels; i++) {
5041 9006 : if (cur->bc_levels[i].bp) {
5042 5787 : xfs_trans_brelse(cur->bc_tp,
5043 : cur->bc_levels[i].bp);
5044 5787 : cur->bc_levels[i].bp = NULL;
5045 5787 : cur->bc_levels[i].ptr = 0;
5046 5787 : cur->bc_levels[i].ra = 0;
5047 : }
5048 : }
5049 : }
5050 :
5051 : return error;
5052 : }
5053 :
5054 : static inline void
5055 15990089913 : xfs_btree_key_from_irec(
5056 : struct xfs_btree_cur *cur,
5057 : union xfs_btree_key *key,
5058 : const union xfs_btree_irec *irec)
5059 : {
5060 15990089913 : union xfs_btree_rec rec;
5061 :
5062 15990089913 : cur->bc_rec = *irec;
5063 15990089913 : cur->bc_ops->init_rec_from_cur(cur, &rec);
5064 15988981362 : cur->bc_ops->init_key_from_rec(key, &rec);
5065 15989503479 : }
5066 :
5067 : /*
5068 : * Query a btree for all records overlapping a given interval of keys. The
5069 : * supplied function will be called with each record found; return one of the
5070 : * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
5071 : * code. This function returns -ECANCELED, zero, or a negative error code.
5072 : */
5073 : int
5074 4328821228 : xfs_btree_query_range(
5075 : struct xfs_btree_cur *cur,
5076 : const union xfs_btree_irec *low_rec,
5077 : const union xfs_btree_irec *high_rec,
5078 : xfs_btree_query_range_fn fn,
5079 : void *priv)
5080 : {
5081 4328821228 : union xfs_btree_key low_key;
5082 4328821228 : union xfs_btree_key high_key;
5083 :
5084 : /* Find the keys of both ends of the interval. */
5085 4328821228 : xfs_btree_key_from_irec(cur, &high_key, high_rec);
5086 4329173518 : xfs_btree_key_from_irec(cur, &low_key, low_rec);
5087 :
5088 : /* Enforce low key <= high key. */
5089 4329568509 : if (!xfs_btree_keycmp_le(cur, &low_key, &high_key))
5090 : return -EINVAL;
5091 :
5092 4329538172 : if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
5093 3231149717 : return xfs_btree_simple_query_range(cur, &low_key,
5094 : &high_key, fn, priv);
5095 1098388455 : return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
5096 : fn, priv);
5097 : }
5098 :
5099 : /* Query a btree for all records. */
5100 : int
5101 16710076 : xfs_btree_query_all(
5102 : struct xfs_btree_cur *cur,
5103 : xfs_btree_query_range_fn fn,
5104 : void *priv)
5105 : {
5106 16710076 : union xfs_btree_key low_key;
5107 16710076 : union xfs_btree_key high_key;
5108 :
5109 16710076 : memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
5110 16710076 : memset(&low_key, 0, sizeof(low_key));
5111 16710076 : memset(&high_key, 0xFF, sizeof(high_key));
5112 :
5113 16710076 : return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv);
5114 : }
5115 :
5116 : static int
5117 129030264 : xfs_btree_count_blocks_helper(
5118 : struct xfs_btree_cur *cur,
5119 : int level,
5120 : void *data)
5121 : {
5122 129030264 : xfs_extlen_t *blocks = data;
5123 129030264 : (*blocks)++;
5124 :
5125 129030264 : return 0;
5126 : }
5127 :
5128 : /* Count the blocks in a btree and return the result in *blocks. */
5129 : int
5130 7407481 : xfs_btree_count_blocks(
5131 : struct xfs_btree_cur *cur,
5132 : xfs_extlen_t *blocks)
5133 : {
5134 7407481 : *blocks = 0;
5135 7407481 : return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
5136 : XFS_BTREE_VISIT_ALL, blocks);
5137 : }
5138 :
5139 : /* Compare two btree pointers. */
5140 : int64_t
5141 13858824 : xfs_btree_diff_two_ptrs(
5142 : struct xfs_btree_cur *cur,
5143 : const union xfs_btree_ptr *a,
5144 : const union xfs_btree_ptr *b)
5145 : {
5146 13858824 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
5147 1763367 : return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
5148 12095457 : return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
5149 : }
5150 :
5151 : struct xfs_btree_has_records {
5152 : /* Keys for the start and end of the range we want to know about. */
5153 : union xfs_btree_key start_key;
5154 : union xfs_btree_key end_key;
5155 :
5156 : /* Mask for key comparisons, if desired. */
5157 : const union xfs_btree_key *key_mask;
5158 :
5159 : /* Highest record key we've seen so far. */
5160 : union xfs_btree_key high_key;
5161 :
5162 : enum xbtree_recpacking outcome;
5163 : };
5164 :
5165 : STATIC int
5166 0 : xfs_btree_has_records_helper(
5167 : struct xfs_btree_cur *cur,
5168 : const union xfs_btree_rec *rec,
5169 : void *priv)
5170 : {
5171 0 : union xfs_btree_key rec_key;
5172 0 : union xfs_btree_key rec_high_key;
5173 0 : struct xfs_btree_has_records *info = priv;
5174 0 : enum xbtree_key_contig key_contig;
5175 :
5176 0 : cur->bc_ops->init_key_from_rec(&rec_key, rec);
5177 :
5178 0 : if (info->outcome == XBTREE_RECPACKING_EMPTY) {
5179 0 : info->outcome = XBTREE_RECPACKING_SPARSE;
5180 :
5181 : /*
5182 : * If the first record we find does not overlap the start key,
5183 : * then there is a hole at the start of the search range.
5184 : * Classify this as sparse and stop immediately.
5185 : */
5186 0 : if (xfs_btree_masked_keycmp_lt(cur, &info->start_key, &rec_key,
5187 : info->key_mask))
5188 : return -ECANCELED;
5189 : } else {
5190 : /*
5191 : * If a subsequent record does not overlap with the any record
5192 : * we've seen so far, there is a hole in the middle of the
5193 : * search range. Classify this as sparse and stop.
5194 : * If the keys overlap and this btree does not allow overlap,
5195 : * signal corruption.
5196 : */
5197 0 : key_contig = cur->bc_ops->keys_contiguous(cur, &info->high_key,
5198 : &rec_key, info->key_mask);
5199 0 : if (key_contig == XBTREE_KEY_OVERLAP &&
5200 0 : !(cur->bc_flags & XFS_BTREE_OVERLAPPING))
5201 : return -EFSCORRUPTED;
5202 0 : if (key_contig == XBTREE_KEY_GAP)
5203 : return -ECANCELED;
5204 : }
5205 :
5206 : /*
5207 : * If high_key(rec) is larger than any other high key we've seen,
5208 : * remember it for later.
5209 : */
5210 0 : cur->bc_ops->init_high_key_from_rec(&rec_high_key, rec);
5211 0 : if (xfs_btree_masked_keycmp_gt(cur, &rec_high_key, &info->high_key,
5212 : info->key_mask))
5213 0 : info->high_key = rec_high_key; /* struct copy */
5214 :
5215 : return 0;
5216 : }
5217 :
5218 : /*
5219 : * Scan part of the keyspace of a btree and tell us if that keyspace does not
5220 : * map to any records; is fully mapped to records; or is partially mapped to
5221 : * records. This is the btree record equivalent to determining if a file is
5222 : * sparse.
5223 : *
5224 : * For most btree types, the record scan should use all available btree key
5225 : * fields to compare the keys encountered. These callers should pass NULL for
5226 : * @mask. However, some callers (e.g. scanning physical space in the rmapbt)
5227 : * want to ignore some part of the btree record keyspace when performing the
5228 : * comparison. These callers should pass in a union xfs_btree_key object with
5229 : * the fields that *should* be a part of the comparison set to any nonzero
5230 : * value, and the rest zeroed.
5231 : */
5232 : int
5233 3672659967 : xfs_btree_has_records(
5234 : struct xfs_btree_cur *cur,
5235 : const union xfs_btree_irec *low,
5236 : const union xfs_btree_irec *high,
5237 : const union xfs_btree_key *mask,
5238 : enum xbtree_recpacking *outcome)
5239 : {
5240 3672659967 : struct xfs_btree_has_records info = {
5241 : .outcome = XBTREE_RECPACKING_EMPTY,
5242 : .key_mask = mask,
5243 : };
5244 3672659967 : int error;
5245 :
5246 : /* Not all btrees support this operation. */
5247 3672659967 : if (!cur->bc_ops->keys_contiguous) {
5248 0 : ASSERT(0);
5249 0 : return -EOPNOTSUPP;
5250 : }
5251 :
5252 3672659967 : xfs_btree_key_from_irec(cur, &info.start_key, low);
5253 3670557825 : xfs_btree_key_from_irec(cur, &info.end_key, high);
5254 :
5255 3671247765 : error = xfs_btree_query_range(cur, low, high,
5256 : xfs_btree_has_records_helper, &info);
5257 3672328770 : if (error == -ECANCELED)
5258 0 : goto out;
5259 3672328770 : if (error)
5260 : return error;
5261 :
5262 3672328770 : if (info.outcome == XBTREE_RECPACKING_EMPTY)
5263 3672328770 : goto out;
5264 :
5265 : /*
5266 : * If the largest high_key(rec) we saw during the walk is greater than
5267 : * the end of the search range, classify this as full. Otherwise,
5268 : * there is a hole at the end of the search range.
5269 : */
5270 0 : if (xfs_btree_masked_keycmp_ge(cur, &info.high_key, &info.end_key,
5271 : mask))
5272 0 : info.outcome = XBTREE_RECPACKING_FULL;
5273 :
5274 0 : out:
5275 3672328770 : *outcome = info.outcome;
5276 3672328770 : return 0;
5277 : }
5278 :
5279 : /* Are there more records in this btree? */
5280 : bool
5281 72897628 : xfs_btree_has_more_records(
5282 : struct xfs_btree_cur *cur)
5283 : {
5284 72897628 : struct xfs_btree_block *block;
5285 72897628 : struct xfs_buf *bp;
5286 :
5287 72897628 : block = xfs_btree_get_block(cur, 0, &bp);
5288 :
5289 : /* There are still records in this block. */
5290 72897496 : if (cur->bc_levels[0].ptr < xfs_btree_get_numrecs(block))
5291 : return true;
5292 :
5293 : /* There are more record blocks. */
5294 551752 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
5295 0 : return block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK);
5296 : else
5297 551752 : return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK);
5298 : }
5299 :
5300 : /* Set up all the btree cursor caches. */
5301 : int __init
5302 50 : xfs_btree_init_cur_caches(void)
5303 : {
5304 50 : int error;
5305 :
5306 50 : error = xfs_allocbt_init_cur_cache();
5307 50 : if (error)
5308 : return error;
5309 50 : error = xfs_inobt_init_cur_cache();
5310 50 : if (error)
5311 0 : goto err;
5312 50 : error = xfs_bmbt_init_cur_cache();
5313 50 : if (error)
5314 0 : goto err;
5315 50 : error = xfs_rmapbt_init_cur_cache();
5316 50 : if (error)
5317 0 : goto err;
5318 50 : error = xfs_refcountbt_init_cur_cache();
5319 50 : if (error)
5320 0 : goto err;
5321 :
5322 : return 0;
5323 0 : err:
5324 0 : xfs_btree_destroy_cur_caches();
5325 0 : return error;
5326 : }
5327 :
5328 : /* Destroy all the btree cursor caches, if they've been allocated. */
5329 : void
5330 49 : xfs_btree_destroy_cur_caches(void)
5331 : {
5332 49 : xfs_allocbt_destroy_cur_cache();
5333 49 : xfs_inobt_destroy_cur_cache();
5334 49 : xfs_bmbt_destroy_cur_cache();
5335 49 : xfs_rmapbt_destroy_cur_cache();
5336 49 : xfs_refcountbt_destroy_cur_cache();
5337 49 : }
5338 :
5339 : /* Move the btree cursor before the first record. */
5340 : int
5341 132214942 : xfs_btree_goto_left_edge(
5342 : struct xfs_btree_cur *cur)
5343 : {
5344 132214942 : int stat = 0;
5345 132214942 : int error;
5346 :
5347 132214942 : memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
5348 132214942 : error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
5349 132215081 : if (error)
5350 : return error;
5351 132215081 : if (!stat)
5352 : return 0;
5353 :
5354 0 : error = xfs_btree_decrement(cur, 0, &stat);
5355 0 : if (error)
5356 : return error;
5357 0 : if (stat != 0) {
5358 0 : ASSERT(0);
5359 0 : xfs_btree_mark_sick(cur);
5360 0 : return -EFSCORRUPTED;
5361 : }
5362 :
5363 : return 0;
5364 : }
|