Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
4 : * All Rights Reserved.
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_log_format.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_bit.h"
13 : #include "xfs_mount.h"
14 : #include "xfs_inode.h"
15 : #include "xfs_trans.h"
16 : #include "xfs_buf_item.h"
17 : #include "xfs_btree.h"
18 : #include "xfs_errortag.h"
19 : #include "xfs_error.h"
20 : #include "xfs_trace.h"
21 : #include "xfs_alloc.h"
22 : #include "xfs_log.h"
23 : #include "xfs_btree_staging.h"
24 : #include "xfs_ag.h"
25 : #include "xfs_alloc_btree.h"
26 : #include "xfs_ialloc_btree.h"
27 : #include "xfs_bmap_btree.h"
28 : #include "xfs_rmap_btree.h"
29 : #include "xfs_refcount_btree.h"
30 : #include "xfs_health.h"
31 : #include "scrub/xfile.h"
32 : #include "scrub/xfbtree.h"
33 : #include "xfs_btree_mem.h"
34 : #include "xfs_rtgroup.h"
35 : #include "xfs_rtrmap_btree.h"
36 : #include "xfs_bmap.h"
37 : #include "xfs_rmap.h"
38 : #include "xfs_quota.h"
39 : #include "xfs_imeta.h"
40 : #include "xfs_rtrefcount_btree.h"
41 :
42 : /*
43 : * Btree magic numbers.
44 : */
45 : uint32_t
46 >25978*10^7 : xfs_btree_magic(
47 : struct xfs_mount *mp,
48 : const struct xfs_btree_ops *ops)
49 : {
50 >25978*10^7 : int idx = xfs_has_crc(mp) ? 1 : 0;
51 >25978*10^7 : __be32 magic = ops->buf_ops->magic[idx];
52 :
53 : /* Ensure we asked for crc for crc-only magics. */
54 >25978*10^7 : ASSERT(magic != 0);
55 >25978*10^7 : return be32_to_cpu(magic);
56 : }
57 :
58 : /*
59 : * These sibling pointer checks are optimised for null sibling pointers. This
60 : * happens a lot, and we don't need to byte swap at runtime if the sibling
61 : * pointer is NULL.
62 : *
63 : * These are explicitly marked at inline because the cost of calling them as
64 : * functions instead of inlining them is about 36 bytes extra code per call site
65 : * on x86-64. Yes, gcc-11 fails to inline them, and explicit inlining of these
66 : * two sibling check functions reduces the compiled code size by over 300
67 : * bytes.
68 : */
69 : static inline xfs_failaddr_t
70 >16389*10^7 : xfs_btree_check_lblock_siblings(
71 : struct xfs_mount *mp,
72 : struct xfs_btree_cur *cur,
73 : int level,
74 : xfs_fsblock_t fsb,
75 : __be64 dsibling)
76 : {
77 >16389*10^7 : xfs_fsblock_t sibling;
78 :
79 >16389*10^7 : if (dsibling == cpu_to_be64(NULLFSBLOCK))
80 : return NULL;
81 :
82 >15561*10^7 : sibling = be64_to_cpu(dsibling);
83 >15561*10^7 : if (sibling == fsb)
84 0 : return __this_address;
85 >15561*10^7 : if (level >= 0) {
86 >15555*10^7 : if (!xfs_btree_check_lptr(cur, sibling, level + 1))
87 0 : return __this_address;
88 59223344 : } else if (cur && (cur->bc_flags & XFS_BTREE_IN_XFILE)) {
89 0 : if (!xfbtree_verify_xfileoff(cur, sibling))
90 0 : return __this_address;
91 : } else {
92 59223344 : if (!xfs_verify_fsbno(mp, sibling))
93 0 : return __this_address;
94 : }
95 :
96 : return NULL;
97 : }
98 :
99 : static inline xfs_failaddr_t
100 >35254*10^7 : xfs_btree_check_sblock_siblings(
101 : struct xfs_perag *pag,
102 : struct xfs_btree_cur *cur,
103 : int level,
104 : xfs_agblock_t agbno,
105 : __be32 dsibling)
106 : {
107 >35254*10^7 : xfs_agblock_t sibling;
108 :
109 >35254*10^7 : if (dsibling == cpu_to_be32(NULLAGBLOCK))
110 : return NULL;
111 :
112 >24946*10^7 : sibling = be32_to_cpu(dsibling);
113 >24946*10^7 : if (sibling == agbno)
114 0 : return __this_address;
115 >24946*10^7 : if (level >= 0) {
116 >24939*10^7 : if (!xfs_btree_check_sptr(cur, sibling, level + 1))
117 0 : return __this_address;
118 62765666 : } else if (cur && (cur->bc_flags & XFS_BTREE_IN_XFILE)) {
119 0 : if (!xfbtree_verify_xfileoff(cur, sibling))
120 0 : return __this_address;
121 : } else {
122 62765666 : if (!xfs_verify_agbno(pag, sibling))
123 0 : return __this_address;
124 : }
125 : return NULL;
126 : }
127 :
128 : /*
129 : * Check a long btree block header. Return the address of the failing check,
130 : * or NULL if everything is ok.
131 : */
132 : xfs_failaddr_t
133 84052940461 : __xfs_btree_check_lblock(
134 : struct xfs_btree_cur *cur,
135 : struct xfs_btree_block *block,
136 : int level,
137 : struct xfs_buf *bp)
138 : {
139 84052940461 : struct xfs_mount *mp = cur->bc_mp;
140 84052940461 : int crc = xfs_has_crc(mp);
141 84052940461 : xfs_failaddr_t fa;
142 84052940461 : xfs_fsblock_t fsb = NULLFSBLOCK;
143 :
144 84052940461 : if (crc) {
145 83813819480 : if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
146 0 : return __this_address;
147 >16828*10^7 : if (block->bb_u.l.bb_blkno !=
148 84144626366 : cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
149 0 : return __this_address;
150 84144626366 : if (block->bb_u.l.bb_pad != cpu_to_be32(0))
151 0 : return __this_address;
152 : }
153 :
154 >16876*10^7 : if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops))
155 0 : return __this_address;
156 85101425262 : if (be16_to_cpu(block->bb_level) != level)
157 1129707217 : return __this_address;
158 >16887*10^7 : if (be16_to_cpu(block->bb_numrecs) >
159 83971718045 : cur->bc_ops->get_maxrecs(cur, level))
160 0 : return __this_address;
161 :
162 84905649263 : if ((cur->bc_flags & XFS_BTREE_IN_XFILE) && bp)
163 2829173329 : fsb = xfbtree_buf_to_xfoff(cur, bp);
164 82076475934 : else if (bp)
165 82074930219 : fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
166 :
167 84905649457 : fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
168 : block->bb_u.l.bb_leftsib);
169 83216878162 : if (!fa)
170 83402614938 : fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
171 : block->bb_u.l.bb_rightsib);
172 : return fa;
173 : }
174 :
175 : /* Check a long btree block header. */
176 : static int
177 84763596356 : xfs_btree_check_lblock(
178 : struct xfs_btree_cur *cur,
179 : struct xfs_btree_block *block,
180 : int level,
181 : struct xfs_buf *bp)
182 : {
183 84763596356 : struct xfs_mount *mp = cur->bc_mp;
184 84763596356 : xfs_failaddr_t fa;
185 :
186 84763596356 : fa = __xfs_btree_check_lblock(cur, block, level, bp);
187 >17034*10^7 : if (XFS_IS_CORRUPT(mp, fa != NULL) ||
188 84845310054 : XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK)) {
189 0 : if (bp)
190 0 : trace_xfs_btree_corrupt(bp, _RET_IP_);
191 0 : xfs_btree_mark_sick(cur);
192 0 : return -EFSCORRUPTED;
193 : }
194 : return 0;
195 : }
196 :
197 : /*
198 : * Check a short btree block header. Return the address of the failing check,
199 : * or NULL if everything is ok.
200 : */
201 : xfs_failaddr_t
202 >17585*10^7 : __xfs_btree_check_sblock(
203 : struct xfs_btree_cur *cur,
204 : struct xfs_btree_block *block,
205 : int level,
206 : struct xfs_buf *bp)
207 : {
208 >17585*10^7 : struct xfs_mount *mp = cur->bc_mp;
209 >17585*10^7 : struct xfs_perag *pag = cur->bc_ag.pag;
210 >17585*10^7 : int crc = xfs_has_crc(mp);
211 >17585*10^7 : xfs_failaddr_t fa;
212 >17585*10^7 : xfs_agblock_t agbno = NULLAGBLOCK;
213 :
214 >17585*10^7 : if (crc) {
215 >17593*10^7 : if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
216 0 : return __this_address;
217 >35267*10^7 : if (block->bb_u.s.bb_blkno !=
218 >17633*10^7 : cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
219 0 : return __this_address;
220 : }
221 :
222 >35251*10^7 : if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops))
223 0 : return __this_address;
224 >17686*10^7 : if (be16_to_cpu(block->bb_level) != level)
225 455417601 : return __this_address;
226 >35246*10^7 : if (be16_to_cpu(block->bb_numrecs) >
227 >17640*10^7 : cur->bc_ops->get_maxrecs(cur, level))
228 0 : return __this_address;
229 :
230 >17605*10^7 : if ((cur->bc_flags & XFS_BTREE_IN_XFILE) && bp) {
231 264462941 : pag = NULL;
232 264462941 : agbno = xfbtree_buf_to_xfoff(cur, bp);
233 >17579*10^7 : } else if (bp) {
234 >17579*10^7 : agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
235 : }
236 :
237 >17605*10^7 : fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno,
238 : block->bb_u.s.bb_leftsib);
239 >17635*10^7 : if (!fa)
240 >17645*10^7 : fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno,
241 : block->bb_u.s.bb_rightsib);
242 : return fa;
243 : }
244 :
245 : /* Check a short btree block header. */
246 : STATIC int
247 >17605*10^7 : xfs_btree_check_sblock(
248 : struct xfs_btree_cur *cur,
249 : struct xfs_btree_block *block,
250 : int level,
251 : struct xfs_buf *bp)
252 : {
253 >17605*10^7 : struct xfs_mount *mp = cur->bc_mp;
254 >17605*10^7 : xfs_failaddr_t fa;
255 :
256 >17605*10^7 : fa = __xfs_btree_check_sblock(cur, block, level, bp);
257 >35317*10^7 : if (XFS_IS_CORRUPT(mp, fa != NULL) ||
258 >17645*10^7 : XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_SBLOCK)) {
259 0 : if (bp)
260 0 : trace_xfs_btree_corrupt(bp, _RET_IP_);
261 0 : xfs_btree_mark_sick(cur);
262 0 : return -EFSCORRUPTED;
263 : }
264 : return 0;
265 : }
266 :
267 : /*
268 : * Debug routine: check that block header is ok.
269 : */
270 : int
271 >26125*10^7 : xfs_btree_check_block(
272 : struct xfs_btree_cur *cur, /* btree cursor */
273 : struct xfs_btree_block *block, /* generic btree block pointer */
274 : int level, /* level of the btree block */
275 : struct xfs_buf *bp) /* buffer containing block, if any */
276 : {
277 : /* Don't check the inode-core root. */
278 >26125*10^7 : if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
279 81821259326 : level == cur->bc_nlevels - 1)
280 : return 0;
281 :
282 >26108*10^7 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
283 84751174806 : return xfs_btree_check_lblock(cur, block, level, bp);
284 : else
285 >17633*10^7 : return xfs_btree_check_sblock(cur, block, level, bp);
286 : }
287 :
288 : /* Check that this long pointer is valid and points within the fs. */
289 : bool
290 >16135*10^7 : xfs_btree_check_lptr(
291 : struct xfs_btree_cur *cur,
292 : xfs_fsblock_t fsbno,
293 : int level)
294 : {
295 >16135*10^7 : if (level <= 0)
296 : return false;
297 >16135*10^7 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
298 103347200 : return xfbtree_verify_xfileoff(cur, fsbno);
299 >16124*10^7 : return xfs_verify_fsbno(cur->bc_mp, fsbno);
300 : }
301 :
302 : /* Check that this short pointer is valid and points within the AG. */
303 : bool
304 >29321*10^7 : xfs_btree_check_sptr(
305 : struct xfs_btree_cur *cur,
306 : xfs_agblock_t agbno,
307 : int level)
308 : {
309 >29321*10^7 : if (level <= 0)
310 : return false;
311 >29321*10^7 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
312 310973624 : return xfbtree_verify_xfileoff(cur, agbno);
313 >29290*10^7 : return xfs_verify_agbno(cur->bc_ag.pag, agbno);
314 : }
315 :
316 : /*
317 : * Check that a given (indexed) btree pointer at a certain level of a
318 : * btree is valid and doesn't point past where it should.
319 : */
320 : static int
321 52257991255 : xfs_btree_check_ptr(
322 : struct xfs_btree_cur *cur,
323 : const union xfs_btree_ptr *ptr,
324 : int index,
325 : int level)
326 : {
327 52257991255 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
328 2303417308 : return xfbtree_check_ptr(cur, ptr, index, level);
329 :
330 49954573947 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
331 5275962526 : if (xfs_btree_check_lptr(cur, be64_to_cpu((&ptr->l)[index]),
332 : level))
333 : return 0;
334 0 : xfs_err(cur->bc_mp,
335 : "Inode %llu fork %d: Corrupt btree %d pointer at level %d index %d.",
336 : cur->bc_ino.ip->i_ino,
337 : cur->bc_ino.whichfork, cur->bc_btnum,
338 : level, index);
339 : } else {
340 89357222842 : if (xfs_btree_check_sptr(cur, be32_to_cpu((&ptr->s)[index]),
341 : level))
342 : return 0;
343 0 : xfs_err(cur->bc_mp,
344 : "AG %u: Corrupt btree %d pointer at level %d index %d.",
345 : cur->bc_ag.pag->pag_agno, cur->bc_btnum,
346 : level, index);
347 : }
348 :
349 0 : xfs_btree_mark_sick(cur);
350 0 : return -EFSCORRUPTED;
351 : }
352 :
353 : #ifdef DEBUG
354 : # define xfs_btree_debug_check_ptr xfs_btree_check_ptr
355 : #else
356 : # define xfs_btree_debug_check_ptr(...) (0)
357 : #endif
358 :
359 : /*
360 : * Calculate CRC on the whole btree block and stuff it into the
361 : * long-form btree header.
362 : *
363 : * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
364 : * it into the buffer so recovery knows what the last modification was that made
365 : * it to disk.
366 : */
367 : void
368 22654441 : xfs_btree_lblock_calc_crc(
369 : struct xfs_buf *bp)
370 : {
371 22654441 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
372 22654441 : struct xfs_buf_log_item *bip = bp->b_log_item;
373 :
374 22654441 : if (!xfs_has_crc(bp->b_mount))
375 : return;
376 22654441 : if (bip)
377 22240936 : block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
378 22654441 : xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
379 : }
380 :
381 : bool
382 8184072 : xfs_btree_lblock_verify_crc(
383 : struct xfs_buf *bp)
384 : {
385 8184072 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
386 8184072 : struct xfs_mount *mp = bp->b_mount;
387 :
388 8184072 : if (xfs_has_crc(mp)) {
389 8184072 : if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
390 : return false;
391 8184072 : return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
392 : }
393 :
394 : return true;
395 : }
396 :
397 : /*
398 : * Calculate CRC on the whole btree block and stuff it into the
399 : * short-form btree header.
400 : *
401 : * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
402 : * it into the buffer so recovery knows what the last modification was that made
403 : * it to disk.
404 : */
405 : void
406 18821081 : xfs_btree_sblock_calc_crc(
407 : struct xfs_buf *bp)
408 : {
409 18821081 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
410 18821081 : struct xfs_buf_log_item *bip = bp->b_log_item;
411 :
412 18821081 : if (!xfs_has_crc(bp->b_mount))
413 : return;
414 18814197 : if (bip)
415 18007757 : block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
416 18814197 : xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
417 : }
418 :
419 : bool
420 2395322 : xfs_btree_sblock_verify_crc(
421 : struct xfs_buf *bp)
422 : {
423 2395322 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
424 2395322 : struct xfs_mount *mp = bp->b_mount;
425 :
426 2395322 : if (xfs_has_crc(mp)) {
427 2395170 : if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
428 : return false;
429 2395170 : return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
430 : }
431 :
432 : return true;
433 : }
434 :
435 : static int
436 449577 : xfs_btree_free_block(
437 : struct xfs_btree_cur *cur,
438 : struct xfs_buf *bp)
439 : {
440 449577 : int error;
441 :
442 449577 : trace_xfs_btree_free_block(cur, bp);
443 :
444 449577 : error = cur->bc_ops->free_block(cur, bp);
445 449577 : if (!error) {
446 449577 : xfs_trans_binval(cur->bc_tp, bp);
447 449577 : XFS_BTREE_STATS_INC(cur, free);
448 : }
449 449577 : return error;
450 : }
451 :
452 : /*
453 : * Delete the btree cursor.
454 : */
455 : void
456 8072727470 : xfs_btree_del_cursor(
457 : struct xfs_btree_cur *cur, /* btree cursor */
458 : int error) /* del because of error */
459 : {
460 8072727470 : int i; /* btree level */
461 :
462 : /*
463 : * Clear the buffer pointers and release the buffers. If we're doing
464 : * this because of an error, inspect all of the entries in the bc_bufs
465 : * array for buffers to be unlocked. This is because some of the btree
466 : * code works from level n down to 0, and if we get an error along the
467 : * way we won't have initialized all the entries down to 0.
468 : */
469 20182283308 : for (i = 0; i < cur->bc_nlevels; i++) {
470 12500537122 : if (cur->bc_levels[i].bp)
471 11010799127 : xfs_trans_brelse(cur->bc_tp, cur->bc_levels[i].bp);
472 1489737995 : else if (!error)
473 : break;
474 : }
475 :
476 : /*
477 : * If we are doing a BMBT update, the number of unaccounted blocks
478 : * allocated during this cursor life time should be zero. If it's not
479 : * zero, then we should be shut down or on our way to shutdown due to
480 : * cancelling a dirty transaction on error.
481 : */
482 8074135421 : ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
483 : xfs_is_shutdown(cur->bc_mp) || error != 0);
484 8074135419 : if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
485 0 : kmem_free(cur->bc_ops);
486 8074135419 : if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
487 441849901 : !(cur->bc_flags & XFS_BTREE_IN_XFILE) && cur->bc_ino.rtg)
488 216726624 : xfs_rtgroup_put(cur->bc_ino.rtg);
489 8074138942 : if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
490 6573883693 : !(cur->bc_flags & XFS_BTREE_IN_XFILE) && cur->bc_ag.pag)
491 6573883693 : xfs_perag_put(cur->bc_ag.pag);
492 8074411981 : if (cur->bc_flags & XFS_BTREE_IN_XFILE) {
493 1058421364 : if (cur->bc_mem.pag)
494 23922159 : xfs_perag_put(cur->bc_mem.pag);
495 1058421398 : if (cur->bc_mem.rtg)
496 9213608 : xfs_rtgroup_put(cur->bc_mem.rtg);
497 : }
498 8074412017 : kmem_cache_free(cur->bc_cache, cur);
499 8074142671 : }
500 :
501 : /* Return the buffer target for this btree's buffer. */
502 : static inline struct xfs_buftarg *
503 12674772259 : xfs_btree_buftarg(
504 : struct xfs_btree_cur *cur)
505 : {
506 12674772259 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
507 1099145546 : return xfbtree_target(cur->bc_mem.xfbtree);
508 11575626713 : return cur->bc_mp->m_ddev_targp;
509 : }
510 :
511 : /* Return the block size (in units of 512b sectors) for this btree. */
512 : static inline unsigned int
513 12674248816 : xfs_btree_bbsize(
514 : struct xfs_btree_cur *cur)
515 : {
516 12674248816 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
517 1099145531 : return xfbtree_bbsize();
518 11575103285 : return cur->bc_mp->m_bsize;
519 : }
520 :
521 : /*
522 : * Duplicate the btree cursor.
523 : * Allocate a new one, copy the record, re-get the buffers.
524 : */
525 : int /* error */
526 212133490 : xfs_btree_dup_cursor(
527 : struct xfs_btree_cur *cur, /* input cursor */
528 : struct xfs_btree_cur **ncur) /* output cursor */
529 : {
530 212133490 : struct xfs_buf *bp; /* btree block's buffer pointer */
531 212133490 : int error; /* error return value */
532 212133490 : int i; /* level number of btree block */
533 212133490 : xfs_mount_t *mp; /* mount structure for filesystem */
534 212133490 : struct xfs_btree_cur *new; /* new cursor value */
535 212133490 : xfs_trans_t *tp; /* transaction pointer, can be NULL */
536 :
537 212133490 : tp = cur->bc_tp;
538 212133490 : mp = cur->bc_mp;
539 :
540 : /*
541 : * Allocate a new cursor like the old one.
542 : */
543 212133490 : new = cur->bc_ops->dup_cursor(cur);
544 :
545 : /*
546 : * Copy the record currently in the cursor.
547 : */
548 212136206 : new->bc_rec = cur->bc_rec;
549 :
550 : /*
551 : * For each level current, re-get the buffer and copy the ptr value.
552 : */
553 701882385 : for (i = 0; i < new->bc_nlevels; i++) {
554 489746828 : new->bc_levels[i].ptr = cur->bc_levels[i].ptr;
555 489746828 : new->bc_levels[i].ra = cur->bc_levels[i].ra;
556 489746828 : bp = cur->bc_levels[i].bp;
557 489746828 : if (bp) {
558 417533646 : error = xfs_trans_read_buf(mp, tp,
559 : xfs_btree_buftarg(cur),
560 : xfs_buf_daddr(bp),
561 417531032 : xfs_btree_bbsize(cur), 0, &bp,
562 417532916 : cur->bc_ops->buf_ops);
563 417533003 : if (xfs_metadata_is_sick(error))
564 0 : xfs_btree_mark_sick(new);
565 417533003 : if (error) {
566 6 : xfs_btree_del_cursor(new, error);
567 6 : *ncur = NULL;
568 6 : return error;
569 : }
570 : }
571 489746179 : new->bc_levels[i].bp = bp;
572 : }
573 212135557 : *ncur = new;
574 212135557 : return 0;
575 : }
576 :
577 : /*
578 : * XFS btree block layout and addressing:
579 : *
580 : * There are two types of blocks in the btree: leaf and non-leaf blocks.
581 : *
582 : * The leaf record start with a header then followed by records containing
583 : * the values. A non-leaf block also starts with the same header, and
584 : * then first contains lookup keys followed by an equal number of pointers
585 : * to the btree blocks at the previous level.
586 : *
587 : * +--------+-------+-------+-------+-------+-------+-------+
588 : * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
589 : * +--------+-------+-------+-------+-------+-------+-------+
590 : *
591 : * +--------+-------+-------+-------+-------+-------+-------+
592 : * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
593 : * +--------+-------+-------+-------+-------+-------+-------+
594 : *
595 : * The header is called struct xfs_btree_block for reasons better left unknown
596 : * and comes in different versions for short (32bit) and long (64bit) block
597 : * pointers. The record and key structures are defined by the btree instances
598 : * and opaque to the btree core. The block pointers are simple disk endian
599 : * integers, available in a short (32bit) and long (64bit) variant.
600 : *
601 : * The helpers below calculate the offset of a given record, key or pointer
602 : * into a btree block (xfs_btree_*_offset) or return a pointer to the given
603 : * record, key or pointer (xfs_btree_*_addr). Note that all addressing
604 : * inside the btree block is done using indices starting at one, not zero!
605 : *
606 : * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
607 : * overlapping intervals. In such a tree, records are still sorted lowest to
608 : * highest and indexed by the smallest key value that refers to the record.
609 : * However, nodes are different: each pointer has two associated keys -- one
610 : * indexing the lowest key available in the block(s) below (the same behavior
611 : * as the key in a regular btree) and another indexing the highest key
612 : * available in the block(s) below. Because records are /not/ sorted by the
613 : * highest key, all leaf block updates require us to compute the highest key
614 : * that matches any record in the leaf and to recursively update the high keys
615 : * in the nodes going further up in the tree, if necessary. Nodes look like
616 : * this:
617 : *
618 : * +--------+-----+-----+-----+-----+-----+-------+-------+-----+
619 : * Non-Leaf: | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... |
620 : * +--------+-----+-----+-----+-----+-----+-------+-------+-----+
621 : *
622 : * To perform an interval query on an overlapped tree, perform the usual
623 : * depth-first search and use the low and high keys to decide if we can skip
624 : * that particular node. If a leaf node is reached, return the records that
625 : * intersect the interval. Note that an interval query may return numerous
626 : * entries. For a non-overlapped tree, simply search for the record associated
627 : * with the lowest key and iterate forward until a non-matching record is
628 : * found. Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by
629 : * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discuss this in
630 : * more detail.
631 : *
632 : * Why do we care about overlapping intervals? Let's say you have a bunch of
633 : * reverse mapping records on a reflink filesystem:
634 : *
635 : * 1: +- file A startblock B offset C length D -----------+
636 : * 2: +- file E startblock F offset G length H --------------+
637 : * 3: +- file I startblock F offset J length K --+
638 : * 4: +- file L... --+
639 : *
640 : * Now say we want to map block (B+D) into file A at offset (C+D). Ideally,
641 : * we'd simply increment the length of record 1. But how do we find the record
642 : * that ends at (B+D-1) (i.e. record 1)? A LE lookup of (B+D-1) would return
643 : * record 3 because the keys are ordered first by startblock. An interval
644 : * query would return records 1 and 2 because they both overlap (B+D-1), and
645 : * from that we can pick out record 1 as the appropriate left neighbor.
646 : *
647 : * In the non-overlapped case you can do a LE lookup and decrement the cursor
648 : * because a record's interval must end before the next record.
649 : */
650 :
651 : /*
652 : * Return size of the btree block header for this btree instance.
653 : */
654 >73886*10^7 : static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
655 : {
656 >73886*10^7 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
657 >14799*10^7 : if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
658 : return XFS_BTREE_LBLOCK_CRC_LEN;
659 0 : return XFS_BTREE_LBLOCK_LEN;
660 : }
661 >59087*10^7 : if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
662 >59100*10^7 : return XFS_BTREE_SBLOCK_CRC_LEN;
663 : return XFS_BTREE_SBLOCK_LEN;
664 : }
665 :
666 : /*
667 : * Return size of btree block pointers for this btree instance.
668 : */
669 : static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
670 : {
671 64047836744 : return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
672 64047836744 : sizeof(__be64) : sizeof(__be32);
673 : }
674 :
675 : /*
676 : * Calculate offset of the n-th record in a btree block.
677 : */
678 : STATIC size_t
679 >52374*10^7 : xfs_btree_rec_offset(
680 : struct xfs_btree_cur *cur,
681 : int n)
682 : {
683 >52374*10^7 : return xfs_btree_block_len(cur) +
684 >52374*10^7 : (n - 1) * cur->bc_ops->rec_len;
685 : }
686 :
687 : /*
688 : * Calculate offset of the n-th key in a btree block.
689 : */
690 : STATIC size_t
691 87788403343 : xfs_btree_key_offset(
692 : struct xfs_btree_cur *cur,
693 : int n)
694 : {
695 87788403343 : return xfs_btree_block_len(cur) +
696 87788403343 : (n - 1) * cur->bc_ops->key_len;
697 : }
698 :
699 : /*
700 : * Calculate offset of the n-th high key in a btree block.
701 : */
702 : STATIC size_t
703 65166753234 : xfs_btree_high_key_offset(
704 : struct xfs_btree_cur *cur,
705 : int n)
706 : {
707 65166753234 : return xfs_btree_block_len(cur) +
708 65166753234 : (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2);
709 : }
710 :
711 : /*
712 : * Calculate offset of the n-th block pointer in a btree block.
713 : */
714 : STATIC size_t
715 64028826499 : xfs_btree_ptr_offset(
716 : struct xfs_btree_cur *cur,
717 : int n,
718 : int level)
719 : {
720 64028826499 : return xfs_btree_block_len(cur) +
721 >12806*10^7 : cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
722 >12032*10^7 : (n - 1) * xfs_btree_ptr_len(cur);
723 : }
724 :
725 : /*
726 : * Return a pointer to the n-th record in the btree block.
727 : */
728 : union xfs_btree_rec *
729 5225713953 : xfs_btree_rec_addr(
730 : struct xfs_btree_cur *cur,
731 : int n,
732 : struct xfs_btree_block *block)
733 : {
734 >52300*10^7 : return (union xfs_btree_rec *)
735 >52300*10^7 : ((char *)block + xfs_btree_rec_offset(cur, n));
736 : }
737 :
738 : /*
739 : * Return a pointer to the n-th key in the btree block.
740 : */
741 : union xfs_btree_key *
742 4106943171 : xfs_btree_key_addr(
743 : struct xfs_btree_cur *cur,
744 : int n,
745 : struct xfs_btree_block *block)
746 : {
747 87444289785 : return (union xfs_btree_key *)
748 87444289785 : ((char *)block + xfs_btree_key_offset(cur, n));
749 : }
750 :
751 : /*
752 : * Return a pointer to the n-th high key in the btree block.
753 : */
754 : union xfs_btree_key *
755 3103639032 : xfs_btree_high_key_addr(
756 : struct xfs_btree_cur *cur,
757 : int n,
758 : struct xfs_btree_block *block)
759 : {
760 65180853692 : return (union xfs_btree_key *)
761 65180853692 : ((char *)block + xfs_btree_high_key_offset(cur, n));
762 : }
763 :
764 : /*
765 : * Return a pointer to the n-th block pointer in the btree block.
766 : */
767 : union xfs_btree_ptr *
768 64027272440 : xfs_btree_ptr_addr(
769 : struct xfs_btree_cur *cur,
770 : int n,
771 : struct xfs_btree_block *block)
772 : {
773 64027272440 : int level = xfs_btree_get_level(block);
774 :
775 64027272440 : ASSERT(block->bb_level != 0);
776 :
777 >12805*10^7 : return (union xfs_btree_ptr *)
778 64027272440 : ((char *)block + xfs_btree_ptr_offset(cur, n, level));
779 : }
780 :
781 : struct xfs_ifork *
782 3426860890 : xfs_btree_ifork_ptr(
783 : struct xfs_btree_cur *cur)
784 : {
785 3426860890 : ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
786 :
787 3426860890 : if (cur->bc_flags & XFS_BTREE_STAGING)
788 132175 : return cur->bc_ino.ifake->if_fork;
789 3426728715 : return xfs_ifork_ptr(cur->bc_ino.ip, cur->bc_ino.whichfork);
790 : }
791 :
792 : /*
793 : * Get the root block which is stored in the inode.
794 : *
795 : * For now this btree implementation assumes the btree root is always
796 : * stored in the if_broot field of an inode fork.
797 : */
798 : STATIC struct xfs_btree_block *
799 2013036000 : xfs_btree_get_iroot(
800 : struct xfs_btree_cur *cur)
801 : {
802 2013036000 : struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur);
803 :
804 2013106684 : return (struct xfs_btree_block *)ifp->if_broot;
805 : }
806 :
807 : /*
808 : * Retrieve the block pointer from the cursor at the given level.
809 : * This may be an inode btree root or from a buffer.
810 : */
811 : struct xfs_btree_block * /* generic btree block pointer */
812 >47052*10^7 : xfs_btree_get_block(
813 : struct xfs_btree_cur *cur, /* btree cursor */
814 : int level, /* level in btree */
815 : struct xfs_buf **bpp) /* buffer containing the block */
816 : {
817 >47052*10^7 : if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
818 >11000*10^7 : (level == cur->bc_nlevels - 1)) {
819 620387778 : *bpp = NULL;
820 620387778 : return xfs_btree_get_iroot(cur);
821 : }
822 :
823 >46990*10^7 : *bpp = cur->bc_levels[level].bp;
824 >46990*10^7 : return XFS_BUF_TO_BLOCK(*bpp);
825 : }
826 :
827 : /*
828 : * Change the cursor to point to the first record at the given level.
829 : * Other levels are unaffected.
830 : */
831 : STATIC int /* success=1, failure=0 */
832 82499122 : xfs_btree_firstrec(
833 : struct xfs_btree_cur *cur, /* btree cursor */
834 : int level) /* level to change */
835 : {
836 82499122 : struct xfs_btree_block *block; /* generic btree block pointer */
837 82499122 : struct xfs_buf *bp; /* buffer containing block */
838 :
839 : /*
840 : * Get the block pointer for this level.
841 : */
842 82499122 : block = xfs_btree_get_block(cur, level, &bp);
843 82499241 : if (xfs_btree_check_block(cur, block, level, bp))
844 : return 0;
845 : /*
846 : * It's empty, there is no such record.
847 : */
848 82499431 : if (!block->bb_numrecs)
849 : return 0;
850 : /*
851 : * Set the ptr value to 1, that's the first record/key.
852 : */
853 82499431 : cur->bc_levels[level].ptr = 1;
854 82499431 : return 1;
855 : }
856 :
857 : /*
858 : * Change the cursor to point to the last record in the current block
859 : * at the given level. Other levels are unaffected.
860 : */
861 : STATIC int /* success=1, failure=0 */
862 88711133 : xfs_btree_lastrec(
863 : struct xfs_btree_cur *cur, /* btree cursor */
864 : int level) /* level to change */
865 : {
866 88711133 : struct xfs_btree_block *block; /* generic btree block pointer */
867 88711133 : struct xfs_buf *bp; /* buffer containing block */
868 :
869 : /*
870 : * Get the block pointer for this level.
871 : */
872 88711133 : block = xfs_btree_get_block(cur, level, &bp);
873 88711266 : if (xfs_btree_check_block(cur, block, level, bp))
874 : return 0;
875 : /*
876 : * It's empty, there is no such record.
877 : */
878 88711636 : if (!block->bb_numrecs)
879 : return 0;
880 : /*
881 : * Set the ptr value to numrecs, that's the last record/key.
882 : */
883 88711636 : cur->bc_levels[level].ptr = be16_to_cpu(block->bb_numrecs);
884 88711636 : return 1;
885 : }
886 :
887 : /*
888 : * Compute first and last byte offsets for the fields given.
889 : * Interprets the offsets table, which contains struct field offsets.
890 : */
891 : void
892 2055434749 : xfs_btree_offsets(
893 : uint32_t fields, /* bitmask of fields */
894 : const short *offsets, /* table of field offsets */
895 : int nbits, /* number of bits to inspect */
896 : int *first, /* output: first byte offset */
897 : int *last) /* output: last byte offset */
898 : {
899 2055434749 : int i; /* current bit number */
900 2055434749 : uint32_t imask; /* mask for current bit number */
901 :
902 2055434749 : ASSERT(fields != 0);
903 : /*
904 : * Find the lowest bit, so the first byte offset.
905 : */
906 7734769232 : for (i = 0, imask = 1u; ; i++, imask <<= 1) {
907 7734769232 : if (imask & fields) {
908 2055434749 : *first = offsets[i];
909 2055434749 : break;
910 : }
911 : }
912 : /*
913 : * Find the highest bit, so the last byte offset.
914 : */
915 2055434749 : for (i = nbits - 1, imask = 1u << i; ; i--, imask >>= 1) {
916 14335346669 : if (imask & fields) {
917 2055434749 : *last = offsets[i + 1] - 1;
918 2055434749 : break;
919 : }
920 : }
921 2055434749 : }
922 :
923 : /*
924 : * Get a buffer for the block, return it read in.
925 : * Long-form addressing.
926 : */
927 : int
928 410320077 : xfs_btree_read_bufl(
929 : struct xfs_mount *mp, /* file system mount point */
930 : struct xfs_trans *tp, /* transaction pointer */
931 : xfs_fsblock_t fsbno, /* file system block number */
932 : struct xfs_buf **bpp, /* buffer for fsbno */
933 : int refval, /* ref count value for buffer */
934 : const struct xfs_buf_ops *ops)
935 : {
936 410320077 : struct xfs_buf *bp; /* return value */
937 410320077 : xfs_daddr_t d; /* real disk block address */
938 410320077 : int error;
939 :
940 410320077 : if (!xfs_verify_fsbno(mp, fsbno))
941 : return -EFSCORRUPTED;
942 410320973 : d = XFS_FSB_TO_DADDR(mp, fsbno);
943 410320973 : error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
944 : mp->m_bsize, 0, &bp, ops);
945 410321814 : if (error)
946 : return error;
947 410321763 : if (bp)
948 410321763 : xfs_buf_set_ref(bp, refval);
949 410321833 : *bpp = bp;
950 410321833 : return 0;
951 : }
952 :
953 : /*
954 : * Read-ahead the block, don't wait for it, don't return a buffer.
955 : * Long-form addressing.
956 : */
957 : /* ARGSUSED */
958 : void
959 439395821 : xfs_btree_reada_bufl(
960 : struct xfs_mount *mp, /* file system mount point */
961 : xfs_fsblock_t fsbno, /* file system block number */
962 : xfs_extlen_t count, /* count of filesystem blocks */
963 : const struct xfs_buf_ops *ops)
964 : {
965 439395821 : xfs_daddr_t d;
966 :
967 439395821 : ASSERT(fsbno != NULLFSBLOCK);
968 439395821 : d = XFS_FSB_TO_DADDR(mp, fsbno);
969 439395821 : xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
970 439412098 : }
971 :
972 : /*
973 : * Read-ahead the block, don't wait for it, don't return a buffer.
974 : * Short-form addressing.
975 : */
976 : /* ARGSUSED */
977 : void
978 4964643656 : xfs_btree_reada_bufs(
979 : struct xfs_mount *mp, /* file system mount point */
980 : xfs_agnumber_t agno, /* allocation group number */
981 : xfs_agblock_t agbno, /* allocation group block number */
982 : xfs_extlen_t count, /* count of filesystem blocks */
983 : const struct xfs_buf_ops *ops)
984 : {
985 4964643656 : xfs_daddr_t d;
986 :
987 4964643656 : ASSERT(agno != NULLAGNUMBER);
988 4964643656 : ASSERT(agbno != NULLAGBLOCK);
989 4964643656 : d = XFS_AGB_TO_DADDR(mp, agno, agbno);
990 4964643656 : xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
991 4966474002 : }
992 :
993 : STATIC int
994 442603187 : xfs_btree_readahead_lblock(
995 : struct xfs_btree_cur *cur,
996 : int lr,
997 : struct xfs_btree_block *block)
998 : {
999 442603187 : int rval = 0;
1000 442603187 : xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
1001 442603187 : xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
1002 :
1003 442603187 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
1004 : return 0;
1005 :
1006 439398575 : if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
1007 38857475 : xfs_btree_reada_bufl(cur->bc_mp, left, 1,
1008 38857475 : cur->bc_ops->buf_ops);
1009 38857475 : rval++;
1010 : }
1011 :
1012 439398645 : if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
1013 400539638 : xfs_btree_reada_bufl(cur->bc_mp, right, 1,
1014 400539638 : cur->bc_ops->buf_ops);
1015 400554619 : rval++;
1016 : }
1017 :
1018 : return rval;
1019 : }
1020 :
1021 : STATIC int
1022 1491861741 : xfs_btree_readahead_sblock(
1023 : struct xfs_btree_cur *cur,
1024 : int lr,
1025 : struct xfs_btree_block *block)
1026 : {
1027 1491861741 : int rval = 0;
1028 1491861741 : xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib);
1029 1491861741 : xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib);
1030 :
1031 1491861741 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
1032 : return 0;
1033 :
1034 1486094868 : if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
1035 77831277 : xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1036 77831277 : left, 1, cur->bc_ops->buf_ops);
1037 77831277 : rval++;
1038 : }
1039 :
1040 1486095148 : if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
1041 1408290081 : xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1042 1408290081 : right, 1, cur->bc_ops->buf_ops);
1043 1408371054 : rval++;
1044 : }
1045 :
1046 : return rval;
1047 : }
1048 :
1049 : /*
1050 : * Read-ahead btree blocks, at the given level.
1051 : * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
1052 : */
1053 : STATIC int
1054 >12596*10^7 : xfs_btree_readahead(
1055 : struct xfs_btree_cur *cur, /* btree cursor */
1056 : int lev, /* level in btree */
1057 : int lr) /* left/right bits */
1058 : {
1059 >12596*10^7 : struct xfs_btree_block *block;
1060 :
1061 : /*
1062 : * No readahead needed if we are at the root level and the
1063 : * btree root is stored in the inode.
1064 : */
1065 >12596*10^7 : if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1066 40560819755 : (lev == cur->bc_nlevels - 1))
1067 : return 0;
1068 :
1069 >12594*10^7 : if ((cur->bc_levels[lev].ra | lr) == cur->bc_levels[lev].ra)
1070 : return 0;
1071 :
1072 1934499339 : cur->bc_levels[lev].ra |= lr;
1073 1934499339 : block = XFS_BUF_TO_BLOCK(cur->bc_levels[lev].bp);
1074 :
1075 1934499339 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1076 442607158 : return xfs_btree_readahead_lblock(cur, lr, block);
1077 1491892181 : return xfs_btree_readahead_sblock(cur, lr, block);
1078 : }
1079 :
1080 : STATIC int
1081 43580638568 : xfs_btree_ptr_to_daddr(
1082 : struct xfs_btree_cur *cur,
1083 : const union xfs_btree_ptr *ptr,
1084 : xfs_daddr_t *daddr)
1085 : {
1086 43580638568 : xfs_fsblock_t fsbno;
1087 43580638568 : xfs_agblock_t agbno;
1088 43580638568 : int error;
1089 :
1090 43580638568 : error = xfs_btree_check_ptr(cur, ptr, 0, 1);
1091 43599702249 : if (error)
1092 : return error;
1093 :
1094 43599702249 : if (cur->bc_flags & XFS_BTREE_IN_XFILE) {
1095 2254738227 : *daddr = xfbtree_ptr_to_daddr(cur, ptr);
1096 2254738322 : return 0;
1097 : }
1098 :
1099 41344964022 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1100 3481117133 : fsbno = be64_to_cpu(ptr->l);
1101 3481117133 : *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno);
1102 : } else {
1103 37863846889 : agbno = be32_to_cpu(ptr->s);
1104 37863846889 : *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1105 : agbno);
1106 : }
1107 :
1108 : return 0;
1109 : }
1110 :
1111 : /*
1112 : * Readahead @count btree blocks at the given @ptr location.
1113 : *
1114 : * We don't need to care about long or short form btrees here as we have a
1115 : * method of converting the ptr directly to a daddr available to us.
1116 : */
1117 : STATIC void
1118 14031953 : xfs_btree_readahead_ptr(
1119 : struct xfs_btree_cur *cur,
1120 : union xfs_btree_ptr *ptr,
1121 : xfs_extlen_t count)
1122 : {
1123 14031953 : xfs_daddr_t daddr;
1124 :
1125 14031953 : if (xfs_btree_ptr_to_daddr(cur, ptr, &daddr))
1126 0 : return;
1127 14031950 : xfs_buf_readahead(xfs_btree_buftarg(cur), daddr,
1128 14031956 : xfs_btree_bbsize(cur) * count,
1129 14031948 : cur->bc_ops->buf_ops);
1130 : }
1131 :
1132 : /*
1133 : * Set the buffer for level "lev" in the cursor to bp, releasing
1134 : * any previous buffer.
1135 : */
1136 : STATIC void
1137 12098949121 : xfs_btree_setbuf(
1138 : struct xfs_btree_cur *cur, /* btree cursor */
1139 : int lev, /* level in btree */
1140 : struct xfs_buf *bp) /* new buffer to set */
1141 : {
1142 12098949121 : struct xfs_btree_block *b; /* btree block */
1143 :
1144 12098949121 : if (cur->bc_levels[lev].bp)
1145 1502424328 : xfs_trans_brelse(cur->bc_tp, cur->bc_levels[lev].bp);
1146 12096917243 : cur->bc_levels[lev].bp = bp;
1147 12096917243 : cur->bc_levels[lev].ra = 0;
1148 :
1149 12096917243 : b = XFS_BUF_TO_BLOCK(bp);
1150 12096917243 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1151 2192682519 : if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
1152 1334792009 : cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
1153 2192682519 : if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
1154 1467727165 : cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
1155 : } else {
1156 9904234724 : if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
1157 6758740460 : cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
1158 9904234724 : if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
1159 6829180590 : cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
1160 : }
1161 12096917243 : }
1162 :
1163 : bool
1164 39895652 : xfs_btree_ptr_is_null(
1165 : struct xfs_btree_cur *cur,
1166 : const union xfs_btree_ptr *ptr)
1167 : {
1168 470079994 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1169 3093443252 : return ptr->l == cpu_to_be64(NULLFSBLOCK);
1170 : else
1171 5359726705 : return ptr->s == cpu_to_be32(NULLAGBLOCK);
1172 : }
1173 :
1174 : void
1175 2140800 : xfs_btree_set_ptr_null(
1176 : struct xfs_btree_cur *cur,
1177 : union xfs_btree_ptr *ptr)
1178 : {
1179 2140800 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1180 936632054 : ptr->l = cpu_to_be64(NULLFSBLOCK);
1181 : else
1182 735273402 : ptr->s = cpu_to_be32(NULLAGBLOCK);
1183 2140800 : }
1184 :
1185 : /*
1186 : * Get/set/init sibling pointers
1187 : */
1188 : void
1189 6342330755 : xfs_btree_get_sibling(
1190 : struct xfs_btree_cur *cur,
1191 : struct xfs_btree_block *block,
1192 : union xfs_btree_ptr *ptr,
1193 : int lr)
1194 : {
1195 6342330755 : ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
1196 :
1197 6342330755 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1198 1744742777 : if (lr == XFS_BB_RIGHTSIB)
1199 1302968746 : ptr->l = block->bb_u.l.bb_rightsib;
1200 : else
1201 441774031 : ptr->l = block->bb_u.l.bb_leftsib;
1202 : } else {
1203 4597587978 : if (lr == XFS_BB_RIGHTSIB)
1204 4483925784 : ptr->s = block->bb_u.s.bb_rightsib;
1205 : else
1206 113662194 : ptr->s = block->bb_u.s.bb_leftsib;
1207 : }
1208 6342330755 : }
1209 :
1210 : void
1211 5817258 : xfs_btree_set_sibling(
1212 : struct xfs_btree_cur *cur,
1213 : struct xfs_btree_block *block,
1214 : const union xfs_btree_ptr *ptr,
1215 : int lr)
1216 : {
1217 5817258 : ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
1218 :
1219 5817258 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1220 2271202 : if (lr == XFS_BB_RIGHTSIB)
1221 1311556 : block->bb_u.l.bb_rightsib = ptr->l;
1222 : else
1223 959646 : block->bb_u.l.bb_leftsib = ptr->l;
1224 : } else {
1225 3546056 : if (lr == XFS_BB_RIGHTSIB)
1226 1675942 : block->bb_u.s.bb_rightsib = ptr->s;
1227 : else
1228 1870114 : block->bb_u.s.bb_leftsib = ptr->s;
1229 : }
1230 5817258 : }
1231 :
1232 : static void
1233 18083807 : __xfs_btree_init_block(
1234 : struct xfs_mount *mp,
1235 : struct xfs_btree_block *buf,
1236 : const struct xfs_btree_ops *ops,
1237 : xfs_daddr_t blkno,
1238 : __u16 level,
1239 : __u16 numrecs,
1240 : __u64 owner)
1241 : {
1242 18083807 : int crc = xfs_has_crc(mp);
1243 18083807 : __u32 magic = xfs_btree_magic(mp, ops);
1244 :
1245 18083817 : buf->bb_magic = cpu_to_be32(magic);
1246 18083817 : buf->bb_level = cpu_to_be16(level);
1247 18083817 : buf->bb_numrecs = cpu_to_be16(numrecs);
1248 :
1249 18083817 : if (ops->geom_flags & XFS_BTREE_LONG_PTRS) {
1250 16719397 : buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
1251 16719397 : buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
1252 16719397 : if (crc) {
1253 16719399 : buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
1254 16719399 : buf->bb_u.l.bb_owner = cpu_to_be64(owner);
1255 16719399 : uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
1256 16719396 : buf->bb_u.l.bb_pad = 0;
1257 16719396 : buf->bb_u.l.bb_lsn = 0;
1258 : }
1259 : } else {
1260 : /* owner is a 32 bit value on short blocks */
1261 1364420 : __u32 __owner = (__u32)owner;
1262 :
1263 1364420 : buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1264 1364420 : buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1265 1364420 : if (crc) {
1266 1357590 : buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
1267 1357590 : buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
1268 1357590 : uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
1269 1357585 : buf->bb_u.s.bb_lsn = 0;
1270 : }
1271 : }
1272 18083809 : }
1273 :
1274 : void
1275 14251891 : xfs_btree_init_block(
1276 : struct xfs_mount *mp,
1277 : struct xfs_btree_block *block,
1278 : const struct xfs_btree_ops *ops,
1279 : __u16 level,
1280 : __u16 numrecs,
1281 : __u64 owner)
1282 : {
1283 14251891 : __xfs_btree_init_block(mp, block, ops, XFS_BUF_DADDR_NULL, level,
1284 : numrecs, owner);
1285 14251878 : }
1286 :
1287 : void
1288 3802812 : xfs_btree_init_buf(
1289 : struct xfs_mount *mp,
1290 : struct xfs_buf *bp,
1291 : const struct xfs_btree_ops *ops,
1292 : __u16 level,
1293 : __u16 numrecs,
1294 : __u64 owner)
1295 : {
1296 3802812 : __xfs_btree_init_block(mp, XFS_BUF_TO_BLOCK(bp), ops,
1297 : xfs_buf_daddr(bp), level, numrecs, owner);
1298 3802798 : bp->b_ops = ops->buf_ops;
1299 3802798 : }
1300 :
1301 : void
1302 2118602 : xfs_btree_init_block_cur(
1303 : struct xfs_btree_cur *cur,
1304 : struct xfs_buf *bp,
1305 : int level,
1306 : int numrecs)
1307 : {
1308 2118602 : __u64 owner;
1309 :
1310 : /*
1311 : * we can pull the owner from the cursor right now as the different
1312 : * owners align directly with the pointer size of the btree. This may
1313 : * change in future, but is safe for current users of the generic btree
1314 : * code.
1315 : */
1316 2118602 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
1317 11191 : owner = xfbtree_owner(cur);
1318 2107411 : else if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1319 799813 : owner = cur->bc_ino.ip->i_ino;
1320 : else
1321 1307598 : owner = cur->bc_ag.pag->pag_agno;
1322 :
1323 2118602 : xfs_btree_init_buf(cur->bc_mp, bp, cur->bc_ops, level, numrecs, owner);
1324 2118599 : }
1325 :
1326 : /*
1327 : * Return true if ptr is the last record in the btree and
1328 : * we need to track updates to this record. The decision
1329 : * will be further refined in the update_lastrec method.
1330 : */
1331 : STATIC int
1332 2016997796 : xfs_btree_is_lastrec(
1333 : struct xfs_btree_cur *cur,
1334 : struct xfs_btree_block *block,
1335 : int level)
1336 : {
1337 2016997796 : union xfs_btree_ptr ptr;
1338 :
1339 2016997796 : if (level > 0)
1340 : return 0;
1341 2015684162 : if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
1342 : return 0;
1343 :
1344 191973145 : xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1345 383944842 : if (!xfs_btree_ptr_is_null(cur, &ptr))
1346 22659851 : return 0;
1347 : return 1;
1348 : }
1349 :
1350 : STATIC void
1351 112690206 : xfs_btree_buf_to_ptr(
1352 : struct xfs_btree_cur *cur,
1353 : struct xfs_buf *bp,
1354 : union xfs_btree_ptr *ptr)
1355 : {
1356 112690206 : if (cur->bc_flags & XFS_BTREE_IN_XFILE) {
1357 11191 : xfbtree_buf_to_ptr(cur, bp, ptr);
1358 11191 : return;
1359 : }
1360 :
1361 112679015 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1362 14598272 : ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
1363 : xfs_buf_daddr(bp)));
1364 : else {
1365 98080743 : ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
1366 : xfs_buf_daddr(bp)));
1367 : }
1368 : }
1369 :
1370 : static inline void
1371 : xfs_btree_set_refs(
1372 : struct xfs_btree_cur *cur,
1373 : struct xfs_buf *bp)
1374 : {
1375 12243221157 : xfs_buf_set_ref(bp, cur->bc_ops->lru_refs);
1376 : }
1377 :
1378 : int
1379 2136480 : xfs_btree_get_buf_block(
1380 : struct xfs_btree_cur *cur,
1381 : const union xfs_btree_ptr *ptr,
1382 : struct xfs_btree_block **block,
1383 : struct xfs_buf **bpp)
1384 : {
1385 2136480 : xfs_daddr_t d;
1386 2136480 : int error;
1387 :
1388 2136480 : error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
1389 2136486 : if (error)
1390 : return error;
1391 4272962 : error = xfs_trans_get_buf(cur->bc_tp, xfs_btree_buftarg(cur), d,
1392 2136481 : xfs_btree_bbsize(cur), 0, bpp);
1393 2136481 : if (error)
1394 : return error;
1395 :
1396 2136481 : (*bpp)->b_ops = cur->bc_ops->buf_ops;
1397 2136481 : *block = XFS_BUF_TO_BLOCK(*bpp);
1398 2136481 : return 0;
1399 : }
1400 :
1401 : /*
1402 : * Read in the buffer at the given ptr and return the buffer and
1403 : * the block pointer within the buffer.
1404 : */
1405 : int
1406 12240626578 : xfs_btree_read_buf_block(
1407 : struct xfs_btree_cur *cur,
1408 : const union xfs_btree_ptr *ptr,
1409 : int flags,
1410 : struct xfs_btree_block **block,
1411 : struct xfs_buf **bpp)
1412 : {
1413 12240626578 : struct xfs_mount *mp = cur->bc_mp;
1414 12240626578 : xfs_daddr_t d;
1415 12240626578 : int error;
1416 :
1417 : /* need to sort out how callers deal with failures first */
1418 12240626578 : ASSERT(!(flags & XBF_TRYLOCK));
1419 :
1420 12240626578 : error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
1421 12241788386 : if (error)
1422 : return error;
1423 12241076402 : error = xfs_trans_read_buf(mp, cur->bc_tp, xfs_btree_buftarg(cur), d,
1424 12241514225 : xfs_btree_bbsize(cur), flags, bpp,
1425 12241639584 : cur->bc_ops->buf_ops);
1426 12243354685 : if (xfs_metadata_is_sick(error))
1427 270 : xfs_btree_mark_sick(cur);
1428 12243354685 : if (error)
1429 : return error;
1430 :
1431 12243221157 : xfs_btree_set_refs(cur, *bpp);
1432 12243639622 : *block = XFS_BUF_TO_BLOCK(*bpp);
1433 12243639622 : return 0;
1434 : }
1435 :
1436 : /*
1437 : * Copy keys from one btree block to another.
1438 : */
1439 : void
1440 222561139 : xfs_btree_copy_keys(
1441 : struct xfs_btree_cur *cur,
1442 : union xfs_btree_key *dst_key,
1443 : const union xfs_btree_key *src_key,
1444 : int numkeys)
1445 : {
1446 222561139 : ASSERT(numkeys >= 0);
1447 445122278 : memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1448 222561139 : }
1449 :
1450 : /*
1451 : * Copy records from one btree block to another.
1452 : */
1453 : STATIC void
1454 1447867448 : xfs_btree_copy_recs(
1455 : struct xfs_btree_cur *cur,
1456 : union xfs_btree_rec *dst_rec,
1457 : union xfs_btree_rec *src_rec,
1458 : int numrecs)
1459 : {
1460 1447867448 : ASSERT(numrecs >= 0);
1461 2895734896 : memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1462 1447867448 : }
1463 :
1464 : /*
1465 : * Copy block pointers from one btree block to another.
1466 : */
1467 : void
1468 17763203 : xfs_btree_copy_ptrs(
1469 : struct xfs_btree_cur *cur,
1470 : union xfs_btree_ptr *dst_ptr,
1471 : const union xfs_btree_ptr *src_ptr,
1472 : int numptrs)
1473 : {
1474 17763203 : ASSERT(numptrs >= 0);
1475 39675934 : memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1476 17763203 : }
1477 :
1478 : /*
1479 : * Shift keys one index left/right inside a single btree block.
1480 : */
1481 : STATIC void
1482 1247043 : xfs_btree_shift_keys(
1483 : struct xfs_btree_cur *cur,
1484 : union xfs_btree_key *key,
1485 : int dir,
1486 : int numkeys)
1487 : {
1488 1247043 : char *dst_key;
1489 :
1490 1247043 : ASSERT(numkeys >= 0);
1491 1247043 : ASSERT(dir == 1 || dir == -1);
1492 :
1493 1247043 : dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1494 2494086 : memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1495 1247043 : }
1496 :
1497 : /*
1498 : * Shift records one index left/right inside a single btree block.
1499 : */
1500 : STATIC void
1501 1270423907 : xfs_btree_shift_recs(
1502 : struct xfs_btree_cur *cur,
1503 : union xfs_btree_rec *rec,
1504 : int dir,
1505 : int numrecs)
1506 : {
1507 1270423907 : char *dst_rec;
1508 :
1509 1270423907 : ASSERT(numrecs >= 0);
1510 1270423907 : ASSERT(dir == 1 || dir == -1);
1511 :
1512 1270423907 : dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1513 2540847814 : memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1514 1270423907 : }
1515 :
1516 : /*
1517 : * Shift block pointers one index left/right inside a single btree block.
1518 : */
1519 : STATIC void
1520 1247042 : xfs_btree_shift_ptrs(
1521 : struct xfs_btree_cur *cur,
1522 : union xfs_btree_ptr *ptr,
1523 : int dir,
1524 : int numptrs)
1525 : {
1526 1247042 : char *dst_ptr;
1527 :
1528 1247042 : ASSERT(numptrs >= 0);
1529 1247042 : ASSERT(dir == 1 || dir == -1);
1530 :
1531 1247042 : dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1532 2494084 : memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1533 1247042 : }
1534 :
1535 : /*
1536 : * Log key values from the btree block.
1537 : */
1538 : STATIC void
1539 221134601 : xfs_btree_log_keys(
1540 : struct xfs_btree_cur *cur,
1541 : struct xfs_buf *bp,
1542 : int first,
1543 : int last)
1544 : {
1545 :
1546 221134601 : if (bp) {
1547 211448607 : xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1548 211449037 : xfs_trans_log_buf(cur->bc_tp, bp,
1549 211449037 : xfs_btree_key_offset(cur, first),
1550 211449037 : xfs_btree_key_offset(cur, last + 1) - 1);
1551 : } else {
1552 9685994 : xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
1553 9685994 : xfs_ilog_fbroot(cur->bc_ino.whichfork));
1554 : }
1555 221135073 : }
1556 :
1557 : /*
1558 : * Log record values from the btree block.
1559 : */
1560 : void
1561 1825846041 : xfs_btree_log_recs(
1562 : struct xfs_btree_cur *cur,
1563 : struct xfs_buf *bp,
1564 : int first,
1565 : int last)
1566 : {
1567 1825846041 : if (!bp) {
1568 2959190 : xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
1569 1479595 : xfs_ilog_fbroot(cur->bc_ino.whichfork));
1570 1479595 : return;
1571 : }
1572 :
1573 1824366446 : xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1574 1824409177 : xfs_trans_log_buf(cur->bc_tp, bp,
1575 1824409177 : xfs_btree_rec_offset(cur, first),
1576 1824409177 : xfs_btree_rec_offset(cur, last + 1) - 1);
1577 : }
1578 :
1579 : /*
1580 : * Log block pointer fields from a btree block (nonleaf).
1581 : */
1582 : STATIC void
1583 1377589 : xfs_btree_log_ptrs(
1584 : struct xfs_btree_cur *cur, /* btree cursor */
1585 : struct xfs_buf *bp, /* buffer containing btree block */
1586 : int first, /* index of first pointer to log */
1587 : int last) /* index of last pointer to log */
1588 : {
1589 :
1590 1377589 : if (bp) {
1591 1345473 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
1592 1345473 : int level = xfs_btree_get_level(block);
1593 :
1594 1345473 : xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1595 1345474 : xfs_trans_log_buf(cur->bc_tp, bp,
1596 1345473 : xfs_btree_ptr_offset(cur, first, level),
1597 1345474 : xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1598 : } else {
1599 32116 : xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
1600 32116 : xfs_ilog_fbroot(cur->bc_ino.whichfork));
1601 : }
1602 :
1603 1377589 : }
1604 :
1605 : /*
1606 : * Log fields from a btree block header.
1607 : */
1608 : void
1609 1804035623 : xfs_btree_log_block(
1610 : struct xfs_btree_cur *cur, /* btree cursor */
1611 : struct xfs_buf *bp, /* buffer containing btree block */
1612 : uint32_t fields) /* mask of fields: XFS_BB_... */
1613 : {
1614 1804035623 : int first; /* first byte offset logged */
1615 1804035623 : int last; /* last byte offset logged */
1616 1804035623 : static const short soffsets[] = { /* table of offsets (short) */
1617 : offsetof(struct xfs_btree_block, bb_magic),
1618 : offsetof(struct xfs_btree_block, bb_level),
1619 : offsetof(struct xfs_btree_block, bb_numrecs),
1620 : offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1621 : offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
1622 : offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
1623 : offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
1624 : offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
1625 : offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
1626 : offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
1627 : XFS_BTREE_SBLOCK_CRC_LEN
1628 : };
1629 1804035623 : static const short loffsets[] = { /* table of offsets (long) */
1630 : offsetof(struct xfs_btree_block, bb_magic),
1631 : offsetof(struct xfs_btree_block, bb_level),
1632 : offsetof(struct xfs_btree_block, bb_numrecs),
1633 : offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1634 : offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
1635 : offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
1636 : offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
1637 : offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
1638 : offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
1639 : offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
1640 : offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
1641 : XFS_BTREE_LBLOCK_CRC_LEN
1642 : };
1643 :
1644 1804035623 : if (bp) {
1645 1802844392 : int nbits;
1646 :
1647 1802844392 : if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
1648 : /*
1649 : * We don't log the CRC when updating a btree
1650 : * block but instead recreate it during log
1651 : * recovery. As the log buffers have checksums
1652 : * of their own this is safe and avoids logging a crc
1653 : * update in a lot of places.
1654 : */
1655 1802838522 : if (fields == XFS_BB_ALL_BITS)
1656 2515429 : fields = XFS_BB_ALL_BITS_CRC;
1657 : nbits = XFS_BB_NUM_BITS_CRC;
1658 : } else {
1659 : nbits = XFS_BB_NUM_BITS;
1660 : }
1661 1802844392 : xfs_btree_offsets(fields,
1662 1802844392 : (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1663 : loffsets : soffsets,
1664 : nbits, &first, &last);
1665 1802907524 : xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1666 1802920280 : xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1667 : } else {
1668 1191231 : xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
1669 1191231 : xfs_ilog_fbroot(cur->bc_ino.whichfork));
1670 : }
1671 1803990026 : }
1672 :
1673 : /*
1674 : * Increment cursor by one record at the level.
1675 : * For nonzero levels the leaf-ward information is untouched.
1676 : */
1677 : int /* error */
1678 >12381*10^7 : xfs_btree_increment(
1679 : struct xfs_btree_cur *cur,
1680 : int level,
1681 : int *stat) /* success/failure */
1682 : {
1683 >12381*10^7 : struct xfs_btree_block *block;
1684 >12381*10^7 : union xfs_btree_ptr ptr;
1685 >12381*10^7 : struct xfs_buf *bp;
1686 >12381*10^7 : int error; /* error return value */
1687 >12381*10^7 : int lev;
1688 :
1689 >12381*10^7 : ASSERT(level < cur->bc_nlevels);
1690 :
1691 : /* Read-ahead to the right at this level. */
1692 >12381*10^7 : xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1693 :
1694 : /* Get a pointer to the btree block. */
1695 >12403*10^7 : block = xfs_btree_get_block(cur, level, &bp);
1696 :
1697 : #ifdef DEBUG
1698 >12416*10^7 : error = xfs_btree_check_block(cur, block, level, bp);
1699 >12366*10^7 : if (error)
1700 0 : goto error0;
1701 : #endif
1702 :
1703 : /* We're done if we remain in the block after the increment. */
1704 >24733*10^7 : if (++cur->bc_levels[level].ptr <= xfs_btree_get_numrecs(block))
1705 >11925*10^7 : goto out1;
1706 :
1707 : /* Fail if we just went off the right edge of the tree. */
1708 4408604856 : xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1709 8817590438 : if (xfs_btree_ptr_is_null(cur, &ptr))
1710 3608953482 : goto out0;
1711 :
1712 799841737 : XFS_BTREE_STATS_INC(cur, increment);
1713 :
1714 : /*
1715 : * March up the tree incrementing pointers.
1716 : * Stop when we don't go off the right edge of a block.
1717 : */
1718 808209485 : for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1719 808203456 : block = xfs_btree_get_block(cur, lev, &bp);
1720 :
1721 : #ifdef DEBUG
1722 808216244 : error = xfs_btree_check_block(cur, block, lev, bp);
1723 808178696 : if (error)
1724 0 : goto error0;
1725 : #endif
1726 :
1727 1616357392 : if (++cur->bc_levels[lev].ptr <= xfs_btree_get_numrecs(block))
1728 : break;
1729 :
1730 : /* Read-ahead the right block for the next loop. */
1731 8357095 : xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1732 : }
1733 :
1734 : /*
1735 : * If we went off the root then we are either seriously
1736 : * confused or have the tree root in an inode.
1737 : */
1738 799827630 : if (lev == cur->bc_nlevels) {
1739 0 : if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1740 0 : goto out0;
1741 0 : ASSERT(0);
1742 0 : xfs_btree_mark_sick(cur);
1743 0 : error = -EFSCORRUPTED;
1744 0 : goto error0;
1745 : }
1746 799827630 : ASSERT(lev < cur->bc_nlevels);
1747 :
1748 : /*
1749 : * Now walk back down the tree, fixing up the cursor's buffer
1750 : * pointers and key numbers.
1751 : */
1752 1607984414 : for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1753 808186460 : union xfs_btree_ptr *ptrp;
1754 :
1755 808186460 : ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
1756 808213691 : --lev;
1757 808213691 : error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
1758 808231739 : if (error)
1759 14 : goto error0;
1760 :
1761 808231725 : xfs_btree_setbuf(cur, lev, bp);
1762 808156784 : cur->bc_levels[lev].ptr = 1;
1763 : }
1764 799792126 : out1:
1765 >12005*10^7 : *stat = 1;
1766 >12005*10^7 : return 0;
1767 :
1768 3608953482 : out0:
1769 3608953482 : *stat = 0;
1770 3608953482 : return 0;
1771 :
1772 : error0:
1773 : return error;
1774 : }
1775 :
1776 : /*
1777 : * Decrement cursor by one record at the level.
1778 : * For nonzero levels the leaf-ward information is untouched.
1779 : */
1780 : int /* error */
1781 1870585236 : xfs_btree_decrement(
1782 : struct xfs_btree_cur *cur,
1783 : int level,
1784 : int *stat) /* success/failure */
1785 : {
1786 1870585236 : struct xfs_btree_block *block;
1787 1870585236 : struct xfs_buf *bp;
1788 1870585236 : int error; /* error return value */
1789 1870585236 : int lev;
1790 1870585236 : union xfs_btree_ptr ptr;
1791 :
1792 1870585236 : ASSERT(level < cur->bc_nlevels);
1793 :
1794 : /* Read-ahead to the left at this level. */
1795 1870585236 : xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1796 :
1797 : /* We're done if we remain in the block after the decrement. */
1798 1870601795 : if (--cur->bc_levels[level].ptr > 0)
1799 1472539083 : goto out1;
1800 :
1801 : /* Get a pointer to the btree block. */
1802 398062712 : block = xfs_btree_get_block(cur, level, &bp);
1803 :
1804 : #ifdef DEBUG
1805 398062727 : error = xfs_btree_check_block(cur, block, level, bp);
1806 398062726 : if (error)
1807 0 : goto error0;
1808 : #endif
1809 :
1810 : /* Fail if we just went off the left edge of the tree. */
1811 398062726 : xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1812 796125694 : if (xfs_btree_ptr_is_null(cur, &ptr))
1813 337323395 : goto out0;
1814 :
1815 60739452 : XFS_BTREE_STATS_INC(cur, decrement);
1816 :
1817 : /*
1818 : * March up the tree decrementing pointers.
1819 : * Stop when we don't go off the left edge of a block.
1820 : */
1821 60909658 : for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1822 60909653 : if (--cur->bc_levels[lev].ptr > 0)
1823 : break;
1824 : /* Read-ahead the left block for the next loop. */
1825 170098 : xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1826 : }
1827 :
1828 : /*
1829 : * If we went off the root then we are seriously confused.
1830 : * or the root of the tree is in an inode.
1831 : */
1832 60739560 : if (lev == cur->bc_nlevels) {
1833 0 : if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1834 0 : goto out0;
1835 0 : ASSERT(0);
1836 0 : xfs_btree_mark_sick(cur);
1837 0 : error = -EFSCORRUPTED;
1838 0 : goto error0;
1839 : }
1840 60739560 : ASSERT(lev < cur->bc_nlevels);
1841 :
1842 : /*
1843 : * Now walk back down the tree, fixing up the cursor's buffer
1844 : * pointers and key numbers.
1845 : */
1846 121649004 : for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1847 60909608 : union xfs_btree_ptr *ptrp;
1848 :
1849 60909608 : ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
1850 60909645 : --lev;
1851 60909645 : error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
1852 60909464 : if (error)
1853 3 : goto error0;
1854 60909461 : xfs_btree_setbuf(cur, lev, bp);
1855 121818888 : cur->bc_levels[lev].ptr = xfs_btree_get_numrecs(block);
1856 : }
1857 60739361 : out1:
1858 1533278444 : *stat = 1;
1859 1533278444 : return 0;
1860 :
1861 337323395 : out0:
1862 337323395 : *stat = 0;
1863 337323395 : return 0;
1864 :
1865 : error0:
1866 : return error;
1867 : }
1868 :
1869 : /*
1870 : * Check the btree block owner now that we have the context to know who the
1871 : * real owner is.
1872 : */
1873 : static inline xfs_failaddr_t
1874 11227776446 : xfs_btree_check_block_owner(
1875 : struct xfs_btree_cur *cur,
1876 : struct xfs_btree_block *block)
1877 : {
1878 11227776446 : if (!xfs_has_crc(cur->bc_mp))
1879 : return NULL;
1880 :
1881 11227775692 : if (cur->bc_flags & XFS_BTREE_IN_XFILE)
1882 1071397113 : return xfbtree_check_block_owner(cur, block);
1883 :
1884 10156378579 : if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS)) {
1885 9363891136 : if (be32_to_cpu(block->bb_u.s.bb_owner) !=
1886 9363891136 : cur->bc_ag.pag->pag_agno)
1887 0 : return __this_address;
1888 : return NULL;
1889 : }
1890 :
1891 792487443 : if (cur->bc_ino.flags & XFS_BTCUR_BMBT_INVALID_OWNER)
1892 : return NULL;
1893 :
1894 792487443 : if (be64_to_cpu(block->bb_u.l.bb_owner) != cur->bc_ino.ip->i_ino)
1895 0 : return __this_address;
1896 :
1897 : return NULL;
1898 : }
1899 :
1900 : int
1901 32731242380 : xfs_btree_lookup_get_block(
1902 : struct xfs_btree_cur *cur, /* btree cursor */
1903 : int level, /* level in the btree */
1904 : const union xfs_btree_ptr *pp, /* ptr to btree block */
1905 : struct xfs_btree_block **blkp) /* return btree block */
1906 : {
1907 32731242380 : struct xfs_buf *bp; /* buffer pointer for btree block */
1908 32731242380 : xfs_daddr_t daddr;
1909 32731242380 : int error = 0;
1910 :
1911 : /* special case the root block if in an inode */
1912 32731242380 : if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1913 3639755496 : (level == cur->bc_nlevels - 1)) {
1914 1390956068 : *blkp = xfs_btree_get_iroot(cur);
1915 1390963037 : return 0;
1916 : }
1917 :
1918 : /*
1919 : * If the old buffer at this level for the disk address we are
1920 : * looking for re-use it.
1921 : *
1922 : * Otherwise throw it away and get a new one.
1923 : */
1924 31340286312 : bp = cur->bc_levels[level].bp;
1925 31340286312 : error = xfs_btree_ptr_to_daddr(cur, pp, &daddr);
1926 31353337062 : if (error)
1927 : return error;
1928 31353337062 : if (bp && xfs_buf_daddr(bp) == daddr) {
1929 20126742853 : *blkp = XFS_BUF_TO_BLOCK(bp);
1930 20126742853 : return 0;
1931 : }
1932 :
1933 11226594209 : error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
1934 11229671819 : if (error)
1935 : return error;
1936 :
1937 : /* Check the inode owner since the verifiers don't. */
1938 11229708252 : if (xfs_btree_check_block_owner(cur, *blkp) != NULL)
1939 0 : goto out_bad;
1940 :
1941 : /* Did we get the level we were looking for? */
1942 11228649198 : if (be16_to_cpu((*blkp)->bb_level) != level)
1943 0 : goto out_bad;
1944 :
1945 : /* Check that internal nodes have at least one record. */
1946 11228649198 : if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0)
1947 0 : goto out_bad;
1948 :
1949 11228649198 : xfs_btree_setbuf(cur, level, bp);
1950 11228649198 : return 0;
1951 :
1952 0 : out_bad:
1953 0 : *blkp = NULL;
1954 0 : xfs_buf_mark_corrupt(bp);
1955 0 : xfs_trans_brelse(cur->bc_tp, bp);
1956 0 : xfs_btree_mark_sick(cur);
1957 0 : return -EFSCORRUPTED;
1958 : }
1959 :
1960 : /*
1961 : * Get current search key. For level 0 we don't actually have a key
1962 : * structure so we make one up from the record. For all other levels
1963 : * we just return the right key.
1964 : */
1965 : STATIC union xfs_btree_key *
1966 >11946*10^7 : xfs_lookup_get_search_key(
1967 : struct xfs_btree_cur *cur,
1968 : int level,
1969 : int keyno,
1970 : struct xfs_btree_block *block,
1971 : union xfs_btree_key *kp)
1972 : {
1973 >11946*10^7 : if (level == 0) {
1974 91379801522 : cur->bc_ops->init_key_from_rec(kp,
1975 : xfs_btree_rec_addr(cur, keyno, block));
1976 91379801522 : return kp;
1977 : }
1978 :
1979 28088349965 : return xfs_btree_key_addr(cur, keyno, block);
1980 : }
1981 :
1982 : /*
1983 : * Lookup the record. The cursor is made to point to it, based on dir.
1984 : * stat is set to 0 if can't find any such record, 1 for success.
1985 : */
1986 : int /* error */
1987 19616931805 : xfs_btree_lookup(
1988 : struct xfs_btree_cur *cur, /* btree cursor */
1989 : xfs_lookup_t dir, /* <=, ==, or >= */
1990 : int *stat) /* success/failure */
1991 : {
1992 19616931805 : struct xfs_btree_block *block; /* current btree block */
1993 19616931805 : int64_t diff; /* difference for the current key */
1994 19616931805 : int error; /* error return value */
1995 19616931805 : int keyno; /* current key number */
1996 19616931805 : int level; /* level in the btree */
1997 19616931805 : union xfs_btree_ptr *pp; /* ptr to btree block */
1998 19616931805 : union xfs_btree_ptr ptr; /* ptr to btree block */
1999 :
2000 19616931805 : XFS_BTREE_STATS_INC(cur, lookup);
2001 :
2002 : /* No such thing as a zero-level tree. */
2003 19616931805 : if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0)) {
2004 0 : xfs_btree_mark_sick(cur);
2005 0 : return -EFSCORRUPTED;
2006 : }
2007 :
2008 19616931805 : block = NULL;
2009 19616931805 : keyno = 0;
2010 :
2011 : /* initialise start pointer from cursor */
2012 19616931805 : cur->bc_ops->init_ptr_from_cur(cur, &ptr);
2013 19617291073 : pp = &ptr;
2014 :
2015 : /*
2016 : * Iterate over each level in the btree, starting at the root.
2017 : * For each level above the leaves, find the key we need, based
2018 : * on the lookup record, then follow the corresponding block
2019 : * pointer down to the next level.
2020 : */
2021 45771492756 : for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
2022 : /* Get the block we need to do the lookup on. */
2023 28247243483 : error = xfs_btree_lookup_get_block(cur, level, pp, &block);
2024 28247136429 : if (error)
2025 4860 : goto error0;
2026 :
2027 28247131569 : if (diff == 0) {
2028 : /*
2029 : * If we already had a key match at a higher level, we
2030 : * know we need to use the first entry in this block.
2031 : */
2032 : keyno = 1;
2033 : } else {
2034 : /* Otherwise search this block. Do a binary search. */
2035 :
2036 28226635966 : int high; /* high entry number */
2037 28226635966 : int low; /* low entry number */
2038 :
2039 : /* Set low and high entry numbers, 1-based. */
2040 28226635966 : low = 1;
2041 28226635966 : high = xfs_btree_get_numrecs(block);
2042 28226635966 : if (!high) {
2043 : /* Block is empty, must be an empty leaf. */
2044 2099358238 : if (level != 0 || cur->bc_nlevels != 1) {
2045 0 : XFS_CORRUPTION_ERROR(__func__,
2046 : XFS_ERRLEVEL_LOW,
2047 : cur->bc_mp, block,
2048 : sizeof(*block));
2049 0 : xfs_btree_mark_sick(cur);
2050 0 : return -EFSCORRUPTED;
2051 : }
2052 :
2053 2099358238 : cur->bc_levels[0].ptr = dir != XFS_LOOKUP_LE;
2054 2099358238 : *stat = 0;
2055 2099358238 : return 0;
2056 : }
2057 :
2058 : /* Binary search the block. */
2059 >14359*10^7 : while (low <= high) {
2060 >11947*10^7 : union xfs_btree_key key;
2061 >11947*10^7 : union xfs_btree_key *kp;
2062 :
2063 >11947*10^7 : XFS_BTREE_STATS_INC(cur, compare);
2064 :
2065 : /* keyno is average of low and high. */
2066 >11947*10^7 : keyno = (low + high) >> 1;
2067 :
2068 : /* Get current search key */
2069 >11947*10^7 : kp = xfs_lookup_get_search_key(cur, level,
2070 : keyno, block, &key);
2071 :
2072 : /*
2073 : * Compute difference to get next direction:
2074 : * - less than, move right
2075 : * - greater than, move left
2076 : * - equal, we're done
2077 : */
2078 >11940*10^7 : diff = cur->bc_ops->key_diff(cur, kp);
2079 >11948*10^7 : if (diff < 0)
2080 65196379307 : low = keyno + 1;
2081 54287448147 : else if (diff > 0)
2082 52269216749 : high = keyno - 1;
2083 : else
2084 : break;
2085 : }
2086 : }
2087 :
2088 : /*
2089 : * If there are more levels, set up for the next level
2090 : * by getting the block number and filling in the cursor.
2091 : */
2092 26154292273 : if (level > 0) {
2093 : /*
2094 : * If we moved left, need the previous key number,
2095 : * unless there isn't one.
2096 : */
2097 8631296173 : if (diff > 0 && --keyno < 1)
2098 : keyno = 1;
2099 8631296173 : pp = xfs_btree_ptr_addr(cur, keyno, block);
2100 :
2101 8630939099 : error = xfs_btree_debug_check_ptr(cur, pp, 0, level);
2102 8631205583 : if (error)
2103 0 : goto error0;
2104 :
2105 8631205583 : cur->bc_levels[level].ptr = keyno;
2106 : }
2107 : }
2108 :
2109 : /* Done with the search. See if we need to adjust the results. */
2110 17524249273 : if (dir != XFS_LOOKUP_LE && diff < 0) {
2111 824451666 : keyno++;
2112 : /*
2113 : * If ge search and we went off the end of the block, but it's
2114 : * not the last block, we're in the wrong block.
2115 : */
2116 824451666 : xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
2117 1405987851 : if (dir == XFS_LOOKUP_GE &&
2118 371819333 : keyno > xfs_btree_get_numrecs(block) &&
2119 : !xfs_btree_ptr_is_null(cur, &ptr)) {
2120 1539438 : int i;
2121 :
2122 1539438 : cur->bc_levels[0].ptr = keyno;
2123 1539438 : error = xfs_btree_increment(cur, 0, &i);
2124 1539438 : if (error)
2125 1 : goto error0;
2126 1539437 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
2127 0 : xfs_btree_mark_sick(cur);
2128 1539437 : return -EFSCORRUPTED;
2129 : }
2130 1539437 : *stat = 1;
2131 1539437 : return 0;
2132 : }
2133 16699797607 : } else if (dir == XFS_LOOKUP_LE && diff > 0)
2134 9042704637 : keyno--;
2135 17522707623 : cur->bc_levels[0].ptr = keyno;
2136 :
2137 : /* Return if we succeeded or not. */
2138 30738857373 : if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
2139 4772584409 : *stat = 0;
2140 12750123214 : else if (dir != XFS_LOOKUP_EQ || diff == 0)
2141 12365167215 : *stat = 1;
2142 : else
2143 384955999 : *stat = 0;
2144 : return 0;
2145 :
2146 : error0:
2147 : return error;
2148 : }
2149 :
2150 : /* Find the high key storage area from a regular key. */
2151 : union xfs_btree_key *
2152 1102579761 : xfs_btree_high_key_from_key(
2153 : struct xfs_btree_cur *cur,
2154 : union xfs_btree_key *key)
2155 : {
2156 1102579761 : ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
2157 1102579761 : return (union xfs_btree_key *)((char *)key +
2158 1102579761 : (cur->bc_ops->key_len / 2));
2159 : }
2160 :
2161 : /* Determine the low (and high if overlapped) keys of a leaf block */
2162 : STATIC void
2163 1028171405 : xfs_btree_get_leaf_keys(
2164 : struct xfs_btree_cur *cur,
2165 : struct xfs_btree_block *block,
2166 : union xfs_btree_key *key)
2167 : {
2168 1028171405 : union xfs_btree_key max_hkey;
2169 1028171405 : union xfs_btree_key hkey;
2170 1028171405 : union xfs_btree_rec *rec;
2171 1028171405 : union xfs_btree_key *high;
2172 1028171405 : int n;
2173 :
2174 1028171405 : rec = xfs_btree_rec_addr(cur, 1, block);
2175 1028171405 : cur->bc_ops->init_key_from_rec(key, rec);
2176 :
2177 1028185028 : if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2178 :
2179 505929433 : cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
2180 >28939*10^7 : for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
2181 >14393*10^7 : rec = xfs_btree_rec_addr(cur, n, block);
2182 >14393*10^7 : cur->bc_ops->init_high_key_from_rec(&hkey, rec);
2183 >14394*10^7 : if (xfs_btree_keycmp_gt(cur, &hkey, &max_hkey))
2184 >14089*10^7 : max_hkey = hkey;
2185 : }
2186 :
2187 505930936 : high = xfs_btree_high_key_from_key(cur, key);
2188 1011850630 : memcpy(high, &max_hkey, cur->bc_ops->key_len / 2);
2189 : }
2190 1028180910 : }
2191 :
2192 : /* Determine the low (and high if overlapped) keys of a node block */
2193 : STATIC void
2194 90904244 : xfs_btree_get_node_keys(
2195 : struct xfs_btree_cur *cur,
2196 : struct xfs_btree_block *block,
2197 : union xfs_btree_key *key)
2198 : {
2199 90904244 : union xfs_btree_key *hkey;
2200 90904244 : union xfs_btree_key *max_hkey;
2201 90904244 : union xfs_btree_key *high;
2202 90904244 : int n;
2203 :
2204 90904244 : if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2205 181525464 : memcpy(key, xfs_btree_key_addr(cur, 1, block),
2206 : cur->bc_ops->key_len / 2);
2207 :
2208 90762732 : max_hkey = xfs_btree_high_key_addr(cur, 1, block);
2209 14019215754 : for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
2210 6918845185 : hkey = xfs_btree_high_key_addr(cur, n, block);
2211 6918845185 : if (xfs_btree_keycmp_gt(cur, hkey, max_hkey))
2212 6898782262 : max_hkey = hkey;
2213 : }
2214 :
2215 90762692 : high = xfs_btree_high_key_from_key(cur, key);
2216 181525278 : memcpy(high, max_hkey, cur->bc_ops->key_len / 2);
2217 : } else {
2218 283024 : memcpy(key, xfs_btree_key_addr(cur, 1, block),
2219 : cur->bc_ops->key_len);
2220 : }
2221 90904151 : }
2222 :
2223 : /* Derive the keys for any btree block. */
2224 : void
2225 1028503718 : xfs_btree_get_keys(
2226 : struct xfs_btree_cur *cur,
2227 : struct xfs_btree_block *block,
2228 : union xfs_btree_key *key)
2229 : {
2230 1028503718 : if (be16_to_cpu(block->bb_level) == 0)
2231 1027242106 : xfs_btree_get_leaf_keys(cur, block, key);
2232 : else
2233 1261612 : xfs_btree_get_node_keys(cur, block, key);
2234 1028506413 : }
2235 :
2236 : /*
2237 : * Decide if we need to update the parent keys of a btree block. For
2238 : * a standard btree this is only necessary if we're updating the first
2239 : * record/key. For an overlapping btree, we must always update the
2240 : * keys because the highest key can be in any of the records or keys
2241 : * in the block.
2242 : */
2243 : static inline bool
2244 : xfs_btree_needs_key_update(
2245 : struct xfs_btree_cur *cur,
2246 : int ptr)
2247 : {
2248 1179946360 : return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
2249 : }
2250 :
2251 : /*
2252 : * Update the low and high parent keys of the given level, progressing
2253 : * towards the root. If force_all is false, stop if the keys for a given
2254 : * level do not need updating.
2255 : */
2256 : STATIC int
2257 504977701 : __xfs_btree_updkeys(
2258 : struct xfs_btree_cur *cur,
2259 : int level,
2260 : struct xfs_btree_block *block,
2261 : struct xfs_buf *bp0,
2262 : bool force_all)
2263 : {
2264 504977701 : union xfs_btree_key key; /* keys from current level */
2265 504977701 : union xfs_btree_key *lkey; /* keys from the next level up */
2266 504977701 : union xfs_btree_key *hkey;
2267 504977701 : union xfs_btree_key *nlkey; /* keys from the next level up */
2268 504977701 : union xfs_btree_key *nhkey;
2269 504977701 : struct xfs_buf *bp;
2270 504977701 : int ptr;
2271 :
2272 504977701 : ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
2273 :
2274 : /* Exit if there aren't any parent levels to update. */
2275 504977701 : if (level + 1 >= cur->bc_nlevels)
2276 : return 0;
2277 :
2278 483887879 : trace_xfs_btree_updkeys(cur, level, bp0);
2279 :
2280 483892216 : lkey = &key;
2281 483892216 : hkey = xfs_btree_high_key_from_key(cur, lkey);
2282 483889016 : xfs_btree_get_keys(cur, block, lkey);
2283 1057406643 : for (level++; level < cur->bc_nlevels; level++) {
2284 : #ifdef DEBUG
2285 573517627 : int error;
2286 : #endif
2287 573517627 : block = xfs_btree_get_block(cur, level, &bp);
2288 573521671 : trace_xfs_btree_updkeys(cur, level, bp);
2289 : #ifdef DEBUG
2290 573521781 : error = xfs_btree_check_block(cur, block, level, bp);
2291 573519524 : if (error)
2292 0 : return error;
2293 : #endif
2294 573519524 : ptr = cur->bc_levels[level].ptr;
2295 573519524 : nlkey = xfs_btree_key_addr(cur, ptr, block);
2296 573519524 : nhkey = xfs_btree_high_key_addr(cur, ptr, block);
2297 1146766680 : if (!force_all &&
2298 519509701 : xfs_btree_keycmp_eq(cur, nlkey, lkey) &&
2299 : xfs_btree_keycmp_eq(cur, nhkey, hkey))
2300 : break;
2301 132125019 : xfs_btree_copy_keys(cur, nlkey, lkey, 1);
2302 132124226 : xfs_btree_log_keys(cur, bp, ptr, ptr);
2303 132124280 : if (level + 1 >= cur->bc_nlevels)
2304 : break;
2305 89629380 : xfs_btree_get_node_keys(cur, block, lkey);
2306 : }
2307 :
2308 : return 0;
2309 : }
2310 :
2311 : /* Update all the keys from some level in cursor back to the root. */
2312 : STATIC int
2313 155173 : xfs_btree_updkeys_force(
2314 : struct xfs_btree_cur *cur,
2315 : int level)
2316 : {
2317 155173 : struct xfs_buf *bp;
2318 155173 : struct xfs_btree_block *block;
2319 :
2320 155173 : block = xfs_btree_get_block(cur, level, &bp);
2321 155173 : return __xfs_btree_updkeys(cur, level, block, bp, true);
2322 : }
2323 :
2324 : /*
2325 : * Update the parent keys of the given level, progressing towards the root.
2326 : */
2327 : STATIC int
2328 1015689658 : xfs_btree_update_keys(
2329 : struct xfs_btree_cur *cur,
2330 : int level)
2331 : {
2332 1015689658 : struct xfs_btree_block *block;
2333 1015689658 : struct xfs_buf *bp;
2334 1015689658 : union xfs_btree_key *kp;
2335 1015689658 : union xfs_btree_key key;
2336 1015689658 : int ptr;
2337 :
2338 1015689658 : ASSERT(level >= 0);
2339 :
2340 1015689658 : block = xfs_btree_get_block(cur, level, &bp);
2341 1015742458 : if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
2342 504827408 : return __xfs_btree_updkeys(cur, level, block, bp, false);
2343 :
2344 : /*
2345 : * Go up the tree from this level toward the root.
2346 : * At each level, update the key value to the value input.
2347 : * Stop when we reach a level where the cursor isn't pointing
2348 : * at the first entry in the block.
2349 : */
2350 510915050 : xfs_btree_get_keys(cur, block, &key);
2351 598548044 : for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
2352 : #ifdef DEBUG
2353 87635638 : int error;
2354 : #endif
2355 87635638 : block = xfs_btree_get_block(cur, level, &bp);
2356 : #ifdef DEBUG
2357 87634535 : error = xfs_btree_check_block(cur, block, level, bp);
2358 87634838 : if (error)
2359 0 : return error;
2360 : #endif
2361 87634838 : ptr = cur->bc_levels[level].ptr;
2362 87634838 : kp = xfs_btree_key_addr(cur, ptr, block);
2363 87634838 : xfs_btree_copy_keys(cur, kp, &key, 1);
2364 87635426 : xfs_btree_log_keys(cur, bp, ptr, ptr);
2365 : }
2366 :
2367 : return 0;
2368 : }
2369 :
2370 : /*
2371 : * Update the record referred to by cur to the value in the
2372 : * given record. This either works (return 0) or gets an
2373 : * EFSCORRUPTED error.
2374 : */
2375 : int
2376 480779149 : xfs_btree_update(
2377 : struct xfs_btree_cur *cur,
2378 : union xfs_btree_rec *rec)
2379 : {
2380 480779149 : struct xfs_btree_block *block;
2381 480779149 : struct xfs_buf *bp;
2382 480779149 : int error;
2383 480779149 : int ptr;
2384 480779149 : union xfs_btree_rec *rp;
2385 :
2386 : /* Pick up the current block. */
2387 480779149 : block = xfs_btree_get_block(cur, 0, &bp);
2388 :
2389 : #ifdef DEBUG
2390 480798713 : error = xfs_btree_check_block(cur, block, 0, bp);
2391 480765908 : if (error)
2392 0 : goto error0;
2393 : #endif
2394 : /* Get the address of the rec to be updated. */
2395 480765908 : ptr = cur->bc_levels[0].ptr;
2396 480765908 : rp = xfs_btree_rec_addr(cur, ptr, block);
2397 :
2398 : /* Fill in the new contents and log them. */
2399 480765908 : xfs_btree_copy_recs(cur, rp, rec, 1);
2400 480799410 : xfs_btree_log_recs(cur, bp, ptr, ptr);
2401 :
2402 : /*
2403 : * If we are tracking the last record in the tree and
2404 : * we are at the far right edge of the tree, update it.
2405 : */
2406 480804843 : if (xfs_btree_is_lastrec(cur, block, 0)) {
2407 0 : cur->bc_ops->update_lastrec(cur, block, rec,
2408 : ptr, LASTREC_UPDATE);
2409 : }
2410 :
2411 : /* Pass new key value up to our parent. */
2412 904165033 : if (xfs_btree_needs_key_update(cur, ptr)) {
2413 139822339 : error = xfs_btree_update_keys(cur, 0);
2414 139818781 : if (error)
2415 0 : goto error0;
2416 : }
2417 :
2418 : return 0;
2419 :
2420 : error0:
2421 : return error;
2422 : }
2423 :
2424 : /*
2425 : * Move 1 record left from cur/level if possible.
2426 : * Update cur to reflect the new path.
2427 : */
2428 : STATIC int /* error */
2429 72952747 : xfs_btree_lshift(
2430 : struct xfs_btree_cur *cur,
2431 : int level,
2432 : int *stat) /* success/failure */
2433 : {
2434 72952747 : struct xfs_buf *lbp; /* left buffer pointer */
2435 72952747 : struct xfs_btree_block *left; /* left btree block */
2436 72952747 : int lrecs; /* left record count */
2437 72952747 : struct xfs_buf *rbp; /* right buffer pointer */
2438 72952747 : struct xfs_btree_block *right; /* right btree block */
2439 72952747 : struct xfs_btree_cur *tcur; /* temporary btree cursor */
2440 72952747 : int rrecs; /* right record count */
2441 72952747 : union xfs_btree_ptr lptr; /* left btree pointer */
2442 72952747 : union xfs_btree_key *rkp = NULL; /* right btree key */
2443 72952747 : union xfs_btree_ptr *rpp = NULL; /* right address pointer */
2444 72952747 : union xfs_btree_rec *rrp = NULL; /* right record pointer */
2445 72952747 : int error; /* error return value */
2446 72952747 : int i;
2447 :
2448 72952747 : if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2449 33495467 : level == cur->bc_nlevels - 1)
2450 0 : goto out0;
2451 :
2452 : /* Set up variables for this block as "right". */
2453 72952747 : right = xfs_btree_get_block(cur, level, &rbp);
2454 :
2455 : #ifdef DEBUG
2456 72944867 : error = xfs_btree_check_block(cur, right, level, rbp);
2457 72939609 : if (error)
2458 0 : goto error0;
2459 : #endif
2460 :
2461 : /* If we've got no left sibling then we can't shift an entry left. */
2462 72939609 : xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2463 145886588 : if (xfs_btree_ptr_is_null(cur, &lptr))
2464 39045 : goto out0;
2465 :
2466 : /*
2467 : * If the cursor entry is the one that would be moved, don't
2468 : * do it... it's too complicated.
2469 : */
2470 72904249 : if (cur->bc_levels[level].ptr <= 1)
2471 14237 : goto out0;
2472 :
2473 : /* Set up the left neighbor as "left". */
2474 72890012 : error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
2475 72893514 : if (error)
2476 50 : goto error0;
2477 :
2478 : /* If it's full, it can't take another entry. */
2479 72893464 : lrecs = xfs_btree_get_numrecs(left);
2480 72893464 : if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
2481 859279 : goto out0;
2482 :
2483 72035480 : rrecs = xfs_btree_get_numrecs(right);
2484 :
2485 : /*
2486 : * We add one entry to the left side and remove one for the right side.
2487 : * Account for it here, the changes will be updated on disk and logged
2488 : * later.
2489 : */
2490 72035480 : lrecs++;
2491 72035480 : rrecs--;
2492 :
2493 72035480 : XFS_BTREE_STATS_INC(cur, lshift);
2494 72035480 : XFS_BTREE_STATS_ADD(cur, moves, 1);
2495 :
2496 : /*
2497 : * If non-leaf, copy a key and a ptr to the left block.
2498 : * Log the changes to the left block.
2499 : */
2500 72035480 : if (level > 0) {
2501 : /* It's a non-leaf. Move keys and pointers. */
2502 103882 : union xfs_btree_key *lkp; /* left btree key */
2503 103882 : union xfs_btree_ptr *lpp; /* left address pointer */
2504 :
2505 103882 : lkp = xfs_btree_key_addr(cur, lrecs, left);
2506 103882 : rkp = xfs_btree_key_addr(cur, 1, right);
2507 :
2508 103882 : lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2509 103829 : rpp = xfs_btree_ptr_addr(cur, 1, right);
2510 :
2511 103829 : error = xfs_btree_debug_check_ptr(cur, rpp, 0, level);
2512 103829 : if (error)
2513 0 : goto error0;
2514 :
2515 103829 : xfs_btree_copy_keys(cur, lkp, rkp, 1);
2516 103829 : xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
2517 :
2518 103829 : xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
2519 103829 : xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
2520 :
2521 103829 : ASSERT(cur->bc_ops->keys_inorder(cur,
2522 : xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
2523 : } else {
2524 : /* It's a leaf. Move records. */
2525 71931598 : union xfs_btree_rec *lrp; /* left record pointer */
2526 :
2527 71931598 : lrp = xfs_btree_rec_addr(cur, lrecs, left);
2528 71931598 : rrp = xfs_btree_rec_addr(cur, 1, right);
2529 :
2530 71931598 : xfs_btree_copy_recs(cur, lrp, rrp, 1);
2531 71931737 : xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
2532 :
2533 71929766 : ASSERT(cur->bc_ops->recs_inorder(cur,
2534 : xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
2535 : }
2536 :
2537 72037120 : xfs_btree_set_numrecs(left, lrecs);
2538 72037120 : xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2539 :
2540 72036506 : xfs_btree_set_numrecs(right, rrecs);
2541 72036506 : xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2542 :
2543 : /*
2544 : * Slide the contents of right down one entry.
2545 : */
2546 72036453 : XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
2547 72036453 : if (level > 0) {
2548 : /* It's a nonleaf. operate on keys and ptrs */
2549 15969201 : for (i = 0; i < rrecs; i++) {
2550 15865372 : error = xfs_btree_debug_check_ptr(cur, rpp, i + 1, level);
2551 15865372 : if (error)
2552 0 : goto error0;
2553 : }
2554 :
2555 103829 : xfs_btree_shift_keys(cur,
2556 : xfs_btree_key_addr(cur, 2, right),
2557 : -1, rrecs);
2558 103829 : xfs_btree_shift_ptrs(cur,
2559 : xfs_btree_ptr_addr(cur, 2, right),
2560 : -1, rrecs);
2561 :
2562 103829 : xfs_btree_log_keys(cur, rbp, 1, rrecs);
2563 103829 : xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2564 : } else {
2565 : /* It's a leaf. operate on records */
2566 71932624 : xfs_btree_shift_recs(cur,
2567 : xfs_btree_rec_addr(cur, 2, right),
2568 : -1, rrecs);
2569 71934619 : xfs_btree_log_recs(cur, rbp, 1, rrecs);
2570 : }
2571 :
2572 : /*
2573 : * Using a temporary cursor, update the parent key values of the
2574 : * block on the left.
2575 : */
2576 72039144 : if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2577 26401356 : error = xfs_btree_dup_cursor(cur, &tcur);
2578 26401357 : if (error)
2579 0 : goto error0;
2580 26401357 : i = xfs_btree_firstrec(tcur, level);
2581 26401340 : if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
2582 0 : xfs_btree_mark_sick(cur);
2583 0 : error = -EFSCORRUPTED;
2584 0 : goto error0;
2585 : }
2586 :
2587 26401340 : error = xfs_btree_decrement(tcur, level, &i);
2588 26401364 : if (error)
2589 0 : goto error1;
2590 :
2591 : /* Update the parent high keys of the left block, if needed. */
2592 26401364 : error = xfs_btree_update_keys(tcur, level);
2593 26401357 : if (error)
2594 0 : goto error1;
2595 :
2596 26401357 : xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2597 : }
2598 :
2599 : /* Update the parent keys of the right block. */
2600 72039155 : error = xfs_btree_update_keys(cur, level);
2601 72038799 : if (error)
2602 0 : goto error0;
2603 :
2604 : /* Slide the cursor value left one. */
2605 72038799 : cur->bc_levels[level].ptr--;
2606 :
2607 72038799 : *stat = 1;
2608 72038799 : return 0;
2609 :
2610 912561 : out0:
2611 912561 : *stat = 0;
2612 912561 : return 0;
2613 :
2614 : error0:
2615 : return error;
2616 :
2617 0 : error1:
2618 0 : xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2619 0 : return error;
2620 : }
2621 :
2622 : /*
2623 : * Move 1 record right from cur/level if possible.
2624 : * Update cur to reflect the new path.
2625 : */
2626 : STATIC int /* error */
2627 122143983 : xfs_btree_rshift(
2628 : struct xfs_btree_cur *cur,
2629 : int level,
2630 : int *stat) /* success/failure */
2631 : {
2632 122143983 : struct xfs_buf *lbp; /* left buffer pointer */
2633 122143983 : struct xfs_btree_block *left; /* left btree block */
2634 122143983 : struct xfs_buf *rbp; /* right buffer pointer */
2635 122143983 : struct xfs_btree_block *right; /* right btree block */
2636 122143983 : struct xfs_btree_cur *tcur; /* temporary btree cursor */
2637 122143983 : union xfs_btree_ptr rptr; /* right block pointer */
2638 122143983 : union xfs_btree_key *rkp; /* right btree key */
2639 122143983 : int rrecs; /* right record count */
2640 122143983 : int lrecs; /* left record count */
2641 122143983 : int error; /* error return value */
2642 122143983 : int i; /* loop counter */
2643 :
2644 122143983 : if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2645 62176064 : (level == cur->bc_nlevels - 1))
2646 0 : goto out0;
2647 :
2648 : /* Set up variables for this block as "left". */
2649 122143983 : left = xfs_btree_get_block(cur, level, &lbp);
2650 :
2651 : #ifdef DEBUG
2652 122142606 : error = xfs_btree_check_block(cur, left, level, lbp);
2653 122138640 : if (error)
2654 0 : goto error0;
2655 : #endif
2656 :
2657 : /* If we've got no right sibling then we can't shift an entry right. */
2658 122138640 : xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2659 244285618 : if (xfs_btree_ptr_is_null(cur, &rptr))
2660 44501415 : goto out0;
2661 :
2662 : /*
2663 : * If the cursor entry is the one that would be moved, don't
2664 : * do it... it's too complicated.
2665 : */
2666 77641394 : lrecs = xfs_btree_get_numrecs(left);
2667 77641394 : if (cur->bc_levels[level].ptr >= lrecs)
2668 7204047 : goto out0;
2669 :
2670 : /* Set up the right neighbor as "right". */
2671 70437347 : error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
2672 70436292 : if (error)
2673 139 : goto error0;
2674 :
2675 : /* If it's full, it can't take another entry. */
2676 70436153 : rrecs = xfs_btree_get_numrecs(right);
2677 70436153 : if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2678 10876357 : goto out0;
2679 :
2680 59560928 : XFS_BTREE_STATS_INC(cur, rshift);
2681 59560928 : XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2682 :
2683 : /*
2684 : * Make a hole at the start of the right neighbor block, then
2685 : * copy the last left block entry to the hole.
2686 : */
2687 59560928 : if (level > 0) {
2688 : /* It's a nonleaf. make a hole in the keys and ptrs */
2689 45622 : union xfs_btree_key *lkp;
2690 45622 : union xfs_btree_ptr *lpp;
2691 45622 : union xfs_btree_ptr *rpp;
2692 :
2693 45622 : lkp = xfs_btree_key_addr(cur, lrecs, left);
2694 45622 : lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2695 45622 : rkp = xfs_btree_key_addr(cur, 1, right);
2696 45622 : rpp = xfs_btree_ptr_addr(cur, 1, right);
2697 :
2698 3093758 : for (i = rrecs - 1; i >= 0; i--) {
2699 3048136 : error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
2700 3048136 : if (error)
2701 0 : goto error0;
2702 : }
2703 :
2704 45622 : xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2705 45622 : xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2706 :
2707 45622 : error = xfs_btree_debug_check_ptr(cur, lpp, 0, level);
2708 45622 : if (error)
2709 0 : goto error0;
2710 :
2711 : /* Now put the new data in, and log it. */
2712 45622 : xfs_btree_copy_keys(cur, rkp, lkp, 1);
2713 45622 : xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2714 :
2715 45622 : xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2716 45622 : xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2717 :
2718 45622 : ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
2719 : xfs_btree_key_addr(cur, 2, right)));
2720 : } else {
2721 : /* It's a leaf. make a hole in the records */
2722 59515306 : union xfs_btree_rec *lrp;
2723 59515306 : union xfs_btree_rec *rrp;
2724 :
2725 59515306 : lrp = xfs_btree_rec_addr(cur, lrecs, left);
2726 59515306 : rrp = xfs_btree_rec_addr(cur, 1, right);
2727 :
2728 59515306 : xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2729 :
2730 : /* Now put the new data in, and log it. */
2731 59515550 : xfs_btree_copy_recs(cur, rrp, lrp, 1);
2732 59515552 : xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2733 : }
2734 :
2735 : /*
2736 : * Decrement and log left's numrecs, bump and log right's numrecs.
2737 : */
2738 59561869 : xfs_btree_set_numrecs(left, --lrecs);
2739 59561869 : xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2740 :
2741 59561507 : xfs_btree_set_numrecs(right, ++rrecs);
2742 59561507 : xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2743 :
2744 : /*
2745 : * Using a temporary cursor, update the parent key values of the
2746 : * block on the right.
2747 : */
2748 59561190 : error = xfs_btree_dup_cursor(cur, &tcur);
2749 59561806 : if (error)
2750 0 : goto error0;
2751 59561806 : i = xfs_btree_lastrec(tcur, level);
2752 59562050 : if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
2753 0 : xfs_btree_mark_sick(cur);
2754 0 : error = -EFSCORRUPTED;
2755 0 : goto error0;
2756 : }
2757 :
2758 59562050 : error = xfs_btree_increment(tcur, level, &i);
2759 59562534 : if (error)
2760 1 : goto error1;
2761 :
2762 : /* Update the parent high keys of the left block, if needed. */
2763 59562533 : if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2764 25804829 : error = xfs_btree_update_keys(cur, level);
2765 25804808 : if (error)
2766 0 : goto error1;
2767 : }
2768 :
2769 : /* Update the parent keys of the right block. */
2770 59562512 : error = xfs_btree_update_keys(tcur, level);
2771 59562291 : if (error)
2772 0 : goto error1;
2773 :
2774 59562291 : xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2775 :
2776 59562389 : *stat = 1;
2777 59562389 : return 0;
2778 :
2779 62581819 : out0:
2780 62581819 : *stat = 0;
2781 62581819 : return 0;
2782 :
2783 : error0:
2784 : return error;
2785 :
2786 1 : error1:
2787 1 : xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2788 1 : return error;
2789 : }
2790 :
2791 : static inline int
2792 948992 : xfs_btree_alloc_block(
2793 : struct xfs_btree_cur *cur,
2794 : const union xfs_btree_ptr *hint_block,
2795 : union xfs_btree_ptr *new_block,
2796 : int *stat)
2797 : {
2798 948992 : int error;
2799 :
2800 948992 : error = cur->bc_ops->alloc_block(cur, hint_block, new_block, stat);
2801 948992 : trace_xfs_btree_alloc_block(cur, new_block, *stat, error);
2802 948992 : return error;
2803 : }
2804 :
2805 : /*
2806 : * Split cur/level block in half.
2807 : * Return new block number and the key to its first
2808 : * record (to be inserted into parent).
2809 : */
2810 : STATIC int /* error */
2811 912561 : __xfs_btree_split(
2812 : struct xfs_btree_cur *cur,
2813 : int level,
2814 : union xfs_btree_ptr *ptrp,
2815 : union xfs_btree_key *key,
2816 : struct xfs_btree_cur **curp,
2817 : int *stat) /* success/failure */
2818 : {
2819 912561 : union xfs_btree_ptr lptr; /* left sibling block ptr */
2820 912561 : struct xfs_buf *lbp; /* left buffer pointer */
2821 912561 : struct xfs_btree_block *left; /* left btree block */
2822 912561 : union xfs_btree_ptr rptr; /* right sibling block ptr */
2823 912561 : struct xfs_buf *rbp; /* right buffer pointer */
2824 912561 : struct xfs_btree_block *right; /* right btree block */
2825 912561 : union xfs_btree_ptr rrptr; /* right-right sibling ptr */
2826 912561 : struct xfs_buf *rrbp; /* right-right buffer pointer */
2827 912561 : struct xfs_btree_block *rrblock; /* right-right btree block */
2828 912561 : int lrecs;
2829 912561 : int rrecs;
2830 912561 : int src_index;
2831 912561 : int error; /* error return value */
2832 912561 : int i;
2833 :
2834 912561 : XFS_BTREE_STATS_INC(cur, split);
2835 :
2836 : /* Set up left block (current one). */
2837 912561 : left = xfs_btree_get_block(cur, level, &lbp);
2838 :
2839 : #ifdef DEBUG
2840 912561 : error = xfs_btree_check_block(cur, left, level, lbp);
2841 912561 : if (error)
2842 0 : goto error0;
2843 : #endif
2844 :
2845 912561 : xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2846 :
2847 : /* Allocate the new block. If we can't do it, we're toast. Give up. */
2848 912561 : error = xfs_btree_alloc_block(cur, &lptr, &rptr, stat);
2849 912561 : if (error)
2850 6 : goto error0;
2851 912555 : if (*stat == 0)
2852 0 : goto out0;
2853 912555 : XFS_BTREE_STATS_INC(cur, alloc);
2854 :
2855 : /* Set up the new block as "right". */
2856 912555 : error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp);
2857 912555 : if (error)
2858 0 : goto error0;
2859 :
2860 : /* Fill in the btree header for the new right block. */
2861 1825110 : xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
2862 :
2863 : /*
2864 : * Split the entries between the old and the new block evenly.
2865 : * Make sure that if there's an odd number of entries now, that
2866 : * each new block will have the same number of entries.
2867 : */
2868 912554 : lrecs = xfs_btree_get_numrecs(left);
2869 912554 : rrecs = lrecs / 2;
2870 912554 : if ((lrecs & 1) && cur->bc_levels[level].ptr <= rrecs + 1)
2871 85929 : rrecs++;
2872 912554 : src_index = (lrecs - rrecs + 1);
2873 :
2874 912554 : XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2875 :
2876 : /* Adjust numrecs for the later get_*_keys() calls. */
2877 912554 : lrecs -= rrecs;
2878 912554 : xfs_btree_set_numrecs(left, lrecs);
2879 1825108 : xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2880 :
2881 : /*
2882 : * Copy btree block entries from the left block over to the
2883 : * new block, the right. Update the right block and log the
2884 : * changes.
2885 : */
2886 912554 : if (level > 0) {
2887 : /* It's a non-leaf. Move keys and pointers. */
2888 4679 : union xfs_btree_key *lkp; /* left btree key */
2889 4679 : union xfs_btree_ptr *lpp; /* left address pointer */
2890 4679 : union xfs_btree_key *rkp; /* right btree key */
2891 4679 : union xfs_btree_ptr *rpp; /* right address pointer */
2892 :
2893 4679 : lkp = xfs_btree_key_addr(cur, src_index, left);
2894 4679 : lpp = xfs_btree_ptr_addr(cur, src_index, left);
2895 4680 : rkp = xfs_btree_key_addr(cur, 1, right);
2896 4680 : rpp = xfs_btree_ptr_addr(cur, 1, right);
2897 :
2898 9360 : for (i = src_index; i < rrecs; i++) {
2899 0 : error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
2900 0 : if (error)
2901 0 : goto error0;
2902 : }
2903 :
2904 : /* Copy the keys & pointers to the new block. */
2905 4680 : xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2906 4680 : xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2907 :
2908 4680 : xfs_btree_log_keys(cur, rbp, 1, rrecs);
2909 4680 : xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2910 :
2911 : /* Stash the keys of the new block for later insertion. */
2912 4680 : xfs_btree_get_node_keys(cur, right, key);
2913 : } else {
2914 : /* It's a leaf. Move records. */
2915 907875 : union xfs_btree_rec *lrp; /* left record pointer */
2916 907875 : union xfs_btree_rec *rrp; /* right record pointer */
2917 :
2918 907875 : lrp = xfs_btree_rec_addr(cur, src_index, left);
2919 907875 : rrp = xfs_btree_rec_addr(cur, 1, right);
2920 :
2921 : /* Copy records to the new block. */
2922 907875 : xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2923 907875 : xfs_btree_log_recs(cur, rbp, 1, rrecs);
2924 :
2925 : /* Stash the keys of the new block for later insertion. */
2926 907875 : xfs_btree_get_leaf_keys(cur, right, key);
2927 : }
2928 :
2929 : /*
2930 : * Find the left block number by looking in the buffer.
2931 : * Adjust sibling pointers.
2932 : */
2933 912555 : xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2934 912555 : xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2935 912555 : xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2936 912555 : xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2937 :
2938 912555 : xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2939 912555 : xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2940 :
2941 : /*
2942 : * If there's a block to the new block's right, make that block
2943 : * point back to right instead of to left.
2944 : */
2945 1825110 : if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
2946 524844 : error = xfs_btree_read_buf_block(cur, &rrptr,
2947 : 0, &rrblock, &rrbp);
2948 524844 : if (error)
2949 206 : goto error0;
2950 524638 : xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2951 524638 : xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2952 : }
2953 :
2954 : /* Update the parent high keys of the left block, if needed. */
2955 912349 : if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2956 545712 : error = xfs_btree_update_keys(cur, level);
2957 545712 : if (error)
2958 0 : goto error0;
2959 : }
2960 :
2961 : /*
2962 : * If the cursor is really in the right block, move it there.
2963 : * If it's just pointing past the last entry in left, then we'll
2964 : * insert there, so don't change anything in that case.
2965 : */
2966 912349 : if (cur->bc_levels[level].ptr > lrecs + 1) {
2967 672624 : xfs_btree_setbuf(cur, level, rbp);
2968 672624 : cur->bc_levels[level].ptr -= lrecs;
2969 : }
2970 : /*
2971 : * If there are more levels, we'll need another cursor which refers
2972 : * the right block, no matter where this cursor was.
2973 : */
2974 912349 : if (level + 1 < cur->bc_nlevels) {
2975 893769 : error = xfs_btree_dup_cursor(cur, curp);
2976 893769 : if (error)
2977 3 : goto error0;
2978 893766 : (*curp)->bc_levels[level + 1].ptr++;
2979 : }
2980 912346 : *ptrp = rptr;
2981 912346 : *stat = 1;
2982 912346 : return 0;
2983 : out0:
2984 0 : *stat = 0;
2985 0 : return 0;
2986 :
2987 : error0:
2988 : return error;
2989 : }
2990 :
2991 : #ifdef __KERNEL__
2992 : struct xfs_btree_split_args {
2993 : struct xfs_btree_cur *cur;
2994 : int level;
2995 : union xfs_btree_ptr *ptrp;
2996 : union xfs_btree_key *key;
2997 : struct xfs_btree_cur **curp;
2998 : int *stat; /* success/failure */
2999 : int result;
3000 : bool kswapd; /* allocation in kswapd context */
3001 : struct completion *done;
3002 : struct work_struct work;
3003 : };
3004 :
3005 : /*
3006 : * Stack switching interfaces for allocation
3007 : */
3008 : static void
3009 20291 : xfs_btree_split_worker(
3010 : struct work_struct *work)
3011 : {
3012 20291 : struct xfs_btree_split_args *args = container_of(work,
3013 : struct xfs_btree_split_args, work);
3014 20291 : unsigned long pflags;
3015 20291 : unsigned long new_pflags = 0;
3016 :
3017 : /*
3018 : * we are in a transaction context here, but may also be doing work
3019 : * in kswapd context, and hence we may need to inherit that state
3020 : * temporarily to ensure that we don't block waiting for memory reclaim
3021 : * in any way.
3022 : */
3023 20291 : if (args->kswapd)
3024 0 : new_pflags |= PF_MEMALLOC | PF_KSWAPD;
3025 :
3026 20291 : current_set_flags_nested(&pflags, new_pflags);
3027 20291 : xfs_trans_set_context(args->cur->bc_tp);
3028 :
3029 20291 : args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
3030 : args->key, args->curp, args->stat);
3031 :
3032 20291 : xfs_trans_clear_context(args->cur->bc_tp);
3033 20291 : current_restore_flags_nested(&pflags, new_pflags);
3034 :
3035 : /*
3036 : * Do not access args after complete() has run here. We don't own args
3037 : * and the owner may run and free args before we return here.
3038 : */
3039 20291 : complete(args->done);
3040 :
3041 20291 : }
3042 :
3043 : /*
3044 : * BMBT split requests often come in with little stack to work on so we push
3045 : * them off to a worker thread so there is lots of stack to use. For the other
3046 : * btree types, just call directly to avoid the context switch overhead here.
3047 : *
3048 : * Care must be taken here - the work queue rescuer thread introduces potential
3049 : * AGF <> worker queue deadlocks if the BMBT block allocation has to lock new
3050 : * AGFs to allocate blocks. A task being run by the rescuer could attempt to
3051 : * lock an AGF that is already locked by a task queued to run by the rescuer,
3052 : * resulting in an ABBA deadlock as the rescuer cannot run the lock holder to
3053 : * release it until the current thread it is running gains the lock.
3054 : *
3055 : * To avoid this issue, we only ever queue BMBT splits that don't have an AGF
3056 : * already locked to allocate from. The only place that doesn't hold an AGF
3057 : * locked is unwritten extent conversion at IO completion, but that has already
3058 : * been offloaded to a worker thread and hence has no stack consumption issues
3059 : * we have to worry about.
3060 : */
3061 : STATIC int /* error */
3062 912561 : xfs_btree_split(
3063 : struct xfs_btree_cur *cur,
3064 : int level,
3065 : union xfs_btree_ptr *ptrp,
3066 : union xfs_btree_key *key,
3067 : struct xfs_btree_cur **curp,
3068 : int *stat) /* success/failure */
3069 : {
3070 912561 : struct xfs_btree_split_args args;
3071 912561 : DECLARE_COMPLETION_ONSTACK(done);
3072 :
3073 912561 : if (cur->bc_btnum != XFS_BTNUM_BMAP ||
3074 238867 : cur->bc_tp->t_highest_agno == NULLAGNUMBER)
3075 892270 : return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
3076 :
3077 20291 : args.cur = cur;
3078 20291 : args.level = level;
3079 20291 : args.ptrp = ptrp;
3080 20291 : args.key = key;
3081 20291 : args.curp = curp;
3082 20291 : args.stat = stat;
3083 20291 : args.done = &done;
3084 20291 : args.kswapd = current_is_kswapd();
3085 20291 : INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
3086 20291 : queue_work(xfs_alloc_wq, &args.work);
3087 20291 : wait_for_completion(&done);
3088 20291 : destroy_work_on_stack(&args.work);
3089 20291 : return args.result;
3090 : }
3091 : #else
3092 : #define xfs_btree_split __xfs_btree_split
3093 : #endif /* __KERNEL__ */
3094 :
3095 : static inline void
3096 1196433 : xfs_btree_iroot_realloc(
3097 : struct xfs_btree_cur *cur,
3098 : int rec_diff)
3099 : {
3100 1196433 : ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
3101 :
3102 1196433 : xfs_iroot_realloc(cur->bc_ino.ip, cur->bc_ino.whichfork,
3103 1196433 : cur->bc_ops->iroot_ops, rec_diff);
3104 1196433 : }
3105 :
3106 : /*
3107 : * Move the records from a root leaf block to a separate block.
3108 : *
3109 : * Trickery here: The amount of memory that we need per record for the incore
3110 : * root block changes when we convert a leaf block to an internal block.
3111 : * Therefore, we copy leaf records into the new btree block (cblock) before
3112 : * freeing the incore root block and changing the tree height.
3113 : *
3114 : * Once we've changed the tree height, we allocate a new incore root block
3115 : * (which will now be an internal root block) and populate it with a pointer to
3116 : * cblock and the relevant keys.
3117 : */
3118 : STATIC void
3119 14842 : xfs_btree_promote_leaf_iroot(
3120 : struct xfs_btree_cur *cur,
3121 : struct xfs_btree_block *block,
3122 : struct xfs_buf *cbp,
3123 : union xfs_btree_ptr *cptr,
3124 : struct xfs_btree_block *cblock)
3125 : {
3126 14842 : union xfs_btree_rec *rp;
3127 14842 : union xfs_btree_rec *crp;
3128 14842 : union xfs_btree_key *kp;
3129 14842 : union xfs_btree_ptr *pp;
3130 14842 : size_t size;
3131 14842 : int numrecs = xfs_btree_get_numrecs(block);
3132 :
3133 : /* Copy the records from the leaf root into the new child block. */
3134 14842 : rp = xfs_btree_rec_addr(cur, 1, block);
3135 14842 : crp = xfs_btree_rec_addr(cur, 1, cblock);
3136 14842 : xfs_btree_copy_recs(cur, crp, rp, numrecs);
3137 :
3138 : /* Zap the old root and change the tree height. */
3139 14842 : xfs_iroot_free(cur->bc_ino.ip, cur->bc_ino.whichfork);
3140 14842 : cur->bc_nlevels++;
3141 14842 : cur->bc_levels[1].ptr = 1;
3142 :
3143 : /*
3144 : * Allocate a new internal root block buffer and reinitialize it to
3145 : * point to a single new child.
3146 : */
3147 14842 : size = cur->bc_ops->iroot_ops->size(cur->bc_mp, cur->bc_nlevels - 1, 1);
3148 14842 : xfs_iroot_alloc(cur->bc_ino.ip, cur->bc_ino.whichfork, size);
3149 14842 : block = xfs_btree_get_iroot(cur);
3150 14842 : xfs_btree_init_block(cur->bc_mp, block, cur->bc_ops,
3151 14842 : cur->bc_nlevels - 1, 1, cur->bc_ino.ip->i_ino);
3152 :
3153 14842 : pp = xfs_btree_ptr_addr(cur, 1, block);
3154 14842 : kp = xfs_btree_key_addr(cur, 1, block);
3155 14842 : xfs_btree_copy_ptrs(cur, pp, cptr, 1);
3156 14842 : xfs_btree_get_keys(cur, cblock, kp);
3157 :
3158 : /* Attach the new block to the cursor and log it. */
3159 14842 : xfs_btree_setbuf(cur, 0, cbp);
3160 14842 : xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
3161 14842 : xfs_btree_log_recs(cur, cbp, 1, numrecs);
3162 14842 : }
3163 :
3164 : /*
3165 : * Move the keys and pointers from a root block to a separate block.
3166 : *
3167 : * Since the keyptr size does not change, all we have to do is increase the
3168 : * tree height, copy the keyptrs to the new internal node (cblock), shrink
3169 : * the root, and copy the pointers there.
3170 : */
3171 : STATIC int
3172 3009 : xfs_btree_promote_node_iroot(
3173 : struct xfs_btree_cur *cur,
3174 : struct xfs_btree_block *block,
3175 : int level,
3176 : struct xfs_buf *cbp,
3177 : union xfs_btree_ptr *cptr,
3178 : struct xfs_btree_block *cblock)
3179 : {
3180 3009 : union xfs_btree_key *ckp;
3181 3009 : union xfs_btree_key *kp;
3182 3009 : union xfs_btree_ptr *cpp;
3183 3009 : union xfs_btree_ptr *pp;
3184 3009 : int i;
3185 3009 : int error;
3186 3009 : int numrecs = xfs_btree_get_numrecs(block);
3187 :
3188 : /*
3189 : * Increase tree height, adjusting the root block level to match.
3190 : * We cannot change the root btree node size until we've copied the
3191 : * block contents to the new child block.
3192 : */
3193 3009 : be16_add_cpu(&block->bb_level, 1);
3194 3009 : cur->bc_nlevels++;
3195 3009 : cur->bc_levels[level + 1].ptr = 1;
3196 :
3197 : /*
3198 : * Adjust the root btree record count, then copy the keys from the old
3199 : * root to the new child block.
3200 : */
3201 3009 : xfs_btree_set_numrecs(block, 1);
3202 3009 : kp = xfs_btree_key_addr(cur, 1, block);
3203 3009 : ckp = xfs_btree_key_addr(cur, 1, cblock);
3204 3009 : xfs_btree_copy_keys(cur, ckp, kp, numrecs);
3205 :
3206 : /* Check the pointers and copy them to the new child block. */
3207 3009 : pp = xfs_btree_ptr_addr(cur, 1, block);
3208 3009 : cpp = xfs_btree_ptr_addr(cur, 1, cblock);
3209 35935 : for (i = 0; i < numrecs; i++) {
3210 29917 : error = xfs_btree_debug_check_ptr(cur, pp, i, level);
3211 29917 : if (error)
3212 0 : return error;
3213 : }
3214 3009 : xfs_btree_copy_ptrs(cur, cpp, pp, numrecs);
3215 :
3216 : /*
3217 : * Set the first keyptr to point to the new child block, then shrink
3218 : * the memory buffer for the root block.
3219 : */
3220 3009 : error = xfs_btree_debug_check_ptr(cur, cptr, 0, level);
3221 3009 : if (error)
3222 : return error;
3223 3009 : xfs_btree_copy_ptrs(cur, pp, cptr, 1);
3224 3009 : xfs_btree_get_keys(cur, cblock, kp);
3225 3009 : xfs_btree_iroot_realloc(cur, 1 - numrecs);
3226 :
3227 : /* Attach the new block to the cursor and log it. */
3228 3009 : xfs_btree_setbuf(cur, level, cbp);
3229 3009 : xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
3230 3009 : xfs_btree_log_keys(cur, cbp, 1, numrecs);
3231 3009 : xfs_btree_log_ptrs(cur, cbp, 1, numrecs);
3232 3009 : return 0;
3233 : }
3234 :
3235 : /*
3236 : * Copy the old inode root contents into a real block and make the
3237 : * broot point to it.
3238 : */
3239 : int /* error */
3240 17851 : xfs_btree_new_iroot(
3241 : struct xfs_btree_cur *cur, /* btree cursor */
3242 : int *logflags, /* logging flags for inode */
3243 : int *stat) /* return status - 0 fail */
3244 : {
3245 17851 : struct xfs_buf *cbp; /* buffer for cblock */
3246 17851 : struct xfs_btree_block *block; /* btree block */
3247 17851 : struct xfs_btree_block *cblock; /* child btree block */
3248 17851 : union xfs_btree_ptr aptr;
3249 17851 : union xfs_btree_ptr nptr; /* new block addr */
3250 17851 : int level; /* btree level */
3251 17851 : int error; /* error return code */
3252 :
3253 17851 : XFS_BTREE_STATS_INC(cur, newroot);
3254 :
3255 17851 : ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
3256 :
3257 17851 : level = cur->bc_nlevels - 1;
3258 :
3259 17851 : block = xfs_btree_get_iroot(cur);
3260 17851 : ASSERT(level > 0 || (cur->bc_flags & XFS_BTREE_IROOT_RECORDS));
3261 17851 : if (level > 0)
3262 3009 : aptr = *xfs_btree_ptr_addr(cur, 1, block);
3263 : else
3264 14842 : aptr.l = cpu_to_be64(XFS_INO_TO_FSB(cur->bc_mp,
3265 : cur->bc_ino.ip->i_ino));
3266 :
3267 : /* Allocate the new block. If we can't do it, we're toast. Give up. */
3268 17851 : error = xfs_btree_alloc_block(cur, &aptr, &nptr, stat);
3269 17851 : if (error)
3270 0 : goto error0;
3271 17851 : if (*stat == 0)
3272 : return 0;
3273 :
3274 17851 : XFS_BTREE_STATS_INC(cur, alloc);
3275 :
3276 : /* Copy the root into a real block. */
3277 17851 : error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp);
3278 17851 : if (error)
3279 0 : goto error0;
3280 :
3281 : /*
3282 : * we can't just memcpy() the root in for CRC enabled btree blocks.
3283 : * In that case have to also ensure the blkno remains correct
3284 : */
3285 35702 : memcpy(cblock, block, xfs_btree_block_len(cur));
3286 17851 : if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
3287 17851 : __be64 bno = cpu_to_be64(xfs_buf_daddr(cbp));
3288 17851 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
3289 17851 : cblock->bb_u.l.bb_blkno = bno;
3290 : else
3291 0 : cblock->bb_u.s.bb_blkno = bno;
3292 : }
3293 :
3294 17851 : if (level > 0) {
3295 3009 : error = xfs_btree_promote_node_iroot(cur, block, level, cbp,
3296 : &nptr, cblock);
3297 3009 : if (error)
3298 0 : goto error0;
3299 : } else {
3300 14842 : xfs_btree_promote_leaf_iroot(cur, block, cbp, &nptr, cblock);
3301 : }
3302 :
3303 35702 : *logflags |=
3304 17851 : XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork);
3305 17851 : *stat = 1;
3306 17851 : return 0;
3307 : error0:
3308 : return error;
3309 : }
3310 :
3311 : /*
3312 : * Allocate a new root block, fill it in.
3313 : */
3314 : STATIC int /* error */
3315 18580 : xfs_btree_new_root(
3316 : struct xfs_btree_cur *cur, /* btree cursor */
3317 : int *stat) /* success/failure */
3318 : {
3319 18580 : struct xfs_btree_block *block; /* one half of the old root block */
3320 18580 : struct xfs_buf *bp; /* buffer containing block */
3321 18580 : int error; /* error return value */
3322 18580 : struct xfs_buf *lbp; /* left buffer pointer */
3323 18580 : struct xfs_btree_block *left; /* left btree block */
3324 18580 : struct xfs_buf *nbp; /* new (root) buffer */
3325 18580 : struct xfs_btree_block *new; /* new (root) btree block */
3326 18580 : int nptr; /* new value for key index, 1 or 2 */
3327 18580 : struct xfs_buf *rbp; /* right buffer pointer */
3328 18580 : struct xfs_btree_block *right; /* right btree block */
3329 18580 : union xfs_btree_ptr rptr;
3330 18580 : union xfs_btree_ptr lptr;
3331 :
3332 18580 : XFS_BTREE_STATS_INC(cur, newroot);
3333 :
3334 : /* initialise our start point from the cursor */
3335 18580 : cur->bc_ops->init_ptr_from_cur(cur, &rptr);
3336 :
3337 : /* Allocate the new block. If we can't do it, we're toast. Give up. */
3338 18580 : error = xfs_btree_alloc_block(cur, &rptr, &lptr, stat);
3339 18580 : if (error)
3340 0 : goto error0;
3341 18580 : if (*stat == 0)
3342 0 : goto out0;
3343 18580 : XFS_BTREE_STATS_INC(cur, alloc);
3344 :
3345 : /* Set up the new block. */
3346 18580 : error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp);
3347 18580 : if (error)
3348 0 : goto error0;
3349 :
3350 : /* Set the root in the holding structure increasing the level by 1. */
3351 18580 : cur->bc_ops->set_root(cur, &lptr, 1);
3352 :
3353 : /*
3354 : * At the previous root level there are now two blocks: the old root,
3355 : * and the new block generated when it was split. We don't know which
3356 : * one the cursor is pointing at, so we set up variables "left" and
3357 : * "right" for each case.
3358 : */
3359 18580 : block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
3360 :
3361 : #ifdef DEBUG
3362 18580 : error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
3363 18580 : if (error)
3364 0 : goto error0;
3365 : #endif
3366 :
3367 18580 : xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3368 37160 : if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3369 : /* Our block is left, pick up the right block. */
3370 7955 : lbp = bp;
3371 7955 : xfs_btree_buf_to_ptr(cur, lbp, &lptr);
3372 7955 : left = block;
3373 7955 : error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
3374 7955 : if (error)
3375 1 : goto error0;
3376 7954 : bp = rbp;
3377 7954 : nptr = 1;
3378 : } else {
3379 : /* Our block is right, pick up the left block. */
3380 10625 : rbp = bp;
3381 10625 : xfs_btree_buf_to_ptr(cur, rbp, &rptr);
3382 10625 : right = block;
3383 10625 : xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
3384 10625 : error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
3385 10625 : if (error)
3386 0 : goto error0;
3387 10625 : bp = lbp;
3388 10625 : nptr = 2;
3389 : }
3390 :
3391 : /* Fill in the new block's btree header and log it. */
3392 18579 : xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
3393 18579 : xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
3394 55737 : ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
3395 : !xfs_btree_ptr_is_null(cur, &rptr));
3396 :
3397 : /* Fill in the key data in the new root. */
3398 37158 : if (xfs_btree_get_level(left) > 0) {
3399 : /*
3400 : * Get the keys for the left block's keys and put them directly
3401 : * in the parent block. Do the same for the right block.
3402 : */
3403 376 : xfs_btree_get_node_keys(cur, left,
3404 : xfs_btree_key_addr(cur, 1, new));
3405 376 : xfs_btree_get_node_keys(cur, right,
3406 : xfs_btree_key_addr(cur, 2, new));
3407 : } else {
3408 : /*
3409 : * Get the keys for the left block's records and put them
3410 : * directly in the parent block. Do the same for the right
3411 : * block.
3412 : */
3413 18203 : xfs_btree_get_leaf_keys(cur, left,
3414 : xfs_btree_key_addr(cur, 1, new));
3415 18203 : xfs_btree_get_leaf_keys(cur, right,
3416 : xfs_btree_key_addr(cur, 2, new));
3417 : }
3418 18579 : xfs_btree_log_keys(cur, nbp, 1, 2);
3419 :
3420 : /* Fill in the pointer data in the new root. */
3421 18579 : xfs_btree_copy_ptrs(cur,
3422 : xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
3423 18579 : xfs_btree_copy_ptrs(cur,
3424 : xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
3425 18579 : xfs_btree_log_ptrs(cur, nbp, 1, 2);
3426 :
3427 : /* Fix up the cursor. */
3428 18579 : xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
3429 18579 : cur->bc_levels[cur->bc_nlevels].ptr = nptr;
3430 18579 : cur->bc_nlevels++;
3431 18579 : ASSERT(cur->bc_nlevels <= cur->bc_maxlevels);
3432 18579 : *stat = 1;
3433 18579 : return 0;
3434 : error0:
3435 : return error;
3436 : out0:
3437 0 : *stat = 0;
3438 0 : return 0;
3439 : }
3440 :
3441 : STATIC int
3442 97239884 : xfs_btree_make_block_unfull(
3443 : struct xfs_btree_cur *cur, /* btree cursor */
3444 : int level, /* btree level */
3445 : int numrecs,/* # of recs in block */
3446 : int *oindex,/* old tree index */
3447 : int *index, /* new tree index */
3448 : union xfs_btree_ptr *nptr, /* new btree ptr */
3449 : struct xfs_btree_cur **ncur, /* new btree cursor */
3450 : union xfs_btree_key *key, /* key of new block */
3451 : int *stat)
3452 : {
3453 97239884 : int error = 0;
3454 :
3455 97239884 : if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
3456 41705289 : level == cur->bc_nlevels - 1) {
3457 648865 : struct xfs_inode *ip = cur->bc_ino.ip;
3458 :
3459 648865 : if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
3460 : /* A root block that can be made bigger. */
3461 631014 : xfs_btree_iroot_realloc(cur, 1);
3462 631014 : *stat = 1;
3463 : } else {
3464 : /* A root block that needs replacing */
3465 17851 : int logflags = 0;
3466 :
3467 17851 : error = xfs_btree_new_iroot(cur, &logflags, stat);
3468 17851 : if (error || *stat == 0)
3469 0 : return error;
3470 :
3471 17851 : xfs_trans_log_inode(cur->bc_tp, ip, logflags);
3472 : }
3473 :
3474 648865 : return 0;
3475 : }
3476 :
3477 : /* First, try shifting an entry to the right neighbor. */
3478 96591019 : error = xfs_btree_rshift(cur, level, stat);
3479 96590800 : if (error || *stat)
3480 : return error;
3481 :
3482 : /* Next, try shifting an entry to the left neighbor. */
3483 62581869 : error = xfs_btree_lshift(cur, level, stat);
3484 62581891 : if (error)
3485 : return error;
3486 :
3487 62581841 : if (*stat) {
3488 61669280 : *oindex = *index = cur->bc_levels[level].ptr;
3489 61669280 : return 0;
3490 : }
3491 :
3492 : /*
3493 : * Next, try splitting the current block in half.
3494 : *
3495 : * If this works we have to re-set our variables because we
3496 : * could be in a different block now.
3497 : */
3498 912561 : error = xfs_btree_split(cur, level, nptr, key, ncur, stat);
3499 912561 : if (error || *stat == 0)
3500 : return error;
3501 :
3502 :
3503 912346 : *index = cur->bc_levels[level].ptr;
3504 912346 : return 0;
3505 : }
3506 :
3507 : /*
3508 : * Insert one record/level. Return information to the caller
3509 : * allowing the next level up to proceed if necessary.
3510 : */
3511 : STATIC int
3512 835321062 : xfs_btree_insrec(
3513 : struct xfs_btree_cur *cur, /* btree cursor */
3514 : int level, /* level to insert record at */
3515 : union xfs_btree_ptr *ptrp, /* i/o: block number inserted */
3516 : union xfs_btree_rec *rec, /* record to insert */
3517 : union xfs_btree_key *key, /* i/o: block key for ptrp */
3518 : struct xfs_btree_cur **curp, /* output: new cursor replacing cur */
3519 : int *stat) /* success/failure */
3520 : {
3521 835321062 : struct xfs_btree_block *block; /* btree block */
3522 835321062 : struct xfs_buf *bp; /* buffer for block */
3523 835321062 : union xfs_btree_ptr nptr; /* new block ptr */
3524 835321062 : struct xfs_btree_cur *ncur = NULL; /* new btree cursor */
3525 835321062 : union xfs_btree_key nkey; /* new block key */
3526 835321062 : union xfs_btree_key *lkey;
3527 835321062 : int optr; /* old key/record index */
3528 835321062 : int ptr; /* key/record index */
3529 835321062 : int numrecs;/* number of records */
3530 835321062 : int error; /* error return value */
3531 835321062 : int i;
3532 835321062 : xfs_daddr_t old_bn;
3533 :
3534 835321062 : ncur = NULL;
3535 835321062 : lkey = &nkey;
3536 :
3537 : /*
3538 : * If we have an external root pointer, and we've made it to the
3539 : * root level, allocate a new root block and we're done.
3540 : */
3541 835321062 : if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
3542 706025648 : (level >= cur->bc_nlevels)) {
3543 18580 : error = xfs_btree_new_root(cur, stat);
3544 18580 : xfs_btree_set_ptr_null(cur, ptrp);
3545 :
3546 18580 : return error;
3547 : }
3548 :
3549 : /* If we're off the left edge, return failure. */
3550 835302482 : ptr = cur->bc_levels[level].ptr;
3551 835302482 : if (ptr == 0) {
3552 0 : *stat = 0;
3553 0 : return 0;
3554 : }
3555 :
3556 835302482 : optr = ptr;
3557 :
3558 835302482 : XFS_BTREE_STATS_INC(cur, insrec);
3559 :
3560 : /* Get pointers to the btree buffer and block. */
3561 835302482 : block = xfs_btree_get_block(cur, level, &bp);
3562 835307516 : old_bn = bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL;
3563 835307516 : numrecs = xfs_btree_get_numrecs(block);
3564 :
3565 : #ifdef DEBUG
3566 835307516 : error = xfs_btree_check_block(cur, block, level, bp);
3567 835321564 : if (error)
3568 0 : goto error0;
3569 :
3570 : /* Check that the new entry is being inserted in the right place. */
3571 835321564 : if (ptr <= numrecs) {
3572 407772929 : if (level == 0) {
3573 407262232 : ASSERT(cur->bc_ops->recs_inorder(cur, rec,
3574 : xfs_btree_rec_addr(cur, ptr, block)));
3575 : } else {
3576 510697 : ASSERT(cur->bc_ops->keys_inorder(cur, key,
3577 : xfs_btree_key_addr(cur, ptr, block)));
3578 : }
3579 : }
3580 : #endif
3581 :
3582 : /*
3583 : * If the block is full, we can't insert the new entry until we
3584 : * make the block un-full.
3585 : */
3586 835324858 : xfs_btree_set_ptr_null(cur, &nptr);
3587 835324858 : if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
3588 97239734 : error = xfs_btree_make_block_unfull(cur, level, numrecs,
3589 : &optr, &ptr, &nptr, &ncur, lkey, stat);
3590 97239902 : if (error || *stat == 0)
3591 405 : goto error0;
3592 : }
3593 :
3594 : /*
3595 : * The current block may have changed if the block was
3596 : * previously full and we have just made space in it.
3597 : */
3598 835316632 : block = xfs_btree_get_block(cur, level, &bp);
3599 835317093 : numrecs = xfs_btree_get_numrecs(block);
3600 :
3601 : #ifdef DEBUG
3602 835317093 : error = xfs_btree_check_block(cur, block, level, bp);
3603 835314141 : if (error)
3604 0 : goto error0;
3605 : #endif
3606 :
3607 : /*
3608 : * At this point we know there's room for our new entry in the block
3609 : * we're pointing at.
3610 : */
3611 835314141 : XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
3612 :
3613 835314141 : if (level > 0) {
3614 : /* It's a nonleaf. make a hole in the keys and ptrs */
3615 893760 : union xfs_btree_key *kp;
3616 893760 : union xfs_btree_ptr *pp;
3617 :
3618 893760 : kp = xfs_btree_key_addr(cur, ptr, block);
3619 893760 : pp = xfs_btree_ptr_addr(cur, ptr, block);
3620 :
3621 16049672 : for (i = numrecs - ptr; i >= 0; i--) {
3622 14262152 : error = xfs_btree_debug_check_ptr(cur, pp, i, level);
3623 14262152 : if (error)
3624 0 : goto error0;
3625 : }
3626 :
3627 893760 : xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
3628 893760 : xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
3629 :
3630 893760 : error = xfs_btree_debug_check_ptr(cur, ptrp, 0, level);
3631 893760 : if (error)
3632 0 : goto error0;
3633 :
3634 : /* Now put the new data in, bump numrecs and log it. */
3635 893760 : xfs_btree_copy_keys(cur, kp, key, 1);
3636 893760 : xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
3637 893760 : numrecs++;
3638 893760 : xfs_btree_set_numrecs(block, numrecs);
3639 893760 : xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
3640 893760 : xfs_btree_log_keys(cur, bp, ptr, numrecs);
3641 : #ifdef DEBUG
3642 893760 : if (ptr < numrecs) {
3643 510626 : ASSERT(cur->bc_ops->keys_inorder(cur, kp,
3644 : xfs_btree_key_addr(cur, ptr + 1, block)));
3645 : }
3646 : #endif
3647 : } else {
3648 : /* It's a leaf. make a hole in the records */
3649 834420381 : union xfs_btree_rec *rp;
3650 :
3651 834420381 : rp = xfs_btree_rec_addr(cur, ptr, block);
3652 :
3653 834420381 : xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
3654 :
3655 : /* Now put the new data in, bump numrecs and log it. */
3656 834417588 : xfs_btree_copy_recs(cur, rp, rec, 1);
3657 834424876 : xfs_btree_set_numrecs(block, ++numrecs);
3658 834424876 : xfs_btree_log_recs(cur, bp, ptr, numrecs);
3659 : #ifdef DEBUG
3660 834424377 : if (ptr < numrecs) {
3661 407266044 : ASSERT(cur->bc_ops->recs_inorder(cur, rp,
3662 : xfs_btree_rec_addr(cur, ptr + 1, block)));
3663 : }
3664 : #endif
3665 : }
3666 :
3667 : /* Log the new number of records in the btree header. */
3668 835321508 : xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3669 :
3670 : /*
3671 : * Update btree keys to reflect the newly added record or keyptr.
3672 : * There are three cases here to be aware of. Normally, all we have to
3673 : * do is walk towards the root, updating keys as necessary.
3674 : *
3675 : * If the caller had us target a full block for the insertion, we dealt
3676 : * with that by calling the _make_block_unfull function. If the
3677 : * "make unfull" function splits the block, it'll hand us back the key
3678 : * and pointer of the new block. We haven't yet added the new block to
3679 : * the next level up, so if we decide to add the new record to the new
3680 : * block (bp->b_bn != old_bn), we have to update the caller's pointer
3681 : * so that the caller adds the new block with the correct key.
3682 : *
3683 : * However, there is a third possibility-- if the selected block is the
3684 : * root block of an inode-rooted btree and cannot be expanded further,
3685 : * the "make unfull" function moves the root block contents to a new
3686 : * block and updates the root block to point to the new block. In this
3687 : * case, no block pointer is passed back because the block has already
3688 : * been added to the btree. In this case, we need to use the regular
3689 : * key update function, just like the first case. This is critical for
3690 : * overlapping btrees, because the high key must be updated to reflect
3691 : * the entire tree, not just the subtree accessible through the first
3692 : * child of the root (which is now two levels down from the root).
3693 : */
3694 1670640692 : if (!xfs_btree_ptr_is_null(cur, &nptr) &&
3695 912346 : bp && xfs_buf_daddr(bp) != old_bn) {
3696 672623 : xfs_btree_get_keys(cur, block, lkey);
3697 1452148231 : } else if (xfs_btree_needs_key_update(cur, optr)) {
3698 562041723 : error = xfs_btree_update_keys(cur, level);
3699 562040453 : if (error)
3700 0 : goto error0;
3701 : }
3702 :
3703 : /*
3704 : * If we are tracking the last record in the tree and
3705 : * we are at the far right edge of the tree, update it.
3706 : */
3707 835319076 : if (xfs_btree_is_lastrec(cur, block, level)) {
3708 87067042 : cur->bc_ops->update_lastrec(cur, block, rec,
3709 : ptr, LASTREC_INSREC);
3710 : }
3711 :
3712 : /*
3713 : * Return the new block number, if any.
3714 : * If there is one, give back a record value and a cursor too.
3715 : */
3716 835318836 : *ptrp = nptr;
3717 1670637672 : if (!xfs_btree_ptr_is_null(cur, &nptr)) {
3718 912346 : xfs_btree_copy_keys(cur, key, lkey, 1);
3719 912346 : *curp = ncur;
3720 : }
3721 :
3722 835318836 : *stat = 1;
3723 835318836 : return 0;
3724 :
3725 405 : error0:
3726 405 : if (ncur)
3727 0 : xfs_btree_del_cursor(ncur, error);
3728 : return error;
3729 : }
3730 :
3731 : /*
3732 : * Insert the record at the point referenced by cur.
3733 : *
3734 : * A multi-level split of the tree on insert will invalidate the original
3735 : * cursor. All callers of this function should assume that the cursor is
3736 : * no longer valid and revalidate it.
3737 : */
3738 : int
3739 834421218 : xfs_btree_insert(
3740 : struct xfs_btree_cur *cur,
3741 : int *stat)
3742 : {
3743 834421218 : int error; /* error return value */
3744 834421218 : int i; /* result value, 0 for failure */
3745 834421218 : int level; /* current level number in btree */
3746 834421218 : union xfs_btree_ptr nptr; /* new block number (split result) */
3747 834421218 : struct xfs_btree_cur *ncur; /* new cursor (split result) */
3748 834421218 : struct xfs_btree_cur *pcur; /* previous level's cursor */
3749 834421218 : union xfs_btree_key bkey; /* key of block to insert */
3750 834421218 : union xfs_btree_key *key;
3751 834421218 : union xfs_btree_rec rec; /* record to insert */
3752 :
3753 834421218 : level = 0;
3754 834421218 : ncur = NULL;
3755 834421218 : pcur = cur;
3756 834421218 : key = &bkey;
3757 :
3758 834421218 : xfs_btree_set_ptr_null(cur, &nptr);
3759 :
3760 : /* Make a key out of the record data to be inserted, and save it. */
3761 834421218 : cur->bc_ops->init_rec_from_cur(cur, &rec);
3762 834416049 : cur->bc_ops->init_key_from_rec(key, &rec);
3763 :
3764 : /*
3765 : * Loop going up the tree, starting at the leaf level.
3766 : * Stop when we don't get a split block, that must mean that
3767 : * the insert is finished with this level.
3768 : */
3769 835335641 : do {
3770 : /*
3771 : * Insert nrec/nptr into this level of the tree.
3772 : * Note if we fail, nptr will be null.
3773 : */
3774 835335641 : error = xfs_btree_insrec(pcur, level, &nptr, &rec, key,
3775 : &ncur, &i);
3776 835334396 : if (error) {
3777 406 : if (pcur != cur)
3778 6 : xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
3779 406 : goto error0;
3780 : }
3781 :
3782 835333990 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
3783 0 : xfs_btree_mark_sick(cur);
3784 0 : error = -EFSCORRUPTED;
3785 0 : goto error0;
3786 : }
3787 835333990 : level++;
3788 :
3789 : /*
3790 : * See if the cursor we just used is trash.
3791 : * Can't trash the caller's cursor, but otherwise we should
3792 : * if ncur is a new cursor or we're about to be done.
3793 : */
3794 835333990 : if (pcur != cur &&
3795 1783968 : (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
3796 : /* Save the state from the cursor before we trash it */
3797 893760 : if (cur->bc_ops->update_cursor)
3798 238860 : cur->bc_ops->update_cursor(pcur, cur);
3799 893760 : cur->bc_nlevels = pcur->bc_nlevels;
3800 893760 : xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
3801 : }
3802 : /* If we got a new cursor, switch to it. */
3803 835342707 : if (ncur) {
3804 900119 : pcur = ncur;
3805 900119 : ncur = NULL;
3806 : }
3807 1670685414 : } while (!xfs_btree_ptr_is_null(cur, &nptr));
3808 :
3809 834423695 : *stat = i;
3810 834423695 : return 0;
3811 : error0:
3812 : return error;
3813 : }
3814 :
3815 : /*
3816 : * Move the records from a child leaf block to the root block.
3817 : *
3818 : * Trickery here: The amount of memory we need per record for the incore root
3819 : * block changes when we convert a leaf block to an internal block. Therefore,
3820 : * we free the incore root block, change the tree height, allocate a new incore
3821 : * root, and copy the records from the doomed block into the new root.
3822 : */
3823 : STATIC void
3824 14276 : xfs_btree_demote_leaf_child(
3825 : struct xfs_btree_cur *cur,
3826 : struct xfs_btree_block *cblock,
3827 : int numrecs)
3828 : {
3829 14276 : union xfs_btree_rec *rp;
3830 14276 : union xfs_btree_rec *crp;
3831 14276 : struct xfs_btree_block *block;
3832 14276 : size_t size;
3833 :
3834 : /* Zap the old root and change the tree height. */
3835 14276 : xfs_iroot_free(cur->bc_ino.ip, cur->bc_ino.whichfork);
3836 14276 : cur->bc_levels[0].bp = NULL;
3837 14276 : cur->bc_nlevels--;
3838 :
3839 : /*
3840 : * Allocate a new internal root block buffer and reinitialize it with
3841 : * the leaf records in the child.
3842 : */
3843 14276 : size = cur->bc_ops->iroot_ops->size(cur->bc_mp, 0, numrecs);
3844 14276 : xfs_iroot_alloc(cur->bc_ino.ip, cur->bc_ino.whichfork, size);
3845 14276 : block = xfs_btree_get_iroot(cur);
3846 14276 : xfs_btree_init_block(cur->bc_mp, block, cur->bc_ops, 0, numrecs,
3847 14276 : cur->bc_ino.ip->i_ino);
3848 :
3849 14276 : rp = xfs_btree_rec_addr(cur, 1, block);
3850 14276 : crp = xfs_btree_rec_addr(cur, 1, cblock);
3851 14276 : xfs_btree_copy_recs(cur, rp, crp, numrecs);
3852 14276 : }
3853 :
3854 : /*
3855 : * Move the keyptrs from a child node block to the root block.
3856 : *
3857 : * Since the keyptr size does not change, all we have to do is increase the
3858 : * tree height, copy the keyptrs to the new internal node (cblock), shrink
3859 : * the root, and copy the pointers there.
3860 : */
3861 : STATIC int
3862 2193 : xfs_btree_demote_node_child(
3863 : struct xfs_btree_cur *cur,
3864 : struct xfs_btree_block *cblock,
3865 : int level,
3866 : int numrecs)
3867 : {
3868 2193 : struct xfs_btree_block *block;
3869 2193 : union xfs_btree_key *ckp;
3870 2193 : union xfs_btree_key *kp;
3871 2193 : union xfs_btree_ptr *cpp;
3872 2193 : union xfs_btree_ptr *pp;
3873 2193 : int i;
3874 2193 : int error;
3875 2193 : int diff;
3876 :
3877 : /*
3878 : * Adjust the root btree node size and the record count to match the
3879 : * doomed child so that we can copy the keyptrs ahead of changing the
3880 : * tree shape.
3881 : */
3882 2193 : diff = numrecs - cur->bc_ops->get_maxrecs(cur, level);
3883 2193 : xfs_btree_iroot_realloc(cur, diff);
3884 2193 : block = xfs_btree_get_iroot(cur);
3885 :
3886 2193 : xfs_btree_set_numrecs(block, numrecs);
3887 2193 : ASSERT(block->bb_numrecs == cblock->bb_numrecs);
3888 :
3889 : /* Copy keys from the doomed block. */
3890 2193 : kp = xfs_btree_key_addr(cur, 1, block);
3891 2193 : ckp = xfs_btree_key_addr(cur, 1, cblock);
3892 2193 : xfs_btree_copy_keys(cur, kp, ckp, numrecs);
3893 :
3894 : /* Copy pointers from the doomed block. */
3895 2193 : pp = xfs_btree_ptr_addr(cur, 1, block);
3896 2193 : cpp = xfs_btree_ptr_addr(cur, 1, cblock);
3897 28453 : for (i = 0; i < numrecs; i++) {
3898 24067 : error = xfs_btree_debug_check_ptr(cur, cpp, i, level - 1);
3899 24067 : if (error)
3900 0 : return error;
3901 : }
3902 2193 : xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
3903 :
3904 : /* Decrease tree height, adjusting the root block level to match. */
3905 2193 : cur->bc_levels[level - 1].bp = NULL;
3906 2193 : be16_add_cpu(&block->bb_level, -1);
3907 2193 : cur->bc_nlevels--;
3908 2193 : return 0;
3909 : }
3910 :
3911 : /*
3912 : * Try to merge a non-leaf block back into the inode root.
3913 : *
3914 : * Note: the killroot names comes from the fact that we're effectively
3915 : * killing the old root block. But because we can't just delete the
3916 : * inode we have to copy the single block it was pointing to into the
3917 : * inode.
3918 : */
3919 : STATIC int
3920 15128998 : xfs_btree_kill_iroot(
3921 : struct xfs_btree_cur *cur)
3922 : {
3923 15128998 : struct xfs_inode *ip = cur->bc_ino.ip;
3924 15128998 : struct xfs_btree_block *block;
3925 15128998 : struct xfs_btree_block *cblock;
3926 15128998 : struct xfs_buf *cbp;
3927 15128998 : int level;
3928 15128998 : int numrecs;
3929 15128998 : int error;
3930 : #ifdef DEBUG
3931 15128998 : union xfs_btree_ptr ptr;
3932 : #endif
3933 :
3934 15128998 : ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
3935 15128998 : ASSERT((cur->bc_flags & XFS_BTREE_IROOT_RECORDS) ||
3936 : cur->bc_nlevels > 1);
3937 :
3938 : /*
3939 : * Don't deal with the root block needs to be a leaf case.
3940 : * We're just going to turn the thing back into extents anyway.
3941 : */
3942 15128998 : level = cur->bc_nlevels - 1;
3943 15128998 : if (level == 1 && !(cur->bc_flags & XFS_BTREE_IROOT_RECORDS))
3944 12631182 : goto out0;
3945 :
3946 : /* If we're already a leaf, jump out. */
3947 2497816 : if (level == 0)
3948 536770 : goto out0;
3949 :
3950 : /*
3951 : * Give up if the root has multiple children.
3952 : */
3953 1961046 : block = xfs_btree_get_iroot(cur);
3954 3922092 : if (xfs_btree_get_numrecs(block) != 1)
3955 597 : goto out0;
3956 :
3957 1960449 : cblock = xfs_btree_get_block(cur, level - 1, &cbp);
3958 1960449 : numrecs = xfs_btree_get_numrecs(cblock);
3959 :
3960 : /*
3961 : * Only do this if the next level will fit.
3962 : * Then the data must be copied up to the inode,
3963 : * instead of freeing the root you free the next level.
3964 : */
3965 1960449 : if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
3966 1943980 : goto out0;
3967 :
3968 16469 : XFS_BTREE_STATS_INC(cur, killroot);
3969 :
3970 : #ifdef DEBUG
3971 16469 : xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
3972 32938 : ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3973 16469 : xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
3974 32938 : ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3975 : #endif
3976 :
3977 16469 : if (level > 1) {
3978 2193 : error = xfs_btree_demote_node_child(cur, cblock, level,
3979 : numrecs);
3980 2193 : if (error)
3981 : return error;
3982 : } else
3983 14276 : xfs_btree_demote_leaf_child(cur, cblock, numrecs);
3984 :
3985 16469 : error = xfs_btree_free_block(cur, cbp);
3986 16469 : if (error)
3987 : return error;
3988 :
3989 32938 : xfs_trans_log_inode(cur->bc_tp, ip,
3990 16469 : XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork));
3991 : out0:
3992 : return 0;
3993 : }
3994 :
3995 : /*
3996 : * Kill the current root node, and replace it with it's only child node.
3997 : */
3998 : STATIC int
3999 13234 : xfs_btree_kill_root(
4000 : struct xfs_btree_cur *cur,
4001 : struct xfs_buf *bp,
4002 : int level,
4003 : union xfs_btree_ptr *newroot)
4004 : {
4005 13234 : int error;
4006 :
4007 13234 : XFS_BTREE_STATS_INC(cur, killroot);
4008 :
4009 : /*
4010 : * Update the root pointer, decreasing the level by 1 and then
4011 : * free the old root.
4012 : */
4013 13234 : cur->bc_ops->set_root(cur, newroot, -1);
4014 :
4015 13234 : error = xfs_btree_free_block(cur, bp);
4016 13234 : if (error)
4017 : return error;
4018 :
4019 13234 : cur->bc_levels[level].bp = NULL;
4020 13234 : cur->bc_levels[level].ra = 0;
4021 13234 : cur->bc_nlevels--;
4022 :
4023 13234 : return 0;
4024 : }
4025 :
4026 : STATIC int
4027 239030253 : xfs_btree_dec_cursor(
4028 : struct xfs_btree_cur *cur,
4029 : int level,
4030 : int *stat)
4031 : {
4032 239030253 : int error;
4033 239030253 : int i;
4034 :
4035 239030253 : if (level > 0) {
4036 403354 : error = xfs_btree_decrement(cur, level, &i);
4037 403354 : if (error)
4038 : return error;
4039 : }
4040 :
4041 239030253 : *stat = 1;
4042 239030253 : return 0;
4043 : }
4044 :
4045 : /*
4046 : * Single level of the btree record deletion routine.
4047 : * Delete record pointed to by cur/level.
4048 : * Remove the record from its block then rebalance the tree.
4049 : * Return 0 for error, 1 for done, 2 to go on to the next level.
4050 : */
4051 : STATIC int /* error */
4052 701112943 : xfs_btree_delrec(
4053 : struct xfs_btree_cur *cur, /* btree cursor */
4054 : int level, /* level removing record from */
4055 : int *stat) /* fail/done/go-on */
4056 : {
4057 701112943 : struct xfs_btree_block *block; /* btree block */
4058 701112943 : union xfs_btree_ptr cptr; /* current block ptr */
4059 701112943 : struct xfs_buf *bp; /* buffer for block */
4060 701112943 : int error; /* error return value */
4061 701112943 : int i; /* loop counter */
4062 701112943 : union xfs_btree_ptr lptr; /* left sibling block ptr */
4063 701112943 : struct xfs_buf *lbp; /* left buffer pointer */
4064 701112943 : struct xfs_btree_block *left; /* left btree block */
4065 701112943 : int lrecs = 0; /* left record count */
4066 701112943 : int ptr; /* key/record index */
4067 701112943 : union xfs_btree_ptr rptr; /* right sibling block ptr */
4068 701112943 : struct xfs_buf *rbp; /* right buffer pointer */
4069 701112943 : struct xfs_btree_block *right; /* right btree block */
4070 701112943 : struct xfs_btree_block *rrblock; /* right-right btree block */
4071 701112943 : struct xfs_buf *rrbp; /* right-right buffer pointer */
4072 701112943 : int rrecs = 0; /* right record count */
4073 701112943 : struct xfs_btree_cur *tcur; /* temporary btree cursor */
4074 701112943 : int numrecs; /* temporary numrec count */
4075 :
4076 701112943 : tcur = NULL;
4077 :
4078 : /* Get the index of the entry being deleted, check for nothing there. */
4079 701112943 : ptr = cur->bc_levels[level].ptr;
4080 701112943 : if (ptr == 0) {
4081 0 : *stat = 0;
4082 0 : return 0;
4083 : }
4084 :
4085 : /* Get the buffer & block containing the record or key/ptr. */
4086 701112943 : block = xfs_btree_get_block(cur, level, &bp);
4087 701114002 : numrecs = xfs_btree_get_numrecs(block);
4088 :
4089 : #ifdef DEBUG
4090 701114002 : error = xfs_btree_check_block(cur, block, level, bp);
4091 701115141 : if (error)
4092 0 : goto error0;
4093 : #endif
4094 :
4095 : /* Fail if we're off the end of the block. */
4096 701115141 : if (ptr > numrecs) {
4097 0 : *stat = 0;
4098 0 : return 0;
4099 : }
4100 :
4101 701115141 : XFS_BTREE_STATS_INC(cur, delrec);
4102 701115141 : XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
4103 :
4104 : /* Excise the entries being deleted. */
4105 701115141 : if (level > 0) {
4106 : /* It's a nonleaf. operate on keys and ptrs */
4107 419874 : union xfs_btree_key *lkp;
4108 419874 : union xfs_btree_ptr *lpp;
4109 :
4110 419874 : lkp = xfs_btree_key_addr(cur, ptr + 1, block);
4111 419874 : lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
4112 :
4113 7580458 : for (i = 0; i < numrecs - ptr; i++) {
4114 7160584 : error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
4115 7160584 : if (error)
4116 0 : goto error0;
4117 : }
4118 :
4119 419874 : if (ptr < numrecs) {
4120 203832 : xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
4121 203832 : xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
4122 203832 : xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
4123 203832 : xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
4124 : }
4125 : } else {
4126 : /* It's a leaf. operate on records */
4127 700695267 : if (ptr < numrecs) {
4128 304669661 : xfs_btree_shift_recs(cur,
4129 : xfs_btree_rec_addr(cur, ptr + 1, block),
4130 : -1, numrecs - ptr);
4131 304671766 : xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
4132 : }
4133 : }
4134 :
4135 : /*
4136 : * Decrement and log the number of entries in the block.
4137 : */
4138 701119921 : xfs_btree_set_numrecs(block, --numrecs);
4139 701119921 : xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
4140 :
4141 : /*
4142 : * If we are tracking the last record in the tree and
4143 : * we are at the far right edge of the tree, update it.
4144 : */
4145 701118640 : if (xfs_btree_is_lastrec(cur, block, level)) {
4146 82248452 : cur->bc_ops->update_lastrec(cur, block, NULL,
4147 : ptr, LASTREC_DELREC);
4148 : }
4149 :
4150 : /*
4151 : * We're at the root level. First, shrink the root block in-memory.
4152 : * Try to get rid of the next level down. If we can't then there's
4153 : * nothing left to do.
4154 : */
4155 701126572 : if (level == cur->bc_nlevels - 1) {
4156 436790599 : if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
4157 560217 : xfs_btree_iroot_realloc(cur, -1);
4158 :
4159 560217 : error = xfs_btree_kill_iroot(cur);
4160 560217 : if (error)
4161 0 : goto error0;
4162 :
4163 560217 : error = xfs_btree_dec_cursor(cur, level, stat);
4164 560217 : if (error)
4165 0 : goto error0;
4166 560217 : *stat = 1;
4167 560217 : return 0;
4168 : }
4169 :
4170 : /*
4171 : * If this is the root level, and there's only one entry left,
4172 : * and it's NOT the leaf level, then we can get rid of this
4173 : * level.
4174 : */
4175 436230382 : if (numrecs == 1 && level > 0) {
4176 13234 : union xfs_btree_ptr *pp;
4177 : /*
4178 : * pp is still set to the first pointer in the block.
4179 : * Make it the new root of the btree.
4180 : */
4181 13234 : pp = xfs_btree_ptr_addr(cur, 1, block);
4182 13234 : error = xfs_btree_kill_root(cur, bp, level, pp);
4183 13234 : if (error)
4184 0 : goto error0;
4185 436217148 : } else if (level > 0) {
4186 107496 : error = xfs_btree_dec_cursor(cur, level, stat);
4187 107496 : if (error)
4188 0 : goto error0;
4189 : }
4190 436230382 : *stat = 1;
4191 436230382 : return 0;
4192 : }
4193 :
4194 : /*
4195 : * If we deleted the leftmost entry in the block, update the
4196 : * key values above us in the tree.
4197 : */
4198 403426242 : if (xfs_btree_needs_key_update(cur, ptr)) {
4199 129531389 : error = xfs_btree_update_keys(cur, level);
4200 129531832 : if (error)
4201 0 : goto error0;
4202 : }
4203 :
4204 : /*
4205 : * If the number of records remaining in the block is at least
4206 : * the minimum, we're done.
4207 : */
4208 264336416 : if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
4209 213423656 : error = xfs_btree_dec_cursor(cur, level, stat);
4210 213423790 : if (error)
4211 0 : goto error0;
4212 : return 0;
4213 : }
4214 :
4215 : /*
4216 : * Otherwise, we have to move some records around to keep the
4217 : * tree balanced. Look at the left and right sibling blocks to
4218 : * see if we can re-balance by moving only one record.
4219 : */
4220 50912884 : xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
4221 50912928 : xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
4222 :
4223 50912965 : if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
4224 : /*
4225 : * One child of root, need to get a chance to copy its contents
4226 : * into the root and delete it. Can't go up to next level,
4227 : * there's nothing to delete there.
4228 : */
4229 108457358 : if (xfs_btree_ptr_is_null(cur, &rptr) &&
4230 14568782 : xfs_btree_ptr_is_null(cur, &lptr) &&
4231 14568782 : level == cur->bc_nlevels - 2) {
4232 14568783 : error = xfs_btree_kill_iroot(cur);
4233 14568773 : if (!error)
4234 14568773 : error = xfs_btree_dec_cursor(cur, level, stat);
4235 14568768 : if (error)
4236 0 : goto error0;
4237 : return 0;
4238 : }
4239 : }
4240 :
4241 94457792 : ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
4242 : !xfs_btree_ptr_is_null(cur, &lptr));
4243 :
4244 : /*
4245 : * Duplicate the cursor so our btree manipulations here won't
4246 : * disrupt the next level up.
4247 : */
4248 36344182 : error = xfs_btree_dup_cursor(cur, &tcur);
4249 36344176 : if (error)
4250 2 : goto error0;
4251 :
4252 : /*
4253 : * If there's a right sibling, see if it's ok to shift an entry
4254 : * out of it.
4255 : */
4256 72688348 : if (!xfs_btree_ptr_is_null(cur, &rptr)) {
4257 : /*
4258 : * Move the temp cursor to the last entry in the next block.
4259 : * Actually any entry but the first would suffice.
4260 : */
4261 14574746 : i = xfs_btree_lastrec(tcur, level);
4262 14574754 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4263 0 : xfs_btree_mark_sick(cur);
4264 0 : error = -EFSCORRUPTED;
4265 0 : goto error0;
4266 : }
4267 :
4268 14574754 : error = xfs_btree_increment(tcur, level, &i);
4269 14574759 : if (error)
4270 3 : goto error0;
4271 14574756 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4272 0 : xfs_btree_mark_sick(cur);
4273 0 : error = -EFSCORRUPTED;
4274 0 : goto error0;
4275 : }
4276 :
4277 14574756 : i = xfs_btree_lastrec(tcur, level);
4278 14574745 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4279 0 : xfs_btree_mark_sick(cur);
4280 0 : error = -EFSCORRUPTED;
4281 0 : goto error0;
4282 : }
4283 :
4284 : /* Grab a pointer to the block. */
4285 14574745 : right = xfs_btree_get_block(tcur, level, &rbp);
4286 : #ifdef DEBUG
4287 14574757 : error = xfs_btree_check_block(tcur, right, level, rbp);
4288 14574756 : if (error)
4289 0 : goto error0;
4290 : #endif
4291 : /* Grab the current block number, for future use. */
4292 14574756 : xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
4293 :
4294 : /*
4295 : * If right block is full enough so that removing one entry
4296 : * won't make it too empty, and left-shifting an entry out
4297 : * of right to us works, we're done.
4298 : */
4299 29149511 : if (xfs_btree_get_numrecs(right) - 1 >=
4300 14574755 : cur->bc_ops->get_minrecs(tcur, level)) {
4301 10370846 : error = xfs_btree_lshift(tcur, level, &i);
4302 10370847 : if (error)
4303 0 : goto error0;
4304 10370847 : if (i) {
4305 20741696 : ASSERT(xfs_btree_get_numrecs(block) >=
4306 : cur->bc_ops->get_minrecs(tcur, level));
4307 :
4308 10370848 : xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
4309 10370848 : tcur = NULL;
4310 :
4311 10370848 : error = xfs_btree_dec_cursor(cur, level, stat);
4312 10370848 : if (error)
4313 0 : goto error0;
4314 : return 0;
4315 : }
4316 : }
4317 :
4318 : /*
4319 : * Otherwise, grab the number of records in right for
4320 : * future reference, and fix up the temp cursor to point
4321 : * to our block again (last record).
4322 : */
4323 4203909 : rrecs = xfs_btree_get_numrecs(right);
4324 8407818 : if (!xfs_btree_ptr_is_null(cur, &lptr)) {
4325 4186491 : i = xfs_btree_firstrec(tcur, level);
4326 4186491 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4327 0 : xfs_btree_mark_sick(cur);
4328 0 : error = -EFSCORRUPTED;
4329 0 : goto error0;
4330 : }
4331 :
4332 4186491 : error = xfs_btree_decrement(tcur, level, &i);
4333 4186491 : if (error)
4334 0 : goto error0;
4335 4186491 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4336 0 : xfs_btree_mark_sick(cur);
4337 0 : error = -EFSCORRUPTED;
4338 0 : goto error0;
4339 : }
4340 : }
4341 : }
4342 :
4343 : /*
4344 : * If there's a left sibling, see if it's ok to shift an entry
4345 : * out of it.
4346 : */
4347 51946674 : if (!xfs_btree_ptr_is_null(cur, &lptr)) {
4348 : /*
4349 : * Move the temp cursor to the first entry in the
4350 : * previous block.
4351 : */
4352 25955876 : i = xfs_btree_firstrec(tcur, level);
4353 25955859 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4354 0 : xfs_btree_mark_sick(cur);
4355 0 : error = -EFSCORRUPTED;
4356 0 : goto error0;
4357 : }
4358 :
4359 25955859 : error = xfs_btree_decrement(tcur, level, &i);
4360 25955914 : if (error)
4361 0 : goto error0;
4362 25955914 : i = xfs_btree_firstrec(tcur, level);
4363 25955916 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
4364 0 : xfs_btree_mark_sick(cur);
4365 0 : error = -EFSCORRUPTED;
4366 0 : goto error0;
4367 : }
4368 :
4369 : /* Grab a pointer to the block. */
4370 25955916 : left = xfs_btree_get_block(tcur, level, &lbp);
4371 : #ifdef DEBUG
4372 25955916 : error = xfs_btree_check_block(cur, left, level, lbp);
4373 25955914 : if (error)
4374 0 : goto error0;
4375 : #endif
4376 : /* Grab the current block number, for future use. */
4377 25955914 : xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
4378 :
4379 : /*
4380 : * If left block is full enough so that removing one entry
4381 : * won't make it too empty, and right-shifting an entry out
4382 : * of left to us works, we're done.
4383 : */
4384 51911833 : if (xfs_btree_get_numrecs(left) - 1 >=
4385 25955917 : cur->bc_ops->get_minrecs(tcur, level)) {
4386 25553460 : error = xfs_btree_rshift(tcur, level, &i);
4387 25553463 : if (error)
4388 0 : goto error0;
4389 25553463 : if (i) {
4390 51106926 : ASSERT(xfs_btree_get_numrecs(block) >=
4391 : cur->bc_ops->get_minrecs(tcur, level));
4392 25553463 : xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
4393 25553463 : tcur = NULL;
4394 25553463 : if (level == 0)
4395 25550627 : cur->bc_levels[0].ptr++;
4396 :
4397 25553463 : *stat = 1;
4398 25553463 : return 0;
4399 : }
4400 : }
4401 :
4402 : /*
4403 : * Otherwise, grab the number of records in right for
4404 : * future reference.
4405 : */
4406 402456 : lrecs = xfs_btree_get_numrecs(left);
4407 : }
4408 :
4409 : /* Delete the temp cursor, we're done with it. */
4410 419917 : xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
4411 419874 : tcur = NULL;
4412 :
4413 : /* If here, we need to do a join to keep the tree balanced. */
4414 839748 : ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
4415 :
4416 1242204 : if (!xfs_btree_ptr_is_null(cur, &lptr) &&
4417 402456 : lrecs + xfs_btree_get_numrecs(block) <=
4418 402456 : cur->bc_ops->get_maxrecs(cur, level)) {
4419 : /*
4420 : * Set "right" to be the starting block,
4421 : * "left" to be the left neighbor.
4422 : */
4423 402456 : rptr = cptr;
4424 402456 : right = block;
4425 402456 : rbp = bp;
4426 402456 : error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
4427 402456 : if (error)
4428 0 : goto error0;
4429 :
4430 : /*
4431 : * If that won't work, see if we can join with the right neighbor block.
4432 : */
4433 52254 : } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
4434 17418 : rrecs + xfs_btree_get_numrecs(block) <=
4435 17418 : cur->bc_ops->get_maxrecs(cur, level)) {
4436 : /*
4437 : * Set "left" to be the starting block,
4438 : * "right" to be the right neighbor.
4439 : */
4440 17418 : lptr = cptr;
4441 17418 : left = block;
4442 17418 : lbp = bp;
4443 17418 : error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
4444 17418 : if (error)
4445 0 : goto error0;
4446 :
4447 : /*
4448 : * Otherwise, we can't fix the imbalance.
4449 : * Just return. This is probably a logic error, but it's not fatal.
4450 : */
4451 : } else {
4452 0 : error = xfs_btree_dec_cursor(cur, level, stat);
4453 0 : if (error)
4454 0 : goto error0;
4455 : return 0;
4456 : }
4457 :
4458 419874 : rrecs = xfs_btree_get_numrecs(right);
4459 419874 : lrecs = xfs_btree_get_numrecs(left);
4460 :
4461 : /*
4462 : * We're now going to join "left" and "right" by moving all the stuff
4463 : * in "right" to "left" and deleting "right".
4464 : */
4465 419874 : XFS_BTREE_STATS_ADD(cur, moves, rrecs);
4466 419874 : if (level > 0) {
4467 : /* It's a non-leaf. Move keys and pointers. */
4468 450 : union xfs_btree_key *lkp; /* left btree key */
4469 450 : union xfs_btree_ptr *lpp; /* left address pointer */
4470 450 : union xfs_btree_key *rkp; /* right btree key */
4471 450 : union xfs_btree_ptr *rpp; /* right address pointer */
4472 :
4473 450 : lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
4474 450 : lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
4475 450 : rkp = xfs_btree_key_addr(cur, 1, right);
4476 450 : rpp = xfs_btree_ptr_addr(cur, 1, right);
4477 :
4478 22137 : for (i = 1; i < rrecs; i++) {
4479 21687 : error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
4480 21687 : if (error)
4481 0 : goto error0;
4482 : }
4483 :
4484 450 : xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
4485 450 : xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
4486 :
4487 450 : xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
4488 450 : xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
4489 : } else {
4490 : /* It's a leaf. Move records. */
4491 419424 : union xfs_btree_rec *lrp; /* left record pointer */
4492 419424 : union xfs_btree_rec *rrp; /* right record pointer */
4493 :
4494 419424 : lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
4495 419424 : rrp = xfs_btree_rec_addr(cur, 1, right);
4496 :
4497 419424 : xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
4498 419424 : xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
4499 : }
4500 :
4501 419874 : XFS_BTREE_STATS_INC(cur, join);
4502 :
4503 : /*
4504 : * Fix up the number of records and right block pointer in the
4505 : * surviving block, and log it.
4506 : */
4507 419874 : xfs_btree_set_numrecs(left, lrecs + rrecs);
4508 419874 : xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB);
4509 419874 : xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
4510 419874 : xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
4511 :
4512 : /* If there is a right sibling, point it to the remaining block. */
4513 419874 : xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
4514 839748 : if (!xfs_btree_ptr_is_null(cur, &cptr)) {
4515 205083 : error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
4516 205083 : if (error)
4517 0 : goto error0;
4518 205083 : xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
4519 205083 : xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
4520 : }
4521 :
4522 : /* Free the deleted block. */
4523 419874 : error = xfs_btree_free_block(cur, rbp);
4524 419874 : if (error)
4525 0 : goto error0;
4526 :
4527 : /*
4528 : * If we joined with the left neighbor, set the buffer in the
4529 : * cursor to the left block, and fix up the index.
4530 : */
4531 419874 : if (bp != lbp) {
4532 402456 : cur->bc_levels[level].bp = lbp;
4533 402456 : cur->bc_levels[level].ptr += lrecs;
4534 402456 : cur->bc_levels[level].ra = 0;
4535 : }
4536 : /*
4537 : * If we joined with the right neighbor and there's a level above
4538 : * us, increment the cursor at that level.
4539 : */
4540 17418 : else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
4541 16767 : (level + 1 < cur->bc_nlevels)) {
4542 17418 : error = xfs_btree_increment(cur, level + 1, &i);
4543 17418 : if (error)
4544 0 : goto error0;
4545 : }
4546 :
4547 : /*
4548 : * Readjust the ptr at this level if it's not a leaf, since it's
4549 : * still pointing at the deletion point, which makes the cursor
4550 : * inconsistent. If this makes the ptr 0, the caller fixes it up.
4551 : * We can't use decrement because it would change the next level up.
4552 : */
4553 419874 : if (level > 0)
4554 450 : cur->bc_levels[level].ptr--;
4555 :
4556 : /*
4557 : * We combined blocks, so we have to update the parent keys if the
4558 : * btree supports overlapped intervals. However,
4559 : * bc_levels[level + 1].ptr points to the old block so that the caller
4560 : * knows which record to delete. Therefore, the caller must be savvy
4561 : * enough to call updkeys for us if we return stat == 2. The other
4562 : * exit points from this function don't require deletions further up
4563 : * the tree, so they can call updkeys directly.
4564 : */
4565 :
4566 : /* Return value means the next level up has something to do. */
4567 419874 : *stat = 2;
4568 419874 : return 0;
4569 :
4570 5 : error0:
4571 5 : if (tcur)
4572 3 : xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
4573 : return error;
4574 : }
4575 :
4576 : /*
4577 : * Delete the record pointed to by cur.
4578 : * The cursor refers to the place where the record was (could be inserted)
4579 : * when the operation returns.
4580 : */
4581 : int /* error */
4582 700687971 : xfs_btree_delete(
4583 : struct xfs_btree_cur *cur,
4584 : int *stat) /* success/failure */
4585 : {
4586 700687971 : int error; /* error return value */
4587 700687971 : int level;
4588 700687971 : int i;
4589 700687971 : bool joined = false;
4590 :
4591 : /*
4592 : * Go up the tree, starting at leaf level.
4593 : *
4594 : * If 2 is returned then a join was done; go to the next level.
4595 : * Otherwise we are done.
4596 : */
4597 1401800780 : for (level = 0, i = 2; i == 2; level++) {
4598 701107635 : error = xfs_btree_delrec(cur, level, &i);
4599 701112814 : if (error)
4600 5 : goto error0;
4601 701112809 : if (i == 2)
4602 419874 : joined = true;
4603 : }
4604 :
4605 : /*
4606 : * If we combined blocks as part of deleting the record, delrec won't
4607 : * have updated the parent high keys so we have to do that here.
4608 : */
4609 700693145 : if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) {
4610 155173 : error = xfs_btree_updkeys_force(cur, 0);
4611 155173 : if (error)
4612 0 : goto error0;
4613 : }
4614 :
4615 700693145 : if (i == 0) {
4616 0 : for (level = 1; level < cur->bc_nlevels; level++) {
4617 0 : if (cur->bc_levels[level].ptr == 0) {
4618 0 : error = xfs_btree_decrement(cur, level, &i);
4619 0 : if (error)
4620 0 : goto error0;
4621 : break;
4622 : }
4623 : }
4624 : }
4625 :
4626 700693145 : *stat = i;
4627 700693145 : return 0;
4628 : error0:
4629 : return error;
4630 : }
4631 :
4632 : /*
4633 : * Get the data from the pointed-to record.
4634 : */
4635 : int /* error */
4636 >13315*10^7 : xfs_btree_get_rec(
4637 : struct xfs_btree_cur *cur, /* btree cursor */
4638 : union xfs_btree_rec **recp, /* output: btree record */
4639 : int *stat) /* output: success/failure */
4640 : {
4641 >13315*10^7 : struct xfs_btree_block *block; /* btree block */
4642 >13315*10^7 : struct xfs_buf *bp; /* buffer pointer */
4643 >13315*10^7 : int ptr; /* record number */
4644 : #ifdef DEBUG
4645 >13315*10^7 : int error; /* error return value */
4646 : #endif
4647 :
4648 >13315*10^7 : ptr = cur->bc_levels[0].ptr;
4649 >13315*10^7 : block = xfs_btree_get_block(cur, 0, &bp);
4650 :
4651 : #ifdef DEBUG
4652 >13339*10^7 : error = xfs_btree_check_block(cur, block, 0, bp);
4653 >13219*10^7 : if (error)
4654 : return error;
4655 : #endif
4656 :
4657 : /*
4658 : * Off the right end or left end, return failure.
4659 : */
4660 >26438*10^7 : if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
4661 79696 : *stat = 0;
4662 79696 : return 0;
4663 : }
4664 :
4665 : /*
4666 : * Point to the record and extract its data.
4667 : */
4668 >13272*10^7 : *recp = xfs_btree_rec_addr(cur, ptr, block);
4669 >13272*10^7 : *stat = 1;
4670 >13272*10^7 : return 0;
4671 : }
4672 :
4673 : /* Visit a block in a btree. */
4674 : STATIC int
4675 141680945 : xfs_btree_visit_block(
4676 : struct xfs_btree_cur *cur,
4677 : int level,
4678 : xfs_btree_visit_blocks_fn fn,
4679 : void *data)
4680 : {
4681 141680945 : struct xfs_btree_block *block;
4682 141680945 : struct xfs_buf *bp;
4683 141680945 : union xfs_btree_ptr rptr, bufptr;
4684 141680945 : int error;
4685 :
4686 : /* do right sibling readahead */
4687 141680945 : xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
4688 141681000 : block = xfs_btree_get_block(cur, level, &bp);
4689 :
4690 : /* process the block */
4691 141680910 : error = fn(cur, level, data);
4692 141680932 : if (error)
4693 : return error;
4694 :
4695 : /* now read rh sibling block for next iteration */
4696 141680934 : xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
4697 283362192 : if (xfs_btree_ptr_is_null(cur, &rptr))
4698 : return -ENOENT;
4699 :
4700 : /*
4701 : * We only visit blocks once in this walk, so we have to avoid the
4702 : * internal xfs_btree_lookup_get_block() optimisation where it will
4703 : * return the same block without checking if the right sibling points
4704 : * back to us and creates a cyclic reference in the btree.
4705 : */
4706 111759128 : xfs_btree_buf_to_ptr(cur, bp, &bufptr);
4707 111759082 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
4708 14211959 : if (rptr.l == bufptr.l) {
4709 0 : xfs_btree_mark_sick(cur);
4710 0 : return -EFSCORRUPTED;
4711 : }
4712 : } else {
4713 97547123 : if (rptr.s == bufptr.s) {
4714 0 : xfs_btree_mark_sick(cur);
4715 0 : return -EFSCORRUPTED;
4716 : }
4717 : }
4718 111759082 : return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
4719 : }
4720 :
4721 :
4722 : /* Visit every block in a btree. */
4723 : int
4724 17433778 : xfs_btree_visit_blocks(
4725 : struct xfs_btree_cur *cur,
4726 : xfs_btree_visit_blocks_fn fn,
4727 : unsigned int flags,
4728 : void *data)
4729 : {
4730 17433778 : union xfs_btree_ptr lptr;
4731 17433778 : int level;
4732 17433778 : struct xfs_btree_block *block = NULL;
4733 17433778 : int error = 0;
4734 :
4735 17433778 : cur->bc_ops->init_ptr_from_cur(cur, &lptr);
4736 :
4737 : /* for each level */
4738 48899547 : for (level = cur->bc_nlevels - 1; level >= 0; level--) {
4739 : /* grab the left hand block */
4740 31465812 : error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
4741 31465882 : if (error)
4742 185 : return error;
4743 :
4744 : /* readahead the left most block for the next level down */
4745 31465697 : if (level > 0) {
4746 14031961 : union xfs_btree_ptr *ptr;
4747 :
4748 14031961 : ptr = xfs_btree_ptr_addr(cur, 1, block);
4749 14031947 : xfs_btree_readahead_ptr(cur, ptr, 1);
4750 :
4751 : /* save for the next iteration of the loop */
4752 14031962 : xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
4753 :
4754 14031974 : if (!(flags & XFS_BTREE_VISIT_LEAVES))
4755 1543728 : continue;
4756 17433736 : } else if (!(flags & XFS_BTREE_VISIT_RECORDS)) {
4757 0 : continue;
4758 : }
4759 :
4760 : /* for each buffer in the level */
4761 141680939 : do {
4762 141680939 : error = xfs_btree_visit_block(cur, level, fn, data);
4763 141680922 : } while (!error);
4764 :
4765 29921965 : if (error != -ENOENT)
4766 1 : return error;
4767 : }
4768 :
4769 : return 0;
4770 : }
4771 :
4772 : /*
4773 : * Change the owner of a btree.
4774 : *
4775 : * The mechanism we use here is ordered buffer logging. Because we don't know
4776 : * how many buffers were are going to need to modify, we don't really want to
4777 : * have to make transaction reservations for the worst case of every buffer in a
4778 : * full size btree as that may be more space that we can fit in the log....
4779 : *
4780 : * We do the btree walk in the most optimal manner possible - we have sibling
4781 : * pointers so we can just walk all the blocks on each level from left to right
4782 : * in a single pass, and then move to the next level and do the same. We can
4783 : * also do readahead on the sibling pointers to get IO moving more quickly,
4784 : * though for slow disks this is unlikely to make much difference to performance
4785 : * as the amount of CPU work we have to do before moving to the next block is
4786 : * relatively small.
4787 : *
4788 : * For each btree block that we load, modify the owner appropriately, set the
4789 : * buffer as an ordered buffer and log it appropriately. We need to ensure that
4790 : * we mark the region we change dirty so that if the buffer is relogged in
4791 : * a subsequent transaction the changes we make here as an ordered buffer are
4792 : * correctly relogged in that transaction. If we are in recovery context, then
4793 : * just queue the modified buffer as delayed write buffer so the transaction
4794 : * recovery completion writes the changes to disk.
4795 : */
4796 : struct xfs_btree_block_change_owner_info {
4797 : uint64_t new_owner;
4798 : struct list_head *buffer_list;
4799 : };
4800 :
4801 : static int
4802 0 : xfs_btree_block_change_owner(
4803 : struct xfs_btree_cur *cur,
4804 : int level,
4805 : void *data)
4806 : {
4807 0 : struct xfs_btree_block_change_owner_info *bbcoi = data;
4808 0 : struct xfs_btree_block *block;
4809 0 : struct xfs_buf *bp;
4810 :
4811 : /* modify the owner */
4812 0 : block = xfs_btree_get_block(cur, level, &bp);
4813 0 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
4814 0 : if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
4815 : return 0;
4816 0 : block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
4817 : } else {
4818 0 : if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner))
4819 : return 0;
4820 0 : block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
4821 : }
4822 :
4823 : /*
4824 : * If the block is a root block hosted in an inode, we might not have a
4825 : * buffer pointer here and we shouldn't attempt to log the change as the
4826 : * information is already held in the inode and discarded when the root
4827 : * block is formatted into the on-disk inode fork. We still change it,
4828 : * though, so everything is consistent in memory.
4829 : */
4830 0 : if (!bp) {
4831 0 : ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
4832 0 : ASSERT(level == cur->bc_nlevels - 1);
4833 0 : return 0;
4834 : }
4835 :
4836 0 : if (cur->bc_tp) {
4837 0 : if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) {
4838 0 : xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
4839 0 : return -EAGAIN;
4840 : }
4841 : } else {
4842 0 : xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
4843 : }
4844 :
4845 : return 0;
4846 : }
4847 :
4848 : int
4849 0 : xfs_btree_change_owner(
4850 : struct xfs_btree_cur *cur,
4851 : uint64_t new_owner,
4852 : struct list_head *buffer_list)
4853 : {
4854 0 : struct xfs_btree_block_change_owner_info bbcoi;
4855 :
4856 0 : bbcoi.new_owner = new_owner;
4857 0 : bbcoi.buffer_list = buffer_list;
4858 :
4859 0 : return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
4860 : XFS_BTREE_VISIT_ALL, &bbcoi);
4861 : }
4862 :
4863 : /* Verify the v5 fields of a long-format btree block. */
4864 : xfs_failaddr_t
4865 755708587 : xfs_btree_lblock_v5hdr_verify(
4866 : struct xfs_buf *bp,
4867 : uint64_t owner)
4868 : {
4869 755708587 : struct xfs_mount *mp = bp->b_mount;
4870 755708587 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4871 :
4872 755708587 : if (!xfs_has_crc(mp))
4873 0 : return __this_address;
4874 755708587 : if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
4875 0 : return __this_address;
4876 755709454 : if (block->bb_u.l.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
4877 0 : return __this_address;
4878 755709454 : if (owner != XFS_RMAP_OWN_UNKNOWN &&
4879 0 : be64_to_cpu(block->bb_u.l.bb_owner) != owner)
4880 0 : return __this_address;
4881 : return NULL;
4882 : }
4883 :
4884 : /* Verify a long-format btree block. */
4885 : xfs_failaddr_t
4886 52475790 : xfs_btree_lblock_verify(
4887 : struct xfs_buf *bp,
4888 : unsigned int max_recs)
4889 : {
4890 52475790 : struct xfs_mount *mp = bp->b_mount;
4891 52475790 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4892 52475790 : xfs_fsblock_t fsb;
4893 52475790 : xfs_failaddr_t fa;
4894 :
4895 52475790 : ASSERT(!(bp->b_target->bt_flags & XFS_BUFTARG_XFILE));
4896 :
4897 : /* numrecs verification */
4898 52475790 : if (be16_to_cpu(block->bb_numrecs) > max_recs)
4899 0 : return __this_address;
4900 :
4901 : /* sibling pointer verification */
4902 52475894 : fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
4903 52475894 : fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
4904 : block->bb_u.l.bb_leftsib);
4905 52475932 : if (!fa)
4906 52475952 : fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
4907 : block->bb_u.l.bb_rightsib);
4908 : return fa;
4909 : }
4910 :
4911 : /**
4912 : * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
4913 : * btree block
4914 : *
4915 : * @bp: buffer containing the btree block
4916 : */
4917 : xfs_failaddr_t
4918 73179078 : xfs_btree_sblock_v5hdr_verify(
4919 : struct xfs_buf *bp)
4920 : {
4921 73179078 : struct xfs_mount *mp = bp->b_mount;
4922 73179078 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4923 73179078 : struct xfs_perag *pag = bp->b_pag;
4924 :
4925 73179078 : if (!xfs_has_crc(mp))
4926 0 : return __this_address;
4927 73179078 : if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
4928 0 : return __this_address;
4929 73179220 : if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
4930 0 : return __this_address;
4931 118091137 : if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
4932 0 : return __this_address;
4933 : return NULL;
4934 : }
4935 :
4936 : /**
4937 : * xfs_btree_sblock_verify() -- verify a short-format btree block
4938 : *
4939 : * @bp: buffer containing the btree block
4940 : * @max_recs: maximum records allowed in this btree node
4941 : */
4942 : xfs_failaddr_t
4943 44951432 : xfs_btree_sblock_verify(
4944 : struct xfs_buf *bp,
4945 : unsigned int max_recs)
4946 : {
4947 44951432 : struct xfs_mount *mp = bp->b_mount;
4948 44951432 : struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4949 44951432 : xfs_agblock_t agbno;
4950 44951432 : xfs_failaddr_t fa;
4951 :
4952 44951432 : ASSERT(!(bp->b_target->bt_flags & XFS_BUFTARG_XFILE));
4953 :
4954 : /* numrecs verification */
4955 44951432 : if (be16_to_cpu(block->bb_numrecs) > max_recs)
4956 14 : return __this_address;
4957 :
4958 : /* sibling pointer verification */
4959 44951422 : agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
4960 44951422 : fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno,
4961 : block->bb_u.s.bb_leftsib);
4962 44951447 : if (!fa)
4963 44951450 : fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno,
4964 : block->bb_u.s.bb_rightsib);
4965 : return fa;
4966 : }
4967 :
4968 : /*
4969 : * For the given limits on leaf and keyptr records per block, calculate the
4970 : * height of the tree needed to index the number of leaf records.
4971 : */
4972 : unsigned int
4973 219186 : xfs_btree_compute_maxlevels(
4974 : const unsigned int *limits,
4975 : unsigned long long records)
4976 : {
4977 219186 : unsigned long long level_blocks = howmany_64(records, limits[0]);
4978 219186 : unsigned int height = 1;
4979 :
4980 1433227 : while (level_blocks > 1) {
4981 1214041 : level_blocks = howmany_64(level_blocks, limits[1]);
4982 1214041 : height++;
4983 : }
4984 :
4985 219186 : return height;
4986 : }
4987 :
4988 : /*
4989 : * For the given limits on leaf and keyptr records per block, calculate the
4990 : * number of blocks needed to index the given number of leaf records.
4991 : */
4992 : unsigned long long
4993 7502990 : xfs_btree_calc_size(
4994 : const unsigned int *limits,
4995 : unsigned long long records)
4996 : {
4997 7502990 : unsigned long long level_blocks = howmany_64(records, limits[0]);
4998 7502990 : unsigned long long blocks = level_blocks;
4999 :
5000 21348087 : while (level_blocks > 1) {
5001 13845097 : level_blocks = howmany_64(level_blocks, limits[1]);
5002 13845097 : blocks += level_blocks;
5003 : }
5004 :
5005 7502990 : return blocks;
5006 : }
5007 :
5008 : /*
5009 : * Given a number of available blocks for the btree to consume with records and
5010 : * pointers, calculate the height of the tree needed to index all the records
5011 : * that space can hold based on the number of pointers each interior node
5012 : * holds.
5013 : *
5014 : * We start by assuming a single level tree consumes a single block, then track
5015 : * the number of blocks each node level consumes until we no longer have space
5016 : * to store the next node level. At this point, we are indexing all the leaf
5017 : * blocks in the space, and there's no more free space to split the tree any
5018 : * further. That's our maximum btree height.
5019 : */
5020 : unsigned int
5021 1717004792 : xfs_btree_space_to_height(
5022 : const unsigned int *limits,
5023 : unsigned long long leaf_blocks)
5024 : {
5025 : /*
5026 : * The root btree block can have fewer than minrecs pointers in it
5027 : * because the tree might not be big enough to require that amount of
5028 : * fanout. Hence it has a minimum size of 2 pointers, not limits[1].
5029 : */
5030 1717004792 : unsigned long long node_blocks = 2;
5031 1717004792 : unsigned long long blocks_left = leaf_blocks - 1;
5032 1717004792 : unsigned int height = 1;
5033 :
5034 1717004792 : if (leaf_blocks < 1)
5035 : return 0;
5036 :
5037 13736006152 : while (node_blocks < blocks_left) {
5038 12019001360 : blocks_left -= node_blocks;
5039 12019001360 : node_blocks *= limits[1];
5040 12019001360 : height++;
5041 : }
5042 :
5043 : return height;
5044 : }
5045 :
5046 : /*
5047 : * Query a regular btree for all records overlapping a given interval.
5048 : * Start with a LE lookup of the key of low_rec and return all records
5049 : * until we find a record with a key greater than the key of high_rec.
5050 : */
5051 : STATIC int
5052 4400785467 : xfs_btree_simple_query_range(
5053 : struct xfs_btree_cur *cur,
5054 : const union xfs_btree_key *low_key,
5055 : const union xfs_btree_key *high_key,
5056 : xfs_btree_query_range_fn fn,
5057 : void *priv)
5058 : {
5059 4400785467 : union xfs_btree_rec *recp;
5060 4400785467 : union xfs_btree_key rec_key;
5061 4400785467 : int stat;
5062 4400785467 : bool firstrec = true;
5063 4400785467 : int error;
5064 :
5065 4400785467 : ASSERT(cur->bc_ops->init_high_key_from_rec);
5066 4400785467 : ASSERT(cur->bc_ops->diff_two_keys);
5067 :
5068 : /*
5069 : * Find the leftmost record. The btree cursor must be set
5070 : * to the low record used to generate low_key.
5071 : */
5072 4400785467 : stat = 0;
5073 4400785467 : error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
5074 4400794459 : if (error)
5075 4 : goto out;
5076 :
5077 : /* Nothing? See if there's anything to the right. */
5078 4400794455 : if (!stat) {
5079 1769755506 : error = xfs_btree_increment(cur, 0, &stat);
5080 1769768468 : if (error)
5081 0 : goto out;
5082 : }
5083 :
5084 >10338*10^7 : while (stat) {
5085 : /* Find the record. */
5086 >10222*10^7 : error = xfs_btree_get_rec(cur, &recp, &stat);
5087 >10187*10^7 : if (error || !stat)
5088 : break;
5089 :
5090 : /* Skip if low_key > high_key(rec). */
5091 >10187*10^7 : if (firstrec) {
5092 3411027589 : cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
5093 3411030846 : firstrec = false;
5094 3411030846 : if (xfs_btree_keycmp_gt(cur, low_key, &rec_key))
5095 2631140896 : goto advloop;
5096 : }
5097 :
5098 : /* Stop if low_key(rec) > high_key. */
5099 99240563007 : cur->bc_ops->init_key_from_rec(&rec_key, recp);
5100 97864164311 : if (xfs_btree_keycmp_gt(cur, &rec_key, high_key))
5101 : break;
5102 :
5103 : /* Callback */
5104 95785073280 : error = fn(cur, recp, priv);
5105 95901717653 : if (error)
5106 : break;
5107 :
5108 95901715420 : advloop:
5109 : /* Move on to the next record. */
5110 98532856316 : error = xfs_btree_increment(cur, 0, &stat);
5111 98982402807 : if (error)
5112 : break;
5113 : }
5114 :
5115 4400859398 : out:
5116 4400859402 : return error;
5117 : }
5118 :
5119 : /*
5120 : * Query an overlapped interval btree for all records overlapping a given
5121 : * interval. This function roughly follows the algorithm given in
5122 : * "Interval Trees" of _Introduction to Algorithms_, which is section
5123 : * 14.3 in the 2nd and 3rd editions.
5124 : *
5125 : * First, generate keys for the low and high records passed in.
5126 : *
5127 : * For any leaf node, generate the high and low keys for the record.
5128 : * If the record keys overlap with the query low/high keys, pass the
5129 : * record to the function iterator.
5130 : *
5131 : * For any internal node, compare the low and high keys of each
5132 : * pointer against the query low/high keys. If there's an overlap,
5133 : * follow the pointer.
5134 : *
5135 : * As an optimization, we stop scanning a block when we find a low key
5136 : * that is greater than the query's high key.
5137 : */
5138 : STATIC int
5139 1576493168 : xfs_btree_overlapped_query_range(
5140 : struct xfs_btree_cur *cur,
5141 : const union xfs_btree_key *low_key,
5142 : const union xfs_btree_key *high_key,
5143 : xfs_btree_query_range_fn fn,
5144 : void *priv)
5145 : {
5146 1576493168 : union xfs_btree_ptr ptr;
5147 1576493168 : union xfs_btree_ptr *pp;
5148 1576493168 : union xfs_btree_key rec_key;
5149 1576493168 : union xfs_btree_key rec_hkey;
5150 1576493168 : union xfs_btree_key *lkp;
5151 1576493168 : union xfs_btree_key *hkp;
5152 1576493168 : union xfs_btree_rec *recp;
5153 1576493168 : struct xfs_btree_block *block;
5154 1576493168 : int level;
5155 1576493168 : struct xfs_buf *bp;
5156 1576493168 : int i;
5157 1576493168 : int error;
5158 :
5159 : /* Load the root of the btree. */
5160 1576493168 : level = cur->bc_nlevels - 1;
5161 1576493168 : cur->bc_ops->init_ptr_from_cur(cur, &ptr);
5162 1576499933 : error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
5163 1576497934 : if (error)
5164 : return error;
5165 1576498924 : xfs_btree_get_block(cur, level, &bp);
5166 1576495840 : trace_xfs_btree_overlapped_query_range(cur, level, bp);
5167 : #ifdef DEBUG
5168 1576495081 : error = xfs_btree_check_block(cur, block, level, bp);
5169 1576490830 : if (error)
5170 0 : goto out;
5171 : #endif
5172 1576490830 : cur->bc_levels[level].ptr = 1;
5173 :
5174 >20264*10^7 : while (level < cur->bc_nlevels) {
5175 >20106*10^7 : block = xfs_btree_get_block(cur, level, &bp);
5176 :
5177 : /* End of node, pop back towards the root. */
5178 >20106*10^7 : if (cur->bc_levels[level].ptr >
5179 >20106*10^7 : be16_to_cpu(block->bb_numrecs)) {
5180 566207000 : pop_up:
5181 4329855348 : if (level < cur->bc_nlevels - 1)
5182 2754909575 : cur->bc_levels[level + 1].ptr++;
5183 4329855348 : level++;
5184 4329855348 : continue;
5185 : }
5186 :
5187 >20049*10^7 : if (level == 0) {
5188 : /* Handle a leaf node. */
5189 >14600*10^7 : recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
5190 : block);
5191 :
5192 >14600*10^7 : cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
5193 >14598*10^7 : cur->bc_ops->init_key_from_rec(&rec_key, recp);
5194 :
5195 : /*
5196 : * If (query's high key < record's low key), then there
5197 : * are no more interesting records in this block. Pop
5198 : * up to the leaf level to find more record blocks.
5199 : *
5200 : * If (record's high key >= query's low key) and
5201 : * (query's high key >= record's low key), then
5202 : * this record overlaps the query range; callback.
5203 : */
5204 >14599*10^7 : if (xfs_btree_keycmp_lt(cur, high_key, &rec_key))
5205 1555439833 : goto pop_up;
5206 >14445*10^7 : if (xfs_btree_keycmp_ge(cur, &rec_hkey, low_key)) {
5207 12325337818 : error = fn(cur, recp, priv);
5208 12325475310 : if (error)
5209 : break;
5210 : }
5211 >14444*10^7 : cur->bc_levels[level].ptr++;
5212 >14444*10^7 : continue;
5213 : }
5214 :
5215 : /* Handle an internal node. */
5216 54494087219 : lkp = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);
5217 54494087219 : hkp = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr,
5218 : block);
5219 54494087219 : pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
5220 :
5221 : /*
5222 : * If (query's high key < pointer's low key), then there are no
5223 : * more interesting keys in this block. Pop up one leaf level
5224 : * to continue looking for records.
5225 : *
5226 : * If (pointer's high key >= query's low key) and
5227 : * (query's high key >= pointer's low key), then
5228 : * this record overlaps the query range; follow pointer.
5229 : */
5230 54485628166 : if (xfs_btree_keycmp_lt(cur, high_key, lkp))
5231 2208208515 : goto pop_up;
5232 52283026434 : if (xfs_btree_keycmp_ge(cur, hkp, low_key)) {
5233 2757934374 : level--;
5234 2757934374 : error = xfs_btree_lookup_get_block(cur, level, pp,
5235 : &block);
5236 2757946145 : if (error)
5237 0 : goto out;
5238 2757946145 : xfs_btree_get_block(cur, level, &bp);
5239 2757934366 : trace_xfs_btree_overlapped_query_range(cur, level, bp);
5240 : #ifdef DEBUG
5241 2757932465 : error = xfs_btree_check_block(cur, block, level, bp);
5242 2757938301 : if (error)
5243 0 : goto out;
5244 : #endif
5245 2757938301 : cur->bc_levels[level].ptr = 1;
5246 2757938301 : continue;
5247 : }
5248 49536804887 : cur->bc_levels[level].ptr++;
5249 : }
5250 :
5251 1576485350 : out:
5252 : /*
5253 : * If we don't end this function with the cursor pointing at a record
5254 : * block, a subsequent non-error cursor deletion will not release
5255 : * node-level buffers, causing a buffer leak. This is quite possible
5256 : * with a zero-results range query, so release the buffers if we
5257 : * failed to return any results.
5258 : */
5259 1576485350 : if (cur->bc_levels[0].bp == NULL) {
5260 608205 : for (i = 0; i < cur->bc_nlevels; i++) {
5261 322861 : if (cur->bc_levels[i].bp) {
5262 26084 : xfs_trans_brelse(cur->bc_tp,
5263 : cur->bc_levels[i].bp);
5264 26083 : cur->bc_levels[i].bp = NULL;
5265 26083 : cur->bc_levels[i].ptr = 0;
5266 26083 : cur->bc_levels[i].ra = 0;
5267 : }
5268 : }
5269 : }
5270 :
5271 : return error;
5272 : }
5273 :
5274 : static inline void
5275 21524464193 : xfs_btree_key_from_irec(
5276 : struct xfs_btree_cur *cur,
5277 : union xfs_btree_key *key,
5278 : const union xfs_btree_irec *irec)
5279 : {
5280 21524464193 : union xfs_btree_rec rec;
5281 :
5282 21524464193 : cur->bc_rec = *irec;
5283 21524464193 : cur->bc_ops->init_rec_from_cur(cur, &rec);
5284 21524315414 : cur->bc_ops->init_key_from_rec(key, &rec);
5285 21524087905 : }
5286 :
5287 : /*
5288 : * Query a btree for all records overlapping a given interval of keys. The
5289 : * supplied function will be called with each record found; return one of the
5290 : * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
5291 : * code. This function returns -ECANCELED, zero, or a negative error code.
5292 : */
5293 : int
5294 5972090878 : xfs_btree_query_range(
5295 : struct xfs_btree_cur *cur,
5296 : const union xfs_btree_irec *low_rec,
5297 : const union xfs_btree_irec *high_rec,
5298 : xfs_btree_query_range_fn fn,
5299 : void *priv)
5300 : {
5301 5972090878 : union xfs_btree_key low_key;
5302 5972090878 : union xfs_btree_key high_key;
5303 :
5304 : /* Find the keys of both ends of the interval. */
5305 5972090878 : xfs_btree_key_from_irec(cur, &high_key, high_rec);
5306 5972187427 : xfs_btree_key_from_irec(cur, &low_key, low_rec);
5307 :
5308 : /* Enforce low key <= high key. */
5309 5972182340 : if (!xfs_btree_keycmp_le(cur, &low_key, &high_key))
5310 : return -EINVAL;
5311 :
5312 5972391010 : if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
5313 4395894095 : return xfs_btree_simple_query_range(cur, &low_key,
5314 : &high_key, fn, priv);
5315 1576496915 : return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
5316 : fn, priv);
5317 : }
5318 :
5319 : /* Query a btree for all records. */
5320 : int
5321 4904048 : xfs_btree_query_all(
5322 : struct xfs_btree_cur *cur,
5323 : xfs_btree_query_range_fn fn,
5324 : void *priv)
5325 : {
5326 4904048 : union xfs_btree_key low_key;
5327 4904048 : union xfs_btree_key high_key;
5328 :
5329 4904048 : memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
5330 4904048 : memset(&low_key, 0, sizeof(low_key));
5331 4904048 : memset(&high_key, 0xFF, sizeof(high_key));
5332 :
5333 4904048 : return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv);
5334 : }
5335 :
5336 : static int
5337 106143492 : xfs_btree_count_blocks_helper(
5338 : struct xfs_btree_cur *cur,
5339 : int level,
5340 : void *data)
5341 : {
5342 106143492 : xfs_extlen_t *blocks = data;
5343 106143492 : (*blocks)++;
5344 :
5345 106143492 : return 0;
5346 : }
5347 :
5348 : /* Count the blocks in a btree and return the result in *blocks. */
5349 : int
5350 5926329 : xfs_btree_count_blocks(
5351 : struct xfs_btree_cur *cur,
5352 : xfs_extlen_t *blocks)
5353 : {
5354 5926329 : *blocks = 0;
5355 5926329 : return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
5356 : XFS_BTREE_VISIT_ALL, blocks);
5357 : }
5358 :
5359 : /* Compare two btree pointers. */
5360 : int64_t
5361 28543099 : xfs_btree_diff_two_ptrs(
5362 : struct xfs_btree_cur *cur,
5363 : const union xfs_btree_ptr *a,
5364 : const union xfs_btree_ptr *b)
5365 : {
5366 28543099 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
5367 13724397 : return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
5368 14818702 : return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
5369 : }
5370 :
5371 : struct xfs_btree_has_records {
5372 : /* Keys for the start and end of the range we want to know about. */
5373 : union xfs_btree_key start_key;
5374 : union xfs_btree_key end_key;
5375 :
5376 : /* Mask for key comparisons, if desired. */
5377 : const union xfs_btree_key *key_mask;
5378 :
5379 : /* Highest record key we've seen so far. */
5380 : union xfs_btree_key high_key;
5381 :
5382 : enum xbtree_recpacking outcome;
5383 : };
5384 :
5385 : STATIC int
5386 5305180735 : xfs_btree_has_records_helper(
5387 : struct xfs_btree_cur *cur,
5388 : const union xfs_btree_rec *rec,
5389 : void *priv)
5390 : {
5391 5305180735 : union xfs_btree_key rec_key;
5392 5305180735 : union xfs_btree_key rec_high_key;
5393 5305180735 : struct xfs_btree_has_records *info = priv;
5394 5305180735 : enum xbtree_key_contig key_contig;
5395 :
5396 5305180735 : cur->bc_ops->init_key_from_rec(&rec_key, rec);
5397 :
5398 5305268768 : if (info->outcome == XBTREE_RECPACKING_EMPTY) {
5399 3916080 : info->outcome = XBTREE_RECPACKING_SPARSE;
5400 :
5401 : /*
5402 : * If the first record we find does not overlap the start key,
5403 : * then there is a hole at the start of the search range.
5404 : * Classify this as sparse and stop immediately.
5405 : */
5406 3916080 : if (xfs_btree_masked_keycmp_lt(cur, &info->start_key, &rec_key,
5407 : info->key_mask))
5408 : return -ECANCELED;
5409 : } else {
5410 : /*
5411 : * If a subsequent record does not overlap with the any record
5412 : * we've seen so far, there is a hole in the middle of the
5413 : * search range. Classify this as sparse and stop.
5414 : * If the keys overlap and this btree does not allow overlap,
5415 : * signal corruption.
5416 : */
5417 5301352688 : key_contig = cur->bc_ops->keys_contiguous(cur, &info->high_key,
5418 : &rec_key, info->key_mask);
5419 5301642109 : if (key_contig == XBTREE_KEY_OVERLAP &&
5420 758678093 : !(cur->bc_flags & XFS_BTREE_OVERLAPPING))
5421 : return -EFSCORRUPTED;
5422 5301642109 : if (key_contig == XBTREE_KEY_GAP)
5423 : return -ECANCELED;
5424 : }
5425 :
5426 : /*
5427 : * If high_key(rec) is larger than any other high key we've seen,
5428 : * remember it for later.
5429 : */
5430 5305558226 : cur->bc_ops->init_high_key_from_rec(&rec_high_key, rec);
5431 5305192178 : if (xfs_btree_masked_keycmp_gt(cur, &rec_high_key, &info->high_key,
5432 : info->key_mask))
5433 4559551663 : info->high_key = rec_high_key; /* struct copy */
5434 :
5435 : return 0;
5436 : }
5437 :
5438 : /*
5439 : * Scan part of the keyspace of a btree and tell us if that keyspace does not
5440 : * map to any records; is fully mapped to records; or is partially mapped to
5441 : * records. This is the btree record equivalent to determining if a file is
5442 : * sparse.
5443 : *
5444 : * For most btree types, the record scan should use all available btree key
5445 : * fields to compare the keys encountered. These callers should pass NULL for
5446 : * @mask. However, some callers (e.g. scanning physical space in the rmapbt)
5447 : * want to ignore some part of the btree record keyspace when performing the
5448 : * comparison. These callers should pass in a union xfs_btree_key object with
5449 : * the fields that *should* be a part of the comparison set to any nonzero
5450 : * value, and the rest zeroed.
5451 : */
5452 : int
5453 4790863338 : xfs_btree_has_records(
5454 : struct xfs_btree_cur *cur,
5455 : const union xfs_btree_irec *low,
5456 : const union xfs_btree_irec *high,
5457 : const union xfs_btree_key *mask,
5458 : enum xbtree_recpacking *outcome)
5459 : {
5460 4790863338 : struct xfs_btree_has_records info = {
5461 : .outcome = XBTREE_RECPACKING_EMPTY,
5462 : .key_mask = mask,
5463 : };
5464 4790863338 : int error;
5465 :
5466 : /* Not all btrees support this operation. */
5467 4790863338 : if (!cur->bc_ops->keys_contiguous) {
5468 0 : ASSERT(0);
5469 0 : return -EOPNOTSUPP;
5470 : }
5471 :
5472 4790863338 : xfs_btree_key_from_irec(cur, &info.start_key, low);
5473 4790878043 : xfs_btree_key_from_irec(cur, &info.end_key, high);
5474 :
5475 4790854462 : error = xfs_btree_query_range(cur, low, high,
5476 : xfs_btree_has_records_helper, &info);
5477 4790907482 : if (error == -ECANCELED)
5478 0 : goto out;
5479 4790907482 : if (error)
5480 : return error;
5481 :
5482 4790907482 : if (info.outcome == XBTREE_RECPACKING_EMPTY)
5483 4786991335 : goto out;
5484 :
5485 : /*
5486 : * If the largest high_key(rec) we saw during the walk is greater than
5487 : * the end of the search range, classify this as full. Otherwise,
5488 : * there is a hole at the end of the search range.
5489 : */
5490 3916147 : if (xfs_btree_masked_keycmp_ge(cur, &info.high_key, &info.end_key,
5491 : mask))
5492 3916154 : info.outcome = XBTREE_RECPACKING_FULL;
5493 :
5494 0 : out:
5495 4790907489 : *outcome = info.outcome;
5496 4790907489 : return 0;
5497 : }
5498 :
5499 : /* Are there more records in this btree? */
5500 : bool
5501 135655613 : xfs_btree_has_more_records(
5502 : struct xfs_btree_cur *cur)
5503 : {
5504 135655613 : struct xfs_btree_block *block;
5505 135655613 : struct xfs_buf *bp;
5506 :
5507 135655613 : block = xfs_btree_get_block(cur, 0, &bp);
5508 :
5509 : /* There are still records in this block. */
5510 271311226 : if (cur->bc_levels[0].ptr < xfs_btree_get_numrecs(block))
5511 : return true;
5512 :
5513 : /* There are more record blocks. */
5514 992024 : if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
5515 315845 : return block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK);
5516 : else
5517 676179 : return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK);
5518 : }
5519 :
5520 : /* Set up all the btree cursor caches. */
5521 : int __init
5522 12 : xfs_btree_init_cur_caches(void)
5523 : {
5524 12 : int error;
5525 :
5526 12 : error = xfs_allocbt_init_cur_cache();
5527 12 : if (error)
5528 : return error;
5529 12 : error = xfs_inobt_init_cur_cache();
5530 12 : if (error)
5531 0 : goto err;
5532 12 : error = xfs_bmbt_init_cur_cache();
5533 12 : if (error)
5534 0 : goto err;
5535 12 : error = xfs_rmapbt_init_cur_cache();
5536 12 : if (error)
5537 0 : goto err;
5538 12 : error = xfs_refcountbt_init_cur_cache();
5539 12 : if (error)
5540 0 : goto err;
5541 12 : error = xfs_rtrmapbt_init_cur_cache();
5542 12 : if (error)
5543 0 : goto err;
5544 12 : error = xfs_rtrefcountbt_init_cur_cache();
5545 12 : if (error)
5546 0 : goto err;
5547 :
5548 : return 0;
5549 0 : err:
5550 0 : xfs_btree_destroy_cur_caches();
5551 0 : return error;
5552 : }
5553 :
5554 : /* Destroy all the btree cursor caches, if they've been allocated. */
5555 : void
5556 12 : xfs_btree_destroy_cur_caches(void)
5557 : {
5558 12 : xfs_allocbt_destroy_cur_cache();
5559 12 : xfs_inobt_destroy_cur_cache();
5560 12 : xfs_bmbt_destroy_cur_cache();
5561 12 : xfs_rmapbt_destroy_cur_cache();
5562 12 : xfs_refcountbt_destroy_cur_cache();
5563 12 : xfs_rtrmapbt_destroy_cur_cache();
5564 12 : xfs_rtrefcountbt_destroy_cur_cache();
5565 12 : }
5566 :
5567 : /* Move the btree cursor before the first record. */
5568 : int
5569 331924518 : xfs_btree_goto_left_edge(
5570 : struct xfs_btree_cur *cur)
5571 : {
5572 331924518 : int stat = 0;
5573 331924518 : int error;
5574 :
5575 331924518 : memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
5576 331924518 : error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
5577 331924517 : if (error)
5578 : return error;
5579 331924517 : if (!stat)
5580 : return 0;
5581 :
5582 0 : error = xfs_btree_decrement(cur, 0, &stat);
5583 0 : if (error)
5584 : return error;
5585 0 : if (stat != 0) {
5586 0 : ASSERT(0);
5587 0 : xfs_btree_mark_sick(cur);
5588 0 : return -EFSCORRUPTED;
5589 : }
5590 :
5591 : return 0;
5592 : }
5593 :
5594 : /* Allocate a block for an inode-rooted metadata btree. */
5595 : int
5596 162793 : xfs_btree_alloc_imeta_block(
5597 : struct xfs_btree_cur *cur,
5598 : const union xfs_btree_ptr *start,
5599 : union xfs_btree_ptr *new,
5600 : int *stat)
5601 : {
5602 162793 : struct xfs_alloc_arg args = {
5603 162793 : .mp = cur->bc_mp,
5604 162793 : .tp = cur->bc_tp,
5605 : .resv = XFS_AG_RESV_IMETA,
5606 : .minlen = 1,
5607 : .maxlen = 1,
5608 : .prod = 1,
5609 : };
5610 162793 : struct xfs_inode *ip = cur->bc_ino.ip;
5611 162793 : int error;
5612 :
5613 162793 : ASSERT(xfs_is_metadir_inode(ip));
5614 162793 : ASSERT(XFS_IS_DQDETACHED(cur->bc_mp, ip));
5615 :
5616 162793 : xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, cur->bc_ino.whichfork);
5617 162793 : error = xfs_alloc_vextent_start_ag(&args,
5618 162793 : XFS_INO_TO_FSB(cur->bc_mp, ip->i_ino));
5619 162793 : if (error)
5620 : return error;
5621 162793 : if (args.fsbno == NULLFSBLOCK) {
5622 0 : *stat = 0;
5623 0 : return 0;
5624 : }
5625 162793 : ASSERT(args.len == 1);
5626 :
5627 162793 : xfs_imeta_resv_alloc_extent(ip, &args);
5628 162793 : cur->bc_ino.allocated++;
5629 :
5630 162793 : new->l = cpu_to_be64(args.fsbno);
5631 162793 : *stat = 1;
5632 162793 : return 0;
5633 : }
5634 :
5635 : /* Free a block from an inode-rooted metadata btree. */
5636 : int
5637 16485 : xfs_btree_free_imeta_block(
5638 : struct xfs_btree_cur *cur,
5639 : struct xfs_buf *bp)
5640 : {
5641 16485 : struct xfs_owner_info oinfo;
5642 16485 : struct xfs_mount *mp = cur->bc_mp;
5643 16485 : struct xfs_inode *ip = cur->bc_ino.ip;
5644 16485 : struct xfs_trans *tp = cur->bc_tp;
5645 16485 : xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
5646 16485 : int error;
5647 :
5648 16485 : ASSERT(xfs_is_metadir_inode(ip));
5649 16485 : ASSERT(XFS_IS_DQDETACHED(cur->bc_mp, ip));
5650 :
5651 16485 : xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
5652 16485 : error = xfs_free_extent_later(tp, fsbno, 1, &oinfo, XFS_AG_RESV_IMETA,
5653 : 0);
5654 16485 : if (error)
5655 : return error;
5656 :
5657 16485 : xfs_imeta_resv_free_extent(ip, tp, 1);
5658 16485 : return 0;
5659 : }
|