Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4 : * Copyright (c) 2013 Red Hat, Inc.
5 : * All Rights Reserved.
6 : */
7 : #include "xfs.h"
8 : #include "xfs_fs.h"
9 : #include "xfs_shared.h"
10 : #include "xfs_format.h"
11 : #include "xfs_log_format.h"
12 : #include "xfs_trans_resv.h"
13 : #include "xfs_bit.h"
14 : #include "xfs_mount.h"
15 : #include "xfs_inode.h"
16 : #include "xfs_dir2.h"
17 : #include "xfs_dir2_priv.h"
18 : #include "xfs_trans.h"
19 : #include "xfs_bmap.h"
20 : #include "xfs_attr_leaf.h"
21 : #include "xfs_error.h"
22 : #include "xfs_trace.h"
23 : #include "xfs_buf_item.h"
24 : #include "xfs_log.h"
25 : #include "xfs_errortag.h"
26 :
27 : /*
28 : * xfs_da_btree.c
29 : *
30 : * Routines to implement directories as Btrees of hashed names.
31 : */
32 :
33 : /*========================================================================
34 : * Function prototypes for the kernel.
35 : *========================================================================*/
36 :
37 : /*
38 : * Routines used for growing the Btree.
39 : */
40 : STATIC int xfs_da3_root_split(xfs_da_state_t *state,
41 : xfs_da_state_blk_t *existing_root,
42 : xfs_da_state_blk_t *new_child);
43 : STATIC int xfs_da3_node_split(xfs_da_state_t *state,
44 : xfs_da_state_blk_t *existing_blk,
45 : xfs_da_state_blk_t *split_blk,
46 : xfs_da_state_blk_t *blk_to_add,
47 : int treelevel,
48 : int *result);
49 : STATIC void xfs_da3_node_rebalance(xfs_da_state_t *state,
50 : xfs_da_state_blk_t *node_blk_1,
51 : xfs_da_state_blk_t *node_blk_2);
52 : STATIC void xfs_da3_node_add(xfs_da_state_t *state,
53 : xfs_da_state_blk_t *old_node_blk,
54 : xfs_da_state_blk_t *new_node_blk);
55 :
56 : /*
57 : * Routines used for shrinking the Btree.
58 : */
59 : STATIC int xfs_da3_root_join(xfs_da_state_t *state,
60 : xfs_da_state_blk_t *root_blk);
61 : STATIC int xfs_da3_node_toosmall(xfs_da_state_t *state, int *retval);
62 : STATIC void xfs_da3_node_remove(xfs_da_state_t *state,
63 : xfs_da_state_blk_t *drop_blk);
64 : STATIC void xfs_da3_node_unbalance(xfs_da_state_t *state,
65 : xfs_da_state_blk_t *src_node_blk,
66 : xfs_da_state_blk_t *dst_node_blk);
67 :
68 : /*
69 : * Utility routines.
70 : */
71 : STATIC int xfs_da3_blk_unlink(xfs_da_state_t *state,
72 : xfs_da_state_blk_t *drop_blk,
73 : xfs_da_state_blk_t *save_blk);
74 :
75 :
76 : struct kmem_cache *xfs_da_state_cache; /* anchor for dir/attr state */
77 :
78 : /*
79 : * Allocate a dir-state structure.
80 : * We don't put them on the stack since they're large.
81 : */
82 : struct xfs_da_state *
83 182810581 : xfs_da_state_alloc(
84 : struct xfs_da_args *args)
85 : {
86 182810581 : struct xfs_da_state *state;
87 :
88 182810581 : state = kmem_cache_zalloc(xfs_da_state_cache, GFP_NOFS | __GFP_NOFAIL);
89 182828186 : state->args = args;
90 182828186 : state->mp = args->dp->i_mount;
91 182828186 : return state;
92 : }
93 :
94 : /*
95 : * Kill the altpath contents of a da-state structure.
96 : */
97 : STATIC void
98 182824592 : xfs_da_state_kill_altpath(xfs_da_state_t *state)
99 : {
100 182824592 : int i;
101 :
102 182864917 : for (i = 0; i < state->altpath.active; i++)
103 31354 : state->altpath.blk[i].bp = NULL;
104 182833563 : state->altpath.active = 0;
105 182833563 : }
106 :
107 : /*
108 : * Free a da-state structure.
109 : */
110 : void
111 182800113 : xfs_da_state_free(xfs_da_state_t *state)
112 : {
113 182800113 : xfs_da_state_kill_altpath(state);
114 : #ifdef DEBUG
115 182814329 : memset((char *)state, 0, sizeof(*state));
116 : #endif /* DEBUG */
117 182814329 : kmem_cache_free(xfs_da_state_cache, state);
118 182821526 : }
119 :
120 : void
121 2 : xfs_da_state_reset(
122 : struct xfs_da_state *state,
123 : struct xfs_da_args *args)
124 : {
125 2 : xfs_da_state_kill_altpath(state);
126 2 : memset(state, 0, sizeof(struct xfs_da_state));
127 2 : state->args = args;
128 2 : state->mp = state->args->dp->i_mount;
129 2 : }
130 :
131 : static inline int xfs_dabuf_nfsb(struct xfs_mount *mp, int whichfork)
132 : {
133 1452080019 : if (whichfork == XFS_DATA_FORK)
134 952645098 : return mp->m_dir_geo->fsbcount;
135 499434921 : return mp->m_attr_geo->fsbcount;
136 : }
137 :
138 : void
139 682211795 : xfs_da3_node_hdr_from_disk(
140 : struct xfs_mount *mp,
141 : struct xfs_da3_icnode_hdr *to,
142 : struct xfs_da_intnode *from)
143 : {
144 682211795 : if (xfs_has_crc(mp)) {
145 682211795 : struct xfs_da3_intnode *from3 = (struct xfs_da3_intnode *)from;
146 :
147 682211795 : to->forw = be32_to_cpu(from3->hdr.info.hdr.forw);
148 682211795 : to->back = be32_to_cpu(from3->hdr.info.hdr.back);
149 682211795 : to->magic = be16_to_cpu(from3->hdr.info.hdr.magic);
150 682211795 : to->count = be16_to_cpu(from3->hdr.__count);
151 682211795 : to->level = be16_to_cpu(from3->hdr.__level);
152 682211795 : to->btree = from3->__btree;
153 682211795 : ASSERT(to->magic == XFS_DA3_NODE_MAGIC);
154 : } else {
155 0 : to->forw = be32_to_cpu(from->hdr.info.forw);
156 0 : to->back = be32_to_cpu(from->hdr.info.back);
157 0 : to->magic = be16_to_cpu(from->hdr.info.magic);
158 0 : to->count = be16_to_cpu(from->hdr.__count);
159 0 : to->level = be16_to_cpu(from->hdr.__level);
160 0 : to->btree = from->__btree;
161 0 : ASSERT(to->magic == XFS_DA_NODE_MAGIC);
162 : }
163 682211795 : }
164 :
165 : void
166 127038 : xfs_da3_node_hdr_to_disk(
167 : struct xfs_mount *mp,
168 : struct xfs_da_intnode *to,
169 : struct xfs_da3_icnode_hdr *from)
170 : {
171 127038 : if (xfs_has_crc(mp)) {
172 127038 : struct xfs_da3_intnode *to3 = (struct xfs_da3_intnode *)to;
173 :
174 127038 : ASSERT(from->magic == XFS_DA3_NODE_MAGIC);
175 127038 : to3->hdr.info.hdr.forw = cpu_to_be32(from->forw);
176 127038 : to3->hdr.info.hdr.back = cpu_to_be32(from->back);
177 127038 : to3->hdr.info.hdr.magic = cpu_to_be16(from->magic);
178 127038 : to3->hdr.__count = cpu_to_be16(from->count);
179 254076 : to3->hdr.__level = cpu_to_be16(from->level);
180 : } else {
181 0 : ASSERT(from->magic == XFS_DA_NODE_MAGIC);
182 0 : to->hdr.info.forw = cpu_to_be32(from->forw);
183 0 : to->hdr.info.back = cpu_to_be32(from->back);
184 0 : to->hdr.info.magic = cpu_to_be16(from->magic);
185 0 : to->hdr.__count = cpu_to_be16(from->count);
186 0 : to->hdr.__level = cpu_to_be16(from->level);
187 : }
188 127038 : }
189 :
190 : /*
191 : * Verify an xfs_da3_blkinfo structure. Note that the da3 fields are only
192 : * accessible on v5 filesystems. This header format is common across da node,
193 : * attr leaf and dir leaf blocks.
194 : */
195 : xfs_failaddr_t
196 15790645 : xfs_da3_blkinfo_verify(
197 : struct xfs_buf *bp,
198 : struct xfs_da3_blkinfo *hdr3)
199 : {
200 15790645 : struct xfs_mount *mp = bp->b_mount;
201 15790645 : struct xfs_da_blkinfo *hdr = &hdr3->hdr;
202 :
203 15790645 : if (!xfs_verify_magic16(bp, hdr->magic))
204 0 : return __this_address;
205 :
206 15790650 : if (xfs_has_crc(mp)) {
207 15790650 : if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
208 0 : return __this_address;
209 15790652 : if (be64_to_cpu(hdr3->blkno) != xfs_buf_daddr(bp))
210 2 : return __this_address;
211 15790650 : if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
212 0 : return __this_address;
213 : }
214 :
215 : return NULL;
216 : }
217 :
218 : static xfs_failaddr_t
219 309896 : xfs_da3_node_verify(
220 : struct xfs_buf *bp)
221 : {
222 309896 : struct xfs_mount *mp = bp->b_mount;
223 309896 : struct xfs_da_intnode *hdr = bp->b_addr;
224 309896 : struct xfs_da3_icnode_hdr ichdr;
225 309896 : xfs_failaddr_t fa;
226 :
227 309896 : xfs_da3_node_hdr_from_disk(mp, &ichdr, hdr);
228 :
229 309896 : fa = xfs_da3_blkinfo_verify(bp, bp->b_addr);
230 309896 : if (fa)
231 : return fa;
232 :
233 309896 : if (ichdr.level == 0)
234 0 : return __this_address;
235 309896 : if (ichdr.level > XFS_DA_NODE_MAXDEPTH)
236 0 : return __this_address;
237 309896 : if (ichdr.count == 0)
238 0 : return __this_address;
239 :
240 : /*
241 : * we don't know if the node is for and attribute or directory tree,
242 : * so only fail if the count is outside both bounds
243 : */
244 309896 : if (ichdr.count > mp->m_dir_geo->node_ents &&
245 0 : ichdr.count > mp->m_attr_geo->node_ents)
246 0 : return __this_address;
247 :
248 : /* XXX: hash order check? */
249 :
250 : return NULL;
251 : }
252 :
253 : static void
254 226574 : xfs_da3_node_write_verify(
255 : struct xfs_buf *bp)
256 : {
257 226574 : struct xfs_mount *mp = bp->b_mount;
258 226574 : struct xfs_buf_log_item *bip = bp->b_log_item;
259 226574 : struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
260 226574 : xfs_failaddr_t fa;
261 :
262 226574 : fa = xfs_da3_node_verify(bp);
263 226574 : if (fa) {
264 0 : xfs_verifier_error(bp, -EFSCORRUPTED, fa);
265 0 : return;
266 : }
267 :
268 226574 : if (!xfs_has_crc(mp))
269 : return;
270 :
271 226574 : if (bip)
272 226574 : hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
273 :
274 226574 : xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF);
275 : }
276 :
277 : /*
278 : * leaf/node format detection on trees is sketchy, so a node read can be done on
279 : * leaf level blocks when detection identifies the tree as a node format tree
280 : * incorrectly. In this case, we need to swap the verifier to match the correct
281 : * format of the block being read.
282 : */
283 : static void
284 135947 : xfs_da3_node_read_verify(
285 : struct xfs_buf *bp)
286 : {
287 135947 : struct xfs_da_blkinfo *info = bp->b_addr;
288 135947 : xfs_failaddr_t fa;
289 :
290 135947 : switch (be16_to_cpu(info->magic)) {
291 : case XFS_DA3_NODE_MAGIC:
292 29892 : if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
293 16 : xfs_verifier_error(bp, -EFSBADCRC,
294 8 : __this_address);
295 8 : break;
296 : }
297 29884 : fallthrough;
298 : case XFS_DA_NODE_MAGIC:
299 29884 : fa = xfs_da3_node_verify(bp);
300 29884 : if (fa)
301 0 : xfs_verifier_error(bp, -EFSCORRUPTED, fa);
302 : return;
303 43155 : case XFS_ATTR_LEAF_MAGIC:
304 : case XFS_ATTR3_LEAF_MAGIC:
305 43155 : bp->b_ops = &xfs_attr3_leaf_buf_ops;
306 43155 : bp->b_ops->verify_read(bp);
307 43155 : return;
308 62898 : case XFS_DIR2_LEAFN_MAGIC:
309 : case XFS_DIR3_LEAFN_MAGIC:
310 62898 : bp->b_ops = &xfs_dir3_leafn_buf_ops;
311 62898 : bp->b_ops->verify_read(bp);
312 62898 : return;
313 : default:
314 2 : xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
315 2 : break;
316 : }
317 : }
318 :
319 : /* Verify the structure of a da3 block. */
320 : static xfs_failaddr_t
321 53438 : xfs_da3_node_verify_struct(
322 : struct xfs_buf *bp)
323 : {
324 53438 : struct xfs_da_blkinfo *info = bp->b_addr;
325 :
326 53438 : switch (be16_to_cpu(info->magic)) {
327 53438 : case XFS_DA3_NODE_MAGIC:
328 : case XFS_DA_NODE_MAGIC:
329 53438 : return xfs_da3_node_verify(bp);
330 0 : case XFS_ATTR_LEAF_MAGIC:
331 : case XFS_ATTR3_LEAF_MAGIC:
332 0 : bp->b_ops = &xfs_attr3_leaf_buf_ops;
333 0 : return bp->b_ops->verify_struct(bp);
334 0 : case XFS_DIR2_LEAFN_MAGIC:
335 : case XFS_DIR3_LEAFN_MAGIC:
336 0 : bp->b_ops = &xfs_dir3_leafn_buf_ops;
337 0 : return bp->b_ops->verify_struct(bp);
338 : default:
339 0 : return __this_address;
340 : }
341 : }
342 :
343 : const struct xfs_buf_ops xfs_da3_node_buf_ops = {
344 : .name = "xfs_da3_node",
345 : .magic16 = { cpu_to_be16(XFS_DA_NODE_MAGIC),
346 : cpu_to_be16(XFS_DA3_NODE_MAGIC) },
347 : .verify_read = xfs_da3_node_read_verify,
348 : .verify_write = xfs_da3_node_write_verify,
349 : .verify_struct = xfs_da3_node_verify_struct,
350 : };
351 :
352 : static int
353 646339318 : xfs_da3_node_set_type(
354 : struct xfs_trans *tp,
355 : struct xfs_buf *bp)
356 : {
357 646339318 : struct xfs_da_blkinfo *info = bp->b_addr;
358 :
359 646339318 : switch (be16_to_cpu(info->magic)) {
360 172363064 : case XFS_DA_NODE_MAGIC:
361 : case XFS_DA3_NODE_MAGIC:
362 172363064 : xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
363 172363064 : return 0;
364 305789028 : case XFS_ATTR_LEAF_MAGIC:
365 : case XFS_ATTR3_LEAF_MAGIC:
366 305789028 : xfs_trans_buf_set_type(tp, bp, XFS_BLFT_ATTR_LEAF_BUF);
367 305789028 : return 0;
368 168187226 : case XFS_DIR2_LEAFN_MAGIC:
369 : case XFS_DIR3_LEAFN_MAGIC:
370 168187226 : xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
371 168187226 : return 0;
372 0 : default:
373 0 : XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, tp->t_mountp,
374 : info, sizeof(*info));
375 0 : xfs_trans_brelse(tp, bp);
376 0 : return -EFSCORRUPTED;
377 : }
378 : }
379 :
380 : int
381 670939826 : xfs_da3_node_read(
382 : struct xfs_trans *tp,
383 : struct xfs_inode *dp,
384 : xfs_dablk_t bno,
385 : struct xfs_buf **bpp,
386 : int whichfork)
387 : {
388 670939826 : int error;
389 :
390 670939826 : error = xfs_da_read_buf(tp, dp, bno, 0, bpp, whichfork,
391 : &xfs_da3_node_buf_ops);
392 671055818 : if (error || !*bpp || !tp)
393 : return error;
394 646279361 : return xfs_da3_node_set_type(tp, *bpp);
395 : }
396 :
397 : int
398 13304 : xfs_da3_node_read_mapped(
399 : struct xfs_trans *tp,
400 : struct xfs_inode *dp,
401 : xfs_daddr_t mappedbno,
402 : struct xfs_buf **bpp,
403 : int whichfork)
404 : {
405 13304 : struct xfs_mount *mp = dp->i_mount;
406 13304 : int error;
407 :
408 13304 : error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, mappedbno,
409 26608 : XFS_FSB_TO_BB(mp, xfs_dabuf_nfsb(mp, whichfork)), 0,
410 : bpp, &xfs_da3_node_buf_ops);
411 13304 : if (error || !*bpp)
412 : return error;
413 :
414 13304 : if (whichfork == XFS_ATTR_FORK)
415 13304 : xfs_buf_set_ref(*bpp, XFS_ATTR_BTREE_REF);
416 : else
417 0 : xfs_buf_set_ref(*bpp, XFS_DIR_BTREE_REF);
418 :
419 13304 : if (!tp)
420 : return 0;
421 13304 : return xfs_da3_node_set_type(tp, *bpp);
422 : }
423 :
424 : /*========================================================================
425 : * Routines used for growing the Btree.
426 : *========================================================================*/
427 :
428 : /*
429 : * Create the initial contents of an intermediate node.
430 : */
431 : int
432 23735 : xfs_da3_node_create(
433 : struct xfs_da_args *args,
434 : xfs_dablk_t blkno,
435 : int level,
436 : struct xfs_buf **bpp,
437 : int whichfork)
438 : {
439 23735 : struct xfs_da_intnode *node;
440 23735 : struct xfs_trans *tp = args->trans;
441 23735 : struct xfs_mount *mp = tp->t_mountp;
442 23735 : struct xfs_da3_icnode_hdr ichdr = {0};
443 23735 : struct xfs_buf *bp;
444 23735 : int error;
445 23735 : struct xfs_inode *dp = args->dp;
446 :
447 23735 : trace_xfs_da_node_create(args);
448 23735 : ASSERT(level <= XFS_DA_NODE_MAXDEPTH);
449 :
450 23735 : error = xfs_da_get_buf(tp, dp, blkno, &bp, whichfork);
451 23735 : if (error)
452 : return error;
453 23735 : bp->b_ops = &xfs_da3_node_buf_ops;
454 23735 : xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
455 23735 : node = bp->b_addr;
456 :
457 23735 : if (xfs_has_crc(mp)) {
458 23735 : struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
459 :
460 23735 : memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr));
461 23735 : ichdr.magic = XFS_DA3_NODE_MAGIC;
462 23735 : hdr3->info.blkno = cpu_to_be64(xfs_buf_daddr(bp));
463 23735 : hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
464 23735 : uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid);
465 : } else {
466 0 : ichdr.magic = XFS_DA_NODE_MAGIC;
467 : }
468 23735 : ichdr.level = level;
469 :
470 23735 : xfs_da3_node_hdr_to_disk(dp->i_mount, node, &ichdr);
471 23735 : xfs_trans_log_buf(tp, bp,
472 23735 : XFS_DA_LOGRANGE(node, &node->hdr, args->geo->node_hdr_size));
473 :
474 23735 : *bpp = bp;
475 23735 : return 0;
476 : }
477 :
478 : /*
479 : * Split a leaf node, rebalance, then possibly split
480 : * intermediate nodes, rebalance, etc.
481 : */
482 : int /* error */
483 61530 : xfs_da3_split(
484 : struct xfs_da_state *state)
485 : {
486 61530 : struct xfs_da_state_blk *oldblk;
487 61530 : struct xfs_da_state_blk *newblk;
488 61530 : struct xfs_da_state_blk *addblk;
489 61530 : struct xfs_da_intnode *node;
490 61530 : int max;
491 61530 : int action = 0;
492 61530 : int error;
493 61530 : int i;
494 :
495 61530 : trace_xfs_da_split(state->args);
496 :
497 61530 : if (XFS_TEST_ERROR(false, state->mp, XFS_ERRTAG_DA_LEAF_SPLIT))
498 : return -EIO;
499 :
500 : /*
501 : * Walk back up the tree splitting/inserting/adjusting as necessary.
502 : * If we need to insert and there isn't room, split the node, then
503 : * decide which fragment to insert the new block from below into.
504 : * Note that we may split the root this way, but we need more fixup.
505 : */
506 61528 : max = state->path.active - 1;
507 61528 : ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
508 61528 : ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
509 : state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);
510 :
511 61528 : addblk = &state->path.blk[max]; /* initial dummy value */
512 183065 : for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
513 121537 : oldblk = &state->path.blk[i];
514 121537 : newblk = &state->altpath.blk[i];
515 :
516 : /*
517 : * If a leaf node then
518 : * Allocate a new leaf node, then rebalance across them.
519 : * else if an intermediate node then
520 : * We split on the last layer, must we split the node?
521 : */
522 121537 : switch (oldblk->magic) {
523 37129 : case XFS_ATTR_LEAF_MAGIC:
524 37129 : error = xfs_attr3_leaf_split(state, oldblk, newblk);
525 37129 : if ((error != 0) && (error != -ENOSPC)) {
526 0 : return error; /* GROT: attr is inconsistent */
527 : }
528 37129 : if (!error) {
529 : addblk = newblk;
530 : break;
531 : }
532 : /*
533 : * Entry wouldn't fit, split the leaf again. The new
534 : * extrablk will be consumed by xfs_da3_node_split if
535 : * the node is split.
536 : */
537 0 : state->extravalid = 1;
538 0 : if (state->inleaf) {
539 0 : state->extraafter = 0; /* before newblk */
540 0 : trace_xfs_attr_leaf_split_before(state->args);
541 0 : error = xfs_attr3_leaf_split(state, oldblk,
542 0 : &state->extrablk);
543 : } else {
544 0 : state->extraafter = 1; /* after newblk */
545 0 : trace_xfs_attr_leaf_split_after(state->args);
546 0 : error = xfs_attr3_leaf_split(state, newblk,
547 0 : &state->extrablk);
548 : }
549 0 : if (error)
550 0 : return error; /* GROT: attr inconsistent */
551 : addblk = newblk;
552 : break;
553 24399 : case XFS_DIR2_LEAFN_MAGIC:
554 24399 : error = xfs_dir2_leafn_split(state, oldblk, newblk);
555 24399 : if (error)
556 0 : return error;
557 : addblk = newblk;
558 : break;
559 60009 : case XFS_DA_NODE_MAGIC:
560 60009 : error = xfs_da3_node_split(state, oldblk, newblk, addblk,
561 : max - i, &action);
562 60009 : addblk->bp = NULL;
563 60009 : if (error)
564 0 : return error; /* GROT: dir is inconsistent */
565 : /*
566 : * Record the newly split block for the next time thru?
567 : */
568 60009 : if (action)
569 : addblk = newblk;
570 : else
571 59977 : addblk = NULL;
572 : break;
573 : }
574 :
575 : /*
576 : * Update the btree to show the new hashval for this child.
577 : */
578 121537 : xfs_da3_fixhashpath(state, &state->path);
579 : }
580 61528 : if (!addblk)
581 : return 0;
582 :
583 : /*
584 : * xfs_da3_node_split() should have consumed any extra blocks we added
585 : * during a double leaf split in the attr fork. This is guaranteed as
586 : * we can't be here if the attr fork only has a single leaf block.
587 : */
588 1551 : ASSERT(state->extravalid == 0 ||
589 : state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);
590 :
591 : /*
592 : * Split the root node.
593 : */
594 1551 : ASSERT(state->path.active == 0);
595 1551 : oldblk = &state->path.blk[0];
596 1551 : error = xfs_da3_root_split(state, oldblk, addblk);
597 1551 : if (error)
598 0 : goto out;
599 :
600 : /*
601 : * Update pointers to the node which used to be block 0 and just got
602 : * bumped because of the addition of a new root node. Note that the
603 : * original block 0 could be at any position in the list of blocks in
604 : * the tree.
605 : *
606 : * Note: the magic numbers and sibling pointers are in the same physical
607 : * place for both v2 and v3 headers (by design). Hence it doesn't matter
608 : * which version of the xfs_da_intnode structure we use here as the
609 : * result will be the same using either structure.
610 : */
611 1551 : node = oldblk->bp->b_addr;
612 1551 : if (node->hdr.info.forw) {
613 3102 : if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) {
614 0 : xfs_buf_mark_corrupt(oldblk->bp);
615 0 : error = -EFSCORRUPTED;
616 0 : goto out;
617 : }
618 1551 : node = addblk->bp->b_addr;
619 1551 : node->hdr.info.back = cpu_to_be32(oldblk->blkno);
620 1551 : xfs_trans_log_buf(state->args->trans, addblk->bp,
621 : XFS_DA_LOGRANGE(node, &node->hdr.info,
622 : sizeof(node->hdr.info)));
623 : }
624 1551 : node = oldblk->bp->b_addr;
625 1551 : if (node->hdr.info.back) {
626 0 : if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) {
627 0 : xfs_buf_mark_corrupt(oldblk->bp);
628 0 : error = -EFSCORRUPTED;
629 0 : goto out;
630 : }
631 0 : node = addblk->bp->b_addr;
632 0 : node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
633 0 : xfs_trans_log_buf(state->args->trans, addblk->bp,
634 : XFS_DA_LOGRANGE(node, &node->hdr.info,
635 : sizeof(node->hdr.info)));
636 : }
637 1551 : out:
638 1551 : addblk->bp = NULL;
639 1551 : return error;
640 : }
641 :
642 : /*
643 : * Split the root. We have to create a new root and point to the two
644 : * parts (the split old root) that we just created. Copy block zero to
645 : * the EOF, extending the inode in process.
646 : */
647 : STATIC int /* error */
648 1551 : xfs_da3_root_split(
649 : struct xfs_da_state *state,
650 : struct xfs_da_state_blk *blk1,
651 : struct xfs_da_state_blk *blk2)
652 : {
653 1551 : struct xfs_da_intnode *node;
654 1551 : struct xfs_da_intnode *oldroot;
655 1551 : struct xfs_da_node_entry *btree;
656 1551 : struct xfs_da3_icnode_hdr nodehdr;
657 1551 : struct xfs_da_args *args;
658 1551 : struct xfs_buf *bp;
659 1551 : struct xfs_inode *dp;
660 1551 : struct xfs_trans *tp;
661 1551 : struct xfs_dir2_leaf *leaf;
662 1551 : xfs_dablk_t blkno;
663 1551 : int level;
664 1551 : int error;
665 1551 : int size;
666 :
667 1551 : trace_xfs_da_root_split(state->args);
668 :
669 : /*
670 : * Copy the existing (incorrect) block from the root node position
671 : * to a free space somewhere.
672 : */
673 1551 : args = state->args;
674 1551 : error = xfs_da_grow_inode(args, &blkno);
675 1551 : if (error)
676 : return error;
677 :
678 1551 : dp = args->dp;
679 1551 : tp = args->trans;
680 1551 : error = xfs_da_get_buf(tp, dp, blkno, &bp, args->whichfork);
681 1551 : if (error)
682 : return error;
683 1551 : node = bp->b_addr;
684 1551 : oldroot = blk1->bp->b_addr;
685 1551 : if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
686 : oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
687 5 : struct xfs_da3_icnode_hdr icnodehdr;
688 :
689 5 : xfs_da3_node_hdr_from_disk(dp->i_mount, &icnodehdr, oldroot);
690 5 : btree = icnodehdr.btree;
691 5 : size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot);
692 5 : level = icnodehdr.level;
693 :
694 : /*
695 : * we are about to copy oldroot to bp, so set up the type
696 : * of bp while we know exactly what it will be.
697 : */
698 5 : xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
699 : } else {
700 1546 : struct xfs_dir3_icleaf_hdr leafhdr;
701 :
702 1546 : leaf = (xfs_dir2_leaf_t *)oldroot;
703 1546 : xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf);
704 :
705 1546 : ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
706 : leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
707 1546 : size = (int)((char *)&leafhdr.ents[leafhdr.count] -
708 : (char *)leaf);
709 1546 : level = 0;
710 :
711 : /*
712 : * we are about to copy oldroot to bp, so set up the type
713 : * of bp while we know exactly what it will be.
714 : */
715 1546 : xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
716 : }
717 :
718 : /*
719 : * we can copy most of the information in the node from one block to
720 : * another, but for CRC enabled headers we have to make sure that the
721 : * block specific identifiers are kept intact. We update the buffer
722 : * directly for this.
723 : */
724 3102 : memcpy(node, oldroot, size);
725 1551 : if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
726 : oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
727 1551 : struct xfs_da3_intnode *node3 = (struct xfs_da3_intnode *)node;
728 :
729 1551 : node3->hdr.info.blkno = cpu_to_be64(xfs_buf_daddr(bp));
730 : }
731 1551 : xfs_trans_log_buf(tp, bp, 0, size - 1);
732 :
733 1551 : bp->b_ops = blk1->bp->b_ops;
734 1551 : xfs_trans_buf_copy_type(bp, blk1->bp);
735 1551 : blk1->bp = bp;
736 1551 : blk1->blkno = blkno;
737 :
738 : /*
739 : * Set up the new root node.
740 : */
741 1551 : error = xfs_da3_node_create(args,
742 1551 : (args->whichfork == XFS_DATA_FORK) ? args->geo->leafblk : 0,
743 : level + 1, &bp, args->whichfork);
744 1551 : if (error)
745 : return error;
746 :
747 1551 : node = bp->b_addr;
748 1551 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
749 1551 : btree = nodehdr.btree;
750 1551 : btree[0].hashval = cpu_to_be32(blk1->hashval);
751 1551 : btree[0].before = cpu_to_be32(blk1->blkno);
752 1551 : btree[1].hashval = cpu_to_be32(blk2->hashval);
753 1551 : btree[1].before = cpu_to_be32(blk2->blkno);
754 1551 : nodehdr.count = 2;
755 1551 : xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
756 :
757 : #ifdef DEBUG
758 1551 : if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
759 : oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
760 0 : ASSERT(blk1->blkno >= args->geo->leafblk &&
761 : blk1->blkno < args->geo->freeblk);
762 0 : ASSERT(blk2->blkno >= args->geo->leafblk &&
763 : blk2->blkno < args->geo->freeblk);
764 : }
765 : #endif
766 :
767 : /* Header is already logged by xfs_da_node_create */
768 1551 : xfs_trans_log_buf(tp, bp,
769 1551 : XFS_DA_LOGRANGE(node, btree, sizeof(xfs_da_node_entry_t) * 2));
770 :
771 1551 : return 0;
772 : }
773 :
774 : /*
775 : * Split the node, rebalance, then add the new entry.
776 : */
777 : STATIC int /* error */
778 60009 : xfs_da3_node_split(
779 : struct xfs_da_state *state,
780 : struct xfs_da_state_blk *oldblk,
781 : struct xfs_da_state_blk *newblk,
782 : struct xfs_da_state_blk *addblk,
783 : int treelevel,
784 : int *result)
785 : {
786 60009 : struct xfs_da_intnode *node;
787 60009 : struct xfs_da3_icnode_hdr nodehdr;
788 60009 : xfs_dablk_t blkno;
789 60009 : int newcount;
790 60009 : int error;
791 60009 : int useextra;
792 60009 : struct xfs_inode *dp = state->args->dp;
793 :
794 60009 : trace_xfs_da_node_split(state->args);
795 :
796 60009 : node = oldblk->bp->b_addr;
797 60009 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
798 :
799 : /*
800 : * With V2 dirs the extra block is data or freespace.
801 : */
802 60009 : useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK;
803 60009 : newcount = 1 + useextra;
804 : /*
805 : * Do we have to split the node?
806 : */
807 60009 : if (nodehdr.count + newcount > state->args->geo->node_ents) {
808 : /*
809 : * Allocate a new node, add to the doubly linked chain of
810 : * nodes, then move some of our excess entries into it.
811 : */
812 32 : error = xfs_da_grow_inode(state->args, &blkno);
813 32 : if (error)
814 : return error; /* GROT: dir is inconsistent */
815 :
816 32 : error = xfs_da3_node_create(state->args, blkno, treelevel,
817 : &newblk->bp, state->args->whichfork);
818 32 : if (error)
819 : return error; /* GROT: dir is inconsistent */
820 32 : newblk->blkno = blkno;
821 32 : newblk->magic = XFS_DA_NODE_MAGIC;
822 32 : xfs_da3_node_rebalance(state, oldblk, newblk);
823 32 : error = xfs_da3_blk_link(state, oldblk, newblk);
824 32 : if (error)
825 : return error;
826 32 : *result = 1;
827 : } else {
828 59977 : *result = 0;
829 : }
830 :
831 : /*
832 : * Insert the new entry(s) into the correct block
833 : * (updating last hashval in the process).
834 : *
835 : * xfs_da3_node_add() inserts BEFORE the given index,
836 : * and as a result of using node_lookup_int() we always
837 : * point to a valid entry (not after one), but a split
838 : * operation always results in a new block whose hashvals
839 : * FOLLOW the current block.
840 : *
841 : * If we had double-split op below us, then add the extra block too.
842 : */
843 60009 : node = oldblk->bp->b_addr;
844 60009 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
845 60009 : if (oldblk->index <= nodehdr.count) {
846 59977 : oldblk->index++;
847 59977 : xfs_da3_node_add(state, oldblk, addblk);
848 59977 : if (useextra) {
849 0 : if (state->extraafter)
850 0 : oldblk->index++;
851 0 : xfs_da3_node_add(state, oldblk, &state->extrablk);
852 0 : state->extravalid = 0;
853 : }
854 : } else {
855 32 : newblk->index++;
856 32 : xfs_da3_node_add(state, newblk, addblk);
857 32 : if (useextra) {
858 0 : if (state->extraafter)
859 0 : newblk->index++;
860 0 : xfs_da3_node_add(state, newblk, &state->extrablk);
861 0 : state->extravalid = 0;
862 : }
863 : }
864 :
865 : return 0;
866 : }
867 :
868 : /*
869 : * Balance the btree elements between two intermediate nodes,
870 : * usually one full and one empty.
871 : *
872 : * NOTE: if blk2 is empty, then it will get the upper half of blk1.
873 : */
874 : STATIC void
875 32 : xfs_da3_node_rebalance(
876 : struct xfs_da_state *state,
877 : struct xfs_da_state_blk *blk1,
878 : struct xfs_da_state_blk *blk2)
879 : {
880 32 : struct xfs_da_intnode *node1;
881 32 : struct xfs_da_intnode *node2;
882 32 : struct xfs_da_node_entry *btree1;
883 32 : struct xfs_da_node_entry *btree2;
884 32 : struct xfs_da_node_entry *btree_s;
885 32 : struct xfs_da_node_entry *btree_d;
886 32 : struct xfs_da3_icnode_hdr nodehdr1;
887 32 : struct xfs_da3_icnode_hdr nodehdr2;
888 32 : struct xfs_trans *tp;
889 32 : int count;
890 32 : int tmp;
891 32 : int swap = 0;
892 32 : struct xfs_inode *dp = state->args->dp;
893 :
894 32 : trace_xfs_da_node_rebalance(state->args);
895 :
896 32 : node1 = blk1->bp->b_addr;
897 32 : node2 = blk2->bp->b_addr;
898 32 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
899 32 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
900 32 : btree1 = nodehdr1.btree;
901 32 : btree2 = nodehdr2.btree;
902 :
903 : /*
904 : * Figure out how many entries need to move, and in which direction.
905 : * Swap the nodes around if that makes it simpler.
906 : */
907 32 : if (nodehdr1.count > 0 && nodehdr2.count > 0 &&
908 0 : ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
909 0 : (be32_to_cpu(btree2[nodehdr2.count - 1].hashval) <
910 0 : be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) {
911 0 : swap(node1, node2);
912 0 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
913 0 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
914 0 : btree1 = nodehdr1.btree;
915 0 : btree2 = nodehdr2.btree;
916 0 : swap = 1;
917 : }
918 :
919 32 : count = (nodehdr1.count - nodehdr2.count) / 2;
920 32 : if (count == 0)
921 0 : return;
922 32 : tp = state->args->trans;
923 : /*
924 : * Two cases: high-to-low and low-to-high.
925 : */
926 32 : if (count > 0) {
927 : /*
928 : * Move elements in node2 up to make a hole.
929 : */
930 32 : tmp = nodehdr2.count;
931 32 : if (tmp > 0) {
932 0 : tmp *= (uint)sizeof(xfs_da_node_entry_t);
933 0 : btree_s = &btree2[0];
934 0 : btree_d = &btree2[count];
935 0 : memmove(btree_d, btree_s, tmp);
936 : }
937 :
938 : /*
939 : * Move the req'd B-tree elements from high in node1 to
940 : * low in node2.
941 : */
942 32 : nodehdr2.count += count;
943 32 : tmp = count * (uint)sizeof(xfs_da_node_entry_t);
944 32 : btree_s = &btree1[nodehdr1.count - count];
945 32 : btree_d = &btree2[0];
946 64 : memcpy(btree_d, btree_s, tmp);
947 32 : nodehdr1.count -= count;
948 : } else {
949 : /*
950 : * Move the req'd B-tree elements from low in node2 to
951 : * high in node1.
952 : */
953 0 : count = -count;
954 0 : tmp = count * (uint)sizeof(xfs_da_node_entry_t);
955 0 : btree_s = &btree2[0];
956 0 : btree_d = &btree1[nodehdr1.count];
957 0 : memcpy(btree_d, btree_s, tmp);
958 0 : nodehdr1.count += count;
959 :
960 0 : xfs_trans_log_buf(tp, blk1->bp,
961 0 : XFS_DA_LOGRANGE(node1, btree_d, tmp));
962 :
963 : /*
964 : * Move elements in node2 down to fill the hole.
965 : */
966 0 : tmp = nodehdr2.count - count;
967 0 : tmp *= (uint)sizeof(xfs_da_node_entry_t);
968 0 : btree_s = &btree2[count];
969 0 : btree_d = &btree2[0];
970 0 : memmove(btree_d, btree_s, tmp);
971 0 : nodehdr2.count -= count;
972 : }
973 :
974 : /*
975 : * Log header of node 1 and all current bits of node 2.
976 : */
977 32 : xfs_da3_node_hdr_to_disk(dp->i_mount, node1, &nodehdr1);
978 32 : xfs_trans_log_buf(tp, blk1->bp,
979 32 : XFS_DA_LOGRANGE(node1, &node1->hdr,
980 : state->args->geo->node_hdr_size));
981 :
982 32 : xfs_da3_node_hdr_to_disk(dp->i_mount, node2, &nodehdr2);
983 32 : xfs_trans_log_buf(tp, blk2->bp,
984 32 : XFS_DA_LOGRANGE(node2, &node2->hdr,
985 : state->args->geo->node_hdr_size +
986 : (sizeof(btree2[0]) * nodehdr2.count)));
987 :
988 : /*
989 : * Record the last hashval from each block for upward propagation.
990 : * (note: don't use the swapped node pointers)
991 : */
992 32 : if (swap) {
993 0 : node1 = blk1->bp->b_addr;
994 0 : node2 = blk2->bp->b_addr;
995 0 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
996 0 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
997 0 : btree1 = nodehdr1.btree;
998 0 : btree2 = nodehdr2.btree;
999 : }
1000 32 : blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval);
1001 32 : blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval);
1002 :
1003 : /*
1004 : * Adjust the expected index for insertion.
1005 : */
1006 32 : if (blk1->index >= nodehdr1.count) {
1007 32 : blk2->index = blk1->index - nodehdr1.count;
1008 32 : blk1->index = nodehdr1.count + 1; /* make it invalid */
1009 : }
1010 : }
1011 :
1012 : /*
1013 : * Add a new entry to an intermediate node.
1014 : */
1015 : STATIC void
1016 60009 : xfs_da3_node_add(
1017 : struct xfs_da_state *state,
1018 : struct xfs_da_state_blk *oldblk,
1019 : struct xfs_da_state_blk *newblk)
1020 : {
1021 60009 : struct xfs_da_intnode *node;
1022 60009 : struct xfs_da3_icnode_hdr nodehdr;
1023 60009 : struct xfs_da_node_entry *btree;
1024 60009 : int tmp;
1025 60009 : struct xfs_inode *dp = state->args->dp;
1026 :
1027 60009 : trace_xfs_da_node_add(state->args);
1028 :
1029 60009 : node = oldblk->bp->b_addr;
1030 60009 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
1031 60009 : btree = nodehdr.btree;
1032 :
1033 60009 : ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count);
1034 60009 : ASSERT(newblk->blkno != 0);
1035 60009 : if (state->args->whichfork == XFS_DATA_FORK)
1036 22857 : ASSERT(newblk->blkno >= state->args->geo->leafblk &&
1037 : newblk->blkno < state->args->geo->freeblk);
1038 :
1039 : /*
1040 : * We may need to make some room before we insert the new node.
1041 : */
1042 60009 : tmp = 0;
1043 60009 : if (oldblk->index < nodehdr.count) {
1044 17333 : tmp = (nodehdr.count - oldblk->index) * (uint)sizeof(*btree);
1045 34666 : memmove(&btree[oldblk->index + 1], &btree[oldblk->index], tmp);
1046 : }
1047 60009 : btree[oldblk->index].hashval = cpu_to_be32(newblk->hashval);
1048 60009 : btree[oldblk->index].before = cpu_to_be32(newblk->blkno);
1049 60009 : xfs_trans_log_buf(state->args->trans, oldblk->bp,
1050 60009 : XFS_DA_LOGRANGE(node, &btree[oldblk->index],
1051 : tmp + sizeof(*btree)));
1052 :
1053 60009 : nodehdr.count += 1;
1054 60009 : xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
1055 60009 : xfs_trans_log_buf(state->args->trans, oldblk->bp,
1056 60009 : XFS_DA_LOGRANGE(node, &node->hdr,
1057 : state->args->geo->node_hdr_size));
1058 :
1059 : /*
1060 : * Copy the last hash value from the oldblk to propagate upwards.
1061 : */
1062 60009 : oldblk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
1063 60009 : }
1064 :
1065 : /*========================================================================
1066 : * Routines used for shrinking the Btree.
1067 : *========================================================================*/
1068 :
1069 : /*
1070 : * Deallocate an empty leaf node, remove it from its parent,
1071 : * possibly deallocating that block, etc...
1072 : */
1073 : int
1074 2190987 : xfs_da3_join(
1075 : struct xfs_da_state *state)
1076 : {
1077 2190987 : struct xfs_da_state_blk *drop_blk;
1078 2190987 : struct xfs_da_state_blk *save_blk;
1079 2190987 : int action = 0;
1080 2190987 : int error;
1081 :
1082 2190987 : trace_xfs_da_join(state->args);
1083 :
1084 2191001 : drop_blk = &state->path.blk[ state->path.active-1 ];
1085 2191001 : save_blk = &state->altpath.blk[ state->path.active-1 ];
1086 2191000 : ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
1087 2191000 : ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
1088 : drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
1089 :
1090 : /*
1091 : * Walk back up the tree joining/deallocating as necessary.
1092 : * When we stop dropping blocks, break out.
1093 : */
1094 2210517 : for ( ; state->path.active >= 2; drop_blk--, save_blk--,
1095 19517 : state->path.active--) {
1096 : /*
1097 : * See if we can combine the block with a neighbor.
1098 : * (action == 0) => no options, just leave
1099 : * (action == 1) => coalesce, then unlink
1100 : * (action == 2) => block empty, unlink it
1101 : */
1102 2192274 : switch (drop_blk->magic) {
1103 536850 : case XFS_ATTR_LEAF_MAGIC:
1104 536850 : error = xfs_attr3_leaf_toosmall(state, &action);
1105 536854 : if (error)
1106 2 : return error;
1107 536852 : if (action == 0)
1108 : return 0;
1109 399 : xfs_attr3_leaf_unbalance(state, drop_blk, save_blk);
1110 399 : break;
1111 1654145 : case XFS_DIR2_LEAFN_MAGIC:
1112 1654145 : error = xfs_dir2_leafn_toosmall(state, &action);
1113 1654148 : if (error)
1114 0 : return error;
1115 1654148 : if (action == 0)
1116 : return 0;
1117 19118 : xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
1118 19118 : break;
1119 1279 : case XFS_DA_NODE_MAGIC:
1120 : /*
1121 : * Remove the offending node, fixup hashvals,
1122 : * check for a toosmall neighbor.
1123 : */
1124 1279 : xfs_da3_node_remove(state, drop_blk);
1125 1279 : xfs_da3_fixhashpath(state, &state->path);
1126 1279 : error = xfs_da3_node_toosmall(state, &action);
1127 1279 : if (error)
1128 0 : return error;
1129 1279 : if (action == 0)
1130 : return 0;
1131 5 : xfs_da3_node_unbalance(state, drop_blk, save_blk);
1132 5 : break;
1133 : }
1134 19522 : xfs_da3_fixhashpath(state, &state->altpath);
1135 19522 : error = xfs_da3_blk_unlink(state, drop_blk, save_blk);
1136 19522 : xfs_da_state_kill_altpath(state);
1137 19522 : if (error)
1138 0 : return error;
1139 19522 : error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
1140 : drop_blk->bp);
1141 19517 : drop_blk->bp = NULL;
1142 19517 : if (error)
1143 0 : return error;
1144 : }
1145 : /*
1146 : * We joined all the way to the top. If it turns out that
1147 : * we only have one entry in the root, make the child block
1148 : * the new root.
1149 : */
1150 18243 : xfs_da3_node_remove(state, drop_blk);
1151 18243 : xfs_da3_fixhashpath(state, &state->path);
1152 18243 : error = xfs_da3_root_join(state, &state->path.blk[0]);
1153 18243 : return error;
1154 : }
1155 :
1156 : #ifdef DEBUG
1157 : static void
1158 1139 : xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
1159 : {
1160 1139 : __be16 magic = blkinfo->magic;
1161 :
1162 1139 : if (level == 1) {
1163 1138 : ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
1164 : magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
1165 : magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
1166 : magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
1167 : } else {
1168 1 : ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
1169 : magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
1170 : }
1171 1139 : ASSERT(!blkinfo->forw);
1172 1139 : ASSERT(!blkinfo->back);
1173 1139 : }
1174 : #else /* !DEBUG */
1175 : #define xfs_da_blkinfo_onlychild_validate(blkinfo, level)
1176 : #endif /* !DEBUG */
1177 :
1178 : /*
1179 : * We have only one entry in the root. Copy the only remaining child of
1180 : * the old root to block 0 as the new root node.
1181 : */
1182 : STATIC int
1183 18243 : xfs_da3_root_join(
1184 : struct xfs_da_state *state,
1185 : struct xfs_da_state_blk *root_blk)
1186 : {
1187 18243 : struct xfs_da_intnode *oldroot;
1188 18243 : struct xfs_da_args *args;
1189 18243 : xfs_dablk_t child;
1190 18243 : struct xfs_buf *bp;
1191 18243 : struct xfs_da3_icnode_hdr oldroothdr;
1192 18243 : int error;
1193 18243 : struct xfs_inode *dp = state->args->dp;
1194 :
1195 18243 : trace_xfs_da_root_join(state->args);
1196 :
1197 18243 : ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
1198 :
1199 18243 : args = state->args;
1200 18243 : oldroot = root_blk->bp->b_addr;
1201 18243 : xfs_da3_node_hdr_from_disk(dp->i_mount, &oldroothdr, oldroot);
1202 18243 : ASSERT(oldroothdr.forw == 0);
1203 18243 : ASSERT(oldroothdr.back == 0);
1204 :
1205 : /*
1206 : * If the root has more than one child, then don't do anything.
1207 : */
1208 18243 : if (oldroothdr.count > 1)
1209 : return 0;
1210 :
1211 : /*
1212 : * Read in the (only) child block, then copy those bytes into
1213 : * the root block's buffer and free the original child block.
1214 : */
1215 1139 : child = be32_to_cpu(oldroothdr.btree[0].before);
1216 1139 : ASSERT(child != 0);
1217 1139 : error = xfs_da3_node_read(args->trans, dp, child, &bp, args->whichfork);
1218 1139 : if (error)
1219 : return error;
1220 1139 : xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level);
1221 :
1222 : /*
1223 : * This could be copying a leaf back into the root block in the case of
1224 : * there only being a single leaf block left in the tree. Hence we have
1225 : * to update the b_ops pointer as well to match the buffer type change
1226 : * that could occur. For dir3 blocks we also need to update the block
1227 : * number in the buffer header.
1228 : */
1229 2278 : memcpy(root_blk->bp->b_addr, bp->b_addr, args->geo->blksize);
1230 1139 : root_blk->bp->b_ops = bp->b_ops;
1231 1139 : xfs_trans_buf_copy_type(root_blk->bp, bp);
1232 1139 : if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) {
1233 1139 : struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr;
1234 1139 : da3->blkno = cpu_to_be64(xfs_buf_daddr(root_blk->bp));
1235 : }
1236 1139 : xfs_trans_log_buf(args->trans, root_blk->bp, 0,
1237 1139 : args->geo->blksize - 1);
1238 1139 : error = xfs_da_shrink_inode(args, child, bp);
1239 1139 : return error;
1240 : }
1241 :
1242 : /*
1243 : * Check a node block and its neighbors to see if the block should be
1244 : * collapsed into one or the other neighbor. Always keep the block
1245 : * with the smaller block number.
1246 : * If the current block is over 50% full, don't try to join it, return 0.
1247 : * If the block is empty, fill in the state structure and return 2.
1248 : * If it can be collapsed, fill in the state structure and return 1.
1249 : * If nothing can be done, return 0.
1250 : */
1251 : STATIC int
1252 1279 : xfs_da3_node_toosmall(
1253 : struct xfs_da_state *state,
1254 : int *action)
1255 : {
1256 1279 : struct xfs_da_intnode *node;
1257 1279 : struct xfs_da_state_blk *blk;
1258 1279 : struct xfs_da_blkinfo *info;
1259 1279 : xfs_dablk_t blkno;
1260 1279 : struct xfs_buf *bp;
1261 1279 : struct xfs_da3_icnode_hdr nodehdr;
1262 1279 : int count;
1263 1279 : int forward;
1264 1279 : int error;
1265 1279 : int retval;
1266 1279 : int i;
1267 1279 : struct xfs_inode *dp = state->args->dp;
1268 :
1269 1279 : trace_xfs_da_node_toosmall(state->args);
1270 :
1271 : /*
1272 : * Check for the degenerate case of the block being over 50% full.
1273 : * If so, it's not worth even looking to see if we might be able
1274 : * to coalesce with a sibling.
1275 : */
1276 1279 : blk = &state->path.blk[ state->path.active-1 ];
1277 1279 : info = blk->bp->b_addr;
1278 1279 : node = (xfs_da_intnode_t *)info;
1279 1279 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
1280 1279 : if (nodehdr.count > (state->args->geo->node_ents >> 1)) {
1281 383 : *action = 0; /* blk over 50%, don't try to join */
1282 383 : return 0; /* blk over 50%, don't try to join */
1283 : }
1284 :
1285 : /*
1286 : * Check for the degenerate case of the block being empty.
1287 : * If the block is empty, we'll simply delete it, no need to
1288 : * coalesce it with a sibling block. We choose (arbitrarily)
1289 : * to merge with the forward block unless it is NULL.
1290 : */
1291 896 : if (nodehdr.count == 0) {
1292 : /*
1293 : * Make altpath point to the block we want to keep and
1294 : * path point to the block we want to drop (this one).
1295 : */
1296 0 : forward = (info->forw != 0);
1297 0 : memcpy(&state->altpath, &state->path, sizeof(state->path));
1298 0 : error = xfs_da3_path_shift(state, &state->altpath, forward,
1299 : 0, &retval);
1300 0 : if (error)
1301 : return error;
1302 0 : if (retval) {
1303 0 : *action = 0;
1304 : } else {
1305 0 : *action = 2;
1306 : }
1307 0 : return 0;
1308 : }
1309 :
1310 : /*
1311 : * Examine each sibling block to see if we can coalesce with
1312 : * at least 25% free space to spare. We need to figure out
1313 : * whether to merge with the forward or the backward block.
1314 : * We prefer coalescing with the lower numbered sibling so as
1315 : * to shrink a directory over time.
1316 : */
1317 896 : count = state->args->geo->node_ents;
1318 896 : count -= state->args->geo->node_ents >> 2;
1319 896 : count -= nodehdr.count;
1320 :
1321 : /* start with smaller blk num */
1322 896 : forward = nodehdr.forw < nodehdr.back;
1323 2682 : for (i = 0; i < 2; forward = !forward, i++) {
1324 1791 : struct xfs_da3_icnode_hdr thdr;
1325 1791 : if (forward)
1326 : blkno = nodehdr.forw;
1327 : else
1328 896 : blkno = nodehdr.back;
1329 1791 : if (blkno == 0)
1330 641 : continue;
1331 1150 : error = xfs_da3_node_read(state->args->trans, dp, blkno, &bp,
1332 : state->args->whichfork);
1333 1150 : if (error)
1334 0 : return error;
1335 :
1336 1150 : node = bp->b_addr;
1337 1150 : xfs_da3_node_hdr_from_disk(dp->i_mount, &thdr, node);
1338 1150 : xfs_trans_brelse(state->args->trans, bp);
1339 :
1340 1150 : if (count - thdr.count >= 0)
1341 : break; /* fits with at least 25% to spare */
1342 : }
1343 896 : if (i >= 2) {
1344 891 : *action = 0;
1345 891 : return 0;
1346 : }
1347 :
1348 : /*
1349 : * Make altpath point to the block we want to keep (the lower
1350 : * numbered block) and path point to the block we want to drop.
1351 : */
1352 10 : memcpy(&state->altpath, &state->path, sizeof(state->path));
1353 5 : if (blkno < blk->blkno) {
1354 2 : error = xfs_da3_path_shift(state, &state->altpath, forward,
1355 : 0, &retval);
1356 : } else {
1357 3 : error = xfs_da3_path_shift(state, &state->path, forward,
1358 : 0, &retval);
1359 : }
1360 5 : if (error)
1361 : return error;
1362 5 : if (retval) {
1363 0 : *action = 0;
1364 0 : return 0;
1365 : }
1366 5 : *action = 1;
1367 5 : return 0;
1368 : }
1369 :
1370 : /*
1371 : * Pick up the last hashvalue from an intermediate node.
1372 : */
1373 : STATIC uint
1374 79536 : xfs_da3_node_lasthash(
1375 : struct xfs_inode *dp,
1376 : struct xfs_buf *bp,
1377 : int *count)
1378 : {
1379 79536 : struct xfs_da3_icnode_hdr nodehdr;
1380 :
1381 79536 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, bp->b_addr);
1382 79536 : if (count)
1383 79536 : *count = nodehdr.count;
1384 79536 : if (!nodehdr.count)
1385 : return 0;
1386 79536 : return be32_to_cpu(nodehdr.btree[nodehdr.count - 1].hashval);
1387 : }
1388 :
1389 : /*
1390 : * Walk back up the tree adjusting hash values as necessary,
1391 : * when we stop making changes, return.
1392 : */
1393 : void
1394 56543993 : xfs_da3_fixhashpath(
1395 : struct xfs_da_state *state,
1396 : struct xfs_da_state_path *path)
1397 : {
1398 56543993 : struct xfs_da_state_blk *blk;
1399 56543993 : struct xfs_da_intnode *node;
1400 56543993 : struct xfs_da_node_entry *btree;
1401 56543993 : xfs_dahash_t lasthash=0;
1402 56543993 : int level;
1403 56543993 : int count;
1404 56543993 : struct xfs_inode *dp = state->args->dp;
1405 :
1406 56543993 : trace_xfs_da_fixhashpath(state->args);
1407 :
1408 56549140 : level = path->active-1;
1409 56549140 : blk = &path->blk[ level ];
1410 56548498 : switch (blk->magic) {
1411 1479502 : case XFS_ATTR_LEAF_MAGIC:
1412 1479502 : lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
1413 1479494 : if (count == 0)
1414 4 : return;
1415 : break;
1416 54989460 : case XFS_DIR2_LEAFN_MAGIC:
1417 54989460 : lasthash = xfs_dir2_leaf_lasthash(dp, blk->bp, &count);
1418 54987097 : if (count == 0)
1419 : return;
1420 : break;
1421 79536 : case XFS_DA_NODE_MAGIC:
1422 79536 : lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count);
1423 79536 : if (count == 0)
1424 : return;
1425 : break;
1426 : }
1427 59071233 : for (blk--, level--; level >= 0; blk--, level--) {
1428 56479976 : struct xfs_da3_icnode_hdr nodehdr;
1429 :
1430 56479976 : node = blk->bp->b_addr;
1431 56479976 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
1432 56486929 : btree = nodehdr.btree;
1433 112973858 : if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
1434 : break;
1435 2525043 : blk->hashval = lasthash;
1436 2525043 : btree[blk->index].hashval = cpu_to_be32(lasthash);
1437 2525043 : xfs_trans_log_buf(state->args->trans, blk->bp,
1438 2525043 : XFS_DA_LOGRANGE(node, &btree[blk->index],
1439 : sizeof(*btree)));
1440 :
1441 2525110 : lasthash = be32_to_cpu(btree[nodehdr.count - 1].hashval);
1442 : }
1443 : }
1444 :
1445 : /*
1446 : * Remove an entry from an intermediate node.
1447 : */
1448 : STATIC void
1449 19522 : xfs_da3_node_remove(
1450 : struct xfs_da_state *state,
1451 : struct xfs_da_state_blk *drop_blk)
1452 : {
1453 19522 : struct xfs_da_intnode *node;
1454 19522 : struct xfs_da3_icnode_hdr nodehdr;
1455 19522 : struct xfs_da_node_entry *btree;
1456 19522 : int index;
1457 19522 : int tmp;
1458 19522 : struct xfs_inode *dp = state->args->dp;
1459 :
1460 19522 : trace_xfs_da_node_remove(state->args);
1461 :
1462 19522 : node = drop_blk->bp->b_addr;
1463 19522 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
1464 19522 : ASSERT(drop_blk->index < nodehdr.count);
1465 19522 : ASSERT(drop_blk->index >= 0);
1466 :
1467 : /*
1468 : * Copy over the offending entry, or just zero it out.
1469 : */
1470 19522 : index = drop_blk->index;
1471 19522 : btree = nodehdr.btree;
1472 19522 : if (index < nodehdr.count - 1) {
1473 18881 : tmp = nodehdr.count - index - 1;
1474 18881 : tmp *= (uint)sizeof(xfs_da_node_entry_t);
1475 37762 : memmove(&btree[index], &btree[index + 1], tmp);
1476 18881 : xfs_trans_log_buf(state->args->trans, drop_blk->bp,
1477 18881 : XFS_DA_LOGRANGE(node, &btree[index], tmp));
1478 18881 : index = nodehdr.count - 1;
1479 : }
1480 19522 : memset(&btree[index], 0, sizeof(xfs_da_node_entry_t));
1481 19522 : xfs_trans_log_buf(state->args->trans, drop_blk->bp,
1482 19522 : XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index])));
1483 19522 : nodehdr.count -= 1;
1484 19522 : xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
1485 19522 : xfs_trans_log_buf(state->args->trans, drop_blk->bp,
1486 19522 : XFS_DA_LOGRANGE(node, &node->hdr, state->args->geo->node_hdr_size));
1487 :
1488 : /*
1489 : * Copy the last hash value from the block to propagate upwards.
1490 : */
1491 19522 : drop_blk->hashval = be32_to_cpu(btree[index - 1].hashval);
1492 19522 : }
1493 :
1494 : /*
1495 : * Unbalance the elements between two intermediate nodes,
1496 : * move all Btree elements from one node into another.
1497 : */
1498 : STATIC void
1499 5 : xfs_da3_node_unbalance(
1500 : struct xfs_da_state *state,
1501 : struct xfs_da_state_blk *drop_blk,
1502 : struct xfs_da_state_blk *save_blk)
1503 : {
1504 5 : struct xfs_da_intnode *drop_node;
1505 5 : struct xfs_da_intnode *save_node;
1506 5 : struct xfs_da_node_entry *drop_btree;
1507 5 : struct xfs_da_node_entry *save_btree;
1508 5 : struct xfs_da3_icnode_hdr drop_hdr;
1509 5 : struct xfs_da3_icnode_hdr save_hdr;
1510 5 : struct xfs_trans *tp;
1511 5 : int sindex;
1512 5 : int tmp;
1513 5 : struct xfs_inode *dp = state->args->dp;
1514 :
1515 5 : trace_xfs_da_node_unbalance(state->args);
1516 :
1517 5 : drop_node = drop_blk->bp->b_addr;
1518 5 : save_node = save_blk->bp->b_addr;
1519 5 : xfs_da3_node_hdr_from_disk(dp->i_mount, &drop_hdr, drop_node);
1520 5 : xfs_da3_node_hdr_from_disk(dp->i_mount, &save_hdr, save_node);
1521 5 : drop_btree = drop_hdr.btree;
1522 5 : save_btree = save_hdr.btree;
1523 5 : tp = state->args->trans;
1524 :
1525 : /*
1526 : * If the dying block has lower hashvals, then move all the
1527 : * elements in the remaining block up to make a hole.
1528 : */
1529 10 : if ((be32_to_cpu(drop_btree[0].hashval) <
1530 9 : be32_to_cpu(save_btree[0].hashval)) ||
1531 4 : (be32_to_cpu(drop_btree[drop_hdr.count - 1].hashval) <
1532 4 : be32_to_cpu(save_btree[save_hdr.count - 1].hashval))) {
1533 : /* XXX: check this - is memmove dst correct? */
1534 1 : tmp = save_hdr.count * sizeof(xfs_da_node_entry_t);
1535 2 : memmove(&save_btree[drop_hdr.count], &save_btree[0], tmp);
1536 :
1537 1 : sindex = 0;
1538 1 : xfs_trans_log_buf(tp, save_blk->bp,
1539 1 : XFS_DA_LOGRANGE(save_node, &save_btree[0],
1540 : (save_hdr.count + drop_hdr.count) *
1541 : sizeof(xfs_da_node_entry_t)));
1542 : } else {
1543 4 : sindex = save_hdr.count;
1544 4 : xfs_trans_log_buf(tp, save_blk->bp,
1545 4 : XFS_DA_LOGRANGE(save_node, &save_btree[sindex],
1546 : drop_hdr.count * sizeof(xfs_da_node_entry_t)));
1547 : }
1548 :
1549 : /*
1550 : * Move all the B-tree elements from drop_blk to save_blk.
1551 : */
1552 5 : tmp = drop_hdr.count * (uint)sizeof(xfs_da_node_entry_t);
1553 10 : memcpy(&save_btree[sindex], &drop_btree[0], tmp);
1554 5 : save_hdr.count += drop_hdr.count;
1555 :
1556 5 : xfs_da3_node_hdr_to_disk(dp->i_mount, save_node, &save_hdr);
1557 5 : xfs_trans_log_buf(tp, save_blk->bp,
1558 5 : XFS_DA_LOGRANGE(save_node, &save_node->hdr,
1559 : state->args->geo->node_hdr_size));
1560 :
1561 : /*
1562 : * Save the last hashval in the remaining block for upward propagation.
1563 : */
1564 5 : save_blk->hashval = be32_to_cpu(save_btree[save_hdr.count - 1].hashval);
1565 5 : }
1566 :
1567 : /*========================================================================
1568 : * Routines used for finding things in the Btree.
1569 : *========================================================================*/
1570 :
1571 : /*
1572 : * Walk down the Btree looking for a particular filename, filling
1573 : * in the state structure as we go.
1574 : *
1575 : * We will set the state structure to point to each of the elements
1576 : * in each of the nodes where either the hashval is or should be.
1577 : *
1578 : * We support duplicate hashval's so for each entry in the current
1579 : * node that could contain the desired hashval, descend. This is a
1580 : * pruned depth-first tree search.
1581 : */
1582 : int /* error */
1583 181556869 : xfs_da3_node_lookup_int(
1584 : struct xfs_da_state *state,
1585 : int *result)
1586 : {
1587 181556869 : struct xfs_da_state_blk *blk;
1588 181556869 : struct xfs_da_blkinfo *curr;
1589 181556869 : struct xfs_da_intnode *node;
1590 181556869 : struct xfs_da_node_entry *btree;
1591 181556869 : struct xfs_da3_icnode_hdr nodehdr;
1592 181556869 : struct xfs_da_args *args;
1593 181556869 : xfs_dablk_t blkno;
1594 181556869 : xfs_dahash_t hashval;
1595 181556869 : xfs_dahash_t btreehashval;
1596 181556869 : int probe;
1597 181556869 : int span;
1598 181556869 : int max;
1599 181556869 : int error;
1600 181556869 : int retval;
1601 181556869 : unsigned int expected_level = 0;
1602 181556869 : uint16_t magic;
1603 181556869 : struct xfs_inode *dp = state->args->dp;
1604 :
1605 181556869 : args = state->args;
1606 :
1607 : /*
1608 : * Descend thru the B-tree searching each level for the right
1609 : * node to use, until the right hashval is found.
1610 : */
1611 181556869 : blkno = args->geo->leafblk;
1612 181556869 : for (blk = &state->path.blk[0], state->path.active = 1;
1613 365349826 : state->path.active <= XFS_DA_NODE_MAXDEPTH;
1614 183792957 : blk++, state->path.active++) {
1615 : /*
1616 : * Read the next node down in the tree.
1617 : */
1618 365349826 : blk->blkno = blkno;
1619 365349826 : error = xfs_da3_node_read(args->trans, args->dp, blkno,
1620 : &blk->bp, args->whichfork);
1621 365357670 : if (error) {
1622 7683 : blk->blkno = 0;
1623 7683 : state->path.active--;
1624 7683 : return error;
1625 : }
1626 365349987 : curr = blk->bp->b_addr;
1627 365349987 : magic = be16_to_cpu(curr->magic);
1628 :
1629 365349987 : if (magic == XFS_ATTR_LEAF_MAGIC ||
1630 365349987 : magic == XFS_ATTR3_LEAF_MAGIC) {
1631 3512247 : blk->magic = XFS_ATTR_LEAF_MAGIC;
1632 3512247 : blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
1633 3512248 : break;
1634 : }
1635 :
1636 361837740 : if (magic == XFS_DIR2_LEAFN_MAGIC ||
1637 361837740 : magic == XFS_DIR3_LEAFN_MAGIC) {
1638 178052470 : blk->magic = XFS_DIR2_LEAFN_MAGIC;
1639 178052470 : blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
1640 : blk->bp, NULL);
1641 178066187 : break;
1642 : }
1643 :
1644 183785270 : if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) {
1645 0 : xfs_buf_mark_corrupt(blk->bp);
1646 0 : return -EFSCORRUPTED;
1647 : }
1648 :
1649 183785270 : blk->magic = XFS_DA_NODE_MAGIC;
1650 :
1651 : /*
1652 : * Search an intermediate node for a match.
1653 : */
1654 183785270 : node = blk->bp->b_addr;
1655 183785270 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
1656 183792957 : btree = nodehdr.btree;
1657 :
1658 : /* Tree taller than we can handle; bail out! */
1659 183792957 : if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
1660 0 : xfs_buf_mark_corrupt(blk->bp);
1661 0 : return -EFSCORRUPTED;
1662 : }
1663 :
1664 : /* Check the level from the root. */
1665 183792957 : if (blkno == args->geo->leafblk)
1666 181542887 : expected_level = nodehdr.level - 1;
1667 2253234 : else if (expected_level != nodehdr.level) {
1668 0 : xfs_buf_mark_corrupt(blk->bp);
1669 0 : return -EFSCORRUPTED;
1670 : } else
1671 2253234 : expected_level--;
1672 :
1673 183792957 : max = nodehdr.count;
1674 183792957 : blk->hashval = be32_to_cpu(btree[max - 1].hashval);
1675 :
1676 : /*
1677 : * Binary search. (note: small blocks will skip loop)
1678 : */
1679 183792957 : probe = span = max / 2;
1680 183792957 : hashval = args->hashval;
1681 370400884 : while (span > 4) {
1682 186851646 : span /= 2;
1683 186851646 : btreehashval = be32_to_cpu(btree[probe].hashval);
1684 186851646 : if (btreehashval < hashval)
1685 86439940 : probe += span;
1686 100411706 : else if (btreehashval > hashval)
1687 100167987 : probe -= span;
1688 : else
1689 : break;
1690 : }
1691 183792957 : ASSERT((probe >= 0) && (probe < max));
1692 183965467 : ASSERT((span <= 4) ||
1693 : (be32_to_cpu(btree[probe].hashval) == hashval));
1694 :
1695 : /*
1696 : * Since we may have duplicate hashval's, find the first
1697 : * matching hashval in the node.
1698 : */
1699 914457203 : while (probe > 0 &&
1700 421437582 : be32_to_cpu(btree[probe].hashval) >= hashval) {
1701 309226664 : probe--;
1702 : }
1703 799144743 : while (probe < max &&
1704 397785800 : be32_to_cpu(btree[probe].hashval) < hashval) {
1705 217565986 : probe++;
1706 : }
1707 :
1708 : /*
1709 : * Pick the right block to descend on.
1710 : */
1711 183792957 : if (probe == max) {
1712 3577820 : blk->index = max - 1;
1713 3577820 : blkno = be32_to_cpu(btree[max - 1].before);
1714 : } else {
1715 180215137 : blk->index = probe;
1716 180215137 : blkno = be32_to_cpu(btree[probe].before);
1717 : }
1718 :
1719 : /* We can't point back to the root. */
1720 183792957 : if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk))
1721 0 : return -EFSCORRUPTED;
1722 : }
1723 :
1724 181578435 : if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0))
1725 0 : return -EFSCORRUPTED;
1726 :
1727 : /*
1728 : * A leaf block that ends in the hashval that we are interested in
1729 : * (final hashval == search hashval) means that the next block may
1730 : * contain more entries with the same hashval, shift upward to the
1731 : * next leaf and keep searching.
1732 : */
1733 787072233 : for (;;) {
1734 484325334 : if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
1735 179048260 : retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
1736 : &blk->index, state);
1737 305277504 : } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1738 305277504 : retval = xfs_attr3_leaf_lookup_int(blk->bp, args);
1739 305277040 : blk->index = args->index;
1740 305277040 : args->blkno = blk->blkno;
1741 : } else {
1742 0 : ASSERT(0);
1743 0 : return -EFSCORRUPTED;
1744 : }
1745 484302163 : if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
1746 340435201 : (blk->hashval == args->hashval)) {
1747 303061342 : error = xfs_da3_path_shift(state, &state->path, 1, 1,
1748 : &retval);
1749 303061731 : if (error)
1750 0 : return error;
1751 303061731 : if (retval == 0) {
1752 302746899 : continue;
1753 314832 : } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1754 : /* path_shift() gives ENOENT */
1755 163752 : retval = -ENOATTR;
1756 : }
1757 : }
1758 181574103 : break;
1759 : }
1760 181574103 : *result = retval;
1761 181574103 : return 0;
1762 : }
1763 :
1764 : /*========================================================================
1765 : * Utility routines.
1766 : *========================================================================*/
1767 :
1768 : /*
1769 : * Compare two intermediate nodes for "order".
1770 : */
1771 : STATIC int
1772 32 : xfs_da3_node_order(
1773 : struct xfs_inode *dp,
1774 : struct xfs_buf *node1_bp,
1775 : struct xfs_buf *node2_bp)
1776 : {
1777 32 : struct xfs_da_intnode *node1;
1778 32 : struct xfs_da_intnode *node2;
1779 32 : struct xfs_da_node_entry *btree1;
1780 32 : struct xfs_da_node_entry *btree2;
1781 32 : struct xfs_da3_icnode_hdr node1hdr;
1782 32 : struct xfs_da3_icnode_hdr node2hdr;
1783 :
1784 32 : node1 = node1_bp->b_addr;
1785 32 : node2 = node2_bp->b_addr;
1786 32 : xfs_da3_node_hdr_from_disk(dp->i_mount, &node1hdr, node1);
1787 32 : xfs_da3_node_hdr_from_disk(dp->i_mount, &node2hdr, node2);
1788 32 : btree1 = node1hdr.btree;
1789 32 : btree2 = node2hdr.btree;
1790 :
1791 64 : if (node1hdr.count > 0 && node2hdr.count > 0 &&
1792 96 : ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
1793 32 : (be32_to_cpu(btree2[node2hdr.count - 1].hashval) <
1794 32 : be32_to_cpu(btree1[node1hdr.count - 1].hashval)))) {
1795 0 : return 1;
1796 : }
1797 : return 0;
1798 : }
1799 :
1800 : /*
1801 : * Link a new block into a doubly linked list of blocks (of whatever type).
1802 : */
1803 : int /* error */
1804 61560 : xfs_da3_blk_link(
1805 : struct xfs_da_state *state,
1806 : struct xfs_da_state_blk *old_blk,
1807 : struct xfs_da_state_blk *new_blk)
1808 : {
1809 61560 : struct xfs_da_blkinfo *old_info;
1810 61560 : struct xfs_da_blkinfo *new_info;
1811 61560 : struct xfs_da_blkinfo *tmp_info;
1812 61560 : struct xfs_da_args *args;
1813 61560 : struct xfs_buf *bp;
1814 61560 : int before = 0;
1815 61560 : int error;
1816 61560 : struct xfs_inode *dp = state->args->dp;
1817 :
1818 : /*
1819 : * Set up environment.
1820 : */
1821 61560 : args = state->args;
1822 61560 : ASSERT(args != NULL);
1823 61560 : old_info = old_blk->bp->b_addr;
1824 61560 : new_info = new_blk->bp->b_addr;
1825 61560 : ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
1826 : old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1827 : old_blk->magic == XFS_ATTR_LEAF_MAGIC);
1828 :
1829 61560 : switch (old_blk->magic) {
1830 37129 : case XFS_ATTR_LEAF_MAGIC:
1831 37129 : before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
1832 37129 : break;
1833 24399 : case XFS_DIR2_LEAFN_MAGIC:
1834 24399 : before = xfs_dir2_leafn_order(dp, old_blk->bp, new_blk->bp);
1835 24399 : break;
1836 32 : case XFS_DA_NODE_MAGIC:
1837 32 : before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp);
1838 32 : break;
1839 : }
1840 :
1841 : /*
1842 : * Link blocks in appropriate order.
1843 : */
1844 61560 : if (before) {
1845 : /*
1846 : * Link new block in before existing block.
1847 : */
1848 0 : trace_xfs_da_link_before(args);
1849 0 : new_info->forw = cpu_to_be32(old_blk->blkno);
1850 0 : new_info->back = old_info->back;
1851 0 : if (old_info->back) {
1852 0 : error = xfs_da3_node_read(args->trans, dp,
1853 0 : be32_to_cpu(old_info->back),
1854 : &bp, args->whichfork);
1855 0 : if (error)
1856 : return error;
1857 0 : ASSERT(bp != NULL);
1858 0 : tmp_info = bp->b_addr;
1859 0 : ASSERT(tmp_info->magic == old_info->magic);
1860 0 : ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
1861 0 : tmp_info->forw = cpu_to_be32(new_blk->blkno);
1862 0 : xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1863 : }
1864 0 : old_info->back = cpu_to_be32(new_blk->blkno);
1865 : } else {
1866 : /*
1867 : * Link new block in after existing block.
1868 : */
1869 61560 : trace_xfs_da_link_after(args);
1870 61560 : new_info->forw = old_info->forw;
1871 61560 : new_info->back = cpu_to_be32(old_blk->blkno);
1872 61560 : if (old_info->forw) {
1873 17334 : error = xfs_da3_node_read(args->trans, dp,
1874 17334 : be32_to_cpu(old_info->forw),
1875 : &bp, args->whichfork);
1876 17334 : if (error)
1877 : return error;
1878 17334 : ASSERT(bp != NULL);
1879 17334 : tmp_info = bp->b_addr;
1880 17334 : ASSERT(tmp_info->magic == old_info->magic);
1881 34668 : ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
1882 17334 : tmp_info->back = cpu_to_be32(new_blk->blkno);
1883 17334 : xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1884 : }
1885 123120 : old_info->forw = cpu_to_be32(new_blk->blkno);
1886 : }
1887 :
1888 61560 : xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
1889 61560 : xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
1890 61560 : return 0;
1891 : }
1892 :
1893 : /*
1894 : * Unlink a block from a doubly linked list of blocks.
1895 : */
1896 : STATIC int /* error */
1897 19522 : xfs_da3_blk_unlink(
1898 : struct xfs_da_state *state,
1899 : struct xfs_da_state_blk *drop_blk,
1900 : struct xfs_da_state_blk *save_blk)
1901 : {
1902 19522 : struct xfs_da_blkinfo *drop_info;
1903 19522 : struct xfs_da_blkinfo *save_info;
1904 19522 : struct xfs_da_blkinfo *tmp_info;
1905 19522 : struct xfs_da_args *args;
1906 19522 : struct xfs_buf *bp;
1907 19522 : int error;
1908 :
1909 : /*
1910 : * Set up environment.
1911 : */
1912 19522 : args = state->args;
1913 19522 : ASSERT(args != NULL);
1914 19522 : save_info = save_blk->bp->b_addr;
1915 19522 : drop_info = drop_blk->bp->b_addr;
1916 19522 : ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
1917 : save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1918 : save_blk->magic == XFS_ATTR_LEAF_MAGIC);
1919 19522 : ASSERT(save_blk->magic == drop_blk->magic);
1920 42113 : ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) ||
1921 : (be32_to_cpu(save_info->back) == drop_blk->blkno));
1922 55497 : ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) ||
1923 : (be32_to_cpu(drop_info->back) == save_blk->blkno));
1924 :
1925 : /*
1926 : * Unlink the leaf block from the doubly linked chain of leaves.
1927 : */
1928 39044 : if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
1929 3069 : trace_xfs_da_unlink_back(args);
1930 3069 : save_info->back = drop_info->back;
1931 3069 : if (drop_info->back) {
1932 1903 : error = xfs_da3_node_read(args->trans, args->dp,
1933 1903 : be32_to_cpu(drop_info->back),
1934 : &bp, args->whichfork);
1935 1903 : if (error)
1936 : return error;
1937 1903 : ASSERT(bp != NULL);
1938 1903 : tmp_info = bp->b_addr;
1939 1903 : ASSERT(tmp_info->magic == save_info->magic);
1940 3806 : ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno);
1941 1903 : tmp_info->forw = cpu_to_be32(save_blk->blkno);
1942 1903 : xfs_trans_log_buf(args->trans, bp, 0,
1943 : sizeof(*tmp_info) - 1);
1944 : }
1945 : } else {
1946 16453 : trace_xfs_da_unlink_forward(args);
1947 16453 : save_info->forw = drop_info->forw;
1948 16453 : if (drop_info->forw) {
1949 15813 : error = xfs_da3_node_read(args->trans, args->dp,
1950 15813 : be32_to_cpu(drop_info->forw),
1951 : &bp, args->whichfork);
1952 15813 : if (error)
1953 : return error;
1954 15813 : ASSERT(bp != NULL);
1955 15813 : tmp_info = bp->b_addr;
1956 15813 : ASSERT(tmp_info->magic == save_info->magic);
1957 31626 : ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno);
1958 15813 : tmp_info->back = cpu_to_be32(save_blk->blkno);
1959 15813 : xfs_trans_log_buf(args->trans, bp, 0,
1960 : sizeof(*tmp_info) - 1);
1961 : }
1962 : }
1963 :
1964 19522 : xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
1965 19522 : return 0;
1966 : }
1967 :
1968 : /*
1969 : * Move a path "forward" or "!forward" one block at the current level.
1970 : *
1971 : * This routine will adjust a "path" to point to the next block
1972 : * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
1973 : * Btree, including updating pointers to the intermediate nodes between
1974 : * the new bottom and the root.
1975 : */
1976 : int /* error */
1977 304022397 : xfs_da3_path_shift(
1978 : struct xfs_da_state *state,
1979 : struct xfs_da_state_path *path,
1980 : int forward,
1981 : int release,
1982 : int *result)
1983 : {
1984 304022397 : struct xfs_da_state_blk *blk;
1985 304022397 : struct xfs_da_blkinfo *info;
1986 304022397 : struct xfs_da_args *args;
1987 304022397 : struct xfs_da_node_entry *btree;
1988 304022397 : struct xfs_da3_icnode_hdr nodehdr;
1989 304022397 : struct xfs_buf *bp;
1990 304022397 : xfs_dablk_t blkno = 0;
1991 304022397 : int level;
1992 304022397 : int error;
1993 304022397 : struct xfs_inode *dp = state->args->dp;
1994 :
1995 304022397 : trace_xfs_da_path_shift(state->args);
1996 :
1997 : /*
1998 : * Roll up the Btree looking for the first block where our
1999 : * current index is not at the edge of the block. Note that
2000 : * we skip the bottom layer because we want the sibling block.
2001 : */
2002 304022396 : args = state->args;
2003 304022396 : ASSERT(args != NULL);
2004 304022396 : ASSERT(path != NULL);
2005 304022396 : ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
2006 304022396 : level = (path->active-1) - 1; /* skip bottom layer in path */
2007 305003636 : for (; level >= 0; level--) {
2008 304582120 : blk = &path->blk[level];
2009 304582117 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr,
2010 304582117 : blk->bp->b_addr);
2011 :
2012 304582124 : if (forward && (blk->index < nodehdr.count - 1)) {
2013 303177570 : blk->index++;
2014 303177570 : blkno = be32_to_cpu(nodehdr.btree[blk->index].before);
2015 : break;
2016 1404554 : } else if (!forward && (blk->index > 0)) {
2017 423314 : blk->index--;
2018 423314 : blkno = be32_to_cpu(nodehdr.btree[blk->index].before);
2019 : break;
2020 : }
2021 : }
2022 304022400 : if (level < 0) {
2023 421516 : *result = -ENOENT; /* we're out of our tree */
2024 421516 : ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
2025 421516 : return 0;
2026 : }
2027 :
2028 : /*
2029 : * Roll down the edge of the subtree until we reach the
2030 : * same depth we were at originally.
2031 : */
2032 607638346 : for (blk++, level++; level < path->active; blk++, level++) {
2033 : /*
2034 : * Read the next child block into a local buffer.
2035 : */
2036 304037462 : error = xfs_da3_node_read(args->trans, dp, blkno, &bp,
2037 : args->whichfork);
2038 304037462 : if (error)
2039 0 : return error;
2040 :
2041 : /*
2042 : * Release the old block (if it's dirty, the trans doesn't
2043 : * actually let go) and swap the local buffer into the path
2044 : * structure. This ensures failure of the above read doesn't set
2045 : * a NULL buffer in an active slot in the path.
2046 : */
2047 304037462 : if (release)
2048 303183412 : xfs_trans_brelse(args->trans, blk->bp);
2049 304037462 : blk->blkno = blkno;
2050 304037462 : blk->bp = bp;
2051 :
2052 304037462 : info = blk->bp->b_addr;
2053 304037462 : ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
2054 : info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
2055 : info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
2056 : info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
2057 : info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
2058 : info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
2059 :
2060 :
2061 : /*
2062 : * Note: we flatten the magic number to a single type so we
2063 : * don't have to compare against crc/non-crc types elsewhere.
2064 : */
2065 304037462 : switch (be16_to_cpu(info->magic)) {
2066 436647 : case XFS_DA_NODE_MAGIC:
2067 : case XFS_DA3_NODE_MAGIC:
2068 436647 : blk->magic = XFS_DA_NODE_MAGIC;
2069 436647 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr,
2070 436647 : bp->b_addr);
2071 436647 : btree = nodehdr.btree;
2072 436647 : blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
2073 436647 : if (forward)
2074 436581 : blk->index = 0;
2075 : else
2076 66 : blk->index = nodehdr.count - 1;
2077 436647 : blkno = be32_to_cpu(btree[blk->index].before);
2078 : break;
2079 301817934 : case XFS_ATTR_LEAF_MAGIC:
2080 : case XFS_ATTR3_LEAF_MAGIC:
2081 301817934 : blk->magic = XFS_ATTR_LEAF_MAGIC;
2082 301817934 : ASSERT(level == path->active-1);
2083 301817934 : blk->index = 0;
2084 301817934 : blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
2085 301817934 : break;
2086 1782881 : case XFS_DIR2_LEAFN_MAGIC:
2087 : case XFS_DIR3_LEAFN_MAGIC:
2088 1782881 : blk->magic = XFS_DIR2_LEAFN_MAGIC;
2089 1782881 : ASSERT(level == path->active-1);
2090 1782881 : blk->index = 0;
2091 1782881 : blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
2092 : blk->bp, NULL);
2093 1782881 : break;
2094 0 : default:
2095 0 : ASSERT(0);
2096 0 : break;
2097 : }
2098 : }
2099 303600884 : *result = 0;
2100 303600884 : return 0;
2101 : }
2102 :
2103 :
2104 : /*========================================================================
2105 : * Utility routines.
2106 : *========================================================================*/
2107 :
2108 : /*
2109 : * Implement a simple hash on a character string.
2110 : * Rotate the hash value by 7 bits, then XOR each character in.
2111 : * This is implemented with some source-level loop unrolling.
2112 : */
2113 : xfs_dahash_t
2114 1855789370 : xfs_da_hashname(const uint8_t *name, int namelen)
2115 : {
2116 1855789370 : xfs_dahash_t hash;
2117 :
2118 : /*
2119 : * Do four characters at a time as long as we can.
2120 : */
2121 3345122231 : for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
2122 1489332861 : hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
2123 1489332861 : (name[3] << 0) ^ rol32(hash, 7 * 4);
2124 :
2125 : /*
2126 : * Now do the rest of the characters.
2127 : */
2128 1855789370 : switch (namelen) {
2129 314096044 : case 3:
2130 314096044 : return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
2131 : rol32(hash, 7 * 3);
2132 737515272 : case 2:
2133 737515272 : return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
2134 542378787 : case 1:
2135 542378787 : return (name[0] << 0) ^ rol32(hash, 7 * 1);
2136 : default: /* case 0: */
2137 : return hash;
2138 : }
2139 : }
2140 :
2141 : enum xfs_dacmp
2142 1805860981 : xfs_da_compname(
2143 : struct xfs_da_args *args,
2144 : const unsigned char *name,
2145 : int len)
2146 : {
2147 1631888532 : return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
2148 3437749513 : XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
2149 : }
2150 :
2151 : int
2152 2647673 : xfs_da_grow_inode_int(
2153 : struct xfs_da_args *args,
2154 : xfs_fileoff_t *bno,
2155 : int count)
2156 : {
2157 2647673 : struct xfs_trans *tp = args->trans;
2158 2647673 : struct xfs_inode *dp = args->dp;
2159 2647673 : int w = args->whichfork;
2160 2647673 : xfs_rfsblock_t nblks = dp->i_nblocks;
2161 2647673 : struct xfs_bmbt_irec map, *mapp;
2162 2647673 : int nmap, error, got, i, mapi;
2163 :
2164 : /*
2165 : * Find a spot in the file space to put the new block.
2166 : */
2167 2647673 : error = xfs_bmap_first_unused(tp, dp, count, bno, w);
2168 2647701 : if (error)
2169 : return error;
2170 :
2171 : /*
2172 : * Try mapping it in one filesystem block.
2173 : */
2174 2647701 : nmap = 1;
2175 2647701 : error = xfs_bmapi_write(tp, dp, *bno, count,
2176 2647701 : xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
2177 : args->total, &map, &nmap);
2178 2647602 : if (error)
2179 : return error;
2180 :
2181 2647600 : ASSERT(nmap <= 1);
2182 2647600 : if (nmap == 1) {
2183 : mapp = ↦
2184 : mapi = 1;
2185 40 : } else if (nmap == 0 && count > 1) {
2186 40 : xfs_fileoff_t b;
2187 40 : int c;
2188 :
2189 : /*
2190 : * If we didn't get it and the block might work if fragmented,
2191 : * try without the CONTIG flag. Loop until we get it all.
2192 : */
2193 40 : mapp = kmem_alloc(sizeof(*mapp) * count, 0);
2194 80 : for (b = *bno, mapi = 0; b < *bno + count; ) {
2195 40 : c = (int)(*bno + count - b);
2196 40 : nmap = min(XFS_BMAP_MAX_NMAP, c);
2197 40 : error = xfs_bmapi_write(tp, dp, b, c,
2198 40 : xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
2199 40 : args->total, &mapp[mapi], &nmap);
2200 40 : if (error)
2201 0 : goto out_free_map;
2202 40 : if (nmap < 1)
2203 : break;
2204 40 : mapi += nmap;
2205 40 : b = mapp[mapi - 1].br_startoff +
2206 40 : mapp[mapi - 1].br_blockcount;
2207 : }
2208 : } else {
2209 : mapi = 0;
2210 : mapp = NULL;
2211 : }
2212 :
2213 : /*
2214 : * Count the blocks we got, make sure it matches the total.
2215 : */
2216 5295330 : for (i = 0, got = 0; i < mapi; i++)
2217 2647730 : got += mapp[i].br_blockcount;
2218 2647600 : if (got != count || mapp[0].br_startoff != *bno ||
2219 2647709 : mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
2220 2647709 : *bno + count) {
2221 1 : error = -ENOSPC;
2222 1 : goto out_free_map;
2223 : }
2224 :
2225 : /* account for newly allocated blocks in reserved blocks total */
2226 2647700 : args->total -= dp->i_nblocks - nblks;
2227 :
2228 2647600 : out_free_map:
2229 2647600 : if (mapp != &map)
2230 40 : kmem_free(mapp);
2231 : return error;
2232 : }
2233 :
2234 : /*
2235 : * Add a block to the btree ahead of the file.
2236 : * Return the new block number to the caller.
2237 : */
2238 : int
2239 2551172 : xfs_da_grow_inode(
2240 : struct xfs_da_args *args,
2241 : xfs_dablk_t *new_blkno)
2242 : {
2243 2551172 : xfs_fileoff_t bno;
2244 2551172 : int error;
2245 :
2246 2551172 : trace_xfs_da_grow_inode(args);
2247 :
2248 2551233 : bno = args->geo->leafblk;
2249 2551233 : error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount);
2250 2551185 : if (!error)
2251 2551199 : *new_blkno = (xfs_dablk_t)bno;
2252 2551185 : return error;
2253 : }
2254 :
2255 : /*
2256 : * Ick. We need to always be able to remove a btree block, even
2257 : * if there's no space reservation because the filesystem is full.
2258 : * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
2259 : * It swaps the target block with the last block in the file. The
2260 : * last block in the file can always be removed since it can't cause
2261 : * a bmap btree split to do that.
2262 : */
2263 : STATIC int
2264 0 : xfs_da3_swap_lastblock(
2265 : struct xfs_da_args *args,
2266 : xfs_dablk_t *dead_blknop,
2267 : struct xfs_buf **dead_bufp)
2268 0 : {
2269 0 : struct xfs_da_blkinfo *dead_info;
2270 0 : struct xfs_da_blkinfo *sib_info;
2271 0 : struct xfs_da_intnode *par_node;
2272 0 : struct xfs_da_intnode *dead_node;
2273 0 : struct xfs_dir2_leaf *dead_leaf2;
2274 0 : struct xfs_da_node_entry *btree;
2275 0 : struct xfs_da3_icnode_hdr par_hdr;
2276 0 : struct xfs_inode *dp;
2277 0 : struct xfs_trans *tp;
2278 0 : struct xfs_mount *mp;
2279 0 : struct xfs_buf *dead_buf;
2280 0 : struct xfs_buf *last_buf;
2281 0 : struct xfs_buf *sib_buf;
2282 0 : struct xfs_buf *par_buf;
2283 0 : xfs_dahash_t dead_hash;
2284 0 : xfs_fileoff_t lastoff;
2285 0 : xfs_dablk_t dead_blkno;
2286 0 : xfs_dablk_t last_blkno;
2287 0 : xfs_dablk_t sib_blkno;
2288 0 : xfs_dablk_t par_blkno;
2289 0 : int error;
2290 0 : int w;
2291 0 : int entno;
2292 0 : int level;
2293 0 : int dead_level;
2294 :
2295 0 : trace_xfs_da_swap_lastblock(args);
2296 :
2297 0 : dead_buf = *dead_bufp;
2298 0 : dead_blkno = *dead_blknop;
2299 0 : tp = args->trans;
2300 0 : dp = args->dp;
2301 0 : w = args->whichfork;
2302 0 : ASSERT(w == XFS_DATA_FORK);
2303 0 : mp = dp->i_mount;
2304 0 : lastoff = args->geo->freeblk;
2305 0 : error = xfs_bmap_last_before(tp, dp, &lastoff, w);
2306 0 : if (error)
2307 : return error;
2308 0 : if (XFS_IS_CORRUPT(mp, lastoff == 0))
2309 0 : return -EFSCORRUPTED;
2310 : /*
2311 : * Read the last block in the btree space.
2312 : */
2313 0 : last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount;
2314 0 : error = xfs_da3_node_read(tp, dp, last_blkno, &last_buf, w);
2315 0 : if (error)
2316 : return error;
2317 : /*
2318 : * Copy the last block into the dead buffer and log it.
2319 : */
2320 0 : memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize);
2321 0 : xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);
2322 0 : dead_info = dead_buf->b_addr;
2323 : /*
2324 : * Get values from the moved block.
2325 : */
2326 0 : if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
2327 : dead_info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
2328 0 : struct xfs_dir3_icleaf_hdr leafhdr;
2329 0 : struct xfs_dir2_leaf_entry *ents;
2330 :
2331 0 : dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
2332 0 : xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr,
2333 : dead_leaf2);
2334 0 : ents = leafhdr.ents;
2335 0 : dead_level = 0;
2336 0 : dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval);
2337 : } else {
2338 0 : struct xfs_da3_icnode_hdr deadhdr;
2339 :
2340 0 : dead_node = (xfs_da_intnode_t *)dead_info;
2341 0 : xfs_da3_node_hdr_from_disk(dp->i_mount, &deadhdr, dead_node);
2342 0 : btree = deadhdr.btree;
2343 0 : dead_level = deadhdr.level;
2344 0 : dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval);
2345 : }
2346 0 : sib_buf = par_buf = NULL;
2347 : /*
2348 : * If the moved block has a left sibling, fix up the pointers.
2349 : */
2350 0 : if ((sib_blkno = be32_to_cpu(dead_info->back))) {
2351 0 : error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w);
2352 0 : if (error)
2353 0 : goto done;
2354 0 : sib_info = sib_buf->b_addr;
2355 0 : if (XFS_IS_CORRUPT(mp,
2356 : be32_to_cpu(sib_info->forw) != last_blkno ||
2357 : sib_info->magic != dead_info->magic)) {
2358 0 : error = -EFSCORRUPTED;
2359 0 : goto done;
2360 : }
2361 0 : sib_info->forw = cpu_to_be32(dead_blkno);
2362 0 : xfs_trans_log_buf(tp, sib_buf,
2363 : XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
2364 : sizeof(sib_info->forw)));
2365 0 : sib_buf = NULL;
2366 : }
2367 : /*
2368 : * If the moved block has a right sibling, fix up the pointers.
2369 : */
2370 0 : if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
2371 0 : error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w);
2372 0 : if (error)
2373 0 : goto done;
2374 0 : sib_info = sib_buf->b_addr;
2375 0 : if (XFS_IS_CORRUPT(mp,
2376 : be32_to_cpu(sib_info->back) != last_blkno ||
2377 : sib_info->magic != dead_info->magic)) {
2378 0 : error = -EFSCORRUPTED;
2379 0 : goto done;
2380 : }
2381 0 : sib_info->back = cpu_to_be32(dead_blkno);
2382 0 : xfs_trans_log_buf(tp, sib_buf,
2383 : XFS_DA_LOGRANGE(sib_info, &sib_info->back,
2384 : sizeof(sib_info->back)));
2385 0 : sib_buf = NULL;
2386 : }
2387 0 : par_blkno = args->geo->leafblk;
2388 0 : level = -1;
2389 : /*
2390 : * Walk down the tree looking for the parent of the moved block.
2391 : */
2392 0 : for (;;) {
2393 0 : error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w);
2394 0 : if (error)
2395 0 : goto done;
2396 0 : par_node = par_buf->b_addr;
2397 0 : xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
2398 0 : if (XFS_IS_CORRUPT(mp,
2399 : level >= 0 && level != par_hdr.level + 1)) {
2400 0 : error = -EFSCORRUPTED;
2401 0 : goto done;
2402 : }
2403 0 : level = par_hdr.level;
2404 0 : btree = par_hdr.btree;
2405 0 : for (entno = 0;
2406 0 : entno < par_hdr.count &&
2407 0 : be32_to_cpu(btree[entno].hashval) < dead_hash;
2408 0 : entno++)
2409 0 : continue;
2410 0 : if (XFS_IS_CORRUPT(mp, entno == par_hdr.count)) {
2411 0 : error = -EFSCORRUPTED;
2412 0 : goto done;
2413 : }
2414 0 : par_blkno = be32_to_cpu(btree[entno].before);
2415 0 : if (level == dead_level + 1)
2416 : break;
2417 0 : xfs_trans_brelse(tp, par_buf);
2418 0 : par_buf = NULL;
2419 : }
2420 : /*
2421 : * We're in the right parent block.
2422 : * Look for the right entry.
2423 : */
2424 0 : for (;;) {
2425 0 : for (;
2426 0 : entno < par_hdr.count &&
2427 0 : be32_to_cpu(btree[entno].before) != last_blkno;
2428 0 : entno++)
2429 0 : continue;
2430 0 : if (entno < par_hdr.count)
2431 : break;
2432 0 : par_blkno = par_hdr.forw;
2433 0 : xfs_trans_brelse(tp, par_buf);
2434 0 : par_buf = NULL;
2435 0 : if (XFS_IS_CORRUPT(mp, par_blkno == 0)) {
2436 0 : error = -EFSCORRUPTED;
2437 0 : goto done;
2438 : }
2439 0 : error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w);
2440 0 : if (error)
2441 0 : goto done;
2442 0 : par_node = par_buf->b_addr;
2443 0 : xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
2444 0 : if (XFS_IS_CORRUPT(mp, par_hdr.level != level)) {
2445 0 : error = -EFSCORRUPTED;
2446 0 : goto done;
2447 : }
2448 0 : btree = par_hdr.btree;
2449 0 : entno = 0;
2450 : }
2451 : /*
2452 : * Update the parent entry pointing to the moved block.
2453 : */
2454 0 : btree[entno].before = cpu_to_be32(dead_blkno);
2455 0 : xfs_trans_log_buf(tp, par_buf,
2456 0 : XFS_DA_LOGRANGE(par_node, &btree[entno].before,
2457 : sizeof(btree[entno].before)));
2458 0 : *dead_blknop = last_blkno;
2459 0 : *dead_bufp = last_buf;
2460 0 : return 0;
2461 0 : done:
2462 0 : if (par_buf)
2463 0 : xfs_trans_brelse(tp, par_buf);
2464 0 : if (sib_buf)
2465 0 : xfs_trans_brelse(tp, sib_buf);
2466 0 : xfs_trans_brelse(tp, last_buf);
2467 0 : return error;
2468 : }
2469 :
2470 : /*
2471 : * Remove a btree block from a directory or attribute.
2472 : */
2473 : int
2474 1274521 : xfs_da_shrink_inode(
2475 : struct xfs_da_args *args,
2476 : xfs_dablk_t dead_blkno,
2477 : struct xfs_buf *dead_buf)
2478 : {
2479 1274521 : struct xfs_inode *dp;
2480 1274521 : int done, error, w, count;
2481 1274521 : struct xfs_trans *tp;
2482 :
2483 1274521 : trace_xfs_da_shrink_inode(args);
2484 :
2485 1274523 : dp = args->dp;
2486 1274523 : w = args->whichfork;
2487 1274523 : tp = args->trans;
2488 1274523 : count = args->geo->fsbcount;
2489 1274536 : for (;;) {
2490 : /*
2491 : * Remove extents. If we get ENOSPC for a dir we have to move
2492 : * the last block to the place we want to kill.
2493 : */
2494 1305283 : error = xfs_bunmapi(tp, dp, dead_blkno, count,
2495 : xfs_bmapi_aflag(w), 0, &done);
2496 1274531 : if (error == -ENOSPC) {
2497 0 : if (w != XFS_DATA_FORK)
2498 : break;
2499 0 : error = xfs_da3_swap_lastblock(args, &dead_blkno,
2500 : &dead_buf);
2501 13 : if (error)
2502 : break;
2503 : } else {
2504 : break;
2505 : }
2506 : }
2507 1274531 : xfs_trans_binval(tp, dead_buf);
2508 1274531 : return error;
2509 : }
2510 :
2511 : static int
2512 1452066715 : xfs_dabuf_map(
2513 : struct xfs_inode *dp,
2514 : xfs_dablk_t bno,
2515 : unsigned int flags,
2516 : int whichfork,
2517 : struct xfs_buf_map **mapp,
2518 : int *nmaps)
2519 : {
2520 1452066715 : struct xfs_mount *mp = dp->i_mount;
2521 1452066715 : int nfsb = xfs_dabuf_nfsb(mp, whichfork);
2522 1452066715 : struct xfs_bmbt_irec irec, *irecs = &irec;
2523 1452066715 : struct xfs_buf_map *map = *mapp;
2524 1452066715 : xfs_fileoff_t off = bno;
2525 1452066715 : int error = 0, nirecs, i;
2526 :
2527 1452066715 : if (nfsb > 1)
2528 638291 : irecs = kmem_zalloc(sizeof(irec) * nfsb, KM_NOFS);
2529 :
2530 1452414058 : nirecs = nfsb;
2531 2405362265 : error = xfs_bmapi_read(dp, bno, nfsb, irecs, &nirecs,
2532 : xfs_bmapi_aflag(whichfork));
2533 1453046682 : if (error)
2534 177 : goto out_free_irecs;
2535 :
2536 : /*
2537 : * Use the caller provided map for the single map case, else allocate a
2538 : * larger one that needs to be free by the caller.
2539 : */
2540 1453046505 : if (nirecs > 1) {
2541 68676 : map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), KM_NOFS);
2542 68676 : if (!map) {
2543 0 : error = -ENOMEM;
2544 0 : goto out_free_irecs;
2545 : }
2546 68676 : *mapp = map;
2547 : }
2548 :
2549 2906850330 : for (i = 0; i < nirecs; i++) {
2550 1453681169 : if (irecs[i].br_startblock == HOLESTARTBLOCK ||
2551 : irecs[i].br_startblock == DELAYSTARTBLOCK)
2552 134649 : goto invalid_mapping;
2553 1453546520 : if (off != irecs[i].br_startoff)
2554 0 : goto invalid_mapping;
2555 :
2556 1453546520 : map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock);
2557 1453803825 : map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount);
2558 1453803825 : off += irecs[i].br_blockcount;
2559 : }
2560 :
2561 1453169161 : if (off != bno + nfsb)
2562 0 : goto invalid_mapping;
2563 :
2564 1453169161 : *nmaps = nirecs;
2565 1453303987 : out_free_irecs:
2566 1453303987 : if (irecs != &irec)
2567 638291 : kmem_free(irecs);
2568 1453303987 : return error;
2569 :
2570 134649 : invalid_mapping:
2571 : /* Caller ok with no mapping. */
2572 134649 : if (XFS_IS_CORRUPT(mp, !(flags & XFS_DABUF_MAP_HOLE_OK))) {
2573 0 : error = -EFSCORRUPTED;
2574 0 : if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
2575 0 : xfs_alert(mp, "%s: bno %u inode %llu",
2576 : __func__, bno, dp->i_ino);
2577 :
2578 0 : for (i = 0; i < nirecs; i++) {
2579 0 : xfs_alert(mp,
2580 : "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
2581 : i, irecs[i].br_startoff,
2582 : irecs[i].br_startblock,
2583 : irecs[i].br_blockcount,
2584 : irecs[i].br_state);
2585 : }
2586 : }
2587 : } else {
2588 134649 : *nmaps = 0;
2589 : }
2590 134649 : goto out_free_irecs;
2591 : }
2592 :
2593 : /*
2594 : * Get a buffer for the dir/attr block.
2595 : */
2596 : int
2597 2671329 : xfs_da_get_buf(
2598 : struct xfs_trans *tp,
2599 : struct xfs_inode *dp,
2600 : xfs_dablk_t bno,
2601 : struct xfs_buf **bpp,
2602 : int whichfork)
2603 : {
2604 2671329 : struct xfs_mount *mp = dp->i_mount;
2605 2671329 : struct xfs_buf *bp;
2606 2671329 : struct xfs_buf_map map, *mapp = ↦
2607 2671329 : int nmap = 1;
2608 2671329 : int error;
2609 :
2610 2671329 : *bpp = NULL;
2611 2671329 : error = xfs_dabuf_map(dp, bno, 0, whichfork, &mapp, &nmap);
2612 2671404 : if (error || nmap == 0)
2613 0 : goto out_free;
2614 :
2615 2671404 : error = xfs_trans_get_buf_map(tp, mp->m_ddev_targp, mapp, nmap, 0, &bp);
2616 2671420 : if (error)
2617 0 : goto out_free;
2618 :
2619 2671420 : *bpp = bp;
2620 :
2621 2671420 : out_free:
2622 2671420 : if (mapp != &map)
2623 42 : kmem_free(mapp);
2624 :
2625 2671420 : return error;
2626 : }
2627 :
2628 : /*
2629 : * Get a buffer for the dir/attr block, fill in the contents.
2630 : */
2631 : int
2632 1345404224 : xfs_da_read_buf(
2633 : struct xfs_trans *tp,
2634 : struct xfs_inode *dp,
2635 : xfs_dablk_t bno,
2636 : unsigned int flags,
2637 : struct xfs_buf **bpp,
2638 : int whichfork,
2639 : const struct xfs_buf_ops *ops)
2640 : {
2641 1345404224 : struct xfs_mount *mp = dp->i_mount;
2642 1345404224 : struct xfs_buf *bp;
2643 1345404224 : struct xfs_buf_map map, *mapp = ↦
2644 1345404224 : int nmap = 1;
2645 1345404224 : int error;
2646 :
2647 1345404224 : *bpp = NULL;
2648 1345404224 : error = xfs_dabuf_map(dp, bno, flags, whichfork, &mapp, &nmap);
2649 1346390480 : if (error || !nmap)
2650 134819 : goto out_free;
2651 :
2652 1346255661 : error = xfs_trans_read_buf_map(mp, tp, mp->m_ddev_targp, mapp, nmap, 0,
2653 : &bp, ops);
2654 1346168061 : if (error)
2655 12120 : goto out_free;
2656 :
2657 1346155941 : if (whichfork == XFS_ATTR_FORK)
2658 496938783 : xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
2659 : else
2660 849217158 : xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
2661 1346291656 : *bpp = bp;
2662 1346438595 : out_free:
2663 1346438595 : if (mapp != &map)
2664 68634 : kmem_free(mapp);
2665 :
2666 1346438595 : return error;
2667 : }
2668 :
2669 : /*
2670 : * Readahead the dir/attr block.
2671 : */
2672 : int
2673 104353522 : xfs_da_reada_buf(
2674 : struct xfs_inode *dp,
2675 : xfs_dablk_t bno,
2676 : unsigned int flags,
2677 : int whichfork,
2678 : const struct xfs_buf_ops *ops)
2679 : {
2680 104353522 : struct xfs_buf_map map;
2681 104353522 : struct xfs_buf_map *mapp;
2682 104353522 : int nmap;
2683 104353522 : int error;
2684 :
2685 104353522 : mapp = ↦
2686 104353522 : nmap = 1;
2687 104353522 : error = xfs_dabuf_map(dp, bno, flags, whichfork, &mapp, &nmap);
2688 104368855 : if (error || !nmap)
2689 7 : goto out_free;
2690 :
2691 104368848 : xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops);
2692 :
2693 104360540 : out_free:
2694 104360540 : if (mapp != &map)
2695 0 : kmem_free(mapp);
2696 :
2697 104360540 : return error;
2698 : }
|