Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4 : * Copyright (c) 2013 Red Hat, Inc.
5 : * All Rights Reserved.
6 : */
7 : #include "xfs.h"
8 : #include "xfs_fs.h"
9 : #include "xfs_shared.h"
10 : #include "xfs_format.h"
11 : #include "xfs_log_format.h"
12 : #include "xfs_trans_resv.h"
13 : #include "xfs_bit.h"
14 : #include "xfs_mount.h"
15 : #include "xfs_inode.h"
16 : #include "xfs_dir2.h"
17 : #include "xfs_dir2_priv.h"
18 : #include "xfs_trans.h"
19 : #include "xfs_bmap.h"
20 : #include "xfs_attr_leaf.h"
21 : #include "xfs_error.h"
22 : #include "xfs_trace.h"
23 : #include "xfs_buf_item.h"
24 : #include "xfs_log.h"
25 : #include "xfs_errortag.h"
26 :
27 : /*
28 : * xfs_da_btree.c
29 : *
30 : * Routines to implement directories as Btrees of hashed names.
31 : */
32 :
33 : /*========================================================================
34 : * Function prototypes for the kernel.
35 : *========================================================================*/
36 :
37 : /*
38 : * Routines used for growing the Btree.
39 : */
40 : STATIC int xfs_da3_root_split(xfs_da_state_t *state,
41 : xfs_da_state_blk_t *existing_root,
42 : xfs_da_state_blk_t *new_child);
43 : STATIC int xfs_da3_node_split(xfs_da_state_t *state,
44 : xfs_da_state_blk_t *existing_blk,
45 : xfs_da_state_blk_t *split_blk,
46 : xfs_da_state_blk_t *blk_to_add,
47 : int treelevel,
48 : int *result);
49 : STATIC void xfs_da3_node_rebalance(xfs_da_state_t *state,
50 : xfs_da_state_blk_t *node_blk_1,
51 : xfs_da_state_blk_t *node_blk_2);
52 : STATIC void xfs_da3_node_add(xfs_da_state_t *state,
53 : xfs_da_state_blk_t *old_node_blk,
54 : xfs_da_state_blk_t *new_node_blk);
55 :
56 : /*
57 : * Routines used for shrinking the Btree.
58 : */
59 : STATIC int xfs_da3_root_join(xfs_da_state_t *state,
60 : xfs_da_state_blk_t *root_blk);
61 : STATIC int xfs_da3_node_toosmall(xfs_da_state_t *state, int *retval);
62 : STATIC void xfs_da3_node_remove(xfs_da_state_t *state,
63 : xfs_da_state_blk_t *drop_blk);
64 : STATIC void xfs_da3_node_unbalance(xfs_da_state_t *state,
65 : xfs_da_state_blk_t *src_node_blk,
66 : xfs_da_state_blk_t *dst_node_blk);
67 :
68 : /*
69 : * Utility routines.
70 : */
71 : STATIC int xfs_da3_blk_unlink(xfs_da_state_t *state,
72 : xfs_da_state_blk_t *drop_blk,
73 : xfs_da_state_blk_t *save_blk);
74 :
75 :
76 : struct kmem_cache *xfs_da_state_cache; /* anchor for dir/attr state */
77 :
78 : /*
79 : * Allocate a dir-state structure.
80 : * We don't put them on the stack since they're large.
81 : */
82 : struct xfs_da_state *
83 223673013 : xfs_da_state_alloc(
84 : struct xfs_da_args *args)
85 : {
86 223673013 : struct xfs_da_state *state;
87 :
88 223673013 : state = kmem_cache_zalloc(xfs_da_state_cache, GFP_NOFS | __GFP_NOFAIL);
89 224688372 : state->args = args;
90 224688372 : state->mp = args->dp->i_mount;
91 224688372 : return state;
92 : }
93 :
94 : /*
95 : * Kill the altpath contents of a da-state structure.
96 : */
97 : STATIC void
98 224439057 : xfs_da_state_kill_altpath(xfs_da_state_t *state)
99 : {
100 224439057 : int i;
101 :
102 224564621 : for (i = 0; i < state->altpath.active; i++)
103 233324 : state->altpath.blk[i].bp = NULL;
104 224331297 : state->altpath.active = 0;
105 224331297 : }
106 :
107 : /*
108 : * Free a da-state structure.
109 : */
110 : void
111 224578294 : xfs_da_state_free(xfs_da_state_t *state)
112 : {
113 224578294 : xfs_da_state_kill_altpath(state);
114 : #ifdef DEBUG
115 224869053 : memset((char *)state, 0, sizeof(*state));
116 : #endif /* DEBUG */
117 224869053 : kmem_cache_free(xfs_da_state_cache, state);
118 224604334 : }
119 :
120 : void
121 2139 : xfs_da_state_reset(
122 : struct xfs_da_state *state,
123 : struct xfs_da_args *args)
124 : {
125 2139 : xfs_da_state_kill_altpath(state);
126 2139 : memset(state, 0, sizeof(struct xfs_da_state));
127 2139 : state->args = args;
128 2139 : state->mp = state->args->dp->i_mount;
129 2139 : }
130 :
131 : static inline int xfs_dabuf_nfsb(struct xfs_mount *mp, int whichfork)
132 : {
133 1804444643 : if (whichfork == XFS_DATA_FORK)
134 1213199762 : return mp->m_dir_geo->fsbcount;
135 591244881 : return mp->m_attr_geo->fsbcount;
136 : }
137 :
138 : void
139 835177312 : xfs_da3_node_hdr_from_disk(
140 : struct xfs_mount *mp,
141 : struct xfs_da3_icnode_hdr *to,
142 : struct xfs_da_intnode *from)
143 : {
144 835177312 : if (xfs_has_crc(mp)) {
145 835177312 : struct xfs_da3_intnode *from3 = (struct xfs_da3_intnode *)from;
146 :
147 835177312 : to->forw = be32_to_cpu(from3->hdr.info.hdr.forw);
148 835177312 : to->back = be32_to_cpu(from3->hdr.info.hdr.back);
149 835177312 : to->magic = be16_to_cpu(from3->hdr.info.hdr.magic);
150 835177312 : to->count = be16_to_cpu(from3->hdr.__count);
151 835177312 : to->level = be16_to_cpu(from3->hdr.__level);
152 835177312 : to->btree = from3->__btree;
153 835177312 : ASSERT(to->magic == XFS_DA3_NODE_MAGIC);
154 : } else {
155 0 : to->forw = be32_to_cpu(from->hdr.info.forw);
156 0 : to->back = be32_to_cpu(from->hdr.info.back);
157 0 : to->magic = be16_to_cpu(from->hdr.info.magic);
158 0 : to->count = be16_to_cpu(from->hdr.__count);
159 0 : to->level = be16_to_cpu(from->hdr.__level);
160 0 : to->btree = from->__btree;
161 0 : ASSERT(to->magic == XFS_DA_NODE_MAGIC);
162 : }
163 835177312 : }
164 :
165 : void
166 278377 : xfs_da3_node_hdr_to_disk(
167 : struct xfs_mount *mp,
168 : struct xfs_da_intnode *to,
169 : struct xfs_da3_icnode_hdr *from)
170 : {
171 278377 : if (xfs_has_crc(mp)) {
172 278377 : struct xfs_da3_intnode *to3 = (struct xfs_da3_intnode *)to;
173 :
174 278377 : ASSERT(from->magic == XFS_DA3_NODE_MAGIC);
175 278377 : to3->hdr.info.hdr.forw = cpu_to_be32(from->forw);
176 278377 : to3->hdr.info.hdr.back = cpu_to_be32(from->back);
177 278377 : to3->hdr.info.hdr.magic = cpu_to_be16(from->magic);
178 278377 : to3->hdr.__count = cpu_to_be16(from->count);
179 278377 : to3->hdr.__level = cpu_to_be16(from->level);
180 : } else {
181 0 : ASSERT(from->magic == XFS_DA_NODE_MAGIC);
182 0 : to->hdr.info.forw = cpu_to_be32(from->forw);
183 0 : to->hdr.info.back = cpu_to_be32(from->back);
184 0 : to->hdr.info.magic = cpu_to_be16(from->magic);
185 0 : to->hdr.__count = cpu_to_be16(from->count);
186 0 : to->hdr.__level = cpu_to_be16(from->level);
187 : }
188 278377 : }
189 :
190 : /*
191 : * Verify an xfs_da3_blkinfo structure. Note that the da3 fields are only
192 : * accessible on v5 filesystems. This header format is common across da node,
193 : * attr leaf and dir leaf blocks.
194 : */
195 : xfs_failaddr_t
196 16765389 : xfs_da3_blkinfo_verify(
197 : struct xfs_buf *bp,
198 : struct xfs_da3_blkinfo *hdr3)
199 : {
200 16765389 : struct xfs_mount *mp = bp->b_mount;
201 16765389 : struct xfs_da_blkinfo *hdr = &hdr3->hdr;
202 :
203 16765389 : if (!xfs_verify_magic16(bp, hdr->magic))
204 0 : return __this_address;
205 :
206 16758467 : if (xfs_has_crc(mp)) {
207 16758467 : if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
208 0 : return __this_address;
209 16758770 : if (be64_to_cpu(hdr3->blkno) != xfs_buf_daddr(bp))
210 0 : return __this_address;
211 16758770 : if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
212 0 : return __this_address;
213 : }
214 :
215 : return NULL;
216 : }
217 :
218 : static xfs_failaddr_t
219 344884 : xfs_da3_node_verify(
220 : struct xfs_buf *bp)
221 : {
222 344884 : struct xfs_mount *mp = bp->b_mount;
223 344884 : struct xfs_da_intnode *hdr = bp->b_addr;
224 344884 : struct xfs_da3_icnode_hdr ichdr;
225 344884 : xfs_failaddr_t fa;
226 :
227 344884 : xfs_da3_node_hdr_from_disk(mp, &ichdr, hdr);
228 :
229 344883 : fa = xfs_da3_blkinfo_verify(bp, bp->b_addr);
230 344881 : if (fa)
231 : return fa;
232 :
233 344880 : if (ichdr.level == 0)
234 0 : return __this_address;
235 344880 : if (ichdr.level > XFS_DA_NODE_MAXDEPTH)
236 0 : return __this_address;
237 344880 : if (ichdr.count == 0)
238 0 : return __this_address;
239 :
240 : /*
241 : * we don't know if the node is for and attribute or directory tree,
242 : * so only fail if the count is outside both bounds
243 : */
244 344880 : if (ichdr.count > mp->m_dir_geo->node_ents &&
245 0 : ichdr.count > mp->m_attr_geo->node_ents)
246 0 : return __this_address;
247 :
248 : /* XXX: hash order check? */
249 :
250 : return NULL;
251 : }
252 :
253 : static void
254 240658 : xfs_da3_node_write_verify(
255 : struct xfs_buf *bp)
256 : {
257 240658 : struct xfs_mount *mp = bp->b_mount;
258 240658 : struct xfs_buf_log_item *bip = bp->b_log_item;
259 240658 : struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
260 240658 : xfs_failaddr_t fa;
261 :
262 240658 : fa = xfs_da3_node_verify(bp);
263 240658 : if (fa) {
264 0 : xfs_verifier_error(bp, -EFSCORRUPTED, fa);
265 0 : return;
266 : }
267 :
268 240658 : if (!xfs_has_crc(mp))
269 : return;
270 :
271 240658 : if (bip)
272 240658 : hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
273 :
274 240658 : xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF);
275 : }
276 :
277 : /*
278 : * leaf/node format detection on trees is sketchy, so a node read can be done on
279 : * leaf level blocks when detection identifies the tree as a node format tree
280 : * incorrectly. In this case, we need to swap the verifier to match the correct
281 : * format of the block being read.
282 : */
283 : static void
284 224977 : xfs_da3_node_read_verify(
285 : struct xfs_buf *bp)
286 : {
287 224977 : struct xfs_da_blkinfo *info = bp->b_addr;
288 224977 : xfs_failaddr_t fa;
289 :
290 224977 : switch (be16_to_cpu(info->magic)) {
291 : case XFS_DA3_NODE_MAGIC:
292 37330 : if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
293 80 : xfs_verifier_error(bp, -EFSBADCRC,
294 40 : __this_address);
295 40 : break;
296 : }
297 37290 : fallthrough;
298 : case XFS_DA_NODE_MAGIC:
299 37290 : fa = xfs_da3_node_verify(bp);
300 37290 : if (fa)
301 0 : xfs_verifier_error(bp, -EFSCORRUPTED, fa);
302 : return;
303 69079 : case XFS_ATTR_LEAF_MAGIC:
304 : case XFS_ATTR3_LEAF_MAGIC:
305 69079 : bp->b_ops = &xfs_attr3_leaf_buf_ops;
306 69079 : bp->b_ops->verify_read(bp);
307 69079 : return;
308 118559 : case XFS_DIR2_LEAFN_MAGIC:
309 : case XFS_DIR3_LEAFN_MAGIC:
310 118559 : bp->b_ops = &xfs_dir3_leafn_buf_ops;
311 118559 : bp->b_ops->verify_read(bp);
312 118559 : return;
313 : default:
314 9 : xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
315 9 : break;
316 : }
317 : }
318 :
319 : /* Verify the structure of a da3 block. */
320 : static xfs_failaddr_t
321 66935 : xfs_da3_node_verify_struct(
322 : struct xfs_buf *bp)
323 : {
324 66935 : struct xfs_da_blkinfo *info = bp->b_addr;
325 :
326 66935 : switch (be16_to_cpu(info->magic)) {
327 66935 : case XFS_DA3_NODE_MAGIC:
328 : case XFS_DA_NODE_MAGIC:
329 66935 : return xfs_da3_node_verify(bp);
330 0 : case XFS_ATTR_LEAF_MAGIC:
331 : case XFS_ATTR3_LEAF_MAGIC:
332 0 : bp->b_ops = &xfs_attr3_leaf_buf_ops;
333 0 : return bp->b_ops->verify_struct(bp);
334 0 : case XFS_DIR2_LEAFN_MAGIC:
335 : case XFS_DIR3_LEAFN_MAGIC:
336 0 : bp->b_ops = &xfs_dir3_leafn_buf_ops;
337 0 : return bp->b_ops->verify_struct(bp);
338 : default:
339 0 : return __this_address;
340 : }
341 : }
342 :
343 : const struct xfs_buf_ops xfs_da3_node_buf_ops = {
344 : .name = "xfs_da3_node",
345 : .magic16 = { cpu_to_be16(XFS_DA_NODE_MAGIC),
346 : cpu_to_be16(XFS_DA3_NODE_MAGIC) },
347 : .verify_read = xfs_da3_node_read_verify,
348 : .verify_write = xfs_da3_node_write_verify,
349 : .verify_struct = xfs_da3_node_verify_struct,
350 : };
351 :
352 : static int
353 766646332 : xfs_da3_node_set_type(
354 : struct xfs_trans *tp,
355 : struct xfs_buf *bp)
356 : {
357 766646332 : struct xfs_da_blkinfo *info = bp->b_addr;
358 :
359 766646332 : switch (be16_to_cpu(info->magic)) {
360 204756163 : case XFS_DA_NODE_MAGIC:
361 : case XFS_DA3_NODE_MAGIC:
362 204756163 : xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
363 204756163 : return 0;
364 373738681 : case XFS_ATTR_LEAF_MAGIC:
365 : case XFS_ATTR3_LEAF_MAGIC:
366 373738681 : xfs_trans_buf_set_type(tp, bp, XFS_BLFT_ATTR_LEAF_BUF);
367 373738681 : return 0;
368 188151488 : case XFS_DIR2_LEAFN_MAGIC:
369 : case XFS_DIR3_LEAFN_MAGIC:
370 188151488 : xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
371 188151488 : return 0;
372 0 : default:
373 0 : XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, tp->t_mountp,
374 : info, sizeof(*info));
375 0 : xfs_trans_brelse(tp, bp);
376 0 : return -EFSCORRUPTED;
377 : }
378 : }
379 :
380 : int
381 846599012 : xfs_da3_node_read(
382 : struct xfs_trans *tp,
383 : struct xfs_inode *dp,
384 : xfs_dablk_t bno,
385 : struct xfs_buf **bpp,
386 : int whichfork)
387 : {
388 846599012 : int error;
389 :
390 846599012 : error = xfs_da_read_buf(tp, dp, bno, 0, bpp, whichfork,
391 : &xfs_da3_node_buf_ops);
392 848254507 : if (error || !*bpp || !tp)
393 : return error;
394 766837724 : return xfs_da3_node_set_type(tp, *bpp);
395 : }
396 :
397 : int
398 14827 : xfs_da3_node_read_mapped(
399 : struct xfs_trans *tp,
400 : struct xfs_inode *dp,
401 : xfs_daddr_t mappedbno,
402 : struct xfs_buf **bpp,
403 : int whichfork)
404 : {
405 14827 : struct xfs_mount *mp = dp->i_mount;
406 14827 : int error;
407 :
408 14827 : error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, mappedbno,
409 29654 : XFS_FSB_TO_BB(mp, xfs_dabuf_nfsb(mp, whichfork)), 0,
410 : bpp, &xfs_da3_node_buf_ops);
411 14830 : if (error || !*bpp)
412 : return error;
413 :
414 14830 : if (whichfork == XFS_ATTR_FORK)
415 14830 : xfs_buf_set_ref(*bpp, XFS_ATTR_BTREE_REF);
416 : else
417 0 : xfs_buf_set_ref(*bpp, XFS_DIR_BTREE_REF);
418 :
419 14827 : if (!tp)
420 : return 0;
421 14827 : return xfs_da3_node_set_type(tp, *bpp);
422 : }
423 :
424 : /*========================================================================
425 : * Routines used for growing the Btree.
426 : *========================================================================*/
427 :
428 : /*
429 : * Create the initial contents of an intermediate node.
430 : */
431 : int
432 33853 : xfs_da3_node_create(
433 : struct xfs_da_args *args,
434 : xfs_dablk_t blkno,
435 : int level,
436 : struct xfs_buf **bpp,
437 : int whichfork)
438 : {
439 33853 : struct xfs_da_intnode *node;
440 33853 : struct xfs_trans *tp = args->trans;
441 33853 : struct xfs_mount *mp = tp->t_mountp;
442 33853 : struct xfs_da3_icnode_hdr ichdr = {0};
443 33853 : struct xfs_buf *bp;
444 33853 : int error;
445 33853 : struct xfs_inode *dp = args->dp;
446 :
447 33853 : trace_xfs_da_node_create(args);
448 33853 : ASSERT(level <= XFS_DA_NODE_MAXDEPTH);
449 :
450 33853 : error = xfs_da_get_buf(tp, dp, blkno, &bp, whichfork);
451 33853 : if (error)
452 : return error;
453 33853 : bp->b_ops = &xfs_da3_node_buf_ops;
454 33853 : xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
455 33853 : node = bp->b_addr;
456 :
457 33853 : if (xfs_has_crc(mp)) {
458 33853 : struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
459 :
460 33853 : memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr));
461 33853 : ichdr.magic = XFS_DA3_NODE_MAGIC;
462 33853 : hdr3->info.blkno = cpu_to_be64(xfs_buf_daddr(bp));
463 33853 : hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
464 33853 : uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid);
465 : } else {
466 0 : ichdr.magic = XFS_DA_NODE_MAGIC;
467 : }
468 33853 : ichdr.level = level;
469 :
470 33853 : xfs_da3_node_hdr_to_disk(dp->i_mount, node, &ichdr);
471 33853 : xfs_trans_log_buf(tp, bp,
472 33853 : XFS_DA_LOGRANGE(node, &node->hdr, args->geo->node_hdr_size));
473 :
474 33853 : *bpp = bp;
475 33853 : return 0;
476 : }
477 :
478 : /*
479 : * Split a leaf node, rebalance, then possibly split
480 : * intermediate nodes, rebalance, etc.
481 : */
482 : int /* error */
483 162303 : xfs_da3_split(
484 : struct xfs_da_state *state)
485 : {
486 162303 : struct xfs_da_state_blk *oldblk;
487 162303 : struct xfs_da_state_blk *newblk;
488 162303 : struct xfs_da_state_blk *addblk;
489 162303 : struct xfs_da_intnode *node;
490 162303 : int max;
491 162303 : int action = 0;
492 162303 : int error;
493 162303 : int i;
494 :
495 162303 : trace_xfs_da_split(state->args);
496 :
497 162302 : if (XFS_TEST_ERROR(false, state->mp, XFS_ERRTAG_DA_LEAF_SPLIT))
498 : return -EIO;
499 :
500 : /*
501 : * Walk back up the tree splitting/inserting/adjusting as necessary.
502 : * If we need to insert and there isn't room, split the node, then
503 : * decide which fragment to insert the new block from below into.
504 : * Note that we may split the root this way, but we need more fixup.
505 : */
506 162293 : max = state->path.active - 1;
507 162293 : ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
508 162293 : ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
509 : state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);
510 :
511 162292 : addblk = &state->path.blk[max]; /* initial dummy value */
512 478061 : for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
513 315765 : oldblk = &state->path.blk[i];
514 315765 : newblk = &state->altpath.blk[i];
515 :
516 : /*
517 : * If a leaf node then
518 : * Allocate a new leaf node, then rebalance across them.
519 : * else if an intermediate node then
520 : * We split on the last layer, must we split the node?
521 : */
522 315767 : switch (oldblk->magic) {
523 88853 : case XFS_ATTR_LEAF_MAGIC:
524 88853 : error = xfs_attr3_leaf_split(state, oldblk, newblk);
525 88853 : if ((error != 0) && (error != -ENOSPC)) {
526 0 : return error; /* GROT: attr is inconsistent */
527 : }
528 88853 : if (!error) {
529 : addblk = newblk;
530 : break;
531 : }
532 : /*
533 : * Entry wouldn't fit, split the leaf again. The new
534 : * extrablk will be consumed by xfs_da3_node_split if
535 : * the node is split.
536 : */
537 0 : state->extravalid = 1;
538 0 : if (state->inleaf) {
539 0 : state->extraafter = 0; /* before newblk */
540 0 : trace_xfs_attr_leaf_split_before(state->args);
541 0 : error = xfs_attr3_leaf_split(state, oldblk,
542 0 : &state->extrablk);
543 : } else {
544 0 : state->extraafter = 1; /* after newblk */
545 0 : trace_xfs_attr_leaf_split_after(state->args);
546 0 : error = xfs_attr3_leaf_split(state, newblk,
547 0 : &state->extrablk);
548 : }
549 0 : if (error)
550 0 : return error; /* GROT: attr inconsistent */
551 : addblk = newblk;
552 : break;
553 73441 : case XFS_DIR2_LEAFN_MAGIC:
554 73441 : error = xfs_dir2_leafn_split(state, oldblk, newblk);
555 73446 : if (error)
556 0 : return error;
557 : addblk = newblk;
558 : break;
559 153473 : case XFS_DA_NODE_MAGIC:
560 153473 : error = xfs_da3_node_split(state, oldblk, newblk, addblk,
561 : max - i, &action);
562 153471 : addblk->bp = NULL;
563 153471 : if (error)
564 0 : return error; /* GROT: dir is inconsistent */
565 : /*
566 : * Record the newly split block for the next time thru?
567 : */
568 153471 : if (action)
569 : addblk = newblk;
570 : else
571 153232 : addblk = NULL;
572 : break;
573 : }
574 :
575 : /*
576 : * Update the btree to show the new hashval for this child.
577 : */
578 315770 : xfs_da3_fixhashpath(state, &state->path);
579 : }
580 162296 : if (!addblk)
581 : return 0;
582 :
583 : /*
584 : * xfs_da3_node_split() should have consumed any extra blocks we added
585 : * during a double leaf split in the attr fork. This is guaranteed as
586 : * we can't be here if the attr fork only has a single leaf block.
587 : */
588 9065 : ASSERT(state->extravalid == 0 ||
589 : state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);
590 :
591 : /*
592 : * Split the root node.
593 : */
594 9065 : ASSERT(state->path.active == 0);
595 9065 : oldblk = &state->path.blk[0];
596 9065 : error = xfs_da3_root_split(state, oldblk, addblk);
597 9066 : if (error)
598 0 : goto out;
599 :
600 : /*
601 : * Update pointers to the node which used to be block 0 and just got
602 : * bumped because of the addition of a new root node. Note that the
603 : * original block 0 could be at any position in the list of blocks in
604 : * the tree.
605 : *
606 : * Note: the magic numbers and sibling pointers are in the same physical
607 : * place for both v2 and v3 headers (by design). Hence it doesn't matter
608 : * which version of the xfs_da_intnode structure we use here as the
609 : * result will be the same using either structure.
610 : */
611 9066 : node = oldblk->bp->b_addr;
612 9066 : if (node->hdr.info.forw) {
613 9066 : if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) {
614 0 : xfs_buf_mark_corrupt(oldblk->bp);
615 0 : error = -EFSCORRUPTED;
616 0 : goto out;
617 : }
618 9066 : node = addblk->bp->b_addr;
619 9066 : node->hdr.info.back = cpu_to_be32(oldblk->blkno);
620 9066 : xfs_trans_log_buf(state->args->trans, addblk->bp,
621 : XFS_DA_LOGRANGE(node, &node->hdr.info,
622 : sizeof(node->hdr.info)));
623 : }
624 9066 : node = oldblk->bp->b_addr;
625 9066 : if (node->hdr.info.back) {
626 0 : if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) {
627 0 : xfs_buf_mark_corrupt(oldblk->bp);
628 0 : error = -EFSCORRUPTED;
629 0 : goto out;
630 : }
631 0 : node = addblk->bp->b_addr;
632 0 : node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
633 0 : xfs_trans_log_buf(state->args->trans, addblk->bp,
634 : XFS_DA_LOGRANGE(node, &node->hdr.info,
635 : sizeof(node->hdr.info)));
636 : }
637 9066 : out:
638 9066 : addblk->bp = NULL;
639 9066 : return error;
640 : }
641 :
642 : /*
643 : * Split the root. We have to create a new root and point to the two
644 : * parts (the split old root) that we just created. Copy block zero to
645 : * the EOF, extending the inode in process.
646 : */
647 : STATIC int /* error */
648 9066 : xfs_da3_root_split(
649 : struct xfs_da_state *state,
650 : struct xfs_da_state_blk *blk1,
651 : struct xfs_da_state_blk *blk2)
652 : {
653 9066 : struct xfs_da_intnode *node;
654 9066 : struct xfs_da_intnode *oldroot;
655 9066 : struct xfs_da_node_entry *btree;
656 9066 : struct xfs_da3_icnode_hdr nodehdr;
657 9066 : struct xfs_da_args *args;
658 9066 : struct xfs_buf *bp;
659 9066 : struct xfs_inode *dp;
660 9066 : struct xfs_trans *tp;
661 9066 : struct xfs_dir2_leaf *leaf;
662 9066 : xfs_dablk_t blkno;
663 9066 : int level;
664 9066 : int error;
665 9066 : int size;
666 :
667 9066 : trace_xfs_da_root_split(state->args);
668 :
669 : /*
670 : * Copy the existing (incorrect) block from the root node position
671 : * to a free space somewhere.
672 : */
673 9065 : args = state->args;
674 9065 : error = xfs_da_grow_inode(args, &blkno);
675 9066 : if (error)
676 : return error;
677 :
678 9066 : dp = args->dp;
679 9066 : tp = args->trans;
680 9066 : error = xfs_da_get_buf(tp, dp, blkno, &bp, args->whichfork);
681 9066 : if (error)
682 : return error;
683 9066 : node = bp->b_addr;
684 9066 : oldroot = blk1->bp->b_addr;
685 9066 : if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
686 : oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
687 32 : struct xfs_da3_icnode_hdr icnodehdr;
688 :
689 32 : xfs_da3_node_hdr_from_disk(dp->i_mount, &icnodehdr, oldroot);
690 32 : btree = icnodehdr.btree;
691 32 : size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot);
692 32 : level = icnodehdr.level;
693 :
694 : /*
695 : * we are about to copy oldroot to bp, so set up the type
696 : * of bp while we know exactly what it will be.
697 : */
698 32 : xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
699 : } else {
700 9034 : struct xfs_dir3_icleaf_hdr leafhdr;
701 :
702 9034 : leaf = (xfs_dir2_leaf_t *)oldroot;
703 9034 : xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf);
704 :
705 9034 : ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
706 : leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
707 9034 : size = (int)((char *)&leafhdr.ents[leafhdr.count] -
708 : (char *)leaf);
709 9034 : level = 0;
710 :
711 : /*
712 : * we are about to copy oldroot to bp, so set up the type
713 : * of bp while we know exactly what it will be.
714 : */
715 9034 : xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
716 : }
717 :
718 : /*
719 : * we can copy most of the information in the node from one block to
720 : * another, but for CRC enabled headers we have to make sure that the
721 : * block specific identifiers are kept intact. We update the buffer
722 : * directly for this.
723 : */
724 18132 : memcpy(node, oldroot, size);
725 9066 : if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
726 : oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
727 9066 : struct xfs_da3_intnode *node3 = (struct xfs_da3_intnode *)node;
728 :
729 9066 : node3->hdr.info.blkno = cpu_to_be64(xfs_buf_daddr(bp));
730 : }
731 9066 : xfs_trans_log_buf(tp, bp, 0, size - 1);
732 :
733 9066 : bp->b_ops = blk1->bp->b_ops;
734 9066 : xfs_trans_buf_copy_type(bp, blk1->bp);
735 9066 : blk1->bp = bp;
736 9066 : blk1->blkno = blkno;
737 :
738 : /*
739 : * Set up the new root node.
740 : */
741 18110 : error = xfs_da3_node_create(args,
742 9044 : (args->whichfork == XFS_DATA_FORK) ? args->geo->leafblk : 0,
743 : level + 1, &bp, args->whichfork);
744 9066 : if (error)
745 : return error;
746 :
747 9066 : node = bp->b_addr;
748 9066 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
749 9066 : btree = nodehdr.btree;
750 9066 : btree[0].hashval = cpu_to_be32(blk1->hashval);
751 9066 : btree[0].before = cpu_to_be32(blk1->blkno);
752 9066 : btree[1].hashval = cpu_to_be32(blk2->hashval);
753 9066 : btree[1].before = cpu_to_be32(blk2->blkno);
754 9066 : nodehdr.count = 2;
755 9066 : xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
756 :
757 : #ifdef DEBUG
758 9066 : if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
759 : oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
760 0 : ASSERT(blk1->blkno >= args->geo->leafblk &&
761 : blk1->blkno < args->geo->freeblk);
762 0 : ASSERT(blk2->blkno >= args->geo->leafblk &&
763 : blk2->blkno < args->geo->freeblk);
764 : }
765 : #endif
766 :
767 : /* Header is already logged by xfs_da_node_create */
768 9066 : xfs_trans_log_buf(tp, bp,
769 9066 : XFS_DA_LOGRANGE(node, btree, sizeof(xfs_da_node_entry_t) * 2));
770 :
771 9066 : return 0;
772 : }
773 :
774 : /*
775 : * Split the node, rebalance, then add the new entry.
776 : */
777 : STATIC int /* error */
778 153471 : xfs_da3_node_split(
779 : struct xfs_da_state *state,
780 : struct xfs_da_state_blk *oldblk,
781 : struct xfs_da_state_blk *newblk,
782 : struct xfs_da_state_blk *addblk,
783 : int treelevel,
784 : int *result)
785 : {
786 153471 : struct xfs_da_intnode *node;
787 153471 : struct xfs_da3_icnode_hdr nodehdr;
788 153471 : xfs_dablk_t blkno;
789 153471 : int newcount;
790 153471 : int error;
791 153471 : int useextra;
792 153471 : struct xfs_inode *dp = state->args->dp;
793 :
794 153471 : trace_xfs_da_node_split(state->args);
795 :
796 153471 : node = oldblk->bp->b_addr;
797 153471 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
798 :
799 : /*
800 : * With V2 dirs the extra block is data or freespace.
801 : */
802 153471 : useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK;
803 153471 : newcount = 1 + useextra;
804 : /*
805 : * Do we have to split the node?
806 : */
807 153471 : if (nodehdr.count + newcount > state->args->geo->node_ents) {
808 : /*
809 : * Allocate a new node, add to the doubly linked chain of
810 : * nodes, then move some of our excess entries into it.
811 : */
812 239 : error = xfs_da_grow_inode(state->args, &blkno);
813 239 : if (error)
814 : return error; /* GROT: dir is inconsistent */
815 :
816 239 : error = xfs_da3_node_create(state->args, blkno, treelevel,
817 239 : &newblk->bp, state->args->whichfork);
818 239 : if (error)
819 : return error; /* GROT: dir is inconsistent */
820 239 : newblk->blkno = blkno;
821 239 : newblk->magic = XFS_DA_NODE_MAGIC;
822 239 : xfs_da3_node_rebalance(state, oldblk, newblk);
823 239 : error = xfs_da3_blk_link(state, oldblk, newblk);
824 239 : if (error)
825 : return error;
826 239 : *result = 1;
827 : } else {
828 153232 : *result = 0;
829 : }
830 :
831 : /*
832 : * Insert the new entry(s) into the correct block
833 : * (updating last hashval in the process).
834 : *
835 : * xfs_da3_node_add() inserts BEFORE the given index,
836 : * and as a result of using node_lookup_int() we always
837 : * point to a valid entry (not after one), but a split
838 : * operation always results in a new block whose hashvals
839 : * FOLLOW the current block.
840 : *
841 : * If we had double-split op below us, then add the extra block too.
842 : */
843 153471 : node = oldblk->bp->b_addr;
844 153471 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
845 153472 : if (oldblk->index <= nodehdr.count) {
846 153250 : oldblk->index++;
847 153250 : xfs_da3_node_add(state, oldblk, addblk);
848 153250 : if (useextra) {
849 0 : if (state->extraafter)
850 0 : oldblk->index++;
851 0 : xfs_da3_node_add(state, oldblk, &state->extrablk);
852 0 : state->extravalid = 0;
853 : }
854 : } else {
855 222 : newblk->index++;
856 222 : xfs_da3_node_add(state, newblk, addblk);
857 222 : if (useextra) {
858 0 : if (state->extraafter)
859 0 : newblk->index++;
860 0 : xfs_da3_node_add(state, newblk, &state->extrablk);
861 0 : state->extravalid = 0;
862 : }
863 : }
864 :
865 : return 0;
866 : }
867 :
868 : /*
869 : * Balance the btree elements between two intermediate nodes,
870 : * usually one full and one empty.
871 : *
872 : * NOTE: if blk2 is empty, then it will get the upper half of blk1.
873 : */
874 : STATIC void
875 239 : xfs_da3_node_rebalance(
876 : struct xfs_da_state *state,
877 : struct xfs_da_state_blk *blk1,
878 : struct xfs_da_state_blk *blk2)
879 : {
880 239 : struct xfs_da_intnode *node1;
881 239 : struct xfs_da_intnode *node2;
882 239 : struct xfs_da_node_entry *btree1;
883 239 : struct xfs_da_node_entry *btree2;
884 239 : struct xfs_da_node_entry *btree_s;
885 239 : struct xfs_da_node_entry *btree_d;
886 239 : struct xfs_da3_icnode_hdr nodehdr1;
887 239 : struct xfs_da3_icnode_hdr nodehdr2;
888 239 : struct xfs_trans *tp;
889 239 : int count;
890 239 : int tmp;
891 239 : int swap = 0;
892 239 : struct xfs_inode *dp = state->args->dp;
893 :
894 239 : trace_xfs_da_node_rebalance(state->args);
895 :
896 239 : node1 = blk1->bp->b_addr;
897 239 : node2 = blk2->bp->b_addr;
898 239 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
899 239 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
900 239 : btree1 = nodehdr1.btree;
901 239 : btree2 = nodehdr2.btree;
902 :
903 : /*
904 : * Figure out how many entries need to move, and in which direction.
905 : * Swap the nodes around if that makes it simpler.
906 : */
907 239 : if (nodehdr1.count > 0 && nodehdr2.count > 0 &&
908 0 : ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
909 0 : (be32_to_cpu(btree2[nodehdr2.count - 1].hashval) <
910 0 : be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) {
911 0 : swap(node1, node2);
912 0 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
913 0 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
914 0 : btree1 = nodehdr1.btree;
915 0 : btree2 = nodehdr2.btree;
916 0 : swap = 1;
917 : }
918 :
919 239 : count = (nodehdr1.count - nodehdr2.count) / 2;
920 239 : if (count == 0)
921 0 : return;
922 239 : tp = state->args->trans;
923 : /*
924 : * Two cases: high-to-low and low-to-high.
925 : */
926 239 : if (count > 0) {
927 : /*
928 : * Move elements in node2 up to make a hole.
929 : */
930 239 : tmp = nodehdr2.count;
931 239 : if (tmp > 0) {
932 0 : tmp *= (uint)sizeof(xfs_da_node_entry_t);
933 0 : btree_s = &btree2[0];
934 0 : btree_d = &btree2[count];
935 0 : memmove(btree_d, btree_s, tmp);
936 : }
937 :
938 : /*
939 : * Move the req'd B-tree elements from high in node1 to
940 : * low in node2.
941 : */
942 239 : nodehdr2.count += count;
943 239 : tmp = count * (uint)sizeof(xfs_da_node_entry_t);
944 239 : btree_s = &btree1[nodehdr1.count - count];
945 239 : btree_d = &btree2[0];
946 478 : memcpy(btree_d, btree_s, tmp);
947 239 : nodehdr1.count -= count;
948 : } else {
949 : /*
950 : * Move the req'd B-tree elements from low in node2 to
951 : * high in node1.
952 : */
953 0 : count = -count;
954 0 : tmp = count * (uint)sizeof(xfs_da_node_entry_t);
955 0 : btree_s = &btree2[0];
956 0 : btree_d = &btree1[nodehdr1.count];
957 0 : memcpy(btree_d, btree_s, tmp);
958 0 : nodehdr1.count += count;
959 :
960 0 : xfs_trans_log_buf(tp, blk1->bp,
961 0 : XFS_DA_LOGRANGE(node1, btree_d, tmp));
962 :
963 : /*
964 : * Move elements in node2 down to fill the hole.
965 : */
966 0 : tmp = nodehdr2.count - count;
967 0 : tmp *= (uint)sizeof(xfs_da_node_entry_t);
968 0 : btree_s = &btree2[count];
969 0 : btree_d = &btree2[0];
970 0 : memmove(btree_d, btree_s, tmp);
971 0 : nodehdr2.count -= count;
972 : }
973 :
974 : /*
975 : * Log header of node 1 and all current bits of node 2.
976 : */
977 239 : xfs_da3_node_hdr_to_disk(dp->i_mount, node1, &nodehdr1);
978 239 : xfs_trans_log_buf(tp, blk1->bp,
979 239 : XFS_DA_LOGRANGE(node1, &node1->hdr,
980 : state->args->geo->node_hdr_size));
981 :
982 239 : xfs_da3_node_hdr_to_disk(dp->i_mount, node2, &nodehdr2);
983 239 : xfs_trans_log_buf(tp, blk2->bp,
984 239 : XFS_DA_LOGRANGE(node2, &node2->hdr,
985 : state->args->geo->node_hdr_size +
986 : (sizeof(btree2[0]) * nodehdr2.count)));
987 :
988 : /*
989 : * Record the last hashval from each block for upward propagation.
990 : * (note: don't use the swapped node pointers)
991 : */
992 239 : if (swap) {
993 0 : node1 = blk1->bp->b_addr;
994 0 : node2 = blk2->bp->b_addr;
995 0 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
996 0 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
997 0 : btree1 = nodehdr1.btree;
998 0 : btree2 = nodehdr2.btree;
999 : }
1000 239 : blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval);
1001 239 : blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval);
1002 :
1003 : /*
1004 : * Adjust the expected index for insertion.
1005 : */
1006 239 : if (blk1->index >= nodehdr1.count) {
1007 222 : blk2->index = blk1->index - nodehdr1.count;
1008 222 : blk1->index = nodehdr1.count + 1; /* make it invalid */
1009 : }
1010 : }
1011 :
1012 : /*
1013 : * Add a new entry to an intermediate node.
1014 : */
1015 : STATIC void
1016 153471 : xfs_da3_node_add(
1017 : struct xfs_da_state *state,
1018 : struct xfs_da_state_blk *oldblk,
1019 : struct xfs_da_state_blk *newblk)
1020 : {
1021 153471 : struct xfs_da_intnode *node;
1022 153471 : struct xfs_da3_icnode_hdr nodehdr;
1023 153471 : struct xfs_da_node_entry *btree;
1024 153471 : int tmp;
1025 153471 : struct xfs_inode *dp = state->args->dp;
1026 :
1027 153471 : trace_xfs_da_node_add(state->args);
1028 :
1029 153470 : node = oldblk->bp->b_addr;
1030 153470 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
1031 153470 : btree = nodehdr.btree;
1032 :
1033 153470 : ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count);
1034 153470 : ASSERT(newblk->blkno != 0);
1035 153470 : if (state->args->whichfork == XFS_DATA_FORK)
1036 64441 : ASSERT(newblk->blkno >= state->args->geo->leafblk &&
1037 : newblk->blkno < state->args->geo->freeblk);
1038 :
1039 : /*
1040 : * We may need to make some room before we insert the new node.
1041 : */
1042 153470 : tmp = 0;
1043 153470 : if (oldblk->index < nodehdr.count) {
1044 44622 : tmp = (nodehdr.count - oldblk->index) * (uint)sizeof(*btree);
1045 89244 : memmove(&btree[oldblk->index + 1], &btree[oldblk->index], tmp);
1046 : }
1047 153470 : btree[oldblk->index].hashval = cpu_to_be32(newblk->hashval);
1048 153470 : btree[oldblk->index].before = cpu_to_be32(newblk->blkno);
1049 153470 : xfs_trans_log_buf(state->args->trans, oldblk->bp,
1050 153470 : XFS_DA_LOGRANGE(node, &btree[oldblk->index],
1051 : tmp + sizeof(*btree)));
1052 :
1053 153473 : nodehdr.count += 1;
1054 153473 : xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
1055 153470 : xfs_trans_log_buf(state->args->trans, oldblk->bp,
1056 153470 : XFS_DA_LOGRANGE(node, &node->hdr,
1057 : state->args->geo->node_hdr_size));
1058 :
1059 : /*
1060 : * Copy the last hash value from the oldblk to propagate upwards.
1061 : */
1062 153473 : oldblk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
1063 153473 : }
1064 :
1065 : /*========================================================================
1066 : * Routines used for shrinking the Btree.
1067 : *========================================================================*/
1068 :
1069 : /*
1070 : * Deallocate an empty leaf node, remove it from its parent,
1071 : * possibly deallocating that block, etc...
1072 : */
1073 : int
1074 3292209 : xfs_da3_join(
1075 : struct xfs_da_state *state)
1076 : {
1077 3292209 : struct xfs_da_state_blk *drop_blk;
1078 3292209 : struct xfs_da_state_blk *save_blk;
1079 3292209 : int action = 0;
1080 3292209 : int error;
1081 :
1082 3292209 : trace_xfs_da_join(state->args);
1083 :
1084 3292129 : drop_blk = &state->path.blk[ state->path.active-1 ];
1085 3292129 : save_blk = &state->altpath.blk[ state->path.active-1 ];
1086 3292132 : ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
1087 3292132 : ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
1088 : drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
1089 :
1090 : /*
1091 : * Walk back up the tree joining/deallocating as necessary.
1092 : * When we stop dropping blocks, break out.
1093 : */
1094 3349061 : for ( ; state->path.active >= 2; drop_blk--, save_blk--,
1095 56929 : state->path.active--) {
1096 : /*
1097 : * See if we can combine the block with a neighbor.
1098 : * (action == 0) => no options, just leave
1099 : * (action == 1) => coalesce, then unlink
1100 : * (action == 2) => block empty, unlink it
1101 : */
1102 3303819 : switch (drop_blk->magic) {
1103 603758 : case XFS_ATTR_LEAF_MAGIC:
1104 603758 : error = xfs_attr3_leaf_toosmall(state, &action);
1105 603783 : if (error)
1106 10 : return error;
1107 603773 : if (action == 0)
1108 : return 0;
1109 600 : xfs_attr3_leaf_unbalance(state, drop_blk, save_blk);
1110 600 : break;
1111 2688381 : case XFS_DIR2_LEAFN_MAGIC:
1112 2688381 : error = xfs_dir2_leafn_toosmall(state, &action);
1113 2688437 : if (error)
1114 0 : return error;
1115 2688437 : if (action == 0)
1116 : return 0;
1117 56281 : xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
1118 56281 : break;
1119 11680 : case XFS_DA_NODE_MAGIC:
1120 : /*
1121 : * Remove the offending node, fixup hashvals,
1122 : * check for a toosmall neighbor.
1123 : */
1124 11680 : xfs_da3_node_remove(state, drop_blk);
1125 11680 : xfs_da3_fixhashpath(state, &state->path);
1126 11680 : error = xfs_da3_node_toosmall(state, &action);
1127 11680 : if (error)
1128 0 : return error;
1129 11680 : if (action == 0)
1130 : return 0;
1131 41 : xfs_da3_node_unbalance(state, drop_blk, save_blk);
1132 41 : break;
1133 : }
1134 56922 : xfs_da3_fixhashpath(state, &state->altpath);
1135 56922 : error = xfs_da3_blk_unlink(state, drop_blk, save_blk);
1136 56922 : xfs_da_state_kill_altpath(state);
1137 56922 : if (error)
1138 0 : return error;
1139 56922 : error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
1140 : drop_blk->bp);
1141 56929 : drop_blk->bp = NULL;
1142 56929 : if (error)
1143 0 : return error;
1144 : }
1145 : /*
1146 : * We joined all the way to the top. If it turns out that
1147 : * we only have one entry in the root, make the child block
1148 : * the new root.
1149 : */
1150 45242 : xfs_da3_node_remove(state, drop_blk);
1151 45242 : xfs_da3_fixhashpath(state, &state->path);
1152 45241 : error = xfs_da3_root_join(state, &state->path.blk[0]);
1153 45241 : return error;
1154 : }
1155 :
1156 : #ifdef DEBUG
1157 : static void
1158 7281 : xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
1159 : {
1160 7281 : __be16 magic = blkinfo->magic;
1161 :
1162 7281 : if (level == 1) {
1163 7271 : ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
1164 : magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
1165 : magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
1166 : magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
1167 : } else {
1168 10 : ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
1169 : magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
1170 : }
1171 7281 : ASSERT(!blkinfo->forw);
1172 7281 : ASSERT(!blkinfo->back);
1173 7281 : }
1174 : #else /* !DEBUG */
1175 : #define xfs_da_blkinfo_onlychild_validate(blkinfo, level)
1176 : #endif /* !DEBUG */
1177 :
1178 : /*
1179 : * We have only one entry in the root. Copy the only remaining child of
1180 : * the old root to block 0 as the new root node.
1181 : */
1182 : STATIC int
1183 45242 : xfs_da3_root_join(
1184 : struct xfs_da_state *state,
1185 : struct xfs_da_state_blk *root_blk)
1186 : {
1187 45242 : struct xfs_da_intnode *oldroot;
1188 45242 : struct xfs_da_args *args;
1189 45242 : xfs_dablk_t child;
1190 45242 : struct xfs_buf *bp;
1191 45242 : struct xfs_da3_icnode_hdr oldroothdr;
1192 45242 : int error;
1193 45242 : struct xfs_inode *dp = state->args->dp;
1194 :
1195 45242 : trace_xfs_da_root_join(state->args);
1196 :
1197 45241 : ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
1198 :
1199 45241 : args = state->args;
1200 45241 : oldroot = root_blk->bp->b_addr;
1201 45241 : xfs_da3_node_hdr_from_disk(dp->i_mount, &oldroothdr, oldroot);
1202 45241 : ASSERT(oldroothdr.forw == 0);
1203 45241 : ASSERT(oldroothdr.back == 0);
1204 :
1205 : /*
1206 : * If the root has more than one child, then don't do anything.
1207 : */
1208 45241 : if (oldroothdr.count > 1)
1209 : return 0;
1210 :
1211 : /*
1212 : * Read in the (only) child block, then copy those bytes into
1213 : * the root block's buffer and free the original child block.
1214 : */
1215 7280 : child = be32_to_cpu(oldroothdr.btree[0].before);
1216 7280 : ASSERT(child != 0);
1217 7280 : error = xfs_da3_node_read(args->trans, dp, child, &bp, args->whichfork);
1218 7281 : if (error)
1219 : return error;
1220 7281 : xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level);
1221 :
1222 : /*
1223 : * This could be copying a leaf back into the root block in the case of
1224 : * there only being a single leaf block left in the tree. Hence we have
1225 : * to update the b_ops pointer as well to match the buffer type change
1226 : * that could occur. For dir3 blocks we also need to update the block
1227 : * number in the buffer header.
1228 : */
1229 14562 : memcpy(root_blk->bp->b_addr, bp->b_addr, args->geo->blksize);
1230 7281 : root_blk->bp->b_ops = bp->b_ops;
1231 7281 : xfs_trans_buf_copy_type(root_blk->bp, bp);
1232 7281 : if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) {
1233 7281 : struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr;
1234 7281 : da3->blkno = cpu_to_be64(xfs_buf_daddr(root_blk->bp));
1235 : }
1236 7281 : xfs_trans_log_buf(args->trans, root_blk->bp, 0,
1237 7281 : args->geo->blksize - 1);
1238 7281 : error = xfs_da_shrink_inode(args, child, bp);
1239 7281 : return error;
1240 : }
1241 :
1242 : /*
1243 : * Check a node block and its neighbors to see if the block should be
1244 : * collapsed into one or the other neighbor. Always keep the block
1245 : * with the smaller block number.
1246 : * If the current block is over 50% full, don't try to join it, return 0.
1247 : * If the block is empty, fill in the state structure and return 2.
1248 : * If it can be collapsed, fill in the state structure and return 1.
1249 : * If nothing can be done, return 0.
1250 : */
1251 : STATIC int
1252 11680 : xfs_da3_node_toosmall(
1253 : struct xfs_da_state *state,
1254 : int *action)
1255 : {
1256 11680 : struct xfs_da_intnode *node;
1257 11680 : struct xfs_da_state_blk *blk;
1258 11680 : struct xfs_da_blkinfo *info;
1259 11680 : xfs_dablk_t blkno;
1260 11680 : struct xfs_buf *bp;
1261 11680 : struct xfs_da3_icnode_hdr nodehdr;
1262 11680 : int count;
1263 11680 : int forward;
1264 11680 : int error;
1265 11680 : int retval;
1266 11680 : int i;
1267 11680 : struct xfs_inode *dp = state->args->dp;
1268 :
1269 11680 : trace_xfs_da_node_toosmall(state->args);
1270 :
1271 : /*
1272 : * Check for the degenerate case of the block being over 50% full.
1273 : * If so, it's not worth even looking to see if we might be able
1274 : * to coalesce with a sibling.
1275 : */
1276 11680 : blk = &state->path.blk[ state->path.active-1 ];
1277 11680 : info = blk->bp->b_addr;
1278 11680 : node = (xfs_da_intnode_t *)info;
1279 11680 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
1280 11680 : if (nodehdr.count > (state->args->geo->node_ents >> 1)) {
1281 6425 : *action = 0; /* blk over 50%, don't try to join */
1282 6425 : return 0; /* blk over 50%, don't try to join */
1283 : }
1284 :
1285 : /*
1286 : * Check for the degenerate case of the block being empty.
1287 : * If the block is empty, we'll simply delete it, no need to
1288 : * coalesce it with a sibling block. We choose (arbitrarily)
1289 : * to merge with the forward block unless it is NULL.
1290 : */
1291 5255 : if (nodehdr.count == 0) {
1292 : /*
1293 : * Make altpath point to the block we want to keep and
1294 : * path point to the block we want to drop (this one).
1295 : */
1296 0 : forward = (info->forw != 0);
1297 0 : memcpy(&state->altpath, &state->path, sizeof(state->path));
1298 0 : error = xfs_da3_path_shift(state, &state->altpath, forward,
1299 : 0, &retval);
1300 0 : if (error)
1301 : return error;
1302 0 : if (retval) {
1303 0 : *action = 0;
1304 : } else {
1305 0 : *action = 2;
1306 : }
1307 0 : return 0;
1308 : }
1309 :
1310 : /*
1311 : * Examine each sibling block to see if we can coalesce with
1312 : * at least 25% free space to spare. We need to figure out
1313 : * whether to merge with the forward or the backward block.
1314 : * We prefer coalescing with the lower numbered sibling so as
1315 : * to shrink a directory over time.
1316 : */
1317 5255 : count = state->args->geo->node_ents;
1318 5255 : count -= state->args->geo->node_ents >> 2;
1319 5255 : count -= nodehdr.count;
1320 :
1321 : /* start with smaller blk num */
1322 5255 : forward = nodehdr.forw < nodehdr.back;
1323 15724 : for (i = 0; i < 2; forward = !forward, i++) {
1324 10510 : struct xfs_da3_icnode_hdr thdr;
1325 10510 : if (forward)
1326 : blkno = nodehdr.forw;
1327 : else
1328 5255 : blkno = nodehdr.back;
1329 10510 : if (blkno == 0)
1330 4051 : continue;
1331 6459 : error = xfs_da3_node_read(state->args->trans, dp, blkno, &bp,
1332 6459 : state->args->whichfork);
1333 6459 : if (error)
1334 0 : return error;
1335 :
1336 6459 : node = bp->b_addr;
1337 6459 : xfs_da3_node_hdr_from_disk(dp->i_mount, &thdr, node);
1338 6459 : xfs_trans_brelse(state->args->trans, bp);
1339 :
1340 6459 : if (count - thdr.count >= 0)
1341 : break; /* fits with at least 25% to spare */
1342 : }
1343 5255 : if (i >= 2) {
1344 5214 : *action = 0;
1345 5214 : return 0;
1346 : }
1347 :
1348 : /*
1349 : * Make altpath point to the block we want to keep (the lower
1350 : * numbered block) and path point to the block we want to drop.
1351 : */
1352 82 : memcpy(&state->altpath, &state->path, sizeof(state->path));
1353 41 : if (blkno < blk->blkno) {
1354 11 : error = xfs_da3_path_shift(state, &state->altpath, forward,
1355 : 0, &retval);
1356 : } else {
1357 30 : error = xfs_da3_path_shift(state, &state->path, forward,
1358 : 0, &retval);
1359 : }
1360 41 : if (error)
1361 : return error;
1362 41 : if (retval) {
1363 0 : *action = 0;
1364 0 : return 0;
1365 : }
1366 41 : *action = 1;
1367 41 : return 0;
1368 : }
1369 :
1370 : /*
1371 : * Pick up the last hashvalue from an intermediate node.
1372 : */
1373 : STATIC uint
1374 210433 : xfs_da3_node_lasthash(
1375 : struct xfs_inode *dp,
1376 : struct xfs_buf *bp,
1377 : int *count)
1378 : {
1379 210433 : struct xfs_da3_icnode_hdr nodehdr;
1380 :
1381 210433 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, bp->b_addr);
1382 210432 : if (count)
1383 210432 : *count = nodehdr.count;
1384 210432 : if (!nodehdr.count)
1385 : return 0;
1386 210432 : return be32_to_cpu(nodehdr.btree[nodehdr.count - 1].hashval);
1387 : }
1388 :
1389 : /*
1390 : * Walk back up the tree adjusting hash values as necessary,
1391 : * when we stop making changes, return.
1392 : */
1393 : void
1394 76411956 : xfs_da3_fixhashpath(
1395 : struct xfs_da_state *state,
1396 : struct xfs_da_state_path *path)
1397 : {
1398 76411956 : struct xfs_da_state_blk *blk;
1399 76411956 : struct xfs_da_intnode *node;
1400 76411956 : struct xfs_da_node_entry *btree;
1401 76411956 : xfs_dahash_t lasthash=0;
1402 76411956 : int level;
1403 76411956 : int count;
1404 76411956 : struct xfs_inode *dp = state->args->dp;
1405 :
1406 76411956 : trace_xfs_da_fixhashpath(state->args);
1407 :
1408 76204083 : level = path->active-1;
1409 76204083 : blk = &path->blk[ level ];
1410 76221568 : switch (blk->magic) {
1411 1878517 : case XFS_ATTR_LEAF_MAGIC:
1412 1878517 : lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
1413 1878528 : if (count == 0)
1414 24 : return;
1415 : break;
1416 74132617 : case XFS_DIR2_LEAFN_MAGIC:
1417 74132617 : lasthash = xfs_dir2_leaf_lasthash(dp, blk->bp, &count);
1418 74231359 : if (count == 0)
1419 : return;
1420 : break;
1421 210434 : case XFS_DA_NODE_MAGIC:
1422 210434 : lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count);
1423 210433 : if (count == 0)
1424 : return;
1425 : break;
1426 : }
1427 79557732 : for (blk--, level--; level >= 0; blk--, level--) {
1428 76227522 : struct xfs_da3_icnode_hdr nodehdr;
1429 :
1430 76227522 : node = blk->bp->b_addr;
1431 76227522 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
1432 76319950 : btree = nodehdr.btree;
1433 76319950 : if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
1434 : break;
1435 3234952 : blk->hashval = lasthash;
1436 3234952 : btree[blk->index].hashval = cpu_to_be32(lasthash);
1437 3234952 : xfs_trans_log_buf(state->args->trans, blk->bp,
1438 3234952 : XFS_DA_LOGRANGE(node, &btree[blk->index],
1439 : sizeof(*btree)));
1440 :
1441 3237436 : lasthash = be32_to_cpu(btree[nodehdr.count - 1].hashval);
1442 : }
1443 : }
1444 :
1445 : /*
1446 : * Remove an entry from an intermediate node.
1447 : */
1448 : STATIC void
1449 56922 : xfs_da3_node_remove(
1450 : struct xfs_da_state *state,
1451 : struct xfs_da_state_blk *drop_blk)
1452 : {
1453 56922 : struct xfs_da_intnode *node;
1454 56922 : struct xfs_da3_icnode_hdr nodehdr;
1455 56922 : struct xfs_da_node_entry *btree;
1456 56922 : int index;
1457 56922 : int tmp;
1458 56922 : struct xfs_inode *dp = state->args->dp;
1459 :
1460 56922 : trace_xfs_da_node_remove(state->args);
1461 :
1462 56921 : node = drop_blk->bp->b_addr;
1463 56921 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
1464 56922 : ASSERT(drop_blk->index < nodehdr.count);
1465 56922 : ASSERT(drop_blk->index >= 0);
1466 :
1467 : /*
1468 : * Copy over the offending entry, or just zero it out.
1469 : */
1470 56922 : index = drop_blk->index;
1471 56922 : btree = nodehdr.btree;
1472 56922 : if (index < nodehdr.count - 1) {
1473 52085 : tmp = nodehdr.count - index - 1;
1474 52085 : tmp *= (uint)sizeof(xfs_da_node_entry_t);
1475 104170 : memmove(&btree[index], &btree[index + 1], tmp);
1476 52085 : xfs_trans_log_buf(state->args->trans, drop_blk->bp,
1477 52085 : XFS_DA_LOGRANGE(node, &btree[index], tmp));
1478 52085 : index = nodehdr.count - 1;
1479 : }
1480 56922 : memset(&btree[index], 0, sizeof(xfs_da_node_entry_t));
1481 56922 : xfs_trans_log_buf(state->args->trans, drop_blk->bp,
1482 56922 : XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index])));
1483 56922 : nodehdr.count -= 1;
1484 56922 : xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
1485 56921 : xfs_trans_log_buf(state->args->trans, drop_blk->bp,
1486 56921 : XFS_DA_LOGRANGE(node, &node->hdr, state->args->geo->node_hdr_size));
1487 :
1488 : /*
1489 : * Copy the last hash value from the block to propagate upwards.
1490 : */
1491 56922 : drop_blk->hashval = be32_to_cpu(btree[index - 1].hashval);
1492 56922 : }
1493 :
1494 : /*
1495 : * Unbalance the elements between two intermediate nodes,
1496 : * move all Btree elements from one node into another.
1497 : */
1498 : STATIC void
1499 41 : xfs_da3_node_unbalance(
1500 : struct xfs_da_state *state,
1501 : struct xfs_da_state_blk *drop_blk,
1502 : struct xfs_da_state_blk *save_blk)
1503 : {
1504 41 : struct xfs_da_intnode *drop_node;
1505 41 : struct xfs_da_intnode *save_node;
1506 41 : struct xfs_da_node_entry *drop_btree;
1507 41 : struct xfs_da_node_entry *save_btree;
1508 41 : struct xfs_da3_icnode_hdr drop_hdr;
1509 41 : struct xfs_da3_icnode_hdr save_hdr;
1510 41 : struct xfs_trans *tp;
1511 41 : int sindex;
1512 41 : int tmp;
1513 41 : struct xfs_inode *dp = state->args->dp;
1514 :
1515 41 : trace_xfs_da_node_unbalance(state->args);
1516 :
1517 41 : drop_node = drop_blk->bp->b_addr;
1518 41 : save_node = save_blk->bp->b_addr;
1519 41 : xfs_da3_node_hdr_from_disk(dp->i_mount, &drop_hdr, drop_node);
1520 41 : xfs_da3_node_hdr_from_disk(dp->i_mount, &save_hdr, save_node);
1521 41 : drop_btree = drop_hdr.btree;
1522 41 : save_btree = save_hdr.btree;
1523 41 : tp = state->args->trans;
1524 :
1525 : /*
1526 : * If the dying block has lower hashvals, then move all the
1527 : * elements in the remaining block up to make a hole.
1528 : */
1529 41 : if ((be32_to_cpu(drop_btree[0].hashval) <
1530 41 : be32_to_cpu(save_btree[0].hashval)) ||
1531 21 : (be32_to_cpu(drop_btree[drop_hdr.count - 1].hashval) <
1532 21 : be32_to_cpu(save_btree[save_hdr.count - 1].hashval))) {
1533 : /* XXX: check this - is memmove dst correct? */
1534 20 : tmp = save_hdr.count * sizeof(xfs_da_node_entry_t);
1535 40 : memmove(&save_btree[drop_hdr.count], &save_btree[0], tmp);
1536 :
1537 20 : sindex = 0;
1538 20 : xfs_trans_log_buf(tp, save_blk->bp,
1539 20 : XFS_DA_LOGRANGE(save_node, &save_btree[0],
1540 : (save_hdr.count + drop_hdr.count) *
1541 : sizeof(xfs_da_node_entry_t)));
1542 : } else {
1543 21 : sindex = save_hdr.count;
1544 21 : xfs_trans_log_buf(tp, save_blk->bp,
1545 21 : XFS_DA_LOGRANGE(save_node, &save_btree[sindex],
1546 : drop_hdr.count * sizeof(xfs_da_node_entry_t)));
1547 : }
1548 :
1549 : /*
1550 : * Move all the B-tree elements from drop_blk to save_blk.
1551 : */
1552 41 : tmp = drop_hdr.count * (uint)sizeof(xfs_da_node_entry_t);
1553 82 : memcpy(&save_btree[sindex], &drop_btree[0], tmp);
1554 41 : save_hdr.count += drop_hdr.count;
1555 :
1556 41 : xfs_da3_node_hdr_to_disk(dp->i_mount, save_node, &save_hdr);
1557 41 : xfs_trans_log_buf(tp, save_blk->bp,
1558 41 : XFS_DA_LOGRANGE(save_node, &save_node->hdr,
1559 : state->args->geo->node_hdr_size));
1560 :
1561 : /*
1562 : * Save the last hashval in the remaining block for upward propagation.
1563 : */
1564 41 : save_blk->hashval = be32_to_cpu(save_btree[save_hdr.count - 1].hashval);
1565 41 : }
1566 :
1567 : /*========================================================================
1568 : * Routines used for finding things in the Btree.
1569 : *========================================================================*/
1570 :
1571 : /*
1572 : * Walk down the Btree looking for a particular filename, filling
1573 : * in the state structure as we go.
1574 : *
1575 : * We will set the state structure to point to each of the elements
1576 : * in each of the nodes where either the hashval is or should be.
1577 : *
1578 : * We support duplicate hashval's so for each entry in the current
1579 : * node that could contain the desired hashval, descend. This is a
1580 : * pruned depth-first tree search.
1581 : */
1582 : int /* error */
1583 223147287 : xfs_da3_node_lookup_int(
1584 : struct xfs_da_state *state,
1585 : int *result)
1586 : {
1587 223147287 : struct xfs_da_state_blk *blk;
1588 223147287 : struct xfs_da_blkinfo *curr;
1589 223147287 : struct xfs_da_intnode *node;
1590 223147287 : struct xfs_da_node_entry *btree;
1591 223147287 : struct xfs_da3_icnode_hdr nodehdr;
1592 223147287 : struct xfs_da_args *args;
1593 223147287 : xfs_dablk_t blkno;
1594 223147287 : xfs_dahash_t hashval;
1595 223147287 : xfs_dahash_t btreehashval;
1596 223147287 : int probe;
1597 223147287 : int span;
1598 223147287 : int max;
1599 223147287 : int error;
1600 223147287 : int retval;
1601 223147287 : unsigned int expected_level = 0;
1602 223147287 : uint16_t magic;
1603 223147287 : struct xfs_inode *dp = state->args->dp;
1604 :
1605 223147287 : args = state->args;
1606 :
1607 : /*
1608 : * Descend thru the B-tree searching each level for the right
1609 : * node to use, until the right hashval is found.
1610 : */
1611 223147287 : blkno = args->geo->leafblk;
1612 223147287 : for (blk = &state->path.blk[0], state->path.active = 1;
1613 467315643 : state->path.active <= XFS_DA_NODE_MAXDEPTH;
1614 244168356 : blk++, state->path.active++) {
1615 : /*
1616 : * Read the next node down in the tree.
1617 : */
1618 467315643 : blk->blkno = blkno;
1619 467315643 : error = xfs_da3_node_read(args->trans, args->dp, blkno,
1620 : &blk->bp, args->whichfork);
1621 468664720 : if (error) {
1622 1325 : blk->blkno = 0;
1623 1325 : state->path.active--;
1624 1325 : return error;
1625 : }
1626 468663395 : curr = blk->bp->b_addr;
1627 468663395 : magic = be16_to_cpu(curr->magic);
1628 :
1629 468663395 : if (magic == XFS_ATTR_LEAF_MAGIC ||
1630 468663395 : magic == XFS_ATTR3_LEAF_MAGIC) {
1631 4420732 : blk->magic = XFS_ATTR_LEAF_MAGIC;
1632 4420732 : blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
1633 4420656 : break;
1634 : }
1635 :
1636 464242663 : if (magic == XFS_DIR2_LEAFN_MAGIC ||
1637 464242663 : magic == XFS_DIR3_LEAFN_MAGIC) {
1638 218960877 : blk->magic = XFS_DIR2_LEAFN_MAGIC;
1639 218960877 : blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
1640 : blk->bp, NULL);
1641 217905954 : break;
1642 : }
1643 :
1644 245281786 : if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) {
1645 0 : xfs_buf_mark_corrupt(blk->bp);
1646 0 : return -EFSCORRUPTED;
1647 : }
1648 :
1649 245281786 : blk->magic = XFS_DA_NODE_MAGIC;
1650 :
1651 : /*
1652 : * Search an intermediate node for a match.
1653 : */
1654 245281786 : node = blk->bp->b_addr;
1655 245281786 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
1656 244168356 : btree = nodehdr.btree;
1657 :
1658 : /* Tree taller than we can handle; bail out! */
1659 244168356 : if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
1660 0 : xfs_buf_mark_corrupt(blk->bp);
1661 0 : return -EFSCORRUPTED;
1662 : }
1663 :
1664 : /* Check the level from the root. */
1665 244168356 : if (blkno == args->geo->leafblk)
1666 221319909 : expected_level = nodehdr.level - 1;
1667 22848447 : else if (expected_level != nodehdr.level) {
1668 0 : xfs_buf_mark_corrupt(blk->bp);
1669 0 : return -EFSCORRUPTED;
1670 : } else
1671 22848447 : expected_level--;
1672 :
1673 244168356 : max = nodehdr.count;
1674 244168356 : blk->hashval = be32_to_cpu(btree[max - 1].hashval);
1675 :
1676 : /*
1677 : * Binary search. (note: small blocks will skip loop)
1678 : */
1679 244168356 : probe = span = max / 2;
1680 244168356 : hashval = args->hashval;
1681 555088908 : while (span > 4) {
1682 311819477 : span /= 2;
1683 311819477 : btreehashval = be32_to_cpu(btree[probe].hashval);
1684 311819477 : if (btreehashval < hashval)
1685 154631328 : probe += span;
1686 157188149 : else if (btreehashval > hashval)
1687 156289224 : probe -= span;
1688 : else
1689 : break;
1690 : }
1691 244168356 : ASSERT((probe >= 0) && (probe < max));
1692 244168356 : ASSERT((span <= 4) ||
1693 : (be32_to_cpu(btree[probe].hashval) == hashval));
1694 :
1695 : /*
1696 : * Since we may have duplicate hashval's, find the first
1697 : * matching hashval in the node.
1698 : */
1699 597642953 : while (probe > 0 &&
1700 504809629 : be32_to_cpu(btree[probe].hashval) >= hashval) {
1701 353474597 : probe--;
1702 : }
1703 537432451 : while (probe < max &&
1704 532824272 : be32_to_cpu(btree[probe].hashval) < hashval) {
1705 293264095 : probe++;
1706 : }
1707 :
1708 : /*
1709 : * Pick the right block to descend on.
1710 : */
1711 244168356 : if (probe == max) {
1712 5269092 : blk->index = max - 1;
1713 5269092 : blkno = be32_to_cpu(btree[max - 1].before);
1714 : } else {
1715 238899264 : blk->index = probe;
1716 238899264 : blkno = be32_to_cpu(btree[probe].before);
1717 : }
1718 :
1719 : /* We can't point back to the root. */
1720 244168356 : if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk))
1721 0 : return -EFSCORRUPTED;
1722 : }
1723 :
1724 222326610 : if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0))
1725 0 : return -EFSCORRUPTED;
1726 :
1727 : /*
1728 : * A leaf block that ends in the hashval that we are interested in
1729 : * (final hashval == search hashval) means that the next block may
1730 : * contain more entries with the same hashval, shift upward to the
1731 : * next leaf and keep searching.
1732 : */
1733 974190300 : for (;;) {
1734 598258455 : if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
1735 225288430 : retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
1736 : &blk->index, state);
1737 372970025 : } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1738 372970025 : retval = xfs_attr3_leaf_lookup_int(blk->bp, args);
1739 373024055 : blk->index = args->index;
1740 373024055 : args->blkno = blk->blkno;
1741 : } else {
1742 0 : ASSERT(0);
1743 0 : return -EFSCORRUPTED;
1744 : }
1745 598669282 : if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
1746 441813099 : (blk->hashval == args->hashval)) {
1747 377404229 : error = xfs_da3_path_shift(state, &state->path, 1, 1,
1748 : &retval);
1749 377246085 : if (error)
1750 0 : return error;
1751 377246085 : if (retval == 0) {
1752 375931845 : continue;
1753 1314240 : } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1754 : /* path_shift() gives ENOENT */
1755 360129 : retval = -ENOATTR;
1756 : }
1757 : }
1758 222582115 : break;
1759 : }
1760 222582115 : *result = retval;
1761 222582115 : return 0;
1762 : }
1763 :
1764 : /*========================================================================
1765 : * Utility routines.
1766 : *========================================================================*/
1767 :
1768 : /*
1769 : * Compare two intermediate nodes for "order".
1770 : */
1771 : STATIC int
1772 239 : xfs_da3_node_order(
1773 : struct xfs_inode *dp,
1774 : struct xfs_buf *node1_bp,
1775 : struct xfs_buf *node2_bp)
1776 : {
1777 239 : struct xfs_da_intnode *node1;
1778 239 : struct xfs_da_intnode *node2;
1779 239 : struct xfs_da_node_entry *btree1;
1780 239 : struct xfs_da_node_entry *btree2;
1781 239 : struct xfs_da3_icnode_hdr node1hdr;
1782 239 : struct xfs_da3_icnode_hdr node2hdr;
1783 :
1784 239 : node1 = node1_bp->b_addr;
1785 239 : node2 = node2_bp->b_addr;
1786 239 : xfs_da3_node_hdr_from_disk(dp->i_mount, &node1hdr, node1);
1787 239 : xfs_da3_node_hdr_from_disk(dp->i_mount, &node2hdr, node2);
1788 239 : btree1 = node1hdr.btree;
1789 239 : btree2 = node2hdr.btree;
1790 :
1791 239 : if (node1hdr.count > 0 && node2hdr.count > 0 &&
1792 239 : ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
1793 239 : (be32_to_cpu(btree2[node2hdr.count - 1].hashval) <
1794 239 : be32_to_cpu(btree1[node1hdr.count - 1].hashval)))) {
1795 0 : return 1;
1796 : }
1797 : return 0;
1798 : }
1799 :
1800 : /*
1801 : * Link a new block into a doubly linked list of blocks (of whatever type).
1802 : */
1803 : int /* error */
1804 162537 : xfs_da3_blk_link(
1805 : struct xfs_da_state *state,
1806 : struct xfs_da_state_blk *old_blk,
1807 : struct xfs_da_state_blk *new_blk)
1808 : {
1809 162537 : struct xfs_da_blkinfo *old_info;
1810 162537 : struct xfs_da_blkinfo *new_info;
1811 162537 : struct xfs_da_blkinfo *tmp_info;
1812 162537 : struct xfs_da_args *args;
1813 162537 : struct xfs_buf *bp;
1814 162537 : int before = 0;
1815 162537 : int error;
1816 162537 : struct xfs_inode *dp = state->args->dp;
1817 :
1818 : /*
1819 : * Set up environment.
1820 : */
1821 162537 : args = state->args;
1822 162537 : ASSERT(args != NULL);
1823 162537 : old_info = old_blk->bp->b_addr;
1824 162537 : new_info = new_blk->bp->b_addr;
1825 162537 : ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
1826 : old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1827 : old_blk->magic == XFS_ATTR_LEAF_MAGIC);
1828 :
1829 162537 : switch (old_blk->magic) {
1830 88853 : case XFS_ATTR_LEAF_MAGIC:
1831 88853 : before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
1832 88853 : break;
1833 73445 : case XFS_DIR2_LEAFN_MAGIC:
1834 73445 : before = xfs_dir2_leafn_order(dp, old_blk->bp, new_blk->bp);
1835 73445 : break;
1836 239 : case XFS_DA_NODE_MAGIC:
1837 239 : before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp);
1838 239 : break;
1839 : }
1840 :
1841 : /*
1842 : * Link blocks in appropriate order.
1843 : */
1844 162538 : if (before) {
1845 : /*
1846 : * Link new block in before existing block.
1847 : */
1848 0 : trace_xfs_da_link_before(args);
1849 0 : new_info->forw = cpu_to_be32(old_blk->blkno);
1850 0 : new_info->back = old_info->back;
1851 0 : if (old_info->back) {
1852 0 : error = xfs_da3_node_read(args->trans, dp,
1853 : be32_to_cpu(old_info->back),
1854 : &bp, args->whichfork);
1855 0 : if (error)
1856 : return error;
1857 0 : ASSERT(bp != NULL);
1858 0 : tmp_info = bp->b_addr;
1859 0 : ASSERT(tmp_info->magic == old_info->magic);
1860 0 : ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
1861 0 : tmp_info->forw = cpu_to_be32(new_blk->blkno);
1862 0 : xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1863 : }
1864 0 : old_info->back = cpu_to_be32(new_blk->blkno);
1865 : } else {
1866 : /*
1867 : * Link new block in after existing block.
1868 : */
1869 162538 : trace_xfs_da_link_after(args);
1870 162538 : new_info->forw = old_info->forw;
1871 162538 : new_info->back = cpu_to_be32(old_blk->blkno);
1872 162538 : if (old_info->forw) {
1873 44632 : error = xfs_da3_node_read(args->trans, dp,
1874 : be32_to_cpu(old_info->forw),
1875 : &bp, args->whichfork);
1876 44632 : if (error)
1877 : return error;
1878 44632 : ASSERT(bp != NULL);
1879 44632 : tmp_info = bp->b_addr;
1880 44632 : ASSERT(tmp_info->magic == old_info->magic);
1881 44632 : ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
1882 44632 : tmp_info->back = cpu_to_be32(new_blk->blkno);
1883 44632 : xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1884 : }
1885 162538 : old_info->forw = cpu_to_be32(new_blk->blkno);
1886 : }
1887 :
1888 162538 : xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
1889 162539 : xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
1890 162539 : return 0;
1891 : }
1892 :
1893 : /*
1894 : * Unlink a block from a doubly linked list of blocks.
1895 : */
1896 : STATIC int /* error */
1897 56922 : xfs_da3_blk_unlink(
1898 : struct xfs_da_state *state,
1899 : struct xfs_da_state_blk *drop_blk,
1900 : struct xfs_da_state_blk *save_blk)
1901 : {
1902 56922 : struct xfs_da_blkinfo *drop_info;
1903 56922 : struct xfs_da_blkinfo *save_info;
1904 56922 : struct xfs_da_blkinfo *tmp_info;
1905 56922 : struct xfs_da_args *args;
1906 56922 : struct xfs_buf *bp;
1907 56922 : int error;
1908 :
1909 : /*
1910 : * Set up environment.
1911 : */
1912 56922 : args = state->args;
1913 56922 : ASSERT(args != NULL);
1914 56922 : save_info = save_blk->bp->b_addr;
1915 56922 : drop_info = drop_blk->bp->b_addr;
1916 56922 : ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
1917 : save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1918 : save_blk->magic == XFS_ATTR_LEAF_MAGIC);
1919 56922 : ASSERT(save_blk->magic == drop_blk->magic);
1920 56922 : ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) ||
1921 : (be32_to_cpu(save_info->back) == drop_blk->blkno));
1922 56922 : ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) ||
1923 : (be32_to_cpu(drop_info->back) == save_blk->blkno));
1924 :
1925 : /*
1926 : * Unlink the leaf block from the doubly linked chain of leaves.
1927 : */
1928 56922 : if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
1929 18268 : trace_xfs_da_unlink_back(args);
1930 18268 : save_info->back = drop_info->back;
1931 18268 : if (drop_info->back) {
1932 10903 : error = xfs_da3_node_read(args->trans, args->dp,
1933 : be32_to_cpu(drop_info->back),
1934 : &bp, args->whichfork);
1935 10903 : if (error)
1936 : return error;
1937 10903 : ASSERT(bp != NULL);
1938 10903 : tmp_info = bp->b_addr;
1939 10903 : ASSERT(tmp_info->magic == save_info->magic);
1940 10903 : ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno);
1941 10903 : tmp_info->forw = cpu_to_be32(save_blk->blkno);
1942 10903 : xfs_trans_log_buf(args->trans, bp, 0,
1943 : sizeof(*tmp_info) - 1);
1944 : }
1945 : } else {
1946 38654 : trace_xfs_da_unlink_forward(args);
1947 38654 : save_info->forw = drop_info->forw;
1948 38654 : if (drop_info->forw) {
1949 33838 : error = xfs_da3_node_read(args->trans, args->dp,
1950 : be32_to_cpu(drop_info->forw),
1951 : &bp, args->whichfork);
1952 33838 : if (error)
1953 : return error;
1954 33838 : ASSERT(bp != NULL);
1955 33838 : tmp_info = bp->b_addr;
1956 33838 : ASSERT(tmp_info->magic == save_info->magic);
1957 33838 : ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno);
1958 33838 : tmp_info->back = cpu_to_be32(save_blk->blkno);
1959 33838 : xfs_trans_log_buf(args->trans, bp, 0,
1960 : sizeof(*tmp_info) - 1);
1961 : }
1962 : }
1963 :
1964 56922 : xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
1965 56922 : return 0;
1966 : }
1967 :
1968 : /*
1969 : * Move a path "forward" or "!forward" one block at the current level.
1970 : *
1971 : * This routine will adjust a "path" to point to the next block
1972 : * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
1973 : * Btree, including updating pointers to the intermediate nodes between
1974 : * the new bottom and the root.
1975 : */
1976 : int /* error */
1977 378344221 : xfs_da3_path_shift(
1978 : struct xfs_da_state *state,
1979 : struct xfs_da_state_path *path,
1980 : int forward,
1981 : int release,
1982 : int *result)
1983 : {
1984 378344221 : struct xfs_da_state_blk *blk;
1985 378344221 : struct xfs_da_blkinfo *info;
1986 378344221 : struct xfs_da_args *args;
1987 378344221 : struct xfs_da_node_entry *btree;
1988 378344221 : struct xfs_da3_icnode_hdr nodehdr;
1989 378344221 : struct xfs_buf *bp;
1990 378344221 : xfs_dablk_t blkno = 0;
1991 378344221 : int level;
1992 378344221 : int error;
1993 378344221 : struct xfs_inode *dp = state->args->dp;
1994 :
1995 378344221 : trace_xfs_da_path_shift(state->args);
1996 :
1997 : /*
1998 : * Roll up the Btree looking for the first block where our
1999 : * current index is not at the edge of the block. Note that
2000 : * we skip the bottom layer because we want the sibling block.
2001 : */
2002 378344205 : args = state->args;
2003 378344205 : ASSERT(args != NULL);
2004 378344205 : ASSERT(path != NULL);
2005 378344205 : ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
2006 378344205 : level = (path->active-1) - 1; /* skip bottom layer in path */
2007 381031958 : for (; level >= 0; level--) {
2008 379586025 : blk = &path->blk[level];
2009 379586029 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr,
2010 379586029 : blk->bp->b_addr);
2011 :
2012 379586023 : if (forward && (blk->index < nodehdr.count - 1)) {
2013 376419030 : blk->index++;
2014 376419030 : blkno = be32_to_cpu(nodehdr.btree[blk->index].before);
2015 376419030 : break;
2016 3166993 : } else if (!forward && (blk->index > 0)) {
2017 479240 : blk->index--;
2018 479240 : blkno = be32_to_cpu(nodehdr.btree[blk->index].before);
2019 479240 : break;
2020 : }
2021 : }
2022 378344203 : if (level < 0) {
2023 1445931 : *result = -ENOENT; /* we're out of our tree */
2024 1445931 : ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
2025 1445931 : return 0;
2026 : }
2027 :
2028 : /*
2029 : * Roll down the edge of the subtree until we reach the
2030 : * same depth we were at originally.
2031 : */
2032 754767217 : for (blk++, level++; level < path->active; blk++, level++) {
2033 : /*
2034 : * Read the next child block into a local buffer.
2035 : */
2036 377868950 : error = xfs_da3_node_read(args->trans, dp, blkno, &bp,
2037 : args->whichfork);
2038 377868944 : if (error)
2039 0 : return error;
2040 :
2041 : /*
2042 : * Release the old block (if it's dirty, the trans doesn't
2043 : * actually let go) and swap the local buffer into the path
2044 : * structure. This ensures failure of the above read doesn't set
2045 : * a NULL buffer in an active slot in the path.
2046 : */
2047 377868944 : if (release)
2048 376901695 : xfs_trans_brelse(args->trans, blk->bp);
2049 377868944 : blk->blkno = blkno;
2050 377868944 : blk->bp = bp;
2051 :
2052 377868944 : info = blk->bp->b_addr;
2053 377868944 : ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
2054 : info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
2055 : info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
2056 : info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
2057 : info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
2058 : info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
2059 :
2060 :
2061 : /*
2062 : * Note: we flatten the magic number to a single type so we
2063 : * don't have to compare against crc/non-crc types elsewhere.
2064 : */
2065 377868944 : switch (be16_to_cpu(info->magic)) {
2066 971441 : case XFS_DA_NODE_MAGIC:
2067 : case XFS_DA3_NODE_MAGIC:
2068 971441 : blk->magic = XFS_DA_NODE_MAGIC;
2069 971441 : xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr,
2070 971441 : bp->b_addr);
2071 971441 : btree = nodehdr.btree;
2072 971441 : blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
2073 971441 : if (forward)
2074 970664 : blk->index = 0;
2075 : else
2076 777 : blk->index = nodehdr.count - 1;
2077 971441 : blkno = be32_to_cpu(btree[blk->index].before);
2078 971441 : break;
2079 368820292 : case XFS_ATTR_LEAF_MAGIC:
2080 : case XFS_ATTR3_LEAF_MAGIC:
2081 368820292 : blk->magic = XFS_ATTR_LEAF_MAGIC;
2082 368820292 : ASSERT(level == path->active-1);
2083 368820292 : blk->index = 0;
2084 368820292 : blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
2085 368820295 : break;
2086 8077211 : case XFS_DIR2_LEAFN_MAGIC:
2087 : case XFS_DIR3_LEAFN_MAGIC:
2088 8077211 : blk->magic = XFS_DIR2_LEAFN_MAGIC;
2089 8077211 : ASSERT(level == path->active-1);
2090 8077211 : blk->index = 0;
2091 8077211 : blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
2092 : blk->bp, NULL);
2093 8077209 : break;
2094 0 : default:
2095 0 : ASSERT(0);
2096 0 : break;
2097 : }
2098 : }
2099 376898267 : *result = 0;
2100 376898267 : return 0;
2101 : }
2102 :
2103 :
2104 : /*========================================================================
2105 : * Utility routines.
2106 : *========================================================================*/
2107 :
2108 : /*
2109 : * Implement a simple hash on a character string.
2110 : * Rotate the hash value by 7 bits, then XOR each character in.
2111 : * This is implemented with some source-level loop unrolling.
2112 : */
2113 : xfs_dahash_t
2114 3196695208 : xfs_da_hashname(const uint8_t *name, int namelen)
2115 : {
2116 3196695208 : xfs_dahash_t hash;
2117 :
2118 : /*
2119 : * Do four characters at a time as long as we can.
2120 : */
2121 6499992595 : for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
2122 3303297387 : hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
2123 3303297387 : (name[3] << 0) ^ rol32(hash, 7 * 4);
2124 :
2125 : /*
2126 : * Now do the rest of the characters.
2127 : */
2128 3196695208 : switch (namelen) {
2129 847566824 : case 3:
2130 847566824 : return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
2131 : rol32(hash, 7 * 3);
2132 1147931400 : case 2:
2133 1147931400 : return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
2134 619623087 : case 1:
2135 619623087 : return (name[0] << 0) ^ rol32(hash, 7 * 1);
2136 : default: /* case 0: */
2137 : return hash;
2138 : }
2139 : }
2140 :
2141 : enum xfs_dacmp
2142 2273623178 : xfs_da_compname(
2143 : struct xfs_da_args *args,
2144 : const unsigned char *name,
2145 : int len)
2146 : {
2147 2040426292 : return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
2148 4314049470 : XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
2149 : }
2150 :
2151 : int
2152 3429069 : xfs_da_grow_inode_int(
2153 : struct xfs_da_args *args,
2154 : xfs_fileoff_t *bno,
2155 : int count)
2156 : {
2157 3429069 : struct xfs_trans *tp = args->trans;
2158 3429069 : struct xfs_inode *dp = args->dp;
2159 3429069 : int w = args->whichfork;
2160 3429069 : xfs_rfsblock_t nblks = dp->i_nblocks;
2161 3429069 : struct xfs_bmbt_irec map, *mapp;
2162 3429069 : int nmap, error, got, i, mapi;
2163 :
2164 : /*
2165 : * Find a spot in the file space to put the new block.
2166 : */
2167 3429069 : error = xfs_bmap_first_unused(tp, dp, count, bno, w);
2168 3428701 : if (error)
2169 : return error;
2170 :
2171 : /*
2172 : * Try mapping it in one filesystem block.
2173 : */
2174 3428701 : nmap = 1;
2175 3428701 : error = xfs_bmapi_write(tp, dp, *bno, count,
2176 3428701 : xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
2177 : args->total, &map, &nmap);
2178 3427189 : if (error)
2179 : return error;
2180 :
2181 3427187 : ASSERT(nmap <= 1);
2182 3427187 : if (nmap == 1) {
2183 : mapp = ↦
2184 : mapi = 1;
2185 171 : } else if (nmap == 0 && count > 1) {
2186 166 : xfs_fileoff_t b;
2187 166 : int c;
2188 :
2189 : /*
2190 : * If we didn't get it and the block might work if fragmented,
2191 : * try without the CONTIG flag. Loop until we get it all.
2192 : */
2193 166 : mapp = kmem_alloc(sizeof(*mapp) * count, 0);
2194 350 : for (b = *bno, mapi = 0; b < *bno + count; ) {
2195 184 : c = (int)(*bno + count - b);
2196 184 : nmap = min(XFS_BMAP_MAX_NMAP, c);
2197 368 : error = xfs_bmapi_write(tp, dp, b, c,
2198 184 : xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
2199 184 : args->total, &mapp[mapi], &nmap);
2200 184 : if (error)
2201 0 : goto out_free_map;
2202 184 : if (nmap < 1)
2203 : break;
2204 184 : mapi += nmap;
2205 184 : b = mapp[mapi - 1].br_startoff +
2206 184 : mapp[mapi - 1].br_blockcount;
2207 : }
2208 : } else {
2209 : mapi = 0;
2210 : mapp = NULL;
2211 : }
2212 :
2213 : /*
2214 : * Count the blocks we got, make sure it matches the total.
2215 : */
2216 6854359 : for (i = 0, got = 0; i < mapi; i++)
2217 3427172 : got += mapp[i].br_blockcount;
2218 3427187 : if (got != count || mapp[0].br_startoff != *bno ||
2219 3425696 : mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
2220 3425696 : *bno + count) {
2221 417 : error = -ENOSPC;
2222 417 : goto out_free_map;
2223 : }
2224 :
2225 : /* account for newly allocated blocks in reserved blocks total */
2226 3426804 : args->total -= dp->i_nblocks - nblks;
2227 :
2228 3427187 : out_free_map:
2229 3427187 : if (mapp != &map)
2230 171 : kmem_free(mapp);
2231 : return error;
2232 : }
2233 :
2234 : /*
2235 : * Add a block to the btree ahead of the file.
2236 : * Return the new block number to the caller.
2237 : */
2238 : int
2239 3084707 : xfs_da_grow_inode(
2240 : struct xfs_da_args *args,
2241 : xfs_dablk_t *new_blkno)
2242 : {
2243 3084707 : xfs_fileoff_t bno;
2244 3084707 : int error;
2245 :
2246 3084707 : trace_xfs_da_grow_inode(args);
2247 :
2248 3084427 : bno = args->geo->leafblk;
2249 3084427 : error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount);
2250 3082590 : if (!error)
2251 3082466 : *new_blkno = (xfs_dablk_t)bno;
2252 3082590 : return error;
2253 : }
2254 :
2255 : /*
2256 : * Ick. We need to always be able to remove a btree block, even
2257 : * if there's no space reservation because the filesystem is full.
2258 : * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
2259 : * It swaps the target block with the last block in the file. The
2260 : * last block in the file can always be removed since it can't cause
2261 : * a bmap btree split to do that.
2262 : */
2263 : STATIC int
2264 0 : xfs_da3_swap_lastblock(
2265 : struct xfs_da_args *args,
2266 : xfs_dablk_t *dead_blknop,
2267 : struct xfs_buf **dead_bufp)
2268 0 : {
2269 0 : struct xfs_da_blkinfo *dead_info;
2270 0 : struct xfs_da_blkinfo *sib_info;
2271 0 : struct xfs_da_intnode *par_node;
2272 0 : struct xfs_da_intnode *dead_node;
2273 0 : struct xfs_dir2_leaf *dead_leaf2;
2274 0 : struct xfs_da_node_entry *btree;
2275 0 : struct xfs_da3_icnode_hdr par_hdr;
2276 0 : struct xfs_inode *dp;
2277 0 : struct xfs_trans *tp;
2278 0 : struct xfs_mount *mp;
2279 0 : struct xfs_buf *dead_buf;
2280 0 : struct xfs_buf *last_buf;
2281 0 : struct xfs_buf *sib_buf;
2282 0 : struct xfs_buf *par_buf;
2283 0 : xfs_dahash_t dead_hash;
2284 0 : xfs_fileoff_t lastoff;
2285 0 : xfs_dablk_t dead_blkno;
2286 0 : xfs_dablk_t last_blkno;
2287 0 : xfs_dablk_t sib_blkno;
2288 0 : xfs_dablk_t par_blkno;
2289 0 : int error;
2290 0 : int w;
2291 0 : int entno;
2292 0 : int level;
2293 0 : int dead_level;
2294 :
2295 0 : trace_xfs_da_swap_lastblock(args);
2296 :
2297 0 : dead_buf = *dead_bufp;
2298 0 : dead_blkno = *dead_blknop;
2299 0 : tp = args->trans;
2300 0 : dp = args->dp;
2301 0 : w = args->whichfork;
2302 0 : ASSERT(w == XFS_DATA_FORK);
2303 0 : mp = dp->i_mount;
2304 0 : lastoff = args->geo->freeblk;
2305 0 : error = xfs_bmap_last_before(tp, dp, &lastoff, w);
2306 0 : if (error)
2307 : return error;
2308 0 : if (XFS_IS_CORRUPT(mp, lastoff == 0))
2309 0 : return -EFSCORRUPTED;
2310 : /*
2311 : * Read the last block in the btree space.
2312 : */
2313 0 : last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount;
2314 0 : error = xfs_da3_node_read(tp, dp, last_blkno, &last_buf, w);
2315 0 : if (error)
2316 : return error;
2317 : /*
2318 : * Copy the last block into the dead buffer and log it.
2319 : */
2320 0 : memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize);
2321 0 : xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);
2322 0 : dead_info = dead_buf->b_addr;
2323 : /*
2324 : * Get values from the moved block.
2325 : */
2326 0 : if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
2327 : dead_info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
2328 0 : struct xfs_dir3_icleaf_hdr leafhdr;
2329 0 : struct xfs_dir2_leaf_entry *ents;
2330 :
2331 0 : dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
2332 0 : xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr,
2333 : dead_leaf2);
2334 0 : ents = leafhdr.ents;
2335 0 : dead_level = 0;
2336 0 : dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval);
2337 : } else {
2338 0 : struct xfs_da3_icnode_hdr deadhdr;
2339 :
2340 0 : dead_node = (xfs_da_intnode_t *)dead_info;
2341 0 : xfs_da3_node_hdr_from_disk(dp->i_mount, &deadhdr, dead_node);
2342 0 : btree = deadhdr.btree;
2343 0 : dead_level = deadhdr.level;
2344 0 : dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval);
2345 : }
2346 0 : sib_buf = par_buf = NULL;
2347 : /*
2348 : * If the moved block has a left sibling, fix up the pointers.
2349 : */
2350 0 : if ((sib_blkno = be32_to_cpu(dead_info->back))) {
2351 0 : error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w);
2352 0 : if (error)
2353 0 : goto done;
2354 0 : sib_info = sib_buf->b_addr;
2355 0 : if (XFS_IS_CORRUPT(mp,
2356 : be32_to_cpu(sib_info->forw) != last_blkno ||
2357 : sib_info->magic != dead_info->magic)) {
2358 0 : error = -EFSCORRUPTED;
2359 0 : goto done;
2360 : }
2361 0 : sib_info->forw = cpu_to_be32(dead_blkno);
2362 0 : xfs_trans_log_buf(tp, sib_buf,
2363 : XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
2364 : sizeof(sib_info->forw)));
2365 0 : sib_buf = NULL;
2366 : }
2367 : /*
2368 : * If the moved block has a right sibling, fix up the pointers.
2369 : */
2370 0 : if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
2371 0 : error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w);
2372 0 : if (error)
2373 0 : goto done;
2374 0 : sib_info = sib_buf->b_addr;
2375 0 : if (XFS_IS_CORRUPT(mp,
2376 : be32_to_cpu(sib_info->back) != last_blkno ||
2377 : sib_info->magic != dead_info->magic)) {
2378 0 : error = -EFSCORRUPTED;
2379 0 : goto done;
2380 : }
2381 0 : sib_info->back = cpu_to_be32(dead_blkno);
2382 0 : xfs_trans_log_buf(tp, sib_buf,
2383 : XFS_DA_LOGRANGE(sib_info, &sib_info->back,
2384 : sizeof(sib_info->back)));
2385 0 : sib_buf = NULL;
2386 : }
2387 0 : par_blkno = args->geo->leafblk;
2388 0 : level = -1;
2389 : /*
2390 : * Walk down the tree looking for the parent of the moved block.
2391 : */
2392 0 : for (;;) {
2393 0 : error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w);
2394 0 : if (error)
2395 0 : goto done;
2396 0 : par_node = par_buf->b_addr;
2397 0 : xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
2398 0 : if (XFS_IS_CORRUPT(mp,
2399 : level >= 0 && level != par_hdr.level + 1)) {
2400 0 : error = -EFSCORRUPTED;
2401 0 : goto done;
2402 : }
2403 0 : level = par_hdr.level;
2404 0 : btree = par_hdr.btree;
2405 0 : for (entno = 0;
2406 0 : entno < par_hdr.count &&
2407 0 : be32_to_cpu(btree[entno].hashval) < dead_hash;
2408 0 : entno++)
2409 0 : continue;
2410 0 : if (XFS_IS_CORRUPT(mp, entno == par_hdr.count)) {
2411 0 : error = -EFSCORRUPTED;
2412 0 : goto done;
2413 : }
2414 0 : par_blkno = be32_to_cpu(btree[entno].before);
2415 0 : if (level == dead_level + 1)
2416 : break;
2417 0 : xfs_trans_brelse(tp, par_buf);
2418 0 : par_buf = NULL;
2419 : }
2420 : /*
2421 : * We're in the right parent block.
2422 : * Look for the right entry.
2423 : */
2424 0 : for (;;) {
2425 0 : for (;
2426 0 : entno < par_hdr.count &&
2427 0 : be32_to_cpu(btree[entno].before) != last_blkno;
2428 0 : entno++)
2429 0 : continue;
2430 0 : if (entno < par_hdr.count)
2431 : break;
2432 0 : par_blkno = par_hdr.forw;
2433 0 : xfs_trans_brelse(tp, par_buf);
2434 0 : par_buf = NULL;
2435 0 : if (XFS_IS_CORRUPT(mp, par_blkno == 0)) {
2436 0 : error = -EFSCORRUPTED;
2437 0 : goto done;
2438 : }
2439 0 : error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w);
2440 0 : if (error)
2441 0 : goto done;
2442 0 : par_node = par_buf->b_addr;
2443 0 : xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
2444 0 : if (XFS_IS_CORRUPT(mp, par_hdr.level != level)) {
2445 0 : error = -EFSCORRUPTED;
2446 0 : goto done;
2447 : }
2448 0 : btree = par_hdr.btree;
2449 0 : entno = 0;
2450 : }
2451 : /*
2452 : * Update the parent entry pointing to the moved block.
2453 : */
2454 0 : btree[entno].before = cpu_to_be32(dead_blkno);
2455 0 : xfs_trans_log_buf(tp, par_buf,
2456 0 : XFS_DA_LOGRANGE(par_node, &btree[entno].before,
2457 : sizeof(btree[entno].before)));
2458 0 : *dead_blknop = last_blkno;
2459 0 : *dead_bufp = last_buf;
2460 0 : return 0;
2461 0 : done:
2462 0 : if (par_buf)
2463 0 : xfs_trans_brelse(tp, par_buf);
2464 0 : if (sib_buf)
2465 0 : xfs_trans_brelse(tp, sib_buf);
2466 0 : xfs_trans_brelse(tp, last_buf);
2467 0 : return error;
2468 : }
2469 :
2470 : /*
2471 : * Remove a btree block from a directory or attribute.
2472 : */
2473 : int
2474 1523349 : xfs_da_shrink_inode(
2475 : struct xfs_da_args *args,
2476 : xfs_dablk_t dead_blkno,
2477 : struct xfs_buf *dead_buf)
2478 : {
2479 1523349 : struct xfs_inode *dp;
2480 1523349 : int done, error, w, count;
2481 1523349 : struct xfs_trans *tp;
2482 :
2483 1523349 : trace_xfs_da_shrink_inode(args);
2484 :
2485 1523283 : dp = args->dp;
2486 1523283 : w = args->whichfork;
2487 1523283 : tp = args->trans;
2488 1523283 : count = args->geo->fsbcount;
2489 1523286 : for (;;) {
2490 : /*
2491 : * Remove extents. If we get ENOSPC for a dir we have to move
2492 : * the last block to the place we want to kill.
2493 : */
2494 1625879 : error = xfs_bunmapi(tp, dp, dead_blkno, count,
2495 : xfs_bmapi_aflag(w), 0, &done);
2496 1523108 : if (error == -ENOSPC) {
2497 0 : if (w != XFS_DATA_FORK)
2498 : break;
2499 0 : error = xfs_da3_swap_lastblock(args, &dead_blkno,
2500 : &dead_buf);
2501 3 : if (error)
2502 : break;
2503 : } else {
2504 : break;
2505 : }
2506 : }
2507 1523108 : xfs_trans_binval(tp, dead_buf);
2508 1523386 : return error;
2509 : }
2510 :
2511 : static int
2512 1804429816 : xfs_dabuf_map(
2513 : struct xfs_inode *dp,
2514 : xfs_dablk_t bno,
2515 : unsigned int flags,
2516 : int whichfork,
2517 : struct xfs_buf_map **mapp,
2518 : int *nmaps)
2519 : {
2520 1804429816 : struct xfs_mount *mp = dp->i_mount;
2521 1804429816 : int nfsb = xfs_dabuf_nfsb(mp, whichfork);
2522 1804429816 : struct xfs_bmbt_irec irec, *irecs = &irec;
2523 1804429816 : struct xfs_buf_map *map = *mapp;
2524 1804429816 : xfs_fileoff_t off = bno;
2525 1804429816 : int error = 0, nirecs, i;
2526 :
2527 1804429816 : if (nfsb > 1)
2528 80865581 : irecs = kmem_zalloc(sizeof(irec) * nfsb, KM_NOFS);
2529 :
2530 1800603785 : nirecs = nfsb;
2531 3010319338 : error = xfs_bmapi_read(dp, bno, nfsb, irecs, &nirecs,
2532 : xfs_bmapi_aflag(whichfork));
2533 1805028541 : if (error)
2534 252 : goto out_free_irecs;
2535 :
2536 : /*
2537 : * Use the caller provided map for the single map case, else allocate a
2538 : * larger one that needs to be free by the caller.
2539 : */
2540 1805028289 : if (nirecs > 1) {
2541 249958 : map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), KM_NOFS);
2542 249958 : if (!map) {
2543 0 : error = -ENOMEM;
2544 0 : goto out_free_irecs;
2545 : }
2546 249958 : *mapp = map;
2547 : }
2548 :
2549 3608820173 : for (i = 0; i < nirecs; i++) {
2550 1805778310 : if (irecs[i].br_startblock == HOLESTARTBLOCK ||
2551 : irecs[i].br_startblock == DELAYSTARTBLOCK)
2552 192592 : goto invalid_mapping;
2553 1805585718 : if (off != irecs[i].br_startoff)
2554 0 : goto invalid_mapping;
2555 :
2556 1805585718 : map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock);
2557 1803791884 : map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount);
2558 1803791884 : off += irecs[i].br_blockcount;
2559 : }
2560 :
2561 1803041863 : if (off != bno + nfsb)
2562 0 : goto invalid_mapping;
2563 :
2564 1803041863 : *nmaps = nirecs;
2565 1803234707 : out_free_irecs:
2566 1803234707 : if (irecs != &irec)
2567 80843314 : kmem_free(irecs);
2568 1803241452 : return error;
2569 :
2570 192592 : invalid_mapping:
2571 : /* Caller ok with no mapping. */
2572 192592 : if (XFS_IS_CORRUPT(mp, !(flags & XFS_DABUF_MAP_HOLE_OK))) {
2573 0 : error = -EFSCORRUPTED;
2574 0 : if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
2575 0 : xfs_alert(mp, "%s: bno %u inode %llu",
2576 : __func__, bno, dp->i_ino);
2577 :
2578 0 : for (i = 0; i < nirecs; i++) {
2579 0 : xfs_alert(mp,
2580 : "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
2581 : i, irecs[i].br_startoff,
2582 : irecs[i].br_startblock,
2583 : irecs[i].br_blockcount,
2584 : irecs[i].br_state);
2585 : }
2586 : }
2587 : } else {
2588 192592 : *nmaps = 0;
2589 : }
2590 192592 : goto out_free_irecs;
2591 : }
2592 :
2593 : /*
2594 : * Get a buffer for the dir/attr block.
2595 : */
2596 : int
2597 3458989 : xfs_da_get_buf(
2598 : struct xfs_trans *tp,
2599 : struct xfs_inode *dp,
2600 : xfs_dablk_t bno,
2601 : struct xfs_buf **bpp,
2602 : int whichfork)
2603 : {
2604 3458989 : struct xfs_mount *mp = dp->i_mount;
2605 3458989 : struct xfs_buf *bp;
2606 3458989 : struct xfs_buf_map map, *mapp = ↦
2607 3458989 : int nmap = 1;
2608 3458989 : int error;
2609 :
2610 3458989 : *bpp = NULL;
2611 3458989 : error = xfs_dabuf_map(dp, bno, 0, whichfork, &mapp, &nmap);
2612 3459691 : if (error || nmap == 0)
2613 0 : goto out_free;
2614 :
2615 3459691 : error = xfs_trans_get_buf_map(tp, mp->m_ddev_targp, mapp, nmap, 0, &bp);
2616 3462054 : if (error)
2617 0 : goto out_free;
2618 :
2619 3462054 : *bpp = bp;
2620 :
2621 3462054 : out_free:
2622 3462054 : if (mapp != &map)
2623 171 : kmem_free(mapp);
2624 :
2625 3462054 : return error;
2626 : }
2627 :
2628 : /*
2629 : * Get a buffer for the dir/attr block, fill in the contents.
2630 : */
2631 : int
2632 1702476283 : xfs_da_read_buf(
2633 : struct xfs_trans *tp,
2634 : struct xfs_inode *dp,
2635 : xfs_dablk_t bno,
2636 : unsigned int flags,
2637 : struct xfs_buf **bpp,
2638 : int whichfork,
2639 : const struct xfs_buf_ops *ops)
2640 : {
2641 1702476283 : struct xfs_mount *mp = dp->i_mount;
2642 1702476283 : struct xfs_buf *bp;
2643 1702476283 : struct xfs_buf_map map, *mapp = ↦
2644 1702476283 : int nmap = 1;
2645 1702476283 : int error;
2646 :
2647 1702476283 : *bpp = NULL;
2648 1702476283 : error = xfs_dabuf_map(dp, bno, flags, whichfork, &mapp, &nmap);
2649 1699187944 : if (error || !nmap)
2650 192834 : goto out_free;
2651 :
2652 1698995110 : error = xfs_trans_read_buf_map(mp, tp, mp->m_ddev_targp, mapp, nmap, 0,
2653 : &bp, ops);
2654 1698459463 : if (error)
2655 2555 : goto out_free;
2656 :
2657 1698456908 : if (whichfork == XFS_ATTR_FORK)
2658 587912355 : xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
2659 : else
2660 1110544553 : xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
2661 1700293972 : *bpp = bp;
2662 1700489361 : out_free:
2663 1700489361 : if (mapp != &map)
2664 249643 : kmem_free(mapp);
2665 :
2666 1700489361 : return error;
2667 : }
2668 :
2669 : /*
2670 : * Readahead the dir/attr block.
2671 : */
2672 : int
2673 98319812 : xfs_da_reada_buf(
2674 : struct xfs_inode *dp,
2675 : xfs_dablk_t bno,
2676 : unsigned int flags,
2677 : int whichfork,
2678 : const struct xfs_buf_ops *ops)
2679 : {
2680 98319812 : struct xfs_buf_map map;
2681 98319812 : struct xfs_buf_map *mapp;
2682 98319812 : int nmap;
2683 98319812 : int error;
2684 :
2685 98319812 : mapp = ↦
2686 98319812 : nmap = 1;
2687 98319812 : error = xfs_dabuf_map(dp, bno, flags, whichfork, &mapp, &nmap);
2688 98404797 : if (error || !nmap)
2689 9 : goto out_free;
2690 :
2691 98404788 : xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops);
2692 :
2693 98373656 : out_free:
2694 98373656 : if (mapp != &map)
2695 144 : kmem_free(mapp);
2696 :
2697 98373656 : return error;
2698 : }
|