Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
4 : * All Rights Reserved.
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_format.h"
9 : #include "xfs_log_format.h"
10 : #include "xfs_shared.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_bit.h"
13 : #include "xfs_mount.h"
14 : #include "xfs_defer.h"
15 : #include "xfs_btree.h"
16 : #include "xfs_rmap.h"
17 : #include "xfs_alloc_btree.h"
18 : #include "xfs_alloc.h"
19 : #include "xfs_extent_busy.h"
20 : #include "xfs_errortag.h"
21 : #include "xfs_error.h"
22 : #include "xfs_trace.h"
23 : #include "xfs_trans.h"
24 : #include "xfs_buf_item.h"
25 : #include "xfs_log.h"
26 : #include "xfs_ag.h"
27 : #include "xfs_ag_resv.h"
28 : #include "xfs_bmap.h"
29 : #include "xfs_health.h"
30 :
31 : struct kmem_cache *xfs_extfree_item_cache;
32 :
33 : struct workqueue_struct *xfs_alloc_wq;
34 :
35 : #define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
36 :
37 : #define XFSA_FIXUP_BNO_OK 1
38 : #define XFSA_FIXUP_CNT_OK 2
39 :
40 : /*
41 : * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in
42 : * the beginning of the block for a proper header with the location information
43 : * and CRC.
44 : */
45 : unsigned int
46 39948989 : xfs_agfl_size(
47 : struct xfs_mount *mp)
48 : {
49 1940126409 : unsigned int size = mp->m_sb.sb_sectsize;
50 :
51 39948989 : if (xfs_has_crc(mp))
52 1926947107 : size -= sizeof(struct xfs_agfl);
53 :
54 1940126409 : return size / sizeof(xfs_agblock_t);
55 : }
56 :
/*
 * Location of the refcount btree root: one block past whichever
 * inode/rmap btree root comes last on this filesystem.
 */
unsigned int
xfs_refc_block(
	struct xfs_mount	*mp)
{
	unsigned int		prev;

	if (xfs_has_rmapbt(mp))
		prev = XFS_RMAP_BLOCK(mp);
	else if (xfs_has_finobt(mp))
		prev = XFS_FIBT_BLOCK(mp);
	else
		prev = XFS_IBT_BLOCK(mp);

	return prev + 1;
}
67 :
68 : xfs_extlen_t
69 80854 : xfs_prealloc_blocks(
70 : struct xfs_mount *mp)
71 : {
72 80854 : if (xfs_has_reflink(mp))
73 75793 : return xfs_refc_block(mp) + 1;
74 5061 : if (xfs_has_rmapbt(mp))
75 48 : return XFS_RMAP_BLOCK(mp) + 1;
76 5013 : if (xfs_has_finobt(mp))
77 4753 : return XFS_FIBT_BLOCK(mp) + 1;
78 260 : return XFS_IBT_BLOCK(mp) + 1;
79 : }
80 :
81 : /*
82 : * The number of blocks per AG that we withhold from xfs_mod_fdblocks to
83 : * guarantee that we can refill the AGFL prior to allocating space in a nearly
84 : * full AG. Although the space described by the free space btrees, the
85 : * blocks used by the freesp btrees themselves, and the blocks owned by the
86 : * AGFL are counted in the ondisk fdblocks, it's a mistake to let the ondisk
87 : * free space in the AG drop so low that the free space btrees cannot refill an
88 : * empty AGFL up to the minimum level. Rather than grind through empty AGs
89 : * until the fs goes down, we subtract this many AG blocks from the incore
90 : * fdblocks to ensure user allocation does not overcommit the space the
91 : * filesystem needs for the AGFLs. The rmap btree uses a per-AG reservation to
92 : * withhold space from xfs_mod_fdblocks, so we do not account for that here.
93 : */
94 : #define XFS_ALLOCBT_AGFL_RESERVE 4
95 :
96 : /*
97 : * Compute the number of blocks that we set aside to guarantee the ability to
98 : * refill the AGFL and handle a full bmap btree split.
99 : *
100 : * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
101 : * AGF buffer (PV 947395), we place constraints on the relationship among
102 : * actual allocations for data blocks, freelist blocks, and potential file data
103 : * bmap btree blocks. However, these restrictions may result in no actual space
104 : * allocated for a delayed extent, for example, a data block in a certain AG is
105 : * allocated but there is no additional block for the additional bmap btree
106 : * block due to a split of the bmap btree of the file. The result of this may
107 : * lead to an infinite loop when the file gets flushed to disk and all delayed
108 : * extents need to be actually allocated. To get around this, we explicitly set
109 : * aside a few blocks which will not be reserved in delayed allocation.
110 : *
111 : * For each AG, we need to reserve enough blocks to replenish a totally empty
112 : * AGFL and 4 more to handle a potential split of the file's bmap btree.
113 : */
114 : unsigned int
115 81229 : xfs_alloc_set_aside(
116 : struct xfs_mount *mp)
117 : {
118 81229 : return mp->m_sb.sb_agcount * (XFS_ALLOCBT_AGFL_RESERVE + 4);
119 : }
120 :
121 : /*
122 : * When deciding how much space to allocate out of an AG, we limit the
123 : * allocation maximum size to the size the AG. However, we cannot use all the
124 : * blocks in the AG - some are permanently used by metadata. These
125 : * blocks are generally:
126 : * - the AG superblock, AGF, AGI and AGFL
127 : * - the AGF (bno and cnt) and AGI btree root blocks, and optionally
128 : * the AGI free inode and rmap btree root blocks.
129 : * - blocks on the AGFL according to xfs_alloc_set_aside() limits
130 : * - the rmapbt root block
131 : *
132 : * The AG headers are sector sized, so the amount of space they take up is
133 : * dependent on filesystem geometry. The others are all single blocks.
134 : */
135 : unsigned int
136 66982 : xfs_alloc_ag_max_usable(
137 : struct xfs_mount *mp)
138 : {
139 66982 : unsigned int blocks;
140 :
141 66982 : blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
142 66982 : blocks += XFS_ALLOCBT_AGFL_RESERVE;
143 66982 : blocks += 3; /* AGF, AGI btree root blocks */
144 66982 : if (xfs_has_finobt(mp))
145 66713 : blocks++; /* finobt root block */
146 66982 : if (xfs_has_rmapbt(mp))
147 62365 : blocks++; /* rmap root block */
148 66982 : if (xfs_has_reflink(mp))
149 62317 : blocks++; /* refcount root block */
150 :
151 66982 : return mp->m_sb.sb_agblocks - blocks;
152 : }
153 :
154 : /*
155 : * Lookup the record equal to [bno, len] in the btree given by cur.
156 : */
157 : STATIC int /* error */
158 433096049 : xfs_alloc_lookup_eq(
159 : struct xfs_btree_cur *cur, /* btree cursor */
160 : xfs_agblock_t bno, /* starting block of extent */
161 : xfs_extlen_t len, /* length of extent */
162 : int *stat) /* success/failure */
163 : {
164 433096049 : int error;
165 :
166 433096049 : cur->bc_rec.a.ar_startblock = bno;
167 433096049 : cur->bc_rec.a.ar_blockcount = len;
168 433096049 : error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
169 433123920 : cur->bc_ag.abt.active = (*stat == 1);
170 433123920 : return error;
171 : }
172 :
173 : /*
174 : * Lookup the first record greater than or equal to [bno, len]
175 : * in the btree given by cur.
176 : */
177 : int /* error */
178 15969 : xfs_alloc_lookup_ge(
179 : struct xfs_btree_cur *cur, /* btree cursor */
180 : xfs_agblock_t bno, /* starting block of extent */
181 : xfs_extlen_t len, /* length of extent */
182 : int *stat) /* success/failure */
183 : {
184 363371962 : int error;
185 :
186 363371962 : cur->bc_rec.a.ar_startblock = bno;
187 363371962 : cur->bc_rec.a.ar_blockcount = len;
188 15969 : error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
189 363344230 : cur->bc_ag.abt.active = (*stat == 1);
190 363344230 : return error;
191 : }
192 :
193 : /*
194 : * Lookup the first record less than or equal to [bno, len]
195 : * in the btree given by cur.
196 : */
197 : int /* error */
198 433803591 : xfs_alloc_lookup_le(
199 : struct xfs_btree_cur *cur, /* btree cursor */
200 : xfs_agblock_t bno, /* starting block of extent */
201 : xfs_extlen_t len, /* length of extent */
202 : int *stat) /* success/failure */
203 : {
204 593753476 : int error;
205 593753476 : cur->bc_rec.a.ar_startblock = bno;
206 593753476 : cur->bc_rec.a.ar_blockcount = len;
207 433803591 : error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
208 593800697 : cur->bc_ag.abt.active = (*stat == 1);
209 593800697 : return error;
210 : }
211 :
212 : static inline bool
213 11628546039 : xfs_alloc_cur_active(
214 : struct xfs_btree_cur *cur)
215 : {
216 11628546039 : return cur && cur->bc_ag.abt.active;
217 : }
218 :
219 : /*
220 : * Update the record referred to by cur to the value given
221 : * by [bno, len].
222 : * This either works (return 0) or gets an EFSCORRUPTED error.
223 : */
224 : STATIC int /* error */
225 112995709 : xfs_alloc_update(
226 : struct xfs_btree_cur *cur, /* btree cursor */
227 : xfs_agblock_t bno, /* starting block of extent */
228 : xfs_extlen_t len) /* length of extent */
229 : {
230 112995709 : union xfs_btree_rec rec;
231 :
232 112995709 : rec.alloc.ar_startblock = cpu_to_be32(bno);
233 112995709 : rec.alloc.ar_blockcount = cpu_to_be32(len);
234 112995709 : return xfs_btree_update(cur, &rec);
235 : }
236 :
/* Convert the ondisk btree record to its incore representation. */
void
xfs_alloc_btrec_to_irec(
	const union xfs_btree_rec	*rec,
	struct xfs_alloc_rec_incore	*irec)
{
	/* Ondisk free space records are big-endian [startblock, blockcount]. */
	irec->ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
	irec->ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
}
246 :
/*
 * Sanity-check an incore free space record against the AG's geometry.
 * Returns NULL if the record is fine, otherwise the address of the failing
 * check (each check returns its own __this_address so corruption reports
 * pinpoint which test failed).
 */
inline xfs_failaddr_t
xfs_alloc_check_perag_irec(
	struct xfs_perag		*pag,
	const struct xfs_alloc_rec_incore *irec)
{
	/* A free extent must cover at least one block. */
	if (irec->ar_blockcount == 0)
		return __this_address;

	/* check for valid extent range, including overflow */
	if (!xfs_verify_agbext(pag, irec->ar_startblock, irec->ar_blockcount))
		return __this_address;

	return NULL;
}
261 :
/* Simple checks for free space records. */
xfs_failaddr_t
xfs_alloc_check_irec(
	struct xfs_btree_cur		*cur,
	const struct xfs_alloc_rec_incore *irec)
{
	/* Delegate to the perag-based check using the cursor's AG. */
	return xfs_alloc_check_perag_irec(cur->bc_ag.pag, irec);
}
270 :
271 : static inline int
272 0 : xfs_alloc_complain_bad_rec(
273 : struct xfs_btree_cur *cur,
274 : xfs_failaddr_t fa,
275 : const struct xfs_alloc_rec_incore *irec)
276 : {
277 0 : struct xfs_mount *mp = cur->bc_mp;
278 :
279 0 : xfs_warn(mp,
280 : "%s Freespace BTree record corruption in AG %d detected at %pS!",
281 : cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size",
282 : cur->bc_ag.pag->pag_agno, fa);
283 0 : xfs_warn(mp,
284 : "start block 0x%x block count 0x%x", irec->ar_startblock,
285 : irec->ar_blockcount);
286 0 : xfs_btree_mark_sick(cur);
287 0 : return -EFSCORRUPTED;
288 : }
289 :
/*
 * Get the data from the pointed-to record.
 *
 * Returns 0 with *stat == 0 when the cursor is not on a record; on success
 * (*stat == 1) fills *bno and *len with the decoded extent.  Corrupt records
 * are reported and returned as -EFSCORRUPTED rather than handed out.
 */
int					/* error */
xfs_alloc_get_rec(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		*bno,	/* output: starting block of extent */
	xfs_extlen_t		*len,	/* output: length of extent */
	int			*stat)	/* output: success/failure */
{
	struct xfs_alloc_rec_incore irec;
	union xfs_btree_rec	*rec;
	xfs_failaddr_t		fa;
	int			error;

	error = xfs_btree_get_rec(cur, &rec, stat);
	if (error || !(*stat))
		return error;

	/* Decode the ondisk record and sanity-check before returning it. */
	xfs_alloc_btrec_to_irec(rec, &irec);
	fa = xfs_alloc_check_irec(cur, &irec);
	if (fa)
		return xfs_alloc_complain_bad_rec(cur, fa, &irec);

	*bno = irec.ar_startblock;
	*len = irec.ar_blockcount;
	return 0;
}
318 :
/*
 * Compute aligned version of the found extent.
 * Takes alignment and min length into account.
 *
 * Trims busy blocks off the found extent, shifts it up to args->min_agbno if
 * possible, then rounds the start up to args->alignment.  The usable
 * [*resbno, *reslen] result may be shorter than the found extent, or have
 * zero length if alignment consumed it entirely.  Returns true if any part
 * of the found extent was busy (caller may need to wait/retry).
 */
STATIC bool
xfs_alloc_compute_aligned(
	xfs_alloc_arg_t	*args,		/* allocation argument structure */
	xfs_agblock_t	foundbno,	/* starting block in found extent */
	xfs_extlen_t	foundlen,	/* length in found extent */
	xfs_agblock_t	*resbno,	/* result block number */
	xfs_extlen_t	*reslen,	/* result length */
	unsigned	*busy_gen)
{
	xfs_agblock_t	bno = foundbno;
	xfs_extlen_t	len = foundlen;
	xfs_extlen_t	diff;
	bool		busy;

	/* Trim busy sections out of found extent */
	busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);

	/*
	 * If we have a largish extent that happens to start before min_agbno,
	 * see if we can shift it into range...
	 */
	if (bno < args->min_agbno && bno + len > args->min_agbno) {
		diff = args->min_agbno - bno;
		if (len > diff) {
			bno += diff;
			len -= diff;
		}
	}

	if (args->alignment > 1 && len >= args->minlen) {
		xfs_agblock_t	aligned_bno = roundup(bno, args->alignment);

		diff = aligned_bno - bno;

		*resbno = aligned_bno;
		/* rounding up may leave nothing usable past the extent end */
		*reslen = diff >= len ? 0 : len - diff;
	} else {
		*resbno = bno;
		*reslen = len;
	}

	return busy;
}
366 :
/*
 * Compute best start block and diff for "near" allocations.
 * freelen >= wantlen already checked by caller.
 *
 * Picks the best start block within [freebno, freebno + freelen) for an
 * allocation of wantlen blocks as close to wantbno as possible, honoring
 * alignment.  *newbnop is set to the chosen block (or NULLAGBLOCK if
 * alignment makes the extent unusable); the return value is the absolute
 * distance from wantbno (0 for NULLAGBLOCK).
 */
STATIC xfs_extlen_t			/* difference value (absolute) */
xfs_alloc_compute_diff(
	xfs_agblock_t	wantbno,	/* target starting block */
	xfs_extlen_t	wantlen,	/* target length */
	xfs_extlen_t	alignment,	/* target alignment */
	int		datatype,	/* are we allocating data? */
	xfs_agblock_t	freebno,	/* freespace's starting block */
	xfs_extlen_t	freelen,	/* freespace's length */
	xfs_agblock_t	*newbnop)	/* result: best start block from free */
{
	xfs_agblock_t	freeend;	/* end of freespace extent */
	xfs_agblock_t	newbno1;	/* return block number */
	xfs_agblock_t	newbno2;	/* other new block number */
	xfs_extlen_t	newlen1=0;	/* length with newbno1 */
	xfs_extlen_t	newlen2=0;	/* length with newbno2 */
	xfs_agblock_t	wantend;	/* end of target extent */
	bool		userdata = datatype & XFS_ALLOC_USERDATA;

	ASSERT(freelen >= wantlen);
	freeend = freebno + freelen;
	wantend = wantbno + wantlen;
	/*
	 * We want to allocate from the start of a free extent if it is past
	 * the desired block or if we are allocating user data and the free
	 * extent is before desired block. The second case is there to allow
	 * for contiguous allocation from the remaining free space if the file
	 * grows in the short term.
	 */
	if (freebno >= wantbno || (userdata && freeend < wantend)) {
		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
			newbno1 = NULLAGBLOCK;
	} else if (freeend >= wantend && alignment > 1) {
		/*
		 * wantbno falls inside the free extent and alignment matters:
		 * try the aligned candidates on either side of wantbno and
		 * keep whichever gives the longer extent, breaking ties by
		 * distance from wantbno.
		 */
		newbno1 = roundup(wantbno, alignment);
		newbno2 = newbno1 - alignment;
		if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
		else
			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
		if (newbno2 < freebno)
			newbno2 = NULLAGBLOCK;
		else
			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
			if (newlen1 < newlen2 ||
			    (newlen1 == newlen2 &&
			     XFS_ABSDIFF(newbno1, wantbno) >
			     XFS_ABSDIFF(newbno2, wantbno)))
				newbno1 = newbno2;
		} else if (newbno2 != NULLAGBLOCK)
			newbno1 = newbno2;
	} else if (freeend >= wantend) {
		/* Unaligned and the target fits: take wantbno itself. */
		newbno1 = wantbno;
	} else if (alignment > 1) {
		/* Free extent ends before wantend: allocate at its tail. */
		newbno1 = roundup(freeend - wantlen, alignment);
		if (newbno1 > freeend - wantlen &&
		    newbno1 - alignment >= freebno)
			newbno1 -= alignment;
		else if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
	} else
		newbno1 = freeend - wantlen;
	*newbnop = newbno1;
	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
}
435 :
/*
 * Fix up the length, based on mod and prod.
 * len should be k * prod + mod for some k.
 * If len is too small it is returned unchanged.
 * If len hits maxlen it is left alone.
 */
STATIC void
xfs_alloc_fix_len(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_extlen_t	k;
	xfs_extlen_t	rlen;

	ASSERT(args->mod < args->prod);
	rlen = args->len;
	ASSERT(rlen >= args->minlen);
	ASSERT(rlen <= args->maxlen);
	/* Nothing to do when prod doesn't constrain us or rlen is too small. */
	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
	    (args->mod == 0 && rlen < args->prod))
		return;
	/* k is the current remainder; shrink rlen until k == args->mod. */
	k = rlen % args->prod;
	if (k == args->mod)
		return;
	if (k > args->mod)
		rlen = rlen - (k - args->mod);
	else
		rlen = rlen - args->prod + (args->mod - k);
	/* casts to (int) catch length underflows */
	if ((int)rlen < (int)args->minlen)
		return;
	ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
	ASSERT(rlen % args->prod == args->mod);
	ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
		rlen + args->minleft);
	args->len = rlen;
}
472 :
/*
 * Update the two btrees, logically removing from freespace the extent
 * starting at rbno, rlen blocks.  The extent is contained within the
 * actual (current) free extent fbno for flen blocks.
 * Flags are passed in indicating whether the cursors are set to the
 * relevant records.
 *
 * The allocated range [rbno, rbno+rlen) may equal the free extent, abut
 * either end of it, or sit in the middle, leaving zero, one or two new
 * free extents to re-insert.  Both the by-size (cnt) and by-block (bno)
 * trees must end up describing the same set of free extents.
 */
STATIC int				/* error code */
xfs_alloc_fixup_trees(
	struct xfs_btree_cur *cnt_cur,	/* cursor for by-size btree */
	struct xfs_btree_cur *bno_cur,	/* cursor for by-block btree */
	xfs_agblock_t	fbno,		/* starting block of free extent */
	xfs_extlen_t	flen,		/* length of free extent */
	xfs_agblock_t	rbno,		/* starting block of returned extent */
	xfs_extlen_t	rlen,		/* length of returned extent */
	int		flags)		/* flags, XFSA_FIXUP_... */
{
	int		error;		/* error code */
	int		i;		/* operation results */
	xfs_agblock_t	nfbno1;		/* first new free startblock */
	xfs_agblock_t	nfbno2;		/* second new free startblock */
	xfs_extlen_t	nflen1=0;	/* first new free length */
	xfs_extlen_t	nflen2=0;	/* second new free length */
	struct xfs_mount *mp;

	mp = cnt_cur->bc_mp;

	/*
	 * Look up the record in the by-size tree if necessary.
	 */
	if (flags & XFSA_FIXUP_CNT_OK) {
#ifdef DEBUG
		/* Cursor claims to be positioned; cross-check it in debug. */
		if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp,
				   i != 1 ||
				   nfbno1 != fbno ||
				   nflen1 != flen)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
	}
	/*
	 * Look up the record in the by-block tree if necessary.
	 */
	if (flags & XFSA_FIXUP_BNO_OK) {
#ifdef DEBUG
		/* Cursor claims to be positioned; cross-check it in debug. */
		if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp,
				   i != 1 ||
				   nfbno1 != fbno ||
				   nflen1 != flen)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
	}

#ifdef DEBUG
	/*
	 * Both trees index the same extents, so two single-level trees must
	 * hold the same number of records.
	 */
	if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
		struct xfs_btree_block	*bnoblock;
		struct xfs_btree_block	*cntblock;

		bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_levels[0].bp);
		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_levels[0].bp);

		if (XFS_IS_CORRUPT(mp,
				   bnoblock->bb_numrecs !=
				   cntblock->bb_numrecs)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
	}
#endif

	/*
	 * Deal with all four cases: the allocated record is contained
	 * within the freespace record, so we can have new freespace
	 * at either (or both) end, or no freespace remaining.
	 */
	if (rbno == fbno && rlen == flen)
		nfbno1 = nfbno2 = NULLAGBLOCK;
	else if (rbno == fbno) {
		nfbno1 = rbno + rlen;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else if (rbno + rlen == fbno + flen) {
		nfbno1 = fbno;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else {
		nfbno1 = fbno;
		nflen1 = rbno - fbno;
		nfbno2 = rbno + rlen;
		nflen2 = (fbno + flen) - nfbno2;
	}
	/*
	 * Delete the entry from the by-size btree.
	 */
	if ((error = xfs_btree_delete(cnt_cur, &i)))
		return error;
	if (XFS_IS_CORRUPT(mp, i != 1)) {
		xfs_btree_mark_sick(cnt_cur);
		return -EFSCORRUPTED;
	}
	/*
	 * Add new by-size btree entry(s).
	 */
	if (nfbno1 != NULLAGBLOCK) {
		/* The lookup must fail (i == 0) before inserting. */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
	}
	if (nfbno2 != NULLAGBLOCK) {
		/* The lookup must fail (i == 0) before inserting. */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
	}
	/*
	 * Fix up the by-block btree entry(s).
	 */
	if (nfbno1 == NULLAGBLOCK) {
		/*
		 * No remaining freespace, just delete the by-block tree entry.
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
	} else {
		/*
		 * Update the by-block entry to start later|be shorter.
		 */
		if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
			return error;
	}
	if (nfbno2 != NULLAGBLOCK) {
		/*
		 * 2 resulting free entries, need to add one.
		 */
		if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
		if ((error = xfs_btree_insert(bno_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
	}
	return 0;
}
664 :
/*
 * We do not verify the AGFL contents against AGF-based index counters here,
 * even though we may have access to the perag that contains shadow copies. We
 * don't know if the AGF based counters have been checked, and if they have they
 * still may be inconsistent because they haven't yet been reset on the first
 * allocation after the AGF has been read in.
 *
 * This means we can only check that all agfl entries contain valid or null
 * values because we can't reliably determine the active range to exclude
 * NULLAGBNO as a valid value.
 *
 * However, we can't even do that for v4 format filesystems because there are
 * old versions of mkfs out there that do not initialise the AGFL to known,
 * verifiable values. Hence we can't tell the difference between an AGFL block
 * allocated by mkfs and a corrupted AGFL block here on v4 filesystems.
 *
 * As a result, we can only fully validate AGFL block numbers when we pull them
 * from the freelist in xfs_alloc_get_freelist().
 */
static xfs_failaddr_t
xfs_agfl_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_mount;
	struct xfs_agfl	*agfl = XFS_BUF_TO_AGFL(bp);
	__be32		*agfl_bno = xfs_buf_to_agfl_bno(bp);
	int		i;

	/* v4 AGFLs cannot be verified at all; see comment above. */
	if (!xfs_has_crc(mp))
		return NULL;

	if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
		return __this_address;
	if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
		return __this_address;
	/*
	 * during growfs operations, the perag is not fully initialised,
	 * so we can't use it for any useful checking. growfs ensures we can't
	 * use it by using uncached buffers that don't have the perag attached
	 * so we can detect and avoid this problem.
	 */
	if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
		return __this_address;

	/* Every slot must be NULLAGBLOCK or a block number within the AG. */
	for (i = 0; i < xfs_agfl_size(mp); i++) {
		if (be32_to_cpu(agfl_bno[i]) != NULLAGBLOCK &&
		    be32_to_cpu(agfl_bno[i]) >= mp->m_sb.sb_agblocks)
			return __this_address;
	}

	if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
		return __this_address;
	return NULL;
}
719 :
720 : static void
721 210737 : xfs_agfl_read_verify(
722 : struct xfs_buf *bp)
723 : {
724 210737 : struct xfs_mount *mp = bp->b_mount;
725 210737 : xfs_failaddr_t fa;
726 :
727 : /*
728 : * There is no verification of non-crc AGFLs because mkfs does not
729 : * initialise the AGFL to zero or NULL. Hence the only valid part of the
730 : * AGFL is what the AGF says is active. We can't get to the AGF, so we
731 : * can't verify just those entries are valid.
732 : */
733 210737 : if (!xfs_has_crc(mp))
734 : return;
735 :
736 210640 : if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
737 640 : xfs_verifier_error(bp, -EFSBADCRC, __this_address);
738 : else {
739 210000 : fa = xfs_agfl_verify(bp);
740 210000 : if (fa)
741 0 : xfs_verifier_error(bp, -EFSCORRUPTED, fa);
742 : }
743 : }
744 :
/*
 * Verify, stamp the last-written LSN, and checksum an AGFL buffer on its way
 * to disk.  The LSN must be written before the CRC is computed so the
 * checksum covers it.
 */
static void
xfs_agfl_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_mount;
	struct xfs_buf_log_item	*bip = bp->b_log_item;
	xfs_failaddr_t	fa;

	/* no verification of non-crc AGFLs */
	if (!xfs_has_crc(mp))
		return;

	fa = xfs_agfl_verify(bp);
	if (fa) {
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}

	/* Only logged buffers carry an LSN to record. */
	if (bip)
		XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
}
768 :
/* Buffer ops for AGFL blocks; both magic slots carry the same v5 magic. */
const struct xfs_buf_ops xfs_agfl_buf_ops = {
	.name = "xfs_agfl",
	.magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
	.verify_read = xfs_agfl_read_verify,
	.verify_write = xfs_agfl_write_verify,
	.verify_struct = xfs_agfl_verify,
};
776 :
777 : /*
778 : * Read in the allocation group free block array.
779 : */
780 : int
781 651876127 : xfs_alloc_read_agfl(
782 : struct xfs_perag *pag,
783 : struct xfs_trans *tp,
784 : struct xfs_buf **bpp)
785 : {
786 651876127 : struct xfs_mount *mp = pag->pag_mount;
787 651876127 : struct xfs_buf *bp;
788 651876127 : int error;
789 :
790 1955628381 : error = xfs_trans_read_buf(
791 651876127 : mp, tp, mp->m_ddev_targp,
792 651876127 : XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGFL_DADDR(mp)),
793 651876127 : XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
794 651934340 : if (xfs_metadata_is_sick(error))
795 640 : xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL);
796 651934340 : if (error)
797 : return error;
798 651934159 : xfs_buf_set_ref(bp, XFS_AGFL_REF);
799 651915952 : *bpp = bp;
800 651915952 : return 0;
801 : }
802 :
/*
 * Apply a free-block count delta (positive or negative) to both the incore
 * perag counter and the ondisk AGF, logging the AGF change.  Returns
 * -EFSCORRUPTED if the resulting count exceeds the AG size.
 */
STATIC int
xfs_alloc_update_counters(
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	long			len)
{
	struct xfs_agf		*agf = agbp->b_addr;

	agbp->b_pag->pagf_freeblks += len;
	be32_add_cpu(&agf->agf_freeblks, len);

	/* More free blocks than blocks in the AG means corruption. */
	if (unlikely(be32_to_cpu(agf->agf_freeblks) >
		     be32_to_cpu(agf->agf_length))) {
		xfs_buf_mark_corrupt(agbp);
		xfs_ag_mark_sick(agbp->b_pag, XFS_SICK_AG_AGF);
		return -EFSCORRUPTED;
	}

	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
	return 0;
}
824 :
/*
 * Block allocation algorithm and data structures.
 */
struct xfs_alloc_cur {
	struct xfs_btree_cur	*cnt;	/* by-size (cntbt) cursor */
	struct xfs_btree_cur	*bnolt;	/* by-block (bnobt) left search cursor */
	struct xfs_btree_cur	*bnogt;	/* by-block (bnobt) right search cursor */
	xfs_extlen_t		cur_len;/* current search length */
	xfs_agblock_t		rec_bno;/* extent startblock */
	xfs_extlen_t		rec_len;/* extent length */
	xfs_agblock_t		bno;	/* alloc bno */
	xfs_extlen_t		len;	/* alloc len */
	xfs_extlen_t		diff;	/* diff from search bno */
	unsigned int		busy_gen;/* busy state */
	bool			busy;	/* any candidate extent was busy */
};
841 :
/*
 * Set up cursors, etc. in the extent allocation cursor. This function can be
 * called multiple times to reset an initialized structure without having to
 * reallocate cursors.
 *
 * Returns -ENOSPC when no maxlen-sized extent exists in the cntbt, signalling
 * the caller to fall back to a small allocation.
 */
static int
xfs_alloc_cur_setup(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur)
{
	int			error;
	int			i;

	/* Reset the best-candidate tracking state for a fresh search. */
	acur->cur_len = args->maxlen;
	acur->rec_bno = 0;
	acur->rec_len = 0;
	acur->bno = 0;
	acur->len = 0;
	acur->diff = -1;
	acur->busy = false;
	acur->busy_gen = 0;

	/*
	 * Perform an initial cntbt lookup to check for availability of maxlen
	 * extents. If this fails, we'll return -ENOSPC to signal the caller to
	 * attempt a small allocation.
	 */
	if (!acur->cnt)
		acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->pag, XFS_BTNUM_CNT);
	error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
	if (error)
		return error;

	/*
	 * Allocate the bnobt left and right search cursors.
	 */
	if (!acur->bnolt)
		acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->pag, XFS_BTNUM_BNO);
	if (!acur->bnogt)
		acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->pag, XFS_BTNUM_BNO);
	return i == 1 ? 0 : -ENOSPC;
}
887 :
888 : static void
889 90376134 : xfs_alloc_cur_close(
890 : struct xfs_alloc_cur *acur,
891 : bool error)
892 : {
893 90376134 : int cur_error = XFS_BTREE_NOERROR;
894 :
895 90376134 : if (error)
896 3137 : cur_error = XFS_BTREE_ERROR;
897 :
898 90376134 : if (acur->cnt)
899 90376134 : xfs_btree_del_cursor(acur->cnt, cur_error);
900 90388305 : if (acur->bnolt)
901 90386661 : xfs_btree_del_cursor(acur->bnolt, cur_error);
902 90395724 : if (acur->bnogt)
903 90394080 : xfs_btree_del_cursor(acur->bnogt, cur_error);
904 90392812 : acur->cnt = acur->bnolt = acur->bnogt = NULL;
905 90392812 : }
906 :
907 : /*
908 : * Check an extent for allocation and track the best available candidate in the
909 : * allocation structure. The cursor is deactivated if it has entered an out of
910 : * range state based on allocation arguments. Optionally return the extent
911 : * extent geometry and allocation status if requested by the caller.
912 : */
static int
xfs_alloc_cur_check(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	struct xfs_alloc_cur	*acur,	/* tracks the best candidate so far */
	struct xfs_btree_cur	*cur,	/* cursor positioned at a free extent */
	int			*new)	/* out: 1 if this record became the new best */
{
	int			error, i;
	xfs_agblock_t		bno, bnoa, bnew;
	xfs_extlen_t		len, lena, diff = -1;
	bool			busy;
	unsigned		busy_gen = 0;
	bool			deactivate = false;
	bool			isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;

	*new = 0;

	/* Pull the free extent record the cursor currently points at. */
	error = xfs_alloc_get_rec(cur, &bno, &len, &i);
	if (error)
		return error;
	if (XFS_IS_CORRUPT(args->mp, i != 1)) {
		xfs_btree_mark_sick(cur);
		return -EFSCORRUPTED;
	}

	/*
	 * Check minlen and deactivate a cntbt cursor if out of acceptable size
	 * range (i.e., walking backwards looking for a minlen extent).
	 */
	if (len < args->minlen) {
		deactivate = !isbnobt;
		goto out;
	}

	/*
	 * Trim busy sub-ranges and apply alignment; remember the busy state so
	 * the caller can flush busy extents and retry if nothing else works.
	 */
	busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
					 &busy_gen);
	acur->busy |= busy;
	if (busy)
		acur->busy_gen = busy_gen;
	/* deactivate a bnobt cursor outside of locality range */
	if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
		deactivate = isbnobt;
		goto out;
	}
	if (lena < args->minlen)
		goto out;

	args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
	xfs_alloc_fix_len(args);
	ASSERT(args->len >= args->minlen);
	/* A shorter extent than the current best can never win. */
	if (args->len < acur->len)
		goto out;

	/*
	 * We have an aligned record that satisfies minlen and beats or matches
	 * the candidate extent size. Compare locality for near allocation mode.
	 */
	diff = xfs_alloc_compute_diff(args->agbno, args->len,
				      args->alignment, args->datatype,
				      bnoa, lena, &bnew);
	if (bnew == NULLAGBLOCK)
		goto out;

	/*
	 * Deactivate a bnobt cursor with worse locality than the current best.
	 */
	if (diff > acur->diff) {
		deactivate = isbnobt;
		goto out;
	}

	/* Record this extent as the new best candidate. */
	ASSERT(args->len > acur->len ||
	       (args->len == acur->len && diff <= acur->diff));
	acur->rec_bno = bno;
	acur->rec_len = len;
	acur->bno = bnew;
	acur->len = args->len;
	acur->diff = diff;
	*new = 1;

	/*
	 * We're done if we found a perfect allocation. This only deactivates
	 * the current cursor, but this is just an optimization to terminate a
	 * cntbt search that otherwise runs to the edge of the tree.
	 */
	if (acur->diff == 0 && acur->len == args->maxlen)
		deactivate = true;
out:
	if (deactivate)
		cur->bc_ag.abt.active = false;
	trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
				  *new);
	return 0;
}
1007 :
1008 : /*
1009 : * Complete an allocation of a candidate extent. Remove the extent from both
1010 : * trees and update the args structure.
1011 : */
STATIC int
xfs_alloc_cur_finish(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	struct xfs_alloc_cur	*acur)	/* holds the winning candidate extent */
{
	struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
	int			error;

	/* The allocated range must lie inside the found free extent. */
	ASSERT(acur->cnt && acur->bnolt);
	ASSERT(acur->bno >= acur->rec_bno);
	ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
	ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length));

	/* Remove the allocated span from both free space btrees. */
	error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
				      acur->rec_len, acur->bno, acur->len, 0);
	if (error)
		return error;

	/* Publish the result; this allocation did not come from the AGFL. */
	args->agbno = acur->bno;
	args->len = acur->len;
	args->wasfromfl = 0;

	trace_xfs_alloc_cur(args);
	return 0;
}
1037 :
1038 : /*
1039 : * Locality allocation lookup algorithm. This expects a cntbt cursor and uses
1040 : * bno optimized lookup to search for extents with ideal size and locality.
1041 : */
STATIC int
xfs_alloc_cntbt_iter(
	struct xfs_alloc_arg		*args,	/* allocation argument structure */
	struct xfs_alloc_cur		*acur)	/* allocation cursor (uses ->cnt) */
{
	struct xfs_btree_cur	*cur = acur->cnt;
	xfs_agblock_t		bno;
	xfs_extlen_t		len, cur_len;
	int			error;
	int			i;

	/* Nothing to do once the cntbt cursor has been deactivated. */
	if (!xfs_alloc_cur_active(cur))
		return 0;

	/* locality optimized lookup */
	cur_len = acur->cur_len;
	error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
	if (error)
		return error;
	if (i == 0)
		return 0;
	error = xfs_alloc_get_rec(cur, &bno, &len, &i);
	if (error)
		return error;

	/* check the current record and update search length from it */
	error = xfs_alloc_cur_check(args, acur, cur, &i);
	if (error)
		return error;
	ASSERT(len >= acur->cur_len);
	acur->cur_len = len;

	/*
	 * We looked up the first record >= [agbno, len] above. The agbno is a
	 * secondary key and so the current record may lie just before or after
	 * agbno. If it is past agbno, check the previous record too so long as
	 * the length matches as it may be closer. Don't check a smaller record
	 * because that could deactivate our cursor.
	 */
	if (bno > args->agbno) {
		error = xfs_btree_decrement(cur, 0, &i);
		if (!error && i) {
			error = xfs_alloc_get_rec(cur, &bno, &len, &i);
			if (!error && i && len == acur->cur_len)
				error = xfs_alloc_cur_check(args, acur, cur,
							    &i);
		}
		if (error)
			return error;
	}

	/*
	 * Increment the search key until we find at least one allocation
	 * candidate or if the extent we found was larger. Otherwise, double the
	 * search key to optimize the search. Efficiency is more important here
	 * than absolute best locality.
	 */
	cur_len <<= 1;
	if (!acur->len || acur->cur_len >= cur_len)
		acur->cur_len++;
	else
		acur->cur_len = cur_len;

	return error;
}
1107 :
1108 : /*
1109 : * Deal with the case where only small freespaces remain. Either return the
1110 : * contents of the last freespace record, or allocate space from the freelist if
1111 : * there is nothing in the tree.
1112 : */
1113 : STATIC int /* error */
1114 1314452 : xfs_alloc_ag_vextent_small(
1115 : struct xfs_alloc_arg *args, /* allocation argument structure */
1116 : struct xfs_btree_cur *ccur, /* optional by-size cursor */
1117 : xfs_agblock_t *fbnop, /* result block number */
1118 : xfs_extlen_t *flenp, /* result length */
1119 : int *stat) /* status: 0-freelist, 1-normal/none */
1120 : {
1121 1314452 : struct xfs_agf *agf = args->agbp->b_addr;
1122 1314452 : int error = 0;
1123 1314452 : xfs_agblock_t fbno = NULLAGBLOCK;
1124 1314452 : xfs_extlen_t flen = 0;
1125 1314452 : int i = 0;
1126 :
1127 : /*
1128 : * If a cntbt cursor is provided, try to allocate the largest record in
1129 : * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
1130 : * allocation. Make sure to respect minleft even when pulling from the
1131 : * freelist.
1132 : */
1133 1314452 : if (ccur)
1134 1314452 : error = xfs_btree_decrement(ccur, 0, &i);
1135 1314449 : if (error)
1136 0 : goto error;
1137 1314449 : if (i) {
1138 1314226 : error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
1139 1314223 : if (error)
1140 0 : goto error;
1141 1314223 : if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1142 0 : xfs_btree_mark_sick(ccur);
1143 0 : error = -EFSCORRUPTED;
1144 0 : goto error;
1145 : }
1146 1314223 : goto out;
1147 : }
1148 :
1149 223 : if (args->minlen != 1 || args->alignment != 1 ||
1150 223 : args->resv == XFS_AG_RESV_AGFL ||
1151 0 : be32_to_cpu(agf->agf_flcount) <= args->minleft)
1152 223 : goto out;
1153 :
1154 0 : error = xfs_alloc_get_freelist(args->pag, args->tp, args->agbp,
1155 : &fbno, 0);
1156 0 : if (error)
1157 0 : goto error;
1158 0 : if (fbno == NULLAGBLOCK)
1159 0 : goto out;
1160 :
1161 0 : xfs_extent_busy_reuse(args->mp, args->pag, fbno, 1,
1162 0 : (args->datatype & XFS_ALLOC_NOBUSY));
1163 :
1164 0 : if (args->datatype & XFS_ALLOC_USERDATA) {
1165 0 : struct xfs_buf *bp;
1166 :
1167 0 : error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
1168 0 : XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
1169 0 : args->mp->m_bsize, 0, &bp);
1170 0 : if (error)
1171 0 : goto error;
1172 0 : xfs_trans_binval(args->tp, bp);
1173 : }
1174 0 : *fbnop = args->agbno = fbno;
1175 0 : *flenp = args->len = 1;
1176 0 : if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) {
1177 0 : xfs_btree_mark_sick(ccur);
1178 0 : error = -EFSCORRUPTED;
1179 0 : goto error;
1180 : }
1181 0 : args->wasfromfl = 1;
1182 0 : trace_xfs_alloc_small_freelist(args);
1183 :
1184 : /*
1185 : * If we're feeding an AGFL block to something that doesn't live in the
1186 : * free space, we need to clear out the OWN_AG rmap.
1187 : */
1188 0 : error = xfs_rmap_free(args->tp, args->agbp, args->pag, fbno, 1,
1189 : &XFS_RMAP_OINFO_AG);
1190 0 : if (error)
1191 0 : goto error;
1192 :
1193 0 : *stat = 0;
1194 0 : return 0;
1195 :
1196 1314446 : out:
1197 : /*
1198 : * Can't do the allocation, give up.
1199 : */
1200 1314446 : if (flen < args->minlen) {
1201 223 : args->agbno = NULLAGBLOCK;
1202 223 : trace_xfs_alloc_small_notenough(args);
1203 223 : flen = 0;
1204 : }
1205 1314446 : *fbnop = fbno;
1206 1314446 : *flenp = flen;
1207 1314446 : *stat = 1;
1208 1314446 : trace_xfs_alloc_small_done(args);
1209 1314446 : return 0;
1210 :
1211 0 : error:
1212 0 : trace_xfs_alloc_small_error(args);
1213 0 : return error;
1214 : }
1215 :
1216 : /*
1217 : * Allocate a variable extent at exactly agno/bno.
1218 : * Extent's length (returned in *len) will be between minlen and maxlen,
1219 : * and of the form k * prod + mod unless there's nothing that large.
1220 : * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
1221 : */
STATIC int			/* error */
xfs_alloc_ag_vextent_exact(
	xfs_alloc_arg_t		*args)	/* allocation argument structure */
{
	struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
	struct xfs_btree_cur	*bno_cur;/* by block-number btree cursor */
	struct xfs_btree_cur	*cnt_cur;/* by count btree cursor */
	int			error;
	xfs_agblock_t		fbno;	/* start block of found extent */
	xfs_extlen_t		flen;	/* length of found extent */
	xfs_agblock_t		tbno;	/* start block of busy extent */
	xfs_extlen_t		tlen;	/* length of busy extent */
	xfs_agblock_t		tend;	/* end block of busy extent */
	int			i;	/* success/failure of operation */
	unsigned		busy_gen;

	/* Exact-bno allocations never apply extra alignment. */
	ASSERT(args->alignment == 1);

	/*
	 * Allocate/initialize a cursor for the by-number freespace btree.
	 */
	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
					  args->pag, XFS_BTNUM_BNO);

	/*
	 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
	 * Look for the closest free block <= bno, it must contain bno
	 * if any free block does.
	 */
	error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
	if (error)
		goto error0;
	if (!i)
		goto not_found;

	/*
	 * Grab the freespace record.
	 */
	error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
	if (error)
		goto error0;
	if (XFS_IS_CORRUPT(args->mp, i != 1)) {
		xfs_btree_mark_sick(bno_cur);
		error = -EFSCORRUPTED;
		goto error0;
	}
	ASSERT(fbno <= args->agbno);

	/*
	 * Check for overlapping busy extents.
	 */
	tbno = fbno;
	tlen = flen;
	xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);

	/*
	 * Give up if the start of the extent is busy, or the freespace isn't
	 * long enough for the minimum request.
	 */
	if (tbno > args->agbno)
		goto not_found;
	if (tlen < args->minlen)
		goto not_found;
	tend = tbno + tlen;
	if (tend < args->agbno + args->minlen)
		goto not_found;

	/*
	 * End of extent will be smaller of the freespace end and the
	 * maximal requested end.
	 *
	 * Fix the length according to mod and prod if given.
	 */
	args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
						- args->agbno;
	xfs_alloc_fix_len(args);
	ASSERT(args->agbno + args->len <= tend);

	/*
	 * We are allocating agbno for args->len
	 * Allocate/initialize a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
					args->pag, XFS_BTNUM_CNT);
	ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length));
	error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
				      args->len, XFSA_FIXUP_BNO_OK);
	if (error) {
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
		goto error0;
	}

	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);

	args->wasfromfl = 0;
	trace_xfs_alloc_exact_done(args);
	return 0;

not_found:
	/* Didn't find it, return null. */
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	args->agbno = NULLAGBLOCK;
	trace_xfs_alloc_exact_notfound(args);
	return 0;

error0:
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	trace_xfs_alloc_exact_error(args);
	return error;
}
1333 :
1334 : /*
1335 : * Search a given number of btree records in a given direction. Check each
1336 : * record against the good extent we've already found.
1337 : */
STATIC int
xfs_alloc_walk_iter(
	struct xfs_alloc_arg	*args,		/* allocation argument structure */
	struct xfs_alloc_cur	*acur,		/* tracks the best candidate so far */
	struct xfs_btree_cur	*cur,		/* btree cursor to walk */
	bool			increment,	/* true: walk right, false: left */
	bool			find_one, /* quit on first candidate */
	int			count, /* rec count (-1 for infinite) */
	int			*stat)		/* out: 1 if a candidate was found */
{
	int			error;
	int			i;

	*stat = 0;

	/*
	 * Search so long as the cursor is active or we find a better extent.
	 * The cursor is deactivated if it extends beyond the range of the
	 * current allocation candidate.
	 */
	while (xfs_alloc_cur_active(cur) && count) {
		error = xfs_alloc_cur_check(args, acur, cur, &i);
		if (error)
			return error;
		if (i == 1) {
			*stat = 1;
			if (find_one)
				break;
		}
		/* cur_check may have deactivated the cursor; stop walking. */
		if (!xfs_alloc_cur_active(cur))
			break;

		if (increment)
			error = xfs_btree_increment(cur, 0, &i);
		else
			error = xfs_btree_decrement(cur, 0, &i);
		if (error)
			return error;
		/* i == 0 means we walked off the end of the tree. */
		if (i == 0)
			cur->bc_ag.abt.active = false;

		/* A negative count means walk until the cursor deactivates. */
		if (count > 0)
			count--;
	}

	return 0;
}
1385 :
1386 : /*
1387 : * Search the by-bno and by-size btrees in parallel in search of an extent with
1388 : * ideal locality based on the NEAR mode ->agbno locality hint.
1389 : */
STATIC int
xfs_alloc_ag_vextent_locality(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	struct xfs_alloc_cur	*acur,	/* allocation cursor (all 3 cursors set up) */
	int			*stat)	/* out: 1 if a candidate was found */
{
	struct xfs_btree_cur	*fbcur = NULL;	/* cursor for the final sweep */
	int			error;
	int			i;
	bool			fbinc;		/* direction of the final sweep */

	ASSERT(acur->len == 0);

	*stat = 0;

	/* Position all three cursors around the agbno locality hint. */
	error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i);
	if (error)
		return error;
	error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i);
	if (error)
		return error;
	error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i);
	if (error)
		return error;

	/*
	 * Search the bnobt and cntbt in parallel. Search the bnobt left and
	 * right and lookup the closest extent to the locality hint for each
	 * extent size key in the cntbt. The entire search terminates
	 * immediately on a bnobt hit because that means we've found best case
	 * locality. Otherwise the search continues until the cntbt cursor runs
	 * off the end of the tree. If no allocation candidate is found at this
	 * point, give up on locality, walk backwards from the end of the cntbt
	 * and take the first available extent.
	 *
	 * The parallel tree searches balance each other out to provide fairly
	 * consistent performance for various situations. The bnobt search can
	 * have pathological behavior in the worst case scenario of larger
	 * allocation requests and fragmented free space. On the other hand, the
	 * bnobt is able to satisfy most smaller allocation requests much more
	 * quickly than the cntbt. The cntbt search can sift through fragmented
	 * free space and sets of free extents for larger allocation requests
	 * more quickly than the bnobt. Since the locality hint is just a hint
	 * and we don't want to scan the entire bnobt for perfect locality, the
	 * cntbt search essentially bounds the bnobt search such that we can
	 * find good enough locality at reasonable performance in most cases.
	 */
	while (xfs_alloc_cur_active(acur->bnolt) ||
	       xfs_alloc_cur_active(acur->bnogt) ||
	       xfs_alloc_cur_active(acur->cnt)) {

		trace_xfs_alloc_cur_lookup(args);

		/*
		 * Search the bnobt left and right. In the case of a hit, finish
		 * the search in the opposite direction and we're done.
		 */
		error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false,
					    true, 1, &i);
		if (error)
			return error;
		if (i == 1) {
			trace_xfs_alloc_cur_left(args);
			fbcur = acur->bnogt;
			fbinc = true;
			break;
		}
		error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true,
					    1, &i);
		if (error)
			return error;
		if (i == 1) {
			trace_xfs_alloc_cur_right(args);
			fbcur = acur->bnolt;
			fbinc = false;
			break;
		}

		/*
		 * Check the extent with best locality based on the current
		 * extent size search key and keep track of the best candidate.
		 */
		error = xfs_alloc_cntbt_iter(args, acur);
		if (error)
			return error;
		if (!xfs_alloc_cur_active(acur->cnt)) {
			trace_xfs_alloc_cur_lookup_done(args);
			break;
		}
	}

	/*
	 * If we failed to find anything due to busy extents, return empty
	 * handed so the caller can flush and retry. If no busy extents were
	 * found, walk backwards from the end of the cntbt as a last resort.
	 */
	if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) {
		error = xfs_btree_decrement(acur->cnt, 0, &i);
		if (error)
			return error;
		if (i) {
			acur->cnt->bc_ag.abt.active = true;
			fbcur = acur->cnt;
			fbinc = false;
		}
	}

	/*
	 * Search in the opposite direction for a better entry in the case of
	 * a bnobt hit or walk backwards from the end of the cntbt.
	 */
	if (fbcur) {
		error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1,
					    &i);
		if (error)
			return error;
	}

	if (acur->len)
		*stat = 1;

	return 0;
}
1513 :
1514 : /* Check the last block of the cnt btree for allocations. */
static int
xfs_alloc_ag_vextent_lastblock(
	struct xfs_alloc_arg	*args,		/* allocation argument structure */
	struct xfs_alloc_cur	*acur,		/* allocation cursor (uses ->cnt) */
	xfs_agblock_t		*bno,		/* in/out: current record's bno */
	xfs_extlen_t		*len,		/* in/out: current record's length */
	bool			*allocated)	/* out: true if a candidate was taken */
{
	int			error;
	int			i;

#ifdef DEBUG
	/* Randomly don't execute the first algorithm. */
	if (get_random_u32_below(2))
		return 0;
#endif

	/*
	 * Start from the entry that lookup found, sequence through all larger
	 * free blocks. If we're actually pointing at a record smaller than
	 * maxlen, go to the start of this block, and skip all those smaller
	 * than minlen.
	 */
	if (*len || args->alignment > 1) {
		acur->cnt->bc_levels[0].ptr = 1;
		do {
			error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
			if (error)
				return error;
			if (XFS_IS_CORRUPT(args->mp, i != 1)) {
				xfs_btree_mark_sick(acur->cnt);
				return -EFSCORRUPTED;
			}
			if (*len >= args->minlen)
				break;
			error = xfs_btree_increment(acur->cnt, 0, &i);
			if (error)
				return error;
		} while (i);
		ASSERT(*len >= args->minlen);
		if (!i)
			return 0;
	}

	/* Walk right through the last block looking for the best candidate. */
	error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i);
	if (error)
		return error;

	/*
	 * It didn't work. We COULD be in a case where there's a good record
	 * somewhere, so try again.
	 */
	if (acur->len == 0)
		return 0;

	trace_xfs_alloc_near_first(args);
	*allocated = true;
	return 0;
}
1574 :
1575 : /*
1576 : * Allocate a variable extent near bno in the allocation group agno.
1577 : * Extent's length (returned in len) will be between minlen and maxlen,
1578 : * and of the form k * prod + mod unless there's nothing that large.
1579 : * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1580 : */
STATIC int
xfs_alloc_ag_vextent_near(
	struct xfs_alloc_arg	*args,		/* allocation argument structure */
	uint32_t		alloc_flags)	/* XFS_ALLOC_FLAG_* behavior flags */
{
	struct xfs_alloc_cur	acur = {};
	int			error;		/* error code */
	int			i;		/* result code, temporary */
	xfs_agblock_t		bno;
	xfs_extlen_t		len;

	/* handle uninitialized agbno range so caller doesn't have to */
	if (!args->min_agbno && !args->max_agbno)
		args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
	ASSERT(args->min_agbno <= args->max_agbno);

	/* clamp agbno to the range if it's outside */
	if (args->agbno < args->min_agbno)
		args->agbno = args->min_agbno;
	if (args->agbno > args->max_agbno)
		args->agbno = args->max_agbno;

	/* Retry once quickly if we find busy extents before blocking. */
	alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
restart:
	len = 0;

	/*
	 * Set up cursors and see if there are any free extents as big as
	 * maxlen. If not, pick the last entry in the tree unless the tree is
	 * empty.
	 */
	error = xfs_alloc_cur_setup(args, &acur);
	if (error == -ENOSPC) {
		/* No maxlen extent available; settle for the largest one. */
		error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno,
				&len, &i);
		if (error)
			goto out;
		if (i == 0 || len == 0) {
			trace_xfs_alloc_near_noentry(args);
			goto out;
		}
		ASSERT(i == 1);
	} else if (error) {
		goto out;
	}

	/*
	 * First algorithm.
	 * If the requested extent is large wrt the freespaces available
	 * in this a.g., then the cursor will be pointing to a btree entry
	 * near the right edge of the tree. If it's in the last btree leaf
	 * block, then we just examine all the entries in that block
	 * that are big enough, and pick the best one.
	 */
	if (xfs_btree_islastblock(acur.cnt, 0)) {
		bool		allocated = false;

		error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len,
				&allocated);
		if (error)
			goto out;
		if (allocated)
			goto alloc_finish;
	}

	/*
	 * Second algorithm. Combined cntbt and bnobt search to find ideal
	 * locality.
	 */
	error = xfs_alloc_ag_vextent_locality(args, &acur, &i);
	if (error)
		goto out;

	/*
	 * If we couldn't get anything, give up.
	 */
	if (!acur.len) {
		if (acur.busy) {
			/*
			 * Our only valid extents must have been busy. Flush and
			 * retry the allocation again. If we get an -EAGAIN
			 * error, we're being told that a deadlock was avoided
			 * and the current transaction needs committing before
			 * the allocation can be retried.
			 */
			trace_xfs_alloc_near_busy(args);
			error = xfs_extent_busy_flush(args->tp, args->pag,
					acur.busy_gen, alloc_flags);
			if (error)
				goto out;

			alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
			goto restart;
		}
		trace_xfs_alloc_size_neither(args);
		args->agbno = NULLAGBLOCK;
		goto out;
	}

alloc_finish:
	/* fix up btrees on a successful allocation */
	error = xfs_alloc_cur_finish(args, &acur);

out:
	xfs_alloc_cur_close(&acur, error);
	return error;
}
1689 :
1690 : /*
1691 : * Allocate a variable extent anywhere in the allocation group agno.
1692 : * Extent's length (returned in len) will be between minlen and maxlen,
1693 : * and of the form k * prod + mod unless there's nothing that large.
1694 : * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1695 : */
1696 : static int
1697 8001729 : xfs_alloc_ag_vextent_size(
1698 : struct xfs_alloc_arg *args,
1699 : uint32_t alloc_flags)
1700 : {
1701 8001729 : struct xfs_agf *agf = args->agbp->b_addr;
1702 8001729 : struct xfs_btree_cur *bno_cur;
1703 8001729 : struct xfs_btree_cur *cnt_cur;
1704 8001729 : xfs_agblock_t fbno; /* start of found freespace */
1705 8001729 : xfs_extlen_t flen; /* length of found freespace */
1706 8001729 : xfs_agblock_t rbno; /* returned block number */
1707 8001729 : xfs_extlen_t rlen; /* length of returned extent */
1708 8001729 : bool busy;
1709 8001729 : unsigned busy_gen;
1710 8001729 : int error;
1711 8001729 : int i;
1712 :
1713 : /* Retry once quickly if we find busy extents before blocking. */
1714 8001729 : alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
1715 : restart:
1716 : /*
1717 : * Allocate and initialize a cursor for the by-size btree.
1718 : */
1719 8049622 : cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1720 : args->pag, XFS_BTNUM_CNT);
1721 8050357 : bno_cur = NULL;
1722 :
1723 : /*
1724 : * Look for an entry >= maxlen+alignment-1 blocks.
1725 : */
1726 8049871 : if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1727 8050357 : args->maxlen + args->alignment - 1, &i)))
1728 47 : goto error0;
1729 :
1730 : /*
1731 : * If none then we have to settle for a smaller extent. In the case that
1732 : * there are no large extents, this will return the last entry in the
1733 : * tree unless the tree is empty. In the case that there are only busy
1734 : * large extents, this will return the largest small extent unless there
1735 : * are no smaller extents available.
1736 : */
1737 8049824 : if (!i) {
1738 337891 : error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1739 : &fbno, &flen, &i);
1740 337891 : if (error)
1741 0 : goto error0;
1742 337891 : if (i == 0 || flen == 0) {
1743 223 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1744 223 : trace_xfs_alloc_size_noentry(args);
1745 223 : return 0;
1746 : }
1747 337668 : ASSERT(i == 1);
1748 337668 : busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
1749 : &rlen, &busy_gen);
1750 : } else {
1751 : /*
1752 : * Search for a non-busy extent that is large enough.
1753 : */
1754 1560465937 : for (;;) {
1755 784088935 : error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1756 784000032 : if (error)
1757 0 : goto error0;
1758 784000032 : if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1759 0 : xfs_btree_mark_sick(cnt_cur);
1760 0 : error = -EFSCORRUPTED;
1761 0 : goto error0;
1762 : }
1763 :
1764 784000032 : busy = xfs_alloc_compute_aligned(args, fbno, flen,
1765 : &rbno, &rlen, &busy_gen);
1766 :
1767 784235271 : if (rlen >= args->maxlen)
1768 : break;
1769 :
1770 776568944 : error = xfs_btree_increment(cnt_cur, 0, &i);
1771 776423281 : if (error)
1772 0 : goto error0;
1773 776423281 : if (i)
1774 776377002 : continue;
1775 :
1776 : /*
1777 : * Our only valid extents must have been busy. Flush and
1778 : * retry the allocation again. If we get an -EAGAIN
1779 : * error, we're being told that a deadlock was avoided
1780 : * and the current transaction needs committing before
1781 : * the allocation can be retried.
1782 : */
1783 46279 : trace_xfs_alloc_size_busy(args);
1784 46279 : error = xfs_extent_busy_flush(args->tp, args->pag,
1785 : busy_gen, alloc_flags);
1786 46278 : if (error)
1787 0 : goto error0;
1788 :
1789 46278 : alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
1790 46278 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1791 46279 : goto restart;
1792 : }
1793 : }
1794 :
1795 : /*
1796 : * In the first case above, we got the last entry in the
1797 : * by-size btree. Now we check to see if the space hits maxlen
1798 : * once aligned; if not, we search left for something better.
1799 : * This can't happen in the second case above.
1800 : */
1801 8003520 : rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1802 8003520 : if (XFS_IS_CORRUPT(args->mp,
1803 : rlen != 0 &&
1804 : (rlen > flen ||
1805 : rbno + rlen > fbno + flen))) {
1806 0 : xfs_btree_mark_sick(cnt_cur);
1807 0 : error = -EFSCORRUPTED;
1808 0 : goto error0;
1809 : }
1810 8003520 : if (rlen < args->maxlen) {
1811 337301 : xfs_agblock_t bestfbno;
1812 337301 : xfs_extlen_t bestflen;
1813 337301 : xfs_agblock_t bestrbno;
1814 337301 : xfs_extlen_t bestrlen;
1815 :
1816 337301 : bestrlen = rlen;
1817 337301 : bestrbno = rbno;
1818 337301 : bestflen = flen;
1819 337301 : bestfbno = fbno;
1820 82807747 : for (;;) {
1821 82807747 : if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1822 0 : goto error0;
1823 82807819 : if (i == 0)
1824 : break;
1825 82805086 : if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1826 : &i)))
1827 0 : goto error0;
1828 82804858 : if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1829 0 : xfs_btree_mark_sick(cnt_cur);
1830 0 : error = -EFSCORRUPTED;
1831 0 : goto error0;
1832 : }
1833 82804858 : if (flen < bestrlen)
1834 : break;
1835 82470289 : busy = xfs_alloc_compute_aligned(args, fbno, flen,
1836 : &rbno, &rlen, &busy_gen);
1837 82470446 : rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1838 82470446 : if (XFS_IS_CORRUPT(args->mp,
1839 : rlen != 0 &&
1840 : (rlen > flen ||
1841 : rbno + rlen > fbno + flen))) {
1842 0 : xfs_btree_mark_sick(cnt_cur);
1843 0 : error = -EFSCORRUPTED;
1844 0 : goto error0;
1845 : }
1846 82470446 : if (rlen > bestrlen) {
1847 180356 : bestrlen = rlen;
1848 180356 : bestrbno = rbno;
1849 180356 : bestflen = flen;
1850 180356 : bestfbno = fbno;
1851 180356 : if (rlen == args->maxlen)
1852 : break;
1853 : }
1854 : }
1855 337302 : if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1856 : &i)))
1857 0 : goto error0;
1858 337302 : if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1859 0 : xfs_btree_mark_sick(cnt_cur);
1860 0 : error = -EFSCORRUPTED;
1861 0 : goto error0;
1862 : }
1863 337302 : rlen = bestrlen;
1864 337302 : rbno = bestrbno;
1865 337302 : flen = bestflen;
1866 337302 : fbno = bestfbno;
1867 : }
1868 8003521 : args->wasfromfl = 0;
1869 : /*
1870 : * Fix up the length.
1871 : */
1872 8003521 : args->len = rlen;
1873 8003521 : if (rlen < args->minlen) {
1874 177517 : if (busy) {
1875 : /*
1876 : * Our only valid extents must have been busy. Flush and
1877 : * retry the allocation again. If we get an -EAGAIN
1878 : * error, we're being told that a deadlock was avoided
1879 : * and the current transaction needs committing before
1880 : * the allocation can be retried.
1881 : */
1882 1614 : trace_xfs_alloc_size_busy(args);
1883 1614 : error = xfs_extent_busy_flush(args->tp, args->pag,
1884 : busy_gen, alloc_flags);
1885 1614 : if (error)
1886 0 : goto error0;
1887 :
1888 1614 : alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
1889 1614 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1890 1614 : goto restart;
1891 : }
1892 175903 : goto out_nominleft;
1893 : }
1894 7826004 : xfs_alloc_fix_len(args);
1895 :
1896 7825601 : rlen = args->len;
1897 7825601 : if (XFS_IS_CORRUPT(args->mp, rlen > flen)) {
1898 0 : xfs_btree_mark_sick(cnt_cur);
1899 0 : error = -EFSCORRUPTED;
1900 0 : goto error0;
1901 : }
1902 : /*
1903 : * Allocate and initialize a cursor for the by-block tree.
1904 : */
1905 7825601 : bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1906 : args->pag, XFS_BTNUM_BNO);
1907 7826510 : if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1908 : rbno, rlen, XFSA_FIXUP_CNT_OK)))
1909 37 : goto error0;
1910 7825547 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1911 7826367 : xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1912 7826752 : cnt_cur = bno_cur = NULL;
1913 7826752 : args->len = rlen;
1914 7826752 : args->agbno = rbno;
1915 7826752 : if (XFS_IS_CORRUPT(args->mp,
1916 : args->agbno + args->len >
1917 : be32_to_cpu(agf->agf_length))) {
1918 0 : xfs_ag_mark_sick(args->pag, XFS_SICK_AG_BNOBT);
1919 0 : error = -EFSCORRUPTED;
1920 0 : goto error0;
1921 : }
1922 7826752 : trace_xfs_alloc_size_done(args);
1923 7826752 : return 0;
1924 :
1925 84 : error0:
1926 84 : trace_xfs_alloc_size_error(args);
1927 84 : if (cnt_cur)
1928 84 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1929 84 : if (bno_cur)
1930 37 : xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1931 : return error;
1932 :
1933 : out_nominleft:
1934 175903 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1935 175903 : trace_xfs_alloc_size_nominleft(args);
1936 175903 : args->agbno = NULLAGBLOCK;
1937 175903 : return 0;
1938 : }
1939 :
1940 : /*
1941 : * Free the extent starting at agno/bno for length.
1942 : */
1943 : STATIC int
1944 105901561 : xfs_free_ag_extent(
1945 : struct xfs_trans *tp,
1946 : struct xfs_buf *agbp,
1947 : xfs_agnumber_t agno,
1948 : xfs_agblock_t bno,
1949 : xfs_extlen_t len,
1950 : const struct xfs_owner_info *oinfo,
1951 : enum xfs_ag_resv_type type)
1952 : {
1953 105901561 : struct xfs_mount *mp;
1954 105901561 : struct xfs_btree_cur *bno_cur;
1955 105901561 : struct xfs_btree_cur *cnt_cur;
1956 105901561 : xfs_agblock_t gtbno; /* start of right neighbor */
1957 105901561 : xfs_extlen_t gtlen; /* length of right neighbor */
1958 105901561 : xfs_agblock_t ltbno; /* start of left neighbor */
1959 105901561 : xfs_extlen_t ltlen; /* length of left neighbor */
1960 105901561 : xfs_agblock_t nbno; /* new starting block of freesp */
1961 105901561 : xfs_extlen_t nlen; /* new length of freespace */
1962 105901561 : int haveleft; /* have a left neighbor */
1963 105901561 : int haveright; /* have a right neighbor */
1964 105901561 : int i;
1965 105901561 : int error;
1966 105901561 : struct xfs_perag *pag = agbp->b_pag;
1967 :
1968 105901561 : bno_cur = cnt_cur = NULL;
1969 105901561 : mp = tp->t_mountp;
1970 :
1971 105901561 : if (!xfs_rmap_should_skip_owner_update(oinfo)) {
1972 4182818 : error = xfs_rmap_free(tp, agbp, pag, bno, len, oinfo);
1973 4183496 : if (error)
1974 35 : goto error0;
1975 : }
1976 :
1977 : /*
1978 : * Allocate and initialize a cursor for the by-block btree.
1979 : */
1980 105902204 : bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_BNO);
1981 : /*
1982 : * Look for a neighboring block on the left (lower block numbers)
1983 : * that is contiguous with this space.
1984 : */
1985 105904619 : if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1986 111 : goto error0;
1987 105902952 : if (haveleft) {
1988 : /*
1989 : * There is a block to our left.
1990 : */
1991 103477356 : if ((error = xfs_alloc_get_rec(bno_cur, <bno, <len, &i)))
1992 0 : goto error0;
1993 103471545 : if (XFS_IS_CORRUPT(mp, i != 1)) {
1994 0 : xfs_btree_mark_sick(bno_cur);
1995 0 : error = -EFSCORRUPTED;
1996 0 : goto error0;
1997 : }
1998 : /*
1999 : * It's not contiguous, though.
2000 : */
2001 103471545 : if (ltbno + ltlen < bno)
2002 77153106 : haveleft = 0;
2003 : else {
2004 : /*
2005 : * If this failure happens the request to free this
2006 : * space was invalid, it's (partly) already free.
2007 : * Very bad.
2008 : */
2009 26318439 : if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) {
2010 0 : xfs_btree_mark_sick(bno_cur);
2011 0 : error = -EFSCORRUPTED;
2012 0 : goto error0;
2013 : }
2014 : }
2015 : }
2016 : /*
2017 : * Look for a neighboring block on the right (higher block numbers)
2018 : * that is contiguous with this space.
2019 : */
2020 105897141 : if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
2021 0 : goto error0;
2022 105898683 : if (haveright) {
2023 : /*
2024 : * There is a block to our right.
2025 : */
2026 104771196 : if ((error = xfs_alloc_get_rec(bno_cur, >bno, >len, &i)))
2027 0 : goto error0;
2028 104773730 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2029 0 : xfs_btree_mark_sick(bno_cur);
2030 0 : error = -EFSCORRUPTED;
2031 0 : goto error0;
2032 : }
2033 : /*
2034 : * It's not contiguous, though.
2035 : */
2036 104773730 : if (bno + len < gtbno)
2037 73026554 : haveright = 0;
2038 : else {
2039 : /*
2040 : * If this failure happens the request to free this
2041 : * space was invalid, it's (partly) already free.
2042 : * Very bad.
2043 : */
2044 31747176 : if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) {
2045 0 : xfs_btree_mark_sick(bno_cur);
2046 0 : error = -EFSCORRUPTED;
2047 0 : goto error0;
2048 : }
2049 : }
2050 : }
2051 : /*
2052 : * Now allocate and initialize a cursor for the by-size tree.
2053 : */
2054 105901217 : cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_CNT);
2055 : /*
2056 : * Have both left and right contiguous neighbors.
2057 : * Merge all three into a single free block.
2058 : */
2059 105903294 : if (haveleft && haveright) {
2060 : /*
2061 : * Delete the old by-size entry on the left.
2062 : */
2063 13890665 : if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2064 15 : goto error0;
2065 13890869 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2066 0 : xfs_btree_mark_sick(cnt_cur);
2067 0 : error = -EFSCORRUPTED;
2068 0 : goto error0;
2069 : }
2070 13890869 : if ((error = xfs_btree_delete(cnt_cur, &i)))
2071 0 : goto error0;
2072 13889932 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2073 0 : xfs_btree_mark_sick(cnt_cur);
2074 0 : error = -EFSCORRUPTED;
2075 0 : goto error0;
2076 : }
2077 : /*
2078 : * Delete the old by-size entry on the right.
2079 : */
2080 13889932 : if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2081 7 : goto error0;
2082 13890929 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2083 0 : xfs_btree_mark_sick(cnt_cur);
2084 0 : error = -EFSCORRUPTED;
2085 0 : goto error0;
2086 : }
2087 13890929 : if ((error = xfs_btree_delete(cnt_cur, &i)))
2088 0 : goto error0;
2089 13890974 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2090 0 : xfs_btree_mark_sick(cnt_cur);
2091 0 : error = -EFSCORRUPTED;
2092 0 : goto error0;
2093 : }
2094 : /*
2095 : * Delete the old by-block entry for the right block.
2096 : */
2097 13890974 : if ((error = xfs_btree_delete(bno_cur, &i)))
2098 1 : goto error0;
2099 13890964 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2100 0 : xfs_btree_mark_sick(bno_cur);
2101 0 : error = -EFSCORRUPTED;
2102 0 : goto error0;
2103 : }
2104 : /*
2105 : * Move the by-block cursor back to the left neighbor.
2106 : */
2107 13890964 : if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2108 0 : goto error0;
2109 13890575 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2110 0 : xfs_btree_mark_sick(bno_cur);
2111 0 : error = -EFSCORRUPTED;
2112 0 : goto error0;
2113 : }
2114 : #ifdef DEBUG
2115 : /*
2116 : * Check that this is the right record: delete didn't
2117 : * mangle the cursor.
2118 : */
2119 : {
2120 13890575 : xfs_agblock_t xxbno;
2121 13890575 : xfs_extlen_t xxlen;
2122 :
2123 13890575 : if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
2124 : &i)))
2125 0 : goto error0;
2126 13890744 : if (XFS_IS_CORRUPT(mp,
2127 : i != 1 ||
2128 : xxbno != ltbno ||
2129 : xxlen != ltlen)) {
2130 0 : xfs_btree_mark_sick(bno_cur);
2131 0 : error = -EFSCORRUPTED;
2132 0 : goto error0;
2133 : }
2134 : }
2135 : #endif
2136 : /*
2137 : * Update remaining by-block entry to the new, joined block.
2138 : */
2139 13890744 : nbno = ltbno;
2140 13890744 : nlen = len + ltlen + gtlen;
2141 13890744 : if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2142 0 : goto error0;
2143 : }
2144 : /*
2145 : * Have only a left contiguous neighbor.
2146 : * Merge it together with the new freespace.
2147 : */
2148 92012629 : else if (haveleft) {
2149 : /*
2150 : * Delete the old by-size entry on the left.
2151 : */
2152 12429367 : if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2153 6 : goto error0;
2154 12429870 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2155 0 : xfs_btree_mark_sick(cnt_cur);
2156 0 : error = -EFSCORRUPTED;
2157 0 : goto error0;
2158 : }
2159 12429870 : if ((error = xfs_btree_delete(cnt_cur, &i)))
2160 0 : goto error0;
2161 12428892 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2162 0 : xfs_btree_mark_sick(cnt_cur);
2163 0 : error = -EFSCORRUPTED;
2164 0 : goto error0;
2165 : }
2166 : /*
2167 : * Back up the by-block cursor to the left neighbor, and
2168 : * update its length.
2169 : */
2170 12428892 : if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2171 0 : goto error0;
2172 12429028 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2173 0 : xfs_btree_mark_sick(bno_cur);
2174 0 : error = -EFSCORRUPTED;
2175 0 : goto error0;
2176 : }
2177 12429028 : nbno = ltbno;
2178 12429028 : nlen = len + ltlen;
2179 12429028 : if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2180 0 : goto error0;
2181 : }
2182 : /*
2183 : * Have only a right contiguous neighbor.
2184 : * Merge it together with the new freespace.
2185 : */
2186 79583262 : else if (haveright) {
2187 : /*
2188 : * Delete the old by-size entry on the right.
2189 : */
2190 17856818 : if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2191 27 : goto error0;
2192 17856846 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2193 0 : xfs_btree_mark_sick(cnt_cur);
2194 0 : error = -EFSCORRUPTED;
2195 0 : goto error0;
2196 : }
2197 17856846 : if ((error = xfs_btree_delete(cnt_cur, &i)))
2198 0 : goto error0;
2199 17856156 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2200 0 : xfs_btree_mark_sick(cnt_cur);
2201 0 : error = -EFSCORRUPTED;
2202 0 : goto error0;
2203 : }
2204 : /*
2205 : * Update the starting block and length of the right
2206 : * neighbor in the by-block tree.
2207 : */
2208 17856156 : nbno = bno;
2209 17856156 : nlen = len + gtlen;
2210 17856156 : if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2211 0 : goto error0;
2212 : }
2213 : /*
2214 : * No contiguous neighbors.
2215 : * Insert the new freespace into the by-block tree.
2216 : */
2217 : else {
2218 61726444 : nbno = bno;
2219 61726444 : nlen = len;
2220 61726444 : if ((error = xfs_btree_insert(bno_cur, &i)))
2221 0 : goto error0;
2222 61722493 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2223 0 : xfs_btree_mark_sick(bno_cur);
2224 0 : error = -EFSCORRUPTED;
2225 0 : goto error0;
2226 : }
2227 : }
2228 105898409 : xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
2229 105902510 : bno_cur = NULL;
2230 : /*
2231 : * In all cases we need to insert the new freespace in the by-size tree.
2232 : */
2233 105902510 : if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
2234 39 : goto error0;
2235 105902880 : if (XFS_IS_CORRUPT(mp, i != 0)) {
2236 0 : xfs_btree_mark_sick(cnt_cur);
2237 0 : error = -EFSCORRUPTED;
2238 0 : goto error0;
2239 : }
2240 105902880 : if ((error = xfs_btree_insert(cnt_cur, &i)))
2241 0 : goto error0;
2242 105893135 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2243 0 : xfs_btree_mark_sick(cnt_cur);
2244 0 : error = -EFSCORRUPTED;
2245 0 : goto error0;
2246 : }
2247 105893135 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
2248 105903024 : cnt_cur = NULL;
2249 :
2250 : /*
2251 : * Update the freespace totals in the ag and superblock.
2252 : */
2253 105903024 : error = xfs_alloc_update_counters(tp, agbp, len);
2254 105892986 : xfs_ag_resv_free_extent(agbp->b_pag, type, tp, len);
2255 105867019 : if (error)
2256 0 : goto error0;
2257 :
2258 105867019 : XFS_STATS_INC(mp, xs_freex);
2259 105881825 : XFS_STATS_ADD(mp, xs_freeb, len);
2260 :
2261 105881943 : trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
2262 :
2263 105881943 : return 0;
2264 :
2265 241 : error0:
2266 241 : trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
2267 241 : if (bno_cur)
2268 167 : xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
2269 241 : if (cnt_cur)
2270 95 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
2271 : return error;
2272 : }
2273 :
2274 : /*
2275 : * Visible (exported) allocation/free functions.
2276 : * Some of these are used just by xfs_alloc_btree.c and this file.
2277 : */
2278 :
2279 : /*
2280 : * Compute and fill in value of m_alloc_maxlevels.
2281 : */
2282 : void
2283 66972 : xfs_alloc_compute_maxlevels(
2284 : xfs_mount_t *mp) /* file system mount structure */
2285 : {
2286 133944 : mp->m_alloc_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
2287 66972 : (mp->m_sb.sb_agblocks + 1) / 2);
2288 66972 : ASSERT(mp->m_alloc_maxlevels <= xfs_allocbt_maxlevels_ondisk());
2289 66972 : }
2290 :
2291 : /*
2292 : * Find the length of the longest extent in an AG. The 'need' parameter
2293 : * specifies how much space we're going to need for the AGFL and the
2294 : * 'reserved' parameter tells us how many blocks in this AG are reserved for
2295 : * other callers.
2296 : */
2297 : xfs_extlen_t
2298 480540823 : xfs_alloc_longest_free_extent(
2299 : struct xfs_perag *pag,
2300 : xfs_extlen_t need,
2301 : xfs_extlen_t reserved)
2302 : {
2303 480540823 : xfs_extlen_t delta = 0;
2304 :
2305 : /*
2306 : * If the AGFL needs a recharge, we'll have to subtract that from the
2307 : * longest extent.
2308 : */
2309 480540823 : if (need > pag->pagf_flcount)
2310 798862 : delta = need - pag->pagf_flcount;
2311 :
2312 : /*
2313 : * If we cannot maintain others' reservations with space from the
2314 : * not-longest freesp extents, we'll have to subtract /that/ from
2315 : * the longest extent too.
2316 : */
2317 480540823 : if (pag->pagf_freeblks - pag->pagf_longest < reserved)
2318 373023198 : delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
2319 :
2320 : /*
2321 : * If the longest extent is long enough to satisfy all the
2322 : * reservations and AGFL rules in place, we can return this extent.
2323 : */
2324 480540823 : if (pag->pagf_longest > delta)
2325 480041791 : return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
2326 : pag->pagf_longest - delta);
2327 :
2328 : /* Otherwise, let the caller try for 1 block if there's space. */
2329 499032 : return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
2330 : }
2331 :
2332 : /*
2333 : * Compute the minimum length of the AGFL in the given AG. If @pag is NULL,
2334 : * return the largest possible minimum length.
2335 : */
2336 : unsigned int
2337 1580612266 : xfs_alloc_min_freelist(
2338 : struct xfs_mount *mp,
2339 : struct xfs_perag *pag)
2340 : {
2341 : /* AG btrees have at least 1 level. */
2342 1580612266 : static const uint8_t fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
2343 1580612266 : const uint8_t *levels = pag ? pag->pagf_levels : fake_levels;
2344 1580612266 : unsigned int min_free;
2345 :
2346 1580612266 : ASSERT(mp->m_alloc_maxlevels > 0);
2347 :
2348 : /* space needed by-bno freespace btree */
2349 1580612266 : min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
2350 : mp->m_alloc_maxlevels);
2351 : /* space needed by-size freespace btree */
2352 1580612266 : min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
2353 : mp->m_alloc_maxlevels);
2354 : /* space needed reverse mapping used space btree */
2355 1580612266 : if (xfs_has_rmapbt(mp))
2356 1570176017 : min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
2357 : mp->m_rmap_maxlevels);
2358 :
2359 1580612266 : return min_free;
2360 : }
2361 :
2362 : /*
2363 : * Check if the operation we are fixing up the freelist for should go ahead or
2364 : * not. If we are freeing blocks, we always allow it, otherwise the allocation
2365 : * is dependent on whether the size and shape of free space available will
2366 : * permit the requested allocation to take place.
2367 : */
static bool
xfs_alloc_space_available(
	struct xfs_alloc_arg	*args,
	xfs_extlen_t		min_free,
	int			flags)
{
	struct xfs_perag	*pag = args->pag;
	xfs_extlen_t		alloc_len, longest;
	xfs_extlen_t		reservation; /* blocks that are still reserved */
	int			available;
	xfs_extlen_t		agflcount;

	/* Frees are always allowed to proceed. */
	if (flags & XFS_ALLOC_FLAG_FREEING)
		return true;

	reservation = xfs_ag_resv_needed(pag, args->resv);

	/* do we have enough contiguous free space for the allocation? */
	alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
	longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
	if (longest < alloc_len)
		return false;

	/*
	 * Do we have enough free space remaining for the allocation? Don't
	 * account extra agfl blocks because we are about to defer free them,
	 * making them unavailable until the current transaction commits.
	 * Note the signed casts: the subtraction can legitimately go below
	 * zero here.
	 */
	agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
	available = (int)(pag->pagf_freeblks + agflcount -
			  reservation - min_free - args->minleft);
	if (available < (int)max(args->total, alloc_len))
		return false;

	/*
	 * Clamp maxlen to the amount of free space available for the actual
	 * extent allocation.  Callers passing XFS_ALLOC_FLAG_CHECK only want
	 * the yes/no answer, so leave args untouched for them.
	 */
	if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
		args->maxlen = available;
		ASSERT(args->maxlen > 0);
		ASSERT(args->maxlen >= args->minlen);
	}

	return true;
}
2414 :
2415 : int
2416 328618 : xfs_free_agfl_block(
2417 : struct xfs_trans *tp,
2418 : xfs_agnumber_t agno,
2419 : xfs_agblock_t agbno,
2420 : struct xfs_buf *agbp,
2421 : struct xfs_owner_info *oinfo)
2422 : {
2423 328618 : int error;
2424 328618 : struct xfs_buf *bp;
2425 :
2426 328618 : error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
2427 : XFS_AG_RESV_AGFL);
2428 328619 : if (error)
2429 : return error;
2430 :
2431 657222 : error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
2432 328611 : XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
2433 328611 : tp->t_mountp->m_bsize, 0, &bp);
2434 328610 : if (error)
2435 : return error;
2436 328610 : xfs_trans_binval(tp, bp);
2437 :
2438 328610 : return 0;
2439 : }
2440 :
2441 : /*
2442 : * Check the agfl fields of the agf for inconsistency or corruption.
2443 : *
2444 : * The original purpose was to detect an agfl header padding mismatch between
2445 : * current and early v5 kernels. This problem manifests as a 1-slot size
2446 : * difference between the on-disk flcount and the active [first, last] range of
2447 : * a wrapped agfl.
2448 : *
2449 : * However, we need to use these same checks to catch agfl count corruptions
2450 : * unrelated to padding. This could occur on any v4 or v5 filesystem, so either
2451 : * way, we need to reset the agfl and warn the user.
2452 : *
2453 : * Return true if a reset is required before the agfl can be used, false
2454 : * otherwise.
2455 : */
2456 : static bool
2457 779899 : xfs_agfl_needs_reset(
2458 : struct xfs_mount *mp,
2459 : struct xfs_agf *agf)
2460 : {
2461 779899 : uint32_t f = be32_to_cpu(agf->agf_flfirst);
2462 779899 : uint32_t l = be32_to_cpu(agf->agf_fllast);
2463 779899 : uint32_t c = be32_to_cpu(agf->agf_flcount);
2464 779899 : int agfl_size = xfs_agfl_size(mp);
2465 779899 : int active;
2466 :
2467 : /*
2468 : * The agf read verifier catches severe corruption of these fields.
2469 : * Repeat some sanity checks to cover a packed -> unpacked mismatch if
2470 : * the verifier allows it.
2471 : */
2472 779899 : if (f >= agfl_size || l >= agfl_size)
2473 : return true;
2474 779899 : if (c > agfl_size)
2475 : return true;
2476 :
2477 : /*
2478 : * Check consistency between the on-disk count and the active range. An
2479 : * agfl padding mismatch manifests as an inconsistent flcount.
2480 : */
2481 779899 : if (c && l >= f)
2482 727816 : active = l - f + 1;
2483 52083 : else if (c)
2484 201 : active = agfl_size - f + l + 1;
2485 : else
2486 : active = 0;
2487 :
2488 779899 : return active != c;
2489 : }
2490 :
2491 : /*
2492 : * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
2493 : * agfl content cannot be trusted. Warn the user that a repair is required to
2494 : * recover leaked blocks.
2495 : *
2496 : * The purpose of this mechanism is to handle filesystems affected by the agfl
2497 : * header padding mismatch problem. A reset keeps the filesystem online with a
2498 : * relatively minor free space accounting inconsistency rather than suffer the
2499 : * inevitable crash from use of an invalid agfl block.
2500 : */
static void
xfs_agfl_reset(
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	struct xfs_perag	*pag)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_agf		*agf = agbp->b_addr;

	ASSERT(xfs_perag_agfl_needs_reset(pag));
	trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);

	xfs_warn(mp,
	       "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
	       "Please unmount and run xfs_repair.",
	         pag->pag_agno, pag->pagf_flcount);

	/*
	 * Make the on-disk AGFL empty: first slot 0, last slot wrapped to
	 * the final index, count zero; then log the fields we changed.
	 */
	agf->agf_flfirst = 0;
	agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
	agf->agf_flcount = 0;
	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
				    XFS_AGF_FLCOUNT);

	/* Bring the in-memory perag in line and clear the reset state. */
	pag->pagf_flcount = 0;
	clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
}
2527 :
2528 : /*
2529 : * Defer an AGFL block free. This is effectively equivalent to
2530 : * xfs_free_extent_later() with some special handling particular to AGFL blocks.
2531 : *
2532 : * Deferring AGFL frees helps prevent log reservation overruns due to too many
2533 : * allocation operations in a transaction. AGFL frees are prone to this problem
2534 : * because for one they are always freed one at a time. Further, an immediate
2535 : * AGFL block free can cause a btree join and require another block free before
2536 : * the real allocation can proceed. Deferring the free disconnects freeing up
2537 : * the AGFL slot from freeing the block.
2538 : */
static int
xfs_defer_agfl_block(
	struct xfs_trans		*tp,
	xfs_agnumber_t			agno,
	xfs_agblock_t			agbno,
	struct xfs_owner_info		*oinfo)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_extent_free_item	*xefi;
	xfs_fsblock_t			fsbno = XFS_AGB_TO_FSB(mp, agno, agbno);

	ASSERT(xfs_extfree_item_cache != NULL);
	ASSERT(oinfo != NULL);

	/* Reject a block address that does not verify as a real fs block. */
	if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno)))
		return -EFSCORRUPTED;

	/* __GFP_NOFAIL: the allocation cannot fail, so no error check. */
	xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
			       GFP_KERNEL | __GFP_NOFAIL);
	/* AGFL blocks are always freed one at a time. */
	xefi->xefi_startblock = fsbno;
	xefi->xefi_blockcount = 1;
	xefi->xefi_owner = oinfo->oi_owner;
	xefi->xefi_agresv = XFS_AG_RESV_AGFL;

	trace_xfs_agfl_free_defer(mp, xefi);

	/* Queue the free as a deferred op to run at transaction commit. */
	xfs_extent_free_get_group(mp, xefi);
	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
	return 0;
}
2569 :
2570 : /*
2571 : * Add the extent to the list of extents to be free at transaction end.
2572 : * The list is maintained sorted (by block number).
2573 : */
int
xfs_free_extent_later(
	struct xfs_trans		*tp,
	xfs_fsblock_t			bno,
	xfs_filblks_t			len,
	const struct xfs_owner_info	*oinfo,
	enum xfs_ag_resv_type		type,
	unsigned int			flags)
{
	struct xfs_extent_free_item	*xefi;
	struct xfs_mount		*mp = tp->t_mountp;
	enum xfs_defer_ops_type		optype;
#ifdef DEBUG
	xfs_agnumber_t			agno;
	xfs_agblock_t			agbno;

	/*
	 * Sanity-check that the extent lies entirely within the realtime
	 * device or within a single AG of the data device.
	 */
	ASSERT(bno != NULLFSBLOCK);
	ASSERT(len > 0);
	ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
	ASSERT(!isnullstartblock(bno));
	if (flags & XFS_FREE_EXTENT_REALTIME) {
		ASSERT(bno < mp->m_sb.sb_rblocks);
		ASSERT(len <= mp->m_sb.sb_rblocks);
		ASSERT(bno + len <= mp->m_sb.sb_rblocks);
	} else {
		agno = XFS_FSB_TO_AGNO(mp, bno);
		agbno = XFS_FSB_TO_AGBNO(mp, bno);

		ASSERT(agno < mp->m_sb.sb_agcount);
		ASSERT(agbno < mp->m_sb.sb_agblocks);
		ASSERT(len < mp->m_sb.sb_agblocks);
		ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
	}
#endif
	ASSERT(!(flags & ~XFS_FREE_EXTENT_ALL_FLAGS));
	ASSERT(xfs_extfree_item_cache != NULL);
	/* AGFL blocks go through xfs_defer_agfl_block(), not here. */
	ASSERT(type != XFS_AG_RESV_AGFL);

	if (flags & XFS_FREE_EXTENT_REALTIME) {
		if (XFS_IS_CORRUPT(mp, !xfs_verify_rtbext(mp, bno, len)))
			return -EFSCORRUPTED;
	} else {
		if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
			return -EFSCORRUPTED;
	}

	/* __GFP_NOFAIL: the allocation cannot fail, so no error check. */
	xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
			       GFP_KERNEL | __GFP_NOFAIL);
	xefi->xefi_startblock = bno;
	xefi->xefi_blockcount = (xfs_extlen_t)len;
	xefi->xefi_agresv = type;
	if (flags & XFS_FREE_EXTENT_SKIP_DISCARD)
		xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
	if (flags & XFS_FREE_EXTENT_REALTIME) {
		/*
		 * Realtime and data section EFIs must use separate
		 * transactions to finish deferred work because updates to
		 * realtime metadata files can lock AGFs to allocate btree
		 * blocks and we don't want that mixing with the AGF locks
		 * taken to finish data section EFIs.
		 */
		optype = XFS_DEFER_OPS_TYPE_FREE_RT;
		xefi->xefi_flags |= XFS_EFI_REALTIME;
	} else {
		optype = XFS_DEFER_OPS_TYPE_FREE;
	}
	/* Translate the owner info, if supplied, into EFI flags. */
	if (oinfo) {
		ASSERT(oinfo->oi_offset == 0);

		if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
			xefi->xefi_flags |= XFS_EFI_ATTR_FORK;
		if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
			xefi->xefi_flags |= XFS_EFI_BMBT_BLOCK;
		xefi->xefi_owner = oinfo->oi_owner;
	} else {
		xefi->xefi_owner = XFS_RMAP_OWN_NULL;
	}

	trace_xfs_extent_free_defer(mp, xefi);

	/* Queue the free as a deferred op to run at transaction commit. */
	xfs_extent_free_get_group(mp, xefi);
	xfs_defer_add(tp, optype, &xefi->xefi_list);
	return 0;
}
2658 :
2659 : #ifdef DEBUG
2660 : /*
2661 : * Check if an AGF has a free extent record whose length is equal to
2662 : * args->minlen.
2663 : */
2664 : STATIC int
2665 2674380 : xfs_exact_minlen_extent_available(
2666 : struct xfs_alloc_arg *args,
2667 : struct xfs_buf *agbp,
2668 : int *stat)
2669 : {
2670 2674380 : struct xfs_btree_cur *cnt_cur;
2671 2674380 : xfs_agblock_t fbno;
2672 2674380 : xfs_extlen_t flen;
2673 2674380 : int error = 0;
2674 :
2675 2674380 : cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, agbp,
2676 : args->pag, XFS_BTNUM_CNT);
2677 2674482 : error = xfs_alloc_lookup_ge(cnt_cur, 0, args->minlen, stat);
2678 2673908 : if (error)
2679 0 : goto out;
2680 :
2681 2673908 : if (*stat == 0) {
2682 0 : xfs_btree_mark_sick(cnt_cur);
2683 0 : error = -EFSCORRUPTED;
2684 0 : goto out;
2685 : }
2686 :
2687 2673908 : error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, stat);
2688 2671932 : if (error)
2689 0 : goto out;
2690 :
2691 2671932 : if (*stat == 1 && flen != args->minlen)
2692 34219 : *stat = 0;
2693 :
2694 2637713 : out:
2695 2671932 : xfs_btree_del_cursor(cnt_cur, error);
2696 :
2697 2673714 : return error;
2698 : }
2699 : #endif
2700 :
2701 : /*
2702 : * Decide whether to use this allocation group for this allocation.
2703 : * If so, fix up the btree freelist's size.
2704 : */
2705 : int			/* error */
2706 840631665 : xfs_alloc_fix_freelist(
2707 : 	struct xfs_alloc_arg	*args,	/* allocation argument structure */
2708 : 	uint32_t		alloc_flags)
2709 : {
2710 840631665 : 	struct xfs_mount	*mp = args->mp;
2711 840631665 : 	struct xfs_perag	*pag = args->pag;
2712 840631665 : 	struct xfs_trans	*tp = args->tp;
2713 840631665 : 	struct xfs_buf		*agbp = NULL;
2714 840631665 : 	struct xfs_buf		*agflbp = NULL;
2715 840631665 : 	struct xfs_alloc_arg	targs;	/* local allocation arguments */
2716 840631665 : 	xfs_agblock_t		bno;	/* freelist block */
2717 840631665 : 	xfs_extlen_t		need;	/* total blocks needed in freelist */
2718 840631665 : 	int			error = 0;
2719 :
2720 : 	/* deferred ops (AGFL block frees) require permanent transactions */
2721 840631665 : 	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
2722 :
             : 	/* Read the AGF to populate the perag if this is the first access. */
2723 1681263330 : 	if (!xfs_perag_initialised_agf(pag)) {
2724 6462 : 		error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
2725 6462 : 		if (error) {
2726 : 			/* Couldn't lock the AGF so skip this AG. */
2727 2755 : 			if (error == -EAGAIN)
2728 2755 : 				error = 0;
2729 2755 : 			goto out_no_agbp;
2730 : 		}
2731 : 	}
2732 :
2733 : 	/*
2734 : 	 * If this is a metadata preferred pag and we are user data then try
2735 : 	 * somewhere else if we are not being asked to try harder at this
2736 : 	 * point
2737 : 	 */
2738 1681257820 : 	if (xfs_perag_prefers_metadata(pag) &&
2739 0 : 	    (args->datatype & XFS_ALLOC_USERDATA) &&
2740 0 : 	    (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)) {
2741 0 : 		ASSERT(!(alloc_flags & XFS_ALLOC_FLAG_FREEING));
2742 0 : 		goto out_agbp_relse;
2743 : 	}
2744 :
             : 	/* Cheap pre-check against cached perag counters before taking the AGF. */
2745 840628910 : 	need = xfs_alloc_min_freelist(mp, pag);
2746 840583157 : 	if (!xfs_alloc_space_available(args, need, alloc_flags |
2747 : 			XFS_ALLOC_FLAG_CHECK))
2748 183901941 : 		goto out_agbp_relse;
2749 :
2750 : 	/*
2751 : 	 * Get the a.g. freespace buffer.
2752 : 	 * Can fail if we're not blocking on locks, and it's held.
2753 : 	 */
2754 656719850 : 	if (!agbp) {
2755 656707246 : 		error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
2756 656755817 : 		if (error) {
2757 : 			/* Couldn't lock the AGF so skip this AG. */
2758 8007289 : 			if (error == -EAGAIN)
2759 8006026 : 				error = 0;
2760 8007289 : 			goto out_no_agbp;
2761 : 		}
2762 : 	}
2763 :
2764 : 	/* reset a padding mismatched agfl before final free space check */
2765 1297522264 : 	if (xfs_perag_agfl_needs_reset(pag))
2766 20 : 		xfs_agfl_reset(tp, agbp, pag);
2767 :
2768 : 	/* If there isn't enough total space or single-extent, reject it. */
2769 648761132 : 	need = xfs_alloc_min_freelist(mp, pag);
2770 648755663 : 	if (!xfs_alloc_space_available(args, need, alloc_flags))
2771 6762 : 		goto out_agbp_relse;
2772 :
2773 : #ifdef DEBUG
2774 648748927 : 	if (args->alloc_minlen_only) {
2775 2674511 : 		int stat;
2776 :
2777 2674511 : 		error = xfs_exact_minlen_extent_available(args, agbp, &stat);
2778 2673927 : 		if (error || !stat)
2779 34219 : 			goto out_agbp_relse;
2780 : 	}
2781 : #endif
2782 : 	/*
2783 : 	 * Make the freelist shorter if it's too long.
2784 : 	 *
2785 : 	 * Note that from this point onwards, we will always release the agf and
2786 : 	 * agfl buffers on error. This handles the case where we error out and
2787 : 	 * the buffers are clean or may not have been joined to the transaction
2788 : 	 * and hence need to be released manually. If they have been joined to
2789 : 	 * the transaction, then xfs_trans_brelse() will handle them
2790 : 	 * appropriately based on the recursion count and dirty state of the
2791 : 	 * buffer.
2792 : 	 *
2793 : 	 * XXX (dgc): When we have lots of free space, does this buy us
2794 : 	 * anything other than extra overhead when we need to put more blocks
2795 : 	 * back on the free list? Maybe we should only do this when space is
2796 : 	 * getting low or the AGFL is more than half full?
2797 : 	 *
2798 : 	 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
2799 : 	 * big; the NORMAP flag prevents AGFL expand/shrink operations from
2800 : 	 * updating the rmapbt. Both flags are used in xfs_repair while we're
2801 : 	 * rebuilding the rmapbt, and neither are used by the kernel. They're
2802 : 	 * both required to ensure that rmaps are correctly recorded for the
2803 : 	 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and
2804 : 	 * repair/rmap.c in xfsprogs for details.
2805 : 	 */
2806 648714124 : 	memset(&targs, 0, sizeof(targs));
2807 : 	/* struct copy below */
2808 648714124 : 	if (alloc_flags & XFS_ALLOC_FLAG_NORMAP)
2809 38096 : 		targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
2810 : 	else
2811 648676028 : 		targs.oinfo = XFS_RMAP_OINFO_AG;
2812 649042758 : 	while (!(alloc_flags & XFS_ALLOC_FLAG_NOSHRINK) &&
2813 649042758 : 			pag->pagf_flcount > need) {
2814 338111 : 		error = xfs_alloc_get_freelist(pag, tp, agbp, &bno, 0);
2815 328634 : 		if (error)
2816 0 : 			goto out_agbp_relse;
2817 :
2818 : 		/* defer agfl frees */
2819 328634 : 		error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
2820 328634 : 		if (error)
2821 0 : 			goto out_agbp_relse;
2822 : 	}
2823 :
2824 648704647 : 	targs.tp = tp;
2825 648704647 : 	targs.mp = mp;
2826 648704647 : 	targs.agbp = agbp;
2827 648704647 : 	targs.agno = args->agno;
2828 648704647 : 	targs.alignment = targs.minlen = targs.prod = 1;
2829 648704647 : 	targs.pag = pag;
2830 648704647 : 	error = xfs_alloc_read_agfl(pag, tp, &agflbp);
2831 648731189 : 	if (error)
2832 1400 : 		goto out_agbp_relse;
2833 :
2834 : 	/* Make the freelist longer if it's too short. */
2835 649608419 : 	while (pag->pagf_flcount < need) {
2836 880688 : 		targs.agbno = 0;
2837 880688 : 		targs.maxlen = need - pag->pagf_flcount;
2838 880688 : 		targs.resv = XFS_AG_RESV_AGFL;
2839 :
2840 : 		/* Allocate as many blocks as possible at once. */
2841 880688 : 		error = xfs_alloc_ag_vextent_size(&targs, alloc_flags);
2842 878866 : 		if (error)
2843 1 : 			goto out_agflbp_relse;
2844 :
2845 : 		/*
2846 : 		 * Stop if we run out.  Won't happen if callers are obeying
2847 : 		 * the restrictions correctly.  Can happen for free calls
2848 : 		 * on a completely full ag.
2849 : 		 */
2850 878865 : 		if (targs.agbno == NULLAGBLOCK) {
2851 223 : 			if (alloc_flags & XFS_ALLOC_FLAG_FREEING)
2852 : 				break;
2853 0 : 			goto out_agflbp_relse;
2854 : 		}
2855 :
2856 878642 : 		if (!xfs_rmap_should_skip_owner_update(&targs.oinfo)) {
2857 878631 : 			error = xfs_rmap_alloc(tp, agbp, pag,
2858 : 				       targs.agbno, targs.len, &targs.oinfo);
2859 878631 : 			if (error)
2860 12 : 				goto out_agflbp_relse;
2861 : 		}
2862 878630 : 		error = xfs_alloc_update_counters(tp, agbp,
2863 878630 : 						  -((long)(targs.len)));
2864 878629 : 		if (error)
2865 0 : 			goto out_agflbp_relse;
2866 :
2867 : 		/*
2868 : 		 * Put each allocated block on the list.
2869 : 		 */
2870 1911215 : 		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
2871 1032585 : 			error = xfs_alloc_put_freelist(pag, tp, agbp,
2872 : 							agflbp, bno, 0);
2873 1032586 : 			if (error)
2874 0 : 				goto out_agflbp_relse;
2875 : 		}
2876 : 	}
2877 648727954 : 	xfs_trans_brelse(tp, agflbp);
2878 648717224 : 	args->agbp = agbp;
2879 648717224 : 	return 0;
2880 :
2881 13 : out_agflbp_relse:
2882 13 : 	xfs_trans_brelse(tp, agflbp);
2883 183944335 : out_agbp_relse:
2884 183944335 : 	if (agbp)
2885 42368 : 		xfs_trans_brelse(tp, agbp);
2886 183901967 : out_no_agbp:
2887 191954405 : 	args->agbp = NULL;
2888 191954405 : 	return error;
2889 : }
2890 :
2891 : /*
2892 : * Get a block from the freelist.
2893 : * Returns with the buffer for the block gotten.
2894 : */
2895 : int
2896 1310531 : xfs_alloc_get_freelist(
2897 : 	struct xfs_perag	*pag,
2898 : 	struct xfs_trans	*tp,
2899 : 	struct xfs_buf		*agbp,
2900 : 	xfs_agblock_t		*bnop,
2901 : 	int			btreeblk)
2902 : {
2903 1310531 : 	struct xfs_agf		*agf = agbp->b_addr;
2904 1310531 : 	struct xfs_buf		*agflbp;
2905 1310531 : 	xfs_agblock_t		bno;
2906 1310531 : 	__be32			*agfl_bno;
2907 1310531 : 	int			error;
2908 1310531 : 	uint32_t		logflags;
2909 1310531 : 	struct xfs_mount	*mp = tp->t_mountp;
2910 :
2911 : 	/*
2912 : 	 * Freelist is empty, give up.
2913 : 	 */
2914 1310531 : 	if (!agf->agf_flcount) {
2915 0 : 		*bnop = NULLAGBLOCK;
2916 0 : 		return 0;
2917 : 	}
2918 : 	/*
2919 : 	 * Read the array of free blocks.
2920 : 	 */
2921 1310531 : 	error = xfs_alloc_read_agfl(pag, tp, &agflbp);
2922 1310531 : 	if (error)
2923 : 		return error;
2924 :
2925 :
2926 : 	/*
2927 : 	 * Get the block number and update the data structures.
2928 : 	 */
2929 1310531 : 	agfl_bno = xfs_buf_to_agfl_bno(agflbp);
2930 1310531 : 	bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
             : 	/*
             : 	 * NOTE(review): agflbp is not released on this error path; presumably
             : 	 * transaction cleanup takes care of it -- confirm.
             : 	 */
2931 2621062 : 	if (XFS_IS_CORRUPT(tp->t_mountp, !xfs_verify_agbno(pag, bno)))
2932 0 : 		return -EFSCORRUPTED;
2933 :
             : 	/* Advance the head index, wrapping at the end of the AGFL array. */
2934 1310531 : 	be32_add_cpu(&agf->agf_flfirst, 1);
2935 1310531 : 	xfs_trans_brelse(tp, agflbp);
2936 2621064 : 	if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
2937 805 : 		agf->agf_flfirst = 0;
2938 :
2939 2621064 : 	ASSERT(!xfs_perag_agfl_needs_reset(pag));
2940 1310532 : 	be32_add_cpu(&agf->agf_flcount, -1);
2941 1310532 : 	pag->pagf_flcount--;
2942 :
2943 1310532 : 	logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2944 1310532 : 	if (btreeblk) {
2945 981898 : 		be32_add_cpu(&agf->agf_btreeblks, 1);
2946 981898 : 		pag->pagf_btreeblks++;
2947 981898 : 		logflags |= XFS_AGF_BTREEBLKS;
2948 : 	}
2949 :
2950 1310532 : 	xfs_alloc_log_agf(tp, agbp, logflags);
2951 1310532 : 	*bnop = bno;
2952 :
2953 1310532 : 	return 0;
2954 : }
2955 :
2956 : /*
2957 : * Log the given fields from the agf structure.
2958 : */
2959 : void
2960 263263918 : xfs_alloc_log_agf(
2961 : 	struct xfs_trans	*tp,
2962 : 	struct xfs_buf		*bp,
2963 : 	uint32_t		fields)
2964 : {
2965 263263918 : 	int	first;		/* first byte offset */
2966 263263918 : 	int	last;		/* last byte offset */
             : 	/*
             : 	 * Byte offsets of the AGF fields, indexed by the XFS_AGF_* flag bit
             : 	 * positions; entry order must match the flag bit order so that
             : 	 * xfs_btree_offsets() can translate 'fields' into a byte range.
             : 	 */
2967 263263918 : 	static const short	offsets[] = {
2968 : 		offsetof(xfs_agf_t, agf_magicnum),
2969 : 		offsetof(xfs_agf_t, agf_versionnum),
2970 : 		offsetof(xfs_agf_t, agf_seqno),
2971 : 		offsetof(xfs_agf_t, agf_length),
2972 : 		offsetof(xfs_agf_t, agf_roots[0]),
2973 : 		offsetof(xfs_agf_t, agf_levels[0]),
2974 : 		offsetof(xfs_agf_t, agf_flfirst),
2975 : 		offsetof(xfs_agf_t, agf_fllast),
2976 : 		offsetof(xfs_agf_t, agf_flcount),
2977 : 		offsetof(xfs_agf_t, agf_freeblks),
2978 : 		offsetof(xfs_agf_t, agf_longest),
2979 : 		offsetof(xfs_agf_t, agf_btreeblks),
2980 : 		offsetof(xfs_agf_t, agf_uuid),
2981 : 		offsetof(xfs_agf_t, agf_rmap_blocks),
2982 : 		offsetof(xfs_agf_t, agf_refcount_blocks),
2983 : 		offsetof(xfs_agf_t, agf_refcount_root),
2984 : 		offsetof(xfs_agf_t, agf_refcount_level),
2985 : 		/* needed so that we don't log the whole rest of the structure: */
2986 : 		offsetof(xfs_agf_t, agf_spare64),
2987 : 		sizeof(xfs_agf_t)
2988 : 	};
2989 :
2990 263263918 : 	trace_xfs_agf(tp->t_mountp, bp->b_addr, fields, _RET_IP_);
2991 :
2992 263244740 : 	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
2993 :
2994 263260038 : 	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2995 263275223 : 	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2996 263317664 : }
2997 :
2998 : /*
2999 : * Put the block on the freelist for the allocation group.
3000 : */
3001 : int
3002 1320966 : xfs_alloc_put_freelist(
3003 : 	struct xfs_perag	*pag,
3004 : 	struct xfs_trans	*tp,
3005 : 	struct xfs_buf		*agbp,
3006 : 	struct xfs_buf		*agflbp,
3007 : 	xfs_agblock_t		bno,
3008 : 	int			btreeblk)
3009 : {
3010 1320966 : 	struct xfs_mount	*mp = tp->t_mountp;
3011 1320966 : 	struct xfs_agf		*agf = agbp->b_addr;
3012 1320966 : 	__be32			*blockp;
3013 1320966 : 	int			error;
3014 1320966 : 	uint32_t		logflags;
3015 1320966 : 	__be32			*agfl_bno;
3016 1320966 : 	int			startoff;
3017 :
             : 	/* Callers may pass a NULL agflbp; read the AGFL ourselves if so. */
3018 1320966 : 	if (!agflbp) {
3019 288383 : 		error = xfs_alloc_read_agfl(pag, tp, &agflbp);
3020 288383 : 		if (error)
3021 : 			return error;
3022 : 	}
3023 :
             : 	/* Advance the tail index, wrapping at the end of the AGFL array. */
3024 1320966 : 	be32_add_cpu(&agf->agf_fllast, 1);
3025 2641932 : 	if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
3026 815 : 		agf->agf_fllast = 0;
3027 :
3028 2641932 : 	ASSERT(!xfs_perag_agfl_needs_reset(pag));
3029 1320966 : 	be32_add_cpu(&agf->agf_flcount, 1);
3030 1320966 : 	pag->pagf_flcount++;
3031 :
3032 1320966 : 	logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
3033 1320966 : 	if (btreeblk) {
3034 288383 : 		be32_add_cpu(&agf->agf_btreeblks, -1);
3035 288383 : 		pag->pagf_btreeblks--;
3036 288383 : 		logflags |= XFS_AGF_BTREEBLKS;
3037 : 	}
3038 :
3039 1320966 : 	xfs_alloc_log_agf(tp, agbp, logflags);
3040 :
3041 2641938 : 	ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
3042 :
3043 1320969 : 	agfl_bno = xfs_buf_to_agfl_bno(agflbp);
3044 1320969 : 	blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
3045 1320969 : 	*blockp = cpu_to_be32(bno);
3046 1320969 : 	startoff = (char *)blockp - (char *)agflbp->b_addr;
3047 :
             : 	/*
             : 	 * NOTE(review): the AGF is logged a second time here with the same
             : 	 * logflags as above; the second call looks redundant -- confirm
             : 	 * against upstream history before removing.
             : 	 */
3048 1320969 : 	xfs_alloc_log_agf(tp, agbp, logflags);
3049 :
3050 1320969 : 	xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
3051 1320969 : 	xfs_trans_log_buf(tp, agflbp, startoff,
3052 : 			  startoff + sizeof(xfs_agblock_t) - 1);
3053 1320969 : 	return 0;
3054 : }
3055 :
3056 : /*
3057 : * Check that this AGF/AGI header's sequence number and length matches the AG
3058 : * number and size in fsblocks.
3059 : */
3060 : xfs_failaddr_t
3061 5761510 : xfs_validate_ag_length(
3062 : 	struct xfs_buf		*bp,
3063 : 	uint32_t		seqno,
3064 : 	uint32_t		length)
3065 : {
3066 5761510 : 	struct xfs_mount	*mp = bp->b_mount;
3067 : 	/*
3068 : 	 * During growfs operations, the perag is not fully initialised,
3069 : 	 * so we can't use it for any useful checking. growfs ensures we can't
3070 : 	 * use it by using uncached buffers that don't have the perag attached
3071 : 	 * so we can detect and avoid this problem.
3072 : 	 */
3073 5761510 : 	if (bp->b_pag && seqno != bp->b_pag->pag_agno)
3074 0 : 		return __this_address;
3075 :
3076 : 	/*
3077 : 	 * Only the last AG in the filesystem is allowed to be shorter
3078 : 	 * than the AG size recorded in the superblock.
3079 : 	 */
3080 5761510 : 	if (length != mp->m_sb.sb_agblocks) {
3081 : 		/*
3082 : 		 * During growfs, the new last AG can get here before we
3083 : 		 * have updated the superblock. Give it a pass on the seqno
3084 : 		 * check.
3085 : 		 */
3086 25545 : 		if (bp->b_pag && seqno != mp->m_sb.sb_agcount - 1)
3087 0 : 			return __this_address;
3088 25545 : 		if (length < XFS_MIN_AG_BLOCKS)
3089 0 : 			return __this_address;
3090 25545 : 		if (length > mp->m_sb.sb_agblocks)
3091 11 : 			return __this_address;
3092 : 	}
3093 :
             : 	/* NULL means the header passed the seqno/length checks. */
3094 : 	return NULL;
3095 : }
3096 :
3097 : /*
3098 : * Verify the AGF is consistent.
3099 : *
3100 : * We do not verify the AGFL indexes in the AGF are fully consistent here
3101 : * because of issues with variable on-disk structure sizes. Instead, we check
3102 : * the agfl indexes for consistency when we initialise the perag from the AGF
3103 : * information after a read completes.
3104 : *
3105 : * If the index is inconsistent, then we mark the perag as needing an AGFL
3106 : * reset. The first AGFL update performed then resets the AGFL indexes and
3107 : * refills the AGFL with known good free blocks, allowing the filesystem to
3108 : * continue operating normally at the cost of a few leaked free space blocks.
3109 : */
3110 : static xfs_failaddr_t
3111 3075366 : xfs_agf_verify(
3112 : 	struct xfs_buf		*bp)
3113 : {
3114 3075366 : 	struct xfs_mount	*mp = bp->b_mount;
3115 3075366 : 	struct xfs_agf		*agf = bp->b_addr;
3116 3075366 : 	xfs_failaddr_t		fa;
3117 3075366 : 	uint32_t		agf_seqno = be32_to_cpu(agf->agf_seqno);
3118 3075366 : 	uint32_t		agf_length = be32_to_cpu(agf->agf_length);
3119 :
             : 	/* v5 (CRC-enabled) filesystems also carry a UUID and an LSN. */
3120 3075366 : 	if (xfs_has_crc(mp)) {
3121 3060870 : 		if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
3122 0 : 			return __this_address;
3123 3061291 : 		if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn)))
3124 0 : 			return __this_address;
3125 : 	}
3126 :
3127 3075965 : 	if (!xfs_verify_magic(bp, agf->agf_magicnum))
3128 0 : 		return __this_address;
3129 :
3130 3075021 : 	if (!XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)))
3131 0 : 		return __this_address;
3132 :
3133 : 	/*
3134 : 	 * Both agf_seqno and agf_length need to validated before anything else
3135 : 	 * block number related in the AGF or AGFL can be checked.
3136 : 	 */
3137 3075021 : 	fa = xfs_validate_ag_length(bp, agf_seqno, agf_length);
3138 3074790 : 	if (fa)
3139 : 		return fa;
3140 :
             : 	/* AGFL indexes must lie within the on-disk AGFL array. */
3141 6135736 : 	if (be32_to_cpu(agf->agf_flfirst) >= xfs_agfl_size(mp))
3142 10 : 		return __this_address;
3143 3074727 : 	if (be32_to_cpu(agf->agf_fllast) >= xfs_agfl_size(mp))
3144 0 : 		return __this_address;
3145 3074727 : 	if (be32_to_cpu(agf->agf_flcount) > xfs_agfl_size(mp))
3146 0 : 		return __this_address;
3147 :
3148 3074727 : 	if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
3149 : 	    be32_to_cpu(agf->agf_freeblks) > agf_length)
3150 22 : 		return __this_address;
3151 :
             : 	/* bnobt/cntbt levels must be sane for this filesystem geometry. */
3152 3074705 : 	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
3153 3074705 : 	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
3154 : 	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) >
3155 3074705 : 						mp->m_alloc_maxlevels ||
3156 : 	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) >
3157 : 						mp->m_alloc_maxlevels)
3158 0 : 		return __this_address;
3159 :
3160 3074705 : 	if (xfs_has_lazysbcount(mp) &&
3161 3075021 : 	    be32_to_cpu(agf->agf_btreeblks) > agf_length)
3162 0 : 		return __this_address;
3163 :
3164 3074705 : 	if (xfs_has_rmapbt(mp)) {
3165 2989989 : 		if (be32_to_cpu(agf->agf_rmap_blocks) > agf_length)
3166 11 : 			return __this_address;
3167 :
3168 2989978 : 		if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
3169 : 		    be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
3170 2989978 : 							mp->m_rmap_maxlevels)
3171 0 : 			return __this_address;
3172 : 	}
3173 :
3174 3074694 : 	if (xfs_has_reflink(mp)) {
3175 2989001 : 		if (be32_to_cpu(agf->agf_refcount_blocks) > agf_length)
3176 11 : 			return __this_address;
3177 :
3178 2988990 : 		if (be32_to_cpu(agf->agf_refcount_level) < 1 ||
3179 2988990 : 		    be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels)
3180 0 : 			return __this_address;
3181 : 	}
3182 :
             : 	/* NULL means the AGF passed all structural checks. */
3183 : 	return NULL;
3184 : }
3185 :
3186 : static void
3187 963998 : xfs_agf_read_verify(
3188 : 	struct xfs_buf		*bp)
3189 : {
3190 963998 : 	struct xfs_mount	*mp = bp->b_mount;
3191 963998 : 	xfs_failaddr_t		fa;
3192 :
             : 	/* Bad CRC takes precedence; otherwise run the structural checks. */
3193 1927899 : 	if (xfs_has_crc(mp) &&
3194 : 	    !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
3195 52 : 		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
3196 : 	else {
3197 963946 : 		fa = xfs_agf_verify(bp);
             : 		/* XFS_ERRTAG_ALLOC_READ_AGF lets tests inject a verify failure. */
3198 963946 : 		if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
3199 65 : 			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
3200 : 	}
3201 963998 : }
3202 :
3203 : static void
3204 1267446 : xfs_agf_write_verify(
3205 : 	struct xfs_buf		*bp)
3206 : {
3207 1267446 : 	struct xfs_mount	*mp = bp->b_mount;
3208 1267446 : 	struct xfs_buf_log_item	*bip = bp->b_log_item;
3209 1267446 : 	struct xfs_agf		*agf = bp->b_addr;
3210 1267446 : 	xfs_failaddr_t		fa;
3211 :
3212 1267446 : 	fa = xfs_agf_verify(bp);
3213 1267446 : 	if (fa) {
3214 0 : 		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
3215 0 : 		return;
3216 : 	}
3217 :
             : 	/* Pre-v5 filesystems have no LSN or CRC to stamp. */
3218 1267446 : 	if (!xfs_has_crc(mp))
3219 : 		return;
3220 :
             : 	/* Record the last-modification LSN before computing the CRC. */
3221 1253785 : 	if (bip)
3222 1226261 : 		agf->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
3223 :
3224 1253785 : 	xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
3225 : }
3226 :
             : /* Buffer verifier ops wired to every AGF buffer read or written. */
3227 : const struct xfs_buf_ops xfs_agf_buf_ops = {
3228 : 	.name = "xfs_agf",
3229 : 	.magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
3230 : 	.verify_read = xfs_agf_read_verify,
3231 : 	.verify_write = xfs_agf_write_verify,
3232 : 	.verify_struct = xfs_agf_verify,
3233 : };
3234 :
3235 : /*
3236 : * Read in the allocation group header (free/alloc section).
3237 : */
3238 : int
3239 2394889712 : xfs_read_agf(
3240 : 	struct xfs_perag	*pag,
3241 : 	struct xfs_trans	*tp,
3242 : 	int			flags,
3243 : 	struct xfs_buf		**agfbpp)
3244 : {
3245 2394889712 : 	struct xfs_mount	*mp = pag->pag_mount;
3246 2394889712 : 	int			error;
3247 :
3248 2394889712 : 	trace_xfs_read_agf(pag->pag_mount, pag->pag_agno);
3249 :
3250 9578614740 : 	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
3251 2394653685 : 			XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGF_DADDR(mp)),
3252 2394653685 : 			XFS_FSS_TO_BB(mp, 1), flags, agfbpp, &xfs_agf_buf_ops);
             : 	/* A verifier/media failure marks the AGF sick for health reporting. */
3253 2395507438 : 	if (xfs_metadata_is_sick(error))
3254 117 : 		xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF);
3255 2395507438 : 	if (error)
3256 : 		return error;
3257 :
3258 2387448820 : 	xfs_buf_set_ref(*agfbpp, XFS_AGF_REF);
3259 2387448820 : 	return 0;
3260 : }
3261 :
3262 : /*
3263 : * Read in the allocation group header (free/alloc section) and initialise the
3264 : * perag structure if necessary. If the caller provides @agfbpp, then return the
3265 : * locked buffer to the caller, otherwise free it.
3266 : */
3267 : int
3268 2395298148 : xfs_alloc_read_agf(
3269 : 	struct xfs_perag	*pag,
3270 : 	struct xfs_trans	*tp,
3271 : 	int			flags,
3272 : 	struct xfs_buf		**agfbpp)
3273 : {
3274 2395298148 : 	struct xfs_buf		*agfbp;
3275 2395298148 : 	struct xfs_agf		*agf;
3276 2395298148 : 	int			error;
3277 2395298148 : 	int			allocbt_blks;
3278 :
3279 2395298148 : 	trace_xfs_alloc_read_agf(pag->pag_mount, pag->pag_agno);
3280 :
3281 : 	/* We don't support trylock when freeing. */
3282 2394575776 : 	ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
3283 : 			(XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK));
3284 2394575776 : 	error = xfs_read_agf(pag, tp,
3285 2394575776 : 			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
3286 : 			&agfbp);
3287 2395522987 : 	if (error)
3288 : 		return error;
3289 :
             : 	/* First successful read seeds the in-core perag from the on-disk AGF. */
3290 2387509713 : 	agf = agfbp->b_addr;
3291 4775019426 : 	if (!xfs_perag_initialised_agf(pag)) {
3292 779862 : 		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
3293 779862 : 		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
3294 779862 : 		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
3295 779862 : 		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
3296 779862 : 		pag->pagf_levels[XFS_BTNUM_BNOi] =
3297 779862 : 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
3298 779862 : 		pag->pagf_levels[XFS_BTNUM_CNTi] =
3299 779862 : 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
3300 779862 : 		pag->pagf_levels[XFS_BTNUM_RMAPi] =
3301 779862 : 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
3302 779862 : 		pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
3303 779862 : 		if (xfs_agfl_needs_reset(pag->pag_mount, agf))
3304 20 : 			set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
3305 : 		else
3306 779842 : 			clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
3307 :
3308 : 		/*
3309 : 		 * Update the in-core allocbt counter. Filter out the rmapbt
3310 : 		 * subset of the btreeblks counter because the rmapbt is managed
3311 : 		 * by perag reservation. Subtract one for the rmapbt root block
3312 : 		 * because the rmap counter includes it while the btreeblks
3313 : 		 * counter only tracks non-root blocks.
3314 : 		 */
3315 780288 : 		allocbt_blks = pag->pagf_btreeblks;
3316 780288 : 		if (xfs_has_rmapbt(pag->pag_mount))
3317 760177 : 			allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
3318 780288 : 		if (allocbt_blks > 0)
3319 64361 : 			atomic64_add(allocbt_blks,
3320 : 					&pag->pag_mount->m_allocbt_blks);
3321 :
3322 780288 : 		set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
3323 : 	}
3324 : #ifdef DEBUG
             : 	/* On later reads, cross-check the cached perag against the AGF. */
3325 4773459702 : 	else if (!xfs_is_shutdown(pag->pag_mount)) {
3326 2386715474 : 		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
3327 2386715474 : 		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
3328 2386715474 : 		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
3329 2386715474 : 		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
3330 2386715474 : 		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
3331 : 				be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
3332 2386715474 : 		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
3333 : 				be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
3334 : 	}
3335 : #endif
             : 	/* Hand the locked buffer to the caller if requested, else release it. */
3336 2387510030 : 	if (agfbpp)
3337 2386190283 : 		*agfbpp = agfbp;
3338 : 	else
3339 1319747 : 		xfs_trans_brelse(tp, agfbp);
3340 : 	return 0;
3341 : }
3342 :
3343 : /*
3344 : * Pre-proces allocation arguments to set initial state that we don't require
3345 : * callers to set up correctly, as well as bounds check the allocation args
3346 : * that are set up.
3347 : */
3348 : static int
3349 102237992 : xfs_alloc_vextent_check_args(
3350 : 	struct xfs_alloc_arg	*args,
3351 : 	xfs_fsblock_t		target,
3352 : 	xfs_agnumber_t		*minimum_agno)
3353 : {
3354 102237992 : 	struct xfs_mount	*mp = args->mp;
3355 102237992 : 	xfs_agblock_t		agsize;
3356 :
3357 102237992 : 	args->fsbno = NULLFSBLOCK;
3358 :
             : 	/*
             : 	 * AGFs already locked by this transaction bound the lowest AG we may
             : 	 * search, to keep AGF lock ordering deadlock-free.
             : 	 */
3359 102237992 : 	*minimum_agno = 0;
3360 102237992 : 	if (args->tp->t_highest_agno != NULLAGNUMBER)
3361 1793657 : 		*minimum_agno = args->tp->t_highest_agno;
3362 :
3363 : 	/*
3364 : 	 * Just fix this up, for the case where the last a.g. is shorter
3365 : 	 * (or there's only one a.g.) and the caller couldn't easily figure
3366 : 	 * that out (xfs_bmap_alloc).
3367 : 	 */
3368 102237992 : 	agsize = mp->m_sb.sb_agblocks;
3369 102237992 : 	if (args->maxlen > agsize)
3370 0 : 		args->maxlen = agsize;
3371 102237992 : 	if (args->alignment == 0)
3372 5884920 : 		args->alignment = 1;
3373 :
3374 102237992 : 	ASSERT(args->minlen > 0);
3375 102237992 : 	ASSERT(args->maxlen > 0);
3376 102237992 : 	ASSERT(args->alignment > 0);
3377 102237992 : 	ASSERT(args->resv != XFS_AG_RESV_AGFL);
3378 :
3379 102237992 : 	ASSERT(XFS_FSB_TO_AGNO(mp, target) < mp->m_sb.sb_agcount);
3380 102237992 : 	ASSERT(XFS_FSB_TO_AGBNO(mp, target) < agsize);
3381 102222078 : 	ASSERT(args->minlen <= args->maxlen);
3382 102222078 : 	ASSERT(args->minlen <= agsize);
3383 102222078 : 	ASSERT(args->mod < args->prod);
3384 :
             : 	/* Same checks as the ASSERTs above, enforced on production builds. */
3385 102222078 : 	if (XFS_FSB_TO_AGNO(mp, target) >= mp->m_sb.sb_agcount ||
3386 102222078 : 	    XFS_FSB_TO_AGBNO(mp, target) >= agsize ||
3387 102212947 : 	    args->minlen > args->maxlen || args->minlen > agsize ||
3388 102212947 : 	    args->mod >= args->prod) {
3389 0 : 		trace_xfs_alloc_vextent_badargs(args);
3390 0 : 		return -ENOSPC;
3391 : 	}
3392 :
3393 102212947 : 	if (args->agno != NULLAGNUMBER && *minimum_agno > args->agno) {
3394 9380 : 		trace_xfs_alloc_vextent_skip_deadlock(args);
3395 9380 : 		return -ENOSPC;
3396 : 	}
3397 : 	return 0;
3398 :
3399 : }
3400 :
3401 : /*
3402 : * Prepare an AG for allocation. If the AG is not prepared to accept the
3403 : * allocation, return failure.
3404 : *
3405 : * XXX(dgc): The complexity of "need_pag" will go away as all caller paths are
3406 : * modified to hold their own perag references.
3407 : */
3408 : static int
3409 290535973 : xfs_alloc_vextent_prepare_ag(
3410 : 	struct xfs_alloc_arg	*args,
3411 : 	uint32_t		alloc_flags)
3412 : {
3413 290535973 : 	bool			need_pag = !args->pag;
3414 290535973 : 	int			error;
3415 :
3416 290535973 : 	if (need_pag)
3417 0 : 		args->pag = xfs_perag_get(args->mp, args->agno);
3418 :
3419 290535973 : 	args->agbp = NULL;
3420 290535973 : 	error = xfs_alloc_fix_freelist(args, alloc_flags);
3421 290549236 : 	if (error) {
3422 1381 : 		trace_xfs_alloc_vextent_nofix(args);
3423 1381 : 		if (need_pag)
3424 0 : 			xfs_perag_put(args->pag);
3425 1381 : 		args->agbno = NULLAGBLOCK;
3426 1381 : 		return error;
3427 : 	}
             : 	/* No error but no AGF buffer: freelist fixup declined this AG. */
3428 290547855 : 	if (!args->agbp) {
3429 : 		/* cannot allocate in this AG at all */
3430 191950798 : 		trace_xfs_alloc_vextent_noagbp(args);
3431 191948731 : 		args->agbno = NULLAGBLOCK;
3432 191948731 : 		return 0;
3433 : 	}
3434 98597057 : 	args->wasfromfl = 0;
3435 98597057 : 	return 0;
3436 : }
3437 :
3438 : /*
3439 : * Post-process allocation results to account for the allocation if it succeed
3440 : * and set the allocated block number correctly for the caller.
3441 : *
3442 : * XXX: we should really be returning ENOSPC for ENOSPC, not
3443 : * hiding it behind a "successful" NULLFSBLOCK allocation.
3444 : */
3445 : static int
3446 102319060 : xfs_alloc_vextent_finish(
3447 : 	struct xfs_alloc_arg	*args,
3448 : 	xfs_agnumber_t		minimum_agno,
3449 : 	int			alloc_error,
3450 : 	bool			drop_perag)
3451 : {
3452 102319060 : 	struct xfs_mount	*mp = args->mp;
3453 102319060 : 	int			error = 0;
3454 :
3455 : 	/*
3456 : 	 * We can end up here with a locked AGF. If we failed, the caller is
3457 : 	 * likely going to try to allocate again with different parameters, and
3458 : 	 * that can widen the AGs that are searched for free space. If we have
3459 : 	 * to do BMBT block allocation, we have to do a new allocation.
3460 : 	 *
3461 : 	 * Hence leaving this function with the AGF locked opens up potential
3462 : 	 * ABBA AGF deadlocks because a future allocation attempt in this
3463 : 	 * transaction may attempt to lock a lower number AGF.
3464 : 	 *
3465 : 	 * We can't release the AGF until the transaction is commited, so at
3466 : 	 * this point we must update the "first allocation" tracker to point at
3467 : 	 * this AG if the tracker is empty or points to a lower AG. This allows
3468 : 	 * the next allocation attempt to be modified appropriately to avoid
3469 : 	 * deadlocks.
3470 : 	 */
3471 102319060 : 	if (args->agbp &&
3472 98603766 : 	    (args->tp->t_highest_agno == NULLAGNUMBER ||
3473 1782723 : 	     args->agno > minimum_agno))
3474 97057299 : 		args->tp->t_highest_agno = args->agno;
3475 :
3476 : 	/*
3477 : 	 * If the allocation failed with an error or we had an ENOSPC result,
3478 : 	 * preserve the returned error whilst also marking the allocation result
3479 : 	 * as "no extent allocated". This ensures that callers that fail to
3480 : 	 * capture the error will still treat it as a failed allocation.
3481 : 	 */
3482 102319060 : 	if (alloc_error || args->agbno == NULLAGBLOCK) {
3483 4602394 : 		args->fsbno = NULLFSBLOCK;
3484 4602394 : 		error = alloc_error;
3485 4602394 : 		goto out_drop_perag;
3486 : 	}
3487 :
3488 97716666 : 	args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
3489 :
3490 97716666 : 	ASSERT(args->len >= args->minlen);
3491 97716666 : 	ASSERT(args->len <= args->maxlen);
3492 97716666 : 	ASSERT(args->agbno % args->alignment == 0);
3493 97716666 : 	XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len);
3494 :
3495 : 	/* if not file data, insert new block into the reverse map btree */
3496 97711984 : 	if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
3497 7290699 : 		error = xfs_rmap_alloc(args->tp, args->agbp, args->pag,
3498 7290699 : 				       args->agbno, args->len, &args->oinfo);
3499 7291329 : 		if (error)
3500 40 : 			goto out_drop_perag;
3501 : 	}
3502 :
             : 	/* Blocks taken from the AGFL were already accounted for. */
3503 97712574 : 	if (!args->wasfromfl) {
3504 97720018 : 		error = xfs_alloc_update_counters(args->tp, args->agbp,
3505 97720018 : 						  -((long)(args->len)));
3506 97734320 : 		if (error)
3507 0 : 			goto out_drop_perag;
3508 :
3509 97734320 : 		ASSERT(!xfs_extent_busy_search(mp, args->pag, args->agbno,
3510 : 					      args->len));
3511 : 	}
3512 :
3513 97719409 : 	xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
3514 :
3515 97694531 : 	XFS_STATS_INC(mp, xs_allocx);
3516 97724004 : 	XFS_STATS_ADD(mp, xs_allocb, args->len);
3517 :
3518 97726955 : 	trace_xfs_alloc_vextent_finish(args);
3519 :
3520 102324079 : out_drop_perag:
3521 102324079 : 	if (drop_perag && args->pag) {
3522 96124694 : 		xfs_perag_rele(args->pag);
3523 96138204 : 		args->pag = NULL;
3524 : 	}
3525 102337589 : 	return error;
3526 : }
3527 :
3528 : /*
3529 : * Allocate within a single AG only. This uses a best-fit length algorithm so if
3530 : * you need an exact sized allocation without locality constraints, this is the
3531 : * fastest way to do it.
3532 : *
3533 : * Caller is expected to hold a perag reference in args->pag.
3534 : */
3535 : int
3536 0 : xfs_alloc_vextent_this_ag(
3537 : 	struct xfs_alloc_arg	*args,
3538 : 	xfs_agnumber_t		agno)
3539 : {
3540 0 : 	struct xfs_mount	*mp = args->mp;
3541 0 : 	xfs_agnumber_t		minimum_agno;
3542 0 : 	uint32_t		alloc_flags = 0;
3543 0 : 	int			error;
3544 :
3545 0 : 	ASSERT(args->pag != NULL);
3546 0 : 	ASSERT(args->pag->pag_agno == agno);
3547 :
3548 0 : 	args->agno = agno;
3549 0 : 	args->agbno = 0;
3550 :
3551 0 : 	trace_xfs_alloc_vextent_this_ag(args);
3552 :
3553 0 : 	error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0),
3554 : 			&minimum_agno);
3555 0 : 	if (error) {
             : 		/* ENOSPC from arg checking maps to "no extent", not an error. */
3556 0 : 		if (error == -ENOSPC)
3557 : 			return 0;
3558 0 : 		return error;
3559 : 	}
3560 :
3561 0 : 	error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
3562 0 : 	if (!error && args->agbp)
3563 0 : 		error = xfs_alloc_ag_vextent_size(args, alloc_flags);
3564 :
3565 0 : 	return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
3566 : }
3567 :
3568 : /*
3569 : * Iterate all AGs trying to allocate an extent starting from @start_ag.
3570 : *
3571 : * If the incoming allocation type is XFS_ALLOCTYPE_NEAR_BNO, it means the
3572 : * allocation attempts in @start_agno have locality information. If we fail to
3573 : * allocate in that AG, then we revert to anywhere-in-AG for all the other AGs
3574 : * we attempt to allocation in as there is no locality optimisation possible for
3575 : * those allocations.
3576 : *
3577 : * On return, args->pag may be left referenced if we finish before the "all
3578 : * failed" return point. The allocation finish still needs the perag, and
3579 : * so the caller will release it once they've finished the allocation.
3580 : *
3581 : * When we wrap the AG iteration at the end of the filesystem, we have to be
3582 : * careful not to wrap into AGs below ones we already have locked in the
3583 : * transaction if we are doing a blocking iteration. This will result in an
3584 : * out-of-order locking of AGFs and hence can cause deadlocks.
3585 : */
static int
xfs_alloc_vextent_iterate_ags(
	struct xfs_alloc_arg	*args,
	xfs_agnumber_t		minimum_agno,
	xfs_agnumber_t		start_agno,
	xfs_agblock_t		target_agbno,
	uint32_t		alloc_flags)
{
	struct xfs_mount	*mp = args->mp;
	xfs_agnumber_t		restart_agno = minimum_agno;
	xfs_agnumber_t		agno;
	int			error = 0;

	/*
	 * A trylock pass never blocks on AGF locks, so it may safely wrap all
	 * the way back to AG 0.  A blocking pass must not wrap below
	 * minimum_agno, preserving AGF lock ordering (see function comment).
	 */
	if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)
		restart_agno = 0;
restart:
	for_each_perag_wrap_range(mp, start_agno, restart_agno,
			mp->m_sb.sb_agcount, agno, args->pag) {
		args->agno = agno;
		error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
		if (error)
			break;
		if (!args->agbp) {
			/* This AG couldn't be prepared; move on to the next. */
			trace_xfs_alloc_vextent_loopfailed(args);
			continue;
		}

		/*
		 * Allocation is supposed to succeed now, so break out of the
		 * loop regardless of whether we succeed or not.
		 */
		if (args->agno == start_agno && target_agbno) {
			/* Only the starting AG carries locality information. */
			args->agbno = target_agbno;
			error = xfs_alloc_ag_vextent_near(args, alloc_flags);
		} else {
			args->agbno = 0;
			error = xfs_alloc_ag_vextent_size(args, alloc_flags);
		}
		break;
	}
	if (error) {
		/* On error the caller gets no perag reference back. */
		xfs_perag_rele(args->pag);
		args->pag = NULL;
		return error;
	}
	if (args->agbp)
		return 0;

	/*
	 * We didn't find an AG we can allocate from. If we were given
	 * constraining flags by the caller, drop them and retry the allocation
	 * without any constraints being set.
	 */
	if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK) {
		alloc_flags &= ~XFS_ALLOC_FLAG_TRYLOCK;
		restart_agno = minimum_agno;
		goto restart;
	}

	/* All AGs failed; the iterator has dropped the last perag reference. */
	ASSERT(args->pag == NULL);
	trace_xfs_alloc_vextent_allfailed(args);
	return 0;
}
3649 :
3650 : /*
3651 : * Iterate from the AGs from the start AG to the end of the filesystem, trying
3652 : * to allocate blocks. It starts with a near allocation attempt in the initial
3653 : * AG, then falls back to anywhere-in-ag after the first AG fails. It will wrap
3654 : * back to zero if allowed by previous allocations in this transaction,
3655 : * otherwise will wrap back to the start AG and run a second blocking pass to
3656 : * the end of the filesystem.
3657 : */
3658 : int
3659 93417888 : xfs_alloc_vextent_start_ag(
3660 : struct xfs_alloc_arg *args,
3661 : xfs_fsblock_t target)
3662 : {
3663 93417888 : struct xfs_mount *mp = args->mp;
3664 93417888 : xfs_agnumber_t minimum_agno;
3665 93417888 : xfs_agnumber_t start_agno;
3666 93417888 : xfs_agnumber_t rotorstep = xfs_rotorstep;
3667 93417888 : bool bump_rotor = false;
3668 93417888 : uint32_t alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
3669 93417888 : int error;
3670 :
3671 93417888 : ASSERT(args->pag == NULL);
3672 :
3673 93417888 : args->agno = NULLAGNUMBER;
3674 93417888 : args->agbno = NULLAGBLOCK;
3675 :
3676 93417888 : trace_xfs_alloc_vextent_start_ag(args);
3677 :
3678 93391256 : error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
3679 93392790 : if (error) {
3680 0 : if (error == -ENOSPC)
3681 : return 0;
3682 0 : return error;
3683 : }
3684 :
3685 96647594 : if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
3686 : xfs_is_inode32(mp)) {
3687 0 : target = XFS_AGB_TO_FSB(mp,
3688 : ((mp->m_agfrotor / rotorstep) %
3689 : mp->m_sb.sb_agcount), 0);
3690 0 : bump_rotor = 1;
3691 : }
3692 :
3693 93392790 : start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
3694 93397674 : error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
3695 93392790 : XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
3696 :
3697 93505362 : if (bump_rotor) {
3698 0 : if (args->agno == start_agno)
3699 0 : mp->m_agfrotor = (mp->m_agfrotor + 1) %
3700 0 : (mp->m_sb.sb_agcount * rotorstep);
3701 : else
3702 0 : mp->m_agfrotor = (args->agno * rotorstep + 1) %
3703 0 : (mp->m_sb.sb_agcount * rotorstep);
3704 : }
3705 :
3706 93505362 : return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
3707 : }
3708 :
3709 : /*
3710 : * Iterate from the agno indicated via @target through to the end of the
3711 : * filesystem attempting blocking allocation. This does not wrap or try a second
3712 : * pass, so will not recurse into AGs lower than indicated by the target.
3713 : */
3714 : int
3715 2637248 : xfs_alloc_vextent_first_ag(
3716 : struct xfs_alloc_arg *args,
3717 : xfs_fsblock_t target)
3718 : {
3719 2637248 : struct xfs_mount *mp = args->mp;
3720 2637248 : xfs_agnumber_t minimum_agno;
3721 2637248 : xfs_agnumber_t start_agno;
3722 2637248 : uint32_t alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
3723 2637248 : int error;
3724 :
3725 2637248 : ASSERT(args->pag == NULL);
3726 :
3727 2637248 : args->agno = NULLAGNUMBER;
3728 2637248 : args->agbno = NULLAGBLOCK;
3729 :
3730 2637248 : trace_xfs_alloc_vextent_first_ag(args);
3731 :
3732 2634545 : error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
3733 2633963 : if (error) {
3734 0 : if (error == -ENOSPC)
3735 : return 0;
3736 0 : return error;
3737 : }
3738 :
3739 2633963 : start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
3740 2634047 : error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
3741 2633963 : XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
3742 2639823 : return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
3743 : }
3744 :
3745 : /*
3746 : * Allocate at the exact block target or fail. Caller is expected to hold a
3747 : * perag reference in args->pag.
3748 : */
3749 : int
3750 1921884 : xfs_alloc_vextent_exact_bno(
3751 : struct xfs_alloc_arg *args,
3752 : xfs_fsblock_t target)
3753 : {
3754 1921884 : struct xfs_mount *mp = args->mp;
3755 1921884 : xfs_agnumber_t minimum_agno;
3756 1921884 : int error;
3757 :
3758 1921884 : ASSERT(args->pag != NULL);
3759 1921884 : ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
3760 :
3761 1921884 : args->agno = XFS_FSB_TO_AGNO(mp, target);
3762 1921884 : args->agbno = XFS_FSB_TO_AGBNO(mp, target);
3763 :
3764 1921834 : trace_xfs_alloc_vextent_exact_bno(args);
3765 :
3766 1921845 : error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
3767 1921847 : if (error) {
3768 1304 : if (error == -ENOSPC)
3769 : return 0;
3770 0 : return error;
3771 : }
3772 :
3773 1920543 : error = xfs_alloc_vextent_prepare_ag(args, 0);
3774 1920673 : if (!error && args->agbp)
3775 1112073 : error = xfs_alloc_ag_vextent_exact(args);
3776 :
3777 1920674 : return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
3778 : }
3779 :
3780 : /*
3781 : * Allocate an extent as close to the target as possible. If there are not
3782 : * viable candidates in the AG, then fail the allocation.
3783 : *
3784 : * Caller may or may not have a per-ag reference in args->pag.
3785 : */
3786 : int
3787 4281674 : xfs_alloc_vextent_near_bno(
3788 : struct xfs_alloc_arg *args,
3789 : xfs_fsblock_t target)
3790 : {
3791 4281674 : struct xfs_mount *mp = args->mp;
3792 4281674 : xfs_agnumber_t minimum_agno;
3793 4281674 : bool needs_perag = args->pag == NULL;
3794 4281674 : uint32_t alloc_flags = 0;
3795 4281674 : int error;
3796 :
3797 4281674 : if (!needs_perag)
3798 3848415 : ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
3799 :
3800 4281674 : args->agno = XFS_FSB_TO_AGNO(mp, target);
3801 4281674 : args->agbno = XFS_FSB_TO_AGBNO(mp, target);
3802 :
3803 4280864 : trace_xfs_alloc_vextent_near_bno(args);
3804 :
3805 4280995 : error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
3806 4281520 : if (error) {
3807 8076 : if (error == -ENOSPC)
3808 : return 0;
3809 0 : return error;
3810 : }
3811 :
3812 4273444 : if (needs_perag)
3813 433144 : args->pag = xfs_perag_grab(mp, args->agno);
3814 :
3815 4274525 : error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
3816 4274716 : if (!error && args->agbp)
3817 1801987 : error = xfs_alloc_ag_vextent_near(args, alloc_flags);
3818 :
3819 4274666 : return xfs_alloc_vextent_finish(args, minimum_agno, error, needs_perag);
3820 : }
3821 :
3822 : /* Ensure that the freelist is at full capacity. */
3823 : int
3824 550100988 : xfs_free_extent_fix_freelist(
3825 : struct xfs_trans *tp,
3826 : struct xfs_perag *pag,
3827 : struct xfs_buf **agbp)
3828 : {
3829 550100988 : struct xfs_alloc_arg args;
3830 550100988 : int error;
3831 :
3832 550100988 : memset(&args, 0, sizeof(struct xfs_alloc_arg));
3833 550100988 : args.tp = tp;
3834 550100988 : args.mp = tp->t_mountp;
3835 550100988 : args.agno = pag->pag_agno;
3836 550100988 : args.pag = pag;
3837 :
3838 : /*
3839 : * validate that the block number is legal - the enables us to detect
3840 : * and handle a silent filesystem corruption rather than crashing.
3841 : */
3842 550100988 : if (args.agno >= args.mp->m_sb.sb_agcount)
3843 : return -EFSCORRUPTED;
3844 :
3845 550100988 : error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
3846 550109780 : if (error)
3847 : return error;
3848 :
3849 550108468 : *agbp = args.agbp;
3850 550108468 : return 0;
3851 : }
3852 :
3853 : /*
3854 : * Free an extent.
3855 : * Just break up the extent address and hand off to xfs_free_ag_extent
3856 : * after fixing up the freelist.
3857 : */
int
__xfs_free_extent(
	struct xfs_trans	*tp,
	struct xfs_perag	*pag,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len,
	const struct xfs_owner_info	*oinfo,
	enum xfs_ag_resv_type	type,
	bool			skip_discard)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_buf		*agbp;
	struct xfs_agf		*agf;
	int			error;
	unsigned int		busy_flags = 0;

	ASSERT(len != 0);
	ASSERT(type != XFS_AG_RESV_AGFL);

	/* Error injection point for testing free-extent failure handling. */
	if (XFS_TEST_ERROR(false, mp,
			XFS_ERRTAG_FREE_EXTENT))
		return -EIO;

	/* Lock the AGF and top up the freelist before modifying the btrees. */
	error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
	if (error) {
		if (xfs_metadata_is_sick(error))
			xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
		return error;
	}

	agf = agbp->b_addr;

	if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
		xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
		error = -EFSCORRUPTED;
		goto err_release;
	}

	/* validate the extent size is legal now we have the agf locked */
	if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) {
		xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
		error = -EFSCORRUPTED;
		goto err_release;
	}

	error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo,
			type);
	if (error)
		goto err_release;

	/*
	 * Track the freed range as busy so it cannot be reallocated until the
	 * free commits to the log (and optionally until it is discarded).
	 */
	if (skip_discard)
		busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
	xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
	return 0;

err_release:
	/* Drop the AGF buffer from the transaction on the failure paths. */
	xfs_trans_brelse(tp, agbp);
	return error;
}
3917 :
/* Context threaded through xfs_btree_query_range to the caller's callback. */
struct xfs_alloc_query_range_info {
	xfs_alloc_query_range_fn	fn;	/* per-record callback */
	void				*priv;	/* opaque callback data */
};

/* Format btree record and pass to our callback. */
STATIC int
xfs_alloc_query_range_helper(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*rec,
	void				*priv)
{
	struct xfs_alloc_query_range_info	*query = priv;
	struct xfs_alloc_rec_incore		irec;
	xfs_failaddr_t				fa;

	/* Decode the on-disk record and reject corrupt entries up front. */
	xfs_alloc_btrec_to_irec(rec, &irec);
	fa = xfs_alloc_check_irec(cur, &irec);
	if (fa)
		return xfs_alloc_complain_bad_rec(cur, fa, &irec);

	return query->fn(cur, &irec, query->priv);
}
3942 : /* Find all free space within a given range of blocks. */
3943 : int
3944 22088 : xfs_alloc_query_range(
3945 : struct xfs_btree_cur *cur,
3946 : const struct xfs_alloc_rec_incore *low_rec,
3947 : const struct xfs_alloc_rec_incore *high_rec,
3948 : xfs_alloc_query_range_fn fn,
3949 : void *priv)
3950 : {
3951 22088 : union xfs_btree_irec low_brec = { .a = *low_rec };
3952 22088 : union xfs_btree_irec high_brec = { .a = *high_rec };
3953 22088 : struct xfs_alloc_query_range_info query = { .priv = priv, .fn = fn };
3954 :
3955 22088 : ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3956 22088 : return xfs_btree_query_range(cur, &low_brec, &high_brec,
3957 : xfs_alloc_query_range_helper, &query);
3958 : }
3959 :
3960 : /* Find all free space records. */
3961 : int
3962 1087573 : xfs_alloc_query_all(
3963 : struct xfs_btree_cur *cur,
3964 : xfs_alloc_query_range_fn fn,
3965 : void *priv)
3966 : {
3967 1087573 : struct xfs_alloc_query_range_info query;
3968 :
3969 1087573 : ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3970 1087573 : query.priv = priv;
3971 1087573 : query.fn = fn;
3972 1087573 : return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
3973 : }
3974 :
3975 : /*
3976 : * Scan part of the keyspace of the free space and tell us if the area has no
3977 : * records, is fully mapped by records, or is partially filled.
3978 : */
3979 : int
3980 2180179492 : xfs_alloc_has_records(
3981 : struct xfs_btree_cur *cur,
3982 : xfs_agblock_t bno,
3983 : xfs_extlen_t len,
3984 : enum xbtree_recpacking *outcome)
3985 : {
3986 2180179492 : union xfs_btree_irec low;
3987 2180179492 : union xfs_btree_irec high;
3988 :
3989 2180179492 : memset(&low, 0, sizeof(low));
3990 2180179492 : low.a.ar_startblock = bno;
3991 2180179492 : memset(&high, 0xFF, sizeof(high));
3992 2180179492 : high.a.ar_startblock = bno + len - 1;
3993 :
3994 2180179492 : return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
3995 : }
3996 :
3997 : /*
3998 : * Walk all the blocks in the AGFL. The @walk_fn can return any negative
3999 : * error code or XFS_ITER_*.
4000 : */
4001 : int
4002 82943458 : xfs_agfl_walk(
4003 : struct xfs_mount *mp,
4004 : struct xfs_agf *agf,
4005 : struct xfs_buf *agflbp,
4006 : xfs_agfl_walk_fn walk_fn,
4007 : void *priv)
4008 : {
4009 82943458 : __be32 *agfl_bno;
4010 82943458 : unsigned int i;
4011 82943458 : int error;
4012 :
4013 82943458 : agfl_bno = xfs_buf_to_agfl_bno(agflbp);
4014 82943458 : i = be32_to_cpu(agf->agf_flfirst);
4015 :
4016 : /* Nothing to walk in an empty AGFL. */
4017 82943458 : if (agf->agf_flcount == cpu_to_be32(0))
4018 : return 0;
4019 :
4020 : /* Otherwise, walk from first to last, wrapping as needed. */
4021 746613262 : for (;;) {
4022 746613262 : error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
4023 746614067 : if (error)
4024 4828372 : return error;
4025 741785695 : if (i == be32_to_cpu(agf->agf_fllast))
4026 : break;
4027 1327436332 : if (++i == xfs_agfl_size(mp))
4028 748751 : i = 0;
4029 : }
4030 :
4031 : return 0;
4032 : }
4033 :
4034 : int __init
4035 59 : xfs_extfree_intent_init_cache(void)
4036 : {
4037 59 : xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent",
4038 : sizeof(struct xfs_extent_free_item),
4039 : 0, 0, NULL);
4040 :
4041 59 : return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM;
4042 : }
4043 :
void
xfs_extfree_intent_destroy_cache(void)
{
	/* Tear down the extent-free intent cache created at init time. */
	kmem_cache_destroy(xfs_extfree_item_cache);
	/* Clear the pointer so a stale cache cannot be referenced afterwards. */
	xfs_extfree_item_cache = NULL;
}
4050 :
4051 : /*
4052 : * Find the next chunk of free space in @pag starting at @agbno and going no
4053 : * higher than @end_agbno. Set @agbno and @len to whatever free space we find,
4054 : * or to @end_agbno if we find no space.
4055 : */
int
xfs_alloc_find_freesp(
	struct xfs_trans	*tp,
	struct xfs_perag	*pag,
	xfs_agblock_t		*agbno,
	xfs_agblock_t		end_agbno,
	xfs_extlen_t		*len)
{
	struct xfs_mount	*mp = pag->pag_mount;
	struct xfs_btree_cur	*cur;
	struct xfs_buf		*agf_bp = NULL;
	xfs_agblock_t		found_agbno;
	xfs_extlen_t		found_len;
	int			found;
	int			error;

	trace_xfs_alloc_find_freesp(mp, pag->pag_agno, *agbno,
			end_agbno - *agbno);

	error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp);
	if (error)
		return error;

	/* Search the by-bno free space btree for this AG. */
	cur = xfs_allocbt_init_cursor(mp, tp, agf_bp, pag, XFS_BTNUM_BNO);

	/* Try to find a free extent that starts before here. */
	error = xfs_alloc_lookup_le(cur, *agbno, 0, &found);
	if (error)
		goto out_cur;
	if (found) {
		error = xfs_alloc_get_rec(cur, &found_agbno, &found_len,
				&found);
		if (error)
			goto out_cur;
		if (XFS_IS_CORRUPT(mp, !found)) {
			/* Lookup said a record exists but get_rec disagrees. */
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_cur;
		}

		/* Does the extent that starts at/before *agbno overlap it? */
		if (found_agbno + found_len > *agbno)
			goto found;
	}

	/* Examine the next record if free extent not in range. */
	error = xfs_btree_increment(cur, 0, &found);
	if (error)
		goto out_cur;
	if (!found)
		goto next_ag;

	error = xfs_alloc_get_rec(cur, &found_agbno, &found_len, &found);
	if (error)
		goto out_cur;
	if (XFS_IS_CORRUPT(mp, !found)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_cur;
	}

	/* The next free extent starts beyond the range we were asked about. */
	if (found_agbno >= end_agbno)
		goto next_ag;

found:
	/* Found something, so update the mapping. */
	trace_xfs_alloc_find_freesp_done(mp, pag->pag_agno, found_agbno,
			found_len);
	if (found_agbno < *agbno) {
		/* Clamp the extent so it starts no earlier than *agbno. */
		found_len -= *agbno - found_agbno;
		found_agbno = *agbno;
	}
	*len = found_len;
	*agbno = found_agbno;
	goto out_cur;
next_ag:
	/* Found nothing, so advance the cursor beyond the end of the range. */
	*agbno = end_agbno;
	*len = 0;
out_cur:
	xfs_btree_del_cursor(cur, error);
	return error;
}
|