Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
4 : * All Rights Reserved.
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_format.h"
9 : #include "xfs_log_format.h"
10 : #include "xfs_shared.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_bit.h"
13 : #include "xfs_mount.h"
14 : #include "xfs_defer.h"
15 : #include "xfs_btree.h"
16 : #include "xfs_rmap.h"
17 : #include "xfs_alloc_btree.h"
18 : #include "xfs_alloc.h"
19 : #include "xfs_extent_busy.h"
20 : #include "xfs_errortag.h"
21 : #include "xfs_error.h"
22 : #include "xfs_trace.h"
23 : #include "xfs_trans.h"
24 : #include "xfs_buf_item.h"
25 : #include "xfs_log.h"
26 : #include "xfs_ag.h"
27 : #include "xfs_ag_resv.h"
28 : #include "xfs_bmap.h"
29 : #include "xfs_health.h"
30 :
31 : struct kmem_cache *xfs_extfree_item_cache;
32 :
33 : struct workqueue_struct *xfs_alloc_wq;
34 :
35 : #define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
36 :
37 : #define XFSA_FIXUP_BNO_OK 1
38 : #define XFSA_FIXUP_CNT_OK 2
39 :
40 : /*
41 : * Size of the AGFL. For CRC-enabled filesystems we steal a couple of slots in
42 : * the beginning of the block for a proper header with the location information
43 : * and CRC.
44 : */
45 : unsigned int
46 8048250 : xfs_agfl_size(
47 : struct xfs_mount *mp)
48 : {
49 1106912166 : unsigned int size = mp->m_sb.sb_sectsize;
50 :
51 11532890 : if (xfs_has_crc(mp))
52 1104710383 : size -= sizeof(struct xfs_agfl);
53 :
54 1106912166 : return size / sizeof(xfs_agblock_t);
55 : }
56 :
unsigned int
xfs_refc_block(
	struct xfs_mount	*mp)
{
	unsigned int		prev;

	/*
	 * The refcount btree root lives directly after the last statically
	 * allocated btree root present on this filesystem.
	 */
	if (xfs_has_rmapbt(mp))
		prev = XFS_RMAP_BLOCK(mp);
	else if (xfs_has_finobt(mp))
		prev = XFS_FIBT_BLOCK(mp);
	else
		prev = XFS_IBT_BLOCK(mp);

	return prev + 1;
}
67 :
68 : xfs_extlen_t
69 35788 : xfs_prealloc_blocks(
70 : struct xfs_mount *mp)
71 : {
72 35788 : if (xfs_has_reflink(mp))
73 35707 : return xfs_refc_block(mp) + 1;
74 81 : if (xfs_has_rmapbt(mp))
75 5 : return XFS_RMAP_BLOCK(mp) + 1;
76 76 : if (xfs_has_finobt(mp))
77 28 : return XFS_FIBT_BLOCK(mp) + 1;
78 48 : return XFS_IBT_BLOCK(mp) + 1;
79 : }
80 :
81 : /*
82 : * The number of blocks per AG that we withhold from xfs_mod_fdblocks to
83 : * guarantee that we can refill the AGFL prior to allocating space in a nearly
84 : * full AG. Although the space described by the free space btrees, the
85 : * blocks used by the freesp btrees themselves, and the blocks owned by the
86 : * AGFL are counted in the ondisk fdblocks, it's a mistake to let the ondisk
87 : * free space in the AG drop so low that the free space btrees cannot refill an
88 : * empty AGFL up to the minimum level. Rather than grind through empty AGs
89 : * until the fs goes down, we subtract this many AG blocks from the incore
90 : * fdblocks to ensure user allocation does not overcommit the space the
91 : * filesystem needs for the AGFLs. The rmap btree uses a per-AG reservation to
92 : * withhold space from xfs_mod_fdblocks, so we do not account for that here.
93 : */
94 : #define XFS_ALLOCBT_AGFL_RESERVE 4
95 :
96 : /*
97 : * Compute the number of blocks that we set aside to guarantee the ability to
98 : * refill the AGFL and handle a full bmap btree split.
99 : *
100 : * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
101 : * AGF buffer (PV 947395), we place constraints on the relationship among
102 : * actual allocations for data blocks, freelist blocks, and potential file data
103 : * bmap btree blocks. However, these restrictions may result in no actual space
104 : * allocated for a delayed extent, for example, a data block in a certain AG is
105 : * allocated but there is no additional block for the additional bmap btree
106 : * block due to a split of the bmap btree of the file. The result of this may
107 : * lead to an infinite loop when the file gets flushed to disk and all delayed
108 : * extents need to be actually allocated. To get around this, we explicitly set
109 : * aside a few blocks which will not be reserved in delayed allocation.
110 : *
111 : * For each AG, we need to reserve enough blocks to replenish a totally empty
112 : * AGFL and 4 more to handle a potential split of the file's bmap btree.
113 : */
114 : unsigned int
115 35839 : xfs_alloc_set_aside(
116 : struct xfs_mount *mp)
117 : {
118 35839 : return mp->m_sb.sb_agcount * (XFS_ALLOCBT_AGFL_RESERVE + 4);
119 : }
120 :
121 : /*
122 : * When deciding how much space to allocate out of an AG, we limit the
123 : * allocation maximum size to the size the AG. However, we cannot use all the
124 : * blocks in the AG - some are permanently used by metadata. These
125 : * blocks are generally:
126 : * - the AG superblock, AGF, AGI and AGFL
127 : * - the AGF (bno and cnt) and AGI btree root blocks, and optionally
128 : * the AGI free inode and rmap btree root blocks.
129 : * - blocks on the AGFL according to xfs_alloc_set_aside() limits
130 : * - the rmapbt root block
131 : *
132 : * The AG headers are sector sized, so the amount of space they take up is
133 : * dependent on filesystem geometry. The others are all single blocks.
134 : */
135 : unsigned int
136 24343 : xfs_alloc_ag_max_usable(
137 : struct xfs_mount *mp)
138 : {
139 24343 : unsigned int blocks;
140 :
141 24343 : blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
142 24343 : blocks += XFS_ALLOCBT_AGFL_RESERVE;
143 24343 : blocks += 3; /* AGF, AGI btree root blocks */
144 24343 : if (xfs_has_finobt(mp))
145 24293 : blocks++; /* finobt root block */
146 24343 : if (xfs_has_rmapbt(mp))
147 24269 : blocks++; /* rmap root block */
148 24343 : if (xfs_has_reflink(mp))
149 24264 : blocks++; /* refcount root block */
150 :
151 24343 : return mp->m_sb.sb_agblocks - blocks;
152 : }
153 :
154 : /*
155 : * Lookup the record equal to [bno, len] in the btree given by cur.
156 : */
157 : STATIC int /* error */
158 255299483 : xfs_alloc_lookup_eq(
159 : struct xfs_btree_cur *cur, /* btree cursor */
160 : xfs_agblock_t bno, /* starting block of extent */
161 : xfs_extlen_t len, /* length of extent */
162 : int *stat) /* success/failure */
163 : {
164 255299483 : int error;
165 :
166 255299483 : cur->bc_rec.a.ar_startblock = bno;
167 255299483 : cur->bc_rec.a.ar_blockcount = len;
168 255299483 : error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
169 255318586 : cur->bc_ag.abt.active = (*stat == 1);
170 255318586 : return error;
171 : }
172 :
173 : /*
174 : * Lookup the first record greater than or equal to [bno, len]
175 : * in the btree given by cur.
176 : */
177 : int /* error */
178 6243 : xfs_alloc_lookup_ge(
179 : struct xfs_btree_cur *cur, /* btree cursor */
180 : xfs_agblock_t bno, /* starting block of extent */
181 : xfs_extlen_t len, /* length of extent */
182 : int *stat) /* success/failure */
183 : {
184 272144852 : int error;
185 :
186 272144852 : cur->bc_rec.a.ar_startblock = bno;
187 272144852 : cur->bc_rec.a.ar_blockcount = len;
188 6243 : error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
189 272139298 : cur->bc_ag.abt.active = (*stat == 1);
190 272139298 : return error;
191 : }
192 :
193 : /*
194 : * Lookup the first record less than or equal to [bno, len]
195 : * in the btree given by cur.
196 : */
197 : int /* error */
198 387924045 : xfs_alloc_lookup_le(
199 : struct xfs_btree_cur *cur, /* btree cursor */
200 : xfs_agblock_t bno, /* starting block of extent */
201 : xfs_extlen_t len, /* length of extent */
202 : int *stat) /* success/failure */
203 : {
204 476489664 : int error;
205 476489664 : cur->bc_rec.a.ar_startblock = bno;
206 476489664 : cur->bc_rec.a.ar_blockcount = len;
207 387924045 : error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
208 476490522 : cur->bc_ag.abt.active = (*stat == 1);
209 476490522 : return error;
210 : }
211 :
212 : static inline bool
213 : xfs_alloc_cur_active(
214 : struct xfs_btree_cur *cur)
215 : {
216 8161164209 : return cur && cur->bc_ag.abt.active;
217 : }
218 :
219 : /*
220 : * Update the record referred to by cur to the value given
221 : * by [bno, len].
222 : * This either works (return 0) or gets an EFSCORRUPTED error.
223 : */
224 : STATIC int /* error */
225 65824400 : xfs_alloc_update(
226 : struct xfs_btree_cur *cur, /* btree cursor */
227 : xfs_agblock_t bno, /* starting block of extent */
228 : xfs_extlen_t len) /* length of extent */
229 : {
230 65824400 : union xfs_btree_rec rec;
231 :
232 65824400 : rec.alloc.ar_startblock = cpu_to_be32(bno);
233 65824400 : rec.alloc.ar_blockcount = cpu_to_be32(len);
234 65824400 : return xfs_btree_update(cur, &rec);
235 : }
236 :
237 : /* Convert the ondisk btree record to its incore representation. */
void
xfs_alloc_btrec_to_irec(
	const union xfs_btree_rec	*rec,
	struct xfs_alloc_rec_incore	*irec)
{
	/* both fields are stored big-endian on disk */
	irec->ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
	irec->ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
}
246 :
/*
 * Sanity-check an incore free space record against the given AG.  Returns
 * the failure address on corruption, NULL if the record looks sane.
 */
inline xfs_failaddr_t
xfs_alloc_check_perag_irec(
	struct xfs_perag		*pag,
	const struct xfs_alloc_rec_incore *irec)
{
	/* a free extent must cover at least one block */
	if (irec->ar_blockcount == 0)
		return __this_address;

	/* check for valid extent range, including overflow */
	if (!xfs_verify_agbext(pag, irec->ar_startblock, irec->ar_blockcount))
		return __this_address;

	return NULL;
}
261 :
262 : /* Simple checks for free space records. */
xfs_failaddr_t
xfs_alloc_check_irec(
	struct xfs_btree_cur		*cur,
	const struct xfs_alloc_rec_incore *irec)
{
	/* delegate to the perag-based check using the cursor's AG */
	return xfs_alloc_check_perag_irec(cur->bc_ag.pag, irec);
}
270 :
/*
 * Log a corrupt free space btree record, mark the btree sick and hand back
 * -EFSCORRUPTED for the caller to propagate.
 */
static inline int
xfs_alloc_complain_bad_rec(
	struct xfs_btree_cur		*cur,
	xfs_failaddr_t			fa,
	const struct xfs_alloc_rec_incore *irec)
{
	struct xfs_mount		*mp = cur->bc_mp;

	xfs_warn(mp,
		"%s Freespace BTree record corruption in AG %d detected at %pS!",
		cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size",
		cur->bc_ag.pag->pag_agno, fa);
	xfs_warn(mp,
		"start block 0x%x block count 0x%x", irec->ar_startblock,
		irec->ar_blockcount);
	xfs_btree_mark_sick(cur);
	return -EFSCORRUPTED;
}
289 :
290 : /*
291 : * Get the data from the pointed-to record.
292 : */
int					/* error */
xfs_alloc_get_rec(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		*bno,	/* output: starting block of extent */
	xfs_extlen_t		*len,	/* output: length of extent */
	int			*stat)	/* output: success/failure */
{
	struct xfs_alloc_rec_incore irec;
	union xfs_btree_rec	*rec;
	xfs_failaddr_t		fa;
	int			error;

	error = xfs_btree_get_rec(cur, &rec, stat);
	/* no record found (or error) - leave the outputs untouched */
	if (error || !(*stat))
		return error;

	/* convert from ondisk endianness and sanity check the record */
	xfs_alloc_btrec_to_irec(rec, &irec);
	fa = xfs_alloc_check_irec(cur, &irec);
	if (fa)
		return xfs_alloc_complain_bad_rec(cur, fa, &irec);

	*bno = irec.ar_startblock;
	*len = irec.ar_blockcount;
	return 0;
}
318 :
319 : /*
320 : * Compute aligned version of the found extent.
321 : * Takes alignment and min length into account.
322 : */
STATIC bool
xfs_alloc_compute_aligned(
	xfs_alloc_arg_t	*args,		/* allocation argument structure */
	xfs_agblock_t	foundbno,	/* starting block in found extent */
	xfs_extlen_t	foundlen,	/* length in found extent */
	xfs_agblock_t	*resbno,	/* result block number */
	xfs_extlen_t	*reslen,	/* result length */
	unsigned	*busy_gen)
{
	xfs_agblock_t	bno = foundbno;
	xfs_extlen_t	len = foundlen;
	xfs_extlen_t	diff;
	bool		busy;

	/* Trim busy sections out of found extent */
	busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);

	/*
	 * If we have a largish extent that happens to start before min_agbno,
	 * see if we can shift it into range...
	 */
	if (bno < args->min_agbno && bno + len > args->min_agbno) {
		diff = args->min_agbno - bno;
		if (len > diff) {
			bno += diff;
			len -= diff;
		}
	}

	if (args->alignment > 1 && len >= args->minlen) {
		xfs_agblock_t	aligned_bno = roundup(bno, args->alignment);

		diff = aligned_bno - bno;

		*resbno = aligned_bno;
		/* rounding up may consume the whole extent; report 0 then */
		*reslen = diff >= len ? 0 : len - diff;
	} else {
		*resbno = bno;
		*reslen = len;
	}

	/* tell the caller whether any trimmed range was busy */
	return busy;
}
366 :
367 : /*
368 : * Compute best start block and diff for "near" allocations.
369 : * freelen >= wantlen already checked by caller.
370 : */
STATIC xfs_extlen_t			/* difference value (absolute) */
xfs_alloc_compute_diff(
	xfs_agblock_t	wantbno,	/* target starting block */
	xfs_extlen_t	wantlen,	/* target length */
	xfs_extlen_t	alignment,	/* target alignment */
	int		datatype,	/* are we allocating data? */
	xfs_agblock_t	freebno,	/* freespace's starting block */
	xfs_extlen_t	freelen,	/* freespace's length */
	xfs_agblock_t	*newbnop)	/* result: best start block from free */
{
	xfs_agblock_t	freeend;	/* end of freespace extent */
	xfs_agblock_t	newbno1;	/* return block number */
	xfs_agblock_t	newbno2;	/* other new block number */
	xfs_extlen_t	newlen1=0;	/* length with newbno1 */
	xfs_extlen_t	newlen2=0;	/* length with newbno2 */
	xfs_agblock_t	wantend;	/* end of target extent */
	bool		userdata = datatype & XFS_ALLOC_USERDATA;

	ASSERT(freelen >= wantlen);
	freeend = freebno + freelen;
	wantend = wantbno + wantlen;
	/*
	 * We want to allocate from the start of a free extent if it is past
	 * the desired block or if we are allocating user data and the free
	 * extent is before desired block. The second case is there to allow
	 * for contiguous allocation from the remaining free space if the file
	 * grows in the short term.
	 */
	if (freebno >= wantbno || (userdata && freeend < wantend)) {
		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
			newbno1 = NULLAGBLOCK;
	} else if (freeend >= wantend && alignment > 1) {
		/*
		 * Free extent spans the target; try both the aligned block at
		 * or after wantbno and the one just before it, and keep
		 * whichever yields the longer (then closer) allocation.
		 */
		newbno1 = roundup(wantbno, alignment);
		newbno2 = newbno1 - alignment;
		if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
		else
			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
		if (newbno2 < freebno)
			newbno2 = NULLAGBLOCK;
		else
			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
			if (newlen1 < newlen2 ||
			    (newlen1 == newlen2 &&
			     XFS_ABSDIFF(newbno1, wantbno) >
			     XFS_ABSDIFF(newbno2, wantbno)))
				newbno1 = newbno2;
		} else if (newbno2 != NULLAGBLOCK)
			newbno1 = newbno2;
	} else if (freeend >= wantend) {
		/* unaligned and the free extent covers the target exactly */
		newbno1 = wantbno;
	} else if (alignment > 1) {
		/* take the aligned allocation closest to the extent's end */
		newbno1 = roundup(freeend - wantlen, alignment);
		if (newbno1 > freeend - wantlen &&
		    newbno1 - alignment >= freebno)
			newbno1 -= alignment;
		else if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
	} else
		/* unaligned: allocate from the tail of the free extent */
		newbno1 = freeend - wantlen;
	*newbnop = newbno1;
	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
}
435 :
436 : /*
437 : * Fix up the length, based on mod and prod.
438 : * len should be k * prod + mod for some k.
439 : * If len is too small it is returned unchanged.
440 : * If len hits maxlen it is left alone.
441 : */
STATIC void
xfs_alloc_fix_len(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_extlen_t	k;
	xfs_extlen_t	rlen;

	ASSERT(args->mod < args->prod);
	rlen = args->len;
	ASSERT(rlen >= args->minlen);
	ASSERT(rlen <= args->maxlen);
	/* nothing to fix for trivial prod, or when at/below the limits */
	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
	    (args->mod == 0 && rlen < args->prod))
		return;
	/* already satisfies len == k * prod + mod? */
	k = rlen % args->prod;
	if (k == args->mod)
		return;
	/* trim down to the nearest smaller length of the right form */
	if (k > args->mod)
		rlen = rlen - (k - args->mod);
	else
		rlen = rlen - args->prod + (args->mod - k);
	/* casts to (int) catch length underflows */
	if ((int)rlen < (int)args->minlen)
		return;
	ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
	ASSERT(rlen % args->prod == args->mod);
	ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
		rlen + args->minleft);
	args->len = rlen;
}
472 :
473 : /*
474 : * Update the two btrees, logically removing from freespace the extent
475 : * starting at rbno, rlen blocks. The extent is contained within the
476 : * actual (current) free extent fbno for flen blocks.
477 : * Flags are passed in indicating whether the cursors are set to the
478 : * relevant records.
479 : */
STATIC int				/* error code */
xfs_alloc_fixup_trees(
	struct xfs_btree_cur *cnt_cur,	/* cursor for by-size btree */
	struct xfs_btree_cur *bno_cur,	/* cursor for by-block btree */
	xfs_agblock_t	fbno,		/* starting block of free extent */
	xfs_extlen_t	flen,		/* length of free extent */
	xfs_agblock_t	rbno,		/* starting block of returned extent */
	xfs_extlen_t	rlen,		/* length of returned extent */
	int		flags)		/* flags, XFSA_FIXUP_... */
{
	int		error;		/* error code */
	int		i;		/* operation results */
	xfs_agblock_t	nfbno1;		/* first new free startblock */
	xfs_agblock_t	nfbno2;		/* second new free startblock */
	xfs_extlen_t	nflen1=0;	/* first new free length */
	xfs_extlen_t	nflen2=0;	/* second new free length */
	struct xfs_mount *mp;

	mp = cnt_cur->bc_mp;

	/*
	 * Look up the record in the by-size tree if necessary.
	 */
	if (flags & XFSA_FIXUP_CNT_OK) {
#ifdef DEBUG
		/* cursor claimed to be positioned; verify it really is */
		if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp,
				   i != 1 ||
				   nfbno1 != fbno ||
				   nflen1 != flen)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
	}
	/*
	 * Look up the record in the by-block tree if necessary.
	 */
	if (flags & XFSA_FIXUP_BNO_OK) {
#ifdef DEBUG
		/* cursor claimed to be positioned; verify it really is */
		if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp,
				   i != 1 ||
				   nfbno1 != fbno ||
				   nflen1 != flen)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
	}

#ifdef DEBUG
	/*
	 * Both trees index the same set of free extents, so single-level
	 * trees must agree on the record count.
	 */
	if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
		struct xfs_btree_block	*bnoblock;
		struct xfs_btree_block	*cntblock;

		bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_levels[0].bp);
		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_levels[0].bp);

		if (XFS_IS_CORRUPT(mp,
				   bnoblock->bb_numrecs !=
				   cntblock->bb_numrecs)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
	}
#endif

	/*
	 * Deal with all four cases: the allocated record is contained
	 * within the freespace record, so we can have new freespace
	 * at either (or both) end, or no freespace remaining.
	 */
	if (rbno == fbno && rlen == flen)
		nfbno1 = nfbno2 = NULLAGBLOCK;
	else if (rbno == fbno) {
		nfbno1 = rbno + rlen;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else if (rbno + rlen == fbno + flen) {
		nfbno1 = fbno;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else {
		nfbno1 = fbno;
		nflen1 = rbno - fbno;
		nfbno2 = rbno + rlen;
		nflen2 = (fbno + flen) - nfbno2;
	}
	/*
	 * Delete the entry from the by-size btree.
	 */
	if ((error = xfs_btree_delete(cnt_cur, &i)))
		return error;
	if (XFS_IS_CORRUPT(mp, i != 1)) {
		xfs_btree_mark_sick(cnt_cur);
		return -EFSCORRUPTED;
	}
	/*
	 * Add new by-size btree entry(s).
	 */
	if (nfbno1 != NULLAGBLOCK) {
		/* lookup positions the cursor; the record must not exist yet */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
	}
	if (nfbno2 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
	}
	/*
	 * Fix up the by-block btree entry(s).
	 */
	if (nfbno1 == NULLAGBLOCK) {
		/*
		 * No remaining freespace, just delete the by-block tree entry.
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
	} else {
		/*
		 * Update the by-block entry to start later|be shorter.
		 */
		if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
			return error;
	}
	if (nfbno2 != NULLAGBLOCK) {
		/*
		 * 2 resulting free entries, need to add one.
		 */
		if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
		if ((error = xfs_btree_insert(bno_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
	}
	return 0;
}
664 :
665 : /*
666 : * We do not verify the AGFL contents against AGF-based index counters here,
667 : * even though we may have access to the perag that contains shadow copies. We
668 : * don't know if the AGF based counters have been checked, and if they have they
669 : * still may be inconsistent because they haven't yet been reset on the first
670 : * allocation after the AGF has been read in.
671 : *
672 : * This means we can only check that all agfl entries contain valid or null
673 : * values because we can't reliably determine the active range to exclude
674 : * NULLAGBNO as a valid value.
675 : *
676 : * However, we can't even do that for v4 format filesystems because there are
677 : * old versions of mkfs out there that do not initialise the AGFL to known,
678 : * verifiable values. Hence we can't tell the difference between an AGFL block
679 : * allocated by mkfs and a corrupted AGFL block here on v4 filesystems.
680 : *
681 : * As a result, we can only fully validate AGFL block numbers when we pull them
682 : * from the freelist in xfs_alloc_get_freelist().
683 : */
684 : static xfs_failaddr_t
685 617548 : xfs_agfl_verify(
686 : struct xfs_buf *bp)
687 : {
688 617548 : struct xfs_mount *mp = bp->b_mount;
689 617548 : struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
690 617548 : __be32 *agfl_bno = xfs_buf_to_agfl_bno(bp);
691 617548 : int i;
692 :
693 617548 : if (!xfs_has_crc(mp))
694 : return NULL;
695 :
696 617544 : if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
697 0 : return __this_address;
698 617544 : if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
699 0 : return __this_address;
700 : /*
701 : * during growfs operations, the perag is not fully initialised,
702 : * so we can't use it for any useful checking. growfs ensures we can't
703 : * use it by using uncached buffers that don't have the perag attached
704 : * so we can detect and avoid this problem.
705 : */
706 1229680 : if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
707 0 : return __this_address;
708 :
709 1243433178 : for (i = 0; i < xfs_agfl_size(mp); i++) {
710 621099166 : if (be32_to_cpu(agfl_bno[i]) != NULLAGBLOCK &&
711 212495030 : be32_to_cpu(agfl_bno[i]) >= mp->m_sb.sb_agblocks)
712 0 : return __this_address;
713 : }
714 :
715 617544 : if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
716 0 : return __this_address;
717 : return NULL;
718 : }
719 :
static void
xfs_agfl_read_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_mount;
	xfs_failaddr_t	fa;

	/*
	 * There is no verification of non-crc AGFLs because mkfs does not
	 * initialise the AGFL to zero or NULL. Hence the only valid part of the
	 * AGFL is what the AGF says is active. We can't get to the AGF, so we
	 * can't verify just those entries are valid.
	 */
	if (!xfs_has_crc(mp))
		return;

	/* check the CRC first; only verify structure when it matches */
	if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
	else {
		fa = xfs_agfl_verify(bp);
		if (fa)
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
	}
}
744 :
static void
xfs_agfl_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_buf_log_item	*bip = bp->b_log_item;
	xfs_failaddr_t		fa;

	/* no verification of non-crc AGFLs */
	if (!xfs_has_crc(mp))
		return;

	/* refuse to write out a structurally corrupt AGFL */
	fa = xfs_agfl_verify(bp);
	if (fa) {
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}

	/* stamp the last-written LSN before computing the checksum */
	if (bip)
		XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
}
768 :
/* Buffer verifier operations attached to every AGFL buffer. */
const struct xfs_buf_ops xfs_agfl_buf_ops = {
	.name = "xfs_agfl",
	.magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
	.verify_read = xfs_agfl_read_verify,
	.verify_write = xfs_agfl_write_verify,
	.verify_struct = xfs_agfl_verify,
};
776 :
777 : /*
778 : * Read in the allocation group free block array.
779 : */
int
xfs_alloc_read_agfl(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	struct xfs_buf		**bpp)
{
	struct xfs_mount	*mp = pag->pag_mount;
	struct xfs_buf		*bp;
	int			error;

	error = xfs_trans_read_buf(
			mp, tp, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGFL_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
	/* a verifier/corruption error marks the AGFL sick for this AG */
	if (xfs_metadata_is_sick(error))
		xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL);
	if (error)
		return error;
	xfs_buf_set_ref(bp, XFS_AGFL_REF);
	*bpp = bp;
	return 0;
}
802 :
/*
 * Apply a (signed) free block count delta to both the incore perag and the
 * ondisk AGF, logging the AGF change.  Returns -EFSCORRUPTED if the result
 * exceeds the AG size.
 */
STATIC int
xfs_alloc_update_counters(
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	long			len)
{
	struct xfs_agf		*agf = agbp->b_addr;

	agbp->b_pag->pagf_freeblks += len;
	be32_add_cpu(&agf->agf_freeblks, len);

	/* the free block count can never exceed the AG length */
	if (unlikely(be32_to_cpu(agf->agf_freeblks) >
		     be32_to_cpu(agf->agf_length))) {
		xfs_buf_mark_corrupt(agbp);
		xfs_ag_mark_sick(agbp->b_pag, XFS_SICK_AG_AGF);
		return -EFSCORRUPTED;
	}

	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
	return 0;
}
824 :
825 : /*
826 : * Block allocation algorithm and data structures.
827 : */
/*
 * State for a single extent allocation attempt: the btree cursors in use
 * plus the best candidate extent found so far.
 */
struct xfs_alloc_cur {
	struct xfs_btree_cur	*cnt;	/* btree cursors */
	struct xfs_btree_cur	*bnolt;
	struct xfs_btree_cur	*bnogt;
	xfs_extlen_t		cur_len;/* current search length */
	xfs_agblock_t		rec_bno;/* extent startblock */
	xfs_extlen_t		rec_len;/* extent length */
	xfs_agblock_t		bno;	/* alloc bno */
	xfs_extlen_t		len;	/* alloc len */
	xfs_extlen_t		diff;	/* diff from search bno */
	unsigned int		busy_gen;/* busy state */
	bool			busy;
};
841 :
842 : /*
843 : * Set up cursors, etc. in the extent allocation cursor. This function can be
844 : * called multiple times to reset an initialized structure without having to
845 : * reallocate cursors.
846 : */
static int
xfs_alloc_cur_setup(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur)
{
	int			error;
	int			i;

	/* reset the candidate-tracking state for a fresh search */
	acur->cur_len = args->maxlen;
	acur->rec_bno = 0;
	acur->rec_len = 0;
	acur->bno = 0;
	acur->len = 0;
	acur->diff = -1;
	acur->busy = false;
	acur->busy_gen = 0;

	/*
	 * Perform an initial cntbt lookup to check for availability of maxlen
	 * extents. If this fails, we'll return -ENOSPC to signal the caller to
	 * attempt a small allocation.
	 */
	if (!acur->cnt)
		acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->pag, XFS_BTNUM_CNT);
	error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
	if (error)
		return error;

	/*
	 * Allocate the bnobt left and right search cursors.
	 */
	if (!acur->bnolt)
		acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->pag, XFS_BTNUM_BNO);
	if (!acur->bnogt)
		acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->pag, XFS_BTNUM_BNO);
	return i == 1 ? 0 : -ENOSPC;
}
887 :
888 : static void
889 60891527 : xfs_alloc_cur_close(
890 : struct xfs_alloc_cur *acur,
891 : bool error)
892 : {
893 60891527 : int cur_error = XFS_BTREE_NOERROR;
894 :
895 60891527 : if (error)
896 1028 : cur_error = XFS_BTREE_ERROR;
897 :
898 60891527 : if (acur->cnt)
899 60891527 : xfs_btree_del_cursor(acur->cnt, cur_error);
900 60891933 : if (acur->bnolt)
901 60891451 : xfs_btree_del_cursor(acur->bnolt, cur_error);
902 60891443 : if (acur->bnogt)
903 60890961 : xfs_btree_del_cursor(acur->bnogt, cur_error);
904 60891529 : acur->cnt = acur->bnolt = acur->bnogt = NULL;
905 60891529 : }
906 :
907 : /*
908 : * Check an extent for allocation and track the best available candidate in the
909 : * allocation structure. The cursor is deactivated if it has entered an out of
910 : * range state based on allocation arguments. Optionally return the extent
911 : * extent geometry and allocation status if requested by the caller.
912 : */
913 : static int
914 3848274183 : xfs_alloc_cur_check(
915 : struct xfs_alloc_arg *args,
916 : struct xfs_alloc_cur *acur,
917 : struct xfs_btree_cur *cur,
918 : int *new)
919 : {
920 3848274183 : int error, i;
921 3848274183 : xfs_agblock_t bno, bnoa, bnew;
922 3848274183 : xfs_extlen_t len, lena, diff = -1;
923 3848274183 : bool busy;
924 3848274183 : unsigned busy_gen = 0;
925 3848274183 : bool deactivate = false;
926 3848274183 : bool isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;
927 :
928 3848274183 : *new = 0;
929 :
930 3848274183 : error = xfs_alloc_get_rec(cur, &bno, &len, &i);
931 3846344827 : if (error)
932 : return error;
933 3846344827 : if (XFS_IS_CORRUPT(args->mp, i != 1)) {
934 0 : xfs_btree_mark_sick(cur);
935 0 : return -EFSCORRUPTED;
936 : }
937 :
938 : /*
939 : * Check minlen and deactivate a cntbt cursor if out of acceptable size
940 : * range (i.e., walking backwards looking for a minlen extent).
941 : */
942 3846344827 : if (len < args->minlen) {
943 249792773 : deactivate = !isbnobt;
944 249792773 : goto out;
945 : }
946 :
947 3596552054 : busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
948 : &busy_gen);
949 3605995058 : acur->busy |= busy;
950 3605995058 : if (busy)
951 1614749424 : acur->busy_gen = busy_gen;
952 : /* deactivate a bnobt cursor outside of locality range */
953 3605995058 : if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
954 3558 : deactivate = isbnobt;
955 3558 : goto out;
956 : }
957 3605991500 : if (lena < args->minlen)
958 1473994669 : goto out;
959 :
960 2131996831 : args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
961 2131996831 : xfs_alloc_fix_len(args);
962 2131920015 : ASSERT(args->len >= args->minlen);
963 2131920015 : if (args->len < acur->len)
964 51223 : goto out;
965 :
966 : /*
967 : * We have an aligned record that satisfies minlen and beats or matches
968 : * the candidate extent size. Compare locality for near allocation mode.
969 : */
970 2131868792 : diff = xfs_alloc_compute_diff(args->agbno, args->len,
971 : args->alignment, args->datatype,
972 : bnoa, lena, &bnew);
973 2132353973 : if (bnew == NULLAGBLOCK)
974 0 : goto out;
975 :
976 : /*
977 : * Deactivate a bnobt cursor with worse locality than the current best.
978 : */
979 2132353973 : if (diff > acur->diff) {
980 1555439004 : deactivate = isbnobt;
981 1555439004 : goto out;
982 : }
983 :
984 576914969 : ASSERT(args->len > acur->len ||
985 : (args->len == acur->len && diff <= acur->diff));
986 576914969 : acur->rec_bno = bno;
987 576914969 : acur->rec_len = len;
988 576914969 : acur->bno = bnew;
989 576914969 : acur->len = args->len;
990 576914969 : acur->diff = diff;
991 576914969 : *new = 1;
992 :
993 : /*
994 : * We're done if we found a perfect allocation. This only deactivates
995 : * the current cursor, but this is just an optimization to terminate a
996 : * cntbt search that otherwise runs to the edge of the tree.
997 : */
998 576914969 : if (acur->diff == 0 && acur->len == args->maxlen)
999 : deactivate = true;
1000 569585621 : out:
1001 3279281227 : if (deactivate)
1002 16567864 : cur->bc_ag.abt.active = false;
1003 3856196196 : trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
1004 3856196196 : *new);
1005 3856196196 : return 0;
1006 : }
1007 :
1008 : /*
1009 : * Complete an allocation of a candidate extent. Remove the extent from both
1010 : * trees and update the args structure.
1011 : */
1012 : STATIC int
1013 60889456 : xfs_alloc_cur_finish(
1014 : struct xfs_alloc_arg *args,
1015 : struct xfs_alloc_cur *acur)
1016 : {
1017 60889456 : struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
1018 60889456 : int error;
1019 :
1020 60889456 : ASSERT(acur->cnt && acur->bnolt);
1021 60889456 : ASSERT(acur->bno >= acur->rec_bno);
1022 60889456 : ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
1023 121778912 : ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length));
1024 :
1025 60889456 : error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
1026 : acur->rec_len, acur->bno, acur->len, 0);
1027 60890824 : if (error)
1028 : return error;
1029 :
1030 60890718 : args->agbno = acur->bno;
1031 60890718 : args->len = acur->len;
1032 60890718 : args->wasfromfl = 0;
1033 :
1034 60890718 : trace_xfs_alloc_cur(args);
1035 60890718 : return 0;
1036 : }
1037 :
1038 : /*
1039 : * Locality allocation lookup algorithm. This expects a cntbt cursor and uses
1040 : * bno optimized lookup to search for extents with ideal size and locality.
1041 : */
1042 : STATIC int
1043 137183350 : xfs_alloc_cntbt_iter(
1044 : struct xfs_alloc_arg *args,
1045 : struct xfs_alloc_cur *acur)
1046 : {
1047 137183350 : struct xfs_btree_cur *cur = acur->cnt;
1048 137183350 : xfs_agblock_t bno;
1049 137183350 : xfs_extlen_t len, cur_len;
1050 137183350 : int error;
1051 137183350 : int i;
1052 :
1053 274366700 : if (!xfs_alloc_cur_active(cur))
1054 : return 0;
1055 :
1056 : /* locality optimized lookup */
1057 137153148 : cur_len = acur->cur_len;
1058 137153148 : error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
1059 137144766 : if (error)
1060 : return error;
1061 137144766 : if (i == 0)
1062 : return 0;
1063 124777805 : error = xfs_alloc_get_rec(cur, &bno, &len, &i);
1064 124773073 : if (error)
1065 : return error;
1066 :
1067 : /* check the current record and update search length from it */
1068 124773001 : error = xfs_alloc_cur_check(args, acur, cur, &i);
1069 124773853 : if (error)
1070 : return error;
1071 124773853 : ASSERT(len >= acur->cur_len);
1072 124773853 : acur->cur_len = len;
1073 :
1074 : /*
1075 : * We looked up the first record >= [agbno, len] above. The agbno is a
1076 : * secondary key and so the current record may lie just before or after
1077 : * agbno. If it is past agbno, check the previous record too so long as
1078 : * the length matches as it may be closer. Don't check a smaller record
1079 : * because that could deactivate our cursor.
1080 : */
1081 124773853 : if (bno > args->agbno) {
1082 121373533 : error = xfs_btree_decrement(cur, 0, &i);
1083 121381447 : if (!error && i) {
1084 120373543 : error = xfs_alloc_get_rec(cur, &bno, &len, &i);
1085 120362356 : if (!error && i && len == acur->cur_len)
1086 33582048 : error = xfs_alloc_cur_check(args, acur, cur,
1087 : &i);
1088 : }
1089 121370370 : if (error)
1090 : return error;
1091 : }
1092 :
1093 : /*
1094 : * Increment the search key until we find at least one allocation
1095 : * candidate or if the extent we found was larger. Otherwise, double the
1096 : * search key to optimize the search. Efficiency is more important here
1097 : * than absolute best locality.
1098 : */
1099 124770690 : cur_len <<= 1;
1100 124770690 : if (!acur->len || acur->cur_len >= cur_len)
1101 83427264 : acur->cur_len++;
1102 : else
1103 41343426 : acur->cur_len = cur_len;
1104 :
1105 : return error;
1106 : }
1107 :
1108 : /*
1109 : * Deal with the case where only small freespaces remain. Either return the
1110 : * contents of the last freespace record, or allocate space from the freelist if
1111 : * there is nothing in the tree.
1112 : */
1113 : STATIC int /* error */
1114 94772 : xfs_alloc_ag_vextent_small(
1115 : struct xfs_alloc_arg *args, /* allocation argument structure */
1116 : struct xfs_btree_cur *ccur, /* optional by-size cursor */
1117 : xfs_agblock_t *fbnop, /* result block number */
1118 : xfs_extlen_t *flenp, /* result length */
1119 : int *stat) /* status: 0-freelist, 1-normal/none */
1120 : {
1121 94772 : struct xfs_agf *agf = args->agbp->b_addr;
1122 94772 : int error = 0;
1123 94772 : xfs_agblock_t fbno = NULLAGBLOCK;
1124 94772 : xfs_extlen_t flen = 0;
1125 94772 : int i = 0;
1126 :
1127 : /*
1128 : * If a cntbt cursor is provided, try to allocate the largest record in
1129 : * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
1130 : * allocation. Make sure to respect minleft even when pulling from the
1131 : * freelist.
1132 : */
1133 94772 : if (ccur)
1134 94772 : error = xfs_btree_decrement(ccur, 0, &i);
1135 94771 : if (error)
1136 0 : goto error;
1137 94771 : if (i) {
1138 94771 : error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
1139 94772 : if (error)
1140 0 : goto error;
1141 94772 : if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1142 0 : xfs_btree_mark_sick(ccur);
1143 0 : error = -EFSCORRUPTED;
1144 0 : goto error;
1145 : }
1146 94772 : goto out;
1147 : }
1148 :
1149 0 : if (args->minlen != 1 || args->alignment != 1 ||
1150 0 : args->resv == XFS_AG_RESV_AGFL ||
1151 0 : be32_to_cpu(agf->agf_flcount) <= args->minleft)
1152 0 : goto out;
1153 :
1154 0 : error = xfs_alloc_get_freelist(args->pag, args->tp, args->agbp,
1155 : &fbno, 0);
1156 0 : if (error)
1157 0 : goto error;
1158 0 : if (fbno == NULLAGBLOCK)
1159 0 : goto out;
1160 :
1161 0 : xfs_extent_busy_reuse(args->mp, args->pag, fbno, 1,
1162 0 : (args->datatype & XFS_ALLOC_NOBUSY));
1163 :
1164 0 : if (args->datatype & XFS_ALLOC_USERDATA) {
1165 0 : struct xfs_buf *bp;
1166 :
1167 0 : error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
1168 0 : XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
1169 : args->mp->m_bsize, 0, &bp);
1170 0 : if (error)
1171 0 : goto error;
1172 0 : xfs_trans_binval(args->tp, bp);
1173 : }
1174 0 : *fbnop = args->agbno = fbno;
1175 0 : *flenp = args->len = 1;
1176 0 : if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) {
1177 0 : xfs_btree_mark_sick(ccur);
1178 0 : error = -EFSCORRUPTED;
1179 0 : goto error;
1180 : }
1181 0 : args->wasfromfl = 1;
1182 0 : trace_xfs_alloc_small_freelist(args);
1183 :
1184 : /*
1185 : * If we're feeding an AGFL block to something that doesn't live in the
1186 : * free space, we need to clear out the OWN_AG rmap.
1187 : */
1188 0 : error = xfs_rmap_free(args->tp, args->agbp, args->pag, fbno, 1,
1189 : &XFS_RMAP_OINFO_AG);
1190 0 : if (error)
1191 0 : goto error;
1192 :
1193 0 : *stat = 0;
1194 0 : return 0;
1195 :
1196 94772 : out:
1197 : /*
1198 : * Can't do the allocation, give up.
1199 : */
1200 94772 : if (flen < args->minlen) {
1201 0 : args->agbno = NULLAGBLOCK;
1202 0 : trace_xfs_alloc_small_notenough(args);
1203 0 : flen = 0;
1204 : }
1205 94772 : *fbnop = fbno;
1206 94772 : *flenp = flen;
1207 94772 : *stat = 1;
1208 94772 : trace_xfs_alloc_small_done(args);
1209 94772 : return 0;
1210 :
1211 0 : error:
1212 0 : trace_xfs_alloc_small_error(args);
1213 0 : return error;
1214 : }
1215 :
1216 : /*
1217 : * Allocate a variable extent at exactly agno/bno.
1218 : * Extent's length (returned in *len) will be between minlen and maxlen,
1219 : * and of the form k * prod + mod unless there's nothing that large.
1220 : * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
1221 : */
1222 : STATIC int /* error */
1223 315165 : xfs_alloc_ag_vextent_exact(
1224 : xfs_alloc_arg_t *args) /* allocation argument structure */
1225 : {
1226 315165 : struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
1227 315165 : struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */
1228 315165 : struct xfs_btree_cur *cnt_cur;/* by count btree cursor */
1229 315165 : int error;
1230 315165 : xfs_agblock_t fbno; /* start block of found extent */
1231 315165 : xfs_extlen_t flen; /* length of found extent */
1232 315165 : xfs_agblock_t tbno; /* start block of busy extent */
1233 315165 : xfs_extlen_t tlen; /* length of busy extent */
1234 315165 : xfs_agblock_t tend; /* end block of busy extent */
1235 315165 : int i; /* success/failure of operation */
1236 315165 : unsigned busy_gen;
1237 :
1238 315165 : ASSERT(args->alignment == 1);
1239 :
1240 : /*
1241 : * Allocate/initialize a cursor for the by-number freespace btree.
1242 : */
1243 315165 : bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1244 : args->pag, XFS_BTNUM_BNO);
1245 :
1246 : /*
1247 : * Lookup bno and minlen in the btree (minlen is irrelevant, really).
1248 : * Look for the closest free block <= bno, it must contain bno
1249 : * if any free block does.
1250 : */
1251 315167 : error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
1252 315168 : if (error)
1253 1 : goto error0;
1254 315167 : if (!i)
1255 39860 : goto not_found;
1256 :
1257 : /*
1258 : * Grab the freespace record.
1259 : */
1260 275307 : error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
1261 275307 : if (error)
1262 0 : goto error0;
1263 275307 : if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1264 0 : xfs_btree_mark_sick(bno_cur);
1265 0 : error = -EFSCORRUPTED;
1266 0 : goto error0;
1267 : }
1268 275307 : ASSERT(fbno <= args->agbno);
1269 :
1270 : /*
1271 : * Check for overlapping busy extents.
1272 : */
1273 275307 : tbno = fbno;
1274 275307 : tlen = flen;
1275 275307 : xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
1276 :
1277 : /*
1278 : * Give up if the start of the extent is busy, or the freespace isn't
1279 : * long enough for the minimum request.
1280 : */
1281 275306 : if (tbno > args->agbno)
1282 243 : goto not_found;
1283 275063 : if (tlen < args->minlen)
1284 196565 : goto not_found;
1285 78498 : tend = tbno + tlen;
1286 78498 : if (tend < args->agbno + args->minlen)
1287 6316 : goto not_found;
1288 :
1289 : /*
1290 : * End of extent will be smaller of the freespace end and the
1291 : * maximal requested end.
1292 : *
1293 : * Fix the length according to mod and prod if given.
1294 : */
1295 72182 : args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
1296 72182 : - args->agbno;
1297 72182 : xfs_alloc_fix_len(args);
1298 72181 : ASSERT(args->agbno + args->len <= tend);
1299 :
1300 : /*
1301 : * We are allocating agbno for args->len
1302 : * Allocate/initialize a cursor for the by-size btree.
1303 : */
1304 72181 : cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1305 : args->pag, XFS_BTNUM_CNT);
1306 144362 : ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length));
1307 72181 : error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
1308 : args->len, XFSA_FIXUP_BNO_OK);
1309 72183 : if (error) {
1310 0 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1311 0 : goto error0;
1312 : }
1313 :
1314 72183 : xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1315 72183 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1316 :
1317 72183 : args->wasfromfl = 0;
1318 72183 : trace_xfs_alloc_exact_done(args);
1319 72183 : return 0;
1320 :
1321 242984 : not_found:
1322 : /* Didn't find it, return null. */
1323 242984 : xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1324 242984 : args->agbno = NULLAGBLOCK;
1325 242984 : trace_xfs_alloc_exact_notfound(args);
1326 242984 : return 0;
1327 :
1328 1 : error0:
1329 1 : xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1330 1 : trace_xfs_alloc_exact_error(args);
1331 1 : return error;
1332 : }
1333 :
1334 : /*
1335 : * Search a given number of btree records in a given direction. Check each
1336 : * record against the good extent we've already found.
1337 : */
1338 : STATIC int
1339 354293874 : xfs_alloc_walk_iter(
1340 : struct xfs_alloc_arg *args,
1341 : struct xfs_alloc_cur *acur,
1342 : struct xfs_btree_cur *cur,
1343 : bool increment,
1344 : bool find_one, /* quit on first candidate */
1345 : int count, /* rec count (-1 for infinite) */
1346 : int *stat)
1347 : {
1348 354293874 : int error;
1349 354293874 : int i;
1350 :
1351 354293874 : *stat = 0;
1352 :
1353 : /*
1354 : * Search so long as the cursor is active or we find a better extent.
1355 : * The cursor is deactivated if it extends beyond the range of the
1356 : * current allocation candidate.
1357 : */
1358 8007810146 : while (xfs_alloc_cur_active(cur) && count) {
1359 3692653375 : error = xfs_alloc_cur_check(args, acur, cur, &i);
1360 3696509299 : if (error)
1361 0 : return error;
1362 3696509299 : if (i == 1) {
1363 554217829 : *stat = 1;
1364 554217829 : if (find_one)
1365 : break;
1366 : }
1367 7329979044 : if (!xfs_alloc_cur_active(cur))
1368 : break;
1369 :
1370 3653371228 : if (increment)
1371 3174069249 : error = xfs_btree_increment(cur, 0, &i);
1372 : else
1373 479301979 : error = xfs_btree_decrement(cur, 0, &i);
1374 3649611205 : if (error)
1375 6 : return error;
1376 3649611199 : if (i == 0)
1377 25978145 : cur->bc_ag.abt.active = false;
1378 :
1379 3649611199 : if (count > 0)
1380 259784263 : count--;
1381 : }
1382 :
1383 : return 0;
1384 : }
1385 :
1386 : /*
1387 : * Search the by-bno and by-size btrees in parallel in search of an extent with
1388 : * ideal locality based on the NEAR mode ->agbno locality hint.
1389 : */
1390 : STATIC int
1391 35231381 : xfs_alloc_ag_vextent_locality(
1392 : struct xfs_alloc_arg *args,
1393 : struct xfs_alloc_cur *acur,
1394 : int *stat)
1395 : {
1396 35231381 : struct xfs_btree_cur *fbcur = NULL;
1397 35231381 : int error;
1398 35231381 : int i;
1399 35231381 : bool fbinc;
1400 :
1401 35231381 : ASSERT(acur->len == 0);
1402 :
1403 35231381 : *stat = 0;
1404 :
1405 35231381 : error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i);
1406 35231771 : if (error)
1407 : return error;
1408 35231870 : error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i);
1409 35231952 : if (error)
1410 : return error;
1411 35231778 : error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i);
1412 35231828 : if (error)
1413 : return error;
1414 :
1415 : /*
1416 : * Search the bnobt and cntbt in parallel. Search the bnobt left and
1417 : * right and lookup the closest extent to the locality hint for each
1418 : * extent size key in the cntbt. The entire search terminates
1419 : * immediately on a bnobt hit because that means we've found best case
1420 : * locality. Otherwise the search continues until the cntbt cursor runs
1421 : * off the end of the tree. If no allocation candidate is found at this
1422 : * point, give up on locality, walk backwards from the end of the cntbt
1423 : * and take the first available extent.
1424 : *
1425 : * The parallel tree searches balance each other out to provide fairly
1426 : * consistent performance for various situations. The bnobt search can
1427 : * have pathological behavior in the worst case scenario of larger
1428 : * allocation requests and fragmented free space. On the other hand, the
1429 : * bnobt is able to satisfy most smaller allocation requests much more
1430 : * quickly than the cntbt. The cntbt search can sift through fragmented
1431 : * free space and sets of free extents for larger allocation requests
1432 : * more quickly than the bnobt. Since the locality hint is just a hint
1433 : * and we don't want to scan the entire bnobt for perfect locality, the
1434 : * cntbt search essentially bounds the bnobt search such that we can
1435 : * find good enough locality at reasonable performance in most cases.
1436 : */
1437 22705689 : while (xfs_alloc_cur_active(acur->bnolt) ||
1438 319985553 : xfs_alloc_cur_active(acur->bnogt) ||
1439 12849 : xfs_alloc_cur_active(acur->cnt)) {
1440 :
1441 159986352 : trace_xfs_alloc_cur_lookup(args);
1442 :
1443 : /*
1444 : * Search the bnobt left and right. In the case of a hit, finish
1445 : * the search in the opposite direction and we're done.
1446 : */
1447 159979807 : error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false,
1448 : true, 1, &i);
1449 160010128 : if (error)
1450 0 : return error;
1451 160010128 : if (i == 1) {
1452 14153342 : trace_xfs_alloc_cur_left(args);
1453 14153445 : fbcur = acur->bnogt;
1454 14153445 : fbinc = true;
1455 14153445 : break;
1456 : }
1457 145856786 : error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true,
1458 : 1, &i);
1459 145864625 : if (error)
1460 3 : return error;
1461 145864622 : if (i == 1) {
1462 8682651 : trace_xfs_alloc_cur_right(args);
1463 8682683 : fbcur = acur->bnolt;
1464 8682683 : fbinc = false;
1465 8682683 : break;
1466 : }
1467 :
1468 : /*
1469 : * Check the extent with best locality based on the current
1470 : * extent size search key and keep track of the best candidate.
1471 : */
1472 137181971 : error = xfs_alloc_cntbt_iter(args, acur);
1473 137149853 : if (error)
1474 0 : return error;
1475 274299706 : if (!xfs_alloc_cur_active(acur->cnt)) {
1476 12395329 : trace_xfs_alloc_cur_lookup_done(args);
1477 12395329 : break;
1478 : }
1479 : }
1480 :
1481 : /*
1482 : * If we failed to find anything due to busy extents, return empty
1483 : * handed so the caller can flush and retry. If no busy extents were
1484 : * found, walk backwards from the end of the cntbt as a last resort.
1485 : */
1486 70463042 : if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) {
1487 25999 : error = xfs_btree_decrement(acur->cnt, 0, &i);
1488 25999 : if (error)
1489 : return error;
1490 25999 : if (i) {
1491 25999 : acur->cnt->bc_ag.abt.active = true;
1492 25999 : fbcur = acur->cnt;
1493 25999 : fbinc = false;
1494 : }
1495 : }
1496 :
1497 : /*
1498 : * Search in the opposite direction for a better entry in the case of
1499 : * a bnobt hit or walk backwards from the end of the cntbt.
1500 : */
1501 35231521 : if (fbcur) {
1502 22862013 : error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1,
1503 : &i);
1504 22862288 : if (error)
1505 : return error;
1506 : }
1507 :
1508 35231793 : if (acur->len)
1509 35195857 : *stat = 1;
1510 :
1511 : return 0;
1512 : }
1513 :
/*
 * Check the last block of the cnt btree for allocations.
 *
 * Only called when the cntbt root is also its last leaf, i.e. the largest
 * remaining free extents all live in one block. Scans that block for the
 * best candidate; on success sets *allocated and leaves the winning extent
 * recorded in @acur for xfs_alloc_cur_finish().
 */
static int
xfs_alloc_ag_vextent_lastblock(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur,
	xfs_agblock_t		*bno,
	xfs_extlen_t		*len,
	bool			*allocated)
{
	int			error;
	int			i;

#ifdef DEBUG
	/* Randomly don't execute the first algorithm. */
	if (get_random_u32_below(2))
		return 0;
#endif

	/*
	 * Start from the entry that lookup found, sequence through all larger
	 * free blocks. If we're actually pointing at a record smaller than
	 * maxlen, go to the start of this block, and skip all those smaller
	 * than minlen.
	 */
	if (*len || args->alignment > 1) {
		/* Rewind the cursor to the first record of the leaf. */
		acur->cnt->bc_levels[0].ptr = 1;
		do {
			error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
			if (error)
				return error;
			if (XFS_IS_CORRUPT(args->mp, i != 1)) {
				xfs_btree_mark_sick(acur->cnt);
				return -EFSCORRUPTED;
			}
			if (*len >= args->minlen)
				break;
			error = xfs_btree_increment(acur->cnt, 0, &i);
			if (error)
				return error;
		} while (i);
		ASSERT(*len >= args->minlen);
		if (!i)
			return 0;
	}

	/* Walk the rest of the block and keep the best candidate. */
	error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i);
	if (error)
		return error;

	/*
	 * It didn't work. We COULD be in a case where there's a good record
	 * somewhere, so try again.
	 */
	if (acur->len == 0)
		return 0;

	trace_xfs_alloc_near_first(args);
	*allocated = true;
	return 0;
}
1574 :
1575 : /*
1576 : * Allocate a variable extent near bno in the allocation group agno.
1577 : * Extent's length (returned in len) will be between minlen and maxlen,
1578 : * and of the form k * prod + mod unless there's nothing that large.
1579 : * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1580 : */
1581 : STATIC int
1582 60892225 : xfs_alloc_ag_vextent_near(
1583 : struct xfs_alloc_arg *args,
1584 : uint32_t alloc_flags)
1585 : {
1586 60892225 : struct xfs_alloc_cur acur = {};
1587 60892225 : int error; /* error code */
1588 60892225 : int i; /* result code, temporary */
1589 60892225 : xfs_agblock_t bno;
1590 60892225 : xfs_extlen_t len;
1591 :
1592 : /* handle uninitialized agbno range so caller doesn't have to */
1593 60892225 : if (!args->min_agbno && !args->max_agbno)
1594 60572724 : args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
1595 60892225 : ASSERT(args->min_agbno <= args->max_agbno);
1596 :
1597 : /* clamp agbno to the range if it's outside */
1598 60892225 : if (args->agbno < args->min_agbno)
1599 94005 : args->agbno = args->min_agbno;
1600 60892225 : if (args->agbno > args->max_agbno)
1601 0 : args->agbno = args->max_agbno;
1602 :
1603 : /* Retry once quickly if we find busy extents before blocking. */
1604 60892225 : alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
1605 60926906 : restart:
1606 60926906 : len = 0;
1607 :
1608 : /*
1609 : * Set up cursors and see if there are any free extents as big as
1610 : * maxlen. If not, pick the last entry in the tree unless the tree is
1611 : * empty.
1612 : */
1613 60926906 : error = xfs_alloc_cur_setup(args, &acur);
1614 60926672 : if (error == -ENOSPC) {
1615 65139 : error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno,
1616 : &len, &i);
1617 65139 : if (error)
1618 0 : goto out;
1619 65139 : if (i == 0 || len == 0) {
1620 0 : trace_xfs_alloc_near_noentry(args);
1621 0 : goto out;
1622 : }
1623 65139 : ASSERT(i == 1);
1624 60861533 : } else if (error) {
1625 482 : goto out;
1626 : }
1627 :
1628 : /*
1629 : * First algorithm.
1630 : * If the requested extent is large wrt the freespaces available
1631 : * in this a.g., then the cursor will be pointing to a btree entry
1632 : * near the right edge of the tree. If it's in the last btree leaf
1633 : * block, then we just examine all the entries in that block
1634 : * that are big enough, and pick the best one.
1635 : */
1636 60926190 : if (xfs_btree_islastblock(acur.cnt, 0)) {
1637 51495838 : bool allocated = false;
1638 :
1639 51495838 : error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len,
1640 : &allocated);
1641 51496343 : if (error)
1642 0 : goto out;
1643 51496343 : if (allocated)
1644 25695310 : goto alloc_finish;
1645 : }
1646 :
1647 : /*
1648 : * Second algorithm. Combined cntbt and bnobt search to find ideal
1649 : * locality.
1650 : */
1651 35231259 : error = xfs_alloc_ag_vextent_locality(args, &acur, &i);
1652 35231502 : if (error)
1653 241 : goto out;
1654 :
1655 : /*
1656 : * If we couldn't get anything, give up.
1657 : */
1658 35231261 : if (!acur.len) {
1659 35374 : if (acur.busy) {
1660 : /*
1661 : * Our only valid extents must have been busy. Flush and
1662 : * retry the allocation again. If we get an -EAGAIN
1663 : * error, we're being told that a deadlock was avoided
1664 : * and the current transaction needs committing before
1665 : * the allocation can be retried.
1666 : */
1667 34685 : trace_xfs_alloc_near_busy(args);
1668 34685 : error = xfs_extent_busy_flush(args->tp, args->pag,
1669 : acur.busy_gen, alloc_flags);
1670 34685 : if (error)
1671 4 : goto out;
1672 :
1673 34681 : alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
1674 34681 : goto restart;
1675 : }
1676 689 : trace_xfs_alloc_size_neither(args);
1677 689 : args->agbno = NULLAGBLOCK;
1678 689 : goto out;
1679 : }
1680 :
1681 35195887 : alloc_finish:
1682 : /* fix up btrees on a successful allocation */
1683 60891197 : error = xfs_alloc_cur_finish(args, &acur);
1684 :
1685 60891842 : out:
1686 60891842 : xfs_alloc_cur_close(&acur, error);
1687 60892344 : return error;
1688 : }
1689 :
1690 : /*
1691 : * Allocate a variable extent anywhere in the allocation group agno.
1692 : * Extent's length (returned in len) will be between minlen and maxlen,
1693 : * and of the form k * prod + mod unless there's nothing that large.
1694 : * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1695 : */
1696 : static int
1697 3219888 : xfs_alloc_ag_vextent_size(
1698 : struct xfs_alloc_arg *args,
1699 : uint32_t alloc_flags)
1700 : {
1701 3219888 : struct xfs_agf *agf = args->agbp->b_addr;
1702 3219888 : struct xfs_btree_cur *bno_cur;
1703 3219888 : struct xfs_btree_cur *cnt_cur;
1704 3219888 : xfs_agblock_t fbno; /* start of found freespace */
1705 3219888 : xfs_extlen_t flen; /* length of found freespace */
1706 3219888 : xfs_agblock_t rbno; /* returned block number */
1707 3219888 : xfs_extlen_t rlen; /* length of returned extent */
1708 3219888 : bool busy;
1709 3219888 : unsigned busy_gen;
1710 3219888 : int error;
1711 3219888 : int i;
1712 :
1713 : /* Retry once quickly if we find busy extents before blocking. */
1714 3219888 : alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
1715 : restart:
1716 : /*
1717 : * Allocate and initialize a cursor for the by-size btree.
1718 : */
1719 3230280 : cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1720 : args->pag, XFS_BTNUM_CNT);
1721 3230288 : bno_cur = NULL;
1722 :
1723 : /*
1724 : * Look for an entry >= maxlen+alignment-1 blocks.
1725 : */
1726 3230296 : if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1727 3230288 : args->maxlen + args->alignment - 1, &i)))
1728 74 : goto error0;
1729 :
1730 : /*
1731 : * If none then we have to settle for a smaller extent. In the case that
1732 : * there are no large extents, this will return the last entry in the
1733 : * tree unless the tree is empty. In the case that there are only busy
1734 : * large extents, this will return the largest small extent unless there
1735 : * are no smaller extents available.
1736 : */
1737 3230222 : if (!i) {
1738 29633 : error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1739 : &fbno, &flen, &i);
1740 29633 : if (error)
1741 0 : goto error0;
1742 29633 : if (i == 0 || flen == 0) {
1743 0 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1744 0 : trace_xfs_alloc_size_noentry(args);
1745 0 : return 0;
1746 : }
1747 29633 : ASSERT(i == 1);
1748 29633 : busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
1749 : &rlen, &busy_gen);
1750 : } else {
1751 : /*
1752 : * Search for a non-busy extent that is large enough.
1753 : */
1754 284000763 : for (;;) {
1755 143600676 : error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1756 143590265 : if (error)
1757 0 : goto error0;
1758 143590265 : if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1759 0 : xfs_btree_mark_sick(cnt_cur);
1760 0 : error = -EFSCORRUPTED;
1761 0 : goto error0;
1762 : }
1763 :
1764 143590265 : busy = xfs_alloc_compute_aligned(args, fbno, flen,
1765 : &rbno, &rlen, &busy_gen);
1766 :
1767 143600602 : if (rlen >= args->maxlen)
1768 : break;
1769 :
1770 140410208 : error = xfs_btree_increment(cnt_cur, 0, &i);
1771 140410285 : if (error)
1772 0 : goto error0;
1773 140410285 : if (i)
1774 140400087 : continue;
1775 :
1776 : /*
1777 : * Our only valid extents must have been busy. Flush and
1778 : * retry the allocation again. If we get an -EAGAIN
1779 : * error, we're being told that a deadlock was avoided
1780 : * and the current transaction needs committing before
1781 : * the allocation can be retried.
1782 : */
1783 10198 : trace_xfs_alloc_size_busy(args);
1784 10198 : error = xfs_extent_busy_flush(args->tp, args->pag,
1785 : busy_gen, alloc_flags);
1786 10198 : if (error)
1787 0 : goto error0;
1788 :
1789 10198 : alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
1790 10198 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1791 10198 : goto restart;
1792 : }
1793 : }
1794 :
1795 : /*
1796 : * In the first case above, we got the last entry in the
1797 : * by-size btree. Now we check to see if the space hits maxlen
1798 : * once aligned; if not, we search left for something better.
1799 : * This can't happen in the second case above.
1800 : */
1801 3219986 : rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1802 3219986 : if (XFS_IS_CORRUPT(args->mp,
1803 : rlen != 0 &&
1804 : (rlen > flen ||
1805 : rbno + rlen > fbno + flen))) {
1806 0 : xfs_btree_mark_sick(cnt_cur);
1807 0 : error = -EFSCORRUPTED;
1808 0 : goto error0;
1809 : }
1810 3219986 : if (rlen < args->maxlen) {
1811 29633 : xfs_agblock_t bestfbno;
1812 29633 : xfs_extlen_t bestflen;
1813 29633 : xfs_agblock_t bestrbno;
1814 29633 : xfs_extlen_t bestrlen;
1815 :
1816 29633 : bestrlen = rlen;
1817 29633 : bestrbno = rbno;
1818 29633 : bestflen = flen;
1819 29633 : bestfbno = fbno;
1820 3566824 : for (;;) {
1821 3566824 : if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1822 0 : goto error0;
1823 3566824 : if (i == 0)
1824 : break;
1825 3566684 : if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1826 : &i)))
1827 0 : goto error0;
1828 3566684 : if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1829 0 : xfs_btree_mark_sick(cnt_cur);
1830 0 : error = -EFSCORRUPTED;
1831 0 : goto error0;
1832 : }
1833 3566684 : if (flen < bestrlen)
1834 : break;
1835 3537191 : busy = xfs_alloc_compute_aligned(args, fbno, flen,
1836 : &rbno, &rlen, &busy_gen);
1837 3537191 : rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1838 3537191 : if (XFS_IS_CORRUPT(args->mp,
1839 : rlen != 0 &&
1840 : (rlen > flen ||
1841 : rbno + rlen > fbno + flen))) {
1842 0 : xfs_btree_mark_sick(cnt_cur);
1843 0 : error = -EFSCORRUPTED;
1844 0 : goto error0;
1845 : }
1846 3537191 : if (rlen > bestrlen) {
1847 17091 : bestrlen = rlen;
1848 17091 : bestrbno = rbno;
1849 17091 : bestflen = flen;
1850 17091 : bestfbno = fbno;
1851 17091 : if (rlen == args->maxlen)
1852 : break;
1853 : }
1854 : }
1855 29633 : if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1856 : &i)))
1857 0 : goto error0;
1858 29633 : if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1859 0 : xfs_btree_mark_sick(cnt_cur);
1860 0 : error = -EFSCORRUPTED;
1861 0 : goto error0;
1862 : }
1863 29633 : rlen = bestrlen;
1864 29633 : rbno = bestrbno;
1865 29633 : flen = bestflen;
1866 29633 : fbno = bestfbno;
1867 : }
1868 3219986 : args->wasfromfl = 0;
1869 : /*
1870 : * Fix up the length.
1871 : */
1872 3219986 : args->len = rlen;
1873 3219986 : if (rlen < args->minlen) {
1874 17129 : if (busy) {
1875 : /*
1876 : * Our only valid extents must have been busy. Flush and
1877 : * retry the allocation again. If we get an -EAGAIN
1878 : * error, we're being told that a deadlock was avoided
1879 : * and the current transaction needs committing before
1880 : * the allocation can be retried.
1881 : */
1882 194 : trace_xfs_alloc_size_busy(args);
1883 194 : error = xfs_extent_busy_flush(args->tp, args->pag,
1884 : busy_gen, alloc_flags);
1885 194 : if (error)
1886 0 : goto error0;
1887 :
1888 194 : alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
1889 194 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1890 194 : goto restart;
1891 : }
1892 16935 : goto out_nominleft;
1893 : }
1894 3202857 : xfs_alloc_fix_len(args);
1895 :
1896 3202880 : rlen = args->len;
1897 3202880 : if (XFS_IS_CORRUPT(args->mp, rlen > flen)) {
1898 0 : xfs_btree_mark_sick(cnt_cur);
1899 0 : error = -EFSCORRUPTED;
1900 0 : goto error0;
1901 : }
1902 : /*
1903 : * Allocate and initialize a cursor for the by-block tree.
1904 : */
1905 3202880 : bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1906 : args->pag, XFS_BTNUM_BNO);
1907 3202852 : if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1908 : rbno, rlen, XFSA_FIXUP_CNT_OK)))
1909 55 : goto error0;
1910 3202815 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1911 3202825 : xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1912 3202826 : cnt_cur = bno_cur = NULL;
1913 3202826 : args->len = rlen;
1914 3202826 : args->agbno = rbno;
1915 6405652 : if (XFS_IS_CORRUPT(args->mp,
1916 : args->agbno + args->len >
1917 : be32_to_cpu(agf->agf_length))) {
1918 0 : xfs_ag_mark_sick(args->pag, XFS_SICK_AG_BNOBT);
1919 0 : error = -EFSCORRUPTED;
1920 0 : goto error0;
1921 : }
1922 3202826 : trace_xfs_alloc_size_done(args);
1923 3202826 : return 0;
1924 :
1925 129 : error0:
1926 129 : trace_xfs_alloc_size_error(args);
1927 129 : if (cnt_cur)
1928 129 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1929 129 : if (bno_cur)
1930 55 : xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1931 : return error;
1932 :
1933 : out_nominleft:
1934 16935 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1935 16935 : trace_xfs_alloc_size_nominleft(args);
1936 16935 : args->agbno = NULLAGBLOCK;
1937 16935 : return 0;
1938 : }
1939 :
1940 : /*
1941 : * Free the extent starting at agno/bno for length.
1942 : */
1943 : STATIC int
1944 53018256 : xfs_free_ag_extent(
1945 : struct xfs_trans *tp,
1946 : struct xfs_buf *agbp,
1947 : xfs_agnumber_t agno,
1948 : xfs_agblock_t bno,
1949 : xfs_extlen_t len,
1950 : const struct xfs_owner_info *oinfo,
1951 : enum xfs_ag_resv_type type)
1952 : {
1953 53018256 : struct xfs_mount *mp;
1954 53018256 : struct xfs_btree_cur *bno_cur;
1955 53018256 : struct xfs_btree_cur *cnt_cur;
1956 53018256 : xfs_agblock_t gtbno; /* start of right neighbor */
1957 53018256 : xfs_extlen_t gtlen; /* length of right neighbor */
1958 53018256 : xfs_agblock_t ltbno; /* start of left neighbor */
1959 53018256 : xfs_extlen_t ltlen; /* length of left neighbor */
1960 53018256 : xfs_agblock_t nbno; /* new starting block of freesp */
1961 53018256 : xfs_extlen_t nlen; /* new length of freespace */
1962 53018256 : int haveleft; /* have a left neighbor */
1963 53018256 : int haveright; /* have a right neighbor */
1964 53018256 : int i;
1965 53018256 : int error;
1966 53018256 : struct xfs_perag *pag = agbp->b_pag;
1967 :
1968 53018256 : bno_cur = cnt_cur = NULL;
1969 53018256 : mp = tp->t_mountp;
1970 :
1971 53018256 : if (!xfs_rmap_should_skip_owner_update(oinfo)) {
1972 1596437 : error = xfs_rmap_free(tp, agbp, pag, bno, len, oinfo);
1973 1596432 : if (error)
1974 17 : goto error0;
1975 : }
1976 :
1977 : /*
1978 : * Allocate and initialize a cursor for the by-block btree.
1979 : */
1980 53018234 : bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_BNO);
1981 : /*
1982 : * Look for a neighboring block on the left (lower block numbers)
1983 : * that is contiguous with this space.
1984 : */
1985 53018356 : if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1986 172 : goto error0;
1987 53018210 : if (haveleft) {
1988 : /*
1989 : * There is a block to our left.
1990 : */
1991 51544719 : if ((error = xfs_alloc_get_rec(bno_cur, <bno, <len, &i)))
1992 0 : goto error0;
1993 51544676 : if (XFS_IS_CORRUPT(mp, i != 1)) {
1994 0 : xfs_btree_mark_sick(bno_cur);
1995 0 : error = -EFSCORRUPTED;
1996 0 : goto error0;
1997 : }
1998 : /*
1999 : * It's not contiguous, though.
2000 : */
2001 51544676 : if (ltbno + ltlen < bno)
2002 39424537 : haveleft = 0;
2003 : else {
2004 : /*
2005 : * If this failure happens the request to free this
2006 : * space was invalid, it's (partly) already free.
2007 : * Very bad.
2008 : */
2009 12120139 : if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) {
2010 0 : xfs_btree_mark_sick(bno_cur);
2011 0 : error = -EFSCORRUPTED;
2012 0 : goto error0;
2013 : }
2014 : }
2015 : }
2016 : /*
2017 : * Look for a neighboring block on the right (higher block numbers)
2018 : * that is contiguous with this space.
2019 : */
2020 53018167 : if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
2021 0 : goto error0;
2022 53017955 : if (haveright) {
2023 : /*
2024 : * There is a block to our right.
2025 : */
2026 52955639 : if ((error = xfs_alloc_get_rec(bno_cur, >bno, >len, &i)))
2027 0 : goto error0;
2028 52955854 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2029 0 : xfs_btree_mark_sick(bno_cur);
2030 0 : error = -EFSCORRUPTED;
2031 0 : goto error0;
2032 : }
2033 : /*
2034 : * It's not contiguous, though.
2035 : */
2036 52955854 : if (bno + len < gtbno)
2037 36704581 : haveright = 0;
2038 : else {
2039 : /*
2040 : * If this failure happens the request to free this
2041 : * space was invalid, it's (partly) already free.
2042 : * Very bad.
2043 : */
2044 16251273 : if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) {
2045 0 : xfs_btree_mark_sick(bno_cur);
2046 0 : error = -EFSCORRUPTED;
2047 0 : goto error0;
2048 : }
2049 : }
2050 : }
2051 : /*
2052 : * Now allocate and initialize a cursor for the by-size tree.
2053 : */
2054 53018170 : cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_CNT);
2055 : /*
2056 : * Have both left and right contiguous neighbors.
2057 : * Merge all three into a single free block.
2058 : */
2059 53018105 : if (haveleft && haveright) {
2060 : /*
2061 : * Delete the old by-size entry on the left.
2062 : */
2063 6531986 : if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2064 19 : goto error0;
2065 6531968 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2066 0 : xfs_btree_mark_sick(cnt_cur);
2067 0 : error = -EFSCORRUPTED;
2068 0 : goto error0;
2069 : }
2070 6531968 : if ((error = xfs_btree_delete(cnt_cur, &i)))
2071 0 : goto error0;
2072 6531968 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2073 0 : xfs_btree_mark_sick(cnt_cur);
2074 0 : error = -EFSCORRUPTED;
2075 0 : goto error0;
2076 : }
2077 : /*
2078 : * Delete the old by-size entry on the right.
2079 : */
2080 6531968 : if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2081 7 : goto error0;
2082 6531958 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2083 0 : xfs_btree_mark_sick(cnt_cur);
2084 0 : error = -EFSCORRUPTED;
2085 0 : goto error0;
2086 : }
2087 6531958 : if ((error = xfs_btree_delete(cnt_cur, &i)))
2088 0 : goto error0;
2089 6531962 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2090 0 : xfs_btree_mark_sick(cnt_cur);
2091 0 : error = -EFSCORRUPTED;
2092 0 : goto error0;
2093 : }
2094 : /*
2095 : * Delete the old by-block entry for the right block.
2096 : */
2097 6531962 : if ((error = xfs_btree_delete(bno_cur, &i)))
2098 0 : goto error0;
2099 6531960 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2100 0 : xfs_btree_mark_sick(bno_cur);
2101 0 : error = -EFSCORRUPTED;
2102 0 : goto error0;
2103 : }
2104 : /*
2105 : * Move the by-block cursor back to the left neighbor.
2106 : */
2107 6531960 : if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2108 0 : goto error0;
2109 6531958 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2110 0 : xfs_btree_mark_sick(bno_cur);
2111 0 : error = -EFSCORRUPTED;
2112 0 : goto error0;
2113 : }
2114 : #ifdef DEBUG
2115 : /*
2116 : * Check that this is the right record: delete didn't
2117 : * mangle the cursor.
2118 : */
2119 : {
2120 6531958 : xfs_agblock_t xxbno;
2121 6531958 : xfs_extlen_t xxlen;
2122 :
2123 6531958 : if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
2124 : &i)))
2125 0 : goto error0;
2126 6531958 : if (XFS_IS_CORRUPT(mp,
2127 : i != 1 ||
2128 : xxbno != ltbno ||
2129 : xxlen != ltlen)) {
2130 0 : xfs_btree_mark_sick(bno_cur);
2131 0 : error = -EFSCORRUPTED;
2132 0 : goto error0;
2133 : }
2134 : }
2135 : #endif
2136 : /*
2137 : * Update remaining by-block entry to the new, joined block.
2138 : */
2139 6531958 : nbno = ltbno;
2140 6531958 : nlen = len + ltlen + gtlen;
2141 6531958 : if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2142 0 : goto error0;
2143 : }
2144 : /*
2145 : * Have only a left contiguous neighbor.
2146 : * Merge it together with the new freespace.
2147 : */
2148 46486119 : else if (haveleft) {
2149 : /*
2150 : * Delete the old by-size entry on the left.
2151 : */
2152 5588149 : if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2153 4 : goto error0;
2154 5588149 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2155 0 : xfs_btree_mark_sick(cnt_cur);
2156 0 : error = -EFSCORRUPTED;
2157 0 : goto error0;
2158 : }
2159 5588149 : if ((error = xfs_btree_delete(cnt_cur, &i)))
2160 0 : goto error0;
2161 5588150 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2162 0 : xfs_btree_mark_sick(cnt_cur);
2163 0 : error = -EFSCORRUPTED;
2164 0 : goto error0;
2165 : }
2166 : /*
2167 : * Back up the by-block cursor to the left neighbor, and
2168 : * update its length.
2169 : */
2170 5588150 : if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2171 0 : goto error0;
2172 5588146 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2173 0 : xfs_btree_mark_sick(bno_cur);
2174 0 : error = -EFSCORRUPTED;
2175 0 : goto error0;
2176 : }
2177 5588146 : nbno = ltbno;
2178 5588146 : nlen = len + ltlen;
2179 5588146 : if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2180 0 : goto error0;
2181 : }
2182 : /*
2183 : * Have only a right contiguous neighbor.
2184 : * Merge it together with the new freespace.
2185 : */
2186 40897970 : else if (haveright) {
2187 : /*
2188 : * Delete the old by-size entry on the right.
2189 : */
2190 9719284 : if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2191 20 : goto error0;
2192 9719262 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2193 0 : xfs_btree_mark_sick(cnt_cur);
2194 0 : error = -EFSCORRUPTED;
2195 0 : goto error0;
2196 : }
2197 9719262 : if ((error = xfs_btree_delete(cnt_cur, &i)))
2198 0 : goto error0;
2199 9719272 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2200 0 : xfs_btree_mark_sick(cnt_cur);
2201 0 : error = -EFSCORRUPTED;
2202 0 : goto error0;
2203 : }
2204 : /*
2205 : * Update the starting block and length of the right
2206 : * neighbor in the by-block tree.
2207 : */
2208 9719272 : nbno = bno;
2209 9719272 : nlen = len + gtlen;
2210 9719272 : if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2211 0 : goto error0;
2212 : }
2213 : /*
2214 : * No contiguous neighbors.
2215 : * Insert the new freespace into the by-block tree.
2216 : */
2217 : else {
2218 31178686 : nbno = bno;
2219 31178686 : nlen = len;
2220 31178686 : if ((error = xfs_btree_insert(bno_cur, &i)))
2221 0 : goto error0;
2222 31178655 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2223 0 : xfs_btree_mark_sick(bno_cur);
2224 0 : error = -EFSCORRUPTED;
2225 0 : goto error0;
2226 : }
2227 : }
2228 53018038 : xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
2229 53018023 : bno_cur = NULL;
2230 : /*
2231 : * In all cases we need to insert the new freespace in the by-size tree.
2232 : */
2233 53018023 : if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
2234 39 : goto error0;
2235 53017999 : if (XFS_IS_CORRUPT(mp, i != 0)) {
2236 0 : xfs_btree_mark_sick(cnt_cur);
2237 0 : error = -EFSCORRUPTED;
2238 0 : goto error0;
2239 : }
2240 53017999 : if ((error = xfs_btree_insert(cnt_cur, &i)))
2241 0 : goto error0;
2242 53017891 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2243 0 : xfs_btree_mark_sick(cnt_cur);
2244 0 : error = -EFSCORRUPTED;
2245 0 : goto error0;
2246 : }
2247 53017891 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
2248 53017936 : cnt_cur = NULL;
2249 :
2250 : /*
2251 : * Update the freespace totals in the ag and superblock.
2252 : */
2253 53017936 : error = xfs_alloc_update_counters(tp, agbp, len);
2254 53017954 : xfs_ag_resv_free_extent(agbp->b_pag, type, tp, len);
2255 53017579 : if (error)
2256 0 : goto error0;
2257 :
2258 53017579 : XFS_STATS_INC(mp, xs_freex);
2259 53017579 : XFS_STATS_ADD(mp, xs_freeb, len);
2260 :
2261 53017579 : trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
2262 :
2263 53017579 : return 0;
2264 :
2265 278 : error0:
2266 278 : trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
2267 278 : if (bno_cur)
2268 222 : xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
2269 278 : if (cnt_cur)
2270 89 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
2271 : return error;
2272 : }
2273 :
2274 : /*
2275 : * Visible (exported) allocation/free functions.
2276 : * Some of these are used just by xfs_alloc_btree.c and this file.
2277 : */
2278 :
2279 : /*
2280 : * Compute and fill in value of m_alloc_maxlevels.
2281 : */
2282 : void
2283 24337 : xfs_alloc_compute_maxlevels(
2284 : xfs_mount_t *mp) /* file system mount structure */
2285 : {
2286 48674 : mp->m_alloc_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
2287 24337 : (mp->m_sb.sb_agblocks + 1) / 2);
2288 24337 : ASSERT(mp->m_alloc_maxlevels <= xfs_allocbt_maxlevels_ondisk());
2289 24337 : }
2290 :
2291 : /*
2292 : * Find the length of the longest extent in an AG. The 'need' parameter
2293 : * specifies how much space we're going to need for the AGFL and the
2294 : * 'reserved' parameter tells us how many blocks in this AG are reserved for
2295 : * other callers.
2296 : */
2297 : xfs_extlen_t
2298 226169299 : xfs_alloc_longest_free_extent(
2299 : struct xfs_perag *pag,
2300 : xfs_extlen_t need,
2301 : xfs_extlen_t reserved)
2302 : {
2303 226169299 : xfs_extlen_t delta = 0;
2304 :
2305 : /*
2306 : * If the AGFL needs a recharge, we'll have to subtract that from the
2307 : * longest extent.
2308 : */
2309 226169299 : if (need > pag->pagf_flcount)
2310 402499 : delta = need - pag->pagf_flcount;
2311 :
2312 : /*
2313 : * If we cannot maintain others' reservations with space from the
2314 : * not-longest freesp extents, we'll have to subtract /that/ from
2315 : * the longest extent too.
2316 : */
2317 226169299 : if (pag->pagf_freeblks - pag->pagf_longest < reserved)
2318 182502619 : delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
2319 :
2320 : /*
2321 : * If the longest extent is long enough to satisfy all the
2322 : * reservations and AGFL rules in place, we can return this extent.
2323 : */
2324 226169299 : if (pag->pagf_longest > delta)
2325 226134408 : return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
2326 : pag->pagf_longest - delta);
2327 :
2328 : /* Otherwise, let the caller try for 1 block if there's space. */
2329 34891 : return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
2330 : }
2331 :
2332 : /*
2333 : * Compute the minimum length of the AGFL in the given AG. If @pag is NULL,
2334 : * return the largest possible minimum length.
2335 : */
2336 : unsigned int
2337 843540793 : xfs_alloc_min_freelist(
2338 : struct xfs_mount *mp,
2339 : struct xfs_perag *pag)
2340 : {
2341 : /* AG btrees have at least 1 level. */
2342 843540793 : static const uint8_t fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
2343 843540793 : const uint8_t *levels = pag ? pag->pagf_levels : fake_levels;
2344 843540793 : unsigned int min_free;
2345 :
2346 843540793 : ASSERT(mp->m_alloc_maxlevels > 0);
2347 :
2348 : /* space needed by-bno freespace btree */
2349 843540793 : min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
2350 : mp->m_alloc_maxlevels);
2351 : /* space needed by-size freespace btree */
2352 843540793 : min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
2353 : mp->m_alloc_maxlevels);
2354 : /* space needed reverse mapping used space btree */
2355 843540793 : if (xfs_has_rmapbt(mp))
2356 843279004 : min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
2357 : mp->m_rmap_maxlevels);
2358 :
2359 843540793 : return min_free;
2360 : }
2361 :
2362 : /*
2363 : * Check if the operation we are fixing up the freelist for should go ahead or
2364 : * not. If we are freeing blocks, we always allow it, otherwise the allocation
2365 : * is dependent on whether the size and shape of free space available will
2366 : * permit the requested allocation to take place.
2367 : */
2368 : static bool
2369 782851334 : xfs_alloc_space_available(
2370 : struct xfs_alloc_arg *args,
2371 : xfs_extlen_t min_free,
2372 : int flags)
2373 : {
2374 782851334 : struct xfs_perag *pag = args->pag;
2375 782851334 : xfs_extlen_t alloc_len, longest;
2376 782851334 : xfs_extlen_t reservation; /* blocks that are still reserved */
2377 782851334 : int available;
2378 782851334 : xfs_extlen_t agflcount;
2379 :
2380 782851334 : if (flags & XFS_ALLOC_FLAG_FREEING)
2381 : return true;
2382 :
2383 165361714 : reservation = xfs_ag_resv_needed(pag, args->resv);
2384 :
2385 : /* do we have enough contiguous free space for the allocation? */
2386 165358781 : alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
2387 165358781 : longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
2388 165358781 : if (longest < alloc_len)
2389 : return false;
2390 :
2391 : /*
2392 : * Do we have enough free space remaining for the allocation? Don't
2393 : * account extra agfl blocks because we are about to defer free them,
2394 : * making them unavailable until the current transaction commits.
2395 : */
2396 150815841 : agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
2397 150815841 : available = (int)(pag->pagf_freeblks + agflcount -
2398 150815841 : reservation - min_free - args->minleft);
2399 150815841 : if (available < (int)max(args->total, alloc_len))
2400 : return false;
2401 :
2402 : /*
2403 : * Clamp maxlen to the amount of free space available for the actual
2404 : * extent allocation.
2405 : */
2406 131193655 : if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
2407 16485 : args->maxlen = available;
2408 16485 : ASSERT(args->maxlen > 0);
2409 16485 : ASSERT(args->maxlen >= args->minlen);
2410 : }
2411 :
2412 : return true;
2413 : }
2414 :
2415 : int
2416 194204 : xfs_free_agfl_block(
2417 : struct xfs_trans *tp,
2418 : xfs_agnumber_t agno,
2419 : xfs_agblock_t agbno,
2420 : struct xfs_buf *agbp,
2421 : struct xfs_owner_info *oinfo)
2422 : {
2423 194204 : int error;
2424 194204 : struct xfs_buf *bp;
2425 :
2426 194204 : error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
2427 : XFS_AG_RESV_AGFL);
2428 194204 : if (error)
2429 : return error;
2430 :
2431 194202 : error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
2432 194202 : XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
2433 : tp->t_mountp->m_bsize, 0, &bp);
2434 194202 : if (error)
2435 : return error;
2436 194202 : xfs_trans_binval(tp, bp);
2437 :
2438 194202 : return 0;
2439 : }
2440 :
2441 : /*
2442 : * Check the agfl fields of the agf for inconsistency or corruption.
2443 : *
2444 : * The original purpose was to detect an agfl header padding mismatch between
2445 : * current and early v5 kernels. This problem manifests as a 1-slot size
2446 : * difference between the on-disk flcount and the active [first, last] range of
2447 : * a wrapped agfl.
2448 : *
2449 : * However, we need to use these same checks to catch agfl count corruptions
2450 : * unrelated to padding. This could occur on any v4 or v5 filesystem, so either
2451 : * way, we need to reset the agfl and warn the user.
2452 : *
2453 : * Return true if a reset is required before the agfl can be used, false
2454 : * otherwise.
2455 : */
2456 : static bool
2457 279019 : xfs_agfl_needs_reset(
2458 : struct xfs_mount *mp,
2459 : struct xfs_agf *agf)
2460 : {
2461 279019 : uint32_t f = be32_to_cpu(agf->agf_flfirst);
2462 279019 : uint32_t l = be32_to_cpu(agf->agf_fllast);
2463 279019 : uint32_t c = be32_to_cpu(agf->agf_flcount);
2464 279019 : int agfl_size = xfs_agfl_size(mp);
2465 279019 : int active;
2466 :
2467 : /*
2468 : * The agf read verifier catches severe corruption of these fields.
2469 : * Repeat some sanity checks to cover a packed -> unpacked mismatch if
2470 : * the verifier allows it.
2471 : */
2472 279019 : if (f >= agfl_size || l >= agfl_size)
2473 : return true;
2474 279017 : if (c > agfl_size)
2475 : return true;
2476 :
2477 : /*
2478 : * Check consistency between the on-disk count and the active range. An
2479 : * agfl padding mismatch manifests as an inconsistent flcount.
2480 : */
2481 279017 : if (c && l >= f)
2482 268660 : active = l - f + 1;
2483 10357 : else if (c)
2484 124 : active = agfl_size - f + l + 1;
2485 : else
2486 : active = 0;
2487 :
2488 279017 : return active != c;
2489 : }
2490 :
2491 : /*
2492 : * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
2493 : * agfl content cannot be trusted. Warn the user that a repair is required to
2494 : * recover leaked blocks.
2495 : *
2496 : * The purpose of this mechanism is to handle filesystems affected by the agfl
2497 : * header padding mismatch problem. A reset keeps the filesystem online with a
2498 : * relatively minor free space accounting inconsistency rather than suffer the
2499 : * inevitable crash from use of an invalid agfl block.
2500 : */
2501 : static void
2502 4 : xfs_agfl_reset(
2503 : struct xfs_trans *tp,
2504 : struct xfs_buf *agbp,
2505 : struct xfs_perag *pag)
2506 : {
2507 4 : struct xfs_mount *mp = tp->t_mountp;
2508 4 : struct xfs_agf *agf = agbp->b_addr;
2509 :
2510 8 : ASSERT(xfs_perag_agfl_needs_reset(pag));
2511 4 : trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
2512 :
2513 4 : xfs_warn(mp,
2514 : "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
2515 : "Please unmount and run xfs_repair.",
2516 : pag->pag_agno, pag->pagf_flcount);
2517 :
2518 4 : agf->agf_flfirst = 0;
2519 8 : agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
2520 4 : agf->agf_flcount = 0;
2521 4 : xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
2522 : XFS_AGF_FLCOUNT);
2523 :
2524 4 : pag->pagf_flcount = 0;
2525 4 : clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
2526 4 : }
2527 :
2528 : /*
2529 : * Defer an AGFL block free. This is effectively equivalent to
2530 : * xfs_free_extent_later() with some special handling particular to AGFL blocks.
2531 : *
2532 : * Deferring AGFL frees helps prevent log reservation overruns due to too many
2533 : * allocation operations in a transaction. AGFL frees are prone to this problem
2534 : * because for one they are always freed one at a time. Further, an immediate
2535 : * AGFL block free can cause a btree join and require another block free before
2536 : * the real allocation can proceed. Deferring the free disconnects freeing up
2537 : * the AGFL slot from freeing the block.
2538 : */
2539 : static int
2540 194204 : xfs_defer_agfl_block(
2541 : struct xfs_trans *tp,
2542 : xfs_agnumber_t agno,
2543 : xfs_agblock_t agbno,
2544 : struct xfs_owner_info *oinfo)
2545 : {
2546 194204 : struct xfs_mount *mp = tp->t_mountp;
2547 194204 : struct xfs_extent_free_item *xefi;
2548 194204 : xfs_fsblock_t fsbno = XFS_AGB_TO_FSB(mp, agno, agbno);
2549 :
2550 194204 : ASSERT(xfs_extfree_item_cache != NULL);
2551 194204 : ASSERT(oinfo != NULL);
2552 :
2553 194204 : if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno)))
2554 0 : return -EFSCORRUPTED;
2555 :
2556 194204 : xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
2557 : GFP_KERNEL | __GFP_NOFAIL);
2558 194204 : xefi->xefi_startblock = fsbno;
2559 194204 : xefi->xefi_blockcount = 1;
2560 194204 : xefi->xefi_owner = oinfo->oi_owner;
2561 194204 : xefi->xefi_agresv = XFS_AG_RESV_AGFL;
2562 :
2563 194204 : trace_xfs_agfl_free_defer(mp, xefi);
2564 :
2565 194204 : xfs_extent_free_get_group(mp, xefi);
2566 194204 : xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
2567 194204 : return 0;
2568 : }
2569 :
2570 : /*
2571 : * Add the extent to the list of extents to be free at transaction end.
2572 : * The list is maintained sorted (by block number).
2573 : */
2574 : int
2575 68642878 : xfs_free_extent_later(
2576 : struct xfs_trans *tp,
2577 : xfs_fsblock_t bno,
2578 : xfs_filblks_t len,
2579 : const struct xfs_owner_info *oinfo,
2580 : enum xfs_ag_resv_type type,
2581 : unsigned int flags)
2582 : {
2583 68642878 : struct xfs_extent_free_item *xefi;
2584 68642878 : struct xfs_mount *mp = tp->t_mountp;
2585 68642878 : enum xfs_defer_ops_type optype;
2586 : #ifdef DEBUG
2587 68642878 : xfs_agnumber_t agno;
2588 68642878 : xfs_agblock_t agbno;
2589 :
2590 68642878 : ASSERT(bno != NULLFSBLOCK);
2591 68642878 : ASSERT(len > 0);
2592 68642878 : ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
2593 68642878 : ASSERT(!isnullstartblock(bno));
2594 68642878 : if (flags & XFS_FREE_EXTENT_REALTIME) {
2595 15822334 : ASSERT(bno < mp->m_sb.sb_rblocks);
2596 15822334 : ASSERT(len <= mp->m_sb.sb_rblocks);
2597 15822334 : ASSERT(bno + len <= mp->m_sb.sb_rblocks);
2598 : } else {
2599 52820544 : agno = XFS_FSB_TO_AGNO(mp, bno);
2600 52820544 : agbno = XFS_FSB_TO_AGBNO(mp, bno);
2601 :
2602 52820544 : ASSERT(agno < mp->m_sb.sb_agcount);
2603 52820544 : ASSERT(agbno < mp->m_sb.sb_agblocks);
2604 52820544 : ASSERT(len < mp->m_sb.sb_agblocks);
2605 52820544 : ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
2606 : }
2607 : #endif
2608 68642878 : ASSERT(!(flags & ~XFS_FREE_EXTENT_ALL_FLAGS));
2609 68642878 : ASSERT(xfs_extfree_item_cache != NULL);
2610 68642878 : ASSERT(type != XFS_AG_RESV_AGFL);
2611 :
2612 68642878 : if (flags & XFS_FREE_EXTENT_REALTIME) {
2613 15822350 : if (XFS_IS_CORRUPT(mp, !xfs_verify_rtbext(mp, bno, len)))
2614 0 : return -EFSCORRUPTED;
2615 : } else {
2616 52820528 : if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
2617 0 : return -EFSCORRUPTED;
2618 : }
2619 :
2620 68644095 : xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
2621 : GFP_KERNEL | __GFP_NOFAIL);
2622 68644594 : xefi->xefi_startblock = bno;
2623 68644594 : xefi->xefi_blockcount = (xfs_extlen_t)len;
2624 68644594 : xefi->xefi_agresv = type;
2625 68644594 : if (flags & XFS_FREE_EXTENT_SKIP_DISCARD)
2626 2874515 : xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
2627 68644594 : if (flags & XFS_FREE_EXTENT_REALTIME) {
2628 : /*
2629 : * Realtime and data section EFIs must use separate
2630 : * transactions to finish deferred work because updates to
2631 : * realtime metadata files can lock AGFs to allocate btree
2632 : * blocks and we don't want that mixing with the AGF locks
2633 : * taken to finish data section EFIs.
2634 : */
2635 15822331 : optype = XFS_DEFER_OPS_TYPE_FREE_RT;
2636 15822331 : xefi->xefi_flags |= XFS_EFI_REALTIME;
2637 : } else {
2638 : optype = XFS_DEFER_OPS_TYPE_FREE;
2639 : }
2640 68644594 : if (oinfo) {
2641 1401708 : ASSERT(oinfo->oi_offset == 0);
2642 :
2643 1401708 : if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
2644 43 : xefi->xefi_flags |= XFS_EFI_ATTR_FORK;
2645 1401708 : if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
2646 1010444 : xefi->xefi_flags |= XFS_EFI_BMBT_BLOCK;
2647 1401708 : xefi->xefi_owner = oinfo->oi_owner;
2648 : } else {
2649 67242886 : xefi->xefi_owner = XFS_RMAP_OWN_NULL;
2650 : }
2651 :
2652 68644594 : trace_xfs_extent_free_defer(mp, xefi);
2653 :
2654 68642698 : xfs_extent_free_get_group(mp, xefi);
2655 68640815 : xfs_defer_add(tp, optype, &xefi->xefi_list);
2656 68640815 : return 0;
2657 : }
2658 :
2659 : #ifdef DEBUG
2660 : /*
2661 : * Check if an AGF has a free extent record whose length is equal to
2662 : * args->minlen.
2663 : */
2664 : STATIC int
2665 367212 : xfs_exact_minlen_extent_available(
2666 : struct xfs_alloc_arg *args,
2667 : struct xfs_buf *agbp,
2668 : int *stat)
2669 : {
2670 367212 : struct xfs_btree_cur *cnt_cur;
2671 367212 : xfs_agblock_t fbno;
2672 367212 : xfs_extlen_t flen;
2673 367212 : int error = 0;
2674 :
2675 367212 : cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, agbp,
2676 : args->pag, XFS_BTNUM_CNT);
2677 367205 : error = xfs_alloc_lookup_ge(cnt_cur, 0, args->minlen, stat);
2678 367215 : if (error)
2679 0 : goto out;
2680 :
2681 367215 : if (*stat == 0) {
2682 0 : xfs_btree_mark_sick(cnt_cur);
2683 0 : error = -EFSCORRUPTED;
2684 0 : goto out;
2685 : }
2686 :
2687 367215 : error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, stat);
2688 367207 : if (error)
2689 0 : goto out;
2690 :
2691 367207 : if (*stat == 1 && flen != args->minlen)
2692 2986 : *stat = 0;
2693 :
2694 364221 : out:
2695 367207 : xfs_btree_del_cursor(cnt_cur, error);
2696 :
2697 367215 : return error;
2698 : }
2699 : #endif
2700 :
/*
 * Decide whether to use this allocation group for this allocation.
 * If so, fix up the btree freelist's size.
 *
 * On success (return 0), args->agbp is set to the locked AGF buffer if the
 * AG is usable, or left NULL if the AG should simply be skipped (trylock
 * failure, not enough space, metadata-preferred AG for user data, ...).
 * A non-zero return is a real error and also leaves args->agbp NULL.
 */
int			/* error */
xfs_alloc_fix_freelist(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	uint32_t		alloc_flags)
{
	struct xfs_mount	*mp = args->mp;
	struct xfs_perag	*pag = args->pag;
	struct xfs_trans	*tp = args->tp;
	struct xfs_buf		*agbp = NULL;
	struct xfs_buf		*agflbp = NULL;
	struct xfs_alloc_arg	targs;	/* local allocation arguments */
	xfs_agblock_t		bno;	/* freelist block */
	xfs_extlen_t		need;	/* total blocks needed in freelist */
	int			error = 0;

	/* deferred ops (AGFL block frees) require permanent transactions */
	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);

	/*
	 * Read the AGF now if the perag has never been initialised from it;
	 * this populates the in-core counters the space checks below rely on.
	 */
	if (!xfs_perag_initialised_agf(pag)) {
		error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
		if (error) {
			/* Couldn't lock the AGF so skip this AG. */
			if (error == -EAGAIN)
				error = 0;
			goto out_no_agbp;
		}
	}

	/*
	 * If this is a metadata preferred pag and we are user data then try
	 * somewhere else if we are not being asked to try harder at this
	 * point
	 */
	if (xfs_perag_prefers_metadata(pag) &&
	    (args->datatype & XFS_ALLOC_USERDATA) &&
	    (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)) {
		ASSERT(!(alloc_flags & XFS_ALLOC_FLAG_FREEING));
		goto out_agbp_relse;
	}

	/*
	 * First space check is advisory (XFS_ALLOC_FLAG_CHECK): done against
	 * the in-core counters so we can reject the AG before taking the AGF
	 * lock in the common not-enough-space case.
	 */
	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, alloc_flags |
			XFS_ALLOC_FLAG_CHECK))
		goto out_agbp_relse;

	/*
	 * Get the a.g. freespace buffer.
	 * Can fail if we're not blocking on locks, and it's held.
	 */
	if (!agbp) {
		error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
		if (error) {
			/* Couldn't lock the AGF so skip this AG. */
			if (error == -EAGAIN)
				error = 0;
			goto out_no_agbp;
		}
	}

	/* reset a padding mismatched agfl before final free space check */
	if (xfs_perag_agfl_needs_reset(pag))
		xfs_agfl_reset(tp, agbp, pag);

	/* If there isn't enough total space or single-extent, reject it. */
	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, alloc_flags))
		goto out_agbp_relse;

#ifdef DEBUG
	if (args->alloc_minlen_only) {
		int stat;

		error = xfs_exact_minlen_extent_available(args, agbp, &stat);
		if (error || !stat)
			goto out_agbp_relse;
	}
#endif
	/*
	 * Make the freelist shorter if it's too long.
	 *
	 * Note that from this point onwards, we will always release the agf and
	 * agfl buffers on error. This handles the case where we error out and
	 * the buffers are clean or may not have been joined to the transaction
	 * and hence need to be released manually. If they have been joined to
	 * the transaction, then xfs_trans_brelse() will handle them
	 * appropriately based on the recursion count and dirty state of the
	 * buffer.
	 *
	 * XXX (dgc): When we have lots of free space, does this buy us
	 * anything other than extra overhead when we need to put more blocks
	 * back on the free list? Maybe we should only do this when space is
	 * getting low or the AGFL is more than half full?
	 *
	 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
	 * big; the NORMAP flag prevents AGFL expand/shrink operations from
	 * updating the rmapbt. Both flags are used in xfs_repair while we're
	 * rebuilding the rmapbt, and neither are used by the kernel. They're
	 * both required to ensure that rmaps are correctly recorded for the
	 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and
	 * repair/rmap.c in xfsprogs for details.
	 */
	memset(&targs, 0, sizeof(targs));
	/* struct copy below */
	if (alloc_flags & XFS_ALLOC_FLAG_NORMAP)
		targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
	else
		targs.oinfo = XFS_RMAP_OINFO_AG;
	while (!(alloc_flags & XFS_ALLOC_FLAG_NOSHRINK) &&
			pag->pagf_flcount > need) {
		error = xfs_alloc_get_freelist(pag, tp, agbp, &bno, 0);
		if (error)
			goto out_agbp_relse;

		/* defer agfl frees */
		error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
		if (error)
			goto out_agbp_relse;
	}

	/* Set up a local args struct for allocating AGFL refill blocks. */
	targs.tp = tp;
	targs.mp = mp;
	targs.agbp = agbp;
	targs.agno = args->agno;
	targs.alignment = targs.minlen = targs.prod = 1;
	targs.pag = pag;
	error = xfs_alloc_read_agfl(pag, tp, &agflbp);
	if (error)
		goto out_agbp_relse;

	/* Make the freelist longer if it's too short. */
	while (pag->pagf_flcount < need) {
		targs.agbno = 0;
		targs.maxlen = need - pag->pagf_flcount;
		targs.resv = XFS_AG_RESV_AGFL;

		/* Allocate as many blocks as possible at once. */
		error = xfs_alloc_ag_vextent_size(&targs, alloc_flags);
		if (error)
			goto out_agflbp_relse;

		/*
		 * Stop if we run out. Won't happen if callers are obeying
		 * the restrictions correctly. Can happen for free calls
		 * on a completely full ag.
		 */
		if (targs.agbno == NULLAGBLOCK) {
			if (alloc_flags & XFS_ALLOC_FLAG_FREEING)
				break;
			goto out_agflbp_relse;
		}

		/* Record an AG-owned rmap for the new AGFL blocks. */
		if (!xfs_rmap_should_skip_owner_update(&targs.oinfo)) {
			error = xfs_rmap_alloc(tp, agbp, pag,
				       targs.agbno, targs.len, &targs.oinfo);
			if (error)
				goto out_agflbp_relse;
		}
		error = xfs_alloc_update_counters(tp, agbp,
						  -((long)(targs.len)));
		if (error)
			goto out_agflbp_relse;

		/*
		 * Put each allocated block on the list.
		 */
		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
			error = xfs_alloc_put_freelist(pag, tp, agbp,
							agflbp, bno, 0);
			if (error)
				goto out_agflbp_relse;
		}
	}
	xfs_trans_brelse(tp, agflbp);
	args->agbp = agbp;
	return 0;

out_agflbp_relse:
	xfs_trans_brelse(tp, agflbp);
out_agbp_relse:
	if (agbp)
		xfs_trans_brelse(tp, agbp);
out_no_agbp:
	args->agbp = NULL;
	return error;
}
2890 :
/*
 * Get a block from the freelist.
 * Returns with the buffer for the block gotten.
 *
 * @bnop is set to the AG block number removed from the head of the AGFL,
 * or NULLAGBLOCK if the freelist is empty.  @btreeblk non-zero means the
 * block is going to a free-space btree, so agf_btreeblks is bumped too.
 */
int
xfs_alloc_get_freelist(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	xfs_agblock_t		*bnop,
	int			btreeblk)
{
	struct xfs_agf		*agf = agbp->b_addr;
	struct xfs_buf		*agflbp;
	xfs_agblock_t		bno;
	__be32			*agfl_bno;
	int			error;
	uint32_t		logflags;
	struct xfs_mount	*mp = tp->t_mountp;

	/*
	 * Freelist is empty, give up.
	 */
	if (!agf->agf_flcount) {
		*bnop = NULLAGBLOCK;
		return 0;
	}
	/*
	 * Read the array of free blocks.
	 */
	error = xfs_alloc_read_agfl(pag, tp, &agflbp);
	if (error)
		return error;


	/*
	 * Get the block number and update the data structures.
	 */
	agfl_bno = xfs_buf_to_agfl_bno(agflbp);
	bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
	/*
	 * NOTE(review): this early return does not brelse agflbp; presumably
	 * the buffer stays attached to the transaction and is released at
	 * commit/cancel time -- confirm against xfs_trans_read_buf semantics.
	 */
	if (XFS_IS_CORRUPT(tp->t_mountp, !xfs_verify_agbno(pag, bno)))
		return -EFSCORRUPTED;

	/* Advance the head index, wrapping at the end of the AGFL array. */
	be32_add_cpu(&agf->agf_flfirst, 1);
	xfs_trans_brelse(tp, agflbp);
	if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
		agf->agf_flfirst = 0;

	/* Keep the on-disk and in-core freelist counts in sync. */
	ASSERT(!xfs_perag_agfl_needs_reset(pag));
	be32_add_cpu(&agf->agf_flcount, -1);
	pag->pagf_flcount--;

	logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
	if (btreeblk) {
		be32_add_cpu(&agf->agf_btreeblks, 1);
		pag->pagf_btreeblks++;
		logflags |= XFS_AGF_BTREEBLKS;
	}

	xfs_alloc_log_agf(tp, agbp, logflags);
	*bnop = bno;

	return 0;
}
2955 :
/*
 * Log the given fields from the agf structure.
 *
 * @fields is a mask of XFS_AGF_* bits; each bit selects the corresponding
 * entry in the offsets[] table below, so the table order must match the
 * XFS_AGF_* bit definitions exactly.
 */
void
xfs_alloc_log_agf(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp,
	uint32_t		fields)
{
	int	first;		/* first byte offset */
	int	last;		/* last byte offset */
	static const short	offsets[] = {
		offsetof(xfs_agf_t, agf_magicnum),
		offsetof(xfs_agf_t, agf_versionnum),
		offsetof(xfs_agf_t, agf_seqno),
		offsetof(xfs_agf_t, agf_length),
		offsetof(xfs_agf_t, agf_roots[0]),
		offsetof(xfs_agf_t, agf_levels[0]),
		offsetof(xfs_agf_t, agf_flfirst),
		offsetof(xfs_agf_t, agf_fllast),
		offsetof(xfs_agf_t, agf_flcount),
		offsetof(xfs_agf_t, agf_freeblks),
		offsetof(xfs_agf_t, agf_longest),
		offsetof(xfs_agf_t, agf_btreeblks),
		offsetof(xfs_agf_t, agf_uuid),
		offsetof(xfs_agf_t, agf_rmap_blocks),
		offsetof(xfs_agf_t, agf_refcount_blocks),
		offsetof(xfs_agf_t, agf_refcount_root),
		offsetof(xfs_agf_t, agf_refcount_level),
		/* needed so that we don't log the whole rest of the structure: */
		offsetof(xfs_agf_t, agf_spare64),
		sizeof(xfs_agf_t)
	};

	trace_xfs_agf(tp->t_mountp, bp->b_addr, fields, _RET_IP_);

	/* Type the buffer so log recovery can replay it correctly. */
	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);

	/* Convert the field mask into a contiguous byte range and log it. */
	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
}
2997 :
2998 : /*
2999 : * Put the block on the freelist for the allocation group.
3000 : */
3001 : int
3002 641028 : xfs_alloc_put_freelist(
3003 : struct xfs_perag *pag,
3004 : struct xfs_trans *tp,
3005 : struct xfs_buf *agbp,
3006 : struct xfs_buf *agflbp,
3007 : xfs_agblock_t bno,
3008 : int btreeblk)
3009 : {
3010 641028 : struct xfs_mount *mp = tp->t_mountp;
3011 641028 : struct xfs_agf *agf = agbp->b_addr;
3012 641028 : __be32 *blockp;
3013 641028 : int error;
3014 641028 : uint32_t logflags;
3015 641028 : __be32 *agfl_bno;
3016 641028 : int startoff;
3017 :
3018 641028 : if (!agflbp) {
3019 181682 : error = xfs_alloc_read_agfl(pag, tp, &agflbp);
3020 181682 : if (error)
3021 : return error;
3022 : }
3023 :
3024 641028 : be32_add_cpu(&agf->agf_fllast, 1);
3025 1282054 : if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
3026 399 : agf->agf_fllast = 0;
3027 :
3028 1282054 : ASSERT(!xfs_perag_agfl_needs_reset(pag));
3029 641027 : be32_add_cpu(&agf->agf_flcount, 1);
3030 641027 : pag->pagf_flcount++;
3031 :
3032 641027 : logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
3033 641027 : if (btreeblk) {
3034 181682 : be32_add_cpu(&agf->agf_btreeblks, -1);
3035 181682 : pag->pagf_btreeblks--;
3036 181682 : logflags |= XFS_AGF_BTREEBLKS;
3037 : }
3038 :
3039 641027 : xfs_alloc_log_agf(tp, agbp, logflags);
3040 :
3041 1282056 : ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
3042 :
3043 641028 : agfl_bno = xfs_buf_to_agfl_bno(agflbp);
3044 641028 : blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
3045 641028 : *blockp = cpu_to_be32(bno);
3046 641028 : startoff = (char *)blockp - (char *)agflbp->b_addr;
3047 :
3048 641028 : xfs_alloc_log_agf(tp, agbp, logflags);
3049 :
3050 641028 : xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
3051 641028 : xfs_trans_log_buf(tp, agflbp, startoff,
3052 : startoff + sizeof(xfs_agblock_t) - 1);
3053 641028 : return 0;
3054 : }
3055 :
/*
 * Check that this AGF/AGI header's sequence number and length matches the AG
 * number and size in fsblocks.
 *
 * Returns NULL if the header is valid, or the failure address of the
 * check that rejected it.
 */
xfs_failaddr_t
xfs_validate_ag_length(
	struct xfs_buf		*bp,
	uint32_t		seqno,
	uint32_t		length)
{
	struct xfs_mount	*mp = bp->b_mount;
	/*
	 * During growfs operations, the perag is not fully initialised,
	 * so we can't use it for any useful checking. growfs ensures we can't
	 * use it by using uncached buffers that don't have the perag attached
	 * so we can detect and avoid this problem.
	 */
	if (bp->b_pag && seqno != bp->b_pag->pag_agno)
		return __this_address;

	/*
	 * Only the last AG in the filesystem is allowed to be shorter
	 * than the AG size recorded in the superblock.
	 */
	if (length != mp->m_sb.sb_agblocks) {
		/*
		 * During growfs, the new last AG can get here before we
		 * have updated the superblock. Give it a pass on the seqno
		 * check.
		 */
		if (bp->b_pag && seqno != mp->m_sb.sb_agcount - 1)
			return __this_address;
		/* A short AG must still be at least the minimum AG size... */
		if (length < XFS_MIN_AG_BLOCKS)
			return __this_address;
		/* ...and can never exceed the superblock AG size. */
		if (length > mp->m_sb.sb_agblocks)
			return __this_address;
	}

	return NULL;
}
3096 :
/*
 * Verify the AGF is consistent.
 *
 * We do not verify the AGFL indexes in the AGF are fully consistent here
 * because of issues with variable on-disk structure sizes. Instead, we check
 * the agfl indexes for consistency when we initialise the perag from the AGF
 * information after a read completes.
 *
 * If the index is inconsistent, then we mark the perag as needing an AGFL
 * reset. The first AGFL update performed then resets the AGFL indexes and
 * refills the AGFL with known good free blocks, allowing the filesystem to
 * continue operating normally at the cost of a few leaked free space blocks.
 */
static xfs_failaddr_t
xfs_agf_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_agf		*agf = bp->b_addr;
	xfs_failaddr_t		fa;
	uint32_t		agf_seqno = be32_to_cpu(agf->agf_seqno);
	uint32_t		agf_length = be32_to_cpu(agf->agf_length);

	/* CRC filesystems additionally carry a UUID and an LSN in the AGF. */
	if (xfs_has_crc(mp)) {
		if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
		if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn)))
			return __this_address;
	}

	if (!xfs_verify_magic(bp, agf->agf_magicnum))
		return __this_address;

	if (!XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)))
		return __this_address;

	/*
	 * Both agf_seqno and agf_length need to validated before anything else
	 * block number related in the AGF or AGFL can be checked.
	 */
	fa = xfs_validate_ag_length(bp, agf_seqno, agf_length);
	if (fa)
		return fa;

	/* AGFL head/tail indexes and count must fit inside the AGFL array. */
	if (be32_to_cpu(agf->agf_flfirst) >= xfs_agfl_size(mp))
		return __this_address;
	if (be32_to_cpu(agf->agf_fllast) >= xfs_agfl_size(mp))
		return __this_address;
	if (be32_to_cpu(agf->agf_flcount) > xfs_agfl_size(mp))
		return __this_address;

	/* The longest free extent cannot exceed the total free space. */
	if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
	    be32_to_cpu(agf->agf_freeblks) > agf_length)
		return __this_address;

	/* bnobt/cntbt levels must be sane for this filesystem geometry. */
	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) >
						mp->m_alloc_maxlevels ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) >
						mp->m_alloc_maxlevels)
		return __this_address;

	if (xfs_has_lazysbcount(mp) &&
	    be32_to_cpu(agf->agf_btreeblks) > agf_length)
		return __this_address;

	/* rmapbt block count and level checks only apply when it exists. */
	if (xfs_has_rmapbt(mp)) {
		if (be32_to_cpu(agf->agf_rmap_blocks) > agf_length)
			return __this_address;

		if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
		    be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
							mp->m_rmap_maxlevels)
			return __this_address;
	}

	/* Likewise for the refcount btree on reflink filesystems. */
	if (xfs_has_reflink(mp)) {
		if (be32_to_cpu(agf->agf_refcount_blocks) > agf_length)
			return __this_address;

		if (be32_to_cpu(agf->agf_refcount_level) < 1 ||
		    be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels)
			return __this_address;
	}

	return NULL;
}
3185 :
3186 : static void
3187 220020 : xfs_agf_read_verify(
3188 : struct xfs_buf *bp)
3189 : {
3190 220020 : struct xfs_mount *mp = bp->b_mount;
3191 220020 : xfs_failaddr_t fa;
3192 :
3193 440022 : if (xfs_has_crc(mp) &&
3194 : !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
3195 8 : xfs_verifier_error(bp, -EFSBADCRC, __this_address);
3196 : else {
3197 220012 : fa = xfs_agf_verify(bp);
3198 220012 : if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
3199 12 : xfs_verifier_error(bp, -EFSCORRUPTED, fa);
3200 : }
3201 220020 : }
3202 :
3203 : static void
3204 906210 : xfs_agf_write_verify(
3205 : struct xfs_buf *bp)
3206 : {
3207 906210 : struct xfs_mount *mp = bp->b_mount;
3208 906210 : struct xfs_buf_log_item *bip = bp->b_log_item;
3209 906210 : struct xfs_agf *agf = bp->b_addr;
3210 906210 : xfs_failaddr_t fa;
3211 :
3212 906210 : fa = xfs_agf_verify(bp);
3213 906210 : if (fa) {
3214 0 : xfs_verifier_error(bp, -EFSCORRUPTED, fa);
3215 0 : return;
3216 : }
3217 :
3218 906210 : if (!xfs_has_crc(mp))
3219 : return;
3220 :
3221 903916 : if (bip)
3222 898508 : agf->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
3223 :
3224 903916 : xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
3225 : }
3226 :
/* Buffer verifier ops for AGF header buffers. */
const struct xfs_buf_ops xfs_agf_buf_ops = {
	.name = "xfs_agf",
	.magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
	.verify_read = xfs_agf_read_verify,
	.verify_write = xfs_agf_write_verify,
	.verify_struct = xfs_agf_verify,
};
3234 :
/*
 * Read in the allocation group header (free/alloc section).
 *
 * On success, *agfbpp holds the locked AGF buffer.  A verifier failure
 * additionally marks the AG's AGF metadata sick before the error is
 * returned.
 */
int
xfs_read_agf(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	int			flags,
	struct xfs_buf		**agfbpp)
{
	struct xfs_mount	*mp = pag->pag_mount;
	int			error;

	trace_xfs_read_agf(pag->pag_mount, pag->pag_agno);

	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGF_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), flags, agfbpp, &xfs_agf_buf_ops);
	/* Distinguish corruption from e.g. trylock failure for health tracking. */
	if (xfs_metadata_is_sick(error))
		xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF);
	if (error)
		return error;

	xfs_buf_set_ref(*agfbpp, XFS_AGF_REF);
	return 0;
}
3261 :
/*
 * Read in the allocation group header (free/alloc section) and initialise the
 * perag structure if necessary. If the caller provides @agfbpp, then return the
 * locked buffer to the caller, otherwise free it.
 */
int
xfs_alloc_read_agf(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	int			flags,
	struct xfs_buf		**agfbpp)
{
	struct xfs_buf		*agfbp;
	struct xfs_agf		*agf;
	int			error;
	int			allocbt_blks;

	trace_xfs_alloc_read_agf(pag->pag_mount, pag->pag_agno);

	/* We don't support trylock when freeing. */
	ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
			(XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK));
	error = xfs_read_agf(pag, tp,
			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
			&agfbp);
	if (error)
		return error;

	agf = agfbp->b_addr;
	/* First read: populate the in-core perag counters from the AGF. */
	if (!xfs_perag_initialised_agf(pag)) {
		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
		pag->pagf_levels[XFS_BTNUM_BNOi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
		pag->pagf_levels[XFS_BTNUM_CNTi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
		pag->pagf_levels[XFS_BTNUM_RMAPi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
		pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
		/* Flag inconsistent AGFL indexes for a deferred reset. */
		if (xfs_agfl_needs_reset(pag->pag_mount, agf))
			set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
		else
			clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);

		/*
		 * Update the in-core allocbt counter. Filter out the rmapbt
		 * subset of the btreeblks counter because the rmapbt is managed
		 * by perag reservation. Subtract one for the rmapbt root block
		 * because the rmap counter includes it while the btreeblks
		 * counter only tracks non-root blocks.
		 */
		allocbt_blks = pag->pagf_btreeblks;
		if (xfs_has_rmapbt(pag->pag_mount))
			allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
		if (allocbt_blks > 0)
			atomic64_add(allocbt_blks,
					&pag->pag_mount->m_allocbt_blks);

		set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
	}
#ifdef DEBUG
	/* On re-reads, cross-check the in-core counters against the AGF. */
	else if (!xfs_is_shutdown(pag->pag_mount)) {
		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
	}
#endif
	if (agfbpp)
		*agfbpp = agfbp;
	else
		xfs_trans_brelse(tp, agfbp);
	return 0;
}
3342 :
/*
 * Pre-proces allocation arguments to set initial state that we don't require
 * callers to set up correctly, as well as bounds check the allocation args
 * that are set up.
 *
 * Sets *minimum_agno to the lowest AG this transaction may allocate from
 * (to preserve AGF lock ordering).  Returns -ENOSPC for out-of-bounds
 * arguments; the ASSERTs below duplicate the production checks so debug
 * builds trap instead of silently returning ENOSPC.
 */
static int
xfs_alloc_vextent_check_args(
	struct xfs_alloc_arg	*args,
	xfs_fsblock_t		target,
	xfs_agnumber_t		*minimum_agno)
{
	struct xfs_mount	*mp = args->mp;
	xfs_agblock_t		agsize;

	args->fsbno = NULLFSBLOCK;

	/* Honour the highest AG already locked by this transaction. */
	*minimum_agno = 0;
	if (args->tp->t_highest_agno != NULLAGNUMBER)
		*minimum_agno = args->tp->t_highest_agno;

	/*
	 * Just fix this up, for the case where the last a.g. is shorter
	 * (or there's only one a.g.) and the caller couldn't easily figure
	 * that out (xfs_bmap_alloc).
	 */
	agsize = mp->m_sb.sb_agblocks;
	if (args->maxlen > agsize)
		args->maxlen = agsize;
	if (args->alignment == 0)
		args->alignment = 1;

	ASSERT(args->minlen > 0);
	ASSERT(args->maxlen > 0);
	ASSERT(args->alignment > 0);
	ASSERT(args->resv != XFS_AG_RESV_AGFL);

	ASSERT(XFS_FSB_TO_AGNO(mp, target) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, target) < agsize);
	ASSERT(args->minlen <= args->maxlen);
	ASSERT(args->minlen <= agsize);
	ASSERT(args->mod < args->prod);

	/* Same checks as the ASSERTs above, enforced in production builds. */
	if (XFS_FSB_TO_AGNO(mp, target) >= mp->m_sb.sb_agcount ||
	    XFS_FSB_TO_AGBNO(mp, target) >= agsize ||
	    args->minlen > args->maxlen || args->minlen > agsize ||
	    args->mod >= args->prod) {
		trace_xfs_alloc_vextent_badargs(args);
		return -ENOSPC;
	}

	/* Allocating below the transaction's highest locked AG would deadlock. */
	if (args->agno != NULLAGNUMBER && *minimum_agno > args->agno) {
		trace_xfs_alloc_vextent_skip_deadlock(args);
		return -ENOSPC;
	}
	return 0;

}
3400 :
/*
 * Prepare an AG for allocation. If the AG is not prepared to accept the
 * allocation, return failure.
 *
 * XXX(dgc): The complexity of "need_pag" will go away as all caller paths are
 * modified to hold their own perag references.
 *
 * On success, args->agbp is non-NULL if the AG can be allocated from, or
 * NULL (with args->agbno = NULLAGBLOCK) if this AG must be skipped.
 */
static int
xfs_alloc_vextent_prepare_ag(
	struct xfs_alloc_arg	*args,
	uint32_t		alloc_flags)
{
	bool			need_pag = !args->pag;
	int			error;

	if (need_pag)
		args->pag = xfs_perag_get(args->mp, args->agno);

	args->agbp = NULL;
	error = xfs_alloc_fix_freelist(args, alloc_flags);
	if (error) {
		trace_xfs_alloc_vextent_nofix(args);
		/* Drop the reference we took above; the caller never saw it. */
		if (need_pag)
			xfs_perag_put(args->pag);
		args->agbno = NULLAGBLOCK;
		return error;
	}
	if (!args->agbp) {
		/* cannot allocate in this AG at all */
		trace_xfs_alloc_vextent_noagbp(args);
		args->agbno = NULLAGBLOCK;
		return 0;
	}
	args->wasfromfl = 0;
	return 0;
}
3437 :
/*
 * Post-process allocation results to account for the allocation if it succeed
 * and set the allocated block number correctly for the caller.
 *
 * XXX: we should really be returning ENOSPC for ENOSPC, not
 * hiding it behind a "successful" NULLFSBLOCK allocation.
 *
 * @drop_perag controls whether args->pag is released here or left for the
 * caller (callers that passed in their own reference keep it).
 */
static int
xfs_alloc_vextent_finish(
	struct xfs_alloc_arg	*args,
	xfs_agnumber_t		minimum_agno,
	int			alloc_error,
	bool			drop_perag)
{
	struct xfs_mount	*mp = args->mp;
	int			error = 0;

	/*
	 * We can end up here with a locked AGF. If we failed, the caller is
	 * likely going to try to allocate again with different parameters, and
	 * that can widen the AGs that are searched for free space. If we have
	 * to do BMBT block allocation, we have to do a new allocation.
	 *
	 * Hence leaving this function with the AGF locked opens up potential
	 * ABBA AGF deadlocks because a future allocation attempt in this
	 * transaction may attempt to lock a lower number AGF.
	 *
	 * We can't release the AGF until the transaction is commited, so at
	 * this point we must update the "first allocation" tracker to point at
	 * this AG if the tracker is empty or points to a lower AG. This allows
	 * the next allocation attempt to be modified appropriately to avoid
	 * deadlocks.
	 */
	if (args->agbp &&
	    (args->tp->t_highest_agno == NULLAGNUMBER ||
	     args->agno > minimum_agno))
		args->tp->t_highest_agno = args->agno;

	/*
	 * If the allocation failed with an error or we had an ENOSPC result,
	 * preserve the returned error whilst also marking the allocation result
	 * as "no extent allocated". This ensures that callers that fail to
	 * capture the error will still treat it as a failed allocation.
	 */
	if (alloc_error || args->agbno == NULLAGBLOCK) {
		args->fsbno = NULLFSBLOCK;
		error = alloc_error;
		goto out_drop_perag;
	}

	args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);

	ASSERT(args->len >= args->minlen);
	ASSERT(args->len <= args->maxlen);
	ASSERT(args->agbno % args->alignment == 0);
	XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len);

	/* if not file data, insert new block into the reverse map btree */
	if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
		error = xfs_rmap_alloc(args->tp, args->agbp, args->pag,
				       args->agbno, args->len, &args->oinfo);
		if (error)
			goto out_drop_perag;
	}

	/* Freelist allocations were already accounted when refilling the AGFL. */
	if (!args->wasfromfl) {
		error = xfs_alloc_update_counters(args->tp, args->agbp,
						  -((long)(args->len)));
		if (error)
			goto out_drop_perag;

		ASSERT(!xfs_extent_busy_search(mp, args->pag, args->agbno,
					       args->len));
	}

	xfs_ag_resv_alloc_extent(args->pag, args->resv, args);

	XFS_STATS_INC(mp, xs_allocx);
	XFS_STATS_ADD(mp, xs_allocb, args->len);

	trace_xfs_alloc_vextent_finish(args);

out_drop_perag:
	if (drop_perag && args->pag) {
		xfs_perag_rele(args->pag);
		args->pag = NULL;
	}
	return error;
}
3527 :
3528 : /*
3529 : * Allocate within a single AG only. This uses a best-fit length algorithm so if
3530 : * you need an exact sized allocation without locality constraints, this is the
3531 : * fastest way to do it.
3532 : *
3533 : * Caller is expected to hold a perag reference in args->pag.
3534 : */
3535 : int
3536 0 : xfs_alloc_vextent_this_ag(
3537 : struct xfs_alloc_arg *args,
3538 : xfs_agnumber_t agno)
3539 : {
3540 0 : struct xfs_mount *mp = args->mp;
3541 0 : xfs_agnumber_t minimum_agno;
3542 0 : uint32_t alloc_flags = 0;
3543 0 : int error;
3544 :
3545 0 : ASSERT(args->pag != NULL);
3546 0 : ASSERT(args->pag->pag_agno == agno);
3547 :
3548 0 : args->agno = agno;
3549 0 : args->agbno = 0;
3550 :
3551 0 : trace_xfs_alloc_vextent_this_ag(args);
3552 :
3553 0 : error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0),
3554 : &minimum_agno);
3555 0 : if (error) {
3556 0 : if (error == -ENOSPC)
3557 : return 0;
3558 0 : return error;
3559 : }
3560 :
3561 0 : error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
3562 0 : if (!error && args->agbp)
3563 0 : error = xfs_alloc_ag_vextent_size(args, alloc_flags);
3564 :
3565 0 : return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
3566 : }
3567 :
/*
 * Iterate all AGs trying to allocate an extent starting from @start_ag.
 *
 * If the incoming allocation type is XFS_ALLOCTYPE_NEAR_BNO, it means the
 * allocation attempts in @start_agno have locality information. If we fail to
 * allocate in that AG, then we revert to anywhere-in-AG for all the other AGs
 * we attempt to allocation in as there is no locality optimisation possible for
 * those allocations.
 *
 * On return, args->pag may be left referenced if we finish before the "all
 * failed" return point. The allocation finish still needs the perag, and
 * so the caller will release it once they've finished the allocation.
 *
 * When we wrap the AG iteration at the end of the filesystem, we have to be
 * careful not to wrap into AGs below ones we already have locked in the
 * transaction if we are doing a blocking iteration. This will result in an
 * out-of-order locking of AGFs and hence can cause deadlocks.
 */
static int
xfs_alloc_vextent_iterate_ags(
	struct xfs_alloc_arg	*args,
	xfs_agnumber_t		minimum_agno,
	xfs_agnumber_t		start_agno,
	xfs_agblock_t		target_agbno,
	uint32_t		alloc_flags)
{
	struct xfs_mount	*mp = args->mp;
	xfs_agnumber_t		restart_agno = minimum_agno;
	xfs_agnumber_t		agno;
	int			error = 0;

	/* Trylock passes may wrap all the way to AG 0 without deadlocking. */
	if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)
		restart_agno = 0;
restart:
	for_each_perag_wrap_range(mp, start_agno, restart_agno,
			mp->m_sb.sb_agcount, agno, args->pag) {
		args->agno = agno;
		error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
		if (error)
			break;
		if (!args->agbp) {
			/* This AG can't satisfy the request; try the next one. */
			trace_xfs_alloc_vextent_loopfailed(args);
			continue;
		}

		/*
		 * Allocation is supposed to succeed now, so break out of the
		 * loop regardless of whether we succeed or not.
		 */
		if (args->agno == start_agno && target_agbno) {
			/* Only the starting AG carries locality information. */
			args->agbno = target_agbno;
			error = xfs_alloc_ag_vextent_near(args, alloc_flags);
		} else {
			args->agbno = 0;
			error = xfs_alloc_ag_vextent_size(args, alloc_flags);
		}
		break;
	}
	if (error) {
		xfs_perag_rele(args->pag);
		args->pag = NULL;
		return error;
	}
	if (args->agbp)
		return 0;

	/*
	 * We didn't find an AG we can alloation from. If we were given
	 * constraining flags by the caller, drop them and retry the allocation
	 * without any constraints being set.
	 */
	if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK) {
		alloc_flags &= ~XFS_ALLOC_FLAG_TRYLOCK;
		restart_agno = minimum_agno;
		goto restart;
	}

	/* Every AG was tried (blocking) and none could satisfy the request. */
	ASSERT(args->pag == NULL);
	trace_xfs_alloc_vextent_allfailed(args);
	return 0;
}
3649 :
3650 : /*
 * Iterate the AGs from the start AG to the end of the filesystem, trying
3652 : * to allocate blocks. It starts with a near allocation attempt in the initial
3653 : * AG, then falls back to anywhere-in-ag after the first AG fails. It will wrap
3654 : * back to zero if allowed by previous allocations in this transaction,
3655 : * otherwise will wrap back to the start AG and run a second blocking pass to
3656 : * the end of the filesystem.
3657 : */
3658 : int
3659 62497678 : xfs_alloc_vextent_start_ag(
3660 : struct xfs_alloc_arg *args,
3661 : xfs_fsblock_t target)
3662 : {
3663 62497678 : struct xfs_mount *mp = args->mp;
3664 62497678 : xfs_agnumber_t minimum_agno;
3665 62497678 : xfs_agnumber_t start_agno;
3666 62497678 : xfs_agnumber_t rotorstep = xfs_rotorstep;
3667 62497678 : bool bump_rotor = false;
3668 62497678 : uint32_t alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
3669 62497678 : int error;
3670 :
3671 62497678 : ASSERT(args->pag == NULL);
3672 :
3673 62497678 : args->agno = NULLAGNUMBER;
3674 62497678 : args->agbno = NULLAGBLOCK;
3675 :
3676 62497678 : trace_xfs_alloc_vextent_start_ag(args);
3677 :
3678 62497689 : error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
3679 62496545 : if (error) {
3680 0 : if (error == -ENOSPC)
3681 : return 0;
3682 0 : return error;
3683 : }
3684 :
3685 64278829 : if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
3686 : xfs_is_inode32(mp)) {
3687 0 : target = XFS_AGB_TO_FSB(mp,
3688 : ((mp->m_agfrotor / rotorstep) %
3689 : mp->m_sb.sb_agcount), 0);
3690 0 : bump_rotor = 1;
3691 : }
3692 :
3693 62496545 : start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
3694 62496545 : error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
3695 : XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
3696 :
3697 62498692 : if (bump_rotor) {
3698 0 : if (args->agno == start_agno)
3699 0 : mp->m_agfrotor = (mp->m_agfrotor + 1) %
3700 0 : (mp->m_sb.sb_agcount * rotorstep);
3701 : else
3702 0 : mp->m_agfrotor = (args->agno * rotorstep + 1) %
3703 0 : (mp->m_sb.sb_agcount * rotorstep);
3704 : }
3705 :
3706 62498692 : return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
3707 : }
3708 :
3709 : /*
3710 : * Iterate from the agno indicated via @target through to the end of the
3711 : * filesystem attempting blocking allocation. This does not wrap or try a second
3712 : * pass, so will not recurse into AGs lower than indicated by the target.
3713 : */
int
xfs_alloc_vextent_first_ag(
	struct xfs_alloc_arg	*args,
	xfs_fsblock_t		target)
{
	struct xfs_mount	*mp = args->mp;
	xfs_agnumber_t		minimum_agno;
	xfs_agnumber_t		start_agno;
	uint32_t		alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
	int			error;

	/* The iteration grabs its own perag references. */
	ASSERT(args->pag == NULL);

	args->agno = NULLAGNUMBER;
	args->agbno = NULLAGBLOCK;

	trace_xfs_alloc_vextent_first_ag(args);

	error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
	if (error) {
		/* -ENOSPC here means "no allocation possible", not failure. */
		if (error == -ENOSPC)
			return 0;
		return error;
	}

	/* Never start below the lowest AG this transaction may lock. */
	start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
	error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
			XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
	return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
}
3744 :
3745 : /*
3746 : * Allocate at the exact block target or fail. Caller is expected to hold a
3747 : * perag reference in args->pag.
3748 : */
int
xfs_alloc_vextent_exact_bno(
	struct xfs_alloc_arg	*args,
	xfs_fsblock_t		target)
{
	struct xfs_mount	*mp = args->mp;
	xfs_agnumber_t		minimum_agno;
	int			error;

	/* Caller's perag reference must match the AG of the target block. */
	ASSERT(args->pag != NULL);
	ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));

	args->agno = XFS_FSB_TO_AGNO(mp, target);
	args->agbno = XFS_FSB_TO_AGBNO(mp, target);

	trace_xfs_alloc_vextent_exact_bno(args);

	error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
	if (error) {
		/* -ENOSPC here means "no allocation possible", not failure. */
		if (error == -ENOSPC)
			return 0;
		return error;
	}

	/* No trylock flags: we either allocate from this exact AG or fail. */
	error = xfs_alloc_vextent_prepare_ag(args, 0);
	if (!error && args->agbp)
		error = xfs_alloc_ag_vextent_exact(args);

	/* Pass false: the caller owns the perag reference, don't drop it. */
	return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
}
3779 :
3780 : /*
3781 : * Allocate an extent as close to the target as possible. If there are not
3782 : * viable candidates in the AG, then fail the allocation.
3783 : *
3784 : * Caller may or may not have a per-ag reference in args->pag.
3785 : */
int
xfs_alloc_vextent_near_bno(
	struct xfs_alloc_arg	*args,
	xfs_fsblock_t		target)
{
	struct xfs_mount	*mp = args->mp;
	xfs_agnumber_t		minimum_agno;
	bool			needs_perag = args->pag == NULL;
	uint32_t		alloc_flags = 0;
	int			error;

	/* If the caller supplied a perag, it must match the target's AG. */
	if (!needs_perag)
		ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));

	args->agno = XFS_FSB_TO_AGNO(mp, target);
	args->agbno = XFS_FSB_TO_AGBNO(mp, target);

	trace_xfs_alloc_vextent_near_bno(args);

	error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
	if (error) {
		/* -ENOSPC here means "no allocation possible", not failure. */
		if (error == -ENOSPC)
			return 0;
		return error;
	}

	/* Grab our own perag reference when the caller didn't supply one. */
	if (needs_perag)
		args->pag = xfs_perag_grab(mp, args->agno);

	error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
	if (!error && args->agbp)
		error = xfs_alloc_ag_vextent_near(args, alloc_flags);

	/* Only release the perag reference if we grabbed it ourselves. */
	return xfs_alloc_vextent_finish(args, minimum_agno, error, needs_perag);
}
3821 :
3822 : /* Ensure that the freelist is at full capacity. */
int
xfs_free_extent_fix_freelist(
	struct xfs_trans	*tp,
	struct xfs_perag	*pag,
	struct xfs_buf		**agbp)
{
	struct xfs_alloc_arg	args;
	int			error;

	memset(&args, 0, sizeof(struct xfs_alloc_arg));
	args.tp = tp;
	args.mp = tp->t_mountp;
	args.agno = pag->pag_agno;
	args.pag = pag;

	/*
	 * validate that the block number is legal - this enables us to detect
	 * and handle a silent filesystem corruption rather than crashing.
	 */
	if (args.agno >= args.mp->m_sb.sb_agcount)
		return -EFSCORRUPTED;

	error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
	if (error)
		return error;

	/* Hand the AGF buffer obtained by xfs_alloc_fix_freelist() back. */
	*agbp = args.agbp;
	return 0;
}
3852 :
3853 : /*
3854 : * Free an extent.
3855 : * Just break up the extent address and hand off to xfs_free_ag_extent
3856 : * after fixing up the freelist.
3857 : */
int
__xfs_free_extent(
	struct xfs_trans	*tp,
	struct xfs_perag	*pag,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len,
	const struct xfs_owner_info	*oinfo,
	enum xfs_ag_resv_type	type,
	bool			skip_discard)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_buf		*agbp;
	struct xfs_agf		*agf;
	int			error;
	unsigned int		busy_flags = 0;

	ASSERT(len != 0);
	ASSERT(type != XFS_AG_RESV_AGFL);

	/* Error injection point to exercise free-extent failure paths. */
	if (XFS_TEST_ERROR(false, mp,
			XFS_ERRTAG_FREE_EXTENT))
		return -EIO;

	error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
	if (error) {
		if (xfs_metadata_is_sick(error))
			xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
		return error;
	}

	agf = agbp->b_addr;

	/* Reject a start block beyond the size of an AG. */
	if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
		xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
		error = -EFSCORRUPTED;
		goto err_release;
	}

	/* validate the extent size is legal now we have the agf locked */
	if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) {
		xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
		error = -EFSCORRUPTED;
		goto err_release;
	}

	error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo,
			type);
	if (error)
		goto err_release;

	/* Record the freed range as busy; discard is skipped per caller. */
	if (skip_discard)
		busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
	xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
	return 0;

err_release:
	/* Give the AGF buffer back to the transaction on any failure. */
	xfs_trans_brelse(tp, agbp);
	return error;
}
3917 :
/* Context carried through xfs_alloc_query_range()/xfs_alloc_query_all(). */
struct xfs_alloc_query_range_info {
	xfs_alloc_query_range_fn	fn;	/* per-record callback */
	void				*priv;	/* opaque data passed to fn */
};
3922 :
3923 : /* Format btree record and pass to our callback. */
3924 : STATIC int
3925 941459424 : xfs_alloc_query_range_helper(
3926 : struct xfs_btree_cur *cur,
3927 : const union xfs_btree_rec *rec,
3928 : void *priv)
3929 : {
3930 941459424 : struct xfs_alloc_query_range_info *query = priv;
3931 941459424 : struct xfs_alloc_rec_incore irec;
3932 941459424 : xfs_failaddr_t fa;
3933 :
3934 941459424 : xfs_alloc_btrec_to_irec(rec, &irec);
3935 941461480 : fa = xfs_alloc_check_irec(cur, &irec);
3936 941464878 : if (fa)
3937 0 : return xfs_alloc_complain_bad_rec(cur, fa, &irec);
3938 :
3939 941464878 : return query->fn(cur, &irec, query->priv);
3940 : }
3941 :
3942 : /* Find all free space within a given range of blocks. */
int
xfs_alloc_query_range(
	struct xfs_btree_cur			*cur,
	const struct xfs_alloc_rec_incore	*low_rec,
	const struct xfs_alloc_rec_incore	*high_rec,
	xfs_alloc_query_range_fn		fn,
	void					*priv)
{
	union xfs_btree_irec		low_brec = { .a = *low_rec };
	union xfs_btree_irec		high_brec = { .a = *high_rec };
	struct xfs_alloc_query_range_info	query = { .priv = priv, .fn = fn };

	/* Range queries are only supported against the by-bno btree. */
	ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
	return xfs_btree_query_range(cur, &low_brec, &high_brec,
			xfs_alloc_query_range_helper, &query);
}
3959 :
3960 : /* Find all free space records. */
3961 : int
3962 572498 : xfs_alloc_query_all(
3963 : struct xfs_btree_cur *cur,
3964 : xfs_alloc_query_range_fn fn,
3965 : void *priv)
3966 : {
3967 572498 : struct xfs_alloc_query_range_info query;
3968 :
3969 572498 : ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3970 572498 : query.priv = priv;
3971 572498 : query.fn = fn;
3972 572498 : return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
3973 : }
3974 :
3975 : /*
3976 : * Scan part of the keyspace of the free space and tell us if the area has no
3977 : * records, is fully mapped by records, or is partially filled.
3978 : */
3979 : int
3980 1854883470 : xfs_alloc_has_records(
3981 : struct xfs_btree_cur *cur,
3982 : xfs_agblock_t bno,
3983 : xfs_extlen_t len,
3984 : enum xbtree_recpacking *outcome)
3985 : {
3986 1854883470 : union xfs_btree_irec low;
3987 1854883470 : union xfs_btree_irec high;
3988 :
3989 1854883470 : memset(&low, 0, sizeof(low));
3990 1854883470 : low.a.ar_startblock = bno;
3991 1854883470 : memset(&high, 0xFF, sizeof(high));
3992 1854883470 : high.a.ar_startblock = bno + len - 1;
3993 :
3994 1854883470 : return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
3995 : }
3996 :
3997 : /*
3998 : * Walk all the blocks in the AGFL. The @walk_fn can return any negative
3999 : * error code or XFS_ITER_*.
4000 : */
4001 : int
4002 58330447 : xfs_agfl_walk(
4003 : struct xfs_mount *mp,
4004 : struct xfs_agf *agf,
4005 : struct xfs_buf *agflbp,
4006 : xfs_agfl_walk_fn walk_fn,
4007 : void *priv)
4008 : {
4009 58330447 : __be32 *agfl_bno;
4010 58330447 : unsigned int i;
4011 58330447 : int error;
4012 :
4013 58330447 : agfl_bno = xfs_buf_to_agfl_bno(agflbp);
4014 58330447 : i = be32_to_cpu(agf->agf_flfirst);
4015 :
4016 : /* Nothing to walk in an empty AGFL. */
4017 58330447 : if (agf->agf_flcount == cpu_to_be32(0))
4018 : return 0;
4019 :
4020 : /* Otherwise, walk from first to last, wrapping as needed. */
4021 531704372 : for (;;) {
4022 1063408744 : error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
4023 531704387 : if (error)
4024 2960742 : return error;
4025 1057487290 : if (i == be32_to_cpu(agf->agf_fllast))
4026 : break;
4027 946767160 : if (++i == xfs_agfl_size(mp))
4028 0 : i = 0;
4029 : }
4030 :
4031 : return 0;
4032 : }
4033 :
4034 : int __init
4035 12 : xfs_extfree_intent_init_cache(void)
4036 : {
4037 12 : xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent",
4038 : sizeof(struct xfs_extent_free_item),
4039 : 0, 0, NULL);
4040 :
4041 12 : return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM;
4042 : }
4043 :
4044 : void
4045 12 : xfs_extfree_intent_destroy_cache(void)
4046 : {
4047 12 : kmem_cache_destroy(xfs_extfree_item_cache);
4048 12 : xfs_extfree_item_cache = NULL;
4049 12 : }
4050 :
4051 : /*
4052 : * Find the next chunk of free space in @pag starting at @agbno and going no
4053 : * higher than @end_agbno. Set @agbno and @len to whatever free space we find,
4054 : * or to @end_agbno if we find no space.
4055 : */
int
xfs_alloc_find_freesp(
	struct xfs_trans	*tp,
	struct xfs_perag	*pag,
	xfs_agblock_t		*agbno,
	xfs_agblock_t		end_agbno,
	xfs_extlen_t		*len)
{
	struct xfs_mount	*mp = pag->pag_mount;
	struct xfs_btree_cur	*cur;
	struct xfs_buf		*agf_bp = NULL;
	xfs_agblock_t		found_agbno;
	xfs_extlen_t		found_len;
	int			found;
	int			error;

	trace_xfs_alloc_find_freesp(mp, pag->pag_agno, *agbno,
			end_agbno - *agbno);

	error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp);
	if (error)
		return error;

	/*
	 * NOTE(review): agf_bp is not released here; presumably the
	 * transaction owns it after xfs_alloc_read_agf() - confirm.
	 */
	cur = xfs_allocbt_init_cursor(mp, tp, agf_bp, pag, XFS_BTNUM_BNO);

	/* Try to find a free extent that starts before here. */
	error = xfs_alloc_lookup_le(cur, *agbno, 0, &found);
	if (error)
		goto out_cur;
	if (found) {
		error = xfs_alloc_get_rec(cur, &found_agbno, &found_len,
				&found);
		if (error)
			goto out_cur;
		if (XFS_IS_CORRUPT(mp, !found)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_cur;
		}

		/* Use this record only if it extends past *agbno. */
		if (found_agbno + found_len > *agbno)
			goto found;
	}

	/* Examine the next record if free extent not in range. */
	error = xfs_btree_increment(cur, 0, &found);
	if (error)
		goto out_cur;
	if (!found)
		goto next_ag;

	error = xfs_alloc_get_rec(cur, &found_agbno, &found_len, &found);
	if (error)
		goto out_cur;
	if (XFS_IS_CORRUPT(mp, !found)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_cur;
	}

	/* The next free extent starts at or beyond the search range. */
	if (found_agbno >= end_agbno)
		goto next_ag;

found:
	/* Found something, so update the mapping. */
	trace_xfs_alloc_find_freesp_done(mp, pag->pag_agno, found_agbno,
			found_len);
	/* Clamp the extent so it starts no earlier than the caller asked. */
	if (found_agbno < *agbno) {
		found_len -= *agbno - found_agbno;
		found_agbno = *agbno;
	}
	*len = found_len;
	*agbno = found_agbno;
	goto out_cur;
next_ag:
	/* Found nothing, so advance the cursor beyond the end of the range. */
	*agbno = end_agbno;
	*len = 0;
out_cur:
	/* Tear down the cursor; error != 0 marks it as a failed lookup. */
	xfs_btree_del_cursor(cur, error);
	return error;
}
|